diff options
author | thegeorg <thegeorg@yandex-team.ru> | 2022-02-10 16:45:12 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:12 +0300 |
commit | 49116032d905455a7b1c994e4a696afc885c1e71 (patch) | |
tree | be835aa92c6248212e705f25388ebafcf84bc7a1 /contrib/libs/hyperscan/src/nfa | |
parent | 4e839db24a3bbc9f1c610c43d6faaaa99824dcca (diff) | |
download | ydb-49116032d905455a7b1c994e4a696afc885c1e71.tar.gz |
Restoring authorship annotation for <thegeorg@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/hyperscan/src/nfa')
33 files changed, 6441 insertions, 6441 deletions
diff --git a/contrib/libs/hyperscan/src/nfa/accel_dfa_build_strat.cpp b/contrib/libs/hyperscan/src/nfa/accel_dfa_build_strat.cpp index 842665f1cc..ae71e141a2 100644 --- a/contrib/libs/hyperscan/src/nfa/accel_dfa_build_strat.cpp +++ b/contrib/libs/hyperscan/src/nfa/accel_dfa_build_strat.cpp @@ -214,7 +214,7 @@ static bool double_byte_ok(const AccelScheme &info) { return !info.double_byte.empty() && info.double_cr.count() < info.double_byte.size() && - info.double_cr.count() <= 2; + info.double_cr.count() <= 2; } static diff --git a/contrib/libs/hyperscan/src/nfa/accel_dfa_build_strat.h b/contrib/libs/hyperscan/src/nfa/accel_dfa_build_strat.h index cb47d38cc5..53a6f35b3d 100644 --- a/contrib/libs/hyperscan/src/nfa/accel_dfa_build_strat.h +++ b/contrib/libs/hyperscan/src/nfa/accel_dfa_build_strat.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -40,11 +40,11 @@ namespace ue2 { class ReportManager; struct Grey; -enum DfaType { - McClellan, - Sheng, - Gough -}; +enum DfaType { + McClellan, + Sheng, + Gough +}; class accel_dfa_build_strat : public dfa_build_strat { public: @@ -58,8 +58,8 @@ public: virtual void buildAccel(dstate_id_t this_idx, const AccelScheme &info, void *accel_out); virtual std::map<dstate_id_t, AccelScheme> getAccelInfo(const Grey &grey); - virtual DfaType getType() const = 0; - + virtual DfaType getType() const = 0; + private: bool only_accel_init; }; diff --git a/contrib/libs/hyperscan/src/nfa/goughcompile.cpp b/contrib/libs/hyperscan/src/nfa/goughcompile.cpp index cb7f0eafc9..d41c6f4235 100644 --- a/contrib/libs/hyperscan/src/nfa/goughcompile.cpp +++ b/contrib/libs/hyperscan/src/nfa/goughcompile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -91,7 +91,7 @@ public: void buildAccel(dstate_id_t this_idx, const AccelScheme &info, void *accel_out) override; u32 max_allowed_offset_accel() const override { return 0; } - DfaType getType() const override { return Gough; } + DfaType getType() const override { return Gough; } raw_som_dfa &rdfa; const GoughGraph ≫ @@ -375,7 +375,7 @@ unique_ptr<GoughGraph> makeCFG(const raw_som_dfa &raw) { } u16 top_sym = raw.alpha_remap[TOP]; - DEBUG_PRINTF("top: %hu, kind %s\n", top_sym, to_string(raw.kind).c_str()); + DEBUG_PRINTF("top: %hu, kind %s\n", top_sym, to_string(raw.kind).c_str()); /* create edges, JOIN variables (on edge targets) */ map<dstate_id_t, GoughEdge> seen; diff --git a/contrib/libs/hyperscan/src/nfa/limex_compile.cpp b/contrib/libs/hyperscan/src/nfa/limex_compile.cpp index bad7434d79..9233ae515e 100644 --- a/contrib/libs/hyperscan/src/nfa/limex_compile.cpp +++ b/contrib/libs/hyperscan/src/nfa/limex_compile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2020, Intel Corporation + * Copyright (c) 2015-2020, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -85,18 +85,18 @@ namespace ue2 { */ static constexpr u32 NO_STATE = ~0; -/* Maximum number of states taken as a small NFA */ -static constexpr u32 MAX_SMALL_NFA_STATES = 64; - -/* Maximum bounded repeat upper bound to consider as a fast NFA */ -static constexpr u64a MAX_REPEAT_SIZE = 200; - -/* Maximum bounded repeat char reach size to consider as a fast NFA */ -static constexpr u32 MAX_REPEAT_CHAR_REACH = 26; - -/* Minimum bounded repeat trigger distance to consider as a fast NFA */ -static constexpr u8 MIN_REPEAT_TRIGGER_DISTANCE = 6; - +/* Maximum number of states taken as a small NFA */ +static constexpr u32 MAX_SMALL_NFA_STATES = 64; + +/* Maximum bounded repeat upper bound to consider as a fast NFA */ +static constexpr u64a MAX_REPEAT_SIZE = 200; + +/* Maximum bounded repeat char reach size to consider as a fast NFA */ +static constexpr u32 MAX_REPEAT_CHAR_REACH = 26; + +/* Minimum bounded repeat trigger distance to consider as a fast NFA */ +static constexpr u8 MIN_REPEAT_TRIGGER_DISTANCE = 6; + namespace { struct precalcAccel { @@ -992,7 +992,7 @@ u32 addSquashMask(const build_info &args, const NFAVertex &v, // see if we've already seen it, otherwise add a new one. auto it = find(squash.begin(), squash.end(), sit->second); if (it != squash.end()) { - return verify_u32(std::distance(squash.begin(), it)); + return verify_u32(std::distance(squash.begin(), it)); } u32 idx = verify_u32(squash.size()); squash.push_back(sit->second); @@ -1019,7 +1019,7 @@ u32 addReports(const flat_set<ReportID> &r, vector<ReportID> &reports, auto it = search(begin(reports), end(reports), begin(my_reports), end(my_reports)); if (it != end(reports)) { - u32 offset = verify_u32(std::distance(begin(reports), it)); + u32 offset = verify_u32(std::distance(begin(reports), it)); DEBUG_PRINTF("reusing found report list at %u\n", offset); return offset; } @@ -1922,8 +1922,8 @@ struct Factory { } static - void writeExceptions(const build_info &args, - const map<ExceptionProto, vector<u32>> &exceptionMap, + void writeExceptions(const build_info &args, + const map<ExceptionProto, vector<u32>> &exceptionMap, const vector<u32> &repeatOffsets, implNFA_t *limex, const u32 exceptionsOffset, const u32 reportListOffset) { @@ -1975,59 +1975,59 @@ struct Factory { limex->exceptionOffset = exceptionsOffset; limex->exceptionCount = ecount; - - if (args.num_states > 64 && args.cc.target_info.has_avx512vbmi()) { - const u8 *exceptionMask = (const u8 *)(&limex->exceptionMask); - u8 *shufMask = (u8 *)&limex->exceptionShufMask; - u8 *bitMask = (u8 *)&limex->exceptionBitMask; - u8 *andMask = (u8 *)&limex->exceptionAndMask; - - u32 tot_cnt = 0; - u32 pos = 0; - bool valid = true; - size_t tot = sizeof(limex->exceptionMask); - size_t base = 0; - - // We normally have up to 64 exceptions to handle, - // but treat 384 state Limex differently to simplify operations - size_t limit = 64; - if (args.num_states > 256 && args.num_states <= 384) { - limit = 48; - } - - for (size_t i = 0; i < tot; i++) { - if (!exceptionMask[i]) { - continue; - } - u32 bit_cnt = popcount32(exceptionMask[i]); - - tot_cnt += bit_cnt; - if (tot_cnt > limit) { - valid = false; - break; - } - - u32 emsk = exceptionMask[i]; - while (emsk) { - u32 t = findAndClearLSB_32(&emsk); - bitMask[pos] = 1U << t; - andMask[pos] = 1U << t; - shufMask[pos++] = i + base; - - if (pos == 32 && - (args.num_states > 128 && args.num_states <= 256)) { - base += 32; - } - } - } - // Avoid matching unused bytes - for (u32 i = pos; i < 64; i++) { - bitMask[i] = 0xff; - } - if (valid) { - setLimexFlag(limex, LIMEX_FLAG_EXTRACT_EXP); - } - } + + if (args.num_states > 64 && args.cc.target_info.has_avx512vbmi()) { + const u8 *exceptionMask = (const u8 *)(&limex->exceptionMask); + u8 *shufMask = (u8 *)&limex->exceptionShufMask; + u8 *bitMask = (u8 *)&limex->exceptionBitMask; + u8 *andMask = (u8 *)&limex->exceptionAndMask; + + u32 tot_cnt = 0; + u32 pos = 0; + bool valid = true; + size_t tot = sizeof(limex->exceptionMask); + size_t base = 0; + + // We normally have up to 64 exceptions to handle, + // but treat 384 state Limex differently to simplify operations + size_t limit = 64; + if (args.num_states > 256 && args.num_states <= 384) { + limit = 48; + } + + for (size_t i = 0; i < tot; i++) { + if (!exceptionMask[i]) { + continue; + } + u32 bit_cnt = popcount32(exceptionMask[i]); + + tot_cnt += bit_cnt; + if (tot_cnt > limit) { + valid = false; + break; + } + + u32 emsk = exceptionMask[i]; + while (emsk) { + u32 t = findAndClearLSB_32(&emsk); + bitMask[pos] = 1U << t; + andMask[pos] = 1U << t; + shufMask[pos++] = i + base; + + if (pos == 32 && + (args.num_states > 128 && args.num_states <= 256)) { + base += 32; + } + } + } + // Avoid matching unused bytes + for (u32 i = pos; i < 64; i++) { + bitMask[i] = 0xff; + } + if (valid) { + setLimexFlag(limex, LIMEX_FLAG_EXTRACT_EXP); + } + } } static @@ -2353,7 +2353,7 @@ struct Factory { writeRepeats(repeats, repeatOffsets, limex, repeatOffsetsOffset, repeatsOffset); - writeExceptions(args, exceptionMap, repeatOffsets, limex, exceptionsOffset, + writeExceptions(args, exceptionMap, repeatOffsets, limex, exceptionsOffset, reportListOffset); writeLimexMasks(args, limex); @@ -2489,68 +2489,68 @@ bool isSane(const NGHolder &h, const map<u32, set<NFAVertex>> &tops, #endif // NDEBUG static -bool isFast(const build_info &args) { - const NGHolder &h = args.h; - const u32 num_states = args.num_states; - - if (num_states > MAX_SMALL_NFA_STATES) { - return false; - } - - unordered_map<NFAVertex, bool> pos_trigger; - for (u32 i = 0; i < args.repeats.size(); i++) { - const BoundedRepeatData &br = args.repeats[i]; - assert(!contains(pos_trigger, br.pos_trigger)); - pos_trigger[br.pos_trigger] = br.repeatMax <= MAX_REPEAT_SIZE; - } - - // Small NFA without bounded repeat should be fast. - if (pos_trigger.empty()) { - return true; - } - - vector<NFAVertex> cur; - unordered_set<NFAVertex> visited; - for (const auto &m : args.tops) { - for (NFAVertex v : m.second) { - cur.push_back(v); - visited.insert(v); - } - } - - u8 pos_dist = 0; - while (!cur.empty()) { - vector<NFAVertex> next; - for (const auto &v : cur) { - if (contains(pos_trigger, v)) { - const CharReach &cr = h[v].char_reach; - if (!pos_trigger[v] && cr.count() > MAX_REPEAT_CHAR_REACH) { - return false; - } - } - for (const auto &w : adjacent_vertices_range(v, h)) { - if (w == v) { - continue; - } - u32 j = args.state_ids.at(w); - if (j == NO_STATE) { - continue; - } - if (!contains(visited, w)) { - next.push_back(w); - visited.insert(w); - } - } - } - if (++pos_dist >= MIN_REPEAT_TRIGGER_DISTANCE) { - break; - } - swap(cur, next); - } - return true; -} - -static +bool isFast(const build_info &args) { + const NGHolder &h = args.h; + const u32 num_states = args.num_states; + + if (num_states > MAX_SMALL_NFA_STATES) { + return false; + } + + unordered_map<NFAVertex, bool> pos_trigger; + for (u32 i = 0; i < args.repeats.size(); i++) { + const BoundedRepeatData &br = args.repeats[i]; + assert(!contains(pos_trigger, br.pos_trigger)); + pos_trigger[br.pos_trigger] = br.repeatMax <= MAX_REPEAT_SIZE; + } + + // Small NFA without bounded repeat should be fast. + if (pos_trigger.empty()) { + return true; + } + + vector<NFAVertex> cur; + unordered_set<NFAVertex> visited; + for (const auto &m : args.tops) { + for (NFAVertex v : m.second) { + cur.push_back(v); + visited.insert(v); + } + } + + u8 pos_dist = 0; + while (!cur.empty()) { + vector<NFAVertex> next; + for (const auto &v : cur) { + if (contains(pos_trigger, v)) { + const CharReach &cr = h[v].char_reach; + if (!pos_trigger[v] && cr.count() > MAX_REPEAT_CHAR_REACH) { + return false; + } + } + for (const auto &w : adjacent_vertices_range(v, h)) { + if (w == v) { + continue; + } + u32 j = args.state_ids.at(w); + if (j == NO_STATE) { + continue; + } + if (!contains(visited, w)) { + next.push_back(w); + visited.insert(w); + } + } + } + if (++pos_dist >= MIN_REPEAT_TRIGGER_DISTANCE) { + break; + } + swap(cur, next); + } + return true; +} + +static u32 max_state(const unordered_map<NFAVertex, u32> &state_ids) { u32 rv = 0; for (const auto &m : state_ids) { @@ -2570,7 +2570,7 @@ bytecode_ptr<NFA> generate(NGHolder &h, const unordered_map<NFAVertex, NFAStateSet> &squashMap, const map<u32, set<NFAVertex>> &tops, const set<NFAVertex> &zombies, bool do_accel, - bool stateCompression, bool &fast, u32 hint, + bool stateCompression, bool &fast, u32 hint, const CompileContext &cc) { const u32 num_states = max_state(states) + 1; DEBUG_PRINTF("total states: %u\n", num_states); @@ -2625,7 +2625,7 @@ bytecode_ptr<NFA> generate(NGHolder &h, if (nfa) { DEBUG_PRINTF("successful build with NFA engine: %s\n", nfa_type_name(limex_model)); - fast = isFast(arg); + fast = isFast(arg); return nfa; } } diff --git a/contrib/libs/hyperscan/src/nfa/limex_compile.h b/contrib/libs/hyperscan/src/nfa/limex_compile.h index 2562727d68..4afdcdb3e4 100644 --- a/contrib/libs/hyperscan/src/nfa/limex_compile.h +++ b/contrib/libs/hyperscan/src/nfa/limex_compile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2020, Intel Corporation + * Copyright (c) 2015-2020, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -78,7 +78,7 @@ bytecode_ptr<NFA> generate(NGHolder &g, const std::set<NFAVertex> &zombies, bool do_accel, bool stateCompression, - bool &fast, + bool &fast, u32 hint, const CompileContext &cc); diff --git a/contrib/libs/hyperscan/src/nfa/limex_exceptional.h b/contrib/libs/hyperscan/src/nfa/limex_exceptional.h index 65bc9d97cd..6c7335f1b9 100644 --- a/contrib/libs/hyperscan/src/nfa/limex_exceptional.h +++ b/contrib/libs/hyperscan/src/nfa/limex_exceptional.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2020, Intel Corporation + * Copyright (c) 2015-2020, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -47,8 +47,8 @@ #define AND_STATE JOIN(and_, STATE_T) #define EQ_STATE(a, b) (!JOIN(noteq_, STATE_T)((a), (b))) #define OR_STATE JOIN(or_, STATE_T) -#define EXPAND_STATE JOIN(expand_, STATE_T) -#define SHUFFLE_BYTE_STATE JOIN(shuffle_byte_, STATE_T) +#define EXPAND_STATE JOIN(expand_, STATE_T) +#define SHUFFLE_BYTE_STATE JOIN(shuffle_byte_, STATE_T) #define TESTBIT_STATE JOIN(testbit_, STATE_T) #define EXCEPTION_T JOIN(struct NFAException, SIZE) #define CONTEXT_T JOIN(NFAContext, SIZE) @@ -210,7 +210,7 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG, /** \brief Process all of the exceptions associated with the states in the \a * estate. */ static really_inline -int PE_FN(STATE_ARG, ESTATE_ARG, UNUSED u32 diffmask, STATE_T *succ, +int PE_FN(STATE_ARG, ESTATE_ARG, UNUSED u32 diffmask, STATE_T *succ, const struct IMPL_NFA_T *limex, const EXCEPTION_T *exceptions, u64a offset, struct CONTEXT_T *ctx, char in_rev, char flags) { assert(diffmask > 0); // guaranteed by caller macro @@ -235,72 +235,72 @@ int PE_FN(STATE_ARG, ESTATE_ARG, UNUSED u32 diffmask, STATE_T *succ, ctx->local_succ = ZERO_STATE; #endif - struct proto_cache new_cache = {0, NULL}; - enum CacheResult cacheable = CACHE_RESULT; - -#if defined(HAVE_AVX512VBMI) && SIZE > 64 - if (likely(limex->flags & LIMEX_FLAG_EXTRACT_EXP)) { - m512 emask = EXPAND_STATE(*STATE_ARG_P); - emask = SHUFFLE_BYTE_STATE(load_m512(&limex->exceptionShufMask), emask); - emask = and512(emask, load_m512(&limex->exceptionAndMask)); - u64a word = eq512mask(emask, load_m512(&limex->exceptionBitMask)); - - do { - u32 bit = FIND_AND_CLEAR_FN(&word); - const EXCEPTION_T *e = &exceptions[bit]; - - if (!RUN_EXCEPTION_FN(e, STATE_ARG_NAME, succ, -#ifndef BIG_MODEL - &local_succ, -#endif - limex, offset, ctx, &new_cache, &cacheable, - in_rev, flags)) { - return PE_RV_HALT; - } - } while (word); - } else { - // A copy of the estate as an array of GPR-sized chunks. - CHUNK_T chunks[sizeof(STATE_T) / sizeof(CHUNK_T)]; - CHUNK_T emask_chunks[sizeof(STATE_T) / sizeof(CHUNK_T)]; -#ifdef ESTATE_ON_STACK - memcpy(chunks, &estate, sizeof(STATE_T)); -#else - memcpy(chunks, estatep, sizeof(STATE_T)); -#endif - memcpy(emask_chunks, &limex->exceptionMask, sizeof(STATE_T)); - - u32 base_index[sizeof(STATE_T) / sizeof(CHUNK_T)]; - base_index[0] = 0; - for (s32 i = 0; i < (s32)ARRAY_LENGTH(base_index) - 1; i++) { - base_index[i + 1] = base_index[i] + POPCOUNT_FN(emask_chunks[i]); - } - - do { - u32 t = findAndClearLSB_32(&diffmask); -#ifdef ARCH_64_BIT - t >>= 1; // Due to diffmask64, which leaves holes in the bitmask. -#endif - assert(t < ARRAY_LENGTH(chunks)); - CHUNK_T word = chunks[t]; - assert(word != 0); - do { - u32 bit = FIND_AND_CLEAR_FN(&word); - u32 local_index = RANK_IN_MASK_FN(emask_chunks[t], bit); - u32 idx = local_index + base_index[t]; - const EXCEPTION_T *e = &exceptions[idx]; - - if (!RUN_EXCEPTION_FN(e, STATE_ARG_NAME, succ, -#ifndef BIG_MODEL - &local_succ, -#endif - limex, offset, ctx, &new_cache, &cacheable, - in_rev, flags)) { - return PE_RV_HALT; - } - } while (word); - } while (diffmask); - } -#else + struct proto_cache new_cache = {0, NULL}; + enum CacheResult cacheable = CACHE_RESULT; + +#if defined(HAVE_AVX512VBMI) && SIZE > 64 + if (likely(limex->flags & LIMEX_FLAG_EXTRACT_EXP)) { + m512 emask = EXPAND_STATE(*STATE_ARG_P); + emask = SHUFFLE_BYTE_STATE(load_m512(&limex->exceptionShufMask), emask); + emask = and512(emask, load_m512(&limex->exceptionAndMask)); + u64a word = eq512mask(emask, load_m512(&limex->exceptionBitMask)); + + do { + u32 bit = FIND_AND_CLEAR_FN(&word); + const EXCEPTION_T *e = &exceptions[bit]; + + if (!RUN_EXCEPTION_FN(e, STATE_ARG_NAME, succ, +#ifndef BIG_MODEL + &local_succ, +#endif + limex, offset, ctx, &new_cache, &cacheable, + in_rev, flags)) { + return PE_RV_HALT; + } + } while (word); + } else { + // A copy of the estate as an array of GPR-sized chunks. + CHUNK_T chunks[sizeof(STATE_T) / sizeof(CHUNK_T)]; + CHUNK_T emask_chunks[sizeof(STATE_T) / sizeof(CHUNK_T)]; +#ifdef ESTATE_ON_STACK + memcpy(chunks, &estate, sizeof(STATE_T)); +#else + memcpy(chunks, estatep, sizeof(STATE_T)); +#endif + memcpy(emask_chunks, &limex->exceptionMask, sizeof(STATE_T)); + + u32 base_index[sizeof(STATE_T) / sizeof(CHUNK_T)]; + base_index[0] = 0; + for (s32 i = 0; i < (s32)ARRAY_LENGTH(base_index) - 1; i++) { + base_index[i + 1] = base_index[i] + POPCOUNT_FN(emask_chunks[i]); + } + + do { + u32 t = findAndClearLSB_32(&diffmask); +#ifdef ARCH_64_BIT + t >>= 1; // Due to diffmask64, which leaves holes in the bitmask. +#endif + assert(t < ARRAY_LENGTH(chunks)); + CHUNK_T word = chunks[t]; + assert(word != 0); + do { + u32 bit = FIND_AND_CLEAR_FN(&word); + u32 local_index = RANK_IN_MASK_FN(emask_chunks[t], bit); + u32 idx = local_index + base_index[t]; + const EXCEPTION_T *e = &exceptions[idx]; + + if (!RUN_EXCEPTION_FN(e, STATE_ARG_NAME, succ, +#ifndef BIG_MODEL + &local_succ, +#endif + limex, offset, ctx, &new_cache, &cacheable, + in_rev, flags)) { + return PE_RV_HALT; + } + } while (word); + } while (diffmask); + } +#else // A copy of the estate as an array of GPR-sized chunks. CHUNK_T chunks[sizeof(STATE_T) / sizeof(CHUNK_T)]; CHUNK_T emask_chunks[sizeof(STATE_T) / sizeof(CHUNK_T)]; @@ -341,7 +341,7 @@ int PE_FN(STATE_ARG, ESTATE_ARG, UNUSED u32 diffmask, STATE_T *succ, } } while (word); } while (diffmask); -#endif +#endif #ifndef BIG_MODEL *succ = OR_STATE(*succ, local_succ); @@ -373,8 +373,8 @@ int PE_FN(STATE_ARG, ESTATE_ARG, UNUSED u32 diffmask, STATE_T *succ, #undef AND_STATE #undef EQ_STATE #undef OR_STATE -#undef EXPAND_STATE -#undef SHUFFLE_BYTE_STATE +#undef EXPAND_STATE +#undef SHUFFLE_BYTE_STATE #undef TESTBIT_STATE #undef PE_FN #undef RUN_EXCEPTION_FN diff --git a/contrib/libs/hyperscan/src/nfa/limex_internal.h b/contrib/libs/hyperscan/src/nfa/limex_internal.h index 59795a65b7..23b1bd9707 100644 --- a/contrib/libs/hyperscan/src/nfa/limex_internal.h +++ b/contrib/libs/hyperscan/src/nfa/limex_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2020, Intel Corporation + * Copyright (c) 2015-2020, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -86,7 +86,7 @@ #define LIMEX_FLAG_COMPRESS_STATE 1 /**< pack state into stream state */ #define LIMEX_FLAG_COMPRESS_MASKED 2 /**< use reach mask-based compression */ #define LIMEX_FLAG_CANNOT_DIE 4 /**< limex cannot have no states on */ -#define LIMEX_FLAG_EXTRACT_EXP 8 /**< use limex exception bit extraction */ +#define LIMEX_FLAG_EXTRACT_EXP 8 /**< use limex exception bit extraction */ enum LimExTrigger { LIMEX_TRIGGER_NONE = 0, @@ -158,9 +158,9 @@ struct LimExNFA##size { \ u_##size shift[MAX_SHIFT_COUNT]; \ u32 shiftCount; /**< number of shift masks used */ \ u8 shiftAmount[MAX_SHIFT_COUNT]; /**< shift amount for each mask */ \ - m512 exceptionShufMask; /**< exception byte shuffle mask */ \ - m512 exceptionBitMask; /**< exception bit mask */ \ - m512 exceptionAndMask; /**< exception and mask */ \ + m512 exceptionShufMask; /**< exception byte shuffle mask */ \ + m512 exceptionBitMask; /**< exception bit mask */ \ + m512 exceptionAndMask; /**< exception and mask */ \ }; CREATE_NFA_LIMEX(32) diff --git a/contrib/libs/hyperscan/src/nfa/mcclellan.c b/contrib/libs/hyperscan/src/nfa/mcclellan.c index fe21700632..71f71e3275 100644 --- a/contrib/libs/hyperscan/src/nfa/mcclellan.c +++ b/contrib/libs/hyperscan/src/nfa/mcclellan.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -167,68 +167,68 @@ u32 doNormal16(const struct mcclellan *m, const u8 **c_inout, const u8 *end, } static really_inline -u32 doNormalWide16(const struct mcclellan *m, const u8 **c_inout, - const u8 *end, u32 s, char *qstate, u16 *offset, - char do_accel, enum MatchMode mode) { - const u8 *c = *c_inout; - - u32 wide_limit = m->wide_limit; - const char *wide_base - = (const char *)m - sizeof(struct NFA) + m->wide_offset; - - const u16 *succ_table - = (const u16 *)((const char *)m + sizeof(struct mcclellan)); - assert(ISALIGNED_N(succ_table, 2)); - u32 sherman_base = m->sherman_limit; - const char *sherman_base_offset - = (const char *)m - sizeof(struct NFA) + m->sherman_offset; - u32 as = m->alphaShift; - - s &= STATE_MASK; - - while (c < end && s) { - u8 cprime = m->remap[*c]; - DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx (s=%u) &c: %p\n", *c, - ourisprint(*c) ? *c : '?', cprime, s, c); - - if (unlikely(s >= wide_limit)) { - const char *wide_entry - = findWideEntry16(m, wide_base, wide_limit, s); - DEBUG_PRINTF("doing wide head (%u)\n", s); - s = doWide16(wide_entry, &c, end, m->remap, (u16 *)&s, qstate, - offset); - } else if (s >= sherman_base) { - const char *sherman_state - = findShermanState(m, sherman_base_offset, sherman_base, s); - DEBUG_PRINTF("doing sherman (%u)\n", s); - s = doSherman16(sherman_state, cprime, succ_table, as); - } else { - DEBUG_PRINTF("doing normal\n"); - s = succ_table[(s << as) + cprime]; - } - - DEBUG_PRINTF("s: %u (%u)\n", s, s & STATE_MASK); - c++; - - if (do_accel && (s & ACCEL_FLAG)) { - break; - } - if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) { - break; - } - - s &= STATE_MASK; - } - - *c_inout = c; - return s; -} - -static really_inline -char mcclellanExec16_i(const struct mcclellan *m, u32 *state, char *qstate, - const u8 *buf, size_t len, u64a offAdj, NfaCallback cb, - void *ctxt, char single, const u8 **c_final, - enum MatchMode mode) { +u32 doNormalWide16(const struct mcclellan *m, const u8 **c_inout, + const u8 *end, u32 s, char *qstate, u16 *offset, + char do_accel, enum MatchMode mode) { + const u8 *c = *c_inout; + + u32 wide_limit = m->wide_limit; + const char *wide_base + = (const char *)m - sizeof(struct NFA) + m->wide_offset; + + const u16 *succ_table + = (const u16 *)((const char *)m + sizeof(struct mcclellan)); + assert(ISALIGNED_N(succ_table, 2)); + u32 sherman_base = m->sherman_limit; + const char *sherman_base_offset + = (const char *)m - sizeof(struct NFA) + m->sherman_offset; + u32 as = m->alphaShift; + + s &= STATE_MASK; + + while (c < end && s) { + u8 cprime = m->remap[*c]; + DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx (s=%u) &c: %p\n", *c, + ourisprint(*c) ? *c : '?', cprime, s, c); + + if (unlikely(s >= wide_limit)) { + const char *wide_entry + = findWideEntry16(m, wide_base, wide_limit, s); + DEBUG_PRINTF("doing wide head (%u)\n", s); + s = doWide16(wide_entry, &c, end, m->remap, (u16 *)&s, qstate, + offset); + } else if (s >= sherman_base) { + const char *sherman_state + = findShermanState(m, sherman_base_offset, sherman_base, s); + DEBUG_PRINTF("doing sherman (%u)\n", s); + s = doSherman16(sherman_state, cprime, succ_table, as); + } else { + DEBUG_PRINTF("doing normal\n"); + s = succ_table[(s << as) + cprime]; + } + + DEBUG_PRINTF("s: %u (%u)\n", s, s & STATE_MASK); + c++; + + if (do_accel && (s & ACCEL_FLAG)) { + break; + } + if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) { + break; + } + + s &= STATE_MASK; + } + + *c_inout = c; + return s; +} + +static really_inline +char mcclellanExec16_i(const struct mcclellan *m, u32 *state, char *qstate, + const u8 *buf, size_t len, u64a offAdj, NfaCallback cb, + void *ctxt, char single, const u8 **c_final, + enum MatchMode mode) { assert(ISALIGNED_N(state, 2)); if (!len) { if (mode == STOP_AT_MATCH) { @@ -238,7 +238,7 @@ char mcclellanExec16_i(const struct mcclellan *m, u32 *state, char *qstate, } u32 s = *state; - u16 offset = 0; + u16 offset = 0; const u8 *c = buf; const u8 *c_end = buf + len; const struct mstate_aux *aux @@ -267,12 +267,12 @@ without_accel: goto exit; } - if (unlikely(m->has_wide)) { - s = doNormalWide16(m, &c, min_accel_offset, s, qstate, &offset, 0, - mode); - } else { - s = doNormal16(m, &c, min_accel_offset, s, 0, mode); - } + if (unlikely(m->has_wide)) { + s = doNormalWide16(m, &c, min_accel_offset, s, qstate, &offset, 0, + mode); + } else { + s = doNormal16(m, &c, min_accel_offset, s, 0, mode); + } if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) { if (mode == STOP_AT_MATCH) { @@ -324,11 +324,11 @@ with_accel: } } - if (unlikely(m->has_wide)) { - s = doNormalWide16(m, &c, c_end, s, qstate, &offset, 1, mode); - } else { - s = doNormal16(m, &c, c_end, s, 1, mode); - } + if (unlikely(m->has_wide)) { + s = doNormalWide16(m, &c, c_end, s, qstate, &offset, 1, mode); + } else { + s = doNormal16(m, &c, c_end, s, 1, mode); + } if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) { if (mode == STOP_AT_MATCH) { @@ -366,47 +366,47 @@ exit: } static never_inline -char mcclellanExec16_i_cb(const struct mcclellan *m, u32 *state, char *qstate, - const u8 *buf, size_t len, u64a offAdj, - NfaCallback cb, void *ctxt, char single, - const u8 **final_point) { - return mcclellanExec16_i(m, state, qstate, buf, len, offAdj, cb, ctxt, - single, final_point, CALLBACK_OUTPUT); +char mcclellanExec16_i_cb(const struct mcclellan *m, u32 *state, char *qstate, + const u8 *buf, size_t len, u64a offAdj, + NfaCallback cb, void *ctxt, char single, + const u8 **final_point) { + return mcclellanExec16_i(m, state, qstate, buf, len, offAdj, cb, ctxt, + single, final_point, CALLBACK_OUTPUT); } static never_inline -char mcclellanExec16_i_sam(const struct mcclellan *m, u32 *state, char *qstate, - const u8 *buf, size_t len, u64a offAdj, - NfaCallback cb, void *ctxt, char single, - const u8 **final_point) { - return mcclellanExec16_i(m, state, qstate, buf, len, offAdj, cb, ctxt, - single, final_point, STOP_AT_MATCH); +char mcclellanExec16_i_sam(const struct mcclellan *m, u32 *state, char *qstate, + const u8 *buf, size_t len, u64a offAdj, + NfaCallback cb, void *ctxt, char single, + const u8 **final_point) { + return mcclellanExec16_i(m, state, qstate, buf, len, offAdj, cb, ctxt, + single, final_point, STOP_AT_MATCH); } static never_inline -char mcclellanExec16_i_nm(const struct mcclellan *m, u32 *state, char *qstate, - const u8 *buf, size_t len, u64a offAdj, - NfaCallback cb, void *ctxt, char single, - const u8 **final_point) { - return mcclellanExec16_i(m, state, qstate, buf, len, offAdj, cb, ctxt, - single, final_point, NO_MATCHES); +char mcclellanExec16_i_nm(const struct mcclellan *m, u32 *state, char *qstate, + const u8 *buf, size_t len, u64a offAdj, + NfaCallback cb, void *ctxt, char single, + const u8 **final_point) { + return mcclellanExec16_i(m, state, qstate, buf, len, offAdj, cb, ctxt, + single, final_point, NO_MATCHES); } static really_inline -char mcclellanExec16_i_ni(const struct mcclellan *m, u32 *state, char *qstate, - const u8 *buf, size_t len, u64a offAdj, - NfaCallback cb, void *ctxt, char single, - const u8 **final_point, enum MatchMode mode) { +char mcclellanExec16_i_ni(const struct mcclellan *m, u32 *state, char *qstate, + const u8 *buf, size_t len, u64a offAdj, + NfaCallback cb, void *ctxt, char single, + const u8 **final_point, enum MatchMode mode) { if (mode == CALLBACK_OUTPUT) { - return mcclellanExec16_i_cb(m, state, qstate, buf, len, offAdj, cb, - ctxt, single, final_point); + return mcclellanExec16_i_cb(m, state, qstate, buf, len, offAdj, cb, + ctxt, single, final_point); } else if (mode == STOP_AT_MATCH) { - return mcclellanExec16_i_sam(m, state, qstate, buf, len, offAdj, cb, - ctxt, single, final_point); + return mcclellanExec16_i_sam(m, state, qstate, buf, len, offAdj, cb, + ctxt, single, final_point); } else { assert(mode == NO_MATCHES); - return mcclellanExec16_i_nm(m, state, qstate, buf, len, offAdj, cb, - ctxt, single, final_point); + return mcclellanExec16_i_nm(m, state, qstate, buf, len, offAdj, cb, + ctxt, single, final_point); } } @@ -612,10 +612,10 @@ char mcclellanCheckEOD(const struct NFA *nfa, u32 s, u64a offset, const struct mcclellan *m = getImplNfa(nfa); const struct mstate_aux *aux = get_aux(m, s); - if (m->has_wide == 1 && s >= m->wide_limit) { - return MO_CONTINUE_MATCHING; - } - + if (m->has_wide == 1 && s >= m->wide_limit) { + return MO_CONTINUE_MATCHING; + } + if (!aux->accept_eod) { return MO_CONTINUE_MATCHING; } @@ -688,9 +688,9 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, /* do main buffer region */ const u8 *final_look; - char rv = mcclellanExec16_i_ni(m, &s, q->state, cur_buf + sp, - local_ep - sp, offset + sp, cb, context, - single, &final_look, mode); + char rv = mcclellanExec16_i_ni(m, &s, q->state, cur_buf + sp, + local_ep - sp, offset + sp, cb, context, + single, &final_look, mode); if (rv == MO_DEAD) { *(u16 *)q->state = 0; return MO_DEAD; @@ -760,16 +760,16 @@ char nfaExecMcClellan16_Bi(const struct NFA *n, u64a offset, const u8 *buffer, const struct mcclellan *m = getImplNfa(n); u32 s = m->start_anchored; - if (mcclellanExec16_i(m, &s, NULL, buffer, length, offset, cb, context, - single, NULL, CALLBACK_OUTPUT) + if (mcclellanExec16_i(m, &s, NULL, buffer, length, offset, cb, context, + single, NULL, CALLBACK_OUTPUT) == MO_DEAD) { return s ? MO_ALIVE : MO_DEAD; } - if (m->has_wide == 1 && s >= m->wide_limit) { - return MO_ALIVE; - } - + if (m->has_wide == 1 && s >= m->wide_limit) { + return MO_ALIVE; + } + const struct mstate_aux *aux = get_aux(m, s); if (aux->accept_eod) { @@ -848,7 +848,7 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, char rv = mcclellanExec8_i_ni(m, &s, cur_buf + sp, local_ep - sp, offset + sp, cb, context, single, &final_look, mode); - + if (rv == MO_HALT_MATCHING) { *(u8 *)q->state = 0; return MO_DEAD; @@ -1097,8 +1097,8 @@ char nfaExecMcClellan16_inAccept(const struct NFA *n, ReportID report, u16 s = *(u16 *)q->state; DEBUG_PRINTF("checking accepts for %hu\n", s); - return (m->has_wide == 1 && s >= m->wide_limit) ? - 0 : mcclellanHasAccept(m, get_aux(m, s), report); + return (m->has_wide == 1 && s >= m->wide_limit) ? + 0 : mcclellanHasAccept(m, get_aux(m, s), report); } char nfaExecMcClellan16_inAnyAccept(const struct NFA *n, struct mq *q) { @@ -1108,8 +1108,8 @@ char nfaExecMcClellan16_inAnyAccept(const struct NFA *n, struct mq *q) { u16 s = *(u16 *)q->state; DEBUG_PRINTF("checking accepts for %hu\n", s); - return (m->has_wide == 1 && s >= m->wide_limit) ? - 0 : !!get_aux(m, s)->accept; + return (m->has_wide == 1 && s >= m->wide_limit) ? + 0 : !!get_aux(m, s)->accept; } char nfaExecMcClellan8_Q2(const struct NFA *n, struct mq *q, s64a end) { @@ -1194,12 +1194,12 @@ char nfaExecMcClellan16_initCompressedState(const struct NFA *nfa, u64a offset, void *state, UNUSED u8 key) { const struct mcclellan *m = getImplNfa(nfa); u16 s = offset ? m->start_floating : m->start_anchored; - - // new byte - if (m->has_wide) { - unaligned_store_u16((u16 *)state + 1, 0); - } - + + // new byte + if (m->has_wide) { + unaligned_store_u16((u16 *)state + 1, 0); + } + if (s) { unaligned_store_u16(state, s); return 1; @@ -1229,24 +1229,24 @@ void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state, const u8 *buf, char top, size_t start_off, size_t len, NfaCallback cb, void *ctxt) { const struct mcclellan *m = getImplNfa(nfa); - u32 s; - - if (top) { - s = m->start_anchored; - - // new byte - if (m->has_wide) { - unaligned_store_u16((u16 *)state + 1, 0); - } - } else { - s = unaligned_load_u16(state); - } - + u32 s; + + if (top) { + s = m->start_anchored; + + // new byte + if (m->has_wide) { + unaligned_store_u16((u16 *)state + 1, 0); + } + } else { + s = unaligned_load_u16(state); + } + if (m->flags & MCCLELLAN_FLAG_SINGLE) { - mcclellanExec16_i(m, &s, state, buf + start_off, len - start_off, + mcclellanExec16_i(m, &s, state, buf + start_off, len - start_off, start_off, cb, ctxt, 1, NULL, CALLBACK_OUTPUT); } else { - mcclellanExec16_i(m, &s, state, buf + start_off, len - start_off, + mcclellanExec16_i(m, &s, state, buf + start_off, len - start_off, start_off, cb, ctxt, 0, NULL, CALLBACK_OUTPUT); } @@ -1277,16 +1277,16 @@ char nfaExecMcClellan8_queueInitState(UNUSED const struct NFA *nfa, char nfaExecMcClellan16_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) { - const struct mcclellan *m = getImplNfa(nfa); - assert(m->has_wide == 1 ? nfa->scratchStateSize == 4 - : nfa->scratchStateSize == 2); + const struct mcclellan *m = getImplNfa(nfa); + assert(m->has_wide == 1 ? nfa->scratchStateSize == 4 + : nfa->scratchStateSize == 2); assert(ISALIGNED_N(q->state, 2)); *(u16 *)q->state = 0; - - // new byte - if (m->has_wide) { - unaligned_store_u16((u16 *)q->state + 1, 0); - } + + // new byte + if (m->has_wide) { + unaligned_store_u16((u16 *)q->state + 1, 0); + } return 0; } @@ -1312,39 +1312,39 @@ char nfaExecMcClellan8_expandState(UNUSED const struct NFA *nfa, void *dest, char nfaExecMcClellan16_queueCompressState(UNUSED const struct NFA *nfa, const struct mq *q, UNUSED s64a loc) { - const struct mcclellan *m = getImplNfa(nfa); + const struct mcclellan *m = getImplNfa(nfa); void *dest = q->streamState; const void *src = q->state; - assert(m->has_wide == 1 ? nfa->scratchStateSize == 4 - : nfa->scratchStateSize == 2); - assert(m->has_wide == 1 ? nfa->streamStateSize == 4 - : nfa->streamStateSize == 2); - + assert(m->has_wide == 1 ? nfa->scratchStateSize == 4 + : nfa->scratchStateSize == 2); + assert(m->has_wide == 1 ? nfa->streamStateSize == 4 + : nfa->streamStateSize == 2); + assert(ISALIGNED_N(src, 2)); unaligned_store_u16(dest, *(const u16 *)(src)); - - // new byte - if (m->has_wide) { - unaligned_store_u16((u16 *)dest + 1, *((const u16 *)src + 1)); - } + + // new byte + if (m->has_wide) { + unaligned_store_u16((u16 *)dest + 1, *((const u16 *)src + 1)); + } return 0; } char nfaExecMcClellan16_expandState(UNUSED const struct NFA *nfa, void *dest, const void *src, UNUSED u64a offset, UNUSED u8 key) { - const struct mcclellan *m = getImplNfa(nfa); - assert(m->has_wide == 1 ? nfa->scratchStateSize == 4 - : nfa->scratchStateSize == 2); - assert(m->has_wide == 1 ? nfa->streamStateSize == 4 - : nfa->streamStateSize == 2); - + const struct mcclellan *m = getImplNfa(nfa); + assert(m->has_wide == 1 ? nfa->scratchStateSize == 4 + : nfa->scratchStateSize == 2); + assert(m->has_wide == 1 ? nfa->streamStateSize == 4 + : nfa->streamStateSize == 2); + assert(ISALIGNED_N(dest, 2)); *(u16 *)dest = unaligned_load_u16(src); - - // new byte - if (m->has_wide) { - *((u16 *)dest + 1) = unaligned_load_u16((const u16 *)src + 1); - } + + // new byte + if (m->has_wide) { + *((u16 *)dest + 1) = unaligned_load_u16((const u16 *)src + 1); + } return 0; } diff --git a/contrib/libs/hyperscan/src/nfa/mcclellan_common_impl.h b/contrib/libs/hyperscan/src/nfa/mcclellan_common_impl.h index 431b554693..7b0e7f48cd 100644 --- a/contrib/libs/hyperscan/src/nfa/mcclellan_common_impl.h +++ b/contrib/libs/hyperscan/src/nfa/mcclellan_common_impl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -82,108 +82,108 @@ u32 doSherman16(const char *sherman_state, u8 cprime, const u16 *succ_table, u32 daddy = *(const u16 *)(sherman_state + SHERMAN_DADDY_OFFSET); return succ_table[(daddy << as) + cprime]; } - -static really_inline -u16 doWide16(const char *wide_entry, const u8 **c_inout, const u8 *end, - const u8 *remap, const u16 *s, char *qstate, u16 *offset) { - // Internal relative offset after the last visit of the wide state. - if (qstate != NULL) { // stream mode - *offset = unaligned_load_u16((const u16 *)(qstate + 2)); - } - - u8 successful = 0; - const u8 *c = *c_inout; - u32 len_c = end - c; - - u16 width = *(const u16 *)(wide_entry + WIDE_WIDTH_OFFSET); - assert(width >= 8); - const u8 *symbols = (const u8 *)(wide_entry + WIDE_SYMBOL_OFFSET16); - const u16 *trans = (const u16 *)(wide_entry + - WIDE_TRANSITION_OFFSET16(width)); - - assert(*offset < width); - u16 len_w = width - *offset; - const u8 *sym = symbols + *offset; - - char tmp[16]; - u16 pos = 0; - - if (*offset == 0 && remap[*c] != *sym) { - goto normal; - } - - // both in (16, +oo). - while (len_w >= 16 && len_c >= 16) { - m128 str_w = loadu128(sym); - for (size_t i = 0; i < 16; i++) { - tmp[i] = remap[*(c + i)]; - } - m128 str_c = loadu128(tmp); - - u32 z = movemask128(eq128(str_w, str_c)); - pos = ctz32(~z); - assert(pos <= 16); - - if (pos < 16) { - goto normal; - } - - sym += 16; - c += 16; - len_w -= 16; - len_c -= 16; - } - - pos = 0; - // at least one in (0, 16). - u32 loadLength_w = MIN(len_w, 16); - u32 loadLength_c = MIN(len_c, 16); - m128 str_w = loadbytes128(sym, loadLength_w); - for (size_t i = 0; i < loadLength_c; i++) { - tmp[i] = remap[*(c + i)]; - } - m128 str_c = loadbytes128(tmp, loadLength_c); - - u32 z = movemask128(eq128(str_w, str_c)); - pos = ctz32(~z); - - pos = MIN(pos, MIN(loadLength_w, loadLength_c)); - - if (loadLength_w <= loadLength_c) { - assert(pos <= loadLength_w); - // successful matching. - if (pos == loadLength_w) { - c -= 1; - successful = 1; - } - // failure, do nothing. - } else { - assert(pos <= loadLength_c); - // successful partial matching. - if (pos == loadLength_c) { - c -= 1; - goto partial; - } - // failure, do nothing. - } - -normal: - *offset = 0; - if (qstate != NULL) { - // Internal relative offset. - unaligned_store_u16(qstate + 2, *offset); - } - c += pos; - *c_inout = c; - return successful ? *trans : *(trans + 1 + remap[*c]); - -partial: - *offset = sym - symbols + pos; - if (qstate != NULL) { - // Internal relative offset. - unaligned_store_u16(qstate + 2, *offset); - } - c += pos; - *c_inout = c; - return *s; -} + +static really_inline +u16 doWide16(const char *wide_entry, const u8 **c_inout, const u8 *end, + const u8 *remap, const u16 *s, char *qstate, u16 *offset) { + // Internal relative offset after the last visit of the wide state. + if (qstate != NULL) { // stream mode + *offset = unaligned_load_u16((const u16 *)(qstate + 2)); + } + + u8 successful = 0; + const u8 *c = *c_inout; + u32 len_c = end - c; + + u16 width = *(const u16 *)(wide_entry + WIDE_WIDTH_OFFSET); + assert(width >= 8); + const u8 *symbols = (const u8 *)(wide_entry + WIDE_SYMBOL_OFFSET16); + const u16 *trans = (const u16 *)(wide_entry + + WIDE_TRANSITION_OFFSET16(width)); + + assert(*offset < width); + u16 len_w = width - *offset; + const u8 *sym = symbols + *offset; + + char tmp[16]; + u16 pos = 0; + + if (*offset == 0 && remap[*c] != *sym) { + goto normal; + } + + // both in (16, +oo). + while (len_w >= 16 && len_c >= 16) { + m128 str_w = loadu128(sym); + for (size_t i = 0; i < 16; i++) { + tmp[i] = remap[*(c + i)]; + } + m128 str_c = loadu128(tmp); + + u32 z = movemask128(eq128(str_w, str_c)); + pos = ctz32(~z); + assert(pos <= 16); + + if (pos < 16) { + goto normal; + } + + sym += 16; + c += 16; + len_w -= 16; + len_c -= 16; + } + + pos = 0; + // at least one in (0, 16). + u32 loadLength_w = MIN(len_w, 16); + u32 loadLength_c = MIN(len_c, 16); + m128 str_w = loadbytes128(sym, loadLength_w); + for (size_t i = 0; i < loadLength_c; i++) { + tmp[i] = remap[*(c + i)]; + } + m128 str_c = loadbytes128(tmp, loadLength_c); + + u32 z = movemask128(eq128(str_w, str_c)); + pos = ctz32(~z); + + pos = MIN(pos, MIN(loadLength_w, loadLength_c)); + + if (loadLength_w <= loadLength_c) { + assert(pos <= loadLength_w); + // successful matching. + if (pos == loadLength_w) { + c -= 1; + successful = 1; + } + // failure, do nothing. + } else { + assert(pos <= loadLength_c); + // successful partial matching. + if (pos == loadLength_c) { + c -= 1; + goto partial; + } + // failure, do nothing. + } + +normal: + *offset = 0; + if (qstate != NULL) { + // Internal relative offset. + unaligned_store_u16(qstate + 2, *offset); + } + c += pos; + *c_inout = c; + return successful ? *trans : *(trans + 1 + remap[*c]); + +partial: + *offset = sym - symbols + pos; + if (qstate != NULL) { + // Internal relative offset. + unaligned_store_u16(qstate + 2, *offset); + } + c += pos; + *c_inout = c; + return *s; +} diff --git a/contrib/libs/hyperscan/src/nfa/mcclellan_internal.h b/contrib/libs/hyperscan/src/nfa/mcclellan_internal.h index 60b3cf028e..482fdb1bc9 100644 --- a/contrib/libs/hyperscan/src/nfa/mcclellan_internal.h +++ b/contrib/libs/hyperscan/src/nfa/mcclellan_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -50,16 +50,16 @@ extern "C" #define SHERMAN_CHARS_OFFSET 4 #define SHERMAN_STATES_OFFSET(sso_len) (4 + (sso_len)) -#define WIDE_STATE 2 -#define WIDE_ENTRY_OFFSET8(weo_pos) (2 + (weo_pos)) -#define WIDE_ENTRY_OFFSET16(weo_pos) (4 + (weo_pos)) - -#define WIDE_WIDTH_OFFSET 0 -#define WIDE_SYMBOL_OFFSET8 1 -#define WIDE_TRANSITION_OFFSET8(wto_width) (1 + (wto_width)) -#define WIDE_SYMBOL_OFFSET16 2 -#define WIDE_TRANSITION_OFFSET16(wto_width) (2 + ROUNDUP_N(wto_width, 2)) - +#define WIDE_STATE 2 +#define WIDE_ENTRY_OFFSET8(weo_pos) (2 + (weo_pos)) +#define WIDE_ENTRY_OFFSET16(weo_pos) (4 + (weo_pos)) + +#define WIDE_WIDTH_OFFSET 0 +#define WIDE_SYMBOL_OFFSET8 1 +#define WIDE_TRANSITION_OFFSET8(wto_width) (1 + (wto_width)) +#define WIDE_SYMBOL_OFFSET16 2 +#define WIDE_TRANSITION_OFFSET16(wto_width) (2 + ROUNDUP_N(wto_width, 2)) + struct report_list { u32 count; ReportID report[]; @@ -89,17 +89,17 @@ struct mcclellan { u16 accel_limit_8; /**< 8 bit, lowest accelerable state */ u16 accept_limit_8; /**< 8 bit, lowest accept state */ u16 sherman_limit; /**< lowest sherman state */ - u16 wide_limit; /**< 8/16 bit, lowest wide head state */ + u16 wide_limit; /**< 8/16 bit, lowest wide head state */ u8 alphaShift; u8 flags; u8 has_accel; /**< 1 iff there are any accel plans */ - u8 has_wide; /**< 1 iff there exists any wide state */ + u8 has_wide; /**< 1 iff there exists any wide state */ u8 remap[256]; /**< remaps characters to a smaller alphabet */ ReportID arb_report; /**< one of the accepts that this dfa may raise */ - u32 accel_offset; /**< offset of accel structures from start of McClellan */ + u32 accel_offset; /**< offset of accel structures from start of McClellan */ u32 haig_offset; /**< reserved for use by Haig, relative to start of NFA */ - u32 wide_offset; /**< offset of the wide state entries to the start of the - * nfa structure */ + u32 wide_offset; /**< offset of the wide state entries to the start of the + * nfa structure */ }; static really_inline @@ -120,43 +120,43 @@ char *findMutableShermanState(char *sherman_base_offset, u16 sherman_base, return sherman_base_offset + SHERMAN_FIXED_SIZE * (s - sherman_base); } -static really_inline -const char *findWideEntry8(UNUSED const struct mcclellan *m, - const char *wide_base, u32 wide_limit, u32 s) { - UNUSED u8 type = *(const u8 *)wide_base; - assert(type == WIDE_STATE); - const u32 entry_offset - = *(const u32 *)(wide_base - + WIDE_ENTRY_OFFSET8((s - wide_limit) * sizeof(u32))); - - const char *rv = wide_base + entry_offset; - assert(rv < (const char *)m + m->length - sizeof(struct NFA)); - return rv; -} - -static really_inline -const char *findWideEntry16(UNUSED const struct mcclellan *m, - const char *wide_base, u32 wide_limit, u32 s) { - UNUSED u8 type = *(const u8 *)wide_base; - assert(type == WIDE_STATE); - const u32 entry_offset - = *(const u32 *)(wide_base - + WIDE_ENTRY_OFFSET16((s - wide_limit) * sizeof(u32))); - - const char *rv = wide_base + entry_offset; - assert(rv < (const char *)m + m->length - sizeof(struct NFA)); - return rv; -} - -static really_inline -char *findMutableWideEntry16(char *wide_base, u32 wide_limit, u32 s) { - u32 entry_offset - = *(const u32 *)(wide_base - + WIDE_ENTRY_OFFSET16((s - wide_limit) * sizeof(u32))); - - return wide_base + entry_offset; -} - +static really_inline +const char *findWideEntry8(UNUSED const struct mcclellan *m, + const char *wide_base, u32 wide_limit, u32 s) { + UNUSED u8 type = *(const u8 *)wide_base; + assert(type == WIDE_STATE); + const u32 entry_offset + = *(const u32 *)(wide_base + + WIDE_ENTRY_OFFSET8((s - wide_limit) * sizeof(u32))); + + const char *rv = wide_base + entry_offset; + assert(rv < (const char *)m + m->length - sizeof(struct NFA)); + return rv; +} + +static really_inline +const char *findWideEntry16(UNUSED const struct mcclellan *m, + const char *wide_base, u32 wide_limit, u32 s) { + UNUSED u8 type = *(const u8 *)wide_base; + assert(type == WIDE_STATE); + const u32 entry_offset + = *(const u32 *)(wide_base + + WIDE_ENTRY_OFFSET16((s - wide_limit) * sizeof(u32))); + + const char *rv = wide_base + entry_offset; + assert(rv < (const char *)m + m->length - sizeof(struct NFA)); + return rv; +} + +static really_inline +char *findMutableWideEntry16(char *wide_base, u32 wide_limit, u32 s) { + u32 entry_offset + = *(const u32 *)(wide_base + + WIDE_ENTRY_OFFSET16((s - wide_limit) * sizeof(u32))); + + return wide_base + entry_offset; +} + #ifdef __cplusplus } #endif diff --git a/contrib/libs/hyperscan/src/nfa/mcclellancompile.cpp b/contrib/libs/hyperscan/src/nfa/mcclellancompile.cpp index 3b73488581..27ec1716e9 100644 --- a/contrib/libs/hyperscan/src/nfa/mcclellancompile.cpp +++ b/contrib/libs/hyperscan/src/nfa/mcclellancompile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2020, Intel Corporation + * Copyright (c) 2015-2020, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -56,19 +56,19 @@ #include <cstring> #include <map> #include <memory> -#include <queue> +#include <queue> #include <set> #include <vector> #include <boost/range/adaptor/map.hpp> -#include "mcclellandump.h" -#include "util/dump_util.h" -#include "util/dump_charclass.h" - +#include "mcclellandump.h" +#include "util/dump_util.h" +#include "util/dump_charclass.h" + using namespace std; using boost::adaptors::map_keys; -using boost::dynamic_bitset; +using boost::dynamic_bitset; #define ACCEL_DFA_MAX_OFFSET_DEPTH 4 @@ -88,8 +88,8 @@ namespace /* anon */ { struct dstate_extra { u16 daddytaken = 0; bool shermanState = false; - bool wideState = false; - bool wideHead = false; + bool wideState = false; + bool wideHead = false; }; struct dfa_info { @@ -97,8 +97,8 @@ struct dfa_info { raw_dfa &raw; vector<dstate> &states; vector<dstate_extra> extra; - vector<vector<dstate_id_t>> wide_state_chain; - vector<vector<symbol_t>> wide_symbol_chain; + vector<vector<dstate_id_t>> wide_state_chain; + vector<vector<symbol_t>> wide_symbol_chain; const u16 alpha_size; /* including special symbols */ const array<u16, ALPHABET_SIZE> &alpha_remap; const u16 impl_alpha_size; @@ -122,14 +122,14 @@ struct dfa_info { return extra[raw_id].shermanState; } - bool is_widestate(dstate_id_t raw_id) const { - return extra[raw_id].wideState; - } - - bool is_widehead(dstate_id_t raw_id) const { - return extra[raw_id].wideHead; - } - + bool is_widestate(dstate_id_t raw_id) const { + return extra[raw_id].wideState; + } + + bool is_widehead(dstate_id_t raw_id) const { + return extra[raw_id].wideHead; + } + size_t size(void) const { return states.size(); } }; @@ -142,35 +142,35 @@ u8 dfa_info::getAlphaShift() const { } } -struct state_prev_info { - vector<vector<dstate_id_t>> prev_vec; - explicit state_prev_info(size_t alpha_size) : prev_vec(alpha_size) {} -}; - -struct DfaPrevInfo { - u16 impl_alpha_size; - u16 state_num; - vector<state_prev_info> states; - set<dstate_id_t> accepts; - - explicit DfaPrevInfo(raw_dfa &rdfa); -}; - -DfaPrevInfo::DfaPrevInfo(raw_dfa &rdfa) - : impl_alpha_size(rdfa.getImplAlphaSize()), state_num(rdfa.states.size()), - states(state_num, state_prev_info(impl_alpha_size)){ - for (size_t i = 0; i < states.size(); i++) { - for (symbol_t sym = 0; sym < impl_alpha_size; sym++) { - dstate_id_t curr = rdfa.states[i].next[sym]; - states[curr].prev_vec[sym].push_back(i); - } - if (!rdfa.states[i].reports.empty() - || !rdfa.states[i].reports_eod.empty()) { - DEBUG_PRINTF("accept raw state: %ld\n", i); - accepts.insert(i); - } - } -} +struct state_prev_info { + vector<vector<dstate_id_t>> prev_vec; + explicit state_prev_info(size_t alpha_size) : prev_vec(alpha_size) {} +}; + +struct DfaPrevInfo { + u16 impl_alpha_size; + u16 state_num; + vector<state_prev_info> states; + set<dstate_id_t> accepts; + + explicit DfaPrevInfo(raw_dfa &rdfa); +}; + +DfaPrevInfo::DfaPrevInfo(raw_dfa &rdfa) + : impl_alpha_size(rdfa.getImplAlphaSize()), state_num(rdfa.states.size()), + states(state_num, state_prev_info(impl_alpha_size)){ + for (size_t i = 0; i < states.size(); i++) { + for (symbol_t sym = 0; sym < impl_alpha_size; sym++) { + dstate_id_t curr = rdfa.states[i].next[sym]; + states[curr].prev_vec[sym].push_back(i); + } + if (!rdfa.states[i].reports.empty() + || !rdfa.states[i].reports_eod.empty()) { + DEBUG_PRINTF("accept raw state: %ld\n", i); + accepts.insert(i); + } + } +} } // namespace static @@ -198,11 +198,11 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) { for (size_t j = 0; j < alphaSize; j++) { size_t c_prime = (i << alphaShift) + j; - // wide state has no aux structure. - if (m->has_wide && succ_table[c_prime] >= m->wide_limit) { - continue; - } - + // wide state has no aux structure. + if (m->has_wide && succ_table[c_prime] >= m->wide_limit) { + continue; + } + mstate_aux *aux = getAux(n, succ_table[c_prime]); if (aux->accept) { @@ -217,8 +217,8 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) { /* handle the sherman states */ char *sherman_base_offset = (char *)n + m->sherman_offset; - u16 sherman_ceil = m->has_wide == 1 ? m->wide_limit : m->state_count; - for (u16 j = m->sherman_limit; j < sherman_ceil; j++) { + u16 sherman_ceil = m->has_wide == 1 ? m->wide_limit : m->state_count; + for (u16 j = m->sherman_limit; j < sherman_ceil; j++) { char *sherman_cur = findMutableShermanState(sherman_base_offset, m->sherman_limit, j); assert(*(sherman_cur + SHERMAN_TYPE_OFFSET) == SHERMAN_STATE); @@ -227,11 +227,11 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) { for (u8 i = 0; i < len; i++) { u16 succ_i = unaligned_load_u16((u8 *)&succs[i]); - // wide state has no aux structure. - if (m->has_wide && succ_i >= m->wide_limit) { - continue; - } - + // wide state has no aux structure. + if (m->has_wide && succ_i >= m->wide_limit) { + continue; + } + mstate_aux *aux = getAux(n, succ_i); if (aux->accept) { @@ -245,51 +245,51 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) { unaligned_store_u16((u8 *)&succs[i], succ_i); } } - - /* handle the wide states */ - if (m->has_wide) { - u32 wide_limit = m->wide_limit; - char *wide_base = (char *)n + m->wide_offset; - assert(*wide_base == WIDE_STATE); - u16 wide_number = verify_u16(info.wide_symbol_chain.size()); - // traverse over wide head states. - for (u16 j = wide_limit; j < wide_limit + wide_number; j++) { - char *wide_cur - = findMutableWideEntry16(wide_base, wide_limit, j); - u16 width = *(const u16 *)(wide_cur + WIDE_WIDTH_OFFSET); - u16 *trans = (u16 *)(wide_cur + WIDE_TRANSITION_OFFSET16(width)); - - // check successful transition - u16 next = unaligned_load_u16((u8 *)trans); - if (next < wide_limit) { - mstate_aux *aux = getAux(n, next); - if (aux->accept) { - next |= ACCEPT_FLAG; - } - if (aux->accel_offset) { - next |= ACCEL_FLAG; - } - unaligned_store_u16((u8 *)trans, next); - } - trans++; - - // check failure transition - for (symbol_t k = 0; k < alphaSize; k++) { - u16 next_k = unaligned_load_u16((u8 *)&trans[k]); - if (next_k >= wide_limit) { - continue; - } - mstate_aux *aux_k = getAux(n, next_k); - if (aux_k->accept) { - next_k |= ACCEPT_FLAG; - } - if (aux_k->accel_offset) { - next_k |= ACCEL_FLAG; - } - unaligned_store_u16((u8 *)&trans[k], next_k); - } - } - } + + /* handle the wide states */ + if (m->has_wide) { + u32 wide_limit = m->wide_limit; + char *wide_base = (char *)n + m->wide_offset; + assert(*wide_base == WIDE_STATE); + u16 wide_number = verify_u16(info.wide_symbol_chain.size()); + // traverse over wide head states. + for (u16 j = wide_limit; j < wide_limit + wide_number; j++) { + char *wide_cur + = findMutableWideEntry16(wide_base, wide_limit, j); + u16 width = *(const u16 *)(wide_cur + WIDE_WIDTH_OFFSET); + u16 *trans = (u16 *)(wide_cur + WIDE_TRANSITION_OFFSET16(width)); + + // check successful transition + u16 next = unaligned_load_u16((u8 *)trans); + if (next < wide_limit) { + mstate_aux *aux = getAux(n, next); + if (aux->accept) { + next |= ACCEPT_FLAG; + } + if (aux->accel_offset) { + next |= ACCEL_FLAG; + } + unaligned_store_u16((u8 *)trans, next); + } + trans++; + + // check failure transition + for (symbol_t k = 0; k < alphaSize; k++) { + u16 next_k = unaligned_load_u16((u8 *)&trans[k]); + if (next_k >= wide_limit) { + continue; + } + mstate_aux *aux_k = getAux(n, next_k); + if (aux_k->accept) { + next_k |= ACCEPT_FLAG; + } + if (aux_k->accel_offset) { + next_k |= ACCEL_FLAG; + } + unaligned_store_u16((u8 *)&trans[k], next_k); + } + } + } } u32 mcclellan_build_strat::max_allowed_offset_accel() const { @@ -335,20 +335,20 @@ void populateBasicInfo(size_t state_size, const dfa_info &info, m->start_anchored = info.implId(info.raw.start_anchored); m->start_floating = info.implId(info.raw.start_floating); m->has_accel = accel_count ? 1 : 0; - m->has_wide = info.wide_state_chain.size() > 0 ? 1 : 0; - - if (state_size == sizeof(u8) && m->has_wide == 1) { - // allocate 1 more byte for wide state use. - nfa->scratchStateSize += sizeof(u8); - nfa->streamStateSize += sizeof(u8); - } - - if (state_size == sizeof(u16) && m->has_wide == 1) { - // allocate 2 more bytes for wide state use. - nfa->scratchStateSize += sizeof(u16); - nfa->streamStateSize += sizeof(u16); - } - + m->has_wide = info.wide_state_chain.size() > 0 ? 1 : 0; + + if (state_size == sizeof(u8) && m->has_wide == 1) { + // allocate 1 more byte for wide state use. + nfa->scratchStateSize += sizeof(u8); + nfa->streamStateSize += sizeof(u8); + } + + if (state_size == sizeof(u16) && m->has_wide == 1) { + // allocate 2 more bytes for wide state use. + nfa->scratchStateSize += sizeof(u16); + nfa->streamStateSize += sizeof(u16); + } + if (single) { m->flags |= MCCLELLAN_FLAG_SINGLE; } @@ -521,24 +521,24 @@ size_t calcShermanRegionSize(const dfa_info &info) { } static -size_t calcWideRegionSize(const dfa_info &info) { - if (info.wide_state_chain.empty()) { - return 0; - } - - // wide info header - size_t rv = info.wide_symbol_chain.size() * sizeof(u32) + 4; - - // wide info body - for (const auto &chain : info.wide_symbol_chain) { - rv += ROUNDUP_N(chain.size(), 2) + - (info.impl_alpha_size + 1) * sizeof(u16) + 2; - } - - return ROUNDUP_16(rv); -} - -static +size_t calcWideRegionSize(const dfa_info &info) { + if (info.wide_state_chain.empty()) { + return 0; + } + + // wide info header + size_t rv = info.wide_symbol_chain.size() * sizeof(u32) + 4; + + // wide info body + for (const auto &chain : info.wide_symbol_chain) { + rv += ROUNDUP_N(chain.size(), 2) + + (info.impl_alpha_size + 1) * sizeof(u16) + 2; + } + + return ROUNDUP_16(rv); +} + +static void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info, const vector<u32> &reports, const vector<u32> &reports_eod, vector<u32> &reportOffsets) { @@ -552,60 +552,60 @@ void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info, /* returns false on error */ static -bool allocateFSN16(dfa_info &info, dstate_id_t *sherman_base, - dstate_id_t *wide_limit) { +bool allocateFSN16(dfa_info &info, dstate_id_t *sherman_base, + dstate_id_t *wide_limit) { info.states[0].impl_id = 0; /* dead is always 0 */ vector<dstate_id_t> norm; vector<dstate_id_t> sherm; - vector<dstate_id_t> wideHead; - vector<dstate_id_t> wideState; + vector<dstate_id_t> wideHead; + vector<dstate_id_t> wideState; if (info.size() > (1 << 16)) { DEBUG_PRINTF("too many states\n"); - *wide_limit = 0; + *wide_limit = 0; return false; } for (u32 i = 1; i < info.size(); i++) { - if (info.is_widehead(i)) { - wideHead.push_back(i); - } else if (info.is_widestate(i)) { - wideState.push_back(i); - } else if (info.is_sherman(i)) { + if (info.is_widehead(i)) { + wideHead.push_back(i); + } else if (info.is_widestate(i)) { + wideState.push_back(i); + } else if (info.is_sherman(i)) { sherm.push_back(i); } else { norm.push_back(i); } } - dstate_id_t next = 1; + dstate_id_t next = 1; for (const dstate_id_t &s : norm) { - DEBUG_PRINTF("[norm] mapping state %u to %u\n", s, next); - info.states[s].impl_id = next++; + DEBUG_PRINTF("[norm] mapping state %u to %u\n", s, next); + info.states[s].impl_id = next++; } - *sherman_base = next; + *sherman_base = next; for (const dstate_id_t &s : sherm) { - DEBUG_PRINTF("[sherm] mapping state %u to %u\n", s, next); - info.states[s].impl_id = next++; - } - - *wide_limit = next; - for (const dstate_id_t &s : wideHead) { - DEBUG_PRINTF("[widehead] mapping state %u to %u\n", s, next); - info.states[s].impl_id = next++; - } - - for (const dstate_id_t &s : wideState) { - DEBUG_PRINTF("[wide] mapping state %u to %u\n", s, next); - info.states[s].impl_id = next++; - } - + DEBUG_PRINTF("[sherm] mapping state %u to %u\n", s, next); + info.states[s].impl_id = next++; + } + + *wide_limit = next; + for (const dstate_id_t &s : wideHead) { + DEBUG_PRINTF("[widehead] mapping state %u to %u\n", s, next); + info.states[s].impl_id = next++; + } + + for (const dstate_id_t &s : wideState) { + DEBUG_PRINTF("[wide] mapping state %u to %u\n", s, next); + info.states[s].impl_id = next++; + } + /* Check to see if we haven't over allocated our states */ - DEBUG_PRINTF("next sherman %u masked %u\n", next, - (dstate_id_t)(next & STATE_MASK)); - return (next - 1) == ((next - 1) & STATE_MASK); + DEBUG_PRINTF("next sherman %u masked %u\n", next, + (dstate_id_t)(next & STATE_MASK)); + return (next - 1) == ((next - 1) & STATE_MASK); } static @@ -622,16 +622,16 @@ bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc, assert(alphaShift <= 8); u16 count_real_states; - u16 wide_limit; - if (!allocateFSN16(info, &count_real_states, &wide_limit)) { + u16 wide_limit; + if (!allocateFSN16(info, &count_real_states, &wide_limit)) { DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n", info.size()); return nullptr; } - DEBUG_PRINTF("count_real_states: %d\n", count_real_states); - DEBUG_PRINTF("non_wide_states: %d\n", wide_limit); - + DEBUG_PRINTF("count_real_states: %d\n", count_real_states); + DEBUG_PRINTF("non_wide_states: %d\n", wide_limit); + auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb); map<dstate_id_t, AccelScheme> accel_escape_info = info.strat.getAccelInfo(cc.grey); @@ -639,7 +639,7 @@ bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc, size_t tran_size = (1 << info.getAlphaShift()) * sizeof(u16) * count_real_states; - size_t aux_size = sizeof(mstate_aux) * wide_limit; + size_t aux_size = sizeof(mstate_aux) * wide_limit; size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcclellan) + tran_size); size_t accel_size = info.strat.accelSize() * accel_escape_info.size(); @@ -647,24 +647,24 @@ bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc, + ri->getReportListSize(), 32); size_t sherman_offset = ROUNDUP_16(accel_offset + accel_size); size_t sherman_size = calcShermanRegionSize(info); - size_t wide_offset = ROUNDUP_16(sherman_offset + sherman_size); - size_t wide_size = calcWideRegionSize(info); - size_t total_size = wide_offset + wide_size; + size_t wide_offset = ROUNDUP_16(sherman_offset + sherman_size); + size_t wide_size = calcWideRegionSize(info); + size_t total_size = wide_offset + wide_size; accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */ assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); - DEBUG_PRINTF("aux_offset %zu\n", aux_offset); - DEBUG_PRINTF("aux_size %zu\n", aux_size); - DEBUG_PRINTF("rl size %u\n", ri->getReportListSize()); - DEBUG_PRINTF("accel_offset %zu\n", accel_offset + sizeof(NFA)); - DEBUG_PRINTF("accel_size %zu\n", accel_size); - DEBUG_PRINTF("sherman_offset %zu\n", sherman_offset); - DEBUG_PRINTF("sherman_size %zu\n", sherman_size); - DEBUG_PRINTF("wide_offset %zu\n", wide_offset); - DEBUG_PRINTF("wide_size %zu\n", wide_size); - DEBUG_PRINTF("total_size %zu\n", total_size); - + DEBUG_PRINTF("aux_offset %zu\n", aux_offset); + DEBUG_PRINTF("aux_size %zu\n", aux_size); + DEBUG_PRINTF("rl size %u\n", ri->getReportListSize()); + DEBUG_PRINTF("accel_offset %zu\n", accel_offset + sizeof(NFA)); + DEBUG_PRINTF("accel_size %zu\n", accel_size); + DEBUG_PRINTF("sherman_offset %zu\n", sherman_offset); + DEBUG_PRINTF("sherman_size %zu\n", sherman_size); + DEBUG_PRINTF("wide_offset %zu\n", wide_offset); + DEBUG_PRINTF("wide_size %zu\n", wide_size); + DEBUG_PRINTF("total_size %zu\n", total_size); + auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size); char *nfa_base = (char *)nfa.get(); @@ -679,9 +679,9 @@ bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc, mstate_aux *aux = (mstate_aux *)(nfa_base + aux_offset); mcclellan *m = (mcclellan *)getMutableImplNfa(nfa.get()); - m->wide_limit = wide_limit; - m->wide_offset = wide_offset; - + m->wide_limit = wide_limit; + m->wide_offset = wide_offset; + /* copy in the mc header information */ m->sherman_offset = sherman_offset; m->sherman_end = total_size; @@ -689,7 +689,7 @@ bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc, /* do normal states */ for (size_t i = 0; i < info.size(); i++) { - if (info.is_sherman(i) || info.is_widestate(i)) { + if (info.is_sherman(i) || info.is_widestate(i)) { continue; } @@ -727,7 +727,7 @@ bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc, mstate_aux *this_aux = getAux(nfa.get(), fs); assert(fs >= count_real_states); - assert(fs < wide_limit); + assert(fs < wide_limit); char *curr_sherman_entry = sherman_table + (fs - m->sherman_limit) * SHERMAN_FIXED_SIZE; @@ -771,71 +771,71 @@ bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc, } } - if (!info.wide_state_chain.empty()) { - /* do wide states using info */ - u16 wide_number = verify_u16(info.wide_symbol_chain.size()); - char *wide_base = nfa_base + m->wide_offset; - assert(ISALIGNED_16(wide_base)); - - char *wide_top = wide_base; - *(u8 *)(wide_top++) = WIDE_STATE; - wide_top = ROUNDUP_PTR(wide_top, 2); - *(u16 *)(wide_top) = wide_number; - wide_top += 2; - - char *curr_wide_entry = wide_top + wide_number * sizeof(u32); - u32 *wide_offset_list = (u32 *)wide_top; - - /* get the order of writing wide states */ - vector<size_t> order(wide_number); - for (size_t i = 0; i < wide_number; i++) { - dstate_id_t head = info.wide_state_chain[i].front(); - size_t pos = info.implId(head) - m->wide_limit; - order[pos] = i; - } - - for (size_t i : order) { - vector<dstate_id_t> &state_chain = info.wide_state_chain[i]; - vector<symbol_t> &symbol_chain = info.wide_symbol_chain[i]; - - u16 width = verify_u16(symbol_chain.size()); - *(u16 *)(curr_wide_entry + WIDE_WIDTH_OFFSET) = width; - u8 *chars = (u8 *)(curr_wide_entry + WIDE_SYMBOL_OFFSET16); - - // store wide state symbol chain - for (size_t j = 0; j < width; j++) { - *(chars++) = verify_u8(symbol_chain[j]); - } - - // store wide state transition table - u16 *trans = (u16 *)(curr_wide_entry - + WIDE_TRANSITION_OFFSET16(width)); - dstate_id_t tail = state_chain[width - 1]; - symbol_t last = symbol_chain[width -1]; - dstate_id_t tran = info.states[tail].next[last]; - // 1. successful transition - *trans++ = info.implId(tran); - // 2. failure transition - for (size_t j = 0; verify_u16(j) < width - 1; j++) { - if (symbol_chain[j] != last) { - tran = info.states[state_chain[j]].next[last]; - } - } - for (symbol_t sym = 0; sym < info.impl_alpha_size; sym++) { - if (sym != last) { - *trans++ = info.implId(info.states[tail].next[sym]); - } - else { - *trans++ = info.implId(tran); - } - } - - *wide_offset_list++ = verify_u32(curr_wide_entry - wide_base); - - curr_wide_entry = (char *)trans; - } - } - + if (!info.wide_state_chain.empty()) { + /* do wide states using info */ + u16 wide_number = verify_u16(info.wide_symbol_chain.size()); + char *wide_base = nfa_base + m->wide_offset; + assert(ISALIGNED_16(wide_base)); + + char *wide_top = wide_base; + *(u8 *)(wide_top++) = WIDE_STATE; + wide_top = ROUNDUP_PTR(wide_top, 2); + *(u16 *)(wide_top) = wide_number; + wide_top += 2; + + char *curr_wide_entry = wide_top + wide_number * sizeof(u32); + u32 *wide_offset_list = (u32 *)wide_top; + + /* get the order of writing wide states */ + vector<size_t> order(wide_number); + for (size_t i = 0; i < wide_number; i++) { + dstate_id_t head = info.wide_state_chain[i].front(); + size_t pos = info.implId(head) - m->wide_limit; + order[pos] = i; + } + + for (size_t i : order) { + vector<dstate_id_t> &state_chain = info.wide_state_chain[i]; + vector<symbol_t> &symbol_chain = info.wide_symbol_chain[i]; + + u16 width = verify_u16(symbol_chain.size()); + *(u16 *)(curr_wide_entry + WIDE_WIDTH_OFFSET) = width; + u8 *chars = (u8 *)(curr_wide_entry + WIDE_SYMBOL_OFFSET16); + + // store wide state symbol chain + for (size_t j = 0; j < width; j++) { + *(chars++) = verify_u8(symbol_chain[j]); + } + + // store wide state transition table + u16 *trans = (u16 *)(curr_wide_entry + + WIDE_TRANSITION_OFFSET16(width)); + dstate_id_t tail = state_chain[width - 1]; + symbol_t last = symbol_chain[width -1]; + dstate_id_t tran = info.states[tail].next[last]; + // 1. successful transition + *trans++ = info.implId(tran); + // 2. failure transition + for (size_t j = 0; verify_u16(j) < width - 1; j++) { + if (symbol_chain[j] != last) { + tran = info.states[state_chain[j]].next[last]; + } + } + for (symbol_t sym = 0; sym < info.impl_alpha_size; sym++) { + if (sym != last) { + *trans++ = info.implId(info.states[tail].next[sym]); + } + else { + *trans++ = info.implId(tran); + } + } + + *wide_offset_list++ = verify_u32(curr_wide_entry - wide_base); + + curr_wide_entry = (char *)trans; + } + } + markEdges(nfa.get(), succ_table, info); if (accel_states && nfa) { @@ -997,7 +997,7 @@ bytecode_ptr<NFA> mcclellanCompile8(dfa_info &info, const CompileContext &cc, return nfa; } -#define MAX_SHERMAN_LIST_LEN 9 +#define MAX_SHERMAN_LIST_LEN 9 static void addIfEarlier(flat_set<dstate_id_t> &dest, dstate_id_t candidate, @@ -1081,16 +1081,16 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit, if (trust_daddy_states) { // Use the daddy already set for this state so long as it isn't already // a Sherman state. - dstate_id_t daddy = currState.daddy; - if (!info.is_sherman(daddy) && !info.is_widestate(daddy)) { + dstate_id_t daddy = currState.daddy; + if (!info.is_sherman(daddy) && !info.is_widestate(daddy)) { hinted.insert(currState.daddy); } else { // Fall back to granddaddy, which has already been processed (due // to BFS ordering) and cannot be a Sherman state. dstate_id_t granddaddy = info.states[currState.daddy].daddy; - if (info.is_widestate(granddaddy)) { - return; - } + if (info.is_widestate(granddaddy)) { + return; + } assert(!info.is_sherman(granddaddy)); hinted.insert(granddaddy); } @@ -1102,7 +1102,7 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit, assert(donor < curr_id); u32 score = 0; - if (info.is_sherman(donor) || info.is_widestate(donor)) { + if (info.is_sherman(donor) || info.is_widestate(donor)) { continue; } @@ -1175,290 +1175,290 @@ bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) { return false; } -/* \brief Test for only-one-predecessor property. */ -static -bool check_property1(const DfaPrevInfo &info, const u16 impl_alpha_size, - const dstate_id_t curr_id, dstate_id_t &prev_id, - symbol_t &prev_sym) { - u32 num_prev = 0; - bool test_p1 = false; - - for (symbol_t sym = 0; sym < impl_alpha_size; sym++) { - num_prev += info.states[curr_id].prev_vec[sym].size(); - DEBUG_PRINTF("Check symbol: %u, with its vector size: %lu\n", sym, - info.states[curr_id].prev_vec[sym].size()); - if (num_prev == 1 && !test_p1) { - test_p1 = true; - prev_id = info.states[curr_id].prev_vec[sym].front(); //[0] for sure??? - prev_sym = sym; - } - } - - return num_prev == 1; -} - -/* \brief Test for same-failure-action property. */ -static -bool check_property2(const raw_dfa &rdfa, const u16 impl_alpha_size, - const dstate_id_t curr_id, const dstate_id_t prev_id, - const symbol_t curr_sym, const symbol_t prev_sym) { - const dstate &prevState = rdfa.states[prev_id]; - const dstate &currState = rdfa.states[curr_id]; - - // Compare transition tables between currState and prevState. - u16 score = 0; - for (symbol_t sym = 0; sym < impl_alpha_size; sym++) { - if (currState.next[sym] == prevState.next[sym] - && sym != curr_sym && sym != prev_sym) { - score++; - } - } - DEBUG_PRINTF("(Score: %u/%u)\n", score, impl_alpha_size); - - // 2 cases. - if (curr_sym != prev_sym && score >= impl_alpha_size - 2 - && currState.next[prev_sym] == prevState.next[curr_sym]) { - return true; - } else if (curr_sym == prev_sym && score == impl_alpha_size - 1) { - return true; - } - return false; -} - -/* \brief Check whether adding current prev_id will generate a circle.*/ -static -bool check_circle(const DfaPrevInfo &info, const u16 impl_alpha_size, - const vector<dstate_id_t> &chain, const dstate_id_t id) { - const vector<vector<dstate_id_t>> &prev_vec = info.states[id].prev_vec; - const dstate_id_t tail = chain.front(); - for (symbol_t sym = 0; sym < impl_alpha_size; sym++) { - auto iter = find(prev_vec[sym].begin(), prev_vec[sym].end(), tail); - if (iter != prev_vec[sym].end()) { - // Tail is one of id's predecessors, forming a circle. - return true; - } - } - return false; -} - -/* \brief Returns a chain of state ids and symbols. */ -static -dstate_id_t find_chain_candidate(const raw_dfa &rdfa, const DfaPrevInfo &info, - const dstate_id_t curr_id, - const symbol_t curr_sym, - vector<dstate_id_t> &temp_chain) { - //Record current id first. - temp_chain.push_back(curr_id); - - const u16 size = info.impl_alpha_size; - - // Stop when entering root cloud. - if (rdfa.start_anchored != DEAD_STATE - && is_cyclic_near(rdfa, rdfa.start_anchored) - && curr_id < size) { - return curr_id; - } - if (rdfa.start_floating != DEAD_STATE - && curr_id >= rdfa.start_floating - && curr_id < rdfa.start_floating + size * 3) { - return curr_id; - } - - // Stop when reaching anchored or floating. - if (curr_id == rdfa.start_anchored || curr_id == rdfa.start_floating) { - return curr_id; - } - - dstate_id_t prev_id = 0; - symbol_t prev_sym = ALPHABET_SIZE; - - // Check the only-one-predecessor property. - if (!check_property1(info, size, curr_id, prev_id, prev_sym)) { - return curr_id; - } - assert(prev_id != 0 && prev_sym != ALPHABET_SIZE); - DEBUG_PRINTF("(P1 test passed.)\n"); - - // Circle testing for the prev_id that passes the P1 test. - if (check_circle(info, size, temp_chain, prev_id)) { - DEBUG_PRINTF("(A circle is found.)\n"); - return curr_id; - } - - // Check the same-failure-action property. - if (!check_property2(rdfa, size, curr_id, prev_id, curr_sym, prev_sym)) { - return curr_id; - } - DEBUG_PRINTF("(P2 test passed.)\n"); - - if (!rdfa.states[prev_id].reports.empty() - || !rdfa.states[prev_id].reports_eod.empty()) { - return curr_id; - } else { - return find_chain_candidate(rdfa, info, prev_id, prev_sym, temp_chain); - } -} - -/* \brief Always store the non-extensible chains found till now. */ -static -bool store_chain_longest(vector<vector<dstate_id_t>> &candidate_chain, - vector<dstate_id_t> &temp_chain, - dynamic_bitset<> &added, bool head_is_new) { - dstate_id_t head = temp_chain.front(); - u16 length = temp_chain.size(); - - if (head_is_new) { - DEBUG_PRINTF("This is a new chain!\n"); - - // Add this new chain and get it marked. - candidate_chain.push_back(temp_chain); - - for (auto &id : temp_chain) { - DEBUG_PRINTF("(Marking s%u ...)\n", id); - added.set(id); - } - - return true; - } - - DEBUG_PRINTF("This is a longer chain!\n"); - assert(!candidate_chain.empty()); - - auto chain = find_if(candidate_chain.begin(), candidate_chain.end(), - [&](const vector<dstate_id_t> &it) { - return it.front() == head; - }); - - // Not a valid head, just do nothing and return. - if (chain == candidate_chain.end()) { - return false; - } - - u16 len = chain->size(); - - if (length > len) { - // Find out the branch node first. - size_t piv = 0; - for (; piv < length; piv++) { - if ((*chain)[piv] != temp_chain[piv]) { - break; - } - } - - for (size_t j = piv + 1; j < length; j++) { - DEBUG_PRINTF("(Marking s%u (new branch) ...)\n", temp_chain[j]); - added.set(temp_chain[j]); - } - - // Unmark old unuseful nodes. - // (Except the tail node, which is in working queue) - for (size_t j = piv + 1; j < verify_u16(len - 1); j++) { - DEBUG_PRINTF("(UnMarking s%u (old branch)...)\n", (*chain)[j]); - added.reset((*chain)[j]); - } - - chain->assign(temp_chain.begin(), temp_chain.end()); - } - - return false; -} - -/* \brief Generate wide_symbol_chain from wide_state_chain. */ -static -void generate_symbol_chain(dfa_info &info, vector<symbol_t> &chain_tail) { - raw_dfa &rdfa = info.raw; - assert(chain_tail.size() == info.wide_state_chain.size()); - - for (size_t i = 0; i < info.wide_state_chain.size(); i++) { - vector<dstate_id_t> &state_chain = info.wide_state_chain[i]; - vector<symbol_t> symbol_chain; - - info.extra[state_chain[0]].wideHead = true; - size_t width = state_chain.size() - 1; - - for (size_t j = 0; j < width; j++) { - dstate_id_t curr_id = state_chain[j]; - dstate_id_t next_id = state_chain[j + 1]; - - // The last state of the chain doesn't belong to a wide state. - info.extra[curr_id].wideState = true; - - // The tail symbol comes from vector chain_tail; - if (j == width - 1) { - symbol_chain.push_back(chain_tail[i]); - } else { - for (symbol_t sym = 0; sym < info.impl_alpha_size; sym++) { - if (rdfa.states[curr_id].next[sym] == next_id) { - symbol_chain.push_back(sym); - break; - } - } - } - } - - info.wide_symbol_chain.push_back(symbol_chain); - } -} - -/* \brief Find potential regions of states to be packed into wide states. */ -static -void find_wide_state(dfa_info &info) { - DfaPrevInfo dinfo(info.raw); - queue<dstate_id_t> work_queue; - - dynamic_bitset<> added(info.raw.states.size()); - for (auto it : dinfo.accepts) { - work_queue.push(it); - added.set(it); - } - - vector<symbol_t> chain_tail; - while (!work_queue.empty()) { - dstate_id_t curr_id = work_queue.front(); - work_queue.pop(); - DEBUG_PRINTF("Newly popped state: s%u\n", curr_id); - - for (symbol_t sym = 0; sym < dinfo.impl_alpha_size; sym++) { - for (auto info_it : dinfo.states[curr_id].prev_vec[sym]) { - if (added.test(info_it)) { - DEBUG_PRINTF("(s%u already marked.)\n", info_it); - continue; - } - - vector<dstate_id_t> temp_chain; - // Head is a state failing the test of the chain. - dstate_id_t head = find_chain_candidate(info.raw, dinfo, - info_it, sym, - temp_chain); - - // A candidate chain should contain 8 substates at least. - if (temp_chain.size() < 8) { - DEBUG_PRINTF("(Not enough substates, continue.)\n"); - continue; - } - - bool head_is_new = !added.test(head); - if (head_is_new) { - added.set(head); - work_queue.push(head); - DEBUG_PRINTF("Newly pushed state: s%u\n", head); - } - - reverse(temp_chain.begin(), temp_chain.end()); - temp_chain.push_back(curr_id); - - assert(head > 0 && head == temp_chain.front()); - if (store_chain_longest(info.wide_state_chain, temp_chain, - added, head_is_new)) { - chain_tail.push_back(sym); - } - } - } - } - - generate_symbol_chain(info, chain_tail); -} - +/* \brief Test for only-one-predecessor property. */ +static +bool check_property1(const DfaPrevInfo &info, const u16 impl_alpha_size, + const dstate_id_t curr_id, dstate_id_t &prev_id, + symbol_t &prev_sym) { + u32 num_prev = 0; + bool test_p1 = false; + + for (symbol_t sym = 0; sym < impl_alpha_size; sym++) { + num_prev += info.states[curr_id].prev_vec[sym].size(); + DEBUG_PRINTF("Check symbol: %u, with its vector size: %lu\n", sym, + info.states[curr_id].prev_vec[sym].size()); + if (num_prev == 1 && !test_p1) { + test_p1 = true; + prev_id = info.states[curr_id].prev_vec[sym].front(); //[0] for sure??? + prev_sym = sym; + } + } + + return num_prev == 1; +} + +/* \brief Test for same-failure-action property. */ +static +bool check_property2(const raw_dfa &rdfa, const u16 impl_alpha_size, + const dstate_id_t curr_id, const dstate_id_t prev_id, + const symbol_t curr_sym, const symbol_t prev_sym) { + const dstate &prevState = rdfa.states[prev_id]; + const dstate &currState = rdfa.states[curr_id]; + + // Compare transition tables between currState and prevState. + u16 score = 0; + for (symbol_t sym = 0; sym < impl_alpha_size; sym++) { + if (currState.next[sym] == prevState.next[sym] + && sym != curr_sym && sym != prev_sym) { + score++; + } + } + DEBUG_PRINTF("(Score: %u/%u)\n", score, impl_alpha_size); + + // 2 cases. + if (curr_sym != prev_sym && score >= impl_alpha_size - 2 + && currState.next[prev_sym] == prevState.next[curr_sym]) { + return true; + } else if (curr_sym == prev_sym && score == impl_alpha_size - 1) { + return true; + } + return false; +} + +/* \brief Check whether adding current prev_id will generate a circle.*/ +static +bool check_circle(const DfaPrevInfo &info, const u16 impl_alpha_size, + const vector<dstate_id_t> &chain, const dstate_id_t id) { + const vector<vector<dstate_id_t>> &prev_vec = info.states[id].prev_vec; + const dstate_id_t tail = chain.front(); + for (symbol_t sym = 0; sym < impl_alpha_size; sym++) { + auto iter = find(prev_vec[sym].begin(), prev_vec[sym].end(), tail); + if (iter != prev_vec[sym].end()) { + // Tail is one of id's predecessors, forming a circle. + return true; + } + } + return false; +} + +/* \brief Returns a chain of state ids and symbols. */ +static +dstate_id_t find_chain_candidate(const raw_dfa &rdfa, const DfaPrevInfo &info, + const dstate_id_t curr_id, + const symbol_t curr_sym, + vector<dstate_id_t> &temp_chain) { + //Record current id first. + temp_chain.push_back(curr_id); + + const u16 size = info.impl_alpha_size; + + // Stop when entering root cloud. + if (rdfa.start_anchored != DEAD_STATE + && is_cyclic_near(rdfa, rdfa.start_anchored) + && curr_id < size) { + return curr_id; + } + if (rdfa.start_floating != DEAD_STATE + && curr_id >= rdfa.start_floating + && curr_id < rdfa.start_floating + size * 3) { + return curr_id; + } + + // Stop when reaching anchored or floating. + if (curr_id == rdfa.start_anchored || curr_id == rdfa.start_floating) { + return curr_id; + } + + dstate_id_t prev_id = 0; + symbol_t prev_sym = ALPHABET_SIZE; + + // Check the only-one-predecessor property. + if (!check_property1(info, size, curr_id, prev_id, prev_sym)) { + return curr_id; + } + assert(prev_id != 0 && prev_sym != ALPHABET_SIZE); + DEBUG_PRINTF("(P1 test passed.)\n"); + + // Circle testing for the prev_id that passes the P1 test. + if (check_circle(info, size, temp_chain, prev_id)) { + DEBUG_PRINTF("(A circle is found.)\n"); + return curr_id; + } + + // Check the same-failure-action property. + if (!check_property2(rdfa, size, curr_id, prev_id, curr_sym, prev_sym)) { + return curr_id; + } + DEBUG_PRINTF("(P2 test passed.)\n"); + + if (!rdfa.states[prev_id].reports.empty() + || !rdfa.states[prev_id].reports_eod.empty()) { + return curr_id; + } else { + return find_chain_candidate(rdfa, info, prev_id, prev_sym, temp_chain); + } +} + +/* \brief Always store the non-extensible chains found till now. */ +static +bool store_chain_longest(vector<vector<dstate_id_t>> &candidate_chain, + vector<dstate_id_t> &temp_chain, + dynamic_bitset<> &added, bool head_is_new) { + dstate_id_t head = temp_chain.front(); + u16 length = temp_chain.size(); + + if (head_is_new) { + DEBUG_PRINTF("This is a new chain!\n"); + + // Add this new chain and get it marked. + candidate_chain.push_back(temp_chain); + + for (auto &id : temp_chain) { + DEBUG_PRINTF("(Marking s%u ...)\n", id); + added.set(id); + } + + return true; + } + + DEBUG_PRINTF("This is a longer chain!\n"); + assert(!candidate_chain.empty()); + + auto chain = find_if(candidate_chain.begin(), candidate_chain.end(), + [&](const vector<dstate_id_t> &it) { + return it.front() == head; + }); + + // Not a valid head, just do nothing and return. + if (chain == candidate_chain.end()) { + return false; + } + + u16 len = chain->size(); + + if (length > len) { + // Find out the branch node first. + size_t piv = 0; + for (; piv < length; piv++) { + if ((*chain)[piv] != temp_chain[piv]) { + break; + } + } + + for (size_t j = piv + 1; j < length; j++) { + DEBUG_PRINTF("(Marking s%u (new branch) ...)\n", temp_chain[j]); + added.set(temp_chain[j]); + } + + // Unmark old unuseful nodes. + // (Except the tail node, which is in working queue) + for (size_t j = piv + 1; j < verify_u16(len - 1); j++) { + DEBUG_PRINTF("(UnMarking s%u (old branch)...)\n", (*chain)[j]); + added.reset((*chain)[j]); + } + + chain->assign(temp_chain.begin(), temp_chain.end()); + } + + return false; +} + +/* \brief Generate wide_symbol_chain from wide_state_chain. */ +static +void generate_symbol_chain(dfa_info &info, vector<symbol_t> &chain_tail) { + raw_dfa &rdfa = info.raw; + assert(chain_tail.size() == info.wide_state_chain.size()); + + for (size_t i = 0; i < info.wide_state_chain.size(); i++) { + vector<dstate_id_t> &state_chain = info.wide_state_chain[i]; + vector<symbol_t> symbol_chain; + + info.extra[state_chain[0]].wideHead = true; + size_t width = state_chain.size() - 1; + + for (size_t j = 0; j < width; j++) { + dstate_id_t curr_id = state_chain[j]; + dstate_id_t next_id = state_chain[j + 1]; + + // The last state of the chain doesn't belong to a wide state. + info.extra[curr_id].wideState = true; + + // The tail symbol comes from vector chain_tail; + if (j == width - 1) { + symbol_chain.push_back(chain_tail[i]); + } else { + for (symbol_t sym = 0; sym < info.impl_alpha_size; sym++) { + if (rdfa.states[curr_id].next[sym] == next_id) { + symbol_chain.push_back(sym); + break; + } + } + } + } + + info.wide_symbol_chain.push_back(symbol_chain); + } +} + +/* \brief Find potential regions of states to be packed into wide states. */ +static +void find_wide_state(dfa_info &info) { + DfaPrevInfo dinfo(info.raw); + queue<dstate_id_t> work_queue; + + dynamic_bitset<> added(info.raw.states.size()); + for (auto it : dinfo.accepts) { + work_queue.push(it); + added.set(it); + } + + vector<symbol_t> chain_tail; + while (!work_queue.empty()) { + dstate_id_t curr_id = work_queue.front(); + work_queue.pop(); + DEBUG_PRINTF("Newly popped state: s%u\n", curr_id); + + for (symbol_t sym = 0; sym < dinfo.impl_alpha_size; sym++) { + for (auto info_it : dinfo.states[curr_id].prev_vec[sym]) { + if (added.test(info_it)) { + DEBUG_PRINTF("(s%u already marked.)\n", info_it); + continue; + } + + vector<dstate_id_t> temp_chain; + // Head is a state failing the test of the chain. + dstate_id_t head = find_chain_candidate(info.raw, dinfo, + info_it, sym, + temp_chain); + + // A candidate chain should contain 8 substates at least. + if (temp_chain.size() < 8) { + DEBUG_PRINTF("(Not enough substates, continue.)\n"); + continue; + } + + bool head_is_new = !added.test(head); + if (head_is_new) { + added.set(head); + work_queue.push(head); + DEBUG_PRINTF("Newly pushed state: s%u\n", head); + } + + reverse(temp_chain.begin(), temp_chain.end()); + temp_chain.push_back(curr_id); + + assert(head > 0 && head == temp_chain.front()); + if (store_chain_longest(info.wide_state_chain, temp_chain, + added, head_is_new)) { + chain_tail.push_back(sym); + } + } + } + } + + generate_symbol_chain(info, chain_tail); +} + bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat, const CompileContext &cc, bool trust_daddy_states, @@ -1477,31 +1477,31 @@ bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat, bytecode_ptr<NFA> nfa; if (!using8bit) { - // Wide state optimization - if (cc.grey.allowWideStates && strat.getType() == McClellan - && !is_triggered(raw.kind)) { - find_wide_state(info); - } - + // Wide state optimization + if (cc.grey.allowWideStates && strat.getType() == McClellan + && !is_triggered(raw.kind)) { + find_wide_state(info); + } + u16 total_daddy = 0; bool any_cyclic_near_anchored_state = is_cyclic_near(raw, raw.start_anchored); - // Sherman optimization - if (info.impl_alpha_size > 16) { - for (u32 i = 0; i < info.size(); i++) { - if (info.is_widestate(i)) { - continue; - } - find_better_daddy(info, i, using8bit, - any_cyclic_near_anchored_state, - trust_daddy_states, cc.grey); - total_daddy += info.extra[i].daddytaken; - } - - DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy, - info.size() * info.impl_alpha_size, info.size(), - info.impl_alpha_size); + // Sherman optimization + if (info.impl_alpha_size > 16) { + for (u32 i = 0; i < info.size(); i++) { + if (info.is_widestate(i)) { + continue; + } + find_better_daddy(info, i, using8bit, + any_cyclic_near_anchored_state, + trust_daddy_states, cc.grey); + total_daddy += info.extra[i].daddytaken; + } + + DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy, + info.size() * info.impl_alpha_size, info.size(), + info.impl_alpha_size); } nfa = mcclellanCompile16(info, cc, accel_states); diff --git a/contrib/libs/hyperscan/src/nfa/mcclellancompile.h b/contrib/libs/hyperscan/src/nfa/mcclellancompile.h index a56016018b..73cb9fd775 100644 --- a/contrib/libs/hyperscan/src/nfa/mcclellancompile.h +++ b/contrib/libs/hyperscan/src/nfa/mcclellancompile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -60,7 +60,7 @@ public: u32 max_allowed_offset_accel() const override; u32 max_stop_char() const override; u32 max_floating_stop_char() const override; - DfaType getType() const override { return McClellan; } + DfaType getType() const override { return McClellan; } private: raw_dfa &rdfa; diff --git a/contrib/libs/hyperscan/src/nfa/mcclellandump.h b/contrib/libs/hyperscan/src/nfa/mcclellandump.h index a4cd81c031..5b63a20634 100644 --- a/contrib/libs/hyperscan/src/nfa/mcclellandump.h +++ b/contrib/libs/hyperscan/src/nfa/mcclellandump.h @@ -1,62 +1,62 @@ -/* - * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef MCCLELLAN_DUMP_H -#define MCCLELLAN_DUMP_H - -#ifdef DUMP_SUPPORT - -#include "rdfa.h" - -#include <cstdio> -#include <string> - -struct mcclellan; -struct mstate_aux; -struct NFA; -union AccelAux; - -namespace ue2 { - -void nfaExecMcClellan8_dump(const struct NFA *nfa, const std::string &base); -void nfaExecMcClellan16_dump(const struct NFA *nfa, const std::string &base); - -/* These functions are shared with the Gough dump code. */ - -const mstate_aux *getAux(const NFA *n, dstate_id_t i); -void describeEdge(FILE *f, const u16 *t, u16 i); -void dumpAccelText(FILE *f, const union AccelAux *accel); -void dumpAccelDot(FILE *f, u16 i, const union AccelAux *accel); -void describeAlphabet(FILE *f, const mcclellan *m); -void dumpDotPreambleDfa(FILE *f); - -} // namespace ue2 - -#endif // DUMP_SUPPORT - -#endif // MCCLELLAN_DUMP_H +/* + * Copyright (c) 2015-2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef MCCLELLAN_DUMP_H +#define MCCLELLAN_DUMP_H + +#ifdef DUMP_SUPPORT + +#include "rdfa.h" + +#include <cstdio> +#include <string> + +struct mcclellan; +struct mstate_aux; +struct NFA; +union AccelAux; + +namespace ue2 { + +void nfaExecMcClellan8_dump(const struct NFA *nfa, const std::string &base); +void nfaExecMcClellan16_dump(const struct NFA *nfa, const std::string &base); + +/* These functions are shared with the Gough dump code. */ + +const mstate_aux *getAux(const NFA *n, dstate_id_t i); +void describeEdge(FILE *f, const u16 *t, u16 i); +void dumpAccelText(FILE *f, const union AccelAux *accel); +void dumpAccelDot(FILE *f, u16 i, const union AccelAux *accel); +void describeAlphabet(FILE *f, const mcclellan *m); +void dumpDotPreambleDfa(FILE *f); + +} // namespace ue2 + +#endif // DUMP_SUPPORT + +#endif // MCCLELLAN_DUMP_H diff --git a/contrib/libs/hyperscan/src/nfa/mcsheng.c b/contrib/libs/hyperscan/src/nfa/mcsheng.c index d285793483..22cac119fb 100644 --- a/contrib/libs/hyperscan/src/nfa/mcsheng.c +++ b/contrib/libs/hyperscan/src/nfa/mcsheng.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Intel Corporation + * Copyright (c) 2016-2020, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -173,7 +173,7 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end, u32 sheng_limit_x4 = sheng_limit * 0x01010101; m128 simd_stop_limit = set4x32(sheng_stop_limit_x4); m128 accel_delta = set16x8(sheng_limit - sheng_stop_limit); - DEBUG_PRINTF("end %hhu, accel %hu --> limit %hhu\n", sheng_limit, + DEBUG_PRINTF("end %hhu, accel %hu --> limit %hhu\n", sheng_limit, m->sheng_accel_limit, sheng_stop_limit); #endif @@ -181,7 +181,7 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end, m128 shuffle_mask = masks[*(c++)]; \ s = pshufb_m128(shuffle_mask, s); \ u32 s_gpr_x4 = movd(s); /* convert to u8 */ \ - DEBUG_PRINTF("c %hhu (%c) --> s %u\n", c[-1], c[-1], s_gpr_x4); \ + DEBUG_PRINTF("c %hhu (%c) --> s %u\n", c[-1], c[-1], s_gpr_x4); \ if (s_gpr_x4 >= sheng_stop_limit_x4) { \ s_gpr = s_gpr_x4; \ goto exit; \ @@ -191,7 +191,7 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end, u8 s_gpr; while (c < c_end) { #if defined(HAVE_BMI2) && defined(ARCH_64_BIT) - /* This version uses pext for efficiently bitbashing out scaled + /* This version uses pext for efficiently bitbashing out scaled * versions of the bytes to process from a u64a */ u64a data_bytes = unaligned_load_u64a(c); @@ -201,7 +201,7 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end, s = pshufb_m128(shuffle_mask0, s); m128 s_max = s; m128 s_max0 = s_max; - DEBUG_PRINTF("c %02llx --> s %u\n", cc0 >> 4, movd(s)); + DEBUG_PRINTF("c %02llx --> s %u\n", cc0 >> 4, movd(s)); #define SHENG_SINGLE_UNROLL_ITER(iter) \ assert(iter); \ @@ -217,7 +217,7 @@ u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end, s_max = max_u8_m128(s_max, s); \ } \ m128 s_max##iter = s_max; \ - DEBUG_PRINTF("c %02llx --> s %u max %u\n", cc##iter >> 4, \ + DEBUG_PRINTF("c %02llx --> s %u max %u\n", cc##iter >> 4, \ movd(s), movd(s_max)); SHENG_SINGLE_UNROLL_ITER(1); @@ -1184,7 +1184,7 @@ char nfaExecMcSheng16_reportCurrent(const struct NFA *n, struct mq *q) { static char mcshengHasAccept(const struct mcsheng *m, const struct mstate_aux *aux, - ReportID report) { + ReportID report) { assert(m && aux); if (!aux->accept) { @@ -1405,1332 +1405,1332 @@ char nfaExecMcSheng16_expandState(UNUSED const struct NFA *nfa, void *dest, *(u16 *)dest = unaligned_load_u16(src); return 0; } - -#if defined(HAVE_AVX512VBMI) -static really_inline -const struct mstate_aux *get_aux64(const struct mcsheng64 *m, u32 s) { - const char *nfa = (const char *)m - sizeof(struct NFA); - const struct mstate_aux *aux - = s + (const struct mstate_aux *)(nfa + m->aux_offset); - - assert(ISALIGNED(aux)); - return aux; -} - -static really_inline -u32 mcshengEnableStarts64(const struct mcsheng64 *m, u32 s) { - const struct mstate_aux *aux = get_aux64(m, s); - - DEBUG_PRINTF("enabling starts %u->%hu\n", s, aux->top); - return aux->top; -} - -static really_inline -char doComplexReport64(NfaCallback cb, void *ctxt, const struct mcsheng64 *m, - u32 s, u64a loc, char eod, u32 *cached_accept_state, - u32 *cached_accept_id) { - DEBUG_PRINTF("reporting state = %u, loc=%llu, eod %hhu\n", - s & STATE_MASK, loc, eod); - - if (!eod && s == *cached_accept_state) { - if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; /* termination requested */ - } - - return MO_CONTINUE_MATCHING; /* continue execution */ - } - - const struct mstate_aux *aux = get_aux64(m, s); - size_t offset = eod ? aux->accept_eod : aux->accept; - - assert(offset); - const struct report_list *rl - = (const void *)((const char *)m + offset - sizeof(struct NFA)); - assert(ISALIGNED(rl)); - - DEBUG_PRINTF("report list size %u\n", rl->count); - u32 count = rl->count; - - if (!eod && count == 1) { - *cached_accept_state = s; - *cached_accept_id = rl->report[0]; - - DEBUG_PRINTF("reporting %u\n", rl->report[0]); - if (cb(0, loc, rl->report[0], ctxt) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; /* termination requested */ - } - - return MO_CONTINUE_MATCHING; /* continue execution */ - } - - for (u32 i = 0; i < count; i++) { - DEBUG_PRINTF("reporting %u\n", rl->report[i]); - if (cb(0, loc, rl->report[i], ctxt) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; /* termination requested */ - } - } - - return MO_CONTINUE_MATCHING; /* continue execution */ -} - -static really_inline -u32 doSheng64(const struct mcsheng64 *m, const u8 **c_inout, const u8 *soft_c_end, - const u8 *hard_c_end, u32 s_in, char do_accel) { - assert(s_in < m->sheng_end); - assert(s_in); /* should not already be dead */ - assert(soft_c_end <= hard_c_end); - DEBUG_PRINTF("s_in = %u (adjusted %u)\n", s_in, s_in - 1); - m512 s = set64x8(s_in - 1); - const u8 *c = *c_inout; - const u8 *c_end = hard_c_end - SHENG_CHUNK + 1; - if (!do_accel) { - c_end = MIN(soft_c_end, hard_c_end - SHENG_CHUNK + 1); - } - - const m512 *masks = m->sheng_succ_masks; - u8 sheng_limit = m->sheng_end - 1; /* - 1: no dead state */ - u8 sheng_stop_limit = do_accel ? m->sheng_accel_limit : sheng_limit; - - /* When we use movd to get a u32 containing our state, it will have 4 lanes - * all duplicating the state. We can create versions of our limits with 4 - * copies to directly compare against, this prevents us generating code to - * extract a single copy of the state from the u32 for checking. */ - u32 sheng_stop_limit_x4 = sheng_stop_limit * 0x01010101; - -#if defined(HAVE_BMI2) && defined(ARCH_64_BIT) - u32 sheng_limit_x4 = sheng_limit * 0x01010101; - m512 simd_stop_limit = set16x32(sheng_stop_limit_x4); - m512 accel_delta = set64x8(sheng_limit - sheng_stop_limit); - DEBUG_PRINTF("end %hhu, accel %hu --> limit %hhu\n", sheng_limit, - m->sheng_accel_limit, sheng_stop_limit); -#endif - -#define SHENG64_SINGLE_ITER do { \ - m512 succ_mask = masks[*(c++)]; \ - s = vpermb512(s, succ_mask); \ - u32 s_gpr_x4 = movd512(s); /* convert to u8 */ \ - DEBUG_PRINTF("c %hhu (%c) --> s %u\n", c[-1], c[-1], s_gpr_x4); \ - if (s_gpr_x4 >= sheng_stop_limit_x4) { \ - s_gpr = s_gpr_x4; \ - goto exit; \ - } \ - } while (0) - - u8 s_gpr; - while (c < c_end) { -#if defined(HAVE_BMI2) && defined(ARCH_64_BIT) - /* This version uses pext for efficiently bitbashing out scaled - * versions of the bytes to process from a u64a */ - - u64a data_bytes = unaligned_load_u64a(c); - u64a cc0 = pdep64(data_bytes, 0x3fc0); /* extract scaled low byte */ - data_bytes &= ~0xffULL; /* clear low bits for scale space */ - - m512 succ_mask0 = load512((const char *)masks + cc0); - s = vpermb512(s, succ_mask0); - m512 s_max = s; - m512 s_max0 = s_max; - DEBUG_PRINTF("c %02llx --> s %u\n", cc0 >> 6, movd512(s)); - -#define SHENG64_SINGLE_UNROLL_ITER(iter) \ - assert(iter); \ - u64a cc##iter = pext64(data_bytes, mcsheng64_pext_mask[iter]); \ - assert(cc##iter == (u64a)c[iter] << 6); \ - m512 succ_mask##iter = load512((const char *)masks + cc##iter); \ - s = vpermb512(s, succ_mask##iter); \ - if (do_accel && iter == 7) { \ - /* in the final iteration we also have to check against accel */ \ - m512 s_temp = sadd_u8_m512(s, accel_delta); \ - s_max = max_u8_m512(s_max, s_temp); \ - } else { \ - s_max = max_u8_m512(s_max, s); \ - } \ - m512 s_max##iter = s_max; \ - DEBUG_PRINTF("c %02llx --> s %u max %u\n", cc##iter >> 6, \ - movd512(s), movd512(s_max)); - - SHENG64_SINGLE_UNROLL_ITER(1); - SHENG64_SINGLE_UNROLL_ITER(2); - SHENG64_SINGLE_UNROLL_ITER(3); - SHENG64_SINGLE_UNROLL_ITER(4); - SHENG64_SINGLE_UNROLL_ITER(5); - SHENG64_SINGLE_UNROLL_ITER(6); - SHENG64_SINGLE_UNROLL_ITER(7); - - if (movd512(s_max7) >= sheng_limit_x4) { - DEBUG_PRINTF("exit found\n"); - - /* Explicitly check the last byte as it is more likely as it also - * checks for acceleration. */ - if (movd512(s_max6) < sheng_limit_x4) { - c += SHENG_CHUNK; - s_gpr = movq512(s); - assert(s_gpr >= sheng_stop_limit); - goto exit; - } - - /* use shift-xor to create a register containing all of the max - * values */ - m512 blended = rshift64_m512(s_max0, 56); - blended = xor512(blended, rshift64_m512(s_max1, 48)); - blended = xor512(blended, rshift64_m512(s_max2, 40)); - blended = xor512(blended, rshift64_m512(s_max3, 32)); - blended = xor512(blended, rshift64_m512(s_max4, 24)); - blended = xor512(blended, rshift64_m512(s_max5, 16)); - blended = xor512(blended, rshift64_m512(s_max6, 8)); - blended = xor512(blended, s); - blended = xor512(blended, rshift64_m512(blended, 8)); - DEBUG_PRINTF("blended %016llx\n", movq512(blended)); - - m512 final = min_u8_m512(blended, simd_stop_limit); - m512 cmp = sub_u8_m512(final, simd_stop_limit); - m128 tmp = cast512to128(cmp); - u64a stops = ~movemask128(tmp); - assert(stops); - u32 earliest = ctz32(stops); - DEBUG_PRINTF("stops %02llx, earliest %u\n", stops, earliest); - assert(earliest < 8); - c += earliest + 1; - s_gpr = movq512(blended) >> (earliest * 8); - assert(s_gpr >= sheng_stop_limit); - goto exit; - } else { - c += SHENG_CHUNK; - } -#else - SHENG64_SINGLE_ITER; - SHENG64_SINGLE_ITER; - SHENG64_SINGLE_ITER; - SHENG64_SINGLE_ITER; - - SHENG64_SINGLE_ITER; - SHENG64_SINGLE_ITER; - SHENG64_SINGLE_ITER; - SHENG64_SINGLE_ITER; -#endif - } - - assert(c_end - c < SHENG_CHUNK); - if (c < soft_c_end) { - assert(soft_c_end - c < SHENG_CHUNK); - switch (soft_c_end - c) { - case 7: - SHENG64_SINGLE_ITER; // fallthrough - case 6: - SHENG64_SINGLE_ITER; // fallthrough - case 5: - SHENG64_SINGLE_ITER; // fallthrough - case 4: - SHENG64_SINGLE_ITER; // fallthrough - case 3: - SHENG64_SINGLE_ITER; // fallthrough - case 2: - SHENG64_SINGLE_ITER; // fallthrough - case 1: - SHENG64_SINGLE_ITER; // fallthrough - } - } - - assert(c >= soft_c_end); - - s_gpr = movq512(s); -exit: - assert(c <= hard_c_end); - DEBUG_PRINTF("%zu from end; s %hhu\n", c_end - c, s_gpr); - assert(c >= soft_c_end || s_gpr >= sheng_stop_limit); - /* undo state adjustment to match mcclellan view */ - if (s_gpr == sheng_limit) { - s_gpr = 0; - } else if (s_gpr < sheng_limit) { - s_gpr++; - } - - *c_inout = c; - return s_gpr; -} - -static really_inline -const char *findShermanState64(UNUSED const struct mcsheng64 *m, - const char *sherman_base_offset, - u32 sherman_base, u32 s) { - const char *rv - = sherman_base_offset + SHERMAN_FIXED_SIZE * (s - sherman_base); - assert(rv < (const char *)m + m->length - sizeof(struct NFA)); - UNUSED u8 type = *(const u8 *)(rv + SHERMAN_TYPE_OFFSET); - assert(type == SHERMAN_STATE); - return rv; -} - -static really_inline -const u8 *run_mcsheng_accel64(const struct mcsheng64 *m, - const struct mstate_aux *aux, u32 s, - const u8 **min_accel_offset, - const u8 *c, const u8 *c_end) { - DEBUG_PRINTF("skipping\n"); - u32 accel_offset = aux[s].accel_offset; - - assert(aux[s].accel_offset); - assert(accel_offset >= m->aux_offset); - assert(!m->sherman_offset || accel_offset < m->sherman_offset); - - const union AccelAux *aaux = (const void *)((const char *)m + accel_offset); - const u8 *c2 = run_accel(aaux, c, c_end); - - if (c2 < *min_accel_offset + BAD_ACCEL_DIST) { - *min_accel_offset = c2 + BIG_ACCEL_PENALTY; - } else { - *min_accel_offset = c2 + SMALL_ACCEL_PENALTY; - } - - if (*min_accel_offset >= c_end - ACCEL_MIN_LEN) { - *min_accel_offset = c_end; - } - - DEBUG_PRINTF("advanced %zd, next accel chance in %zd/%zd\n", - c2 - c, *min_accel_offset - c2, c_end - c2); - - return c2; -} - -static really_inline -u32 doNormal64_16(const struct mcsheng64 *m, const u8 **c_inout, const u8 *end, - u32 s, char do_accel, enum MatchMode mode) { - const u8 *c = *c_inout; - const u16 *succ_table - = (const u16 *)((const char *)m + sizeof(struct mcsheng64)); - assert(ISALIGNED_N(succ_table, 2)); - u32 sheng_end = m->sheng_end; - u32 sherman_base = m->sherman_limit; - const char *sherman_base_offset - = (const char *)m - sizeof(struct NFA) + m->sherman_offset; - u32 as = m->alphaShift; - - /* Adjust start of succ table so we can index into using state id (rather - * than adjust to normal id). As we will not be processing states with low - * state ids, we will not be accessing data before the succ table. Note: due - * to the size of the sheng tables, the succ_table pointer will still be - * inside the engine.*/ - succ_table -= sheng_end << as; - s &= STATE_MASK; - while (c < end && s >= sheng_end) { - u8 cprime = m->remap[*c]; - DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx (s=%u)\n", *c, - ourisprint(*c) ? *c : '?', cprime, s); - if (s < sherman_base) { - DEBUG_PRINTF("doing normal\n"); - assert(s < m->state_count); - s = succ_table[(s << as) + cprime]; - } else { - const char *sherman_state - = findShermanState64(m, sherman_base_offset, sherman_base, s); - DEBUG_PRINTF("doing sherman (%u)\n", s); - s = doSherman16(sherman_state, cprime, succ_table, as); - } - - DEBUG_PRINTF("s: %u (%u)\n", s, s & STATE_MASK); - c++; - - if (do_accel && (s & ACCEL_FLAG)) { - break; - } - if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) { - break; - } - - s &= STATE_MASK; - } - - *c_inout = c; - return s; -} - -static really_inline -char mcsheng64Exec16_i(const struct mcsheng64 *m, u32 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **c_final, enum MatchMode mode) { - assert(ISALIGNED_N(state, 2)); - if (!len) { - if (mode == STOP_AT_MATCH) { - *c_final = buf; - } - return MO_ALIVE; - } - - u32 s = *state; - const u8 *c = buf; - const u8 *c_end = buf + len; - const u8 sheng_end = m->sheng_end; - const struct mstate_aux *aux - = (const struct mstate_aux *)((const char *)m + m->aux_offset - - sizeof(struct NFA)); - - s &= STATE_MASK; - - u32 cached_accept_id = 0; - u32 cached_accept_state = 0; - - DEBUG_PRINTF("s: %u, len %zu\n", s, len); - - const u8 *min_accel_offset = c; - if (!m->has_accel || len < ACCEL_MIN_LEN) { - min_accel_offset = c_end; - goto without_accel; - } - - goto with_accel; - -without_accel: - do { - assert(c < min_accel_offset); - int do_accept; - if (!s) { - goto exit; - } else if (s < sheng_end) { - s = doSheng64(m, &c, min_accel_offset, c_end, s, 0); - do_accept = mode != NO_MATCHES && get_aux64(m, s)->accept; - } else { - s = doNormal64_16(m, &c, min_accel_offset, s, 0, mode); - - do_accept = mode != NO_MATCHES && (s & ACCEPT_FLAG); - } - - if (do_accept) { - if (mode == STOP_AT_MATCH) { - *state = s & STATE_MASK; - *c_final = c - 1; - return MO_MATCHES_PENDING; - } - - u64a loc = (c - 1) - buf + offAdj + 1; - - if (single) { - DEBUG_PRINTF("reporting %u\n", m->arb_report); - if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { - return MO_DEAD; /* termination requested */ - } - } else if (doComplexReport64(cb, ctxt, m, s & STATE_MASK, loc, 0, - &cached_accept_state, - &cached_accept_id) - == MO_HALT_MATCHING) { - return MO_DEAD; - } - } - - assert(c <= c_end); /* sheng is fuzzy for min_accel_offset */ - } while (c < min_accel_offset); - - if (c == c_end) { - goto exit; - } - -with_accel: - do { - assert(c < c_end); - int do_accept; - - if (!s) { - goto exit; - } else if (s < sheng_end) { - if (s > m->sheng_accel_limit) { - c = run_mcsheng_accel64(m, aux, s, &min_accel_offset, c, c_end); - if (c == c_end) { - goto exit; - } else { - goto without_accel; - } - } - s = doSheng64(m, &c, c_end, c_end, s, 1); - do_accept = mode != NO_MATCHES && get_aux64(m, s)->accept; - } else { - if (s & ACCEL_FLAG) { - DEBUG_PRINTF("skipping\n"); - s &= STATE_MASK; - c = run_mcsheng_accel64(m, aux, s, &min_accel_offset, c, c_end); - if (c == c_end) { - goto exit; - } else { - goto without_accel; - } - } - - s = doNormal64_16(m, &c, c_end, s, 1, mode); - do_accept = mode != NO_MATCHES && (s & ACCEPT_FLAG); - } - - if (do_accept) { - if (mode == STOP_AT_MATCH) { - *state = s & STATE_MASK; - *c_final = c - 1; - return MO_MATCHES_PENDING; - } - - u64a loc = (c - 1) - buf + offAdj + 1; - - if (single) { - DEBUG_PRINTF("reporting %u\n", m->arb_report); - if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { - return MO_DEAD; /* termination requested */ - } - } else if (doComplexReport64(cb, ctxt, m, s & STATE_MASK, loc, 0, - &cached_accept_state, - &cached_accept_id) - == MO_HALT_MATCHING) { - return MO_DEAD; - } - } - - assert(c <= c_end); - } while (c < c_end); - -exit: - s &= STATE_MASK; - - if (mode == STOP_AT_MATCH) { - *c_final = c_end; - } - *state = s; - - return MO_ALIVE; -} - -static never_inline -char mcsheng64Exec16_i_cb(const struct mcsheng64 *m, u32 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **final_point) { - return mcsheng64Exec16_i(m, state, buf, len, offAdj, cb, ctxt, single, - final_point, CALLBACK_OUTPUT); -} - -static never_inline -char mcsheng64Exec16_i_sam(const struct mcsheng64 *m, u32 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **final_point) { - return mcsheng64Exec16_i(m, state, buf, len, offAdj, cb, ctxt, single, - final_point, STOP_AT_MATCH); -} - -static never_inline -char mcsheng64Exec16_i_nm(const struct mcsheng64 *m, u32 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **final_point) { - return mcsheng64Exec16_i(m, state, buf, len, offAdj, cb, ctxt, single, - final_point, NO_MATCHES); -} - -static really_inline -char mcsheng64Exec16_i_ni(const struct mcsheng64 *m, u32 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **final_point, - enum MatchMode mode) { - if (mode == CALLBACK_OUTPUT) { - return mcsheng64Exec16_i_cb(m, state, buf, len, offAdj, cb, ctxt, - single, final_point); - } else if (mode == STOP_AT_MATCH) { - return mcsheng64Exec16_i_sam(m, state, buf, len, offAdj, cb, ctxt, - single, final_point); - } else { - assert (mode == NO_MATCHES); - return mcsheng64Exec16_i_nm(m, state, buf, len, offAdj, cb, ctxt, - single, final_point); - } -} - -static really_inline -u32 doNormal64_8(const struct mcsheng64 *m, const u8 **c_inout, const u8 *end, u32 s, - char do_accel, enum MatchMode mode) { - const u8 *c = *c_inout; - u32 sheng_end = m->sheng_end; - u32 accel_limit = m->accel_limit_8; - u32 accept_limit = m->accept_limit_8; - - const u32 as = m->alphaShift; - const u8 *succ_table = (const u8 *)((const char *)m - + sizeof(struct mcsheng64)); - /* Adjust start of succ table so we can index into using state id (rather - * than adjust to normal id). As we will not be processing states with low - * state ids, we will not be accessing data before the succ table. Note: due - * to the size of the sheng tables, the succ_table pointer will still be - * inside the engine.*/ - succ_table -= sheng_end << as; - - assert(s >= sheng_end); - while (c < end && s >= sheng_end) { - u8 cprime = m->remap[*c]; - DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx\n", *c, - ourisprint(*c) ? *c : '?', cprime); - s = succ_table[(s << as) + cprime]; - - DEBUG_PRINTF("s: %u\n", s); - c++; - if (do_accel) { - if (s >= accel_limit) { - break; - } - } else { - if (mode != NO_MATCHES && s >= accept_limit) { - break; - } - } - } - *c_inout = c; - return s; -} - -static really_inline -char mcsheng64Exec8_i(const struct mcsheng64 *m, u32 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **c_final, enum MatchMode mode) { - if (!len) { - *c_final = buf; - return MO_ALIVE; - } - u32 s = *state; - const u8 *c = buf; - const u8 *c_end = buf + len; - const u8 sheng_end = m->sheng_end; - - const struct mstate_aux *aux - = (const struct mstate_aux *)((const char *)m + m->aux_offset - - sizeof(struct NFA)); - u32 accept_limit = m->accept_limit_8; - - u32 cached_accept_id = 0; - u32 cached_accept_state = 0; - - DEBUG_PRINTF("accel %hu, accept %u\n", m->accel_limit_8, accept_limit); - - DEBUG_PRINTF("s: %u, len %zu\n", s, len); - - const u8 *min_accel_offset = c; - if (!m->has_accel || len < ACCEL_MIN_LEN) { - min_accel_offset = c_end; - goto without_accel; - } - - goto with_accel; - -without_accel: - do { - assert(c < min_accel_offset); - if (!s) { - goto exit; - } else if (s < sheng_end) { - s = doSheng64(m, &c, min_accel_offset, c_end, s, 0); - } else { - s = doNormal64_8(m, &c, min_accel_offset, s, 0, mode); - assert(c <= min_accel_offset); - } - - if (mode != NO_MATCHES && s >= accept_limit) { - if (mode == STOP_AT_MATCH) { - DEBUG_PRINTF("match - pausing\n"); - *state = s; - *c_final = c - 1; - return MO_MATCHES_PENDING; - } - - u64a loc = (c - 1) - buf + offAdj + 1; - if (single) { - DEBUG_PRINTF("reporting %u\n", m->arb_report); - if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { - return MO_DEAD; - } - } else if (doComplexReport64(cb, ctxt, m, s, loc, 0, - &cached_accept_state, - &cached_accept_id) - == MO_HALT_MATCHING) { - return MO_DEAD; - } - } - - assert(c <= c_end); /* sheng is fuzzy for min_accel_offset */ - } while (c < min_accel_offset); - - if (c == c_end) { - goto exit; - } - -with_accel: - do { - u32 accel_limit = m->accel_limit_8; - - assert(c < c_end); - if (!s) { - goto exit; - } else if (s < sheng_end) { - if (s > m->sheng_accel_limit) { - c = run_mcsheng_accel64(m, aux, s, &min_accel_offset, c, c_end); - if (c == c_end) { - goto exit; - } else { - goto without_accel; - } - } - s = doSheng64(m, &c, c_end, c_end, s, 1); - } else { - if (s >= accel_limit && aux[s].accel_offset) { - c = run_mcsheng_accel64(m, aux, s, &min_accel_offset, c, c_end); - if (c == c_end) { - goto exit; - } else { - goto without_accel; - } - } - s = doNormal64_8(m, &c, c_end, s, 1, mode); - } - - if (mode != NO_MATCHES && s >= accept_limit) { - if (mode == STOP_AT_MATCH) { - DEBUG_PRINTF("match - pausing\n"); - *state = s; - *c_final = c - 1; - return MO_MATCHES_PENDING; - } - - u64a loc = (c - 1) - buf + offAdj + 1; - if (single) { - DEBUG_PRINTF("reporting %u\n", m->arb_report); - if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { - return MO_DEAD; - } - } else if (doComplexReport64(cb, ctxt, m, s, loc, 0, - &cached_accept_state, - &cached_accept_id) - == MO_HALT_MATCHING) { - return MO_DEAD; - } - } - - assert(c <= c_end); - } while (c < c_end); - -exit: - *state = s; - if (mode == STOP_AT_MATCH) { - *c_final = c_end; - } - return MO_ALIVE; -} - -static never_inline -char mcsheng64Exec8_i_cb(const struct mcsheng64 *m, u32 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **final_point) { - return mcsheng64Exec8_i(m, state, buf, len, offAdj, cb, ctxt, single, - final_point, CALLBACK_OUTPUT); -} - -static never_inline -char mcsheng64Exec8_i_sam(const struct mcsheng64 *m, u32 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **final_point) { - return mcsheng64Exec8_i(m, state, buf, len, offAdj, cb, ctxt, single, - final_point, STOP_AT_MATCH); -} - -static never_inline -char mcsheng64Exec8_i_nm(const struct mcsheng64 *m, u32 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **final_point) { - return mcsheng64Exec8_i(m, state, buf, len, offAdj, cb, ctxt, single, - final_point, NO_MATCHES); -} - -static really_inline -char mcsheng64Exec8_i_ni(const struct mcsheng64 *m, u32 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **final_point, - enum MatchMode mode) { - if (mode == CALLBACK_OUTPUT) { - return mcsheng64Exec8_i_cb(m, state, buf, len, offAdj, cb, ctxt, single, - final_point); - } else if (mode == STOP_AT_MATCH) { - return mcsheng64Exec8_i_sam(m, state, buf, len, offAdj, cb, ctxt, - single, final_point); - } else { - assert(mode == NO_MATCHES); - return mcsheng64Exec8_i_nm(m, state, buf, len, offAdj, cb, ctxt, single, - final_point); - } -} - -static really_inline -char mcshengCheckEOD64(const struct NFA *nfa, u32 s, u64a offset, - NfaCallback cb, void *ctxt) { - const struct mcsheng64 *m = getImplNfa(nfa); - const struct mstate_aux *aux = get_aux64(m, s); - - if (!aux->accept_eod) { - return MO_CONTINUE_MATCHING; - } - return doComplexReport64(cb, ctxt, m, s, offset, 1, NULL, NULL); -} - -static really_inline -char nfaExecMcSheng64_16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, - const u8 *hend, NfaCallback cb, void *context, - struct mq *q, char single, s64a end, - enum MatchMode mode) { - assert(n->type == MCSHENG_64_NFA_16); - const struct mcsheng64 *m = getImplNfa(n); - s64a sp; - - assert(ISALIGNED_N(q->state, 2)); - u32 s = *(u16 *)q->state; - - if (q->report_current) { - assert(s); - assert(get_aux64(m, s)->accept); - - int rv; - if (single) { - DEBUG_PRINTF("reporting %u\n", m->arb_report); - rv = cb(0, q_cur_offset(q), m->arb_report, context); - } else { - u32 cached_accept_id = 0; - u32 cached_accept_state = 0; - - rv = doComplexReport64(cb, context, m, s, q_cur_offset(q), 0, - &cached_accept_state, &cached_accept_id); - } - - q->report_current = 0; - - if (rv == MO_HALT_MATCHING) { - return MO_DEAD; - } - } - - sp = q_cur_loc(q); - q->cur++; - - const u8 *cur_buf = sp < 0 ? hend : buffer; - - assert(q->cur); - if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) { - DEBUG_PRINTF("this is as far as we go\n"); - q->cur--; - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = end; - *(u16 *)q->state = s; - return MO_ALIVE; - } - - while (1) { - assert(q->cur < q->end); - s64a ep = q->items[q->cur].location; - if (mode != NO_MATCHES) { - ep = MIN(ep, end); - } - - assert(ep >= sp); - - s64a local_ep = ep; - if (sp < 0) { - local_ep = MIN(0, ep); - } - - /* do main buffer region */ - const u8 *final_look; - char rv = mcsheng64Exec16_i_ni(m, &s, cur_buf + sp, local_ep - sp, - offset + sp, cb, context, single, - &final_look, mode); - if (rv == MO_DEAD) { - *(u16 *)q->state = 0; - return MO_DEAD; - } - if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) { - DEBUG_PRINTF("this is as far as we go\n"); - DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf); - - assert(q->cur); - assert(final_look != cur_buf + local_ep); - - q->cur--; - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = final_look - cur_buf + 1; /* due to - * early -1 */ - *(u16 *)q->state = s; - return MO_MATCHES_PENDING; - } - - assert(rv == MO_ALIVE); - assert(q->cur); - if (mode != NO_MATCHES && q->items[q->cur].location > end) { - DEBUG_PRINTF("this is as far as we go\n"); - q->cur--; - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = end; - *(u16 *)q->state = s; - return MO_ALIVE; - } - - sp = local_ep; - - if (sp == 0) { - cur_buf = buffer; - } - - if (sp != ep) { - continue; - } - - switch (q->items[q->cur].type) { - case MQE_TOP: - assert(sp + offset || !s); - if (sp + offset == 0) { - s = m->start_anchored; - break; - } - s = mcshengEnableStarts64(m, s); - break; - case MQE_END: - *(u16 *)q->state = s; - q->cur++; - return s ? MO_ALIVE : MO_DEAD; - default: - assert(!"invalid queue event"); - } - - q->cur++; - } -} - -static really_inline -char nfaExecMcSheng64_8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, - const u8 *hend, NfaCallback cb, void *context, - struct mq *q, char single, s64a end, - enum MatchMode mode) { - assert(n->type == MCSHENG_64_NFA_8); - const struct mcsheng64 *m = getImplNfa(n); - s64a sp; - - u32 s = *(u8 *)q->state; - - if (q->report_current) { - assert(s); - assert(s >= m->accept_limit_8); - - int rv; - if (single) { - DEBUG_PRINTF("reporting %u\n", m->arb_report); - - rv = cb(0, q_cur_offset(q), m->arb_report, context); - } else { - u32 cached_accept_id = 0; - u32 cached_accept_state = 0; - - rv = doComplexReport64(cb, context, m, s, q_cur_offset(q), 0, - &cached_accept_state, &cached_accept_id); - } - - q->report_current = 0; - - if (rv == MO_HALT_MATCHING) { - return MO_DEAD; - } - } - - sp = q_cur_loc(q); - q->cur++; - - const u8 *cur_buf = sp < 0 ? hend : buffer; - - if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) { - DEBUG_PRINTF("this is as far as we go\n"); - q->cur--; - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = end; - *(u8 *)q->state = s; - return MO_ALIVE; - } - - while (1) { - DEBUG_PRINTF("%s @ %llu\n", q->items[q->cur].type == MQE_TOP ? "TOP" : - q->items[q->cur].type == MQE_END ? "END" : "???", - q->items[q->cur].location + offset); - assert(q->cur < q->end); - s64a ep = q->items[q->cur].location; - if (mode != NO_MATCHES) { - ep = MIN(ep, end); - } - - assert(ep >= sp); - - s64a local_ep = ep; - if (sp < 0) { - local_ep = MIN(0, ep); - } - - const u8 *final_look; - char rv = mcsheng64Exec8_i_ni(m, &s, cur_buf + sp, local_ep - sp, - offset + sp, cb, context, single, - &final_look, mode); - if (rv == MO_HALT_MATCHING) { - *(u8 *)q->state = 0; - return MO_DEAD; - } - if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) { - DEBUG_PRINTF("this is as far as we go\n"); - DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf); - - assert(q->cur); - assert(final_look != cur_buf + local_ep); - - q->cur--; - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = final_look - cur_buf + 1; /* due to - * early -1 */ - *(u8 *)q->state = s; - return MO_MATCHES_PENDING; - } - - assert(rv == MO_ALIVE); - assert(q->cur); - if (mode != NO_MATCHES && q->items[q->cur].location > end) { - DEBUG_PRINTF("this is as far as we go\n"); - assert(q->cur); - q->cur--; - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = end; - *(u8 *)q->state = s; - return MO_ALIVE; - } - - sp = local_ep; - - if (sp == 0) { - cur_buf = buffer; - } - - if (sp != ep) { - continue; - } - - switch (q->items[q->cur].type) { - case MQE_TOP: - assert(sp + offset || !s); - if (sp + offset == 0) { - s = (u8)m->start_anchored; - break; - } - s = mcshengEnableStarts64(m, s); - break; - case MQE_END: - *(u8 *)q->state = s; - q->cur++; - return s ? MO_ALIVE : MO_DEAD; - default: - assert(!"invalid queue event"); - } - - q->cur++; - } -} - -char nfaExecMcSheng64_8_Q(const struct NFA *n, struct mq *q, s64a end) { - u64a offset = q->offset; - const u8 *buffer = q->buffer; - NfaCallback cb = q->cb; - void *context = q->context; - assert(n->type == MCSHENG_64_NFA_8); - const struct mcsheng64 *m = getImplNfa(n); - const u8 *hend = q->history + q->hlength; - - return nfaExecMcSheng64_8_Q2i(n, offset, buffer, hend, cb, context, q, - m->flags & MCSHENG_FLAG_SINGLE, end, - CALLBACK_OUTPUT); -} - -char nfaExecMcSheng64_16_Q(const struct NFA *n, struct mq *q, s64a end) { - u64a offset = q->offset; - const u8 *buffer = q->buffer; - NfaCallback cb = q->cb; - void *context = q->context; - assert(n->type == MCSHENG_64_NFA_16); - const struct mcsheng64 *m = getImplNfa(n); - const u8 *hend = q->history + q->hlength; - - return nfaExecMcSheng64_16_Q2i(n, offset, buffer, hend, cb, context, q, - m->flags & MCSHENG_FLAG_SINGLE, end, - CALLBACK_OUTPUT); -} - -char nfaExecMcSheng64_8_reportCurrent(const struct NFA *n, struct mq *q) { - const struct mcsheng64 *m = getImplNfa(n); - NfaCallback cb = q->cb; - void *ctxt = q->context; - u32 s = *(u8 *)q->state; - u8 single = m->flags & MCSHENG_FLAG_SINGLE; - u64a offset = q_cur_offset(q); - assert(q_cur_type(q) == MQE_START); - assert(s); - - if (s >= m->accept_limit_8) { - if (single) { - DEBUG_PRINTF("reporting %u\n", m->arb_report); - cb(0, offset, m->arb_report, ctxt); - } else { - u32 cached_accept_id = 0; - u32 cached_accept_state = 0; - - doComplexReport64(cb, ctxt, m, s, offset, 0, &cached_accept_state, - &cached_accept_id); - } - } - - return 0; -} - -char nfaExecMcSheng64_16_reportCurrent(const struct NFA *n, struct mq *q) { - const struct mcsheng64 *m = getImplNfa(n); - NfaCallback cb = q->cb; - void *ctxt = q->context; - u32 s = *(u16 *)q->state; - const struct mstate_aux *aux = get_aux64(m, s); - u8 single = m->flags & MCSHENG_FLAG_SINGLE; - u64a offset = q_cur_offset(q); - assert(q_cur_type(q) == MQE_START); - DEBUG_PRINTF("state %u\n", s); - assert(s); - - if (aux->accept) { - if (single) { - DEBUG_PRINTF("reporting %u\n", m->arb_report); - cb(0, offset, m->arb_report, ctxt); - } else { - u32 cached_accept_id = 0; - u32 cached_accept_state = 0; - - doComplexReport64(cb, ctxt, m, s, offset, 0, &cached_accept_state, - &cached_accept_id); - } - } - - return 0; -} - -static -char mcshengHasAccept64(const struct mcsheng64 *m, const struct mstate_aux *aux, - ReportID report) { - assert(m && aux); - - if (!aux->accept) { - return 0; - } - - const struct report_list *rl = (const struct report_list *) - ((const char *)m + aux->accept - sizeof(struct NFA)); - assert(ISALIGNED_N(rl, 4)); - - DEBUG_PRINTF("report list has %u entries\n", rl->count); - - for (u32 i = 0; i < rl->count; i++) { - if (rl->report[i] == report) { - return 1; - } - } - - return 0; -} - -char nfaExecMcSheng64_8_inAccept(const struct NFA *n, ReportID report, - struct mq *q) { - assert(n && q); - - const struct mcsheng64 *m = getImplNfa(n); - u8 s = *(u8 *)q->state; - DEBUG_PRINTF("checking accepts for %hhu\n", s); - - return mcshengHasAccept64(m, get_aux64(m, s), report); -} - -char nfaExecMcSheng64_8_inAnyAccept(const struct NFA *n, struct mq *q) { - assert(n && q); - - const struct mcsheng64 *m = getImplNfa(n); - u8 s = *(u8 *)q->state; - DEBUG_PRINTF("checking accepts for %hhu\n", s); - - return !!get_aux64(m, s)->accept; -} - -char nfaExecMcSheng64_16_inAccept(const struct NFA *n, ReportID report, - struct mq *q) { - assert(n && q); - - const struct mcsheng64 *m = getImplNfa(n); - u16 s = *(u16 *)q->state; - DEBUG_PRINTF("checking accepts for %hu\n", s); - - return mcshengHasAccept64(m, get_aux64(m, s), report); -} - -char nfaExecMcSheng64_16_inAnyAccept(const struct NFA *n, struct mq *q) { - assert(n && q); - - const struct mcsheng64 *m = getImplNfa(n); - u16 s = *(u16 *)q->state; - DEBUG_PRINTF("checking accepts for %hu\n", s); - - return !!get_aux64(m, s)->accept; -} - -char nfaExecMcSheng64_8_Q2(const struct NFA *n, struct mq *q, s64a end) { - u64a offset = q->offset; - const u8 *buffer = q->buffer; - NfaCallback cb = q->cb; - void *context = q->context; - assert(n->type == MCSHENG_64_NFA_8); - const struct mcsheng64 *m = getImplNfa(n); - const u8 *hend = q->history + q->hlength; - - return nfaExecMcSheng64_8_Q2i(n, offset, buffer, hend, cb, context, q, - m->flags & MCSHENG_FLAG_SINGLE, end, - STOP_AT_MATCH); -} - -char nfaExecMcSheng64_16_Q2(const struct NFA *n, struct mq *q, s64a end) { - u64a offset = q->offset; - const u8 *buffer = q->buffer; - NfaCallback cb = q->cb; - void *context = q->context; - assert(n->type == MCSHENG_64_NFA_16); - const struct mcsheng64 *m = getImplNfa(n); - const u8 *hend = q->history + q->hlength; - - return nfaExecMcSheng64_16_Q2i(n, offset, buffer, hend, cb, context, q, - m->flags & MCSHENG_FLAG_SINGLE, end, - STOP_AT_MATCH); -} - -char nfaExecMcSheng64_8_QR(const struct NFA *n, struct mq *q, ReportID report) { - u64a offset = q->offset; - const u8 *buffer = q->buffer; - NfaCallback cb = q->cb; - void *context = q->context; - assert(n->type == MCSHENG_64_NFA_8); - const struct mcsheng64 *m = getImplNfa(n); - const u8 *hend = q->history + q->hlength; - - char rv = nfaExecMcSheng64_8_Q2i(n, offset, buffer, hend, cb, context, q, - m->flags & MCSHENG_FLAG_SINGLE, - 0 /* end */, NO_MATCHES); - if (rv && nfaExecMcSheng64_8_inAccept(n, report, q)) { - return MO_MATCHES_PENDING; - } else { - return rv; - } -} - -char nfaExecMcSheng64_16_QR(const struct NFA *n, struct mq *q, ReportID report) { - u64a offset = q->offset; - const u8 *buffer = q->buffer; - NfaCallback cb = q->cb; - void *context = q->context; - assert(n->type == MCSHENG_64_NFA_16); - const struct mcsheng64 *m = getImplNfa(n); - const u8 *hend = q->history + q->hlength; - - char rv = nfaExecMcSheng64_16_Q2i(n, offset, buffer, hend, cb, context, q, - m->flags & MCSHENG_FLAG_SINGLE, - 0 /* end */, NO_MATCHES); - - if (rv && nfaExecMcSheng64_16_inAccept(n, report, q)) { - return MO_MATCHES_PENDING; - } else { - return rv; - } -} - -char nfaExecMcSheng64_8_initCompressedState(const struct NFA *nfa, u64a offset, - void *state, UNUSED u8 key) { - const struct mcsheng64 *m = getImplNfa(nfa); - u8 s = offset ? m->start_floating : m->start_anchored; - if (s) { - *(u8 *)state = s; - return 1; - } - return 0; -} - -char nfaExecMcSheng64_16_initCompressedState(const struct NFA *nfa, u64a offset, - void *state, UNUSED u8 key) { - const struct mcsheng64 *m = getImplNfa(nfa); - u16 s = offset ? m->start_floating : m->start_anchored; - if (s) { - unaligned_store_u16(state, s); - return 1; - } - return 0; -} - -char nfaExecMcSheng64_8_testEOD(const struct NFA *nfa, const char *state, - UNUSED const char *streamState, u64a offset, - NfaCallback callback, void *context) { - return mcshengCheckEOD64(nfa, *(const u8 *)state, offset, callback, - context); -} - -char nfaExecMcSheng64_16_testEOD(const struct NFA *nfa, const char *state, - UNUSED const char *streamState, u64a offset, - NfaCallback callback, void *context) { - assert(ISALIGNED_N(state, 2)); - return mcshengCheckEOD64(nfa, *(const u16 *)state, offset, callback, - context); -} - -char nfaExecMcSheng64_8_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) { - assert(nfa->scratchStateSize == 1); - *(u8 *)q->state = 0; - return 0; -} - -char nfaExecMcSheng64_16_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) { - assert(nfa->scratchStateSize == 2); - assert(ISALIGNED_N(q->state, 2)); - *(u16 *)q->state = 0; - return 0; -} - -char nfaExecMcSheng64_8_queueCompressState(UNUSED const struct NFA *nfa, - const struct mq *q, UNUSED s64a loc) { - void *dest = q->streamState; - const void *src = q->state; - assert(nfa->scratchStateSize == 1); - assert(nfa->streamStateSize == 1); - *(u8 *)dest = *(const u8 *)src; - return 0; -} - -char nfaExecMcSheng64_8_expandState(UNUSED const struct NFA *nfa, void *dest, - const void *src, UNUSED u64a offset, - UNUSED u8 key) { - assert(nfa->scratchStateSize == 1); - assert(nfa->streamStateSize == 1); - *(u8 *)dest = *(const u8 *)src; - return 0; -} - -char nfaExecMcSheng64_16_queueCompressState(UNUSED const struct NFA *nfa, - const struct mq *q, - UNUSED s64a loc) { - void *dest = q->streamState; - const void *src = q->state; - assert(nfa->scratchStateSize == 2); - assert(nfa->streamStateSize == 2); - assert(ISALIGNED_N(src, 2)); - unaligned_store_u16(dest, *(const u16 *)(src)); - return 0; -} - -char nfaExecMcSheng64_16_expandState(UNUSED const struct NFA *nfa, void *dest, - const void *src, UNUSED u64a offset, - UNUSED u8 key) { - assert(nfa->scratchStateSize == 2); - assert(nfa->streamStateSize == 2); - assert(ISALIGNED_N(dest, 2)); - *(u16 *)dest = unaligned_load_u16(src); - return 0; -} -#endif + +#if defined(HAVE_AVX512VBMI) +static really_inline +const struct mstate_aux *get_aux64(const struct mcsheng64 *m, u32 s) { + const char *nfa = (const char *)m - sizeof(struct NFA); + const struct mstate_aux *aux + = s + (const struct mstate_aux *)(nfa + m->aux_offset); + + assert(ISALIGNED(aux)); + return aux; +} + +static really_inline +u32 mcshengEnableStarts64(const struct mcsheng64 *m, u32 s) { + const struct mstate_aux *aux = get_aux64(m, s); + + DEBUG_PRINTF("enabling starts %u->%hu\n", s, aux->top); + return aux->top; +} + +static really_inline +char doComplexReport64(NfaCallback cb, void *ctxt, const struct mcsheng64 *m, + u32 s, u64a loc, char eod, u32 *cached_accept_state, + u32 *cached_accept_id) { + DEBUG_PRINTF("reporting state = %u, loc=%llu, eod %hhu\n", + s & STATE_MASK, loc, eod); + + if (!eod && s == *cached_accept_state) { + if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; /* termination requested */ + } + + return MO_CONTINUE_MATCHING; /* continue execution */ + } + + const struct mstate_aux *aux = get_aux64(m, s); + size_t offset = eod ? aux->accept_eod : aux->accept; + + assert(offset); + const struct report_list *rl + = (const void *)((const char *)m + offset - sizeof(struct NFA)); + assert(ISALIGNED(rl)); + + DEBUG_PRINTF("report list size %u\n", rl->count); + u32 count = rl->count; + + if (!eod && count == 1) { + *cached_accept_state = s; + *cached_accept_id = rl->report[0]; + + DEBUG_PRINTF("reporting %u\n", rl->report[0]); + if (cb(0, loc, rl->report[0], ctxt) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; /* termination requested */ + } + + return MO_CONTINUE_MATCHING; /* continue execution */ + } + + for (u32 i = 0; i < count; i++) { + DEBUG_PRINTF("reporting %u\n", rl->report[i]); + if (cb(0, loc, rl->report[i], ctxt) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; /* termination requested */ + } + } + + return MO_CONTINUE_MATCHING; /* continue execution */ +} + +static really_inline +u32 doSheng64(const struct mcsheng64 *m, const u8 **c_inout, const u8 *soft_c_end, + const u8 *hard_c_end, u32 s_in, char do_accel) { + assert(s_in < m->sheng_end); + assert(s_in); /* should not already be dead */ + assert(soft_c_end <= hard_c_end); + DEBUG_PRINTF("s_in = %u (adjusted %u)\n", s_in, s_in - 1); + m512 s = set64x8(s_in - 1); + const u8 *c = *c_inout; + const u8 *c_end = hard_c_end - SHENG_CHUNK + 1; + if (!do_accel) { + c_end = MIN(soft_c_end, hard_c_end - SHENG_CHUNK + 1); + } + + const m512 *masks = m->sheng_succ_masks; + u8 sheng_limit = m->sheng_end - 1; /* - 1: no dead state */ + u8 sheng_stop_limit = do_accel ? m->sheng_accel_limit : sheng_limit; + + /* When we use movd to get a u32 containing our state, it will have 4 lanes + * all duplicating the state. We can create versions of our limits with 4 + * copies to directly compare against, this prevents us generating code to + * extract a single copy of the state from the u32 for checking. */ + u32 sheng_stop_limit_x4 = sheng_stop_limit * 0x01010101; + +#if defined(HAVE_BMI2) && defined(ARCH_64_BIT) + u32 sheng_limit_x4 = sheng_limit * 0x01010101; + m512 simd_stop_limit = set16x32(sheng_stop_limit_x4); + m512 accel_delta = set64x8(sheng_limit - sheng_stop_limit); + DEBUG_PRINTF("end %hhu, accel %hu --> limit %hhu\n", sheng_limit, + m->sheng_accel_limit, sheng_stop_limit); +#endif + +#define SHENG64_SINGLE_ITER do { \ + m512 succ_mask = masks[*(c++)]; \ + s = vpermb512(s, succ_mask); \ + u32 s_gpr_x4 = movd512(s); /* convert to u8 */ \ + DEBUG_PRINTF("c %hhu (%c) --> s %u\n", c[-1], c[-1], s_gpr_x4); \ + if (s_gpr_x4 >= sheng_stop_limit_x4) { \ + s_gpr = s_gpr_x4; \ + goto exit; \ + } \ + } while (0) + + u8 s_gpr; + while (c < c_end) { +#if defined(HAVE_BMI2) && defined(ARCH_64_BIT) + /* This version uses pext for efficiently bitbashing out scaled + * versions of the bytes to process from a u64a */ + + u64a data_bytes = unaligned_load_u64a(c); + u64a cc0 = pdep64(data_bytes, 0x3fc0); /* extract scaled low byte */ + data_bytes &= ~0xffULL; /* clear low bits for scale space */ + + m512 succ_mask0 = load512((const char *)masks + cc0); + s = vpermb512(s, succ_mask0); + m512 s_max = s; + m512 s_max0 = s_max; + DEBUG_PRINTF("c %02llx --> s %u\n", cc0 >> 6, movd512(s)); + +#define SHENG64_SINGLE_UNROLL_ITER(iter) \ + assert(iter); \ + u64a cc##iter = pext64(data_bytes, mcsheng64_pext_mask[iter]); \ + assert(cc##iter == (u64a)c[iter] << 6); \ + m512 succ_mask##iter = load512((const char *)masks + cc##iter); \ + s = vpermb512(s, succ_mask##iter); \ + if (do_accel && iter == 7) { \ + /* in the final iteration we also have to check against accel */ \ + m512 s_temp = sadd_u8_m512(s, accel_delta); \ + s_max = max_u8_m512(s_max, s_temp); \ + } else { \ + s_max = max_u8_m512(s_max, s); \ + } \ + m512 s_max##iter = s_max; \ + DEBUG_PRINTF("c %02llx --> s %u max %u\n", cc##iter >> 6, \ + movd512(s), movd512(s_max)); + + SHENG64_SINGLE_UNROLL_ITER(1); + SHENG64_SINGLE_UNROLL_ITER(2); + SHENG64_SINGLE_UNROLL_ITER(3); + SHENG64_SINGLE_UNROLL_ITER(4); + SHENG64_SINGLE_UNROLL_ITER(5); + SHENG64_SINGLE_UNROLL_ITER(6); + SHENG64_SINGLE_UNROLL_ITER(7); + + if (movd512(s_max7) >= sheng_limit_x4) { + DEBUG_PRINTF("exit found\n"); + + /* Explicitly check the last byte as it is more likely as it also + * checks for acceleration. */ + if (movd512(s_max6) < sheng_limit_x4) { + c += SHENG_CHUNK; + s_gpr = movq512(s); + assert(s_gpr >= sheng_stop_limit); + goto exit; + } + + /* use shift-xor to create a register containing all of the max + * values */ + m512 blended = rshift64_m512(s_max0, 56); + blended = xor512(blended, rshift64_m512(s_max1, 48)); + blended = xor512(blended, rshift64_m512(s_max2, 40)); + blended = xor512(blended, rshift64_m512(s_max3, 32)); + blended = xor512(blended, rshift64_m512(s_max4, 24)); + blended = xor512(blended, rshift64_m512(s_max5, 16)); + blended = xor512(blended, rshift64_m512(s_max6, 8)); + blended = xor512(blended, s); + blended = xor512(blended, rshift64_m512(blended, 8)); + DEBUG_PRINTF("blended %016llx\n", movq512(blended)); + + m512 final = min_u8_m512(blended, simd_stop_limit); + m512 cmp = sub_u8_m512(final, simd_stop_limit); + m128 tmp = cast512to128(cmp); + u64a stops = ~movemask128(tmp); + assert(stops); + u32 earliest = ctz32(stops); + DEBUG_PRINTF("stops %02llx, earliest %u\n", stops, earliest); + assert(earliest < 8); + c += earliest + 1; + s_gpr = movq512(blended) >> (earliest * 8); + assert(s_gpr >= sheng_stop_limit); + goto exit; + } else { + c += SHENG_CHUNK; + } +#else + SHENG64_SINGLE_ITER; + SHENG64_SINGLE_ITER; + SHENG64_SINGLE_ITER; + SHENG64_SINGLE_ITER; + + SHENG64_SINGLE_ITER; + SHENG64_SINGLE_ITER; + SHENG64_SINGLE_ITER; + SHENG64_SINGLE_ITER; +#endif + } + + assert(c_end - c < SHENG_CHUNK); + if (c < soft_c_end) { + assert(soft_c_end - c < SHENG_CHUNK); + switch (soft_c_end - c) { + case 7: + SHENG64_SINGLE_ITER; // fallthrough + case 6: + SHENG64_SINGLE_ITER; // fallthrough + case 5: + SHENG64_SINGLE_ITER; // fallthrough + case 4: + SHENG64_SINGLE_ITER; // fallthrough + case 3: + SHENG64_SINGLE_ITER; // fallthrough + case 2: + SHENG64_SINGLE_ITER; // fallthrough + case 1: + SHENG64_SINGLE_ITER; // fallthrough + } + } + + assert(c >= soft_c_end); + + s_gpr = movq512(s); +exit: + assert(c <= hard_c_end); + DEBUG_PRINTF("%zu from end; s %hhu\n", c_end - c, s_gpr); + assert(c >= soft_c_end || s_gpr >= sheng_stop_limit); + /* undo state adjustment to match mcclellan view */ + if (s_gpr == sheng_limit) { + s_gpr = 0; + } else if (s_gpr < sheng_limit) { + s_gpr++; + } + + *c_inout = c; + return s_gpr; +} + +static really_inline +const char *findShermanState64(UNUSED const struct mcsheng64 *m, + const char *sherman_base_offset, + u32 sherman_base, u32 s) { + const char *rv + = sherman_base_offset + SHERMAN_FIXED_SIZE * (s - sherman_base); + assert(rv < (const char *)m + m->length - sizeof(struct NFA)); + UNUSED u8 type = *(const u8 *)(rv + SHERMAN_TYPE_OFFSET); + assert(type == SHERMAN_STATE); + return rv; +} + +static really_inline +const u8 *run_mcsheng_accel64(const struct mcsheng64 *m, + const struct mstate_aux *aux, u32 s, + const u8 **min_accel_offset, + const u8 *c, const u8 *c_end) { + DEBUG_PRINTF("skipping\n"); + u32 accel_offset = aux[s].accel_offset; + + assert(aux[s].accel_offset); + assert(accel_offset >= m->aux_offset); + assert(!m->sherman_offset || accel_offset < m->sherman_offset); + + const union AccelAux *aaux = (const void *)((const char *)m + accel_offset); + const u8 *c2 = run_accel(aaux, c, c_end); + + if (c2 < *min_accel_offset + BAD_ACCEL_DIST) { + *min_accel_offset = c2 + BIG_ACCEL_PENALTY; + } else { + *min_accel_offset = c2 + SMALL_ACCEL_PENALTY; + } + + if (*min_accel_offset >= c_end - ACCEL_MIN_LEN) { + *min_accel_offset = c_end; + } + + DEBUG_PRINTF("advanced %zd, next accel chance in %zd/%zd\n", + c2 - c, *min_accel_offset - c2, c_end - c2); + + return c2; +} + +static really_inline +u32 doNormal64_16(const struct mcsheng64 *m, const u8 **c_inout, const u8 *end, + u32 s, char do_accel, enum MatchMode mode) { + const u8 *c = *c_inout; + const u16 *succ_table + = (const u16 *)((const char *)m + sizeof(struct mcsheng64)); + assert(ISALIGNED_N(succ_table, 2)); + u32 sheng_end = m->sheng_end; + u32 sherman_base = m->sherman_limit; + const char *sherman_base_offset + = (const char *)m - sizeof(struct NFA) + m->sherman_offset; + u32 as = m->alphaShift; + + /* Adjust start of succ table so we can index into using state id (rather + * than adjust to normal id). As we will not be processing states with low + * state ids, we will not be accessing data before the succ table. Note: due + * to the size of the sheng tables, the succ_table pointer will still be + * inside the engine.*/ + succ_table -= sheng_end << as; + s &= STATE_MASK; + while (c < end && s >= sheng_end) { + u8 cprime = m->remap[*c]; + DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx (s=%u)\n", *c, + ourisprint(*c) ? *c : '?', cprime, s); + if (s < sherman_base) { + DEBUG_PRINTF("doing normal\n"); + assert(s < m->state_count); + s = succ_table[(s << as) + cprime]; + } else { + const char *sherman_state + = findShermanState64(m, sherman_base_offset, sherman_base, s); + DEBUG_PRINTF("doing sherman (%u)\n", s); + s = doSherman16(sherman_state, cprime, succ_table, as); + } + + DEBUG_PRINTF("s: %u (%u)\n", s, s & STATE_MASK); + c++; + + if (do_accel && (s & ACCEL_FLAG)) { + break; + } + if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) { + break; + } + + s &= STATE_MASK; + } + + *c_inout = c; + return s; +} + +static really_inline +char mcsheng64Exec16_i(const struct mcsheng64 *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **c_final, enum MatchMode mode) { + assert(ISALIGNED_N(state, 2)); + if (!len) { + if (mode == STOP_AT_MATCH) { + *c_final = buf; + } + return MO_ALIVE; + } + + u32 s = *state; + const u8 *c = buf; + const u8 *c_end = buf + len; + const u8 sheng_end = m->sheng_end; + const struct mstate_aux *aux + = (const struct mstate_aux *)((const char *)m + m->aux_offset + - sizeof(struct NFA)); + + s &= STATE_MASK; + + u32 cached_accept_id = 0; + u32 cached_accept_state = 0; + + DEBUG_PRINTF("s: %u, len %zu\n", s, len); + + const u8 *min_accel_offset = c; + if (!m->has_accel || len < ACCEL_MIN_LEN) { + min_accel_offset = c_end; + goto without_accel; + } + + goto with_accel; + +without_accel: + do { + assert(c < min_accel_offset); + int do_accept; + if (!s) { + goto exit; + } else if (s < sheng_end) { + s = doSheng64(m, &c, min_accel_offset, c_end, s, 0); + do_accept = mode != NO_MATCHES && get_aux64(m, s)->accept; + } else { + s = doNormal64_16(m, &c, min_accel_offset, s, 0, mode); + + do_accept = mode != NO_MATCHES && (s & ACCEPT_FLAG); + } + + if (do_accept) { + if (mode == STOP_AT_MATCH) { + *state = s & STATE_MASK; + *c_final = c - 1; + return MO_MATCHES_PENDING; + } + + u64a loc = (c - 1) - buf + offAdj + 1; + + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { + return MO_DEAD; /* termination requested */ + } + } else if (doComplexReport64(cb, ctxt, m, s & STATE_MASK, loc, 0, + &cached_accept_state, + &cached_accept_id) + == MO_HALT_MATCHING) { + return MO_DEAD; + } + } + + assert(c <= c_end); /* sheng is fuzzy for min_accel_offset */ + } while (c < min_accel_offset); + + if (c == c_end) { + goto exit; + } + +with_accel: + do { + assert(c < c_end); + int do_accept; + + if (!s) { + goto exit; + } else if (s < sheng_end) { + if (s > m->sheng_accel_limit) { + c = run_mcsheng_accel64(m, aux, s, &min_accel_offset, c, c_end); + if (c == c_end) { + goto exit; + } else { + goto without_accel; + } + } + s = doSheng64(m, &c, c_end, c_end, s, 1); + do_accept = mode != NO_MATCHES && get_aux64(m, s)->accept; + } else { + if (s & ACCEL_FLAG) { + DEBUG_PRINTF("skipping\n"); + s &= STATE_MASK; + c = run_mcsheng_accel64(m, aux, s, &min_accel_offset, c, c_end); + if (c == c_end) { + goto exit; + } else { + goto without_accel; + } + } + + s = doNormal64_16(m, &c, c_end, s, 1, mode); + do_accept = mode != NO_MATCHES && (s & ACCEPT_FLAG); + } + + if (do_accept) { + if (mode == STOP_AT_MATCH) { + *state = s & STATE_MASK; + *c_final = c - 1; + return MO_MATCHES_PENDING; + } + + u64a loc = (c - 1) - buf + offAdj + 1; + + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { + return MO_DEAD; /* termination requested */ + } + } else if (doComplexReport64(cb, ctxt, m, s & STATE_MASK, loc, 0, + &cached_accept_state, + &cached_accept_id) + == MO_HALT_MATCHING) { + return MO_DEAD; + } + } + + assert(c <= c_end); + } while (c < c_end); + +exit: + s &= STATE_MASK; + + if (mode == STOP_AT_MATCH) { + *c_final = c_end; + } + *state = s; + + return MO_ALIVE; +} + +static never_inline +char mcsheng64Exec16_i_cb(const struct mcsheng64 *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point) { + return mcsheng64Exec16_i(m, state, buf, len, offAdj, cb, ctxt, single, + final_point, CALLBACK_OUTPUT); +} + +static never_inline +char mcsheng64Exec16_i_sam(const struct mcsheng64 *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point) { + return mcsheng64Exec16_i(m, state, buf, len, offAdj, cb, ctxt, single, + final_point, STOP_AT_MATCH); +} + +static never_inline +char mcsheng64Exec16_i_nm(const struct mcsheng64 *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point) { + return mcsheng64Exec16_i(m, state, buf, len, offAdj, cb, ctxt, single, + final_point, NO_MATCHES); +} + +static really_inline +char mcsheng64Exec16_i_ni(const struct mcsheng64 *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point, + enum MatchMode mode) { + if (mode == CALLBACK_OUTPUT) { + return mcsheng64Exec16_i_cb(m, state, buf, len, offAdj, cb, ctxt, + single, final_point); + } else if (mode == STOP_AT_MATCH) { + return mcsheng64Exec16_i_sam(m, state, buf, len, offAdj, cb, ctxt, + single, final_point); + } else { + assert (mode == NO_MATCHES); + return mcsheng64Exec16_i_nm(m, state, buf, len, offAdj, cb, ctxt, + single, final_point); + } +} + +static really_inline +u32 doNormal64_8(const struct mcsheng64 *m, const u8 **c_inout, const u8 *end, u32 s, + char do_accel, enum MatchMode mode) { + const u8 *c = *c_inout; + u32 sheng_end = m->sheng_end; + u32 accel_limit = m->accel_limit_8; + u32 accept_limit = m->accept_limit_8; + + const u32 as = m->alphaShift; + const u8 *succ_table = (const u8 *)((const char *)m + + sizeof(struct mcsheng64)); + /* Adjust start of succ table so we can index into using state id (rather + * than adjust to normal id). As we will not be processing states with low + * state ids, we will not be accessing data before the succ table. Note: due + * to the size of the sheng tables, the succ_table pointer will still be + * inside the engine.*/ + succ_table -= sheng_end << as; + + assert(s >= sheng_end); + while (c < end && s >= sheng_end) { + u8 cprime = m->remap[*c]; + DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx\n", *c, + ourisprint(*c) ? *c : '?', cprime); + s = succ_table[(s << as) + cprime]; + + DEBUG_PRINTF("s: %u\n", s); + c++; + if (do_accel) { + if (s >= accel_limit) { + break; + } + } else { + if (mode != NO_MATCHES && s >= accept_limit) { + break; + } + } + } + *c_inout = c; + return s; +} + +static really_inline +char mcsheng64Exec8_i(const struct mcsheng64 *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **c_final, enum MatchMode mode) { + if (!len) { + *c_final = buf; + return MO_ALIVE; + } + u32 s = *state; + const u8 *c = buf; + const u8 *c_end = buf + len; + const u8 sheng_end = m->sheng_end; + + const struct mstate_aux *aux + = (const struct mstate_aux *)((const char *)m + m->aux_offset + - sizeof(struct NFA)); + u32 accept_limit = m->accept_limit_8; + + u32 cached_accept_id = 0; + u32 cached_accept_state = 0; + + DEBUG_PRINTF("accel %hu, accept %u\n", m->accel_limit_8, accept_limit); + + DEBUG_PRINTF("s: %u, len %zu\n", s, len); + + const u8 *min_accel_offset = c; + if (!m->has_accel || len < ACCEL_MIN_LEN) { + min_accel_offset = c_end; + goto without_accel; + } + + goto with_accel; + +without_accel: + do { + assert(c < min_accel_offset); + if (!s) { + goto exit; + } else if (s < sheng_end) { + s = doSheng64(m, &c, min_accel_offset, c_end, s, 0); + } else { + s = doNormal64_8(m, &c, min_accel_offset, s, 0, mode); + assert(c <= min_accel_offset); + } + + if (mode != NO_MATCHES && s >= accept_limit) { + if (mode == STOP_AT_MATCH) { + DEBUG_PRINTF("match - pausing\n"); + *state = s; + *c_final = c - 1; + return MO_MATCHES_PENDING; + } + + u64a loc = (c - 1) - buf + offAdj + 1; + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { + return MO_DEAD; + } + } else if (doComplexReport64(cb, ctxt, m, s, loc, 0, + &cached_accept_state, + &cached_accept_id) + == MO_HALT_MATCHING) { + return MO_DEAD; + } + } + + assert(c <= c_end); /* sheng is fuzzy for min_accel_offset */ + } while (c < min_accel_offset); + + if (c == c_end) { + goto exit; + } + +with_accel: + do { + u32 accel_limit = m->accel_limit_8; + + assert(c < c_end); + if (!s) { + goto exit; + } else if (s < sheng_end) { + if (s > m->sheng_accel_limit) { + c = run_mcsheng_accel64(m, aux, s, &min_accel_offset, c, c_end); + if (c == c_end) { + goto exit; + } else { + goto without_accel; + } + } + s = doSheng64(m, &c, c_end, c_end, s, 1); + } else { + if (s >= accel_limit && aux[s].accel_offset) { + c = run_mcsheng_accel64(m, aux, s, &min_accel_offset, c, c_end); + if (c == c_end) { + goto exit; + } else { + goto without_accel; + } + } + s = doNormal64_8(m, &c, c_end, s, 1, mode); + } + + if (mode != NO_MATCHES && s >= accept_limit) { + if (mode == STOP_AT_MATCH) { + DEBUG_PRINTF("match - pausing\n"); + *state = s; + *c_final = c - 1; + return MO_MATCHES_PENDING; + } + + u64a loc = (c - 1) - buf + offAdj + 1; + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { + return MO_DEAD; + } + } else if (doComplexReport64(cb, ctxt, m, s, loc, 0, + &cached_accept_state, + &cached_accept_id) + == MO_HALT_MATCHING) { + return MO_DEAD; + } + } + + assert(c <= c_end); + } while (c < c_end); + +exit: + *state = s; + if (mode == STOP_AT_MATCH) { + *c_final = c_end; + } + return MO_ALIVE; +} + +static never_inline +char mcsheng64Exec8_i_cb(const struct mcsheng64 *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point) { + return mcsheng64Exec8_i(m, state, buf, len, offAdj, cb, ctxt, single, + final_point, CALLBACK_OUTPUT); +} + +static never_inline +char mcsheng64Exec8_i_sam(const struct mcsheng64 *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point) { + return mcsheng64Exec8_i(m, state, buf, len, offAdj, cb, ctxt, single, + final_point, STOP_AT_MATCH); +} + +static never_inline +char mcsheng64Exec8_i_nm(const struct mcsheng64 *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point) { + return mcsheng64Exec8_i(m, state, buf, len, offAdj, cb, ctxt, single, + final_point, NO_MATCHES); +} + +static really_inline +char mcsheng64Exec8_i_ni(const struct mcsheng64 *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point, + enum MatchMode mode) { + if (mode == CALLBACK_OUTPUT) { + return mcsheng64Exec8_i_cb(m, state, buf, len, offAdj, cb, ctxt, single, + final_point); + } else if (mode == STOP_AT_MATCH) { + return mcsheng64Exec8_i_sam(m, state, buf, len, offAdj, cb, ctxt, + single, final_point); + } else { + assert(mode == NO_MATCHES); + return mcsheng64Exec8_i_nm(m, state, buf, len, offAdj, cb, ctxt, single, + final_point); + } +} + +static really_inline +char mcshengCheckEOD64(const struct NFA *nfa, u32 s, u64a offset, + NfaCallback cb, void *ctxt) { + const struct mcsheng64 *m = getImplNfa(nfa); + const struct mstate_aux *aux = get_aux64(m, s); + + if (!aux->accept_eod) { + return MO_CONTINUE_MATCHING; + } + return doComplexReport64(cb, ctxt, m, s, offset, 1, NULL, NULL); +} + +static really_inline +char nfaExecMcSheng64_16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, + const u8 *hend, NfaCallback cb, void *context, + struct mq *q, char single, s64a end, + enum MatchMode mode) { + assert(n->type == MCSHENG_64_NFA_16); + const struct mcsheng64 *m = getImplNfa(n); + s64a sp; + + assert(ISALIGNED_N(q->state, 2)); + u32 s = *(u16 *)q->state; + + if (q->report_current) { + assert(s); + assert(get_aux64(m, s)->accept); + + int rv; + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + rv = cb(0, q_cur_offset(q), m->arb_report, context); + } else { + u32 cached_accept_id = 0; + u32 cached_accept_state = 0; + + rv = doComplexReport64(cb, context, m, s, q_cur_offset(q), 0, + &cached_accept_state, &cached_accept_id); + } + + q->report_current = 0; + + if (rv == MO_HALT_MATCHING) { + return MO_DEAD; + } + } + + sp = q_cur_loc(q); + q->cur++; + + const u8 *cur_buf = sp < 0 ? hend : buffer; + + assert(q->cur); + if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) { + DEBUG_PRINTF("this is as far as we go\n"); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = end; + *(u16 *)q->state = s; + return MO_ALIVE; + } + + while (1) { + assert(q->cur < q->end); + s64a ep = q->items[q->cur].location; + if (mode != NO_MATCHES) { + ep = MIN(ep, end); + } + + assert(ep >= sp); + + s64a local_ep = ep; + if (sp < 0) { + local_ep = MIN(0, ep); + } + + /* do main buffer region */ + const u8 *final_look; + char rv = mcsheng64Exec16_i_ni(m, &s, cur_buf + sp, local_ep - sp, + offset + sp, cb, context, single, + &final_look, mode); + if (rv == MO_DEAD) { + *(u16 *)q->state = 0; + return MO_DEAD; + } + if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) { + DEBUG_PRINTF("this is as far as we go\n"); + DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf); + + assert(q->cur); + assert(final_look != cur_buf + local_ep); + + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = final_look - cur_buf + 1; /* due to + * early -1 */ + *(u16 *)q->state = s; + return MO_MATCHES_PENDING; + } + + assert(rv == MO_ALIVE); + assert(q->cur); + if (mode != NO_MATCHES && q->items[q->cur].location > end) { + DEBUG_PRINTF("this is as far as we go\n"); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = end; + *(u16 *)q->state = s; + return MO_ALIVE; + } + + sp = local_ep; + + if (sp == 0) { + cur_buf = buffer; + } + + if (sp != ep) { + continue; + } + + switch (q->items[q->cur].type) { + case MQE_TOP: + assert(sp + offset || !s); + if (sp + offset == 0) { + s = m->start_anchored; + break; + } + s = mcshengEnableStarts64(m, s); + break; + case MQE_END: + *(u16 *)q->state = s; + q->cur++; + return s ? MO_ALIVE : MO_DEAD; + default: + assert(!"invalid queue event"); + } + + q->cur++; + } +} + +static really_inline +char nfaExecMcSheng64_8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, + const u8 *hend, NfaCallback cb, void *context, + struct mq *q, char single, s64a end, + enum MatchMode mode) { + assert(n->type == MCSHENG_64_NFA_8); + const struct mcsheng64 *m = getImplNfa(n); + s64a sp; + + u32 s = *(u8 *)q->state; + + if (q->report_current) { + assert(s); + assert(s >= m->accept_limit_8); + + int rv; + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + + rv = cb(0, q_cur_offset(q), m->arb_report, context); + } else { + u32 cached_accept_id = 0; + u32 cached_accept_state = 0; + + rv = doComplexReport64(cb, context, m, s, q_cur_offset(q), 0, + &cached_accept_state, &cached_accept_id); + } + + q->report_current = 0; + + if (rv == MO_HALT_MATCHING) { + return MO_DEAD; + } + } + + sp = q_cur_loc(q); + q->cur++; + + const u8 *cur_buf = sp < 0 ? hend : buffer; + + if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) { + DEBUG_PRINTF("this is as far as we go\n"); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = end; + *(u8 *)q->state = s; + return MO_ALIVE; + } + + while (1) { + DEBUG_PRINTF("%s @ %llu\n", q->items[q->cur].type == MQE_TOP ? "TOP" : + q->items[q->cur].type == MQE_END ? "END" : "???", + q->items[q->cur].location + offset); + assert(q->cur < q->end); + s64a ep = q->items[q->cur].location; + if (mode != NO_MATCHES) { + ep = MIN(ep, end); + } + + assert(ep >= sp); + + s64a local_ep = ep; + if (sp < 0) { + local_ep = MIN(0, ep); + } + + const u8 *final_look; + char rv = mcsheng64Exec8_i_ni(m, &s, cur_buf + sp, local_ep - sp, + offset + sp, cb, context, single, + &final_look, mode); + if (rv == MO_HALT_MATCHING) { + *(u8 *)q->state = 0; + return MO_DEAD; + } + if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) { + DEBUG_PRINTF("this is as far as we go\n"); + DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf); + + assert(q->cur); + assert(final_look != cur_buf + local_ep); + + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = final_look - cur_buf + 1; /* due to + * early -1 */ + *(u8 *)q->state = s; + return MO_MATCHES_PENDING; + } + + assert(rv == MO_ALIVE); + assert(q->cur); + if (mode != NO_MATCHES && q->items[q->cur].location > end) { + DEBUG_PRINTF("this is as far as we go\n"); + assert(q->cur); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = end; + *(u8 *)q->state = s; + return MO_ALIVE; + } + + sp = local_ep; + + if (sp == 0) { + cur_buf = buffer; + } + + if (sp != ep) { + continue; + } + + switch (q->items[q->cur].type) { + case MQE_TOP: + assert(sp + offset || !s); + if (sp + offset == 0) { + s = (u8)m->start_anchored; + break; + } + s = mcshengEnableStarts64(m, s); + break; + case MQE_END: + *(u8 *)q->state = s; + q->cur++; + return s ? MO_ALIVE : MO_DEAD; + default: + assert(!"invalid queue event"); + } + + q->cur++; + } +} + +char nfaExecMcSheng64_8_Q(const struct NFA *n, struct mq *q, s64a end) { + u64a offset = q->offset; + const u8 *buffer = q->buffer; + NfaCallback cb = q->cb; + void *context = q->context; + assert(n->type == MCSHENG_64_NFA_8); + const struct mcsheng64 *m = getImplNfa(n); + const u8 *hend = q->history + q->hlength; + + return nfaExecMcSheng64_8_Q2i(n, offset, buffer, hend, cb, context, q, + m->flags & MCSHENG_FLAG_SINGLE, end, + CALLBACK_OUTPUT); +} + +char nfaExecMcSheng64_16_Q(const struct NFA *n, struct mq *q, s64a end) { + u64a offset = q->offset; + const u8 *buffer = q->buffer; + NfaCallback cb = q->cb; + void *context = q->context; + assert(n->type == MCSHENG_64_NFA_16); + const struct mcsheng64 *m = getImplNfa(n); + const u8 *hend = q->history + q->hlength; + + return nfaExecMcSheng64_16_Q2i(n, offset, buffer, hend, cb, context, q, + m->flags & MCSHENG_FLAG_SINGLE, end, + CALLBACK_OUTPUT); +} + +char nfaExecMcSheng64_8_reportCurrent(const struct NFA *n, struct mq *q) { + const struct mcsheng64 *m = getImplNfa(n); + NfaCallback cb = q->cb; + void *ctxt = q->context; + u32 s = *(u8 *)q->state; + u8 single = m->flags & MCSHENG_FLAG_SINGLE; + u64a offset = q_cur_offset(q); + assert(q_cur_type(q) == MQE_START); + assert(s); + + if (s >= m->accept_limit_8) { + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + cb(0, offset, m->arb_report, ctxt); + } else { + u32 cached_accept_id = 0; + u32 cached_accept_state = 0; + + doComplexReport64(cb, ctxt, m, s, offset, 0, &cached_accept_state, + &cached_accept_id); + } + } + + return 0; +} + +char nfaExecMcSheng64_16_reportCurrent(const struct NFA *n, struct mq *q) { + const struct mcsheng64 *m = getImplNfa(n); + NfaCallback cb = q->cb; + void *ctxt = q->context; + u32 s = *(u16 *)q->state; + const struct mstate_aux *aux = get_aux64(m, s); + u8 single = m->flags & MCSHENG_FLAG_SINGLE; + u64a offset = q_cur_offset(q); + assert(q_cur_type(q) == MQE_START); + DEBUG_PRINTF("state %u\n", s); + assert(s); + + if (aux->accept) { + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + cb(0, offset, m->arb_report, ctxt); + } else { + u32 cached_accept_id = 0; + u32 cached_accept_state = 0; + + doComplexReport64(cb, ctxt, m, s, offset, 0, &cached_accept_state, + &cached_accept_id); + } + } + + return 0; +} + +static +char mcshengHasAccept64(const struct mcsheng64 *m, const struct mstate_aux *aux, + ReportID report) { + assert(m && aux); + + if (!aux->accept) { + return 0; + } + + const struct report_list *rl = (const struct report_list *) + ((const char *)m + aux->accept - sizeof(struct NFA)); + assert(ISALIGNED_N(rl, 4)); + + DEBUG_PRINTF("report list has %u entries\n", rl->count); + + for (u32 i = 0; i < rl->count; i++) { + if (rl->report[i] == report) { + return 1; + } + } + + return 0; +} + +char nfaExecMcSheng64_8_inAccept(const struct NFA *n, ReportID report, + struct mq *q) { + assert(n && q); + + const struct mcsheng64 *m = getImplNfa(n); + u8 s = *(u8 *)q->state; + DEBUG_PRINTF("checking accepts for %hhu\n", s); + + return mcshengHasAccept64(m, get_aux64(m, s), report); +} + +char nfaExecMcSheng64_8_inAnyAccept(const struct NFA *n, struct mq *q) { + assert(n && q); + + const struct mcsheng64 *m = getImplNfa(n); + u8 s = *(u8 *)q->state; + DEBUG_PRINTF("checking accepts for %hhu\n", s); + + return !!get_aux64(m, s)->accept; +} + +char nfaExecMcSheng64_16_inAccept(const struct NFA *n, ReportID report, + struct mq *q) { + assert(n && q); + + const struct mcsheng64 *m = getImplNfa(n); + u16 s = *(u16 *)q->state; + DEBUG_PRINTF("checking accepts for %hu\n", s); + + return mcshengHasAccept64(m, get_aux64(m, s), report); +} + +char nfaExecMcSheng64_16_inAnyAccept(const struct NFA *n, struct mq *q) { + assert(n && q); + + const struct mcsheng64 *m = getImplNfa(n); + u16 s = *(u16 *)q->state; + DEBUG_PRINTF("checking accepts for %hu\n", s); + + return !!get_aux64(m, s)->accept; +} + +char nfaExecMcSheng64_8_Q2(const struct NFA *n, struct mq *q, s64a end) { + u64a offset = q->offset; + const u8 *buffer = q->buffer; + NfaCallback cb = q->cb; + void *context = q->context; + assert(n->type == MCSHENG_64_NFA_8); + const struct mcsheng64 *m = getImplNfa(n); + const u8 *hend = q->history + q->hlength; + + return nfaExecMcSheng64_8_Q2i(n, offset, buffer, hend, cb, context, q, + m->flags & MCSHENG_FLAG_SINGLE, end, + STOP_AT_MATCH); +} + +char nfaExecMcSheng64_16_Q2(const struct NFA *n, struct mq *q, s64a end) { + u64a offset = q->offset; + const u8 *buffer = q->buffer; + NfaCallback cb = q->cb; + void *context = q->context; + assert(n->type == MCSHENG_64_NFA_16); + const struct mcsheng64 *m = getImplNfa(n); + const u8 *hend = q->history + q->hlength; + + return nfaExecMcSheng64_16_Q2i(n, offset, buffer, hend, cb, context, q, + m->flags & MCSHENG_FLAG_SINGLE, end, + STOP_AT_MATCH); +} + +char nfaExecMcSheng64_8_QR(const struct NFA *n, struct mq *q, ReportID report) { + u64a offset = q->offset; + const u8 *buffer = q->buffer; + NfaCallback cb = q->cb; + void *context = q->context; + assert(n->type == MCSHENG_64_NFA_8); + const struct mcsheng64 *m = getImplNfa(n); + const u8 *hend = q->history + q->hlength; + + char rv = nfaExecMcSheng64_8_Q2i(n, offset, buffer, hend, cb, context, q, + m->flags & MCSHENG_FLAG_SINGLE, + 0 /* end */, NO_MATCHES); + if (rv && nfaExecMcSheng64_8_inAccept(n, report, q)) { + return MO_MATCHES_PENDING; + } else { + return rv; + } +} + +char nfaExecMcSheng64_16_QR(const struct NFA *n, struct mq *q, ReportID report) { + u64a offset = q->offset; + const u8 *buffer = q->buffer; + NfaCallback cb = q->cb; + void *context = q->context; + assert(n->type == MCSHENG_64_NFA_16); + const struct mcsheng64 *m = getImplNfa(n); + const u8 *hend = q->history + q->hlength; + + char rv = nfaExecMcSheng64_16_Q2i(n, offset, buffer, hend, cb, context, q, + m->flags & MCSHENG_FLAG_SINGLE, + 0 /* end */, NO_MATCHES); + + if (rv && nfaExecMcSheng64_16_inAccept(n, report, q)) { + return MO_MATCHES_PENDING; + } else { + return rv; + } +} + +char nfaExecMcSheng64_8_initCompressedState(const struct NFA *nfa, u64a offset, + void *state, UNUSED u8 key) { + const struct mcsheng64 *m = getImplNfa(nfa); + u8 s = offset ? m->start_floating : m->start_anchored; + if (s) { + *(u8 *)state = s; + return 1; + } + return 0; +} + +char nfaExecMcSheng64_16_initCompressedState(const struct NFA *nfa, u64a offset, + void *state, UNUSED u8 key) { + const struct mcsheng64 *m = getImplNfa(nfa); + u16 s = offset ? m->start_floating : m->start_anchored; + if (s) { + unaligned_store_u16(state, s); + return 1; + } + return 0; +} + +char nfaExecMcSheng64_8_testEOD(const struct NFA *nfa, const char *state, + UNUSED const char *streamState, u64a offset, + NfaCallback callback, void *context) { + return mcshengCheckEOD64(nfa, *(const u8 *)state, offset, callback, + context); +} + +char nfaExecMcSheng64_16_testEOD(const struct NFA *nfa, const char *state, + UNUSED const char *streamState, u64a offset, + NfaCallback callback, void *context) { + assert(ISALIGNED_N(state, 2)); + return mcshengCheckEOD64(nfa, *(const u16 *)state, offset, callback, + context); +} + +char nfaExecMcSheng64_8_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) { + assert(nfa->scratchStateSize == 1); + *(u8 *)q->state = 0; + return 0; +} + +char nfaExecMcSheng64_16_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) { + assert(nfa->scratchStateSize == 2); + assert(ISALIGNED_N(q->state, 2)); + *(u16 *)q->state = 0; + return 0; +} + +char nfaExecMcSheng64_8_queueCompressState(UNUSED const struct NFA *nfa, + const struct mq *q, UNUSED s64a loc) { + void *dest = q->streamState; + const void *src = q->state; + assert(nfa->scratchStateSize == 1); + assert(nfa->streamStateSize == 1); + *(u8 *)dest = *(const u8 *)src; + return 0; +} + +char nfaExecMcSheng64_8_expandState(UNUSED const struct NFA *nfa, void *dest, + const void *src, UNUSED u64a offset, + UNUSED u8 key) { + assert(nfa->scratchStateSize == 1); + assert(nfa->streamStateSize == 1); + *(u8 *)dest = *(const u8 *)src; + return 0; +} + +char nfaExecMcSheng64_16_queueCompressState(UNUSED const struct NFA *nfa, + const struct mq *q, + UNUSED s64a loc) { + void *dest = q->streamState; + const void *src = q->state; + assert(nfa->scratchStateSize == 2); + assert(nfa->streamStateSize == 2); + assert(ISALIGNED_N(src, 2)); + unaligned_store_u16(dest, *(const u16 *)(src)); + return 0; +} + +char nfaExecMcSheng64_16_expandState(UNUSED const struct NFA *nfa, void *dest, + const void *src, UNUSED u64a offset, + UNUSED u8 key) { + assert(nfa->scratchStateSize == 2); + assert(nfa->streamStateSize == 2); + assert(ISALIGNED_N(dest, 2)); + *(u16 *)dest = unaligned_load_u16(src); + return 0; +} +#endif diff --git a/contrib/libs/hyperscan/src/nfa/mcsheng.h b/contrib/libs/hyperscan/src/nfa/mcsheng.h index 91872779cd..0329e12128 100644 --- a/contrib/libs/hyperscan/src/nfa/mcsheng.h +++ b/contrib/libs/hyperscan/src/nfa/mcsheng.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Intel Corporation + * Copyright (c) 2016-2020, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -80,78 +80,78 @@ char nfaExecMcSheng16_expandState(const struct NFA *nfa, void *dest, #define nfaExecMcSheng16_B_Reverse NFA_API_NO_IMPL #define nfaExecMcSheng16_zombie_status NFA_API_ZOMBIE_NO_IMPL -#if defined(HAVE_AVX512VBMI) -/* 64-8 bit Sheng-McClellan hybrid */ -char nfaExecMcSheng64_8_testEOD(const struct NFA *nfa, const char *state, - const char *streamState, u64a offset, - NfaCallback callback, void *context); -char nfaExecMcSheng64_8_Q(const struct NFA *n, struct mq *q, s64a end); -char nfaExecMcSheng64_8_Q2(const struct NFA *n, struct mq *q, s64a end); -char nfaExecMcSheng64_8_QR(const struct NFA *n, struct mq *q, ReportID report); -char nfaExecMcSheng64_8_reportCurrent(const struct NFA *n, struct mq *q); -char nfaExecMcSheng64_8_inAccept(const struct NFA *n, ReportID report, - struct mq *q); -char nfaExecMcSheng64_8_inAnyAccept(const struct NFA *n, struct mq *q); -char nfaExecMcSheng64_8_queueInitState(const struct NFA *n, struct mq *q); -char nfaExecMcSheng64_8_initCompressedState(const struct NFA *n, u64a offset, - void *state, u8 key); -char nfaExecMcSheng64_8_queueCompressState(const struct NFA *nfa, - const struct mq *q, s64a loc); -char nfaExecMcSheng64_8_expandState(const struct NFA *nfa, void *dest, - const void *src, u64a offset, u8 key); +#if defined(HAVE_AVX512VBMI) +/* 64-8 bit Sheng-McClellan hybrid */ +char nfaExecMcSheng64_8_testEOD(const struct NFA *nfa, const char *state, + const char *streamState, u64a offset, + NfaCallback callback, void *context); +char nfaExecMcSheng64_8_Q(const struct NFA *n, struct mq *q, s64a end); +char nfaExecMcSheng64_8_Q2(const struct NFA *n, struct mq *q, s64a end); +char nfaExecMcSheng64_8_QR(const struct NFA *n, struct mq *q, ReportID report); +char nfaExecMcSheng64_8_reportCurrent(const struct NFA *n, struct mq *q); +char nfaExecMcSheng64_8_inAccept(const struct NFA *n, ReportID report, + struct mq *q); +char nfaExecMcSheng64_8_inAnyAccept(const struct NFA *n, struct mq *q); +char nfaExecMcSheng64_8_queueInitState(const struct NFA *n, struct mq *q); +char nfaExecMcSheng64_8_initCompressedState(const struct NFA *n, u64a offset, + void *state, u8 key); +char nfaExecMcSheng64_8_queueCompressState(const struct NFA *nfa, + const struct mq *q, s64a loc); +char nfaExecMcSheng64_8_expandState(const struct NFA *nfa, void *dest, + const void *src, u64a offset, u8 key); + +#define nfaExecMcSheng64_8_B_Reverse NFA_API_NO_IMPL +#define nfaExecMcSheng64_8_zombie_status NFA_API_ZOMBIE_NO_IMPL + +/* 64-16 bit Sheng-McClellan hybrid */ +char nfaExecMcSheng64_16_testEOD(const struct NFA *nfa, const char *state, + const char *streamState, u64a offset, + NfaCallback callback, void *context); +char nfaExecMcSheng64_16_Q(const struct NFA *n, struct mq *q, s64a end); +char nfaExecMcSheng64_16_Q2(const struct NFA *n, struct mq *q, s64a end); +char nfaExecMcSheng64_16_QR(const struct NFA *n, struct mq *q, ReportID report); +char nfaExecMcSheng64_16_reportCurrent(const struct NFA *n, struct mq *q); +char nfaExecMcSheng64_16_inAccept(const struct NFA *n, ReportID report, + struct mq *q); +char nfaExecMcSheng64_16_inAnyAccept(const struct NFA *n, struct mq *q); +char nfaExecMcSheng64_16_queueInitState(const struct NFA *n, struct mq *q); +char nfaExecMcSheng64_16_initCompressedState(const struct NFA *n, u64a offset, + void *state, u8 key); +char nfaExecMcSheng64_16_queueCompressState(const struct NFA *nfa, + const struct mq *q, s64a loc); +char nfaExecMcSheng64_16_expandState(const struct NFA *nfa, void *dest, + const void *src, u64a offset, u8 key); +#define nfaExecMcSheng64_16_B_Reverse NFA_API_NO_IMPL +#define nfaExecMcSheng64_16_zombie_status NFA_API_ZOMBIE_NO_IMPL +#else // !HAVE_AVX512VBMI +#define nfaExecMcSheng64_8_B_Reverse NFA_API_NO_IMPL +#define nfaExecMcSheng64_8_zombie_status NFA_API_ZOMBIE_NO_IMPL +#define nfaExecMcSheng64_8_Q NFA_API_NO_IMPL +#define nfaExecMcSheng64_8_Q2 NFA_API_NO_IMPL +#define nfaExecMcSheng64_8_QR NFA_API_NO_IMPL +#define nfaExecMcSheng64_8_inAccept NFA_API_NO_IMPL +#define nfaExecMcSheng64_8_inAnyAccept NFA_API_NO_IMPL +#define nfaExecMcSheng64_8_queueInitState NFA_API_NO_IMPL +#define nfaExecMcSheng64_8_queueCompressState NFA_API_NO_IMPL +#define nfaExecMcSheng64_8_expandState NFA_API_NO_IMPL +#define nfaExecMcSheng64_8_initCompressedState NFA_API_NO_IMPL +#define nfaExecMcSheng64_8_testEOD NFA_API_NO_IMPL +#define nfaExecMcSheng64_8_reportCurrent NFA_API_NO_IMPL + +#define nfaExecMcSheng64_16_B_Reverse NFA_API_NO_IMPL +#define nfaExecMcSheng64_16_zombie_status NFA_API_ZOMBIE_NO_IMPL +#define nfaExecMcSheng64_16_Q NFA_API_NO_IMPL +#define nfaExecMcSheng64_16_Q2 NFA_API_NO_IMPL +#define nfaExecMcSheng64_16_QR NFA_API_NO_IMPL +#define nfaExecMcSheng64_16_inAccept NFA_API_NO_IMPL +#define nfaExecMcSheng64_16_inAnyAccept NFA_API_NO_IMPL +#define nfaExecMcSheng64_16_queueInitState NFA_API_NO_IMPL +#define nfaExecMcSheng64_16_queueCompressState NFA_API_NO_IMPL +#define nfaExecMcSheng64_16_expandState NFA_API_NO_IMPL +#define nfaExecMcSheng64_16_initCompressedState NFA_API_NO_IMPL +#define nfaExecMcSheng64_16_testEOD NFA_API_NO_IMPL +#define nfaExecMcSheng64_16_reportCurrent NFA_API_NO_IMPL + +#endif //end of HAVE_AVX512VBMI -#define nfaExecMcSheng64_8_B_Reverse NFA_API_NO_IMPL -#define nfaExecMcSheng64_8_zombie_status NFA_API_ZOMBIE_NO_IMPL - -/* 64-16 bit Sheng-McClellan hybrid */ -char nfaExecMcSheng64_16_testEOD(const struct NFA *nfa, const char *state, - const char *streamState, u64a offset, - NfaCallback callback, void *context); -char nfaExecMcSheng64_16_Q(const struct NFA *n, struct mq *q, s64a end); -char nfaExecMcSheng64_16_Q2(const struct NFA *n, struct mq *q, s64a end); -char nfaExecMcSheng64_16_QR(const struct NFA *n, struct mq *q, ReportID report); -char nfaExecMcSheng64_16_reportCurrent(const struct NFA *n, struct mq *q); -char nfaExecMcSheng64_16_inAccept(const struct NFA *n, ReportID report, - struct mq *q); -char nfaExecMcSheng64_16_inAnyAccept(const struct NFA *n, struct mq *q); -char nfaExecMcSheng64_16_queueInitState(const struct NFA *n, struct mq *q); -char nfaExecMcSheng64_16_initCompressedState(const struct NFA *n, u64a offset, - void *state, u8 key); -char nfaExecMcSheng64_16_queueCompressState(const struct NFA *nfa, - const struct mq *q, s64a loc); -char nfaExecMcSheng64_16_expandState(const struct NFA *nfa, void *dest, - const void *src, u64a offset, u8 key); -#define nfaExecMcSheng64_16_B_Reverse NFA_API_NO_IMPL -#define nfaExecMcSheng64_16_zombie_status NFA_API_ZOMBIE_NO_IMPL -#else // !HAVE_AVX512VBMI -#define nfaExecMcSheng64_8_B_Reverse NFA_API_NO_IMPL -#define nfaExecMcSheng64_8_zombie_status NFA_API_ZOMBIE_NO_IMPL -#define nfaExecMcSheng64_8_Q NFA_API_NO_IMPL -#define nfaExecMcSheng64_8_Q2 NFA_API_NO_IMPL -#define nfaExecMcSheng64_8_QR NFA_API_NO_IMPL -#define nfaExecMcSheng64_8_inAccept NFA_API_NO_IMPL -#define nfaExecMcSheng64_8_inAnyAccept NFA_API_NO_IMPL -#define nfaExecMcSheng64_8_queueInitState NFA_API_NO_IMPL -#define nfaExecMcSheng64_8_queueCompressState NFA_API_NO_IMPL -#define nfaExecMcSheng64_8_expandState NFA_API_NO_IMPL -#define nfaExecMcSheng64_8_initCompressedState NFA_API_NO_IMPL -#define nfaExecMcSheng64_8_testEOD NFA_API_NO_IMPL -#define nfaExecMcSheng64_8_reportCurrent NFA_API_NO_IMPL - -#define nfaExecMcSheng64_16_B_Reverse NFA_API_NO_IMPL -#define nfaExecMcSheng64_16_zombie_status NFA_API_ZOMBIE_NO_IMPL -#define nfaExecMcSheng64_16_Q NFA_API_NO_IMPL -#define nfaExecMcSheng64_16_Q2 NFA_API_NO_IMPL -#define nfaExecMcSheng64_16_QR NFA_API_NO_IMPL -#define nfaExecMcSheng64_16_inAccept NFA_API_NO_IMPL -#define nfaExecMcSheng64_16_inAnyAccept NFA_API_NO_IMPL -#define nfaExecMcSheng64_16_queueInitState NFA_API_NO_IMPL -#define nfaExecMcSheng64_16_queueCompressState NFA_API_NO_IMPL -#define nfaExecMcSheng64_16_expandState NFA_API_NO_IMPL -#define nfaExecMcSheng64_16_initCompressedState NFA_API_NO_IMPL -#define nfaExecMcSheng64_16_testEOD NFA_API_NO_IMPL -#define nfaExecMcSheng64_16_reportCurrent NFA_API_NO_IMPL - -#endif //end of HAVE_AVX512VBMI - #endif diff --git a/contrib/libs/hyperscan/src/nfa/mcsheng_compile.cpp b/contrib/libs/hyperscan/src/nfa/mcsheng_compile.cpp index ffe630c554..fb75e49a35 100644 --- a/contrib/libs/hyperscan/src/nfa/mcsheng_compile.cpp +++ b/contrib/libs/hyperscan/src/nfa/mcsheng_compile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Intel Corporation + * Copyright (c) 2016-2020, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -244,106 +244,106 @@ void populateBasicInfo(size_t state_size, const dfa_info &info, } static -mstate_aux *getAux64(NFA *n, dstate_id_t i) { - mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(n); - mstate_aux *aux_base = (mstate_aux *)((char *)n + m->aux_offset); - - mstate_aux *aux = aux_base + i; - assert((const char *)aux < (const char *)n + m->length); - return aux; -} - -static -void createShuffleMasks64(mcsheng64 *m, const dfa_info &info, - dstate_id_t sheng_end, - const map<dstate_id_t, AccelScheme> &accel_escape_info) { - DEBUG_PRINTF("using first %hu states for a sheng\n", sheng_end); - assert(sheng_end > DEAD_STATE + 1); - assert(sheng_end <= sizeof(m512) + 1); - vector<array<u8, sizeof(m512)>> masks; - masks.resize(info.alpha_size); - /* -1 to avoid wasting a slot as we do not include dead state */ - vector<dstate_id_t> raw_ids; - raw_ids.resize(sheng_end - 1); - for (dstate_id_t s = DEAD_STATE + 1; s < info.states.size(); s++) { - assert(info.implId(s)); /* should not map to DEAD_STATE */ - if (info.is_sheng(s)) { - raw_ids[info.extra[s].sheng_id] = s; - } - } - for (u32 i = 0; i < info.alpha_size; i++) { - if (i == info.alpha_remap[TOP]) { - continue; - } - auto &mask = masks[i]; - assert(sizeof(mask) == sizeof(m512)); - mask.fill(0); - - for (dstate_id_t sheng_id = 0; sheng_id < sheng_end - 1; sheng_id++) { - dstate_id_t raw_id = raw_ids[sheng_id]; - dstate_id_t next_id = info.implId(info.states[raw_id].next[i]); - if (next_id == DEAD_STATE) { - next_id = sheng_end - 1; - } else if (next_id < sheng_end) { - next_id--; - } - DEBUG_PRINTF("%hu: %u->next %hu\n", sheng_id, i, next_id); - mask[sheng_id] = verify_u8(next_id); - } - } - for (u32 i = 0; i < N_CHARS; i++) { - assert(info.alpha_remap[i] != info.alpha_remap[TOP]); - memcpy((u8 *)&m->sheng_succ_masks[i], - (u8 *)masks[info.alpha_remap[i]].data(), sizeof(m512)); - } - m->sheng_end = sheng_end; - m->sheng_accel_limit = sheng_end - 1; - - for (dstate_id_t s : raw_ids) { - if (contains(accel_escape_info, s)) { - LIMIT_TO_AT_MOST(&m->sheng_accel_limit, info.extra[s].sheng_id); - } - } -} - -static -void populateBasicInfo64(size_t state_size, const dfa_info &info, - u32 total_size, u32 aux_offset, u32 accel_offset, - u32 accel_count, ReportID arb, bool single, NFA *nfa) { - assert(state_size == sizeof(u16) || state_size == sizeof(u8)); - - nfa->length = total_size; - nfa->nPositions = info.states.size(); - - nfa->scratchStateSize = verify_u32(state_size); - nfa->streamStateSize = verify_u32(state_size); - - if (state_size == sizeof(u8)) { - nfa->type = MCSHENG_64_NFA_8; - } else { - nfa->type = MCSHENG_64_NFA_16; - } - - mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa); - for (u32 i = 0; i < 256; i++) { - m->remap[i] = verify_u8(info.alpha_remap[i]); - } - m->alphaShift = info.getAlphaShift(); - m->length = total_size; - m->aux_offset = aux_offset; - m->accel_offset = accel_offset; - m->arb_report = arb; - m->state_count = verify_u16(info.size()); - m->start_anchored = info.implId(info.raw.start_anchored); - m->start_floating = info.implId(info.raw.start_floating); - m->has_accel = accel_count ? 1 : 0; - - if (single) { - m->flags |= MCSHENG_FLAG_SINGLE; - } -} - -static +mstate_aux *getAux64(NFA *n, dstate_id_t i) { + mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(n); + mstate_aux *aux_base = (mstate_aux *)((char *)n + m->aux_offset); + + mstate_aux *aux = aux_base + i; + assert((const char *)aux < (const char *)n + m->length); + return aux; +} + +static +void createShuffleMasks64(mcsheng64 *m, const dfa_info &info, + dstate_id_t sheng_end, + const map<dstate_id_t, AccelScheme> &accel_escape_info) { + DEBUG_PRINTF("using first %hu states for a sheng\n", sheng_end); + assert(sheng_end > DEAD_STATE + 1); + assert(sheng_end <= sizeof(m512) + 1); + vector<array<u8, sizeof(m512)>> masks; + masks.resize(info.alpha_size); + /* -1 to avoid wasting a slot as we do not include dead state */ + vector<dstate_id_t> raw_ids; + raw_ids.resize(sheng_end - 1); + for (dstate_id_t s = DEAD_STATE + 1; s < info.states.size(); s++) { + assert(info.implId(s)); /* should not map to DEAD_STATE */ + if (info.is_sheng(s)) { + raw_ids[info.extra[s].sheng_id] = s; + } + } + for (u32 i = 0; i < info.alpha_size; i++) { + if (i == info.alpha_remap[TOP]) { + continue; + } + auto &mask = masks[i]; + assert(sizeof(mask) == sizeof(m512)); + mask.fill(0); + + for (dstate_id_t sheng_id = 0; sheng_id < sheng_end - 1; sheng_id++) { + dstate_id_t raw_id = raw_ids[sheng_id]; + dstate_id_t next_id = info.implId(info.states[raw_id].next[i]); + if (next_id == DEAD_STATE) { + next_id = sheng_end - 1; + } else if (next_id < sheng_end) { + next_id--; + } + DEBUG_PRINTF("%hu: %u->next %hu\n", sheng_id, i, next_id); + mask[sheng_id] = verify_u8(next_id); + } + } + for (u32 i = 0; i < N_CHARS; i++) { + assert(info.alpha_remap[i] != info.alpha_remap[TOP]); + memcpy((u8 *)&m->sheng_succ_masks[i], + (u8 *)masks[info.alpha_remap[i]].data(), sizeof(m512)); + } + m->sheng_end = sheng_end; + m->sheng_accel_limit = sheng_end - 1; + + for (dstate_id_t s : raw_ids) { + if (contains(accel_escape_info, s)) { + LIMIT_TO_AT_MOST(&m->sheng_accel_limit, info.extra[s].sheng_id); + } + } +} + +static +void populateBasicInfo64(size_t state_size, const dfa_info &info, + u32 total_size, u32 aux_offset, u32 accel_offset, + u32 accel_count, ReportID arb, bool single, NFA *nfa) { + assert(state_size == sizeof(u16) || state_size == sizeof(u8)); + + nfa->length = total_size; + nfa->nPositions = info.states.size(); + + nfa->scratchStateSize = verify_u32(state_size); + nfa->streamStateSize = verify_u32(state_size); + + if (state_size == sizeof(u8)) { + nfa->type = MCSHENG_64_NFA_8; + } else { + nfa->type = MCSHENG_64_NFA_16; + } + + mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa); + for (u32 i = 0; i < 256; i++) { + m->remap[i] = verify_u8(info.alpha_remap[i]); + } + m->alphaShift = info.getAlphaShift(); + m->length = total_size; + m->aux_offset = aux_offset; + m->accel_offset = accel_offset; + m->arb_report = arb; + m->state_count = verify_u16(info.size()); + m->start_anchored = info.implId(info.raw.start_anchored); + m->start_floating = info.implId(info.raw.start_floating); + m->has_accel = accel_count ? 1 : 0; + + if (single) { + m->flags |= MCSHENG_FLAG_SINGLE; + } +} + +static size_t calcShermanRegionSize(const dfa_info &info) { size_t rv = 0; @@ -371,7 +371,7 @@ void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info, /* returns false on error */ static bool allocateImplId16(dfa_info &info, dstate_id_t sheng_end, - dstate_id_t *sherman_base) { + dstate_id_t *sherman_base) { info.states[0].impl_id = 0; /* dead is always 0 */ vector<dstate_id_t> norm; @@ -481,7 +481,7 @@ CharReach get_edge_reach(dstate_id_t u, dstate_id_t v, const dfa_info &info) { } #define MAX_SHENG_STATES 16 -#define MAX_SHENG64_STATES 64 +#define MAX_SHENG64_STATES 64 #define MAX_SHENG_LEAKINESS 0.05 using LeakinessCache = ue2_unordered_map<pair<RdfaVertex, u32>, double>; @@ -535,8 +535,8 @@ double leakiness(const RdfaGraph &g, dfa_info &info, static dstate_id_t find_sheng_states(dfa_info &info, - map<dstate_id_t, AccelScheme> &accel_escape_info, - size_t max_sheng_states) { + map<dstate_id_t, AccelScheme> &accel_escape_info, + size_t max_sheng_states) { RdfaGraph g(info.raw); auto cyclics = find_vertices_in_cycles(g); @@ -571,7 +571,7 @@ dstate_id_t find_sheng_states(dfa_info &info, flat_set<dstate_id_t> considered = { DEAD_STATE }; bool seen_back_edge = false; while (!to_consider.empty() - && sheng_states.size() < max_sheng_states) { + && sheng_states.size() < max_sheng_states) { auto v = to_consider.front(); to_consider.pop_front(); if (!considered.insert(g[v].index).second) { @@ -717,80 +717,80 @@ void fill_in_succ_table_16(NFA *nfa, const dfa_info &info, } } -static -void fill_in_aux_info64(NFA *nfa, const dfa_info &info, - const map<dstate_id_t, AccelScheme> &accel_escape_info, - u32 accel_offset, UNUSED u32 accel_end_offset, - const vector<u32> &reports, - const vector<u32> &reports_eod, - u32 report_base_offset, - const raw_report_info &ri) { - mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa); - - vector<u32> reportOffsets; - - ri.fillReportLists(nfa, report_base_offset, reportOffsets); - - for (u32 i = 0; i < info.size(); i++) { - u16 impl_id = info.implId(i); - mstate_aux *this_aux = getAux64(nfa, impl_id); - - fillInAux(this_aux, i, info, reports, reports_eod, reportOffsets); - if (contains(accel_escape_info, i)) { - this_aux->accel_offset = accel_offset; - accel_offset += info.strat.accelSize(); - assert(accel_offset <= accel_end_offset); - assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); - info.strat.buildAccel(i, accel_escape_info.at(i), - (void *)((char *)m + this_aux->accel_offset)); - } - } -} - -static -u16 get_edge_flags64(NFA *nfa, dstate_id_t target_impl_id) { - mstate_aux *aux = getAux64(nfa, target_impl_id); - u16 flags = 0; - - if (aux->accept) { - flags |= ACCEPT_FLAG; - } - - if (aux->accel_offset) { - flags |= ACCEL_FLAG; - } - - return flags; -} - -static -void fill_in_succ_table_64_16(NFA *nfa, const dfa_info &info, - dstate_id_t sheng_end, - UNUSED dstate_id_t sherman_base) { - u16 *succ_table = (u16 *)((char *)nfa + sizeof(NFA) + sizeof(mcsheng64)); - - u8 alphaShift = info.getAlphaShift(); - assert(alphaShift <= 8); - - for (size_t i = 0; i < info.size(); i++) { - if (!info.is_normal(i)) { - assert(info.implId(i) < sheng_end || info.is_sherman(i)); - continue; - } - - assert(info.implId(i) < sherman_base); - u16 normal_id = verify_u16(info.implId(i) - sheng_end); - - for (size_t s = 0; s < info.impl_alpha_size; s++) { - dstate_id_t raw_succ = info.states[i].next[s]; - u16 &entry = succ_table[((size_t)normal_id << alphaShift) + s]; - - entry = info.implId(raw_succ); - entry |= get_edge_flags64(nfa, entry); - } - } -} - +static +void fill_in_aux_info64(NFA *nfa, const dfa_info &info, + const map<dstate_id_t, AccelScheme> &accel_escape_info, + u32 accel_offset, UNUSED u32 accel_end_offset, + const vector<u32> &reports, + const vector<u32> &reports_eod, + u32 report_base_offset, + const raw_report_info &ri) { + mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa); + + vector<u32> reportOffsets; + + ri.fillReportLists(nfa, report_base_offset, reportOffsets); + + for (u32 i = 0; i < info.size(); i++) { + u16 impl_id = info.implId(i); + mstate_aux *this_aux = getAux64(nfa, impl_id); + + fillInAux(this_aux, i, info, reports, reports_eod, reportOffsets); + if (contains(accel_escape_info, i)) { + this_aux->accel_offset = accel_offset; + accel_offset += info.strat.accelSize(); + assert(accel_offset <= accel_end_offset); + assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); + info.strat.buildAccel(i, accel_escape_info.at(i), + (void *)((char *)m + this_aux->accel_offset)); + } + } +} + +static +u16 get_edge_flags64(NFA *nfa, dstate_id_t target_impl_id) { + mstate_aux *aux = getAux64(nfa, target_impl_id); + u16 flags = 0; + + if (aux->accept) { + flags |= ACCEPT_FLAG; + } + + if (aux->accel_offset) { + flags |= ACCEL_FLAG; + } + + return flags; +} + +static +void fill_in_succ_table_64_16(NFA *nfa, const dfa_info &info, + dstate_id_t sheng_end, + UNUSED dstate_id_t sherman_base) { + u16 *succ_table = (u16 *)((char *)nfa + sizeof(NFA) + sizeof(mcsheng64)); + + u8 alphaShift = info.getAlphaShift(); + assert(alphaShift <= 8); + + for (size_t i = 0; i < info.size(); i++) { + if (!info.is_normal(i)) { + assert(info.implId(i) < sheng_end || info.is_sherman(i)); + continue; + } + + assert(info.implId(i) < sherman_base); + u16 normal_id = verify_u16(info.implId(i) - sheng_end); + + for (size_t s = 0; s < info.impl_alpha_size; s++) { + dstate_id_t raw_succ = info.states[i].next[s]; + u16 &entry = succ_table[((size_t)normal_id << alphaShift) + s]; + + entry = info.implId(raw_succ); + entry |= get_edge_flags64(nfa, entry); + } + } +} + #define MAX_SHERMAN_LIST_LEN 8 static @@ -1017,19 +1017,19 @@ bytecode_ptr<NFA> mcshengCompile16(dfa_info &info, dstate_id_t sheng_end, assert(info.getAlphaShift() <= 8); - // Sherman optimization - if (info.impl_alpha_size > 16) { - u16 total_daddy = 0; - for (u32 i = 0; i < info.size(); i++) { - find_better_daddy(info, i, - is_cyclic_near(info.raw, info.raw.start_anchored), - grey); - total_daddy += info.extra[i].daddytaken; - } - - DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy, - info.size() * info.impl_alpha_size, info.size(), - info.impl_alpha_size); + // Sherman optimization + if (info.impl_alpha_size > 16) { + u16 total_daddy = 0; + for (u32 i = 0; i < info.size(); i++) { + find_better_daddy(info, i, + is_cyclic_near(info.raw, info.raw.start_anchored), + grey); + total_daddy += info.extra[i].daddytaken; + } + + DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy, + info.size() * info.impl_alpha_size, info.size(), + info.impl_alpha_size); } u16 sherman_limit; @@ -1110,160 +1110,160 @@ void fill_in_succ_table_8(NFA *nfa, const dfa_info &info, } static -void fill_in_sherman64(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) { - char *nfa_base = (char *)nfa; - mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa); - char *sherman_table = nfa_base + m->sherman_offset; - - assert(ISALIGNED_16(sherman_table)); - for (size_t i = 0; i < info.size(); i++) { - if (!info.is_sherman(i)) { - continue; - } - u16 fs = verify_u16(info.implId(i)); - DEBUG_PRINTF("building sherman %zu impl %hu\n", i, fs); - - assert(fs >= sherman_limit); - - char *curr_sherman_entry - = sherman_table + (fs - m->sherman_limit) * SHERMAN_FIXED_SIZE; - assert(curr_sherman_entry <= nfa_base + m->length); - - u8 len = verify_u8(info.impl_alpha_size - info.extra[i].daddytaken); - assert(len <= 9); - dstate_id_t d = info.states[i].daddy; - - *(u8 *)(curr_sherman_entry + SHERMAN_TYPE_OFFSET) = SHERMAN_STATE; - *(u8 *)(curr_sherman_entry + SHERMAN_LEN_OFFSET) = len; - *(u16 *)(curr_sherman_entry + SHERMAN_DADDY_OFFSET) = info.implId(d); - u8 *chars = (u8 *)(curr_sherman_entry + SHERMAN_CHARS_OFFSET); - - for (u16 s = 0; s < info.impl_alpha_size; s++) { - if (info.states[i].next[s] != info.states[d].next[s]) { - *(chars++) = (u8)s; - } - } - - u16 *states = (u16 *)(curr_sherman_entry + SHERMAN_STATES_OFFSET(len)); - for (u16 s = 0; s < info.impl_alpha_size; s++) { - if (info.states[i].next[s] != info.states[d].next[s]) { - DEBUG_PRINTF("s overrider %hu dad %hu char next %hu\n", fs, - info.implId(d), - info.implId(info.states[i].next[s])); - u16 entry_val = info.implId(info.states[i].next[s]); - entry_val |= get_edge_flags64(nfa, entry_val); - unaligned_store_u16((u8 *)states++, entry_val); - } - } - } -} - -static -bytecode_ptr<NFA> mcsheng64Compile16(dfa_info&info, dstate_id_t sheng_end, - const map<dstate_id_t, AccelScheme>&accel_escape_info, - const Grey &grey) { - DEBUG_PRINTF("building mcsheng 64-16\n"); - - vector<u32> reports; /* index in ri for the appropriate report list */ - vector<u32> reports_eod; /* as above */ - ReportID arb; - u8 single; - - assert(info.getAlphaShift() <= 8); - - // Sherman optimization - if (info.impl_alpha_size > 16) { - u16 total_daddy = 0; - for (u32 i = 0; i < info.size(); i++) { - find_better_daddy(info, i, - is_cyclic_near(info.raw, info.raw.start_anchored), - grey); - total_daddy += info.extra[i].daddytaken; - } - - DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy, - info.size() * info.impl_alpha_size, info.size(), - info.impl_alpha_size); - } - - u16 sherman_limit; - if (!allocateImplId16(info, sheng_end, &sherman_limit)) { - DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n", - info.size()); - return nullptr; - } - u16 count_real_states = sherman_limit - sheng_end; - - auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb); - - size_t tran_size = (1 << info.getAlphaShift()) * sizeof(u16) - * count_real_states; - - size_t aux_size = sizeof(mstate_aux) * info.size(); - - size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcsheng64) + tran_size); - size_t accel_size = info.strat.accelSize() * accel_escape_info.size(); - size_t accel_offset = ROUNDUP_N(aux_offset + aux_size - + ri->getReportListSize(), 32); - size_t sherman_offset = ROUNDUP_16(accel_offset + accel_size); - size_t sherman_size = calcShermanRegionSize(info); - - size_t total_size = sherman_offset + sherman_size; - - accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */ - assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); - - auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size); - mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa.get()); - - populateBasicInfo64(sizeof(u16), info, total_size, aux_offset, accel_offset, - accel_escape_info.size(), arb, single, nfa.get()); - createShuffleMasks64(m, info, sheng_end, accel_escape_info); - - /* copy in the mc header information */ - m->sherman_offset = sherman_offset; - m->sherman_end = total_size; - m->sherman_limit = sherman_limit; - - DEBUG_PRINTF("%hu sheng, %hu norm, %zu total\n", sheng_end, - count_real_states, info.size()); - - fill_in_aux_info64(nfa.get(), info, accel_escape_info, accel_offset, - sherman_offset - sizeof(NFA), reports, reports_eod, - aux_offset + aux_size, *ri); - - fill_in_succ_table_64_16(nfa.get(), info, sheng_end, sherman_limit); - - fill_in_sherman64(nfa.get(), info, sherman_limit); - - return nfa; -} - -static -void fill_in_succ_table_64_8(NFA *nfa, const dfa_info &info, - dstate_id_t sheng_end) { - u8 *succ_table = (u8 *)nfa + sizeof(NFA) + sizeof(mcsheng64); - - u8 alphaShift = info.getAlphaShift(); - assert(alphaShift <= 8); - - for (size_t i = 0; i < info.size(); i++) { - assert(!info.is_sherman(i)); - if (!info.is_normal(i)) { - assert(info.implId(i) < sheng_end); - continue; - } - u8 normal_id = verify_u8(info.implId(i) - sheng_end); - - for (size_t s = 0; s < info.impl_alpha_size; s++) { - dstate_id_t raw_succ = info.states[i].next[s]; - succ_table[((size_t)normal_id << alphaShift) + s] - = info.implId(raw_succ); - } - } -} - -static +void fill_in_sherman64(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) { + char *nfa_base = (char *)nfa; + mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa); + char *sherman_table = nfa_base + m->sherman_offset; + + assert(ISALIGNED_16(sherman_table)); + for (size_t i = 0; i < info.size(); i++) { + if (!info.is_sherman(i)) { + continue; + } + u16 fs = verify_u16(info.implId(i)); + DEBUG_PRINTF("building sherman %zu impl %hu\n", i, fs); + + assert(fs >= sherman_limit); + + char *curr_sherman_entry + = sherman_table + (fs - m->sherman_limit) * SHERMAN_FIXED_SIZE; + assert(curr_sherman_entry <= nfa_base + m->length); + + u8 len = verify_u8(info.impl_alpha_size - info.extra[i].daddytaken); + assert(len <= 9); + dstate_id_t d = info.states[i].daddy; + + *(u8 *)(curr_sherman_entry + SHERMAN_TYPE_OFFSET) = SHERMAN_STATE; + *(u8 *)(curr_sherman_entry + SHERMAN_LEN_OFFSET) = len; + *(u16 *)(curr_sherman_entry + SHERMAN_DADDY_OFFSET) = info.implId(d); + u8 *chars = (u8 *)(curr_sherman_entry + SHERMAN_CHARS_OFFSET); + + for (u16 s = 0; s < info.impl_alpha_size; s++) { + if (info.states[i].next[s] != info.states[d].next[s]) { + *(chars++) = (u8)s; + } + } + + u16 *states = (u16 *)(curr_sherman_entry + SHERMAN_STATES_OFFSET(len)); + for (u16 s = 0; s < info.impl_alpha_size; s++) { + if (info.states[i].next[s] != info.states[d].next[s]) { + DEBUG_PRINTF("s overrider %hu dad %hu char next %hu\n", fs, + info.implId(d), + info.implId(info.states[i].next[s])); + u16 entry_val = info.implId(info.states[i].next[s]); + entry_val |= get_edge_flags64(nfa, entry_val); + unaligned_store_u16((u8 *)states++, entry_val); + } + } + } +} + +static +bytecode_ptr<NFA> mcsheng64Compile16(dfa_info&info, dstate_id_t sheng_end, + const map<dstate_id_t, AccelScheme>&accel_escape_info, + const Grey &grey) { + DEBUG_PRINTF("building mcsheng 64-16\n"); + + vector<u32> reports; /* index in ri for the appropriate report list */ + vector<u32> reports_eod; /* as above */ + ReportID arb; + u8 single; + + assert(info.getAlphaShift() <= 8); + + // Sherman optimization + if (info.impl_alpha_size > 16) { + u16 total_daddy = 0; + for (u32 i = 0; i < info.size(); i++) { + find_better_daddy(info, i, + is_cyclic_near(info.raw, info.raw.start_anchored), + grey); + total_daddy += info.extra[i].daddytaken; + } + + DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy, + info.size() * info.impl_alpha_size, info.size(), + info.impl_alpha_size); + } + + u16 sherman_limit; + if (!allocateImplId16(info, sheng_end, &sherman_limit)) { + DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n", + info.size()); + return nullptr; + } + u16 count_real_states = sherman_limit - sheng_end; + + auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb); + + size_t tran_size = (1 << info.getAlphaShift()) * sizeof(u16) + * count_real_states; + + size_t aux_size = sizeof(mstate_aux) * info.size(); + + size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcsheng64) + tran_size); + size_t accel_size = info.strat.accelSize() * accel_escape_info.size(); + size_t accel_offset = ROUNDUP_N(aux_offset + aux_size + + ri->getReportListSize(), 32); + size_t sherman_offset = ROUNDUP_16(accel_offset + accel_size); + size_t sherman_size = calcShermanRegionSize(info); + + size_t total_size = sherman_offset + sherman_size; + + accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */ + assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); + + auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size); + mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa.get()); + + populateBasicInfo64(sizeof(u16), info, total_size, aux_offset, accel_offset, + accel_escape_info.size(), arb, single, nfa.get()); + createShuffleMasks64(m, info, sheng_end, accel_escape_info); + + /* copy in the mc header information */ + m->sherman_offset = sherman_offset; + m->sherman_end = total_size; + m->sherman_limit = sherman_limit; + + DEBUG_PRINTF("%hu sheng, %hu norm, %zu total\n", sheng_end, + count_real_states, info.size()); + + fill_in_aux_info64(nfa.get(), info, accel_escape_info, accel_offset, + sherman_offset - sizeof(NFA), reports, reports_eod, + aux_offset + aux_size, *ri); + + fill_in_succ_table_64_16(nfa.get(), info, sheng_end, sherman_limit); + + fill_in_sherman64(nfa.get(), info, sherman_limit); + + return nfa; +} + +static +void fill_in_succ_table_64_8(NFA *nfa, const dfa_info &info, + dstate_id_t sheng_end) { + u8 *succ_table = (u8 *)nfa + sizeof(NFA) + sizeof(mcsheng64); + + u8 alphaShift = info.getAlphaShift(); + assert(alphaShift <= 8); + + for (size_t i = 0; i < info.size(); i++) { + assert(!info.is_sherman(i)); + if (!info.is_normal(i)) { + assert(info.implId(i) < sheng_end); + continue; + } + u8 normal_id = verify_u8(info.implId(i) - sheng_end); + + for (size_t s = 0; s < info.impl_alpha_size; s++) { + dstate_id_t raw_succ = info.states[i].next[s]; + succ_table[((size_t)normal_id << alphaShift) + s] + = info.implId(raw_succ); + } + } +} + +static void allocateImplId8(dfa_info &info, dstate_id_t sheng_end, const map<dstate_id_t, AccelScheme> &accel_escape_info, u16 *accel_limit, u16 *accept_limit) { @@ -1360,58 +1360,58 @@ bytecode_ptr<NFA> mcshengCompile8(dfa_info &info, dstate_id_t sheng_end, return nfa; } -static -bytecode_ptr<NFA> mcsheng64Compile8(dfa_info &info, dstate_id_t sheng_end, - const map<dstate_id_t, AccelScheme> &accel_escape_info) { - DEBUG_PRINTF("building mcsheng 64-8\n"); - - vector<u32> reports; - vector<u32> reports_eod; - ReportID arb; - u8 single; - - auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb); - - size_t normal_count = info.size() - sheng_end; - - size_t tran_size = sizeof(u8) * (1 << info.getAlphaShift()) * normal_count; - size_t aux_size = sizeof(mstate_aux) * info.size(); - size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcsheng64) + tran_size); - size_t accel_size = info.strat.accelSize() * accel_escape_info.size(); - size_t accel_offset = ROUNDUP_N(aux_offset + aux_size - + ri->getReportListSize(), 32); - size_t total_size = accel_offset + accel_size; - - DEBUG_PRINTF("aux_size %zu\n", aux_size); - DEBUG_PRINTF("aux_offset %zu\n", aux_offset); - DEBUG_PRINTF("rl size %u\n", ri->getReportListSize()); - DEBUG_PRINTF("accel_size %zu\n", accel_size); - DEBUG_PRINTF("accel_offset %zu\n", accel_offset); - DEBUG_PRINTF("total_size %zu\n", total_size); - - accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */ - assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); - - auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size); - mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa.get()); - - allocateImplId8(info, sheng_end, accel_escape_info, &m->accel_limit_8, - &m->accept_limit_8); - - populateBasicInfo64(sizeof(u8), info, total_size, aux_offset, accel_offset, - accel_escape_info.size(), arb, single, nfa.get()); - createShuffleMasks64(m, info, sheng_end, accel_escape_info); - - fill_in_aux_info64(nfa.get(), info, accel_escape_info, accel_offset, - total_size - sizeof(NFA), reports, reports_eod, - aux_offset + aux_size, *ri); - - fill_in_succ_table_64_8(nfa.get(), info, sheng_end); - DEBUG_PRINTF("rl size %zu\n", ri->size()); - - return nfa; -} - +static +bytecode_ptr<NFA> mcsheng64Compile8(dfa_info &info, dstate_id_t sheng_end, + const map<dstate_id_t, AccelScheme> &accel_escape_info) { + DEBUG_PRINTF("building mcsheng 64-8\n"); + + vector<u32> reports; + vector<u32> reports_eod; + ReportID arb; + u8 single; + + auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb); + + size_t normal_count = info.size() - sheng_end; + + size_t tran_size = sizeof(u8) * (1 << info.getAlphaShift()) * normal_count; + size_t aux_size = sizeof(mstate_aux) * info.size(); + size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcsheng64) + tran_size); + size_t accel_size = info.strat.accelSize() * accel_escape_info.size(); + size_t accel_offset = ROUNDUP_N(aux_offset + aux_size + + ri->getReportListSize(), 32); + size_t total_size = accel_offset + accel_size; + + DEBUG_PRINTF("aux_size %zu\n", aux_size); + DEBUG_PRINTF("aux_offset %zu\n", aux_offset); + DEBUG_PRINTF("rl size %u\n", ri->getReportListSize()); + DEBUG_PRINTF("accel_size %zu\n", accel_size); + DEBUG_PRINTF("accel_offset %zu\n", accel_offset); + DEBUG_PRINTF("total_size %zu\n", total_size); + + accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */ + assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); + + auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size); + mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa.get()); + + allocateImplId8(info, sheng_end, accel_escape_info, &m->accel_limit_8, + &m->accept_limit_8); + + populateBasicInfo64(sizeof(u8), info, total_size, aux_offset, accel_offset, + accel_escape_info.size(), arb, single, nfa.get()); + createShuffleMasks64(m, info, sheng_end, accel_escape_info); + + fill_in_aux_info64(nfa.get(), info, accel_escape_info, accel_offset, + total_size - sizeof(NFA), reports, reports_eod, + aux_offset + aux_size, *ri); + + fill_in_succ_table_64_8(nfa.get(), info, sheng_end); + DEBUG_PRINTF("rl size %zu\n", ri->size()); + + return nfa; +} + bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc, const ReportManager &rm) { if (!cc.grey.allowMcSheng) { @@ -1431,16 +1431,16 @@ bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc, map<dstate_id_t, AccelScheme> accel_escape_info = info.strat.getAccelInfo(cc.grey); - auto old_states = info.states; - dstate_id_t sheng_end = find_sheng_states(info, accel_escape_info, MAX_SHENG_STATES); + auto old_states = info.states; + dstate_id_t sheng_end = find_sheng_states(info, accel_escape_info, MAX_SHENG_STATES); if (sheng_end <= DEAD_STATE + 1) { - info.states = old_states; + info.states = old_states; return nullptr; } bytecode_ptr<NFA> nfa; - + if (!using8bit) { nfa = mcshengCompile16(info, sheng_end, accel_escape_info, cc.grey); } else { @@ -1448,7 +1448,67 @@ bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc, } if (!nfa) { - info.states = old_states; + info.states = old_states; + return nfa; + } + + if (has_eod_reports) { + nfa->flags |= NFA_ACCEPTS_EOD; + } + + DEBUG_PRINTF("compile done\n"); + return nfa; +} + +bytecode_ptr<NFA> mcshengCompile64(raw_dfa &raw, const CompileContext &cc, + const ReportManager &rm) { + if (!cc.grey.allowMcSheng) { + return nullptr; + } + + if (!cc.target_info.has_avx512vbmi()) { + DEBUG_PRINTF("McSheng64 failed, no HS_CPU_FEATURES_AVX512VBMI!\n"); + return nullptr; + } + + mcclellan_build_strat mbs(raw, rm, false); + dfa_info info(mbs); + bool using8bit = cc.grey.allowMcClellan8 && info.size() <= 256; + + if (!cc.streaming) { /* TODO: work out if we can do the strip in streaming + * mode with our semantics */ + raw.stripExtraEodReports(); + } + + bool has_eod_reports = raw.hasEodReports(); + + map<dstate_id_t, AccelScheme> accel_escape_info + = info.strat.getAccelInfo(cc.grey); + bool using64state = false; /*default flag*/ + dstate_id_t sheng_end64; + sheng_end64 = find_sheng_states(info, accel_escape_info, MAX_SHENG64_STATES); + + if (sheng_end64 <= DEAD_STATE + 1) { + return nullptr; + } else { + using64state = true; + } + + bytecode_ptr<NFA> nfa; + + if (using64state) { + assert((sheng_end64 > 17) && (sheng_end64 <= 65)); + if (!using8bit) { + nfa = mcsheng64Compile16(info, sheng_end64, accel_escape_info, cc.grey); + } else { + assert(using8bit); + nfa = mcsheng64Compile8(info, sheng_end64, accel_escape_info); + assert(nfa); + assert(nfa->type == MCSHENG_64_NFA_8); + } + } + + if (!nfa) { return nfa; } @@ -1460,66 +1520,6 @@ bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc, return nfa; } -bytecode_ptr<NFA> mcshengCompile64(raw_dfa &raw, const CompileContext &cc, - const ReportManager &rm) { - if (!cc.grey.allowMcSheng) { - return nullptr; - } - - if (!cc.target_info.has_avx512vbmi()) { - DEBUG_PRINTF("McSheng64 failed, no HS_CPU_FEATURES_AVX512VBMI!\n"); - return nullptr; - } - - mcclellan_build_strat mbs(raw, rm, false); - dfa_info info(mbs); - bool using8bit = cc.grey.allowMcClellan8 && info.size() <= 256; - - if (!cc.streaming) { /* TODO: work out if we can do the strip in streaming - * mode with our semantics */ - raw.stripExtraEodReports(); - } - - bool has_eod_reports = raw.hasEodReports(); - - map<dstate_id_t, AccelScheme> accel_escape_info - = info.strat.getAccelInfo(cc.grey); - bool using64state = false; /*default flag*/ - dstate_id_t sheng_end64; - sheng_end64 = find_sheng_states(info, accel_escape_info, MAX_SHENG64_STATES); - - if (sheng_end64 <= DEAD_STATE + 1) { - return nullptr; - } else { - using64state = true; - } - - bytecode_ptr<NFA> nfa; - - if (using64state) { - assert((sheng_end64 > 17) && (sheng_end64 <= 65)); - if (!using8bit) { - nfa = mcsheng64Compile16(info, sheng_end64, accel_escape_info, cc.grey); - } else { - assert(using8bit); - nfa = mcsheng64Compile8(info, sheng_end64, accel_escape_info); - assert(nfa); - assert(nfa->type == MCSHENG_64_NFA_8); - } - } - - if (!nfa) { - return nfa; - } - - if (has_eod_reports) { - nfa->flags |= NFA_ACCEPTS_EOD; - } - - DEBUG_PRINTF("compile done\n"); - return nfa; -} - bool has_accel_mcsheng(const NFA *) { return true; /* consider the sheng region as accelerated */ } diff --git a/contrib/libs/hyperscan/src/nfa/mcsheng_compile.h b/contrib/libs/hyperscan/src/nfa/mcsheng_compile.h index 7de7c14568..3a79b46a23 100644 --- a/contrib/libs/hyperscan/src/nfa/mcsheng_compile.h +++ b/contrib/libs/hyperscan/src/nfa/mcsheng_compile.h @@ -42,8 +42,8 @@ struct raw_dfa; bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc, const ReportManager &rm); -bytecode_ptr<NFA> mcshengCompile64(raw_dfa &raw, const CompileContext &cc, - const ReportManager &rm); +bytecode_ptr<NFA> mcshengCompile64(raw_dfa &raw, const CompileContext &cc, + const ReportManager &rm); bool has_accel_mcsheng(const NFA *nfa); } // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfa/mcsheng_data.c b/contrib/libs/hyperscan/src/nfa/mcsheng_data.c index 304e383736..0701b4b313 100644 --- a/contrib/libs/hyperscan/src/nfa/mcsheng_data.c +++ b/contrib/libs/hyperscan/src/nfa/mcsheng_data.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Intel Corporation + * Copyright (c) 2016-2020, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -41,15 +41,15 @@ const u64a mcsheng_pext_mask[8] = { 0x00ff00000000000f, 0xff0000000000000f, }; -#if defined(HAVE_AVX512VBMI) -const u64a mcsheng64_pext_mask[8] = { - 0, /* dummy */ - 0x000000000000ff3f, - 0x0000000000ff003f, - 0x00000000ff00003f, - 0x000000ff0000003f, - 0x0000ff000000003f, - 0x00ff00000000003f, - 0xff0000000000003f, -}; -#endif +#if defined(HAVE_AVX512VBMI) +const u64a mcsheng64_pext_mask[8] = { + 0, /* dummy */ + 0x000000000000ff3f, + 0x0000000000ff003f, + 0x00000000ff00003f, + 0x000000ff0000003f, + 0x0000ff000000003f, + 0x00ff00000000003f, + 0xff0000000000003f, +}; +#endif diff --git a/contrib/libs/hyperscan/src/nfa/mcsheng_internal.h b/contrib/libs/hyperscan/src/nfa/mcsheng_internal.h index 646229709d..d985574624 100644 --- a/contrib/libs/hyperscan/src/nfa/mcsheng_internal.h +++ b/contrib/libs/hyperscan/src/nfa/mcsheng_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Intel Corporation + * Copyright (c) 2016-2020, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -84,7 +84,7 @@ struct mcsheng { u8 has_accel; /**< 1 iff there are any accel plans */ u8 remap[256]; /**< remaps characters to a smaller alphabet */ ReportID arb_report; /**< one of the accepts that this dfa may raise */ - u32 accel_offset; /**< offset of accel structures from start of McClellan */ + u32 accel_offset; /**< offset of accel structures from start of McClellan */ m128 sheng_masks[N_CHARS]; }; @@ -92,33 +92,33 @@ struct mcsheng { * representing the data from a u64a. */ extern const u64a mcsheng_pext_mask[8]; -struct mcsheng64 { - u16 state_count; /**< total number of states */ - u32 length; /**< length of dfa in bytes */ - u16 start_anchored; /**< anchored start state */ - u16 start_floating; /**< floating start state */ - u32 aux_offset; /**< offset of the aux structures relative to the start of - * the nfa structure */ - u32 sherman_offset; /**< offset of array of sherman state offsets the - * state_info structures relative to the start of the - * nfa structure */ - u32 sherman_end; /**< offset of the end of the state_info structures - * relative to the start of the nfa structure */ - u16 sheng_end; /**< first non-sheng state */ - u16 sheng_accel_limit; /**< first sheng accel state. state given in terms of - * internal sheng ids */ - u16 accel_limit_8; /**< 8 bit, lowest accelerable state */ - u16 accept_limit_8; /**< 8 bit, lowest accept state */ - u16 sherman_limit; /**< lowest sherman state */ - u8 alphaShift; - u8 flags; - u8 has_accel; /**< 1 iff there are any accel plans */ - u8 remap[256]; /**< remaps characters to a smaller alphabet */ - ReportID arb_report; /**< one of the accepts that this dfa may raise */ - u32 accel_offset; /**< offset of accel structures from start of McClellan */ - m512 sheng_succ_masks[N_CHARS]; -}; - -extern const u64a mcsheng64_pext_mask[8]; - +struct mcsheng64 { + u16 state_count; /**< total number of states */ + u32 length; /**< length of dfa in bytes */ + u16 start_anchored; /**< anchored start state */ + u16 start_floating; /**< floating start state */ + u32 aux_offset; /**< offset of the aux structures relative to the start of + * the nfa structure */ + u32 sherman_offset; /**< offset of array of sherman state offsets the + * state_info structures relative to the start of the + * nfa structure */ + u32 sherman_end; /**< offset of the end of the state_info structures + * relative to the start of the nfa structure */ + u16 sheng_end; /**< first non-sheng state */ + u16 sheng_accel_limit; /**< first sheng accel state. state given in terms of + * internal sheng ids */ + u16 accel_limit_8; /**< 8 bit, lowest accelerable state */ + u16 accept_limit_8; /**< 8 bit, lowest accept state */ + u16 sherman_limit; /**< lowest sherman state */ + u8 alphaShift; + u8 flags; + u8 has_accel; /**< 1 iff there are any accel plans */ + u8 remap[256]; /**< remaps characters to a smaller alphabet */ + ReportID arb_report; /**< one of the accepts that this dfa may raise */ + u32 accel_offset; /**< offset of accel structures from start of McClellan */ + m512 sheng_succ_masks[N_CHARS]; +}; + +extern const u64a mcsheng64_pext_mask[8]; + #endif diff --git a/contrib/libs/hyperscan/src/nfa/nfa_api_dispatch.c b/contrib/libs/hyperscan/src/nfa/nfa_api_dispatch.c index 7de11f3e97..75cac4b481 100644 --- a/contrib/libs/hyperscan/src/nfa/nfa_api_dispatch.c +++ b/contrib/libs/hyperscan/src/nfa/nfa_api_dispatch.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2020, Intel Corporation + * Copyright (c) 2015-2020, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -76,10 +76,10 @@ DISPATCH_CASE(TAMARAMA_NFA, Tamarama, dbnt_func); \ DISPATCH_CASE(MCSHENG_NFA_8, McSheng8, dbnt_func); \ DISPATCH_CASE(MCSHENG_NFA_16, McSheng16, dbnt_func); \ - DISPATCH_CASE(SHENG_NFA_32, Sheng32, dbnt_func); \ - DISPATCH_CASE(SHENG_NFA_64, Sheng64, dbnt_func); \ - DISPATCH_CASE(MCSHENG_64_NFA_8, McSheng64_8, dbnt_func); \ - DISPATCH_CASE(MCSHENG_64_NFA_16, McSheng64_16, dbnt_func); \ + DISPATCH_CASE(SHENG_NFA_32, Sheng32, dbnt_func); \ + DISPATCH_CASE(SHENG_NFA_64, Sheng64, dbnt_func); \ + DISPATCH_CASE(MCSHENG_64_NFA_8, McSheng64_8, dbnt_func); \ + DISPATCH_CASE(MCSHENG_64_NFA_16, McSheng64_16, dbnt_func); \ default: \ assert(0); \ } diff --git a/contrib/libs/hyperscan/src/nfa/nfa_build_util.cpp b/contrib/libs/hyperscan/src/nfa/nfa_build_util.cpp index 2645cdefab..47153163e9 100644 --- a/contrib/libs/hyperscan/src/nfa/nfa_build_util.cpp +++ b/contrib/libs/hyperscan/src/nfa/nfa_build_util.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2020, Intel Corporation + * Copyright (c) 2015-2020, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -430,65 +430,65 @@ const nfa_dispatch_fn NFATraits<MCSHENG_NFA_16>::has_repeats_other_than_firsts = const char *NFATraits<MCSHENG_NFA_16>::name = "Shengy McShengFace 16"; #endif -template<> struct NFATraits<SHENG_NFA_32> { - UNUSED static const char *name; - static const NFACategory category = NFA_OTHER; - static const u32 stateAlign = 1; - static const nfa_dispatch_fn has_accel; - static const nfa_dispatch_fn has_repeats; - static const nfa_dispatch_fn has_repeats_other_than_firsts; -}; -const nfa_dispatch_fn NFATraits<SHENG_NFA_32>::has_accel = has_accel_sheng; -const nfa_dispatch_fn NFATraits<SHENG_NFA_32>::has_repeats = dispatch_false; -const nfa_dispatch_fn NFATraits<SHENG_NFA_32>::has_repeats_other_than_firsts = dispatch_false; -#if defined(DUMP_SUPPORT) -const char *NFATraits<SHENG_NFA_32>::name = "Sheng 32"; -#endif - -template<> struct NFATraits<SHENG_NFA_64> { - UNUSED static const char *name; - static const NFACategory category = NFA_OTHER; - static const u32 stateAlign = 1; - static const nfa_dispatch_fn has_accel; - static const nfa_dispatch_fn has_repeats; - static const nfa_dispatch_fn has_repeats_other_than_firsts; -}; -const nfa_dispatch_fn NFATraits<SHENG_NFA_64>::has_accel = has_accel_sheng; -const nfa_dispatch_fn NFATraits<SHENG_NFA_64>::has_repeats = dispatch_false; -const nfa_dispatch_fn NFATraits<SHENG_NFA_64>::has_repeats_other_than_firsts = dispatch_false; -#if defined(DUMP_SUPPORT) -const char *NFATraits<SHENG_NFA_64>::name = "Sheng 64"; -#endif - -template<> struct NFATraits<MCSHENG_64_NFA_8> { - UNUSED static const char *name; - static const NFACategory category = NFA_OTHER; - static const u32 stateAlign = 1; - static const nfa_dispatch_fn has_accel; - static const nfa_dispatch_fn has_repeats; - static const nfa_dispatch_fn has_repeats_other_than_firsts; -}; -const nfa_dispatch_fn NFATraits<MCSHENG_64_NFA_8>::has_accel = has_accel_mcsheng; -const nfa_dispatch_fn NFATraits<MCSHENG_64_NFA_8>::has_repeats = dispatch_false; -const nfa_dispatch_fn NFATraits<MCSHENG_64_NFA_8>::has_repeats_other_than_firsts = dispatch_false; -#if defined(DUMP_SUPPORT) -const char *NFATraits<MCSHENG_64_NFA_8>::name = "Shengy64 McShengFace 8"; -#endif - -template<> struct NFATraits<MCSHENG_64_NFA_16> { - UNUSED static const char *name; - static const NFACategory category = NFA_OTHER; - static const u32 stateAlign = 2; - static const nfa_dispatch_fn has_accel; - static const nfa_dispatch_fn has_repeats; - static const nfa_dispatch_fn has_repeats_other_than_firsts; -}; -const nfa_dispatch_fn NFATraits<MCSHENG_64_NFA_16>::has_accel = has_accel_mcsheng; -const nfa_dispatch_fn NFATraits<MCSHENG_64_NFA_16>::has_repeats = dispatch_false; -const nfa_dispatch_fn NFATraits<MCSHENG_64_NFA_16>::has_repeats_other_than_firsts = dispatch_false; -#if defined(DUMP_SUPPORT) -const char *NFATraits<MCSHENG_64_NFA_16>::name = "Shengy64 McShengFace 16"; -#endif +template<> struct NFATraits<SHENG_NFA_32> { + UNUSED static const char *name; + static const NFACategory category = NFA_OTHER; + static const u32 stateAlign = 1; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; +}; +const nfa_dispatch_fn NFATraits<SHENG_NFA_32>::has_accel = has_accel_sheng; +const nfa_dispatch_fn NFATraits<SHENG_NFA_32>::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits<SHENG_NFA_32>::has_repeats_other_than_firsts = dispatch_false; +#if defined(DUMP_SUPPORT) +const char *NFATraits<SHENG_NFA_32>::name = "Sheng 32"; +#endif + +template<> struct NFATraits<SHENG_NFA_64> { + UNUSED static const char *name; + static const NFACategory category = NFA_OTHER; + static const u32 stateAlign = 1; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; +}; +const nfa_dispatch_fn NFATraits<SHENG_NFA_64>::has_accel = has_accel_sheng; +const nfa_dispatch_fn NFATraits<SHENG_NFA_64>::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits<SHENG_NFA_64>::has_repeats_other_than_firsts = dispatch_false; +#if defined(DUMP_SUPPORT) +const char *NFATraits<SHENG_NFA_64>::name = "Sheng 64"; +#endif + +template<> struct NFATraits<MCSHENG_64_NFA_8> { + UNUSED static const char *name; + static const NFACategory category = NFA_OTHER; + static const u32 stateAlign = 1; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; +}; +const nfa_dispatch_fn NFATraits<MCSHENG_64_NFA_8>::has_accel = has_accel_mcsheng; +const nfa_dispatch_fn NFATraits<MCSHENG_64_NFA_8>::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits<MCSHENG_64_NFA_8>::has_repeats_other_than_firsts = dispatch_false; +#if defined(DUMP_SUPPORT) +const char *NFATraits<MCSHENG_64_NFA_8>::name = "Shengy64 McShengFace 8"; +#endif + +template<> struct NFATraits<MCSHENG_64_NFA_16> { + UNUSED static const char *name; + static const NFACategory category = NFA_OTHER; + static const u32 stateAlign = 2; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; +}; +const nfa_dispatch_fn NFATraits<MCSHENG_64_NFA_16>::has_accel = has_accel_mcsheng; +const nfa_dispatch_fn NFATraits<MCSHENG_64_NFA_16>::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits<MCSHENG_64_NFA_16>::has_repeats_other_than_firsts = dispatch_false; +#if defined(DUMP_SUPPORT) +const char *NFATraits<MCSHENG_64_NFA_16>::name = "Shengy64 McShengFace 16"; +#endif } // namespace #if defined(DUMP_SUPPORT) diff --git a/contrib/libs/hyperscan/src/nfa/nfa_build_util.h b/contrib/libs/hyperscan/src/nfa/nfa_build_util.h index 15a30becc9..ee7a309494 100644 --- a/contrib/libs/hyperscan/src/nfa/nfa_build_util.h +++ b/contrib/libs/hyperscan/src/nfa/nfa_build_util.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2020, Intel Corporation + * Copyright (c) 2015-2020, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: diff --git a/contrib/libs/hyperscan/src/nfa/nfa_internal.h b/contrib/libs/hyperscan/src/nfa/nfa_internal.h index 46dbbecacc..ad27e28b14 100644 --- a/contrib/libs/hyperscan/src/nfa/nfa_internal.h +++ b/contrib/libs/hyperscan/src/nfa/nfa_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2020, Intel Corporation + * Copyright (c) 2015-2020, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -72,10 +72,10 @@ enum NFAEngineType { TAMARAMA_NFA, /**< magic nfa container */ MCSHENG_NFA_8, /**< magic pseudo nfa */ MCSHENG_NFA_16, /**< magic pseudo nfa */ - SHENG_NFA_32, /**< magic pseudo nfa */ - SHENG_NFA_64, /**< magic pseudo nfa */ - MCSHENG_64_NFA_8, /**< magic pseudo nfa */ - MCSHENG_64_NFA_16, /**< magic pseudo nfa */ + SHENG_NFA_32, /**< magic pseudo nfa */ + SHENG_NFA_64, /**< magic pseudo nfa */ + MCSHENG_64_NFA_8, /**< magic pseudo nfa */ + MCSHENG_64_NFA_16, /**< magic pseudo nfa */ /** \brief bogus NFA - not used */ INVALID_NFA }; @@ -152,8 +152,8 @@ static really_inline int isMcClellanType(u8 t) { /** \brief True if the given type (from NFA::type) is a Sheng-McClellan hybrid * DFA. */ static really_inline int isShengMcClellanType(u8 t) { - return t == MCSHENG_NFA_8 || t == MCSHENG_NFA_16 || - t == MCSHENG_64_NFA_8 || t == MCSHENG_64_NFA_16; + return t == MCSHENG_NFA_8 || t == MCSHENG_NFA_16 || + t == MCSHENG_64_NFA_8 || t == MCSHENG_64_NFA_16; } /** \brief True if the given type (from NFA::type) is a Gough DFA. */ @@ -162,25 +162,25 @@ static really_inline int isGoughType(u8 t) { } /** \brief True if the given type (from NFA::type) is a Sheng DFA. */ -static really_inline int isSheng16Type(u8 t) { +static really_inline int isSheng16Type(u8 t) { return t == SHENG_NFA; } -/** \brief True if the given type (from NFA::type) is a Sheng32 DFA. */ -static really_inline int isSheng32Type(u8 t) { - return t == SHENG_NFA_32; -} - -/** \brief True if the given type (from NFA::type) is a Sheng64 DFA. */ -static really_inline int isSheng64Type(u8 t) { - return t == SHENG_NFA_64; -} - -/** \brief True if the given type (from NFA::type) is a Sheng16/32/64 DFA. */ -static really_inline int isShengType(u8 t) { - return t == SHENG_NFA || t == SHENG_NFA_32 || t == SHENG_NFA_64; -} - +/** \brief True if the given type (from NFA::type) is a Sheng32 DFA. */ +static really_inline int isSheng32Type(u8 t) { + return t == SHENG_NFA_32; +} + +/** \brief True if the given type (from NFA::type) is a Sheng64 DFA. */ +static really_inline int isSheng64Type(u8 t) { + return t == SHENG_NFA_64; +} + +/** \brief True if the given type (from NFA::type) is a Sheng16/32/64 DFA. */ +static really_inline int isShengType(u8 t) { + return t == SHENG_NFA || t == SHENG_NFA_32 || t == SHENG_NFA_64; +} + /** * \brief True if the given type (from NFA::type) is a McClellan, Gough or * Sheng DFA. diff --git a/contrib/libs/hyperscan/src/nfa/sheng.c b/contrib/libs/hyperscan/src/nfa/sheng.c index 7673131501..3f36e21891 100644 --- a/contrib/libs/hyperscan/src/nfa/sheng.c +++ b/contrib/libs/hyperscan/src/nfa/sheng.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Intel Corporation + * Copyright (c) 2016-2020, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -154,205 +154,205 @@ char fireReports(const struct sheng *sh, NfaCallback cb, void *ctxt, return MO_CONTINUE_MATCHING; /* continue execution */ } -#if defined(HAVE_AVX512VBMI) -// Sheng32 -static really_inline -const struct sheng32 *get_sheng32(const struct NFA *n) { - return (const struct sheng32 *)getImplNfa(n); -} - -static really_inline -const struct sstate_aux *get_aux32(const struct sheng32 *sh, u8 id) { - u32 offset = sh->aux_offset - sizeof(struct NFA) + - (id & SHENG32_STATE_MASK) * sizeof(struct sstate_aux); - DEBUG_PRINTF("Getting aux for state %u at offset %llu\n", - id & SHENG32_STATE_MASK, (u64a)offset + sizeof(struct NFA)); - return (const struct sstate_aux *)((const char *) sh + offset); -} - -static really_inline -const union AccelAux *get_accel32(const struct sheng32 *sh, u8 id) { - const struct sstate_aux *saux = get_aux32(sh, id); - DEBUG_PRINTF("Getting accel aux at offset %u\n", saux->accel); - const union AccelAux *aux = (const union AccelAux *) - ((const char *)sh + saux->accel - sizeof(struct NFA)); - return aux; -} - -static really_inline -const struct report_list *get_rl32(const struct sheng32 *sh, - const struct sstate_aux *aux) { - DEBUG_PRINTF("Getting report list at offset %u\n", aux->accept); - return (const struct report_list *) - ((const char *)sh + aux->accept - sizeof(struct NFA)); -} - -static really_inline -const struct report_list *get_eod_rl32(const struct sheng32 *sh, - const struct sstate_aux *aux) { - DEBUG_PRINTF("Getting EOD report list at offset %u\n", aux->accept); - return (const struct report_list *) - ((const char *)sh + aux->accept_eod - sizeof(struct NFA)); -} - -static really_inline -char sheng32HasAccept(const struct sheng32 *sh, const struct sstate_aux *aux, - ReportID report) { - assert(sh && aux); - - const struct report_list *rl = get_rl32(sh, aux); - assert(ISALIGNED_N(rl, 4)); - - DEBUG_PRINTF("report list has %u entries\n", rl->count); - - for (u32 i = 0; i < rl->count; i++) { - if (rl->report[i] == report) { - DEBUG_PRINTF("reporting %u\n", rl->report[i]); - return 1; - } - } - - return 0; -} - -static really_inline -char fireReports32(const struct sheng32 *sh, NfaCallback cb, void *ctxt, - const u8 state, u64a loc, u8 *const cached_accept_state, - ReportID *const cached_accept_id, char eod) { - DEBUG_PRINTF("reporting matches @ %llu\n", loc); - - if (!eod && state == *cached_accept_state) { - DEBUG_PRINTF("reporting %u\n", *cached_accept_id); - if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; /* termination requested */ - } - - return MO_CONTINUE_MATCHING; /* continue execution */ - } - const struct sstate_aux *aux = get_aux32(sh, state); - const struct report_list *rl = eod ? get_eod_rl32(sh, aux) : - get_rl32(sh, aux); - assert(ISALIGNED(rl)); - - DEBUG_PRINTF("report list has %u entries\n", rl->count); - u32 count = rl->count; - - if (!eod && count == 1) { - *cached_accept_state = state; - *cached_accept_id = rl->report[0]; - - DEBUG_PRINTF("reporting %u\n", rl->report[0]); - if (cb(0, loc, rl->report[0], ctxt) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; /* termination requested */ - } - - return MO_CONTINUE_MATCHING; /* continue execution */ - } - - for (u32 i = 0; i < count; i++) { - DEBUG_PRINTF("reporting %u\n", rl->report[i]); - if (cb(0, loc, rl->report[i], ctxt) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; /* termination requested */ - } - } - return MO_CONTINUE_MATCHING; /* continue execution */ -} - -// Sheng64 -static really_inline -const struct sheng64 *get_sheng64(const struct NFA *n) { - return (const struct sheng64 *)getImplNfa(n); -} - -static really_inline -const struct sstate_aux *get_aux64(const struct sheng64 *sh, u8 id) { - u32 offset = sh->aux_offset - sizeof(struct NFA) + - (id & SHENG64_STATE_MASK) * sizeof(struct sstate_aux); - DEBUG_PRINTF("Getting aux for state %u at offset %llu\n", - id & SHENG64_STATE_MASK, (u64a)offset + sizeof(struct NFA)); - return (const struct sstate_aux *)((const char *) sh + offset); -} - -static really_inline -const struct report_list *get_rl64(const struct sheng64 *sh, - const struct sstate_aux *aux) { - DEBUG_PRINTF("Getting report list at offset %u\n", aux->accept); - return (const struct report_list *) - ((const char *)sh + aux->accept - sizeof(struct NFA)); -} - -static really_inline -const struct report_list *get_eod_rl64(const struct sheng64 *sh, - const struct sstate_aux *aux) { - DEBUG_PRINTF("Getting EOD report list at offset %u\n", aux->accept); - return (const struct report_list *) - ((const char *)sh + aux->accept_eod - sizeof(struct NFA)); -} - -static really_inline -char sheng64HasAccept(const struct sheng64 *sh, const struct sstate_aux *aux, - ReportID report) { - assert(sh && aux); - - const struct report_list *rl = get_rl64(sh, aux); - assert(ISALIGNED_N(rl, 4)); - - DEBUG_PRINTF("report list has %u entries\n", rl->count); - - for (u32 i = 0; i < rl->count; i++) { - if (rl->report[i] == report) { - DEBUG_PRINTF("reporting %u\n", rl->report[i]); - return 1; - } - } - - return 0; -} - -static really_inline -char fireReports64(const struct sheng64 *sh, NfaCallback cb, void *ctxt, - const u8 state, u64a loc, u8 *const cached_accept_state, - ReportID *const cached_accept_id, char eod) { - DEBUG_PRINTF("reporting matches @ %llu\n", loc); - - if (!eod && state == *cached_accept_state) { - DEBUG_PRINTF("reporting %u\n", *cached_accept_id); - if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; /* termination requested */ - } - - return MO_CONTINUE_MATCHING; /* continue execution */ - } - const struct sstate_aux *aux = get_aux64(sh, state); - const struct report_list *rl = eod ? get_eod_rl64(sh, aux) : - get_rl64(sh, aux); - assert(ISALIGNED(rl)); - - DEBUG_PRINTF("report list has %u entries\n", rl->count); - u32 count = rl->count; - - if (!eod && count == 1) { - *cached_accept_state = state; - *cached_accept_id = rl->report[0]; - - DEBUG_PRINTF("reporting %u\n", rl->report[0]); - if (cb(0, loc, rl->report[0], ctxt) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; /* termination requested */ - } - - return MO_CONTINUE_MATCHING; /* continue execution */ - } - - for (u32 i = 0; i < count; i++) { - DEBUG_PRINTF("reporting %u\n", rl->report[i]); - if (cb(0, loc, rl->report[i], ctxt) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; /* termination requested */ - } - } - return MO_CONTINUE_MATCHING; /* continue execution */ -} -#endif // end of HAVE_AVX512VBMI - +#if defined(HAVE_AVX512VBMI) +// Sheng32 +static really_inline +const struct sheng32 *get_sheng32(const struct NFA *n) { + return (const struct sheng32 *)getImplNfa(n); +} + +static really_inline +const struct sstate_aux *get_aux32(const struct sheng32 *sh, u8 id) { + u32 offset = sh->aux_offset - sizeof(struct NFA) + + (id & SHENG32_STATE_MASK) * sizeof(struct sstate_aux); + DEBUG_PRINTF("Getting aux for state %u at offset %llu\n", + id & SHENG32_STATE_MASK, (u64a)offset + sizeof(struct NFA)); + return (const struct sstate_aux *)((const char *) sh + offset); +} + +static really_inline +const union AccelAux *get_accel32(const struct sheng32 *sh, u8 id) { + const struct sstate_aux *saux = get_aux32(sh, id); + DEBUG_PRINTF("Getting accel aux at offset %u\n", saux->accel); + const union AccelAux *aux = (const union AccelAux *) + ((const char *)sh + saux->accel - sizeof(struct NFA)); + return aux; +} + +static really_inline +const struct report_list *get_rl32(const struct sheng32 *sh, + const struct sstate_aux *aux) { + DEBUG_PRINTF("Getting report list at offset %u\n", aux->accept); + return (const struct report_list *) + ((const char *)sh + aux->accept - sizeof(struct NFA)); +} + +static really_inline +const struct report_list *get_eod_rl32(const struct sheng32 *sh, + const struct sstate_aux *aux) { + DEBUG_PRINTF("Getting EOD report list at offset %u\n", aux->accept); + return (const struct report_list *) + ((const char *)sh + aux->accept_eod - sizeof(struct NFA)); +} + +static really_inline +char sheng32HasAccept(const struct sheng32 *sh, const struct sstate_aux *aux, + ReportID report) { + assert(sh && aux); + + const struct report_list *rl = get_rl32(sh, aux); + assert(ISALIGNED_N(rl, 4)); + + DEBUG_PRINTF("report list has %u entries\n", rl->count); + + for (u32 i = 0; i < rl->count; i++) { + if (rl->report[i] == report) { + DEBUG_PRINTF("reporting %u\n", rl->report[i]); + return 1; + } + } + + return 0; +} + +static really_inline +char fireReports32(const struct sheng32 *sh, NfaCallback cb, void *ctxt, + const u8 state, u64a loc, u8 *const cached_accept_state, + ReportID *const cached_accept_id, char eod) { + DEBUG_PRINTF("reporting matches @ %llu\n", loc); + + if (!eod && state == *cached_accept_state) { + DEBUG_PRINTF("reporting %u\n", *cached_accept_id); + if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; /* termination requested */ + } + + return MO_CONTINUE_MATCHING; /* continue execution */ + } + const struct sstate_aux *aux = get_aux32(sh, state); + const struct report_list *rl = eod ? get_eod_rl32(sh, aux) : + get_rl32(sh, aux); + assert(ISALIGNED(rl)); + + DEBUG_PRINTF("report list has %u entries\n", rl->count); + u32 count = rl->count; + + if (!eod && count == 1) { + *cached_accept_state = state; + *cached_accept_id = rl->report[0]; + + DEBUG_PRINTF("reporting %u\n", rl->report[0]); + if (cb(0, loc, rl->report[0], ctxt) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; /* termination requested */ + } + + return MO_CONTINUE_MATCHING; /* continue execution */ + } + + for (u32 i = 0; i < count; i++) { + DEBUG_PRINTF("reporting %u\n", rl->report[i]); + if (cb(0, loc, rl->report[i], ctxt) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; /* termination requested */ + } + } + return MO_CONTINUE_MATCHING; /* continue execution */ +} + +// Sheng64 +static really_inline +const struct sheng64 *get_sheng64(const struct NFA *n) { + return (const struct sheng64 *)getImplNfa(n); +} + +static really_inline +const struct sstate_aux *get_aux64(const struct sheng64 *sh, u8 id) { + u32 offset = sh->aux_offset - sizeof(struct NFA) + + (id & SHENG64_STATE_MASK) * sizeof(struct sstate_aux); + DEBUG_PRINTF("Getting aux for state %u at offset %llu\n", + id & SHENG64_STATE_MASK, (u64a)offset + sizeof(struct NFA)); + return (const struct sstate_aux *)((const char *) sh + offset); +} + +static really_inline +const struct report_list *get_rl64(const struct sheng64 *sh, + const struct sstate_aux *aux) { + DEBUG_PRINTF("Getting report list at offset %u\n", aux->accept); + return (const struct report_list *) + ((const char *)sh + aux->accept - sizeof(struct NFA)); +} + +static really_inline +const struct report_list *get_eod_rl64(const struct sheng64 *sh, + const struct sstate_aux *aux) { + DEBUG_PRINTF("Getting EOD report list at offset %u\n", aux->accept); + return (const struct report_list *) + ((const char *)sh + aux->accept_eod - sizeof(struct NFA)); +} + +static really_inline +char sheng64HasAccept(const struct sheng64 *sh, const struct sstate_aux *aux, + ReportID report) { + assert(sh && aux); + + const struct report_list *rl = get_rl64(sh, aux); + assert(ISALIGNED_N(rl, 4)); + + DEBUG_PRINTF("report list has %u entries\n", rl->count); + + for (u32 i = 0; i < rl->count; i++) { + if (rl->report[i] == report) { + DEBUG_PRINTF("reporting %u\n", rl->report[i]); + return 1; + } + } + + return 0; +} + +static really_inline +char fireReports64(const struct sheng64 *sh, NfaCallback cb, void *ctxt, + const u8 state, u64a loc, u8 *const cached_accept_state, + ReportID *const cached_accept_id, char eod) { + DEBUG_PRINTF("reporting matches @ %llu\n", loc); + + if (!eod && state == *cached_accept_state) { + DEBUG_PRINTF("reporting %u\n", *cached_accept_id); + if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; /* termination requested */ + } + + return MO_CONTINUE_MATCHING; /* continue execution */ + } + const struct sstate_aux *aux = get_aux64(sh, state); + const struct report_list *rl = eod ? get_eod_rl64(sh, aux) : + get_rl64(sh, aux); + assert(ISALIGNED(rl)); + + DEBUG_PRINTF("report list has %u entries\n", rl->count); + u32 count = rl->count; + + if (!eod && count == 1) { + *cached_accept_state = state; + *cached_accept_id = rl->report[0]; + + DEBUG_PRINTF("reporting %u\n", rl->report[0]); + if (cb(0, loc, rl->report[0], ctxt) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; /* termination requested */ + } + + return MO_CONTINUE_MATCHING; /* continue execution */ + } + + for (u32 i = 0; i < count; i++) { + DEBUG_PRINTF("reporting %u\n", rl->report[i]); + if (cb(0, loc, rl->report[i], ctxt) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; /* termination requested */ + } + } + return MO_CONTINUE_MATCHING; /* continue execution */ +} +#endif // end of HAVE_AVX512VBMI + /* include Sheng function definitions */ #include "sheng_defs.h" @@ -827,7 +827,7 @@ char nfaExecSheng_reportCurrent(const struct NFA *n, struct mq *q) { fireSingleReport(cb, ctxt, sh->report, offset); } else { fireReports(sh, cb, ctxt, s, offset, &cached_state_id, - &cached_report_id, 0); + &cached_report_id, 0); } } @@ -870,1008 +870,1008 @@ char nfaExecSheng_expandState(UNUSED const struct NFA *nfa, void *dest, *(u8 *)dest = *(const u8 *)src; return 0; } - -#if defined(HAVE_AVX512VBMI) -// Sheng32 -static really_inline -char runSheng32Cb(const struct sheng32 *sh, NfaCallback cb, void *ctxt, - u64a offset, u8 *const cached_accept_state, - ReportID *const cached_accept_id, const u8 *cur_buf, - const u8 *start, const u8 *end, u8 can_die, - u8 has_accel, u8 single, const u8 **scanned, u8 *state) { - DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in callback mode\n", - (u64a)(end - start), offset); - DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf), - (s64a)(end - cur_buf)); - DEBUG_PRINTF("can die: %u has accel: %u single: %u\n", !!can_die, - !!has_accel, !!single); - int rv; - /* scan and report all matches */ - if (can_die) { - if (has_accel) { - rv = sheng32_4_coda(state, cb, ctxt, sh, cached_accept_state, - cached_accept_id, single, offset, cur_buf, - start, end, scanned); - } else { - rv = sheng32_4_cod(state, cb, ctxt, sh, cached_accept_state, - cached_accept_id, single, offset, cur_buf, - start, end, scanned); - } - if (rv == MO_HALT_MATCHING) { - return MO_DEAD; - } - rv = sheng32_cod(state, cb, ctxt, sh, cached_accept_state, - cached_accept_id, single, offset, cur_buf, - *scanned, end, scanned); - } else { - if (has_accel) { - rv = sheng32_4_coa(state, cb, ctxt, sh, cached_accept_state, - cached_accept_id, single, offset, cur_buf, - start, end, scanned); - } else { - rv = sheng32_4_co(state, cb, ctxt, sh, cached_accept_state, - cached_accept_id, single, offset, cur_buf, - start, end, scanned); - } - if (rv == MO_HALT_MATCHING) { - return MO_DEAD; - } - rv = sheng32_co(state, cb, ctxt, sh, cached_accept_state, - cached_accept_id, single, offset, cur_buf, - *scanned, end, scanned); - } - if (rv == MO_HALT_MATCHING) { - return MO_DEAD; - } - return MO_ALIVE; -} - -static really_inline -void runSheng32Nm(const struct sheng32 *sh, NfaCallback cb, void *ctxt, - u64a offset, u8 *const cached_accept_state, - ReportID *const cached_accept_id, const u8 *cur_buf, - const u8 *start, const u8 *end, u8 can_die, u8 has_accel, - u8 single, const u8 **scanned, u8 *state) { - DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in nomatch mode\n", - (u64a)(end - start), offset); - DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf), - (s64a)(end - cur_buf)); - DEBUG_PRINTF("can die: %u has accel: %u single: %u\n", !!can_die, - !!has_accel, !!single); - /* just scan the buffer */ - if (can_die) { - if (has_accel) { - sheng32_4_nmda(state, cb, ctxt, sh, cached_accept_state, - cached_accept_id, single, offset, cur_buf, - start, end, scanned); - } else { - sheng32_4_nmd(state, cb, ctxt, sh, cached_accept_state, - cached_accept_id, single, offset, cur_buf, - start, end, scanned); - } - sheng32_nmd(state, cb, ctxt, sh, cached_accept_state, cached_accept_id, - single, offset, cur_buf, *scanned, end, scanned); - } else { - sheng32_4_nm(state, cb, ctxt, sh, cached_accept_state, cached_accept_id, - single, offset, cur_buf, start, end, scanned); - sheng32_nm(state, cb, ctxt, sh, cached_accept_state, cached_accept_id, - single, offset, cur_buf, *scanned, end, scanned); - } -} - -static really_inline -char runSheng32Sam(const struct sheng32 *sh, NfaCallback cb, void *ctxt, - u64a offset, u8 *const cached_accept_state, - ReportID *const cached_accept_id, const u8 *cur_buf, - const u8 *start, const u8 *end, u8 can_die, u8 has_accel, - u8 single, const u8 **scanned, u8 *state) { - DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in stop at match mode\n", - (u64a)(end - start), offset); - DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf), - (s64a)(end - cur_buf)); - DEBUG_PRINTF("can die: %u has accel: %u single: %u\n", !!can_die, - !!has_accel, !!single); - int rv; - /* scan until first match */ - if (can_die) { - if (has_accel) { - rv = sheng32_4_samda(state, cb, ctxt, sh, cached_accept_state, - cached_accept_id, single, offset, cur_buf, - start, end, scanned); - } else { - rv = sheng32_4_samd(state, cb, ctxt, sh, cached_accept_state, - cached_accept_id, single, offset, cur_buf, - start, end, scanned); - } - if (rv == MO_HALT_MATCHING) { - return MO_DEAD; - } - /* if we stopped before we expected, we found a match */ - if (rv == MO_MATCHES_PENDING) { - return MO_MATCHES_PENDING; - } - - rv = sheng32_samd(state, cb, ctxt, sh, cached_accept_state, - cached_accept_id, single, offset, cur_buf, - *scanned, end, scanned); - } else { - if (has_accel) { - rv = sheng32_4_sama(state, cb, ctxt, sh, cached_accept_state, - cached_accept_id, single, offset, cur_buf, - start, end, scanned); - } else { - rv = sheng32_4_sam(state, cb, ctxt, sh, cached_accept_state, - cached_accept_id, single, offset, cur_buf, - start, end, scanned); - } - if (rv == MO_HALT_MATCHING) { - return MO_DEAD; - } - /* if we stopped before we expected, we found a match */ - if (rv == MO_MATCHES_PENDING) { - return MO_MATCHES_PENDING; - } - - rv = sheng32_sam(state, cb, ctxt, sh, cached_accept_state, - cached_accept_id, single, offset, cur_buf, - *scanned, end, scanned); - } - if (rv == MO_HALT_MATCHING) { - return MO_DEAD; - } - /* if we stopped before we expected, we found a match */ - if (rv == MO_MATCHES_PENDING) { - return MO_MATCHES_PENDING; - } - return MO_ALIVE; -} - -static never_inline -char runSheng32(const struct sheng32 *sh, struct mq *q, s64a b_end, - enum MatchMode mode) { - u8 state = *(u8 *)q->state; - u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE; - u8 has_accel = sh->flags & SHENG_FLAG_HAS_ACCEL; - u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT; - - u8 cached_accept_state = 0; - ReportID cached_accept_id = 0; - - DEBUG_PRINTF("starting Sheng32 execution in state %u\n", - state & SHENG32_STATE_MASK); - - if (q->report_current) { - DEBUG_PRINTF("reporting current pending matches\n"); - assert(sh); - - q->report_current = 0; - - int rv; - if (single) { - rv = fireSingleReport(q->cb, q->context, sh->report, - q_cur_offset(q)); - } else { - rv = fireReports32(sh, q->cb, q->context, state, q_cur_offset(q), - &cached_accept_state, &cached_accept_id, 0); - } - if (rv == MO_HALT_MATCHING) { - DEBUG_PRINTF("exiting in state %u\n", state & SHENG32_STATE_MASK); - return MO_DEAD; - } - - DEBUG_PRINTF("proceeding with matching\n"); - } - - assert(q_cur_type(q) == MQE_START); - s64a start = q_cur_loc(q); - - DEBUG_PRINTF("offset: %lli, location: %lli, mode: %s\n", q->offset, start, - mode == CALLBACK_OUTPUT ? "CALLBACK OUTPUT" : - mode == NO_MATCHES ? "NO MATCHES" : - mode == STOP_AT_MATCH ? "STOP AT MATCH" : "???"); - - DEBUG_PRINTF("processing event @ %lli: %s\n", q->offset + q_cur_loc(q), - q_cur_type(q) == MQE_START ? "START" : - q_cur_type(q) == MQE_TOP ? "TOP" : - q_cur_type(q) == MQE_END ? "END" : "???"); - - const u8* cur_buf; - if (start < 0) { - DEBUG_PRINTF("negative location, scanning history\n"); - DEBUG_PRINTF("min location: %zd\n", -q->hlength); - cur_buf = q->history + q->hlength; - } else { - DEBUG_PRINTF("positive location, scanning buffer\n"); - DEBUG_PRINTF("max location: %lli\n", b_end); - cur_buf = q->buffer; - } - - /* if we our queue event is past our end */ - if (mode != NO_MATCHES && q_cur_loc(q) > b_end) { - DEBUG_PRINTF("current location past buffer end\n"); - DEBUG_PRINTF("setting q location to %llu\n", b_end); - DEBUG_PRINTF("exiting in state %u\n", state & SHENG32_STATE_MASK); - q->items[q->cur].location = b_end; - return MO_ALIVE; - } - - q->cur++; - - s64a cur_start = start; - - while (1) { - DEBUG_PRINTF("processing event @ %lli: %s\n", q->offset + q_cur_loc(q), - q_cur_type(q) == MQE_START ? "START" : - q_cur_type(q) == MQE_TOP ? "TOP" : - q_cur_type(q) == MQE_END ? "END" : "???"); - s64a end = q_cur_loc(q); - if (mode != NO_MATCHES) { - end = MIN(end, b_end); - } - assert(end <= (s64a) q->length); - s64a cur_end = end; - - /* we may cross the border between history and current buffer */ - if (cur_start < 0) { - cur_end = MIN(0, cur_end); - } - - DEBUG_PRINTF("start: %lli end: %lli\n", start, end); - - /* don't scan zero length buffer */ - if (cur_start != cur_end) { - const u8 * scanned = cur_buf; - char rv; - - if (mode == NO_MATCHES) { - runSheng32Nm(sh, q->cb, q->context, q->offset, - &cached_accept_state, &cached_accept_id, cur_buf, - cur_buf + cur_start, cur_buf + cur_end, can_die, - has_accel, single, &scanned, &state); - } else if (mode == CALLBACK_OUTPUT) { - rv = runSheng32Cb(sh, q->cb, q->context, q->offset, - &cached_accept_state, &cached_accept_id, - cur_buf, cur_buf + cur_start, cur_buf + cur_end, - can_die, has_accel, single, &scanned, &state); - if (rv == MO_DEAD) { - DEBUG_PRINTF("exiting in state %u\n", - state & SHENG32_STATE_MASK); - return MO_DEAD; - } - } else if (mode == STOP_AT_MATCH) { - rv = runSheng32Sam(sh, q->cb, q->context, q->offset, - &cached_accept_state, &cached_accept_id, - cur_buf, cur_buf + cur_start, - cur_buf + cur_end, can_die, has_accel, single, - &scanned, &state); - if (rv == MO_DEAD) { - DEBUG_PRINTF("exiting in state %u\n", - state & SHENG32_STATE_MASK); - return rv; - } else if (rv == MO_MATCHES_PENDING) { - assert(q->cur); - DEBUG_PRINTF("found a match, setting q location to %zd\n", - scanned - cur_buf + 1); - q->cur--; - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = - scanned - cur_buf + 1; /* due to exiting early */ - *(u8 *)q->state = state; - DEBUG_PRINTF("exiting in state %u\n", - state & SHENG32_STATE_MASK); - return rv; - } - } else { - assert(!"invalid scanning mode!"); - } - assert(scanned == cur_buf + cur_end); - - cur_start = cur_end; - } - - /* if we our queue event is past our end */ - if (mode != NO_MATCHES && q_cur_loc(q) > b_end) { - DEBUG_PRINTF("current location past buffer end\n"); - DEBUG_PRINTF("setting q location to %llu\n", b_end); - DEBUG_PRINTF("exiting in state %u\n", state & SHENG32_STATE_MASK); - q->cur--; - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = b_end; - *(u8 *)q->state = state; - return MO_ALIVE; - } - - /* crossing over into actual buffer */ - if (cur_start == 0) { - DEBUG_PRINTF("positive location, scanning buffer\n"); - DEBUG_PRINTF("max offset: %lli\n", b_end); - cur_buf = q->buffer; - } - - /* continue scanning the same buffer */ - if (end != cur_end) { - continue; - } - - switch (q_cur_type(q)) { - case MQE_END: - *(u8 *)q->state = state; - q->cur++; - DEBUG_PRINTF("exiting in state %u\n", state & SHENG32_STATE_MASK); - if (can_die) { - return (state & SHENG32_STATE_DEAD) ? MO_DEAD : MO_ALIVE; - } - return MO_ALIVE; - case MQE_TOP: - if (q->offset + cur_start == 0) { - DEBUG_PRINTF("Anchored start, going to state %u\n", - sh->anchored); - state = sh->anchored; - } else { - u8 new_state = get_aux32(sh, state)->top; - DEBUG_PRINTF("Top event %u->%u\n", state & SHENG32_STATE_MASK, - new_state & SHENG32_STATE_MASK); - state = new_state; - } - break; - default: - assert(!"invalid queue event"); - break; - } - q->cur++; - } -} - -char nfaExecSheng32_B(const struct NFA *n, u64a offset, const u8 *buffer, - size_t length, NfaCallback cb, void *context) { - DEBUG_PRINTF("smallwrite Sheng32\n"); - assert(n->type == SHENG_NFA_32); - const struct sheng32 *sh = getImplNfa(n); - u8 state = sh->anchored; - u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE; - u8 has_accel = sh->flags & SHENG_FLAG_HAS_ACCEL; - u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT; - u8 cached_accept_state = 0; - ReportID cached_accept_id = 0; - - /* scan and report all matches */ - int rv; - s64a end = length; - const u8 *scanned; - - rv = runSheng32Cb(sh, cb, context, offset, &cached_accept_state, - &cached_accept_id, buffer, buffer, buffer + end, can_die, - has_accel, single, &scanned, &state); - if (rv == MO_DEAD) { - DEBUG_PRINTF("exiting in state %u\n", - state & SHENG32_STATE_MASK); - return MO_DEAD; - } - - DEBUG_PRINTF("%u\n", state & SHENG32_STATE_MASK); - - const struct sstate_aux *aux = get_aux32(sh, state); - - if (aux->accept_eod) { - DEBUG_PRINTF("Reporting EOD matches\n"); - fireReports32(sh, cb, context, state, end + offset, - &cached_accept_state, &cached_accept_id, 1); - } - - return state & SHENG32_STATE_DEAD ? MO_DEAD : MO_ALIVE; -} - -char nfaExecSheng32_Q(const struct NFA *n, struct mq *q, s64a end) { - const struct sheng32 *sh = get_sheng32(n); - char rv = runSheng32(sh, q, end, CALLBACK_OUTPUT); - return rv; -} - -char nfaExecSheng32_Q2(const struct NFA *n, struct mq *q, s64a end) { - const struct sheng32 *sh = get_sheng32(n); - char rv = runSheng32(sh, q, end, STOP_AT_MATCH); - return rv; -} - -char nfaExecSheng32_QR(const struct NFA *n, struct mq *q, ReportID report) { - assert(q_cur_type(q) == MQE_START); - - const struct sheng32 *sh = get_sheng32(n); - char rv = runSheng32(sh, q, 0 /* end */, NO_MATCHES); - - if (rv && nfaExecSheng32_inAccept(n, report, q)) { - return MO_MATCHES_PENDING; - } - return rv; -} - -char nfaExecSheng32_inAccept(const struct NFA *n, ReportID report, - struct mq *q) { - assert(n && q); - - const struct sheng32 *sh = get_sheng32(n); - u8 s = *(const u8 *)q->state; - DEBUG_PRINTF("checking accepts for %u\n", (u8)(s & SHENG32_STATE_MASK)); - - const struct sstate_aux *aux = get_aux32(sh, s); - - if (!aux->accept) { - return 0; - } - - return sheng32HasAccept(sh, aux, report); -} - -char nfaExecSheng32_inAnyAccept(const struct NFA *n, struct mq *q) { - assert(n && q); - - const struct sheng32 *sh = get_sheng32(n); - u8 s = *(const u8 *)q->state; - DEBUG_PRINTF("checking accepts for %u\n", (u8)(s & SHENG32_STATE_MASK)); - - const struct sstate_aux *aux = get_aux32(sh, s); - return !!aux->accept; -} - -char nfaExecSheng32_testEOD(const struct NFA *nfa, const char *state, - UNUSED const char *streamState, u64a offset, - NfaCallback cb, void *ctxt) { - assert(nfa); - - const struct sheng32 *sh = get_sheng32(nfa); - u8 s = *(const u8 *)state; - DEBUG_PRINTF("checking EOD accepts for %u\n", (u8)(s & SHENG32_STATE_MASK)); - - const struct sstate_aux *aux = get_aux32(sh, s); - - if (!aux->accept_eod) { - return MO_CONTINUE_MATCHING; - } - - return fireReports32(sh, cb, ctxt, s, offset, NULL, NULL, 1); -} - -char nfaExecSheng32_reportCurrent(const struct NFA *n, struct mq *q) { - const struct sheng32 *sh = (const struct sheng32 *)getImplNfa(n); - NfaCallback cb = q->cb; - void *ctxt = q->context; - u8 s = *(u8 *)q->state; - const struct sstate_aux *aux = get_aux32(sh, s); - u64a offset = q_cur_offset(q); - u8 cached_state_id = 0; - ReportID cached_report_id = 0; - assert(q_cur_type(q) == MQE_START); - - if (aux->accept) { - if (sh->flags & SHENG_FLAG_SINGLE_REPORT) { - fireSingleReport(cb, ctxt, sh->report, offset); - } else { - fireReports32(sh, cb, ctxt, s, offset, &cached_state_id, - &cached_report_id, 0); - } - } - - return 0; -} - -char nfaExecSheng32_initCompressedState(const struct NFA *nfa, u64a offset, - void *state, UNUSED u8 key) { - const struct sheng32 *sh = get_sheng32(nfa); - u8 *s = (u8 *)state; - *s = offset ? sh->floating: sh->anchored; - return !(*s & SHENG32_STATE_DEAD); -} - -char nfaExecSheng32_queueInitState(const struct NFA *nfa, struct mq *q) { - assert(nfa->scratchStateSize == 1); - - /* starting in floating state */ - const struct sheng32 *sh = get_sheng32(nfa); - *(u8 *)q->state = sh->floating; - DEBUG_PRINTF("starting in floating state\n"); - return 0; -} - -char nfaExecSheng32_queueCompressState(UNUSED const struct NFA *nfa, - const struct mq *q, UNUSED s64a loc) { - void *dest = q->streamState; - const void *src = q->state; - assert(nfa->scratchStateSize == 1); - assert(nfa->streamStateSize == 1); - *(u8 *)dest = *(const u8 *)src; - return 0; -} - -char nfaExecSheng32_expandState(UNUSED const struct NFA *nfa, void *dest, - const void *src, UNUSED u64a offset, - UNUSED u8 key) { - assert(nfa->scratchStateSize == 1); - assert(nfa->streamStateSize == 1); - *(u8 *)dest = *(const u8 *)src; - return 0; -} - -// Sheng64 -static really_inline -char runSheng64Cb(const struct sheng64 *sh, NfaCallback cb, void *ctxt, - u64a offset, u8 *const cached_accept_state, - ReportID *const cached_accept_id, const u8 *cur_buf, - const u8 *start, const u8 *end, u8 can_die, - u8 single, const u8 **scanned, u8 *state) { - DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in callback mode\n", - (u64a)(end - start), offset); - DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf), - (s64a)(end - cur_buf)); - DEBUG_PRINTF("can die: %u single: %u\n", !!can_die, !!single); - int rv; - /* scan and report all matches */ - if (can_die) { - rv = sheng64_4_cod(state, cb, ctxt, sh, cached_accept_state, - cached_accept_id, single, offset, cur_buf, - start, end, scanned); - if (rv == MO_HALT_MATCHING) { - return MO_DEAD; - } - rv = sheng64_cod(state, cb, ctxt, sh, cached_accept_state, - cached_accept_id, single, offset, cur_buf, - *scanned, end, scanned); - } else { - rv = sheng64_4_co(state, cb, ctxt, sh, cached_accept_state, - cached_accept_id, single, offset, cur_buf, - start, end, scanned); - if (rv == MO_HALT_MATCHING) { - return MO_DEAD; - } - rv = sheng64_co(state, cb, ctxt, sh, cached_accept_state, - cached_accept_id, single, offset, cur_buf, - *scanned, end, scanned); - } - if (rv == MO_HALT_MATCHING) { - return MO_DEAD; - } - return MO_ALIVE; -} - -static really_inline -void runSheng64Nm(const struct sheng64 *sh, NfaCallback cb, void *ctxt, - u64a offset, u8 *const cached_accept_state, - ReportID *const cached_accept_id, const u8 *cur_buf, - const u8 *start, const u8 *end, u8 can_die, - u8 single, const u8 **scanned, u8 *state) { - DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in nomatch mode\n", - (u64a)(end - start), offset); - DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf), - (s64a)(end - cur_buf)); - DEBUG_PRINTF("can die: %u single: %u\n", !!can_die, !!single); - /* just scan the buffer */ - if (can_die) { - sheng64_4_nmd(state, cb, ctxt, sh, cached_accept_state, - cached_accept_id, single, offset, cur_buf, - start, end, scanned); - sheng64_nmd(state, cb, ctxt, sh, cached_accept_state, cached_accept_id, - single, offset, cur_buf, *scanned, end, scanned); - } else { - sheng64_4_nm(state, cb, ctxt, sh, cached_accept_state, cached_accept_id, - single, offset, cur_buf, start, end, scanned); - sheng64_nm(state, cb, ctxt, sh, cached_accept_state, cached_accept_id, - single, offset, cur_buf, *scanned, end, scanned); - } -} - -static really_inline -char runSheng64Sam(const struct sheng64 *sh, NfaCallback cb, void *ctxt, - u64a offset, u8 *const cached_accept_state, - ReportID *const cached_accept_id, const u8 *cur_buf, - const u8 *start, const u8 *end, u8 can_die, - u8 single, const u8 **scanned, u8 *state) { - DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in stop at match mode\n", - (u64a)(end - start), offset); - DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf), - (s64a)(end - cur_buf)); - DEBUG_PRINTF("can die: %u single: %u\n", !!can_die, !!single); - int rv; - /* scan until first match */ - if (can_die) { - rv = sheng64_4_samd(state, cb, ctxt, sh, cached_accept_state, - cached_accept_id, single, offset, cur_buf, - start, end, scanned); - if (rv == MO_HALT_MATCHING) { - return MO_DEAD; - } - /* if we stopped before we expected, we found a match */ - if (rv == MO_MATCHES_PENDING) { - return MO_MATCHES_PENDING; - } - - rv = sheng64_samd(state, cb, ctxt, sh, cached_accept_state, - cached_accept_id, single, offset, cur_buf, - *scanned, end, scanned); - } else { - rv = sheng64_4_sam(state, cb, ctxt, sh, cached_accept_state, - cached_accept_id, single, offset, cur_buf, - start, end, scanned); - if (rv == MO_HALT_MATCHING) { - return MO_DEAD; - } - /* if we stopped before we expected, we found a match */ - if (rv == MO_MATCHES_PENDING) { - return MO_MATCHES_PENDING; - } - - rv = sheng64_sam(state, cb, ctxt, sh, cached_accept_state, - cached_accept_id, single, offset, cur_buf, - *scanned, end, scanned); - } - if (rv == MO_HALT_MATCHING) { - return MO_DEAD; - } - /* if we stopped before we expected, we found a match */ - if (rv == MO_MATCHES_PENDING) { - return MO_MATCHES_PENDING; - } - return MO_ALIVE; -} - -static never_inline -char runSheng64(const struct sheng64 *sh, struct mq *q, s64a b_end, - enum MatchMode mode) { - u8 state = *(u8 *)q->state; - u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE; - u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT; - - u8 cached_accept_state = 0; - ReportID cached_accept_id = 0; - - DEBUG_PRINTF("starting Sheng64 execution in state %u\n", - state & SHENG64_STATE_MASK); - - if (q->report_current) { - DEBUG_PRINTF("reporting current pending matches\n"); - assert(sh); - - q->report_current = 0; - - int rv; - if (single) { - rv = fireSingleReport(q->cb, q->context, sh->report, - q_cur_offset(q)); - } else { - rv = fireReports64(sh, q->cb, q->context, state, q_cur_offset(q), - &cached_accept_state, &cached_accept_id, 0); - } - if (rv == MO_HALT_MATCHING) { - DEBUG_PRINTF("exiting in state %u\n", state & SHENG64_STATE_MASK); - return MO_DEAD; - } - - DEBUG_PRINTF("proceeding with matching\n"); - } - - assert(q_cur_type(q) == MQE_START); - s64a start = q_cur_loc(q); - - DEBUG_PRINTF("offset: %lli, location: %lli, mode: %s\n", q->offset, start, - mode == CALLBACK_OUTPUT ? "CALLBACK OUTPUT" : - mode == NO_MATCHES ? "NO MATCHES" : - mode == STOP_AT_MATCH ? "STOP AT MATCH" : "???"); - - DEBUG_PRINTF("processing event @ %lli: %s\n", q->offset + q_cur_loc(q), - q_cur_type(q) == MQE_START ? "START" : - q_cur_type(q) == MQE_TOP ? "TOP" : - q_cur_type(q) == MQE_END ? "END" : "???"); - - const u8* cur_buf; - if (start < 0) { - DEBUG_PRINTF("negative location, scanning history\n"); - DEBUG_PRINTF("min location: %zd\n", -q->hlength); - cur_buf = q->history + q->hlength; - } else { - DEBUG_PRINTF("positive location, scanning buffer\n"); - DEBUG_PRINTF("max location: %lli\n", b_end); - cur_buf = q->buffer; - } - - /* if we our queue event is past our end */ - if (mode != NO_MATCHES && q_cur_loc(q) > b_end) { - DEBUG_PRINTF("current location past buffer end\n"); - DEBUG_PRINTF("setting q location to %llu\n", b_end); - DEBUG_PRINTF("exiting in state %u\n", state & SHENG64_STATE_MASK); - q->items[q->cur].location = b_end; - return MO_ALIVE; - } - - q->cur++; - - s64a cur_start = start; - - while (1) { - DEBUG_PRINTF("processing event @ %lli: %s\n", q->offset + q_cur_loc(q), - q_cur_type(q) == MQE_START ? "START" : - q_cur_type(q) == MQE_TOP ? "TOP" : - q_cur_type(q) == MQE_END ? "END" : "???"); - s64a end = q_cur_loc(q); - if (mode != NO_MATCHES) { - end = MIN(end, b_end); - } - assert(end <= (s64a) q->length); - s64a cur_end = end; - - /* we may cross the border between history and current buffer */ - if (cur_start < 0) { - cur_end = MIN(0, cur_end); - } - - DEBUG_PRINTF("start: %lli end: %lli\n", start, end); - - /* don't scan zero length buffer */ - if (cur_start != cur_end) { - const u8 * scanned = cur_buf; - char rv; - - if (mode == NO_MATCHES) { - runSheng64Nm(sh, q->cb, q->context, q->offset, - &cached_accept_state, &cached_accept_id, cur_buf, - cur_buf + cur_start, cur_buf + cur_end, can_die, - single, &scanned, &state); - } else if (mode == CALLBACK_OUTPUT) { - rv = runSheng64Cb(sh, q->cb, q->context, q->offset, - &cached_accept_state, &cached_accept_id, - cur_buf, cur_buf + cur_start, cur_buf + cur_end, - can_die, single, &scanned, &state); - if (rv == MO_DEAD) { - DEBUG_PRINTF("exiting in state %u\n", - state & SHENG64_STATE_MASK); - return MO_DEAD; - } - } else if (mode == STOP_AT_MATCH) { - rv = runSheng64Sam(sh, q->cb, q->context, q->offset, - &cached_accept_state, &cached_accept_id, - cur_buf, cur_buf + cur_start, - cur_buf + cur_end, can_die, single, - &scanned, &state); - if (rv == MO_DEAD) { - DEBUG_PRINTF("exiting in state %u\n", - state & SHENG64_STATE_MASK); - return rv; - } else if (rv == MO_MATCHES_PENDING) { - assert(q->cur); - DEBUG_PRINTF("found a match, setting q location to %zd\n", - scanned - cur_buf + 1); - q->cur--; - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = - scanned - cur_buf + 1; /* due to exiting early */ - *(u8 *)q->state = state; - DEBUG_PRINTF("exiting in state %u\n", - state & SHENG64_STATE_MASK); - return rv; - } - } else { - assert(!"invalid scanning mode!"); - } - assert(scanned == cur_buf + cur_end); - - cur_start = cur_end; - } - - /* if we our queue event is past our end */ - if (mode != NO_MATCHES && q_cur_loc(q) > b_end) { - DEBUG_PRINTF("current location past buffer end\n"); - DEBUG_PRINTF("setting q location to %llu\n", b_end); - DEBUG_PRINTF("exiting in state %u\n", state & SHENG64_STATE_MASK); - q->cur--; - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = b_end; - *(u8 *)q->state = state; - return MO_ALIVE; - } - - /* crossing over into actual buffer */ - if (cur_start == 0) { - DEBUG_PRINTF("positive location, scanning buffer\n"); - DEBUG_PRINTF("max offset: %lli\n", b_end); - cur_buf = q->buffer; - } - - /* continue scanning the same buffer */ - if (end != cur_end) { - continue; - } - - switch (q_cur_type(q)) { - case MQE_END: - *(u8 *)q->state = state; - q->cur++; - DEBUG_PRINTF("exiting in state %u\n", state & SHENG64_STATE_MASK); - if (can_die) { - return (state & SHENG64_STATE_DEAD) ? MO_DEAD : MO_ALIVE; - } - return MO_ALIVE; - case MQE_TOP: - if (q->offset + cur_start == 0) { - DEBUG_PRINTF("Anchored start, going to state %u\n", - sh->anchored); - state = sh->anchored; - } else { - u8 new_state = get_aux64(sh, state)->top; - DEBUG_PRINTF("Top event %u->%u\n", state & SHENG64_STATE_MASK, - new_state & SHENG64_STATE_MASK); - state = new_state; - } - break; - default: - assert(!"invalid queue event"); - break; - } - q->cur++; - } -} - -char nfaExecSheng64_B(const struct NFA *n, u64a offset, const u8 *buffer, - size_t length, NfaCallback cb, void *context) { - DEBUG_PRINTF("smallwrite Sheng64\n"); - assert(n->type == SHENG_NFA_64); - const struct sheng64 *sh = getImplNfa(n); - u8 state = sh->anchored; - u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE; - u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT; - u8 cached_accept_state = 0; - ReportID cached_accept_id = 0; - - /* scan and report all matches */ - int rv; - s64a end = length; - const u8 *scanned; - - rv = runSheng64Cb(sh, cb, context, offset, &cached_accept_state, - &cached_accept_id, buffer, buffer, buffer + end, can_die, - single, &scanned, &state); - if (rv == MO_DEAD) { - DEBUG_PRINTF("exiting in state %u\n", - state & SHENG64_STATE_MASK); - return MO_DEAD; - } - - DEBUG_PRINTF("%u\n", state & SHENG64_STATE_MASK); - - const struct sstate_aux *aux = get_aux64(sh, state); - - if (aux->accept_eod) { - DEBUG_PRINTF("Reporting EOD matches\n"); - fireReports64(sh, cb, context, state, end + offset, - &cached_accept_state, &cached_accept_id, 1); - } - - return state & SHENG64_STATE_DEAD ? MO_DEAD : MO_ALIVE; -} - -char nfaExecSheng64_Q(const struct NFA *n, struct mq *q, s64a end) { - const struct sheng64 *sh = get_sheng64(n); - char rv = runSheng64(sh, q, end, CALLBACK_OUTPUT); - return rv; -} - -char nfaExecSheng64_Q2(const struct NFA *n, struct mq *q, s64a end) { - const struct sheng64 *sh = get_sheng64(n); - char rv = runSheng64(sh, q, end, STOP_AT_MATCH); - return rv; -} - -char nfaExecSheng64_QR(const struct NFA *n, struct mq *q, ReportID report) { - assert(q_cur_type(q) == MQE_START); - - const struct sheng64 *sh = get_sheng64(n); - char rv = runSheng64(sh, q, 0 /* end */, NO_MATCHES); - - if (rv && nfaExecSheng64_inAccept(n, report, q)) { - return MO_MATCHES_PENDING; - } - return rv; -} - -char nfaExecSheng64_inAccept(const struct NFA *n, ReportID report, - struct mq *q) { - assert(n && q); - - const struct sheng64 *sh = get_sheng64(n); - u8 s = *(const u8 *)q->state; - DEBUG_PRINTF("checking accepts for %u\n", (u8)(s & SHENG64_STATE_MASK)); - - const struct sstate_aux *aux = get_aux64(sh, s); - - if (!aux->accept) { - return 0; - } - - return sheng64HasAccept(sh, aux, report); -} - -char nfaExecSheng64_inAnyAccept(const struct NFA *n, struct mq *q) { - assert(n && q); - - const struct sheng64 *sh = get_sheng64(n); - u8 s = *(const u8 *)q->state; - DEBUG_PRINTF("checking accepts for %u\n", (u8)(s & SHENG64_STATE_MASK)); - - const struct sstate_aux *aux = get_aux64(sh, s); - return !!aux->accept; -} - -char nfaExecSheng64_testEOD(const struct NFA *nfa, const char *state, - UNUSED const char *streamState, u64a offset, - NfaCallback cb, void *ctxt) { - assert(nfa); - - const struct sheng64 *sh = get_sheng64(nfa); - u8 s = *(const u8 *)state; - DEBUG_PRINTF("checking EOD accepts for %u\n", (u8)(s & SHENG64_STATE_MASK)); - - const struct sstate_aux *aux = get_aux64(sh, s); - - if (!aux->accept_eod) { - return MO_CONTINUE_MATCHING; - } - - return fireReports64(sh, cb, ctxt, s, offset, NULL, NULL, 1); -} - -char nfaExecSheng64_reportCurrent(const struct NFA *n, struct mq *q) { - const struct sheng64 *sh = (const struct sheng64 *)getImplNfa(n); - NfaCallback cb = q->cb; - void *ctxt = q->context; - u8 s = *(u8 *)q->state; - const struct sstate_aux *aux = get_aux64(sh, s); - u64a offset = q_cur_offset(q); - u8 cached_state_id = 0; - ReportID cached_report_id = 0; - assert(q_cur_type(q) == MQE_START); - - if (aux->accept) { - if (sh->flags & SHENG_FLAG_SINGLE_REPORT) { - fireSingleReport(cb, ctxt, sh->report, offset); - } else { - fireReports64(sh, cb, ctxt, s, offset, &cached_state_id, - &cached_report_id, 0); - } - } - - return 0; -} - -char nfaExecSheng64_initCompressedState(const struct NFA *nfa, u64a offset, - void *state, UNUSED u8 key) { - const struct sheng64 *sh = get_sheng64(nfa); - u8 *s = (u8 *)state; - *s = offset ? sh->floating: sh->anchored; - return !(*s & SHENG64_STATE_DEAD); -} - -char nfaExecSheng64_queueInitState(const struct NFA *nfa, struct mq *q) { - assert(nfa->scratchStateSize == 1); - - /* starting in floating state */ - const struct sheng64 *sh = get_sheng64(nfa); - *(u8 *)q->state = sh->floating; - DEBUG_PRINTF("starting in floating state\n"); - return 0; -} - -char nfaExecSheng64_queueCompressState(UNUSED const struct NFA *nfa, - const struct mq *q, UNUSED s64a loc) { - void *dest = q->streamState; - const void *src = q->state; - assert(nfa->scratchStateSize == 1); - assert(nfa->streamStateSize == 1); - *(u8 *)dest = *(const u8 *)src; - return 0; -} - -char nfaExecSheng64_expandState(UNUSED const struct NFA *nfa, void *dest, - const void *src, UNUSED u64a offset, - UNUSED u8 key) { - assert(nfa->scratchStateSize == 1); - assert(nfa->streamStateSize == 1); - *(u8 *)dest = *(const u8 *)src; - return 0; -} -#endif // end of HAVE_AVX512VBMI + +#if defined(HAVE_AVX512VBMI) +// Sheng32 +static really_inline +char runSheng32Cb(const struct sheng32 *sh, NfaCallback cb, void *ctxt, + u64a offset, u8 *const cached_accept_state, + ReportID *const cached_accept_id, const u8 *cur_buf, + const u8 *start, const u8 *end, u8 can_die, + u8 has_accel, u8 single, const u8 **scanned, u8 *state) { + DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in callback mode\n", + (u64a)(end - start), offset); + DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf), + (s64a)(end - cur_buf)); + DEBUG_PRINTF("can die: %u has accel: %u single: %u\n", !!can_die, + !!has_accel, !!single); + int rv; + /* scan and report all matches */ + if (can_die) { + if (has_accel) { + rv = sheng32_4_coda(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, + start, end, scanned); + } else { + rv = sheng32_4_cod(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, + start, end, scanned); + } + if (rv == MO_HALT_MATCHING) { + return MO_DEAD; + } + rv = sheng32_cod(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, + *scanned, end, scanned); + } else { + if (has_accel) { + rv = sheng32_4_coa(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, + start, end, scanned); + } else { + rv = sheng32_4_co(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, + start, end, scanned); + } + if (rv == MO_HALT_MATCHING) { + return MO_DEAD; + } + rv = sheng32_co(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, + *scanned, end, scanned); + } + if (rv == MO_HALT_MATCHING) { + return MO_DEAD; + } + return MO_ALIVE; +} + +static really_inline +void runSheng32Nm(const struct sheng32 *sh, NfaCallback cb, void *ctxt, + u64a offset, u8 *const cached_accept_state, + ReportID *const cached_accept_id, const u8 *cur_buf, + const u8 *start, const u8 *end, u8 can_die, u8 has_accel, + u8 single, const u8 **scanned, u8 *state) { + DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in nomatch mode\n", + (u64a)(end - start), offset); + DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf), + (s64a)(end - cur_buf)); + DEBUG_PRINTF("can die: %u has accel: %u single: %u\n", !!can_die, + !!has_accel, !!single); + /* just scan the buffer */ + if (can_die) { + if (has_accel) { + sheng32_4_nmda(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, + start, end, scanned); + } else { + sheng32_4_nmd(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, + start, end, scanned); + } + sheng32_nmd(state, cb, ctxt, sh, cached_accept_state, cached_accept_id, + single, offset, cur_buf, *scanned, end, scanned); + } else { + sheng32_4_nm(state, cb, ctxt, sh, cached_accept_state, cached_accept_id, + single, offset, cur_buf, start, end, scanned); + sheng32_nm(state, cb, ctxt, sh, cached_accept_state, cached_accept_id, + single, offset, cur_buf, *scanned, end, scanned); + } +} + +static really_inline +char runSheng32Sam(const struct sheng32 *sh, NfaCallback cb, void *ctxt, + u64a offset, u8 *const cached_accept_state, + ReportID *const cached_accept_id, const u8 *cur_buf, + const u8 *start, const u8 *end, u8 can_die, u8 has_accel, + u8 single, const u8 **scanned, u8 *state) { + DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in stop at match mode\n", + (u64a)(end - start), offset); + DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf), + (s64a)(end - cur_buf)); + DEBUG_PRINTF("can die: %u has accel: %u single: %u\n", !!can_die, + !!has_accel, !!single); + int rv; + /* scan until first match */ + if (can_die) { + if (has_accel) { + rv = sheng32_4_samda(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, + start, end, scanned); + } else { + rv = sheng32_4_samd(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, + start, end, scanned); + } + if (rv == MO_HALT_MATCHING) { + return MO_DEAD; + } + /* if we stopped before we expected, we found a match */ + if (rv == MO_MATCHES_PENDING) { + return MO_MATCHES_PENDING; + } + + rv = sheng32_samd(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, + *scanned, end, scanned); + } else { + if (has_accel) { + rv = sheng32_4_sama(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, + start, end, scanned); + } else { + rv = sheng32_4_sam(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, + start, end, scanned); + } + if (rv == MO_HALT_MATCHING) { + return MO_DEAD; + } + /* if we stopped before we expected, we found a match */ + if (rv == MO_MATCHES_PENDING) { + return MO_MATCHES_PENDING; + } + + rv = sheng32_sam(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, + *scanned, end, scanned); + } + if (rv == MO_HALT_MATCHING) { + return MO_DEAD; + } + /* if we stopped before we expected, we found a match */ + if (rv == MO_MATCHES_PENDING) { + return MO_MATCHES_PENDING; + } + return MO_ALIVE; +} + +static never_inline +char runSheng32(const struct sheng32 *sh, struct mq *q, s64a b_end, + enum MatchMode mode) { + u8 state = *(u8 *)q->state; + u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE; + u8 has_accel = sh->flags & SHENG_FLAG_HAS_ACCEL; + u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT; + + u8 cached_accept_state = 0; + ReportID cached_accept_id = 0; + + DEBUG_PRINTF("starting Sheng32 execution in state %u\n", + state & SHENG32_STATE_MASK); + + if (q->report_current) { + DEBUG_PRINTF("reporting current pending matches\n"); + assert(sh); + + q->report_current = 0; + + int rv; + if (single) { + rv = fireSingleReport(q->cb, q->context, sh->report, + q_cur_offset(q)); + } else { + rv = fireReports32(sh, q->cb, q->context, state, q_cur_offset(q), + &cached_accept_state, &cached_accept_id, 0); + } + if (rv == MO_HALT_MATCHING) { + DEBUG_PRINTF("exiting in state %u\n", state & SHENG32_STATE_MASK); + return MO_DEAD; + } + + DEBUG_PRINTF("proceeding with matching\n"); + } + + assert(q_cur_type(q) == MQE_START); + s64a start = q_cur_loc(q); + + DEBUG_PRINTF("offset: %lli, location: %lli, mode: %s\n", q->offset, start, + mode == CALLBACK_OUTPUT ? "CALLBACK OUTPUT" : + mode == NO_MATCHES ? "NO MATCHES" : + mode == STOP_AT_MATCH ? "STOP AT MATCH" : "???"); + + DEBUG_PRINTF("processing event @ %lli: %s\n", q->offset + q_cur_loc(q), + q_cur_type(q) == MQE_START ? "START" : + q_cur_type(q) == MQE_TOP ? "TOP" : + q_cur_type(q) == MQE_END ? "END" : "???"); + + const u8* cur_buf; + if (start < 0) { + DEBUG_PRINTF("negative location, scanning history\n"); + DEBUG_PRINTF("min location: %zd\n", -q->hlength); + cur_buf = q->history + q->hlength; + } else { + DEBUG_PRINTF("positive location, scanning buffer\n"); + DEBUG_PRINTF("max location: %lli\n", b_end); + cur_buf = q->buffer; + } + + /* if we our queue event is past our end */ + if (mode != NO_MATCHES && q_cur_loc(q) > b_end) { + DEBUG_PRINTF("current location past buffer end\n"); + DEBUG_PRINTF("setting q location to %llu\n", b_end); + DEBUG_PRINTF("exiting in state %u\n", state & SHENG32_STATE_MASK); + q->items[q->cur].location = b_end; + return MO_ALIVE; + } + + q->cur++; + + s64a cur_start = start; + + while (1) { + DEBUG_PRINTF("processing event @ %lli: %s\n", q->offset + q_cur_loc(q), + q_cur_type(q) == MQE_START ? "START" : + q_cur_type(q) == MQE_TOP ? "TOP" : + q_cur_type(q) == MQE_END ? "END" : "???"); + s64a end = q_cur_loc(q); + if (mode != NO_MATCHES) { + end = MIN(end, b_end); + } + assert(end <= (s64a) q->length); + s64a cur_end = end; + + /* we may cross the border between history and current buffer */ + if (cur_start < 0) { + cur_end = MIN(0, cur_end); + } + + DEBUG_PRINTF("start: %lli end: %lli\n", start, end); + + /* don't scan zero length buffer */ + if (cur_start != cur_end) { + const u8 * scanned = cur_buf; + char rv; + + if (mode == NO_MATCHES) { + runSheng32Nm(sh, q->cb, q->context, q->offset, + &cached_accept_state, &cached_accept_id, cur_buf, + cur_buf + cur_start, cur_buf + cur_end, can_die, + has_accel, single, &scanned, &state); + } else if (mode == CALLBACK_OUTPUT) { + rv = runSheng32Cb(sh, q->cb, q->context, q->offset, + &cached_accept_state, &cached_accept_id, + cur_buf, cur_buf + cur_start, cur_buf + cur_end, + can_die, has_accel, single, &scanned, &state); + if (rv == MO_DEAD) { + DEBUG_PRINTF("exiting in state %u\n", + state & SHENG32_STATE_MASK); + return MO_DEAD; + } + } else if (mode == STOP_AT_MATCH) { + rv = runSheng32Sam(sh, q->cb, q->context, q->offset, + &cached_accept_state, &cached_accept_id, + cur_buf, cur_buf + cur_start, + cur_buf + cur_end, can_die, has_accel, single, + &scanned, &state); + if (rv == MO_DEAD) { + DEBUG_PRINTF("exiting in state %u\n", + state & SHENG32_STATE_MASK); + return rv; + } else if (rv == MO_MATCHES_PENDING) { + assert(q->cur); + DEBUG_PRINTF("found a match, setting q location to %zd\n", + scanned - cur_buf + 1); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = + scanned - cur_buf + 1; /* due to exiting early */ + *(u8 *)q->state = state; + DEBUG_PRINTF("exiting in state %u\n", + state & SHENG32_STATE_MASK); + return rv; + } + } else { + assert(!"invalid scanning mode!"); + } + assert(scanned == cur_buf + cur_end); + + cur_start = cur_end; + } + + /* if we our queue event is past our end */ + if (mode != NO_MATCHES && q_cur_loc(q) > b_end) { + DEBUG_PRINTF("current location past buffer end\n"); + DEBUG_PRINTF("setting q location to %llu\n", b_end); + DEBUG_PRINTF("exiting in state %u\n", state & SHENG32_STATE_MASK); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = b_end; + *(u8 *)q->state = state; + return MO_ALIVE; + } + + /* crossing over into actual buffer */ + if (cur_start == 0) { + DEBUG_PRINTF("positive location, scanning buffer\n"); + DEBUG_PRINTF("max offset: %lli\n", b_end); + cur_buf = q->buffer; + } + + /* continue scanning the same buffer */ + if (end != cur_end) { + continue; + } + + switch (q_cur_type(q)) { + case MQE_END: + *(u8 *)q->state = state; + q->cur++; + DEBUG_PRINTF("exiting in state %u\n", state & SHENG32_STATE_MASK); + if (can_die) { + return (state & SHENG32_STATE_DEAD) ? MO_DEAD : MO_ALIVE; + } + return MO_ALIVE; + case MQE_TOP: + if (q->offset + cur_start == 0) { + DEBUG_PRINTF("Anchored start, going to state %u\n", + sh->anchored); + state = sh->anchored; + } else { + u8 new_state = get_aux32(sh, state)->top; + DEBUG_PRINTF("Top event %u->%u\n", state & SHENG32_STATE_MASK, + new_state & SHENG32_STATE_MASK); + state = new_state; + } + break; + default: + assert(!"invalid queue event"); + break; + } + q->cur++; + } +} + +char nfaExecSheng32_B(const struct NFA *n, u64a offset, const u8 *buffer, + size_t length, NfaCallback cb, void *context) { + DEBUG_PRINTF("smallwrite Sheng32\n"); + assert(n->type == SHENG_NFA_32); + const struct sheng32 *sh = getImplNfa(n); + u8 state = sh->anchored; + u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE; + u8 has_accel = sh->flags & SHENG_FLAG_HAS_ACCEL; + u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT; + u8 cached_accept_state = 0; + ReportID cached_accept_id = 0; + + /* scan and report all matches */ + int rv; + s64a end = length; + const u8 *scanned; + + rv = runSheng32Cb(sh, cb, context, offset, &cached_accept_state, + &cached_accept_id, buffer, buffer, buffer + end, can_die, + has_accel, single, &scanned, &state); + if (rv == MO_DEAD) { + DEBUG_PRINTF("exiting in state %u\n", + state & SHENG32_STATE_MASK); + return MO_DEAD; + } + + DEBUG_PRINTF("%u\n", state & SHENG32_STATE_MASK); + + const struct sstate_aux *aux = get_aux32(sh, state); + + if (aux->accept_eod) { + DEBUG_PRINTF("Reporting EOD matches\n"); + fireReports32(sh, cb, context, state, end + offset, + &cached_accept_state, &cached_accept_id, 1); + } + + return state & SHENG32_STATE_DEAD ? MO_DEAD : MO_ALIVE; +} + +char nfaExecSheng32_Q(const struct NFA *n, struct mq *q, s64a end) { + const struct sheng32 *sh = get_sheng32(n); + char rv = runSheng32(sh, q, end, CALLBACK_OUTPUT); + return rv; +} + +char nfaExecSheng32_Q2(const struct NFA *n, struct mq *q, s64a end) { + const struct sheng32 *sh = get_sheng32(n); + char rv = runSheng32(sh, q, end, STOP_AT_MATCH); + return rv; +} + +char nfaExecSheng32_QR(const struct NFA *n, struct mq *q, ReportID report) { + assert(q_cur_type(q) == MQE_START); + + const struct sheng32 *sh = get_sheng32(n); + char rv = runSheng32(sh, q, 0 /* end */, NO_MATCHES); + + if (rv && nfaExecSheng32_inAccept(n, report, q)) { + return MO_MATCHES_PENDING; + } + return rv; +} + +char nfaExecSheng32_inAccept(const struct NFA *n, ReportID report, + struct mq *q) { + assert(n && q); + + const struct sheng32 *sh = get_sheng32(n); + u8 s = *(const u8 *)q->state; + DEBUG_PRINTF("checking accepts for %u\n", (u8)(s & SHENG32_STATE_MASK)); + + const struct sstate_aux *aux = get_aux32(sh, s); + + if (!aux->accept) { + return 0; + } + + return sheng32HasAccept(sh, aux, report); +} + +char nfaExecSheng32_inAnyAccept(const struct NFA *n, struct mq *q) { + assert(n && q); + + const struct sheng32 *sh = get_sheng32(n); + u8 s = *(const u8 *)q->state; + DEBUG_PRINTF("checking accepts for %u\n", (u8)(s & SHENG32_STATE_MASK)); + + const struct sstate_aux *aux = get_aux32(sh, s); + return !!aux->accept; +} + +char nfaExecSheng32_testEOD(const struct NFA *nfa, const char *state, + UNUSED const char *streamState, u64a offset, + NfaCallback cb, void *ctxt) { + assert(nfa); + + const struct sheng32 *sh = get_sheng32(nfa); + u8 s = *(const u8 *)state; + DEBUG_PRINTF("checking EOD accepts for %u\n", (u8)(s & SHENG32_STATE_MASK)); + + const struct sstate_aux *aux = get_aux32(sh, s); + + if (!aux->accept_eod) { + return MO_CONTINUE_MATCHING; + } + + return fireReports32(sh, cb, ctxt, s, offset, NULL, NULL, 1); +} + +char nfaExecSheng32_reportCurrent(const struct NFA *n, struct mq *q) { + const struct sheng32 *sh = (const struct sheng32 *)getImplNfa(n); + NfaCallback cb = q->cb; + void *ctxt = q->context; + u8 s = *(u8 *)q->state; + const struct sstate_aux *aux = get_aux32(sh, s); + u64a offset = q_cur_offset(q); + u8 cached_state_id = 0; + ReportID cached_report_id = 0; + assert(q_cur_type(q) == MQE_START); + + if (aux->accept) { + if (sh->flags & SHENG_FLAG_SINGLE_REPORT) { + fireSingleReport(cb, ctxt, sh->report, offset); + } else { + fireReports32(sh, cb, ctxt, s, offset, &cached_state_id, + &cached_report_id, 0); + } + } + + return 0; +} + +char nfaExecSheng32_initCompressedState(const struct NFA *nfa, u64a offset, + void *state, UNUSED u8 key) { + const struct sheng32 *sh = get_sheng32(nfa); + u8 *s = (u8 *)state; + *s = offset ? sh->floating: sh->anchored; + return !(*s & SHENG32_STATE_DEAD); +} + +char nfaExecSheng32_queueInitState(const struct NFA *nfa, struct mq *q) { + assert(nfa->scratchStateSize == 1); + + /* starting in floating state */ + const struct sheng32 *sh = get_sheng32(nfa); + *(u8 *)q->state = sh->floating; + DEBUG_PRINTF("starting in floating state\n"); + return 0; +} + +char nfaExecSheng32_queueCompressState(UNUSED const struct NFA *nfa, + const struct mq *q, UNUSED s64a loc) { + void *dest = q->streamState; + const void *src = q->state; + assert(nfa->scratchStateSize == 1); + assert(nfa->streamStateSize == 1); + *(u8 *)dest = *(const u8 *)src; + return 0; +} + +char nfaExecSheng32_expandState(UNUSED const struct NFA *nfa, void *dest, + const void *src, UNUSED u64a offset, + UNUSED u8 key) { + assert(nfa->scratchStateSize == 1); + assert(nfa->streamStateSize == 1); + *(u8 *)dest = *(const u8 *)src; + return 0; +} + +// Sheng64 +static really_inline +char runSheng64Cb(const struct sheng64 *sh, NfaCallback cb, void *ctxt, + u64a offset, u8 *const cached_accept_state, + ReportID *const cached_accept_id, const u8 *cur_buf, + const u8 *start, const u8 *end, u8 can_die, + u8 single, const u8 **scanned, u8 *state) { + DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in callback mode\n", + (u64a)(end - start), offset); + DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf), + (s64a)(end - cur_buf)); + DEBUG_PRINTF("can die: %u single: %u\n", !!can_die, !!single); + int rv; + /* scan and report all matches */ + if (can_die) { + rv = sheng64_4_cod(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, + start, end, scanned); + if (rv == MO_HALT_MATCHING) { + return MO_DEAD; + } + rv = sheng64_cod(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, + *scanned, end, scanned); + } else { + rv = sheng64_4_co(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, + start, end, scanned); + if (rv == MO_HALT_MATCHING) { + return MO_DEAD; + } + rv = sheng64_co(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, + *scanned, end, scanned); + } + if (rv == MO_HALT_MATCHING) { + return MO_DEAD; + } + return MO_ALIVE; +} + +static really_inline +void runSheng64Nm(const struct sheng64 *sh, NfaCallback cb, void *ctxt, + u64a offset, u8 *const cached_accept_state, + ReportID *const cached_accept_id, const u8 *cur_buf, + const u8 *start, const u8 *end, u8 can_die, + u8 single, const u8 **scanned, u8 *state) { + DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in nomatch mode\n", + (u64a)(end - start), offset); + DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf), + (s64a)(end - cur_buf)); + DEBUG_PRINTF("can die: %u single: %u\n", !!can_die, !!single); + /* just scan the buffer */ + if (can_die) { + sheng64_4_nmd(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, + start, end, scanned); + sheng64_nmd(state, cb, ctxt, sh, cached_accept_state, cached_accept_id, + single, offset, cur_buf, *scanned, end, scanned); + } else { + sheng64_4_nm(state, cb, ctxt, sh, cached_accept_state, cached_accept_id, + single, offset, cur_buf, start, end, scanned); + sheng64_nm(state, cb, ctxt, sh, cached_accept_state, cached_accept_id, + single, offset, cur_buf, *scanned, end, scanned); + } +} + +static really_inline +char runSheng64Sam(const struct sheng64 *sh, NfaCallback cb, void *ctxt, + u64a offset, u8 *const cached_accept_state, + ReportID *const cached_accept_id, const u8 *cur_buf, + const u8 *start, const u8 *end, u8 can_die, + u8 single, const u8 **scanned, u8 *state) { + DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in stop at match mode\n", + (u64a)(end - start), offset); + DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf), + (s64a)(end - cur_buf)); + DEBUG_PRINTF("can die: %u single: %u\n", !!can_die, !!single); + int rv; + /* scan until first match */ + if (can_die) { + rv = sheng64_4_samd(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, + start, end, scanned); + if (rv == MO_HALT_MATCHING) { + return MO_DEAD; + } + /* if we stopped before we expected, we found a match */ + if (rv == MO_MATCHES_PENDING) { + return MO_MATCHES_PENDING; + } + + rv = sheng64_samd(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, + *scanned, end, scanned); + } else { + rv = sheng64_4_sam(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, + start, end, scanned); + if (rv == MO_HALT_MATCHING) { + return MO_DEAD; + } + /* if we stopped before we expected, we found a match */ + if (rv == MO_MATCHES_PENDING) { + return MO_MATCHES_PENDING; + } + + rv = sheng64_sam(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, + *scanned, end, scanned); + } + if (rv == MO_HALT_MATCHING) { + return MO_DEAD; + } + /* if we stopped before we expected, we found a match */ + if (rv == MO_MATCHES_PENDING) { + return MO_MATCHES_PENDING; + } + return MO_ALIVE; +} + +static never_inline +char runSheng64(const struct sheng64 *sh, struct mq *q, s64a b_end, + enum MatchMode mode) { + u8 state = *(u8 *)q->state; + u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE; + u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT; + + u8 cached_accept_state = 0; + ReportID cached_accept_id = 0; + + DEBUG_PRINTF("starting Sheng64 execution in state %u\n", + state & SHENG64_STATE_MASK); + + if (q->report_current) { + DEBUG_PRINTF("reporting current pending matches\n"); + assert(sh); + + q->report_current = 0; + + int rv; + if (single) { + rv = fireSingleReport(q->cb, q->context, sh->report, + q_cur_offset(q)); + } else { + rv = fireReports64(sh, q->cb, q->context, state, q_cur_offset(q), + &cached_accept_state, &cached_accept_id, 0); + } + if (rv == MO_HALT_MATCHING) { + DEBUG_PRINTF("exiting in state %u\n", state & SHENG64_STATE_MASK); + return MO_DEAD; + } + + DEBUG_PRINTF("proceeding with matching\n"); + } + + assert(q_cur_type(q) == MQE_START); + s64a start = q_cur_loc(q); + + DEBUG_PRINTF("offset: %lli, location: %lli, mode: %s\n", q->offset, start, + mode == CALLBACK_OUTPUT ? "CALLBACK OUTPUT" : + mode == NO_MATCHES ? "NO MATCHES" : + mode == STOP_AT_MATCH ? "STOP AT MATCH" : "???"); + + DEBUG_PRINTF("processing event @ %lli: %s\n", q->offset + q_cur_loc(q), + q_cur_type(q) == MQE_START ? "START" : + q_cur_type(q) == MQE_TOP ? "TOP" : + q_cur_type(q) == MQE_END ? "END" : "???"); + + const u8* cur_buf; + if (start < 0) { + DEBUG_PRINTF("negative location, scanning history\n"); + DEBUG_PRINTF("min location: %zd\n", -q->hlength); + cur_buf = q->history + q->hlength; + } else { + DEBUG_PRINTF("positive location, scanning buffer\n"); + DEBUG_PRINTF("max location: %lli\n", b_end); + cur_buf = q->buffer; + } + + /* if we our queue event is past our end */ + if (mode != NO_MATCHES && q_cur_loc(q) > b_end) { + DEBUG_PRINTF("current location past buffer end\n"); + DEBUG_PRINTF("setting q location to %llu\n", b_end); + DEBUG_PRINTF("exiting in state %u\n", state & SHENG64_STATE_MASK); + q->items[q->cur].location = b_end; + return MO_ALIVE; + } + + q->cur++; + + s64a cur_start = start; + + while (1) { + DEBUG_PRINTF("processing event @ %lli: %s\n", q->offset + q_cur_loc(q), + q_cur_type(q) == MQE_START ? "START" : + q_cur_type(q) == MQE_TOP ? "TOP" : + q_cur_type(q) == MQE_END ? "END" : "???"); + s64a end = q_cur_loc(q); + if (mode != NO_MATCHES) { + end = MIN(end, b_end); + } + assert(end <= (s64a) q->length); + s64a cur_end = end; + + /* we may cross the border between history and current buffer */ + if (cur_start < 0) { + cur_end = MIN(0, cur_end); + } + + DEBUG_PRINTF("start: %lli end: %lli\n", start, end); + + /* don't scan zero length buffer */ + if (cur_start != cur_end) { + const u8 * scanned = cur_buf; + char rv; + + if (mode == NO_MATCHES) { + runSheng64Nm(sh, q->cb, q->context, q->offset, + &cached_accept_state, &cached_accept_id, cur_buf, + cur_buf + cur_start, cur_buf + cur_end, can_die, + single, &scanned, &state); + } else if (mode == CALLBACK_OUTPUT) { + rv = runSheng64Cb(sh, q->cb, q->context, q->offset, + &cached_accept_state, &cached_accept_id, + cur_buf, cur_buf + cur_start, cur_buf + cur_end, + can_die, single, &scanned, &state); + if (rv == MO_DEAD) { + DEBUG_PRINTF("exiting in state %u\n", + state & SHENG64_STATE_MASK); + return MO_DEAD; + } + } else if (mode == STOP_AT_MATCH) { + rv = runSheng64Sam(sh, q->cb, q->context, q->offset, + &cached_accept_state, &cached_accept_id, + cur_buf, cur_buf + cur_start, + cur_buf + cur_end, can_die, single, + &scanned, &state); + if (rv == MO_DEAD) { + DEBUG_PRINTF("exiting in state %u\n", + state & SHENG64_STATE_MASK); + return rv; + } else if (rv == MO_MATCHES_PENDING) { + assert(q->cur); + DEBUG_PRINTF("found a match, setting q location to %zd\n", + scanned - cur_buf + 1); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = + scanned - cur_buf + 1; /* due to exiting early */ + *(u8 *)q->state = state; + DEBUG_PRINTF("exiting in state %u\n", + state & SHENG64_STATE_MASK); + return rv; + } + } else { + assert(!"invalid scanning mode!"); + } + assert(scanned == cur_buf + cur_end); + + cur_start = cur_end; + } + + /* if we our queue event is past our end */ + if (mode != NO_MATCHES && q_cur_loc(q) > b_end) { + DEBUG_PRINTF("current location past buffer end\n"); + DEBUG_PRINTF("setting q location to %llu\n", b_end); + DEBUG_PRINTF("exiting in state %u\n", state & SHENG64_STATE_MASK); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = b_end; + *(u8 *)q->state = state; + return MO_ALIVE; + } + + /* crossing over into actual buffer */ + if (cur_start == 0) { + DEBUG_PRINTF("positive location, scanning buffer\n"); + DEBUG_PRINTF("max offset: %lli\n", b_end); + cur_buf = q->buffer; + } + + /* continue scanning the same buffer */ + if (end != cur_end) { + continue; + } + + switch (q_cur_type(q)) { + case MQE_END: + *(u8 *)q->state = state; + q->cur++; + DEBUG_PRINTF("exiting in state %u\n", state & SHENG64_STATE_MASK); + if (can_die) { + return (state & SHENG64_STATE_DEAD) ? MO_DEAD : MO_ALIVE; + } + return MO_ALIVE; + case MQE_TOP: + if (q->offset + cur_start == 0) { + DEBUG_PRINTF("Anchored start, going to state %u\n", + sh->anchored); + state = sh->anchored; + } else { + u8 new_state = get_aux64(sh, state)->top; + DEBUG_PRINTF("Top event %u->%u\n", state & SHENG64_STATE_MASK, + new_state & SHENG64_STATE_MASK); + state = new_state; + } + break; + default: + assert(!"invalid queue event"); + break; + } + q->cur++; + } +} + +char nfaExecSheng64_B(const struct NFA *n, u64a offset, const u8 *buffer, + size_t length, NfaCallback cb, void *context) { + DEBUG_PRINTF("smallwrite Sheng64\n"); + assert(n->type == SHENG_NFA_64); + const struct sheng64 *sh = getImplNfa(n); + u8 state = sh->anchored; + u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE; + u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT; + u8 cached_accept_state = 0; + ReportID cached_accept_id = 0; + + /* scan and report all matches */ + int rv; + s64a end = length; + const u8 *scanned; + + rv = runSheng64Cb(sh, cb, context, offset, &cached_accept_state, + &cached_accept_id, buffer, buffer, buffer + end, can_die, + single, &scanned, &state); + if (rv == MO_DEAD) { + DEBUG_PRINTF("exiting in state %u\n", + state & SHENG64_STATE_MASK); + return MO_DEAD; + } + + DEBUG_PRINTF("%u\n", state & SHENG64_STATE_MASK); + + const struct sstate_aux *aux = get_aux64(sh, state); + + if (aux->accept_eod) { + DEBUG_PRINTF("Reporting EOD matches\n"); + fireReports64(sh, cb, context, state, end + offset, + &cached_accept_state, &cached_accept_id, 1); + } + + return state & SHENG64_STATE_DEAD ? MO_DEAD : MO_ALIVE; +} + +char nfaExecSheng64_Q(const struct NFA *n, struct mq *q, s64a end) { + const struct sheng64 *sh = get_sheng64(n); + char rv = runSheng64(sh, q, end, CALLBACK_OUTPUT); + return rv; +} + +char nfaExecSheng64_Q2(const struct NFA *n, struct mq *q, s64a end) { + const struct sheng64 *sh = get_sheng64(n); + char rv = runSheng64(sh, q, end, STOP_AT_MATCH); + return rv; +} + +char nfaExecSheng64_QR(const struct NFA *n, struct mq *q, ReportID report) { + assert(q_cur_type(q) == MQE_START); + + const struct sheng64 *sh = get_sheng64(n); + char rv = runSheng64(sh, q, 0 /* end */, NO_MATCHES); + + if (rv && nfaExecSheng64_inAccept(n, report, q)) { + return MO_MATCHES_PENDING; + } + return rv; +} + +char nfaExecSheng64_inAccept(const struct NFA *n, ReportID report, + struct mq *q) { + assert(n && q); + + const struct sheng64 *sh = get_sheng64(n); + u8 s = *(const u8 *)q->state; + DEBUG_PRINTF("checking accepts for %u\n", (u8)(s & SHENG64_STATE_MASK)); + + const struct sstate_aux *aux = get_aux64(sh, s); + + if (!aux->accept) { + return 0; + } + + return sheng64HasAccept(sh, aux, report); +} + +char nfaExecSheng64_inAnyAccept(const struct NFA *n, struct mq *q) { + assert(n && q); + + const struct sheng64 *sh = get_sheng64(n); + u8 s = *(const u8 *)q->state; + DEBUG_PRINTF("checking accepts for %u\n", (u8)(s & SHENG64_STATE_MASK)); + + const struct sstate_aux *aux = get_aux64(sh, s); + return !!aux->accept; +} + +char nfaExecSheng64_testEOD(const struct NFA *nfa, const char *state, + UNUSED const char *streamState, u64a offset, + NfaCallback cb, void *ctxt) { + assert(nfa); + + const struct sheng64 *sh = get_sheng64(nfa); + u8 s = *(const u8 *)state; + DEBUG_PRINTF("checking EOD accepts for %u\n", (u8)(s & SHENG64_STATE_MASK)); + + const struct sstate_aux *aux = get_aux64(sh, s); + + if (!aux->accept_eod) { + return MO_CONTINUE_MATCHING; + } + + return fireReports64(sh, cb, ctxt, s, offset, NULL, NULL, 1); +} + +char nfaExecSheng64_reportCurrent(const struct NFA *n, struct mq *q) { + const struct sheng64 *sh = (const struct sheng64 *)getImplNfa(n); + NfaCallback cb = q->cb; + void *ctxt = q->context; + u8 s = *(u8 *)q->state; + const struct sstate_aux *aux = get_aux64(sh, s); + u64a offset = q_cur_offset(q); + u8 cached_state_id = 0; + ReportID cached_report_id = 0; + assert(q_cur_type(q) == MQE_START); + + if (aux->accept) { + if (sh->flags & SHENG_FLAG_SINGLE_REPORT) { + fireSingleReport(cb, ctxt, sh->report, offset); + } else { + fireReports64(sh, cb, ctxt, s, offset, &cached_state_id, + &cached_report_id, 0); + } + } + + return 0; +} + +char nfaExecSheng64_initCompressedState(const struct NFA *nfa, u64a offset, + void *state, UNUSED u8 key) { + const struct sheng64 *sh = get_sheng64(nfa); + u8 *s = (u8 *)state; + *s = offset ? sh->floating: sh->anchored; + return !(*s & SHENG64_STATE_DEAD); +} + +char nfaExecSheng64_queueInitState(const struct NFA *nfa, struct mq *q) { + assert(nfa->scratchStateSize == 1); + + /* starting in floating state */ + const struct sheng64 *sh = get_sheng64(nfa); + *(u8 *)q->state = sh->floating; + DEBUG_PRINTF("starting in floating state\n"); + return 0; +} + +char nfaExecSheng64_queueCompressState(UNUSED const struct NFA *nfa, + const struct mq *q, UNUSED s64a loc) { + void *dest = q->streamState; + const void *src = q->state; + assert(nfa->scratchStateSize == 1); + assert(nfa->streamStateSize == 1); + *(u8 *)dest = *(const u8 *)src; + return 0; +} + +char nfaExecSheng64_expandState(UNUSED const struct NFA *nfa, void *dest, + const void *src, UNUSED u64a offset, + UNUSED u8 key) { + assert(nfa->scratchStateSize == 1); + assert(nfa->streamStateSize == 1); + *(u8 *)dest = *(const u8 *)src; + return 0; +} +#endif // end of HAVE_AVX512VBMI diff --git a/contrib/libs/hyperscan/src/nfa/sheng.h b/contrib/libs/hyperscan/src/nfa/sheng.h index 6111c6dec5..7b90e3034f 100644 --- a/contrib/libs/hyperscan/src/nfa/sheng.h +++ b/contrib/libs/hyperscan/src/nfa/sheng.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Intel Corporation + * Copyright (c) 2016-2020, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -58,86 +58,86 @@ char nfaExecSheng_reportCurrent(const struct NFA *n, struct mq *q); char nfaExecSheng_B(const struct NFA *n, u64a offset, const u8 *buffer, size_t length, NfaCallback cb, void *context); -#if defined(HAVE_AVX512VBMI) -#define nfaExecSheng32_B_Reverse NFA_API_NO_IMPL -#define nfaExecSheng32_zombie_status NFA_API_ZOMBIE_NO_IMPL - -char nfaExecSheng32_Q(const struct NFA *n, struct mq *q, s64a end); -char nfaExecSheng32_Q2(const struct NFA *n, struct mq *q, s64a end); -char nfaExecSheng32_QR(const struct NFA *n, struct mq *q, ReportID report); -char nfaExecSheng32_inAccept(const struct NFA *n, ReportID report, - struct mq *q); -char nfaExecSheng32_inAnyAccept(const struct NFA *n, struct mq *q); -char nfaExecSheng32_queueInitState(const struct NFA *nfa, struct mq *q); -char nfaExecSheng32_queueCompressState(const struct NFA *nfa, - const struct mq *q, s64a loc); -char nfaExecSheng32_expandState(const struct NFA *nfa, void *dest, - const void *src, u64a offset, u8 key); -char nfaExecSheng32_initCompressedState(const struct NFA *nfa, u64a offset, - void *state, u8 key); -char nfaExecSheng32_testEOD(const struct NFA *nfa, const char *state, - const char *streamState, u64a offset, - NfaCallback callback, void *context); -char nfaExecSheng32_reportCurrent(const struct NFA *n, struct mq *q); - -char nfaExecSheng32_B(const struct NFA *n, u64a offset, const u8 *buffer, - size_t length, NfaCallback cb, void *context); - -#define nfaExecSheng64_B_Reverse NFA_API_NO_IMPL -#define nfaExecSheng64_zombie_status NFA_API_ZOMBIE_NO_IMPL - -char nfaExecSheng64_Q(const struct NFA *n, struct mq *q, s64a end); -char nfaExecSheng64_Q2(const struct NFA *n, struct mq *q, s64a end); -char nfaExecSheng64_QR(const struct NFA *n, struct mq *q, ReportID report); -char nfaExecSheng64_inAccept(const struct NFA *n, ReportID report, - struct mq *q); -char nfaExecSheng64_inAnyAccept(const struct NFA *n, struct mq *q); -char nfaExecSheng64_queueInitState(const struct NFA *nfa, struct mq *q); -char nfaExecSheng64_queueCompressState(const struct NFA *nfa, - const struct mq *q, s64a loc); -char nfaExecSheng64_expandState(const struct NFA *nfa, void *dest, - const void *src, u64a offset, u8 key); -char nfaExecSheng64_initCompressedState(const struct NFA *nfa, u64a offset, - void *state, u8 key); -char nfaExecSheng64_testEOD(const struct NFA *nfa, const char *state, - const char *streamState, u64a offset, - NfaCallback callback, void *context); -char nfaExecSheng64_reportCurrent(const struct NFA *n, struct mq *q); - -char nfaExecSheng64_B(const struct NFA *n, u64a offset, const u8 *buffer, - size_t length, NfaCallback cb, void *context); - -#else // !HAVE_AVX512VBMI - -#define nfaExecSheng32_B_Reverse NFA_API_NO_IMPL -#define nfaExecSheng32_zombie_status NFA_API_ZOMBIE_NO_IMPL -#define nfaExecSheng32_Q NFA_API_NO_IMPL -#define nfaExecSheng32_Q2 NFA_API_NO_IMPL -#define nfaExecSheng32_QR NFA_API_NO_IMPL -#define nfaExecSheng32_inAccept NFA_API_NO_IMPL -#define nfaExecSheng32_inAnyAccept NFA_API_NO_IMPL -#define nfaExecSheng32_queueInitState NFA_API_NO_IMPL -#define nfaExecSheng32_queueCompressState NFA_API_NO_IMPL -#define nfaExecSheng32_expandState NFA_API_NO_IMPL -#define nfaExecSheng32_initCompressedState NFA_API_NO_IMPL -#define nfaExecSheng32_testEOD NFA_API_NO_IMPL -#define nfaExecSheng32_reportCurrent NFA_API_NO_IMPL -#define nfaExecSheng32_B NFA_API_NO_IMPL - -#define nfaExecSheng64_B_Reverse NFA_API_NO_IMPL -#define nfaExecSheng64_zombie_status NFA_API_ZOMBIE_NO_IMPL -#define nfaExecSheng64_Q NFA_API_NO_IMPL -#define nfaExecSheng64_Q2 NFA_API_NO_IMPL -#define nfaExecSheng64_QR NFA_API_NO_IMPL -#define nfaExecSheng64_inAccept NFA_API_NO_IMPL -#define nfaExecSheng64_inAnyAccept NFA_API_NO_IMPL -#define nfaExecSheng64_queueInitState NFA_API_NO_IMPL -#define nfaExecSheng64_queueCompressState NFA_API_NO_IMPL -#define nfaExecSheng64_expandState NFA_API_NO_IMPL -#define nfaExecSheng64_initCompressedState NFA_API_NO_IMPL -#define nfaExecSheng64_testEOD NFA_API_NO_IMPL -#define nfaExecSheng64_reportCurrent NFA_API_NO_IMPL -#define nfaExecSheng64_B NFA_API_NO_IMPL -#endif // end of HAVE_AVX512VBMI - +#if defined(HAVE_AVX512VBMI) +#define nfaExecSheng32_B_Reverse NFA_API_NO_IMPL +#define nfaExecSheng32_zombie_status NFA_API_ZOMBIE_NO_IMPL + +char nfaExecSheng32_Q(const struct NFA *n, struct mq *q, s64a end); +char nfaExecSheng32_Q2(const struct NFA *n, struct mq *q, s64a end); +char nfaExecSheng32_QR(const struct NFA *n, struct mq *q, ReportID report); +char nfaExecSheng32_inAccept(const struct NFA *n, ReportID report, + struct mq *q); +char nfaExecSheng32_inAnyAccept(const struct NFA *n, struct mq *q); +char nfaExecSheng32_queueInitState(const struct NFA *nfa, struct mq *q); +char nfaExecSheng32_queueCompressState(const struct NFA *nfa, + const struct mq *q, s64a loc); +char nfaExecSheng32_expandState(const struct NFA *nfa, void *dest, + const void *src, u64a offset, u8 key); +char nfaExecSheng32_initCompressedState(const struct NFA *nfa, u64a offset, + void *state, u8 key); +char nfaExecSheng32_testEOD(const struct NFA *nfa, const char *state, + const char *streamState, u64a offset, + NfaCallback callback, void *context); +char nfaExecSheng32_reportCurrent(const struct NFA *n, struct mq *q); + +char nfaExecSheng32_B(const struct NFA *n, u64a offset, const u8 *buffer, + size_t length, NfaCallback cb, void *context); + +#define nfaExecSheng64_B_Reverse NFA_API_NO_IMPL +#define nfaExecSheng64_zombie_status NFA_API_ZOMBIE_NO_IMPL + +char nfaExecSheng64_Q(const struct NFA *n, struct mq *q, s64a end); +char nfaExecSheng64_Q2(const struct NFA *n, struct mq *q, s64a end); +char nfaExecSheng64_QR(const struct NFA *n, struct mq *q, ReportID report); +char nfaExecSheng64_inAccept(const struct NFA *n, ReportID report, + struct mq *q); +char nfaExecSheng64_inAnyAccept(const struct NFA *n, struct mq *q); +char nfaExecSheng64_queueInitState(const struct NFA *nfa, struct mq *q); +char nfaExecSheng64_queueCompressState(const struct NFA *nfa, + const struct mq *q, s64a loc); +char nfaExecSheng64_expandState(const struct NFA *nfa, void *dest, + const void *src, u64a offset, u8 key); +char nfaExecSheng64_initCompressedState(const struct NFA *nfa, u64a offset, + void *state, u8 key); +char nfaExecSheng64_testEOD(const struct NFA *nfa, const char *state, + const char *streamState, u64a offset, + NfaCallback callback, void *context); +char nfaExecSheng64_reportCurrent(const struct NFA *n, struct mq *q); + +char nfaExecSheng64_B(const struct NFA *n, u64a offset, const u8 *buffer, + size_t length, NfaCallback cb, void *context); + +#else // !HAVE_AVX512VBMI + +#define nfaExecSheng32_B_Reverse NFA_API_NO_IMPL +#define nfaExecSheng32_zombie_status NFA_API_ZOMBIE_NO_IMPL +#define nfaExecSheng32_Q NFA_API_NO_IMPL +#define nfaExecSheng32_Q2 NFA_API_NO_IMPL +#define nfaExecSheng32_QR NFA_API_NO_IMPL +#define nfaExecSheng32_inAccept NFA_API_NO_IMPL +#define nfaExecSheng32_inAnyAccept NFA_API_NO_IMPL +#define nfaExecSheng32_queueInitState NFA_API_NO_IMPL +#define nfaExecSheng32_queueCompressState NFA_API_NO_IMPL +#define nfaExecSheng32_expandState NFA_API_NO_IMPL +#define nfaExecSheng32_initCompressedState NFA_API_NO_IMPL +#define nfaExecSheng32_testEOD NFA_API_NO_IMPL +#define nfaExecSheng32_reportCurrent NFA_API_NO_IMPL +#define nfaExecSheng32_B NFA_API_NO_IMPL + +#define nfaExecSheng64_B_Reverse NFA_API_NO_IMPL +#define nfaExecSheng64_zombie_status NFA_API_ZOMBIE_NO_IMPL +#define nfaExecSheng64_Q NFA_API_NO_IMPL +#define nfaExecSheng64_Q2 NFA_API_NO_IMPL +#define nfaExecSheng64_QR NFA_API_NO_IMPL +#define nfaExecSheng64_inAccept NFA_API_NO_IMPL +#define nfaExecSheng64_inAnyAccept NFA_API_NO_IMPL +#define nfaExecSheng64_queueInitState NFA_API_NO_IMPL +#define nfaExecSheng64_queueCompressState NFA_API_NO_IMPL +#define nfaExecSheng64_expandState NFA_API_NO_IMPL +#define nfaExecSheng64_initCompressedState NFA_API_NO_IMPL +#define nfaExecSheng64_testEOD NFA_API_NO_IMPL +#define nfaExecSheng64_reportCurrent NFA_API_NO_IMPL +#define nfaExecSheng64_B NFA_API_NO_IMPL +#endif // end of HAVE_AVX512VBMI + #endif /* SHENG_H_ */ diff --git a/contrib/libs/hyperscan/src/nfa/sheng_defs.h b/contrib/libs/hyperscan/src/nfa/sheng_defs.h index d14018829c..390af75221 100644 --- a/contrib/libs/hyperscan/src/nfa/sheng_defs.h +++ b/contrib/libs/hyperscan/src/nfa/sheng_defs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Intel Corporation + * Copyright (c) 2016-2020, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -52,43 +52,43 @@ u8 hasInterestingStates(const u8 a, const u8 b, const u8 c, const u8 d) { return (a | b | c | d) & (SHENG_STATE_FLAG_MASK); } -#if defined(HAVE_AVX512VBMI) -static really_inline -u8 isDeadState32(const u8 a) { - return a & SHENG32_STATE_DEAD; -} - -static really_inline -u8 isAcceptState32(const u8 a) { - return a & SHENG32_STATE_ACCEPT; -} - -static really_inline -u8 isAccelState32(const u8 a) { - return a & SHENG32_STATE_ACCEL; -} - -static really_inline -u8 hasInterestingStates32(const u8 a, const u8 b, const u8 c, const u8 d) { - return (a | b | c | d) & (SHENG32_STATE_FLAG_MASK); -} - -static really_inline -u8 isDeadState64(const u8 a) { - return a & SHENG64_STATE_DEAD; -} - -static really_inline -u8 isAcceptState64(const u8 a) { - return a & SHENG64_STATE_ACCEPT; -} - -static really_inline -u8 hasInterestingStates64(const u8 a, const u8 b, const u8 c, const u8 d) { - return (a | b | c | d) & (SHENG64_STATE_FLAG_MASK); -} -#endif - +#if defined(HAVE_AVX512VBMI) +static really_inline +u8 isDeadState32(const u8 a) { + return a & SHENG32_STATE_DEAD; +} + +static really_inline +u8 isAcceptState32(const u8 a) { + return a & SHENG32_STATE_ACCEPT; +} + +static really_inline +u8 isAccelState32(const u8 a) { + return a & SHENG32_STATE_ACCEL; +} + +static really_inline +u8 hasInterestingStates32(const u8 a, const u8 b, const u8 c, const u8 d) { + return (a | b | c | d) & (SHENG32_STATE_FLAG_MASK); +} + +static really_inline +u8 isDeadState64(const u8 a) { + return a & SHENG64_STATE_DEAD; +} + +static really_inline +u8 isAcceptState64(const u8 a) { + return a & SHENG64_STATE_ACCEPT; +} + +static really_inline +u8 hasInterestingStates64(const u8 a, const u8 b, const u8 c, const u8 d) { + return (a | b | c | d) & (SHENG64_STATE_FLAG_MASK); +} +#endif + /* these functions should be optimized out, used by NO_MATCHES mode */ static really_inline u8 dummyFunc4(UNUSED const u8 a, UNUSED const u8 b, UNUSED const u8 c, @@ -108,162 +108,162 @@ u8 dummyFunc(UNUSED const u8 a) { #define SHENG_IMPL sheng_cod #define DEAD_FUNC isDeadState #define ACCEPT_FUNC isAcceptState -#if defined(HAVE_AVX512VBMI) -#define SHENG32_IMPL sheng32_cod -#define DEAD_FUNC32 isDeadState32 -#define ACCEPT_FUNC32 isAcceptState32 -#define SHENG64_IMPL sheng64_cod -#define DEAD_FUNC64 isDeadState64 -#define ACCEPT_FUNC64 isAcceptState64 -#endif +#if defined(HAVE_AVX512VBMI) +#define SHENG32_IMPL sheng32_cod +#define DEAD_FUNC32 isDeadState32 +#define ACCEPT_FUNC32 isAcceptState32 +#define SHENG64_IMPL sheng64_cod +#define DEAD_FUNC64 isDeadState64 +#define ACCEPT_FUNC64 isAcceptState64 +#endif #define STOP_AT_MATCH 0 #include "sheng_impl.h" #undef SHENG_IMPL #undef DEAD_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) -#undef SHENG32_IMPL -#undef DEAD_FUNC32 -#undef ACCEPT_FUNC32 -#undef SHENG64_IMPL -#undef DEAD_FUNC64 -#undef ACCEPT_FUNC64 -#endif +#if defined(HAVE_AVX512VBMI) +#undef SHENG32_IMPL +#undef DEAD_FUNC32 +#undef ACCEPT_FUNC32 +#undef SHENG64_IMPL +#undef DEAD_FUNC64 +#undef ACCEPT_FUNC64 +#endif #undef STOP_AT_MATCH /* callback output, can't die */ #define SHENG_IMPL sheng_co #define DEAD_FUNC dummyFunc #define ACCEPT_FUNC isAcceptState -#if defined(HAVE_AVX512VBMI) -#define SHENG32_IMPL sheng32_co -#define DEAD_FUNC32 dummyFunc -#define ACCEPT_FUNC32 isAcceptState32 -#define SHENG64_IMPL sheng64_co -#define DEAD_FUNC64 dummyFunc -#define ACCEPT_FUNC64 isAcceptState64 -#endif +#if defined(HAVE_AVX512VBMI) +#define SHENG32_IMPL sheng32_co +#define DEAD_FUNC32 dummyFunc +#define ACCEPT_FUNC32 isAcceptState32 +#define SHENG64_IMPL sheng64_co +#define DEAD_FUNC64 dummyFunc +#define ACCEPT_FUNC64 isAcceptState64 +#endif #define STOP_AT_MATCH 0 #include "sheng_impl.h" #undef SHENG_IMPL #undef DEAD_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) -#undef SHENG32_IMPL -#undef DEAD_FUNC32 -#undef ACCEPT_FUNC32 -#undef SHENG64_IMPL -#undef DEAD_FUNC64 -#undef ACCEPT_FUNC64 -#endif +#if defined(HAVE_AVX512VBMI) +#undef SHENG32_IMPL +#undef DEAD_FUNC32 +#undef ACCEPT_FUNC32 +#undef SHENG64_IMPL +#undef DEAD_FUNC64 +#undef ACCEPT_FUNC64 +#endif #undef STOP_AT_MATCH /* stop at match, can die */ #define SHENG_IMPL sheng_samd #define DEAD_FUNC isDeadState #define ACCEPT_FUNC isAcceptState -#if defined(HAVE_AVX512VBMI) -#define SHENG32_IMPL sheng32_samd -#define DEAD_FUNC32 isDeadState32 -#define ACCEPT_FUNC32 isAcceptState32 -#define SHENG64_IMPL sheng64_samd -#define DEAD_FUNC64 isDeadState64 -#define ACCEPT_FUNC64 isAcceptState64 -#endif +#if defined(HAVE_AVX512VBMI) +#define SHENG32_IMPL sheng32_samd +#define DEAD_FUNC32 isDeadState32 +#define ACCEPT_FUNC32 isAcceptState32 +#define SHENG64_IMPL sheng64_samd +#define DEAD_FUNC64 isDeadState64 +#define ACCEPT_FUNC64 isAcceptState64 +#endif #define STOP_AT_MATCH 1 #include "sheng_impl.h" #undef SHENG_IMPL #undef DEAD_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) -#undef SHENG32_IMPL -#undef DEAD_FUNC32 -#undef ACCEPT_FUNC32 -#undef SHENG64_IMPL -#undef DEAD_FUNC64 -#undef ACCEPT_FUNC64 -#endif +#if defined(HAVE_AVX512VBMI) +#undef SHENG32_IMPL +#undef DEAD_FUNC32 +#undef ACCEPT_FUNC32 +#undef SHENG64_IMPL +#undef DEAD_FUNC64 +#undef ACCEPT_FUNC64 +#endif #undef STOP_AT_MATCH /* stop at match, can't die */ #define SHENG_IMPL sheng_sam #define DEAD_FUNC dummyFunc #define ACCEPT_FUNC isAcceptState -#if defined(HAVE_AVX512VBMI) -#define SHENG32_IMPL sheng32_sam -#define DEAD_FUNC32 dummyFunc -#define ACCEPT_FUNC32 isAcceptState32 -#define SHENG64_IMPL sheng64_sam -#define DEAD_FUNC64 dummyFunc -#define ACCEPT_FUNC64 isAcceptState64 -#endif +#if defined(HAVE_AVX512VBMI) +#define SHENG32_IMPL sheng32_sam +#define DEAD_FUNC32 dummyFunc +#define ACCEPT_FUNC32 isAcceptState32 +#define SHENG64_IMPL sheng64_sam +#define DEAD_FUNC64 dummyFunc +#define ACCEPT_FUNC64 isAcceptState64 +#endif #define STOP_AT_MATCH 1 #include "sheng_impl.h" #undef SHENG_IMPL #undef DEAD_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) -#undef SHENG32_IMPL -#undef DEAD_FUNC32 -#undef ACCEPT_FUNC32 -#undef SHENG64_IMPL -#undef DEAD_FUNC64 -#undef ACCEPT_FUNC64 -#endif +#if defined(HAVE_AVX512VBMI) +#undef SHENG32_IMPL +#undef DEAD_FUNC32 +#undef ACCEPT_FUNC32 +#undef SHENG64_IMPL +#undef DEAD_FUNC64 +#undef ACCEPT_FUNC64 +#endif #undef STOP_AT_MATCH /* no match, can die */ #define SHENG_IMPL sheng_nmd #define DEAD_FUNC isDeadState #define ACCEPT_FUNC dummyFunc -#if defined(HAVE_AVX512VBMI) -#define SHENG32_IMPL sheng32_nmd -#define DEAD_FUNC32 isDeadState32 -#define ACCEPT_FUNC32 dummyFunc -#define SHENG64_IMPL sheng64_nmd -#define DEAD_FUNC64 isDeadState64 -#define ACCEPT_FUNC64 dummyFunc -#endif +#if defined(HAVE_AVX512VBMI) +#define SHENG32_IMPL sheng32_nmd +#define DEAD_FUNC32 isDeadState32 +#define ACCEPT_FUNC32 dummyFunc +#define SHENG64_IMPL sheng64_nmd +#define DEAD_FUNC64 isDeadState64 +#define ACCEPT_FUNC64 dummyFunc +#endif #define STOP_AT_MATCH 0 #include "sheng_impl.h" #undef SHENG_IMPL #undef DEAD_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) -#undef SHENG32_IMPL -#undef DEAD_FUNC32 -#undef ACCEPT_FUNC32 -#undef SHENG64_IMPL -#undef DEAD_FUNC64 -#undef ACCEPT_FUNC64 -#endif +#if defined(HAVE_AVX512VBMI) +#undef SHENG32_IMPL +#undef DEAD_FUNC32 +#undef ACCEPT_FUNC32 +#undef SHENG64_IMPL +#undef DEAD_FUNC64 +#undef ACCEPT_FUNC64 +#endif #undef STOP_AT_MATCH /* no match, can't die */ #define SHENG_IMPL sheng_nm #define DEAD_FUNC dummyFunc #define ACCEPT_FUNC dummyFunc -#if defined(HAVE_AVX512VBMI) -#define SHENG32_IMPL sheng32_nm -#define DEAD_FUNC32 dummyFunc -#define ACCEPT_FUNC32 dummyFunc -#define SHENG64_IMPL sheng64_nm -#define DEAD_FUNC64 dummyFunc -#define ACCEPT_FUNC64 dummyFunc -#endif +#if defined(HAVE_AVX512VBMI) +#define SHENG32_IMPL sheng32_nm +#define DEAD_FUNC32 dummyFunc +#define ACCEPT_FUNC32 dummyFunc +#define SHENG64_IMPL sheng64_nm +#define DEAD_FUNC64 dummyFunc +#define ACCEPT_FUNC64 dummyFunc +#endif #define STOP_AT_MATCH 0 #include "sheng_impl.h" #undef SHENG_IMPL #undef DEAD_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) -#undef SHENG32_IMPL -#undef DEAD_FUNC32 -#undef ACCEPT_FUNC32 -#undef SHENG64_IMPL -#undef DEAD_FUNC64 -#undef ACCEPT_FUNC64 -#endif +#if defined(HAVE_AVX512VBMI) +#undef SHENG32_IMPL +#undef DEAD_FUNC32 +#undef ACCEPT_FUNC32 +#undef SHENG64_IMPL +#undef DEAD_FUNC64 +#undef ACCEPT_FUNC64 +#endif #undef STOP_AT_MATCH /* @@ -277,16 +277,16 @@ u8 dummyFunc(UNUSED const u8 a) { #define INNER_ACCEL_FUNC isAccelState #define OUTER_ACCEL_FUNC dummyFunc #define ACCEPT_FUNC isAcceptState -#if defined(HAVE_AVX512VBMI) -#define SHENG32_IMPL sheng32_4_coda -#define INTERESTING_FUNC32 hasInterestingStates32 -#define INNER_DEAD_FUNC32 isDeadState32 -#define OUTER_DEAD_FUNC32 dummyFunc -#define INNER_ACCEL_FUNC32 isAccelState32 -#define OUTER_ACCEL_FUNC32 dummyFunc -#define ACCEPT_FUNC32 isAcceptState32 -#define NO_SHENG64_IMPL -#endif +#if defined(HAVE_AVX512VBMI) +#define SHENG32_IMPL sheng32_4_coda +#define INTERESTING_FUNC32 hasInterestingStates32 +#define INNER_DEAD_FUNC32 isDeadState32 +#define OUTER_DEAD_FUNC32 dummyFunc +#define INNER_ACCEL_FUNC32 isAccelState32 +#define OUTER_ACCEL_FUNC32 dummyFunc +#define ACCEPT_FUNC32 isAcceptState32 +#define NO_SHENG64_IMPL +#endif #define STOP_AT_MATCH 0 #include "sheng_impl4.h" #undef SHENG_IMPL @@ -296,16 +296,16 @@ u8 dummyFunc(UNUSED const u8 a) { #undef INNER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) -#undef SHENG32_IMPL -#undef INTERESTING_FUNC32 -#undef INNER_DEAD_FUNC32 -#undef OUTER_DEAD_FUNC32 -#undef INNER_ACCEL_FUNC32 -#undef OUTER_ACCEL_FUNC32 -#undef ACCEPT_FUNC32 -#undef NO_SHENG64_IMPL -#endif +#if defined(HAVE_AVX512VBMI) +#undef SHENG32_IMPL +#undef INTERESTING_FUNC32 +#undef INNER_DEAD_FUNC32 +#undef OUTER_DEAD_FUNC32 +#undef INNER_ACCEL_FUNC32 +#undef OUTER_ACCEL_FUNC32 +#undef ACCEPT_FUNC32 +#undef NO_SHENG64_IMPL +#endif #undef STOP_AT_MATCH /* callback output, can die, not accelerated */ @@ -316,20 +316,20 @@ u8 dummyFunc(UNUSED const u8 a) { #define INNER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC dummyFunc #define ACCEPT_FUNC isAcceptState -#if defined(HAVE_AVX512VBMI) -#define SHENG32_IMPL sheng32_4_cod -#define INTERESTING_FUNC32 hasInterestingStates32 -#define INNER_DEAD_FUNC32 isDeadState32 -#define OUTER_DEAD_FUNC32 dummyFunc -#define INNER_ACCEL_FUNC32 dummyFunc -#define OUTER_ACCEL_FUNC32 dummyFunc -#define ACCEPT_FUNC32 isAcceptState32 -#define SHENG64_IMPL sheng64_4_cod -#define INTERESTING_FUNC64 hasInterestingStates64 -#define INNER_DEAD_FUNC64 isDeadState64 -#define OUTER_DEAD_FUNC64 dummyFunc -#define ACCEPT_FUNC64 isAcceptState64 -#endif +#if defined(HAVE_AVX512VBMI) +#define SHENG32_IMPL sheng32_4_cod +#define INTERESTING_FUNC32 hasInterestingStates32 +#define INNER_DEAD_FUNC32 isDeadState32 +#define OUTER_DEAD_FUNC32 dummyFunc +#define INNER_ACCEL_FUNC32 dummyFunc +#define OUTER_ACCEL_FUNC32 dummyFunc +#define ACCEPT_FUNC32 isAcceptState32 +#define SHENG64_IMPL sheng64_4_cod +#define INTERESTING_FUNC64 hasInterestingStates64 +#define INNER_DEAD_FUNC64 isDeadState64 +#define OUTER_DEAD_FUNC64 dummyFunc +#define ACCEPT_FUNC64 isAcceptState64 +#endif #define STOP_AT_MATCH 0 #include "sheng_impl4.h" #undef SHENG_IMPL @@ -339,20 +339,20 @@ u8 dummyFunc(UNUSED const u8 a) { #undef INNER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) -#undef SHENG32_IMPL -#undef INTERESTING_FUNC32 -#undef INNER_DEAD_FUNC32 -#undef OUTER_DEAD_FUNC32 -#undef INNER_ACCEL_FUNC32 -#undef OUTER_ACCEL_FUNC32 -#undef ACCEPT_FUNC32 -#undef SHENG64_IMPL -#undef INTERESTING_FUNC64 -#undef INNER_DEAD_FUNC64 -#undef OUTER_DEAD_FUNC64 -#undef ACCEPT_FUNC64 -#endif +#if defined(HAVE_AVX512VBMI) +#undef SHENG32_IMPL +#undef INTERESTING_FUNC32 +#undef INNER_DEAD_FUNC32 +#undef OUTER_DEAD_FUNC32 +#undef INNER_ACCEL_FUNC32 +#undef OUTER_ACCEL_FUNC32 +#undef ACCEPT_FUNC32 +#undef SHENG64_IMPL +#undef INTERESTING_FUNC64 +#undef INNER_DEAD_FUNC64 +#undef OUTER_DEAD_FUNC64 +#undef ACCEPT_FUNC64 +#endif #undef STOP_AT_MATCH /* callback output, can't die, accelerated */ @@ -363,16 +363,16 @@ u8 dummyFunc(UNUSED const u8 a) { #define INNER_ACCEL_FUNC isAccelState #define OUTER_ACCEL_FUNC dummyFunc #define ACCEPT_FUNC isAcceptState -#if defined(HAVE_AVX512VBMI) -#define SHENG32_IMPL sheng32_4_coa -#define INTERESTING_FUNC32 hasInterestingStates32 -#define INNER_DEAD_FUNC32 dummyFunc -#define OUTER_DEAD_FUNC32 dummyFunc -#define INNER_ACCEL_FUNC32 isAccelState32 -#define OUTER_ACCEL_FUNC32 dummyFunc -#define ACCEPT_FUNC32 isAcceptState32 -#define NO_SHENG64_IMPL -#endif +#if defined(HAVE_AVX512VBMI) +#define SHENG32_IMPL sheng32_4_coa +#define INTERESTING_FUNC32 hasInterestingStates32 +#define INNER_DEAD_FUNC32 dummyFunc +#define OUTER_DEAD_FUNC32 dummyFunc +#define INNER_ACCEL_FUNC32 isAccelState32 +#define OUTER_ACCEL_FUNC32 dummyFunc +#define ACCEPT_FUNC32 isAcceptState32 +#define NO_SHENG64_IMPL +#endif #define STOP_AT_MATCH 0 #include "sheng_impl4.h" #undef SHENG_IMPL @@ -382,16 +382,16 @@ u8 dummyFunc(UNUSED const u8 a) { #undef INNER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) -#undef SHENG32_IMPL -#undef INTERESTING_FUNC32 -#undef INNER_DEAD_FUNC32 -#undef OUTER_DEAD_FUNC32 -#undef INNER_ACCEL_FUNC32 -#undef OUTER_ACCEL_FUNC32 -#undef ACCEPT_FUNC32 -#undef NO_SHENG64_IMPL -#endif +#if defined(HAVE_AVX512VBMI) +#undef SHENG32_IMPL +#undef INTERESTING_FUNC32 +#undef INNER_DEAD_FUNC32 +#undef OUTER_DEAD_FUNC32 +#undef INNER_ACCEL_FUNC32 +#undef OUTER_ACCEL_FUNC32 +#undef ACCEPT_FUNC32 +#undef NO_SHENG64_IMPL +#endif #undef STOP_AT_MATCH /* callback output, can't die, not accelerated */ @@ -402,20 +402,20 @@ u8 dummyFunc(UNUSED const u8 a) { #define INNER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC dummyFunc #define ACCEPT_FUNC isAcceptState -#if defined(HAVE_AVX512VBMI) -#define SHENG32_IMPL sheng32_4_co -#define INTERESTING_FUNC32 hasInterestingStates32 -#define INNER_DEAD_FUNC32 dummyFunc -#define OUTER_DEAD_FUNC32 dummyFunc -#define INNER_ACCEL_FUNC32 dummyFunc -#define OUTER_ACCEL_FUNC32 dummyFunc -#define ACCEPT_FUNC32 isAcceptState32 -#define SHENG64_IMPL sheng64_4_co -#define INTERESTING_FUNC64 hasInterestingStates64 -#define INNER_DEAD_FUNC64 dummyFunc -#define OUTER_DEAD_FUNC64 dummyFunc -#define ACCEPT_FUNC64 isAcceptState64 -#endif +#if defined(HAVE_AVX512VBMI) +#define SHENG32_IMPL sheng32_4_co +#define INTERESTING_FUNC32 hasInterestingStates32 +#define INNER_DEAD_FUNC32 dummyFunc +#define OUTER_DEAD_FUNC32 dummyFunc +#define INNER_ACCEL_FUNC32 dummyFunc +#define OUTER_ACCEL_FUNC32 dummyFunc +#define ACCEPT_FUNC32 isAcceptState32 +#define SHENG64_IMPL sheng64_4_co +#define INTERESTING_FUNC64 hasInterestingStates64 +#define INNER_DEAD_FUNC64 dummyFunc +#define OUTER_DEAD_FUNC64 dummyFunc +#define ACCEPT_FUNC64 isAcceptState64 +#endif #define STOP_AT_MATCH 0 #include "sheng_impl4.h" #undef SHENG_IMPL @@ -425,20 +425,20 @@ u8 dummyFunc(UNUSED const u8 a) { #undef INNER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) -#undef SHENG32_IMPL -#undef INTERESTING_FUNC32 -#undef INNER_DEAD_FUNC32 -#undef OUTER_DEAD_FUNC32 -#undef INNER_ACCEL_FUNC32 -#undef OUTER_ACCEL_FUNC32 -#undef ACCEPT_FUNC32 -#undef SHENG64_IMPL -#undef INTERESTING_FUNC64 -#undef INNER_DEAD_FUNC64 -#undef OUTER_DEAD_FUNC64 -#undef ACCEPT_FUNC64 -#endif +#if defined(HAVE_AVX512VBMI) +#undef SHENG32_IMPL +#undef INTERESTING_FUNC32 +#undef INNER_DEAD_FUNC32 +#undef OUTER_DEAD_FUNC32 +#undef INNER_ACCEL_FUNC32 +#undef OUTER_ACCEL_FUNC32 +#undef ACCEPT_FUNC32 +#undef SHENG64_IMPL +#undef INTERESTING_FUNC64 +#undef INNER_DEAD_FUNC64 +#undef OUTER_DEAD_FUNC64 +#undef ACCEPT_FUNC64 +#endif #undef STOP_AT_MATCH /* stop at match, can die, accelerated */ @@ -449,16 +449,16 @@ u8 dummyFunc(UNUSED const u8 a) { #define INNER_ACCEL_FUNC isAccelState #define OUTER_ACCEL_FUNC dummyFunc #define ACCEPT_FUNC isAcceptState -#if defined(HAVE_AVX512VBMI) -#define SHENG32_IMPL sheng32_4_samda -#define INTERESTING_FUNC32 hasInterestingStates32 -#define INNER_DEAD_FUNC32 isDeadState32 -#define OUTER_DEAD_FUNC32 dummyFunc -#define INNER_ACCEL_FUNC32 isAccelState32 -#define OUTER_ACCEL_FUNC32 dummyFunc -#define ACCEPT_FUNC32 isAcceptState32 -#define NO_SHENG64_IMPL -#endif +#if defined(HAVE_AVX512VBMI) +#define SHENG32_IMPL sheng32_4_samda +#define INTERESTING_FUNC32 hasInterestingStates32 +#define INNER_DEAD_FUNC32 isDeadState32 +#define OUTER_DEAD_FUNC32 dummyFunc +#define INNER_ACCEL_FUNC32 isAccelState32 +#define OUTER_ACCEL_FUNC32 dummyFunc +#define ACCEPT_FUNC32 isAcceptState32 +#define NO_SHENG64_IMPL +#endif #define STOP_AT_MATCH 1 #include "sheng_impl4.h" #undef SHENG_IMPL @@ -468,16 +468,16 @@ u8 dummyFunc(UNUSED const u8 a) { #undef INNER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) -#undef SHENG32_IMPL -#undef INTERESTING_FUNC32 -#undef INNER_DEAD_FUNC32 -#undef OUTER_DEAD_FUNC32 -#undef INNER_ACCEL_FUNC32 -#undef OUTER_ACCEL_FUNC32 -#undef ACCEPT_FUNC32 -#undef NO_SHENG64_IMPL -#endif +#if defined(HAVE_AVX512VBMI) +#undef SHENG32_IMPL +#undef INTERESTING_FUNC32 +#undef INNER_DEAD_FUNC32 +#undef OUTER_DEAD_FUNC32 +#undef INNER_ACCEL_FUNC32 +#undef OUTER_ACCEL_FUNC32 +#undef ACCEPT_FUNC32 +#undef NO_SHENG64_IMPL +#endif #undef STOP_AT_MATCH /* stop at match, can die, not accelerated */ @@ -488,20 +488,20 @@ u8 dummyFunc(UNUSED const u8 a) { #define INNER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC dummyFunc #define ACCEPT_FUNC isAcceptState -#if defined(HAVE_AVX512VBMI) -#define SHENG32_IMPL sheng32_4_samd -#define INTERESTING_FUNC32 hasInterestingStates32 -#define INNER_DEAD_FUNC32 isDeadState32 -#define OUTER_DEAD_FUNC32 dummyFunc -#define INNER_ACCEL_FUNC32 dummyFunc -#define OUTER_ACCEL_FUNC32 dummyFunc -#define ACCEPT_FUNC32 isAcceptState32 -#define SHENG64_IMPL sheng64_4_samd -#define INTERESTING_FUNC64 hasInterestingStates64 -#define INNER_DEAD_FUNC64 isDeadState64 -#define OUTER_DEAD_FUNC64 dummyFunc -#define ACCEPT_FUNC64 isAcceptState64 -#endif +#if defined(HAVE_AVX512VBMI) +#define SHENG32_IMPL sheng32_4_samd +#define INTERESTING_FUNC32 hasInterestingStates32 +#define INNER_DEAD_FUNC32 isDeadState32 +#define OUTER_DEAD_FUNC32 dummyFunc +#define INNER_ACCEL_FUNC32 dummyFunc +#define OUTER_ACCEL_FUNC32 dummyFunc +#define ACCEPT_FUNC32 isAcceptState32 +#define SHENG64_IMPL sheng64_4_samd +#define INTERESTING_FUNC64 hasInterestingStates64 +#define INNER_DEAD_FUNC64 isDeadState64 +#define OUTER_DEAD_FUNC64 dummyFunc +#define ACCEPT_FUNC64 isAcceptState64 +#endif #define STOP_AT_MATCH 1 #include "sheng_impl4.h" #undef SHENG_IMPL @@ -511,20 +511,20 @@ u8 dummyFunc(UNUSED const u8 a) { #undef INNER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) -#undef SHENG32_IMPL -#undef INTERESTING_FUNC32 -#undef INNER_DEAD_FUNC32 -#undef OUTER_DEAD_FUNC32 -#undef INNER_ACCEL_FUNC32 -#undef OUTER_ACCEL_FUNC32 -#undef ACCEPT_FUNC32 -#undef SHENG64_IMPL -#undef INTERESTING_FUNC64 -#undef INNER_DEAD_FUNC64 -#undef OUTER_DEAD_FUNC64 -#undef ACCEPT_FUNC64 -#endif +#if defined(HAVE_AVX512VBMI) +#undef SHENG32_IMPL +#undef INTERESTING_FUNC32 +#undef INNER_DEAD_FUNC32 +#undef OUTER_DEAD_FUNC32 +#undef INNER_ACCEL_FUNC32 +#undef OUTER_ACCEL_FUNC32 +#undef ACCEPT_FUNC32 +#undef SHENG64_IMPL +#undef INTERESTING_FUNC64 +#undef INNER_DEAD_FUNC64 +#undef OUTER_DEAD_FUNC64 +#undef ACCEPT_FUNC64 +#endif #undef STOP_AT_MATCH /* stop at match, can't die, accelerated */ @@ -535,16 +535,16 @@ u8 dummyFunc(UNUSED const u8 a) { #define INNER_ACCEL_FUNC isAccelState #define OUTER_ACCEL_FUNC dummyFunc #define ACCEPT_FUNC isAcceptState -#if defined(HAVE_AVX512VBMI) -#define SHENG32_IMPL sheng32_4_sama -#define INTERESTING_FUNC32 hasInterestingStates32 -#define INNER_DEAD_FUNC32 dummyFunc -#define OUTER_DEAD_FUNC32 dummyFunc -#define INNER_ACCEL_FUNC32 isAccelState32 -#define OUTER_ACCEL_FUNC32 dummyFunc -#define ACCEPT_FUNC32 isAcceptState32 -#define NO_SHENG64_IMPL -#endif +#if defined(HAVE_AVX512VBMI) +#define SHENG32_IMPL sheng32_4_sama +#define INTERESTING_FUNC32 hasInterestingStates32 +#define INNER_DEAD_FUNC32 dummyFunc +#define OUTER_DEAD_FUNC32 dummyFunc +#define INNER_ACCEL_FUNC32 isAccelState32 +#define OUTER_ACCEL_FUNC32 dummyFunc +#define ACCEPT_FUNC32 isAcceptState32 +#define NO_SHENG64_IMPL +#endif #define STOP_AT_MATCH 1 #include "sheng_impl4.h" #undef SHENG_IMPL @@ -554,16 +554,16 @@ u8 dummyFunc(UNUSED const u8 a) { #undef INNER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) -#undef SHENG32_IMPL -#undef INTERESTING_FUNC32 -#undef INNER_DEAD_FUNC32 -#undef OUTER_DEAD_FUNC32 -#undef INNER_ACCEL_FUNC32 -#undef OUTER_ACCEL_FUNC32 -#undef ACCEPT_FUNC32 -#undef NO_SHENG64_IMPL -#endif +#if defined(HAVE_AVX512VBMI) +#undef SHENG32_IMPL +#undef INTERESTING_FUNC32 +#undef INNER_DEAD_FUNC32 +#undef OUTER_DEAD_FUNC32 +#undef INNER_ACCEL_FUNC32 +#undef OUTER_ACCEL_FUNC32 +#undef ACCEPT_FUNC32 +#undef NO_SHENG64_IMPL +#endif #undef STOP_AT_MATCH /* stop at match, can't die, not accelerated */ @@ -574,20 +574,20 @@ u8 dummyFunc(UNUSED const u8 a) { #define INNER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC dummyFunc #define ACCEPT_FUNC isAcceptState -#if defined(HAVE_AVX512VBMI) -#define SHENG32_IMPL sheng32_4_sam -#define INTERESTING_FUNC32 hasInterestingStates32 -#define INNER_DEAD_FUNC32 dummyFunc -#define OUTER_DEAD_FUNC32 dummyFunc -#define INNER_ACCEL_FUNC32 dummyFunc -#define OUTER_ACCEL_FUNC32 dummyFunc -#define ACCEPT_FUNC32 isAcceptState32 -#define SHENG64_IMPL sheng64_4_sam -#define INTERESTING_FUNC64 hasInterestingStates64 -#define INNER_DEAD_FUNC64 dummyFunc -#define OUTER_DEAD_FUNC64 dummyFunc -#define ACCEPT_FUNC64 isAcceptState64 -#endif +#if defined(HAVE_AVX512VBMI) +#define SHENG32_IMPL sheng32_4_sam +#define INTERESTING_FUNC32 hasInterestingStates32 +#define INNER_DEAD_FUNC32 dummyFunc +#define OUTER_DEAD_FUNC32 dummyFunc +#define INNER_ACCEL_FUNC32 dummyFunc +#define OUTER_ACCEL_FUNC32 dummyFunc +#define ACCEPT_FUNC32 isAcceptState32 +#define SHENG64_IMPL sheng64_4_sam +#define INTERESTING_FUNC64 hasInterestingStates64 +#define INNER_DEAD_FUNC64 dummyFunc +#define OUTER_DEAD_FUNC64 dummyFunc +#define ACCEPT_FUNC64 isAcceptState64 +#endif #define STOP_AT_MATCH 1 #include "sheng_impl4.h" #undef SHENG_IMPL @@ -597,20 +597,20 @@ u8 dummyFunc(UNUSED const u8 a) { #undef INNER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) -#undef SHENG32_IMPL -#undef INTERESTING_FUNC32 -#undef INNER_DEAD_FUNC32 -#undef OUTER_DEAD_FUNC32 -#undef INNER_ACCEL_FUNC32 -#undef OUTER_ACCEL_FUNC32 -#undef ACCEPT_FUNC32 -#undef SHENG64_IMPL -#undef INTERESTING_FUNC64 -#undef INNER_DEAD_FUNC64 -#undef OUTER_DEAD_FUNC64 -#undef ACCEPT_FUNC64 -#endif +#if defined(HAVE_AVX512VBMI) +#undef SHENG32_IMPL +#undef INTERESTING_FUNC32 +#undef INNER_DEAD_FUNC32 +#undef OUTER_DEAD_FUNC32 +#undef INNER_ACCEL_FUNC32 +#undef OUTER_ACCEL_FUNC32 +#undef ACCEPT_FUNC32 +#undef SHENG64_IMPL +#undef INTERESTING_FUNC64 +#undef INNER_DEAD_FUNC64 +#undef OUTER_DEAD_FUNC64 +#undef ACCEPT_FUNC64 +#endif #undef STOP_AT_MATCH /* no-match have interesting func as dummy, and die/accel checks are outer */ @@ -623,16 +623,16 @@ u8 dummyFunc(UNUSED const u8 a) { #define INNER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC isAccelState #define ACCEPT_FUNC dummyFunc -#if defined(HAVE_AVX512VBMI) -#define SHENG32_IMPL sheng32_4_nmda -#define INTERESTING_FUNC32 dummyFunc4 -#define INNER_DEAD_FUNC32 dummyFunc -#define OUTER_DEAD_FUNC32 isDeadState32 -#define INNER_ACCEL_FUNC32 dummyFunc -#define OUTER_ACCEL_FUNC32 isAccelState32 -#define ACCEPT_FUNC32 dummyFunc -#define NO_SHENG64_IMPL -#endif +#if defined(HAVE_AVX512VBMI) +#define SHENG32_IMPL sheng32_4_nmda +#define INTERESTING_FUNC32 dummyFunc4 +#define INNER_DEAD_FUNC32 dummyFunc +#define OUTER_DEAD_FUNC32 isDeadState32 +#define INNER_ACCEL_FUNC32 dummyFunc +#define OUTER_ACCEL_FUNC32 isAccelState32 +#define ACCEPT_FUNC32 dummyFunc +#define NO_SHENG64_IMPL +#endif #define STOP_AT_MATCH 0 #include "sheng_impl4.h" #undef SHENG_IMPL @@ -642,16 +642,16 @@ u8 dummyFunc(UNUSED const u8 a) { #undef INNER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) -#undef SHENG32_IMPL -#undef INTERESTING_FUNC32 -#undef INNER_DEAD_FUNC32 -#undef OUTER_DEAD_FUNC32 -#undef INNER_ACCEL_FUNC32 -#undef OUTER_ACCEL_FUNC32 -#undef ACCEPT_FUNC32 -#undef NO_SHENG64_IMPL -#endif +#if defined(HAVE_AVX512VBMI) +#undef SHENG32_IMPL +#undef INTERESTING_FUNC32 +#undef INNER_DEAD_FUNC32 +#undef OUTER_DEAD_FUNC32 +#undef INNER_ACCEL_FUNC32 +#undef OUTER_ACCEL_FUNC32 +#undef ACCEPT_FUNC32 +#undef NO_SHENG64_IMPL +#endif #undef STOP_AT_MATCH /* no match, can die, not accelerated */ @@ -662,20 +662,20 @@ u8 dummyFunc(UNUSED const u8 a) { #define INNER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC dummyFunc #define ACCEPT_FUNC dummyFunc -#if defined(HAVE_AVX512VBMI) -#define SHENG32_IMPL sheng32_4_nmd -#define INTERESTING_FUNC32 dummyFunc4 -#define INNER_DEAD_FUNC32 dummyFunc -#define OUTER_DEAD_FUNC32 isDeadState32 -#define INNER_ACCEL_FUNC32 dummyFunc -#define OUTER_ACCEL_FUNC32 dummyFunc -#define ACCEPT_FUNC32 dummyFunc -#define SHENG64_IMPL sheng64_4_nmd -#define INTERESTING_FUNC64 dummyFunc4 -#define INNER_DEAD_FUNC64 dummyFunc -#define OUTER_DEAD_FUNC64 isDeadState64 -#define ACCEPT_FUNC64 dummyFunc -#endif +#if defined(HAVE_AVX512VBMI) +#define SHENG32_IMPL sheng32_4_nmd +#define INTERESTING_FUNC32 dummyFunc4 +#define INNER_DEAD_FUNC32 dummyFunc +#define OUTER_DEAD_FUNC32 isDeadState32 +#define INNER_ACCEL_FUNC32 dummyFunc +#define OUTER_ACCEL_FUNC32 dummyFunc +#define ACCEPT_FUNC32 dummyFunc +#define SHENG64_IMPL sheng64_4_nmd +#define INTERESTING_FUNC64 dummyFunc4 +#define INNER_DEAD_FUNC64 dummyFunc +#define OUTER_DEAD_FUNC64 isDeadState64 +#define ACCEPT_FUNC64 dummyFunc +#endif #define STOP_AT_MATCH 0 #include "sheng_impl4.h" #undef SHENG_IMPL @@ -685,20 +685,20 @@ u8 dummyFunc(UNUSED const u8 a) { #undef INNER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) -#undef SHENG32_IMPL -#undef INTERESTING_FUNC32 -#undef INNER_DEAD_FUNC32 -#undef OUTER_DEAD_FUNC32 -#undef INNER_ACCEL_FUNC32 -#undef OUTER_ACCEL_FUNC32 -#undef ACCEPT_FUNC32 -#undef SHENG64_IMPL -#undef INTERESTING_FUNC64 -#undef INNER_DEAD_FUNC64 -#undef OUTER_DEAD_FUNC64 -#undef ACCEPT_FUNC64 -#endif +#if defined(HAVE_AVX512VBMI) +#undef SHENG32_IMPL +#undef INTERESTING_FUNC32 +#undef INNER_DEAD_FUNC32 +#undef OUTER_DEAD_FUNC32 +#undef INNER_ACCEL_FUNC32 +#undef OUTER_ACCEL_FUNC32 +#undef ACCEPT_FUNC32 +#undef SHENG64_IMPL +#undef INTERESTING_FUNC64 +#undef INNER_DEAD_FUNC64 +#undef OUTER_DEAD_FUNC64 +#undef ACCEPT_FUNC64 +#endif #undef STOP_AT_MATCH /* there is no performance benefit in accelerating a no-match case that can't @@ -712,20 +712,20 @@ u8 dummyFunc(UNUSED const u8 a) { #define INNER_ACCEL_FUNC dummyFunc #define OUTER_ACCEL_FUNC dummyFunc #define ACCEPT_FUNC dummyFunc -#if defined(HAVE_AVX512VBMI) -#define SHENG32_IMPL sheng32_4_nm -#define INTERESTING_FUNC32 dummyFunc4 -#define INNER_DEAD_FUNC32 dummyFunc -#define OUTER_DEAD_FUNC32 dummyFunc -#define INNER_ACCEL_FUNC32 dummyFunc -#define OUTER_ACCEL_FUNC32 dummyFunc -#define ACCEPT_FUNC32 dummyFunc -#define SHENG64_IMPL sheng64_4_nm -#define INTERESTING_FUNC64 dummyFunc4 -#define INNER_DEAD_FUNC64 dummyFunc -#define OUTER_DEAD_FUNC64 dummyFunc -#define ACCEPT_FUNC64 dummyFunc -#endif +#if defined(HAVE_AVX512VBMI) +#define SHENG32_IMPL sheng32_4_nm +#define INTERESTING_FUNC32 dummyFunc4 +#define INNER_DEAD_FUNC32 dummyFunc +#define OUTER_DEAD_FUNC32 dummyFunc +#define INNER_ACCEL_FUNC32 dummyFunc +#define OUTER_ACCEL_FUNC32 dummyFunc +#define ACCEPT_FUNC32 dummyFunc +#define SHENG64_IMPL sheng64_4_nm +#define INTERESTING_FUNC64 dummyFunc4 +#define INNER_DEAD_FUNC64 dummyFunc +#define OUTER_DEAD_FUNC64 dummyFunc +#define ACCEPT_FUNC64 dummyFunc +#endif #define STOP_AT_MATCH 0 #include "sheng_impl4.h" #undef SHENG_IMPL @@ -735,20 +735,20 @@ u8 dummyFunc(UNUSED const u8 a) { #undef INNER_ACCEL_FUNC #undef OUTER_ACCEL_FUNC #undef ACCEPT_FUNC -#if defined(HAVE_AVX512VBMI) -#undef SHENG32_IMPL -#undef INTERESTING_FUNC32 -#undef INNER_DEAD_FUNC32 -#undef OUTER_DEAD_FUNC32 -#undef INNER_ACCEL_FUNC32 -#undef OUTER_ACCEL_FUNC32 -#undef ACCEPT_FUNC32 -#undef SHENG64_IMPL -#undef INTERESTING_FUNC64 -#undef INNER_DEAD_FUNC64 -#undef OUTER_DEAD_FUNC64 -#undef ACCEPT_FUNC64 -#endif +#if defined(HAVE_AVX512VBMI) +#undef SHENG32_IMPL +#undef INTERESTING_FUNC32 +#undef INNER_DEAD_FUNC32 +#undef OUTER_DEAD_FUNC32 +#undef INNER_ACCEL_FUNC32 +#undef OUTER_ACCEL_FUNC32 +#undef ACCEPT_FUNC32 +#undef SHENG64_IMPL +#undef INTERESTING_FUNC64 +#undef INNER_DEAD_FUNC64 +#undef OUTER_DEAD_FUNC64 +#undef ACCEPT_FUNC64 +#endif #undef STOP_AT_MATCH #endif // SHENG_DEFS_H diff --git a/contrib/libs/hyperscan/src/nfa/sheng_impl.h b/contrib/libs/hyperscan/src/nfa/sheng_impl.h index 924296699f..fb8ee16834 100644 --- a/contrib/libs/hyperscan/src/nfa/sheng_impl.h +++ b/contrib/libs/hyperscan/src/nfa/sheng_impl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Intel Corporation + * Copyright (c) 2016-2020, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -95,127 +95,127 @@ char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s, *scan_end = cur_buf; return MO_CONTINUE_MATCHING; } - -#if defined(HAVE_AVX512VBMI) -static really_inline -char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt, - const struct sheng32 *s, - u8 *const cached_accept_state, - ReportID *const cached_accept_id, - u8 single, u64a base_offset, const u8 *buf, const u8 *start, - const u8 *end, const u8 **scan_end) { - DEBUG_PRINTF("Starting DFA execution in state %u\n", - *state & SHENG32_STATE_MASK); - const u8 *cur_buf = start; - if (DEAD_FUNC32(*state)) { - DEBUG_PRINTF("Dead on arrival\n"); - *scan_end = end; - return MO_CONTINUE_MATCHING; - } - DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start)); - - m512 cur_state = set64x8(*state); - const m512 *masks = s->succ_masks; - - while (likely(cur_buf != end)) { - const u8 c = *cur_buf; - const m512 succ_mask = masks[c]; - cur_state = vpermb512(cur_state, succ_mask); - const u8 tmp = movd512(cur_state); - - DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?'); - DEBUG_PRINTF("s: %u (flag: %u)\n", tmp & SHENG32_STATE_MASK, - tmp & SHENG32_STATE_FLAG_MASK); - - if (unlikely(ACCEPT_FUNC32(tmp))) { - DEBUG_PRINTF("Accept state %u reached\n", tmp & SHENG32_STATE_MASK); - u64a match_offset = base_offset + (cur_buf - buf) + 1; - DEBUG_PRINTF("Match @ %llu\n", match_offset); - if (STOP_AT_MATCH) { - DEBUG_PRINTF("Stopping at match @ %lli\n", - (u64a)(cur_buf - start)); - *state = tmp; - *scan_end = cur_buf; - return MO_MATCHES_PENDING; - } - if (single) { - if (fireSingleReport(cb, ctxt, s->report, match_offset) == - MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } else { - if (fireReports32(s, cb, ctxt, tmp, match_offset, - cached_accept_state, cached_accept_id, - 0) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - } - cur_buf++; - } - *state = movd512(cur_state); - *scan_end = cur_buf; - return MO_CONTINUE_MATCHING; -} - -static really_inline -char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt, - const struct sheng64 *s, - u8 *const cached_accept_state, - ReportID *const cached_accept_id, - u8 single, u64a base_offset, const u8 *buf, const u8 *start, - const u8 *end, const u8 **scan_end) { - DEBUG_PRINTF("Starting DFA execution in state %u\n", - *state & SHENG64_STATE_MASK); - const u8 *cur_buf = start; - if (DEAD_FUNC64(*state)) { - DEBUG_PRINTF("Dead on arrival\n"); - *scan_end = end; - return MO_CONTINUE_MATCHING; - } - DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start)); - - m512 cur_state = set64x8(*state); - const m512 *masks = s->succ_masks; - - while (likely(cur_buf != end)) { - const u8 c = *cur_buf; - const m512 succ_mask = masks[c]; - cur_state = vpermb512(cur_state, succ_mask); - const u8 tmp = movd512(cur_state); - - DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?'); - DEBUG_PRINTF("s: %u (flag: %u)\n", tmp & SHENG64_STATE_MASK, - tmp & SHENG64_STATE_FLAG_MASK); - - if (unlikely(ACCEPT_FUNC64(tmp))) { - DEBUG_PRINTF("Accept state %u reached\n", tmp & SHENG64_STATE_MASK); - u64a match_offset = base_offset + (cur_buf - buf) + 1; - DEBUG_PRINTF("Match @ %llu\n", match_offset); - if (STOP_AT_MATCH) { - DEBUG_PRINTF("Stopping at match @ %lli\n", - (u64a)(cur_buf - start)); - *state = tmp; - *scan_end = cur_buf; - return MO_MATCHES_PENDING; - } - if (single) { - if (fireSingleReport(cb, ctxt, s->report, match_offset) == - MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } else { - if (fireReports64(s, cb, ctxt, tmp, match_offset, - cached_accept_state, cached_accept_id, - 0) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - } - cur_buf++; - } - *state = movd512(cur_state); - *scan_end = cur_buf; - return MO_CONTINUE_MATCHING; -} -#endif + +#if defined(HAVE_AVX512VBMI) +static really_inline +char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt, + const struct sheng32 *s, + u8 *const cached_accept_state, + ReportID *const cached_accept_id, + u8 single, u64a base_offset, const u8 *buf, const u8 *start, + const u8 *end, const u8 **scan_end) { + DEBUG_PRINTF("Starting DFA execution in state %u\n", + *state & SHENG32_STATE_MASK); + const u8 *cur_buf = start; + if (DEAD_FUNC32(*state)) { + DEBUG_PRINTF("Dead on arrival\n"); + *scan_end = end; + return MO_CONTINUE_MATCHING; + } + DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start)); + + m512 cur_state = set64x8(*state); + const m512 *masks = s->succ_masks; + + while (likely(cur_buf != end)) { + const u8 c = *cur_buf; + const m512 succ_mask = masks[c]; + cur_state = vpermb512(cur_state, succ_mask); + const u8 tmp = movd512(cur_state); + + DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?'); + DEBUG_PRINTF("s: %u (flag: %u)\n", tmp & SHENG32_STATE_MASK, + tmp & SHENG32_STATE_FLAG_MASK); + + if (unlikely(ACCEPT_FUNC32(tmp))) { + DEBUG_PRINTF("Accept state %u reached\n", tmp & SHENG32_STATE_MASK); + u64a match_offset = base_offset + (cur_buf - buf) + 1; + DEBUG_PRINTF("Match @ %llu\n", match_offset); + if (STOP_AT_MATCH) { + DEBUG_PRINTF("Stopping at match @ %lli\n", + (u64a)(cur_buf - start)); + *state = tmp; + *scan_end = cur_buf; + return MO_MATCHES_PENDING; + } + if (single) { + if (fireSingleReport(cb, ctxt, s->report, match_offset) == + MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } else { + if (fireReports32(s, cb, ctxt, tmp, match_offset, + cached_accept_state, cached_accept_id, + 0) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } + } + cur_buf++; + } + *state = movd512(cur_state); + *scan_end = cur_buf; + return MO_CONTINUE_MATCHING; +} + +static really_inline +char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt, + const struct sheng64 *s, + u8 *const cached_accept_state, + ReportID *const cached_accept_id, + u8 single, u64a base_offset, const u8 *buf, const u8 *start, + const u8 *end, const u8 **scan_end) { + DEBUG_PRINTF("Starting DFA execution in state %u\n", + *state & SHENG64_STATE_MASK); + const u8 *cur_buf = start; + if (DEAD_FUNC64(*state)) { + DEBUG_PRINTF("Dead on arrival\n"); + *scan_end = end; + return MO_CONTINUE_MATCHING; + } + DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start)); + + m512 cur_state = set64x8(*state); + const m512 *masks = s->succ_masks; + + while (likely(cur_buf != end)) { + const u8 c = *cur_buf; + const m512 succ_mask = masks[c]; + cur_state = vpermb512(cur_state, succ_mask); + const u8 tmp = movd512(cur_state); + + DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?'); + DEBUG_PRINTF("s: %u (flag: %u)\n", tmp & SHENG64_STATE_MASK, + tmp & SHENG64_STATE_FLAG_MASK); + + if (unlikely(ACCEPT_FUNC64(tmp))) { + DEBUG_PRINTF("Accept state %u reached\n", tmp & SHENG64_STATE_MASK); + u64a match_offset = base_offset + (cur_buf - buf) + 1; + DEBUG_PRINTF("Match @ %llu\n", match_offset); + if (STOP_AT_MATCH) { + DEBUG_PRINTF("Stopping at match @ %lli\n", + (u64a)(cur_buf - start)); + *state = tmp; + *scan_end = cur_buf; + return MO_MATCHES_PENDING; + } + if (single) { + if (fireSingleReport(cb, ctxt, s->report, match_offset) == + MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } else { + if (fireReports64(s, cb, ctxt, tmp, match_offset, + cached_accept_state, cached_accept_id, + 0) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } + } + cur_buf++; + } + *state = movd512(cur_state); + *scan_end = cur_buf; + return MO_CONTINUE_MATCHING; +} +#endif diff --git a/contrib/libs/hyperscan/src/nfa/sheng_impl4.h b/contrib/libs/hyperscan/src/nfa/sheng_impl4.h index e033cdadf0..440e7396e2 100644 --- a/contrib/libs/hyperscan/src/nfa/sheng_impl4.h +++ b/contrib/libs/hyperscan/src/nfa/sheng_impl4.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Intel Corporation + * Copyright (c) 2016-2020, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -282,430 +282,430 @@ char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s, *scan_end = cur_buf; return MO_CONTINUE_MATCHING; } - -#if defined(HAVE_AVX512VBMI) -static really_inline -char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt, - const struct sheng32 *s, - u8 *const cached_accept_state, - ReportID *const cached_accept_id, - u8 single, u64a base_offset, const u8 *buf, const u8 *start, - const u8 *end, const u8 **scan_end) { - DEBUG_PRINTF("Starting DFAx4 execution in state %u\n", - *state & SHENG32_STATE_MASK); - const u8 *cur_buf = start; - const u8 *min_accel_dist = start; - base_offset++; - DEBUG_PRINTF("Scanning %llu bytes\n", (u64a)(end - start)); - - if (INNER_ACCEL_FUNC32(*state) || OUTER_ACCEL_FUNC32(*state)) { - DEBUG_PRINTF("Accel state reached @ 0\n"); - const union AccelAux *aaux = - get_accel32(s, *state & SHENG32_STATE_MASK); - const u8 *new_offset = run_accel(aaux, cur_buf, end); - if (new_offset < cur_buf + BAD_ACCEL_DIST) { - min_accel_dist = new_offset + BIG_ACCEL_PENALTY; - } else { - min_accel_dist = new_offset + SMALL_ACCEL_PENALTY; - } - DEBUG_PRINTF("Next accel chance: %llu\n", - (u64a)(min_accel_dist - start)); - DEBUG_PRINTF("Accel scanned %zu bytes\n", new_offset - cur_buf); - cur_buf = new_offset; - DEBUG_PRINTF("New offset: %lli\n", (s64a)(cur_buf - start)); - } - if (INNER_DEAD_FUNC32(*state) || OUTER_DEAD_FUNC32(*state)) { - DEBUG_PRINTF("Dead on arrival\n"); - *scan_end = end; - return MO_CONTINUE_MATCHING; - } - - m512 cur_state = set64x8(*state); - const m512 *masks = s->succ_masks; - - while (likely(end - cur_buf >= 4)) { - const u8 *b1 = cur_buf; - const u8 *b2 = cur_buf + 1; - const u8 *b3 = cur_buf + 2; - const u8 *b4 = cur_buf + 3; - const u8 c1 = *b1; - const u8 c2 = *b2; - const u8 c3 = *b3; - const u8 c4 = *b4; - - const m512 succ_mask1 = masks[c1]; - cur_state = vpermb512(cur_state, succ_mask1); - const u8 a1 = movd512(cur_state); - - const m512 succ_mask2 = masks[c2]; - cur_state = vpermb512(cur_state, succ_mask2); - const u8 a2 = movd512(cur_state); - - const m512 succ_mask3 = masks[c3]; - cur_state = vpermb512(cur_state, succ_mask3); - const u8 a3 = movd512(cur_state); - - const m512 succ_mask4 = masks[c4]; - cur_state = vpermb512(cur_state, succ_mask4); - const u8 a4 = movd512(cur_state); - - DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?'); - DEBUG_PRINTF("s: %u (flag: %u)\n", a1 & SHENG32_STATE_MASK, - a1 & SHENG32_STATE_FLAG_MASK); - - DEBUG_PRINTF("c: %02hhx '%c'\n", c2, ourisprint(c2) ? c2 : '?'); - DEBUG_PRINTF("s: %u (flag: %u)\n", a2 & SHENG32_STATE_MASK, - a2 & SHENG32_STATE_FLAG_MASK); - - DEBUG_PRINTF("c: %02hhx '%c'\n", c3, ourisprint(c3) ? c3 : '?'); - DEBUG_PRINTF("s: %u (flag: %u)\n", a3 & SHENG32_STATE_MASK, - a3 & SHENG32_STATE_FLAG_MASK); - - DEBUG_PRINTF("c: %02hhx '%c'\n", c4, ourisprint(c4) ? c4 : '?'); - DEBUG_PRINTF("s: %u (flag: %u)\n", a4 & SHENG32_STATE_MASK, - a4 & SHENG32_STATE_FLAG_MASK); - - if (unlikely(INTERESTING_FUNC32(a1, a2, a3, a4))) { - if (ACCEPT_FUNC32(a1)) { - u64a match_offset = base_offset + b1 - buf; - DEBUG_PRINTF("Accept state %u reached\n", - a1 & SHENG32_STATE_MASK); - DEBUG_PRINTF("Match @ %llu\n", match_offset); - if (STOP_AT_MATCH) { - DEBUG_PRINTF("Stopping at match @ %lli\n", - (s64a)(b1 - start)); - *scan_end = b1; - *state = a1; - return MO_MATCHES_PENDING; - } - if (single) { - if (fireSingleReport(cb, ctxt, s->report, match_offset) == - MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } else { - if (fireReports32(s, cb, ctxt, a1, match_offset, - cached_accept_state, cached_accept_id, - 0) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - } - if (ACCEPT_FUNC32(a2)) { - u64a match_offset = base_offset + b2 - buf; - DEBUG_PRINTF("Accept state %u reached\n", - a2 & SHENG32_STATE_MASK); - DEBUG_PRINTF("Match @ %llu\n", match_offset); - if (STOP_AT_MATCH) { - DEBUG_PRINTF("Stopping at match @ %lli\n", - (s64a)(b2 - start)); - *scan_end = b2; - *state = a2; - return MO_MATCHES_PENDING; - } - if (single) { - if (fireSingleReport(cb, ctxt, s->report, match_offset) == - MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } else { - if (fireReports32(s, cb, ctxt, a2, match_offset, - cached_accept_state, cached_accept_id, - 0) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - } - if (ACCEPT_FUNC32(a3)) { - u64a match_offset = base_offset + b3 - buf; - DEBUG_PRINTF("Accept state %u reached\n", - a3 & SHENG32_STATE_MASK); - DEBUG_PRINTF("Match @ %llu\n", match_offset); - if (STOP_AT_MATCH) { - DEBUG_PRINTF("Stopping at match @ %lli\n", - (s64a)(b3 - start)); - *scan_end = b3; - *state = a3; - return MO_MATCHES_PENDING; - } - if (single) { - if (fireSingleReport(cb, ctxt, s->report, match_offset) == - MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } else { - if (fireReports32(s, cb, ctxt, a3, match_offset, - cached_accept_state, cached_accept_id, - 0) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - } - if (ACCEPT_FUNC32(a4)) { - u64a match_offset = base_offset + b4 - buf; - DEBUG_PRINTF("Accept state %u reached\n", - a4 & SHENG32_STATE_MASK); - DEBUG_PRINTF("Match @ %llu\n", match_offset); - if (STOP_AT_MATCH) { - DEBUG_PRINTF("Stopping at match @ %lli\n", - (s64a)(b4 - start)); - *scan_end = b4; - *state = a4; - return MO_MATCHES_PENDING; - } - if (single) { - if (fireSingleReport(cb, ctxt, s->report, match_offset) == - MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } else { - if (fireReports32(s, cb, ctxt, a4, match_offset, - cached_accept_state, cached_accept_id, - 0) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - } - if (INNER_DEAD_FUNC32(a4)) { - DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(b4 - buf)); - *scan_end = end; - *state = a4; - return MO_CONTINUE_MATCHING; - } - if (cur_buf > min_accel_dist && INNER_ACCEL_FUNC32(a4)) { - DEBUG_PRINTF("Accel state reached @ %lli\n", (s64a)(b4 - buf)); - const union AccelAux *aaux = - get_accel32(s, a4 & SHENG32_STATE_MASK); - const u8 *new_offset = run_accel(aaux, cur_buf + 4, end); - if (new_offset < cur_buf + 4 + BAD_ACCEL_DIST) { - min_accel_dist = new_offset + BIG_ACCEL_PENALTY; - } else { - min_accel_dist = new_offset + SMALL_ACCEL_PENALTY; - } - DEBUG_PRINTF("Next accel chance: %llu\n", - (u64a)(min_accel_dist - start)); - DEBUG_PRINTF("Accel scanned %llu bytes\n", - (u64a)(new_offset - cur_buf - 4)); - cur_buf = new_offset; - DEBUG_PRINTF("New offset: %llu\n", (u64a)(cur_buf - buf)); - continue; - } - } - if (OUTER_DEAD_FUNC32(a4)) { - DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(cur_buf - buf)); - *scan_end = end; - *state = a4; - return MO_CONTINUE_MATCHING; - }; - if (cur_buf > min_accel_dist && OUTER_ACCEL_FUNC32(a4)) { - DEBUG_PRINTF("Accel state reached @ %lli\n", (s64a)(b4 - buf)); - const union AccelAux *aaux = - get_accel32(s, a4 & SHENG32_STATE_MASK); - const u8 *new_offset = run_accel(aaux, cur_buf + 4, end); - if (new_offset < cur_buf + 4 + BAD_ACCEL_DIST) { - min_accel_dist = new_offset + BIG_ACCEL_PENALTY; - } else { - min_accel_dist = new_offset + SMALL_ACCEL_PENALTY; - } - DEBUG_PRINTF("Next accel chance: %llu\n", - (u64a)(min_accel_dist - start)); - DEBUG_PRINTF("Accel scanned %llu bytes\n", - (u64a)(new_offset - cur_buf - 4)); - cur_buf = new_offset; - DEBUG_PRINTF("New offset: %llu\n", (u64a)(cur_buf - buf)); - continue; - }; - cur_buf += 4; - } - *state = movd512(cur_state); - *scan_end = cur_buf; - return MO_CONTINUE_MATCHING; -} - -#ifndef NO_SHENG64_IMPL -static really_inline -char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt, - const struct sheng64 *s, - u8 *const cached_accept_state, - ReportID *const cached_accept_id, - u8 single, u64a base_offset, const u8 *buf, const u8 *start, - const u8 *end, const u8 **scan_end) { - DEBUG_PRINTF("Starting DFAx4 execution in state %u\n", - *state & SHENG64_STATE_MASK); - const u8 *cur_buf = start; - base_offset++; - DEBUG_PRINTF("Scanning %llu bytes\n", (u64a)(end - start)); - - if (INNER_DEAD_FUNC64(*state) || OUTER_DEAD_FUNC64(*state)) { - DEBUG_PRINTF("Dead on arrival\n"); - *scan_end = end; - return MO_CONTINUE_MATCHING; - } - - m512 cur_state = set64x8(*state); - const m512 *masks = s->succ_masks; - - while (likely(end - cur_buf >= 4)) { - const u8 *b1 = cur_buf; - const u8 *b2 = cur_buf + 1; - const u8 *b3 = cur_buf + 2; - const u8 *b4 = cur_buf + 3; - const u8 c1 = *b1; - const u8 c2 = *b2; - const u8 c3 = *b3; - const u8 c4 = *b4; - - const m512 succ_mask1 = masks[c1]; - cur_state = vpermb512(cur_state, succ_mask1); - const u8 a1 = movd512(cur_state); - - const m512 succ_mask2 = masks[c2]; - cur_state = vpermb512(cur_state, succ_mask2); - const u8 a2 = movd512(cur_state); - - const m512 succ_mask3 = masks[c3]; - cur_state = vpermb512(cur_state, succ_mask3); - const u8 a3 = movd512(cur_state); - - const m512 succ_mask4 = masks[c4]; - cur_state = vpermb512(cur_state, succ_mask4); - const u8 a4 = movd512(cur_state); - - DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?'); - DEBUG_PRINTF("s: %u (flag: %u)\n", a1 & SHENG64_STATE_MASK, - a1 & SHENG64_STATE_FLAG_MASK); - - DEBUG_PRINTF("c: %02hhx '%c'\n", c2, ourisprint(c2) ? c2 : '?'); - DEBUG_PRINTF("s: %u (flag: %u)\n", a2 & SHENG64_STATE_MASK, - a2 & SHENG64_STATE_FLAG_MASK); - - DEBUG_PRINTF("c: %02hhx '%c'\n", c3, ourisprint(c3) ? c3 : '?'); - DEBUG_PRINTF("s: %u (flag: %u)\n", a3 & SHENG64_STATE_MASK, - a3 & SHENG64_STATE_FLAG_MASK); - - DEBUG_PRINTF("c: %02hhx '%c'\n", c4, ourisprint(c4) ? c4 : '?'); - DEBUG_PRINTF("s: %u (flag: %u)\n", a4 & SHENG64_STATE_MASK, - a4 & SHENG64_STATE_FLAG_MASK); - - if (unlikely(INTERESTING_FUNC64(a1, a2, a3, a4))) { - if (ACCEPT_FUNC64(a1)) { - u64a match_offset = base_offset + b1 - buf; - DEBUG_PRINTF("Accept state %u reached\n", - a1 & SHENG64_STATE_MASK); - DEBUG_PRINTF("Match @ %llu\n", match_offset); - if (STOP_AT_MATCH) { - DEBUG_PRINTF("Stopping at match @ %lli\n", - (s64a)(b1 - start)); - *scan_end = b1; - *state = a1; - return MO_MATCHES_PENDING; - } - if (single) { - if (fireSingleReport(cb, ctxt, s->report, match_offset) == - MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } else { - if (fireReports64(s, cb, ctxt, a1, match_offset, - cached_accept_state, cached_accept_id, - 0) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - } - if (ACCEPT_FUNC64(a2)) { - u64a match_offset = base_offset + b2 - buf; - DEBUG_PRINTF("Accept state %u reached\n", - a2 & SHENG64_STATE_MASK); - DEBUG_PRINTF("Match @ %llu\n", match_offset); - if (STOP_AT_MATCH) { - DEBUG_PRINTF("Stopping at match @ %lli\n", - (s64a)(b2 - start)); - *scan_end = b2; - *state = a2; - return MO_MATCHES_PENDING; - } - if (single) { - if (fireSingleReport(cb, ctxt, s->report, match_offset) == - MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } else { - if (fireReports64(s, cb, ctxt, a2, match_offset, - cached_accept_state, cached_accept_id, - 0) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - } - if (ACCEPT_FUNC64(a3)) { - u64a match_offset = base_offset + b3 - buf; - DEBUG_PRINTF("Accept state %u reached\n", - a3 & SHENG64_STATE_MASK); - DEBUG_PRINTF("Match @ %llu\n", match_offset); - if (STOP_AT_MATCH) { - DEBUG_PRINTF("Stopping at match @ %lli\n", - (s64a)(b3 - start)); - *scan_end = b3; - *state = a3; - return MO_MATCHES_PENDING; - } - if (single) { - if (fireSingleReport(cb, ctxt, s->report, match_offset) == - MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } else { - if (fireReports64(s, cb, ctxt, a3, match_offset, - cached_accept_state, cached_accept_id, - 0) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - } - if (ACCEPT_FUNC64(a4)) { - u64a match_offset = base_offset + b4 - buf; - DEBUG_PRINTF("Accept state %u reached\n", - a4 & SHENG64_STATE_MASK); - DEBUG_PRINTF("Match @ %llu\n", match_offset); - if (STOP_AT_MATCH) { - DEBUG_PRINTF("Stopping at match @ %lli\n", - (s64a)(b4 - start)); - *scan_end = b4; - *state = a4; - return MO_MATCHES_PENDING; - } - if (single) { - if (fireSingleReport(cb, ctxt, s->report, match_offset) == - MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } else { - if (fireReports64(s, cb, ctxt, a4, match_offset, - cached_accept_state, cached_accept_id, - 0) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - } - if (INNER_DEAD_FUNC64(a4)) { - DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(b4 - buf)); - *scan_end = end; - *state = a4; - return MO_CONTINUE_MATCHING; - } - } - if (OUTER_DEAD_FUNC64(a4)) { - DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(cur_buf - buf)); - *scan_end = end; - *state = a4; - return MO_CONTINUE_MATCHING; - } - cur_buf += 4; - } - *state = movd512(cur_state); - *scan_end = cur_buf; - return MO_CONTINUE_MATCHING; -} -#endif // !NO_SHENG64_IMPL -#endif + +#if defined(HAVE_AVX512VBMI) +static really_inline +char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt, + const struct sheng32 *s, + u8 *const cached_accept_state, + ReportID *const cached_accept_id, + u8 single, u64a base_offset, const u8 *buf, const u8 *start, + const u8 *end, const u8 **scan_end) { + DEBUG_PRINTF("Starting DFAx4 execution in state %u\n", + *state & SHENG32_STATE_MASK); + const u8 *cur_buf = start; + const u8 *min_accel_dist = start; + base_offset++; + DEBUG_PRINTF("Scanning %llu bytes\n", (u64a)(end - start)); + + if (INNER_ACCEL_FUNC32(*state) || OUTER_ACCEL_FUNC32(*state)) { + DEBUG_PRINTF("Accel state reached @ 0\n"); + const union AccelAux *aaux = + get_accel32(s, *state & SHENG32_STATE_MASK); + const u8 *new_offset = run_accel(aaux, cur_buf, end); + if (new_offset < cur_buf + BAD_ACCEL_DIST) { + min_accel_dist = new_offset + BIG_ACCEL_PENALTY; + } else { + min_accel_dist = new_offset + SMALL_ACCEL_PENALTY; + } + DEBUG_PRINTF("Next accel chance: %llu\n", + (u64a)(min_accel_dist - start)); + DEBUG_PRINTF("Accel scanned %zu bytes\n", new_offset - cur_buf); + cur_buf = new_offset; + DEBUG_PRINTF("New offset: %lli\n", (s64a)(cur_buf - start)); + } + if (INNER_DEAD_FUNC32(*state) || OUTER_DEAD_FUNC32(*state)) { + DEBUG_PRINTF("Dead on arrival\n"); + *scan_end = end; + return MO_CONTINUE_MATCHING; + } + + m512 cur_state = set64x8(*state); + const m512 *masks = s->succ_masks; + + while (likely(end - cur_buf >= 4)) { + const u8 *b1 = cur_buf; + const u8 *b2 = cur_buf + 1; + const u8 *b3 = cur_buf + 2; + const u8 *b4 = cur_buf + 3; + const u8 c1 = *b1; + const u8 c2 = *b2; + const u8 c3 = *b3; + const u8 c4 = *b4; + + const m512 succ_mask1 = masks[c1]; + cur_state = vpermb512(cur_state, succ_mask1); + const u8 a1 = movd512(cur_state); + + const m512 succ_mask2 = masks[c2]; + cur_state = vpermb512(cur_state, succ_mask2); + const u8 a2 = movd512(cur_state); + + const m512 succ_mask3 = masks[c3]; + cur_state = vpermb512(cur_state, succ_mask3); + const u8 a3 = movd512(cur_state); + + const m512 succ_mask4 = masks[c4]; + cur_state = vpermb512(cur_state, succ_mask4); + const u8 a4 = movd512(cur_state); + + DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?'); + DEBUG_PRINTF("s: %u (flag: %u)\n", a1 & SHENG32_STATE_MASK, + a1 & SHENG32_STATE_FLAG_MASK); + + DEBUG_PRINTF("c: %02hhx '%c'\n", c2, ourisprint(c2) ? c2 : '?'); + DEBUG_PRINTF("s: %u (flag: %u)\n", a2 & SHENG32_STATE_MASK, + a2 & SHENG32_STATE_FLAG_MASK); + + DEBUG_PRINTF("c: %02hhx '%c'\n", c3, ourisprint(c3) ? c3 : '?'); + DEBUG_PRINTF("s: %u (flag: %u)\n", a3 & SHENG32_STATE_MASK, + a3 & SHENG32_STATE_FLAG_MASK); + + DEBUG_PRINTF("c: %02hhx '%c'\n", c4, ourisprint(c4) ? c4 : '?'); + DEBUG_PRINTF("s: %u (flag: %u)\n", a4 & SHENG32_STATE_MASK, + a4 & SHENG32_STATE_FLAG_MASK); + + if (unlikely(INTERESTING_FUNC32(a1, a2, a3, a4))) { + if (ACCEPT_FUNC32(a1)) { + u64a match_offset = base_offset + b1 - buf; + DEBUG_PRINTF("Accept state %u reached\n", + a1 & SHENG32_STATE_MASK); + DEBUG_PRINTF("Match @ %llu\n", match_offset); + if (STOP_AT_MATCH) { + DEBUG_PRINTF("Stopping at match @ %lli\n", + (s64a)(b1 - start)); + *scan_end = b1; + *state = a1; + return MO_MATCHES_PENDING; + } + if (single) { + if (fireSingleReport(cb, ctxt, s->report, match_offset) == + MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } else { + if (fireReports32(s, cb, ctxt, a1, match_offset, + cached_accept_state, cached_accept_id, + 0) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } + } + if (ACCEPT_FUNC32(a2)) { + u64a match_offset = base_offset + b2 - buf; + DEBUG_PRINTF("Accept state %u reached\n", + a2 & SHENG32_STATE_MASK); + DEBUG_PRINTF("Match @ %llu\n", match_offset); + if (STOP_AT_MATCH) { + DEBUG_PRINTF("Stopping at match @ %lli\n", + (s64a)(b2 - start)); + *scan_end = b2; + *state = a2; + return MO_MATCHES_PENDING; + } + if (single) { + if (fireSingleReport(cb, ctxt, s->report, match_offset) == + MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } else { + if (fireReports32(s, cb, ctxt, a2, match_offset, + cached_accept_state, cached_accept_id, + 0) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } + } + if (ACCEPT_FUNC32(a3)) { + u64a match_offset = base_offset + b3 - buf; + DEBUG_PRINTF("Accept state %u reached\n", + a3 & SHENG32_STATE_MASK); + DEBUG_PRINTF("Match @ %llu\n", match_offset); + if (STOP_AT_MATCH) { + DEBUG_PRINTF("Stopping at match @ %lli\n", + (s64a)(b3 - start)); + *scan_end = b3; + *state = a3; + return MO_MATCHES_PENDING; + } + if (single) { + if (fireSingleReport(cb, ctxt, s->report, match_offset) == + MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } else { + if (fireReports32(s, cb, ctxt, a3, match_offset, + cached_accept_state, cached_accept_id, + 0) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } + } + if (ACCEPT_FUNC32(a4)) { + u64a match_offset = base_offset + b4 - buf; + DEBUG_PRINTF("Accept state %u reached\n", + a4 & SHENG32_STATE_MASK); + DEBUG_PRINTF("Match @ %llu\n", match_offset); + if (STOP_AT_MATCH) { + DEBUG_PRINTF("Stopping at match @ %lli\n", + (s64a)(b4 - start)); + *scan_end = b4; + *state = a4; + return MO_MATCHES_PENDING; + } + if (single) { + if (fireSingleReport(cb, ctxt, s->report, match_offset) == + MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } else { + if (fireReports32(s, cb, ctxt, a4, match_offset, + cached_accept_state, cached_accept_id, + 0) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } + } + if (INNER_DEAD_FUNC32(a4)) { + DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(b4 - buf)); + *scan_end = end; + *state = a4; + return MO_CONTINUE_MATCHING; + } + if (cur_buf > min_accel_dist && INNER_ACCEL_FUNC32(a4)) { + DEBUG_PRINTF("Accel state reached @ %lli\n", (s64a)(b4 - buf)); + const union AccelAux *aaux = + get_accel32(s, a4 & SHENG32_STATE_MASK); + const u8 *new_offset = run_accel(aaux, cur_buf + 4, end); + if (new_offset < cur_buf + 4 + BAD_ACCEL_DIST) { + min_accel_dist = new_offset + BIG_ACCEL_PENALTY; + } else { + min_accel_dist = new_offset + SMALL_ACCEL_PENALTY; + } + DEBUG_PRINTF("Next accel chance: %llu\n", + (u64a)(min_accel_dist - start)); + DEBUG_PRINTF("Accel scanned %llu bytes\n", + (u64a)(new_offset - cur_buf - 4)); + cur_buf = new_offset; + DEBUG_PRINTF("New offset: %llu\n", (u64a)(cur_buf - buf)); + continue; + } + } + if (OUTER_DEAD_FUNC32(a4)) { + DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(cur_buf - buf)); + *scan_end = end; + *state = a4; + return MO_CONTINUE_MATCHING; + }; + if (cur_buf > min_accel_dist && OUTER_ACCEL_FUNC32(a4)) { + DEBUG_PRINTF("Accel state reached @ %lli\n", (s64a)(b4 - buf)); + const union AccelAux *aaux = + get_accel32(s, a4 & SHENG32_STATE_MASK); + const u8 *new_offset = run_accel(aaux, cur_buf + 4, end); + if (new_offset < cur_buf + 4 + BAD_ACCEL_DIST) { + min_accel_dist = new_offset + BIG_ACCEL_PENALTY; + } else { + min_accel_dist = new_offset + SMALL_ACCEL_PENALTY; + } + DEBUG_PRINTF("Next accel chance: %llu\n", + (u64a)(min_accel_dist - start)); + DEBUG_PRINTF("Accel scanned %llu bytes\n", + (u64a)(new_offset - cur_buf - 4)); + cur_buf = new_offset; + DEBUG_PRINTF("New offset: %llu\n", (u64a)(cur_buf - buf)); + continue; + }; + cur_buf += 4; + } + *state = movd512(cur_state); + *scan_end = cur_buf; + return MO_CONTINUE_MATCHING; +} + +#ifndef NO_SHENG64_IMPL +static really_inline +char SHENG64_IMPL(u8 *state, NfaCallback cb, void *ctxt, + const struct sheng64 *s, + u8 *const cached_accept_state, + ReportID *const cached_accept_id, + u8 single, u64a base_offset, const u8 *buf, const u8 *start, + const u8 *end, const u8 **scan_end) { + DEBUG_PRINTF("Starting DFAx4 execution in state %u\n", + *state & SHENG64_STATE_MASK); + const u8 *cur_buf = start; + base_offset++; + DEBUG_PRINTF("Scanning %llu bytes\n", (u64a)(end - start)); + + if (INNER_DEAD_FUNC64(*state) || OUTER_DEAD_FUNC64(*state)) { + DEBUG_PRINTF("Dead on arrival\n"); + *scan_end = end; + return MO_CONTINUE_MATCHING; + } + + m512 cur_state = set64x8(*state); + const m512 *masks = s->succ_masks; + + while (likely(end - cur_buf >= 4)) { + const u8 *b1 = cur_buf; + const u8 *b2 = cur_buf + 1; + const u8 *b3 = cur_buf + 2; + const u8 *b4 = cur_buf + 3; + const u8 c1 = *b1; + const u8 c2 = *b2; + const u8 c3 = *b3; + const u8 c4 = *b4; + + const m512 succ_mask1 = masks[c1]; + cur_state = vpermb512(cur_state, succ_mask1); + const u8 a1 = movd512(cur_state); + + const m512 succ_mask2 = masks[c2]; + cur_state = vpermb512(cur_state, succ_mask2); + const u8 a2 = movd512(cur_state); + + const m512 succ_mask3 = masks[c3]; + cur_state = vpermb512(cur_state, succ_mask3); + const u8 a3 = movd512(cur_state); + + const m512 succ_mask4 = masks[c4]; + cur_state = vpermb512(cur_state, succ_mask4); + const u8 a4 = movd512(cur_state); + + DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?'); + DEBUG_PRINTF("s: %u (flag: %u)\n", a1 & SHENG64_STATE_MASK, + a1 & SHENG64_STATE_FLAG_MASK); + + DEBUG_PRINTF("c: %02hhx '%c'\n", c2, ourisprint(c2) ? c2 : '?'); + DEBUG_PRINTF("s: %u (flag: %u)\n", a2 & SHENG64_STATE_MASK, + a2 & SHENG64_STATE_FLAG_MASK); + + DEBUG_PRINTF("c: %02hhx '%c'\n", c3, ourisprint(c3) ? c3 : '?'); + DEBUG_PRINTF("s: %u (flag: %u)\n", a3 & SHENG64_STATE_MASK, + a3 & SHENG64_STATE_FLAG_MASK); + + DEBUG_PRINTF("c: %02hhx '%c'\n", c4, ourisprint(c4) ? c4 : '?'); + DEBUG_PRINTF("s: %u (flag: %u)\n", a4 & SHENG64_STATE_MASK, + a4 & SHENG64_STATE_FLAG_MASK); + + if (unlikely(INTERESTING_FUNC64(a1, a2, a3, a4))) { + if (ACCEPT_FUNC64(a1)) { + u64a match_offset = base_offset + b1 - buf; + DEBUG_PRINTF("Accept state %u reached\n", + a1 & SHENG64_STATE_MASK); + DEBUG_PRINTF("Match @ %llu\n", match_offset); + if (STOP_AT_MATCH) { + DEBUG_PRINTF("Stopping at match @ %lli\n", + (s64a)(b1 - start)); + *scan_end = b1; + *state = a1; + return MO_MATCHES_PENDING; + } + if (single) { + if (fireSingleReport(cb, ctxt, s->report, match_offset) == + MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } else { + if (fireReports64(s, cb, ctxt, a1, match_offset, + cached_accept_state, cached_accept_id, + 0) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } + } + if (ACCEPT_FUNC64(a2)) { + u64a match_offset = base_offset + b2 - buf; + DEBUG_PRINTF("Accept state %u reached\n", + a2 & SHENG64_STATE_MASK); + DEBUG_PRINTF("Match @ %llu\n", match_offset); + if (STOP_AT_MATCH) { + DEBUG_PRINTF("Stopping at match @ %lli\n", + (s64a)(b2 - start)); + *scan_end = b2; + *state = a2; + return MO_MATCHES_PENDING; + } + if (single) { + if (fireSingleReport(cb, ctxt, s->report, match_offset) == + MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } else { + if (fireReports64(s, cb, ctxt, a2, match_offset, + cached_accept_state, cached_accept_id, + 0) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } + } + if (ACCEPT_FUNC64(a3)) { + u64a match_offset = base_offset + b3 - buf; + DEBUG_PRINTF("Accept state %u reached\n", + a3 & SHENG64_STATE_MASK); + DEBUG_PRINTF("Match @ %llu\n", match_offset); + if (STOP_AT_MATCH) { + DEBUG_PRINTF("Stopping at match @ %lli\n", + (s64a)(b3 - start)); + *scan_end = b3; + *state = a3; + return MO_MATCHES_PENDING; + } + if (single) { + if (fireSingleReport(cb, ctxt, s->report, match_offset) == + MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } else { + if (fireReports64(s, cb, ctxt, a3, match_offset, + cached_accept_state, cached_accept_id, + 0) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } + } + if (ACCEPT_FUNC64(a4)) { + u64a match_offset = base_offset + b4 - buf; + DEBUG_PRINTF("Accept state %u reached\n", + a4 & SHENG64_STATE_MASK); + DEBUG_PRINTF("Match @ %llu\n", match_offset); + if (STOP_AT_MATCH) { + DEBUG_PRINTF("Stopping at match @ %lli\n", + (s64a)(b4 - start)); + *scan_end = b4; + *state = a4; + return MO_MATCHES_PENDING; + } + if (single) { + if (fireSingleReport(cb, ctxt, s->report, match_offset) == + MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } else { + if (fireReports64(s, cb, ctxt, a4, match_offset, + cached_accept_state, cached_accept_id, + 0) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } + } + if (INNER_DEAD_FUNC64(a4)) { + DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(b4 - buf)); + *scan_end = end; + *state = a4; + return MO_CONTINUE_MATCHING; + } + } + if (OUTER_DEAD_FUNC64(a4)) { + DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(cur_buf - buf)); + *scan_end = end; + *state = a4; + return MO_CONTINUE_MATCHING; + } + cur_buf += 4; + } + *state = movd512(cur_state); + *scan_end = cur_buf; + return MO_CONTINUE_MATCHING; +} +#endif // !NO_SHENG64_IMPL +#endif diff --git a/contrib/libs/hyperscan/src/nfa/sheng_internal.h b/contrib/libs/hyperscan/src/nfa/sheng_internal.h index e133d32f5b..98536886c5 100644 --- a/contrib/libs/hyperscan/src/nfa/sheng_internal.h +++ b/contrib/libs/hyperscan/src/nfa/sheng_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Intel Corporation + * Copyright (c) 2016-2020, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -38,17 +38,17 @@ #define SHENG_STATE_MASK 0xF #define SHENG_STATE_FLAG_MASK 0x70 -#define SHENG32_STATE_ACCEPT 0x20 -#define SHENG32_STATE_DEAD 0x40 -#define SHENG32_STATE_ACCEL 0x80 -#define SHENG32_STATE_MASK 0x1F -#define SHENG32_STATE_FLAG_MASK 0xE0 - -#define SHENG64_STATE_ACCEPT 0x40 -#define SHENG64_STATE_DEAD 0x80 -#define SHENG64_STATE_MASK 0x3F -#define SHENG64_STATE_FLAG_MASK 0xC0 - +#define SHENG32_STATE_ACCEPT 0x20 +#define SHENG32_STATE_DEAD 0x40 +#define SHENG32_STATE_ACCEL 0x80 +#define SHENG32_STATE_MASK 0x1F +#define SHENG32_STATE_FLAG_MASK 0xE0 + +#define SHENG64_STATE_ACCEPT 0x40 +#define SHENG64_STATE_DEAD 0x80 +#define SHENG64_STATE_MASK 0x3F +#define SHENG64_STATE_FLAG_MASK 0xC0 + #define SHENG_FLAG_SINGLE_REPORT 0x1 #define SHENG_FLAG_CAN_DIE 0x2 #define SHENG_FLAG_HAS_ACCEL 0x4 @@ -78,30 +78,30 @@ struct sheng { ReportID report; }; -struct sheng32 { - m512 succ_masks[256]; - u32 length; - u32 aux_offset; - u32 report_offset; - u32 accel_offset; - u8 n_states; - u8 anchored; - u8 floating; - u8 flags; - ReportID report; -}; - -struct sheng64 { - m512 succ_masks[256]; - u32 length; - u32 aux_offset; - u32 report_offset; - u32 accel_offset; - u8 n_states; - u8 anchored; - u8 floating; - u8 flags; - ReportID report; -}; - +struct sheng32 { + m512 succ_masks[256]; + u32 length; + u32 aux_offset; + u32 report_offset; + u32 accel_offset; + u8 n_states; + u8 anchored; + u8 floating; + u8 flags; + ReportID report; +}; + +struct sheng64 { + m512 succ_masks[256]; + u32 length; + u32 aux_offset; + u32 report_offset; + u32 accel_offset; + u8 n_states; + u8 anchored; + u8 floating; + u8 flags; + ReportID report; +}; + #endif /* SHENG_INTERNAL_H_ */ diff --git a/contrib/libs/hyperscan/src/nfa/shengcompile.cpp b/contrib/libs/hyperscan/src/nfa/shengcompile.cpp index f4ab79ce70..aa3faeb09d 100644 --- a/contrib/libs/hyperscan/src/nfa/shengcompile.cpp +++ b/contrib/libs/hyperscan/src/nfa/shengcompile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Intel Corporation + * Copyright (c) 2016-2020, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -301,28 +301,28 @@ void dumpShuffleMask(const u8 chr, const u8 *buf, unsigned sz) { } DEBUG_PRINTF("chr %3u: %s\n", chr, o.str().c_str()); } - -static really_inline -void dumpShuffleMask32(const u8 chr, const u8 *buf, unsigned sz) { - stringstream o; - - for (unsigned i = 0; i < sz; i++) { - o.width(2); - o << (buf[i] & SHENG32_STATE_MASK) << " "; - } - DEBUG_PRINTF("chr %3u: %s\n", chr, o.str().c_str()); -} - -static really_inline -void dumpShuffleMask64(const u8 chr, const u8 *buf, unsigned sz) { - stringstream o; - - for (unsigned i = 0; i < sz; i++) { - o.width(2); - o << (buf[i] & SHENG64_STATE_MASK) << " "; - } - DEBUG_PRINTF("chr %3u: %s\n", chr, o.str().c_str()); -} + +static really_inline +void dumpShuffleMask32(const u8 chr, const u8 *buf, unsigned sz) { + stringstream o; + + for (unsigned i = 0; i < sz; i++) { + o.width(2); + o << (buf[i] & SHENG32_STATE_MASK) << " "; + } + DEBUG_PRINTF("chr %3u: %s\n", chr, o.str().c_str()); +} + +static really_inline +void dumpShuffleMask64(const u8 chr, const u8 *buf, unsigned sz) { + stringstream o; + + for (unsigned i = 0; i < sz; i++) { + o.width(2); + o << (buf[i] & SHENG64_STATE_MASK) << " "; + } + DEBUG_PRINTF("chr %3u: %s\n", chr, o.str().c_str()); +} #endif static @@ -333,16 +333,16 @@ void fillAccelOut(const map<dstate_id_t, AccelScheme> &accel_escape_info, } } -template <typename T> +template <typename T> static -u8 getShengState(UNUSED dstate &state, UNUSED dfa_info &info, - UNUSED map<dstate_id_t, AccelScheme> &accelInfo) { - return 0; -} - -template <> -u8 getShengState<sheng>(dstate &state, dfa_info &info, - map<dstate_id_t, AccelScheme> &accelInfo) { +u8 getShengState(UNUSED dstate &state, UNUSED dfa_info &info, + UNUSED map<dstate_id_t, AccelScheme> &accelInfo) { + return 0; +} + +template <> +u8 getShengState<sheng>(dstate &state, dfa_info &info, + map<dstate_id_t, AccelScheme> &accelInfo) { u8 s = state.impl_id; if (!state.reports.empty()) { s |= SHENG_STATE_ACCEPT; @@ -356,41 +356,41 @@ u8 getShengState<sheng>(dstate &state, dfa_info &info, return s; } -template <> -u8 getShengState<sheng32>(dstate &state, dfa_info &info, - map<dstate_id_t, AccelScheme> &accelInfo) { - u8 s = state.impl_id; - if (!state.reports.empty()) { - s |= SHENG32_STATE_ACCEPT; - } - if (info.isDead(state)) { - s |= SHENG32_STATE_DEAD; - } - if (accelInfo.find(info.raw_id(state.impl_id)) != accelInfo.end()) { - s |= SHENG32_STATE_ACCEL; - } - return s; -} - -template <> -u8 getShengState<sheng64>(dstate &state, dfa_info &info, - UNUSED map<dstate_id_t, AccelScheme> &accelInfo) { - u8 s = state.impl_id; - if (!state.reports.empty()) { - s |= SHENG64_STATE_ACCEPT; - } - if (info.isDead(state)) { - s |= SHENG64_STATE_DEAD; - } - return s; -} - -template <typename T> +template <> +u8 getShengState<sheng32>(dstate &state, dfa_info &info, + map<dstate_id_t, AccelScheme> &accelInfo) { + u8 s = state.impl_id; + if (!state.reports.empty()) { + s |= SHENG32_STATE_ACCEPT; + } + if (info.isDead(state)) { + s |= SHENG32_STATE_DEAD; + } + if (accelInfo.find(info.raw_id(state.impl_id)) != accelInfo.end()) { + s |= SHENG32_STATE_ACCEL; + } + return s; +} + +template <> +u8 getShengState<sheng64>(dstate &state, dfa_info &info, + UNUSED map<dstate_id_t, AccelScheme> &accelInfo) { + u8 s = state.impl_id; + if (!state.reports.empty()) { + s |= SHENG64_STATE_ACCEPT; + } + if (info.isDead(state)) { + s |= SHENG64_STATE_DEAD; + } + return s; +} + +template <typename T> static void fillAccelAux(struct NFA *n, dfa_info &info, map<dstate_id_t, AccelScheme> &accelInfo) { DEBUG_PRINTF("Filling accel aux structures\n"); - T *s = (T *)getMutableImplNfa(n); + T *s = (T *)getMutableImplNfa(n); u32 offset = s->accel_offset; for (dstate_id_t i = 0; i < info.size(); i++) { @@ -408,21 +408,21 @@ void fillAccelAux(struct NFA *n, dfa_info &info, } } -template <typename T> +template <typename T> static -void populateBasicInfo(UNUSED struct NFA *n, UNUSED dfa_info &info, - UNUSED map<dstate_id_t, AccelScheme> &accelInfo, - UNUSED u32 aux_offset, UNUSED u32 report_offset, - UNUSED u32 accel_offset, UNUSED u32 total_size, - UNUSED u32 dfa_size) { -} - -template <> -void populateBasicInfo<sheng>(struct NFA *n, dfa_info &info, - map<dstate_id_t, AccelScheme> &accelInfo, - u32 aux_offset, u32 report_offset, - u32 accel_offset, u32 total_size, - u32 dfa_size) { +void populateBasicInfo(UNUSED struct NFA *n, UNUSED dfa_info &info, + UNUSED map<dstate_id_t, AccelScheme> &accelInfo, + UNUSED u32 aux_offset, UNUSED u32 report_offset, + UNUSED u32 accel_offset, UNUSED u32 total_size, + UNUSED u32 dfa_size) { +} + +template <> +void populateBasicInfo<sheng>(struct NFA *n, dfa_info &info, + map<dstate_id_t, AccelScheme> &accelInfo, + u32 aux_offset, u32 report_offset, + u32 accel_offset, u32 total_size, + u32 dfa_size) { n->length = total_size; n->scratchStateSize = 1; n->streamStateSize = 1; @@ -438,65 +438,65 @@ void populateBasicInfo<sheng>(struct NFA *n, dfa_info &info, s->length = dfa_size; s->flags |= info.can_die ? SHENG_FLAG_CAN_DIE : 0; - s->anchored = getShengState<sheng>(info.anchored, info, accelInfo); - s->floating = getShengState<sheng>(info.floating, info, accelInfo); -} - -template <> -void populateBasicInfo<sheng32>(struct NFA *n, dfa_info &info, - map<dstate_id_t, AccelScheme> &accelInfo, - u32 aux_offset, u32 report_offset, - u32 accel_offset, u32 total_size, - u32 dfa_size) { - n->length = total_size; - n->scratchStateSize = 1; - n->streamStateSize = 1; - n->nPositions = info.size(); - n->type = SHENG_NFA_32; - n->flags |= info.raw.hasEodReports() ? NFA_ACCEPTS_EOD : 0; - - sheng32 *s = (sheng32 *)getMutableImplNfa(n); - s->aux_offset = aux_offset; - s->report_offset = report_offset; - s->accel_offset = accel_offset; - s->n_states = info.size(); - s->length = dfa_size; - s->flags |= info.can_die ? SHENG_FLAG_CAN_DIE : 0; - - s->anchored = getShengState<sheng32>(info.anchored, info, accelInfo); - s->floating = getShengState<sheng32>(info.floating, info, accelInfo); -} - -template <> -void populateBasicInfo<sheng64>(struct NFA *n, dfa_info &info, - map<dstate_id_t, AccelScheme> &accelInfo, - u32 aux_offset, u32 report_offset, - u32 accel_offset, u32 total_size, - u32 dfa_size) { - n->length = total_size; - n->scratchStateSize = 1; - n->streamStateSize = 1; - n->nPositions = info.size(); - n->type = SHENG_NFA_64; - n->flags |= info.raw.hasEodReports() ? NFA_ACCEPTS_EOD : 0; - - sheng64 *s = (sheng64 *)getMutableImplNfa(n); - s->aux_offset = aux_offset; - s->report_offset = report_offset; - s->accel_offset = accel_offset; - s->n_states = info.size(); - s->length = dfa_size; - s->flags |= info.can_die ? SHENG_FLAG_CAN_DIE : 0; - - s->anchored = getShengState<sheng64>(info.anchored, info, accelInfo); - s->floating = getShengState<sheng64>(info.floating, info, accelInfo); -} - -template <typename T> + s->anchored = getShengState<sheng>(info.anchored, info, accelInfo); + s->floating = getShengState<sheng>(info.floating, info, accelInfo); +} + +template <> +void populateBasicInfo<sheng32>(struct NFA *n, dfa_info &info, + map<dstate_id_t, AccelScheme> &accelInfo, + u32 aux_offset, u32 report_offset, + u32 accel_offset, u32 total_size, + u32 dfa_size) { + n->length = total_size; + n->scratchStateSize = 1; + n->streamStateSize = 1; + n->nPositions = info.size(); + n->type = SHENG_NFA_32; + n->flags |= info.raw.hasEodReports() ? NFA_ACCEPTS_EOD : 0; + + sheng32 *s = (sheng32 *)getMutableImplNfa(n); + s->aux_offset = aux_offset; + s->report_offset = report_offset; + s->accel_offset = accel_offset; + s->n_states = info.size(); + s->length = dfa_size; + s->flags |= info.can_die ? SHENG_FLAG_CAN_DIE : 0; + + s->anchored = getShengState<sheng32>(info.anchored, info, accelInfo); + s->floating = getShengState<sheng32>(info.floating, info, accelInfo); +} + +template <> +void populateBasicInfo<sheng64>(struct NFA *n, dfa_info &info, + map<dstate_id_t, AccelScheme> &accelInfo, + u32 aux_offset, u32 report_offset, + u32 accel_offset, u32 total_size, + u32 dfa_size) { + n->length = total_size; + n->scratchStateSize = 1; + n->streamStateSize = 1; + n->nPositions = info.size(); + n->type = SHENG_NFA_64; + n->flags |= info.raw.hasEodReports() ? NFA_ACCEPTS_EOD : 0; + + sheng64 *s = (sheng64 *)getMutableImplNfa(n); + s->aux_offset = aux_offset; + s->report_offset = report_offset; + s->accel_offset = accel_offset; + s->n_states = info.size(); + s->length = dfa_size; + s->flags |= info.can_die ? SHENG_FLAG_CAN_DIE : 0; + + s->anchored = getShengState<sheng64>(info.anchored, info, accelInfo); + s->floating = getShengState<sheng64>(info.floating, info, accelInfo); +} + +template <typename T> static void fillTops(NFA *n, dfa_info &info, dstate_id_t id, map<dstate_id_t, AccelScheme> &accelInfo) { - T *s = (T *)getMutableImplNfa(n); + T *s = (T *)getMutableImplNfa(n); u32 aux_base = s->aux_offset; DEBUG_PRINTF("Filling tops for state %u\n", id); @@ -513,14 +513,14 @@ void fillTops(NFA *n, dfa_info &info, dstate_id_t id, DEBUG_PRINTF("Top transition for state %u: %u\n", id, top_state.impl_id); - aux->top = getShengState<T>(top_state, info, accelInfo); + aux->top = getShengState<T>(top_state, info, accelInfo); } -template <typename T> +template <typename T> static void fillAux(NFA *n, dfa_info &info, dstate_id_t id, vector<u32> &reports, vector<u32> &reports_eod, vector<u32> &report_offsets) { - T *s = (T *)getMutableImplNfa(n); + T *s = (T *)getMutableImplNfa(n); u32 aux_base = s->aux_offset; auto raw_id = info.raw_id(id); @@ -540,97 +540,97 @@ void fillAux(NFA *n, dfa_info &info, dstate_id_t id, vector<u32> &reports, DEBUG_PRINTF("EOD report list offset: %u\n", aux->accept_eod); } -template <typename T> +template <typename T> static void fillSingleReport(NFA *n, ReportID r_id) { - T *s = (T *)getMutableImplNfa(n); + T *s = (T *)getMutableImplNfa(n); DEBUG_PRINTF("Single report ID: %u\n", r_id); s->report = r_id; s->flags |= SHENG_FLAG_SINGLE_REPORT; } -template <typename T> +template <typename T> static -bool createShuffleMasks(UNUSED T *s, UNUSED dfa_info &info, - UNUSED map<dstate_id_t, AccelScheme> &accelInfo) { - return true; -} - -template <> -bool createShuffleMasks<sheng>(sheng *s, dfa_info &info, - map<dstate_id_t, AccelScheme> &accelInfo) { +bool createShuffleMasks(UNUSED T *s, UNUSED dfa_info &info, + UNUSED map<dstate_id_t, AccelScheme> &accelInfo) { + return true; +} + +template <> +bool createShuffleMasks<sheng>(sheng *s, dfa_info &info, + map<dstate_id_t, AccelScheme> &accelInfo) { for (u16 chr = 0; chr < 256; chr++) { u8 buf[16] = {0}; for (dstate_id_t idx = 0; idx < info.size(); idx++) { auto &succ_state = info.next(idx, chr); - buf[idx] = getShengState<sheng>(succ_state, info, accelInfo); + buf[idx] = getShengState<sheng>(succ_state, info, accelInfo); } #ifdef DEBUG dumpShuffleMask(chr, buf, sizeof(buf)); #endif memcpy(&s->shuffle_masks[chr], buf, sizeof(m128)); } - return true; -} - -template <> -bool createShuffleMasks<sheng32>(sheng32 *s, dfa_info &info, - map<dstate_id_t, AccelScheme> &accelInfo) { - for (u16 chr = 0; chr < 256; chr++) { - u8 buf[64] = {0}; - - assert(info.size() <= 32); - for (dstate_id_t idx = 0; idx < info.size(); idx++) { - auto &succ_state = info.next(idx, chr); - - buf[idx] = getShengState<sheng32>(succ_state, info, accelInfo); - buf[32 + idx] = buf[idx]; - } -#ifdef DEBUG - dumpShuffleMask32(chr, buf, sizeof(buf)); -#endif - memcpy(&s->succ_masks[chr], buf, sizeof(m512)); - } - return true; -} - -template <> -bool createShuffleMasks<sheng64>(sheng64 *s, dfa_info &info, - map<dstate_id_t, AccelScheme> &accelInfo) { - for (u16 chr = 0; chr < 256; chr++) { - u8 buf[64] = {0}; - - assert(info.size() <= 64); - for (dstate_id_t idx = 0; idx < info.size(); idx++) { - auto &succ_state = info.next(idx, chr); - - if (accelInfo.find(info.raw_id(succ_state.impl_id)) - != accelInfo.end()) { - return false; - } - buf[idx] = getShengState<sheng64>(succ_state, info, accelInfo); - } -#ifdef DEBUG - dumpShuffleMask64(chr, buf, sizeof(buf)); -#endif - memcpy(&s->succ_masks[chr], buf, sizeof(m512)); - } - return true; -} - -bool has_accel_sheng(const NFA *) { - return true; /* consider the sheng region as accelerated */ -} - -template <typename T> -static -bytecode_ptr<NFA> shengCompile_int(raw_dfa &raw, const CompileContext &cc, - set<dstate_id_t> *accel_states, - sheng_build_strat &strat, - dfa_info &info) { + return true; +} + +template <> +bool createShuffleMasks<sheng32>(sheng32 *s, dfa_info &info, + map<dstate_id_t, AccelScheme> &accelInfo) { + for (u16 chr = 0; chr < 256; chr++) { + u8 buf[64] = {0}; + + assert(info.size() <= 32); + for (dstate_id_t idx = 0; idx < info.size(); idx++) { + auto &succ_state = info.next(idx, chr); + + buf[idx] = getShengState<sheng32>(succ_state, info, accelInfo); + buf[32 + idx] = buf[idx]; + } +#ifdef DEBUG + dumpShuffleMask32(chr, buf, sizeof(buf)); +#endif + memcpy(&s->succ_masks[chr], buf, sizeof(m512)); + } + return true; +} + +template <> +bool createShuffleMasks<sheng64>(sheng64 *s, dfa_info &info, + map<dstate_id_t, AccelScheme> &accelInfo) { + for (u16 chr = 0; chr < 256; chr++) { + u8 buf[64] = {0}; + + assert(info.size() <= 64); + for (dstate_id_t idx = 0; idx < info.size(); idx++) { + auto &succ_state = info.next(idx, chr); + + if (accelInfo.find(info.raw_id(succ_state.impl_id)) + != accelInfo.end()) { + return false; + } + buf[idx] = getShengState<sheng64>(succ_state, info, accelInfo); + } +#ifdef DEBUG + dumpShuffleMask64(chr, buf, sizeof(buf)); +#endif + memcpy(&s->succ_masks[chr], buf, sizeof(m512)); + } + return true; +} + +bool has_accel_sheng(const NFA *) { + return true; /* consider the sheng region as accelerated */ +} + +template <typename T> +static +bytecode_ptr<NFA> shengCompile_int(raw_dfa &raw, const CompileContext &cc, + set<dstate_id_t> *accel_states, + sheng_build_strat &strat, + dfa_info &info) { if (!cc.streaming) { /* TODO: work out if we can do the strip in streaming * mode with our semantics */ raw.stripExtraEodReports(); @@ -645,7 +645,7 @@ bytecode_ptr<NFA> shengCompile_int(raw_dfa &raw, const CompileContext &cc, DEBUG_PRINTF("Anchored start state: %u, floating start state: %u\n", info.anchored.impl_id, info.floating.impl_id); - u32 nfa_size = ROUNDUP_16(sizeof(NFA) + sizeof(T)); + u32 nfa_size = ROUNDUP_16(sizeof(NFA) + sizeof(T)); vector<u32> reports, eod_reports, report_offsets; u8 isSingle = 0; ReportID single_report = 0; @@ -667,129 +667,129 @@ bytecode_ptr<NFA> shengCompile_int(raw_dfa &raw, const CompileContext &cc, auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size); - populateBasicInfo<T>(nfa.get(), info, accelInfo, nfa_size, - reports_offset, accel_offset, total_size, - total_size - sizeof(NFA)); + populateBasicInfo<T>(nfa.get(), info, accelInfo, nfa_size, + reports_offset, accel_offset, total_size, + total_size - sizeof(NFA)); DEBUG_PRINTF("Setting up aux and report structures\n"); ri->fillReportLists(nfa.get(), reports_offset, report_offsets); for (dstate_id_t idx = 0; idx < info.size(); idx++) { - fillTops<T>(nfa.get(), info, idx, accelInfo); - fillAux<T>(nfa.get(), info, idx, reports, eod_reports, - report_offsets); + fillTops<T>(nfa.get(), info, idx, accelInfo); + fillAux<T>(nfa.get(), info, idx, reports, eod_reports, + report_offsets); } if (isSingle) { - fillSingleReport<T>(nfa.get(), single_report); + fillSingleReport<T>(nfa.get(), single_report); } - fillAccelAux<T>(nfa.get(), info, accelInfo); + fillAccelAux<T>(nfa.get(), info, accelInfo); if (accel_states) { fillAccelOut(accelInfo, accel_states); } - if (!createShuffleMasks<T>((T *)getMutableImplNfa(nfa.get()), info, accelInfo)) { - return nullptr; - } + if (!createShuffleMasks<T>((T *)getMutableImplNfa(nfa.get()), info, accelInfo)) { + return nullptr; + } + + return nfa; +} + +bytecode_ptr<NFA> shengCompile(raw_dfa &raw, const CompileContext &cc, + const ReportManager &rm, bool only_accel_init, + set<dstate_id_t> *accel_states) { + if (!cc.grey.allowSheng) { + DEBUG_PRINTF("Sheng is not allowed!\n"); + return nullptr; + } + + sheng_build_strat strat(raw, rm, only_accel_init); + dfa_info info(strat); + + DEBUG_PRINTF("Trying to compile a %zu state Sheng\n", raw.states.size()); + + DEBUG_PRINTF("Anchored start state id: %u, floating start state id: %u\n", + raw.start_anchored, raw.start_floating); + + DEBUG_PRINTF("This DFA %s die so effective number of states is %zu\n", + info.can_die ? "can" : "cannot", info.size()); + if (info.size() > 16) { + DEBUG_PRINTF("Too many states\n"); + return nullptr; + } + + return shengCompile_int<sheng>(raw, cc, accel_states, strat, info); +} + +bytecode_ptr<NFA> sheng32Compile(raw_dfa &raw, const CompileContext &cc, + const ReportManager &rm, bool only_accel_init, + set<dstate_id_t> *accel_states) { + if (!cc.grey.allowSheng) { + DEBUG_PRINTF("Sheng is not allowed!\n"); + return nullptr; + } + + if (!cc.target_info.has_avx512vbmi()) { + DEBUG_PRINTF("Sheng32 failed, no HS_CPU_FEATURES_AVX512VBMI!\n"); + return nullptr; + } + sheng_build_strat strat(raw, rm, only_accel_init); + dfa_info info(strat); + + DEBUG_PRINTF("Trying to compile a %zu state Sheng\n", raw.states.size()); + + DEBUG_PRINTF("Anchored start state id: %u, floating start state id: %u\n", + raw.start_anchored, raw.start_floating); + + DEBUG_PRINTF("This DFA %s die so effective number of states is %zu\n", + info.can_die ? "can" : "cannot", info.size()); + assert(info.size() > 16); + if (info.size() > 32) { + DEBUG_PRINTF("Too many states\n"); + return nullptr; + } + + return shengCompile_int<sheng32>(raw, cc, accel_states, strat, info); +} + +bytecode_ptr<NFA> sheng64Compile(raw_dfa &raw, const CompileContext &cc, + const ReportManager &rm, bool only_accel_init, + set<dstate_id_t> *accel_states) { + if (!cc.grey.allowSheng) { + DEBUG_PRINTF("Sheng is not allowed!\n"); + return nullptr; + } + + if (!cc.target_info.has_avx512vbmi()) { + DEBUG_PRINTF("Sheng64 failed, no HS_CPU_FEATURES_AVX512VBMI!\n"); + return nullptr; + } + + sheng_build_strat strat(raw, rm, only_accel_init); + dfa_info info(strat); + + DEBUG_PRINTF("Trying to compile a %zu state Sheng\n", raw.states.size()); + + DEBUG_PRINTF("Anchored start state id: %u, floating start state id: %u\n", + raw.start_anchored, raw.start_floating); + + DEBUG_PRINTF("This DFA %s die so effective number of states is %zu\n", + info.can_die ? "can" : "cannot", info.size()); + assert(info.size() > 32); + if (info.size() > 64) { + DEBUG_PRINTF("Too many states\n"); + return nullptr; + } + vector<dstate> old_states; + old_states = info.states; + auto nfa = shengCompile_int<sheng64>(raw, cc, accel_states, strat, info); + if (!nfa) { + info.states = old_states; + } return nfa; } -bytecode_ptr<NFA> shengCompile(raw_dfa &raw, const CompileContext &cc, - const ReportManager &rm, bool only_accel_init, - set<dstate_id_t> *accel_states) { - if (!cc.grey.allowSheng) { - DEBUG_PRINTF("Sheng is not allowed!\n"); - return nullptr; - } - - sheng_build_strat strat(raw, rm, only_accel_init); - dfa_info info(strat); - - DEBUG_PRINTF("Trying to compile a %zu state Sheng\n", raw.states.size()); - - DEBUG_PRINTF("Anchored start state id: %u, floating start state id: %u\n", - raw.start_anchored, raw.start_floating); - - DEBUG_PRINTF("This DFA %s die so effective number of states is %zu\n", - info.can_die ? "can" : "cannot", info.size()); - if (info.size() > 16) { - DEBUG_PRINTF("Too many states\n"); - return nullptr; - } - - return shengCompile_int<sheng>(raw, cc, accel_states, strat, info); -} - -bytecode_ptr<NFA> sheng32Compile(raw_dfa &raw, const CompileContext &cc, - const ReportManager &rm, bool only_accel_init, - set<dstate_id_t> *accel_states) { - if (!cc.grey.allowSheng) { - DEBUG_PRINTF("Sheng is not allowed!\n"); - return nullptr; - } - - if (!cc.target_info.has_avx512vbmi()) { - DEBUG_PRINTF("Sheng32 failed, no HS_CPU_FEATURES_AVX512VBMI!\n"); - return nullptr; - } - - sheng_build_strat strat(raw, rm, only_accel_init); - dfa_info info(strat); - - DEBUG_PRINTF("Trying to compile a %zu state Sheng\n", raw.states.size()); - - DEBUG_PRINTF("Anchored start state id: %u, floating start state id: %u\n", - raw.start_anchored, raw.start_floating); - - DEBUG_PRINTF("This DFA %s die so effective number of states is %zu\n", - info.can_die ? "can" : "cannot", info.size()); - assert(info.size() > 16); - if (info.size() > 32) { - DEBUG_PRINTF("Too many states\n"); - return nullptr; - } - - return shengCompile_int<sheng32>(raw, cc, accel_states, strat, info); -} - -bytecode_ptr<NFA> sheng64Compile(raw_dfa &raw, const CompileContext &cc, - const ReportManager &rm, bool only_accel_init, - set<dstate_id_t> *accel_states) { - if (!cc.grey.allowSheng) { - DEBUG_PRINTF("Sheng is not allowed!\n"); - return nullptr; - } - - if (!cc.target_info.has_avx512vbmi()) { - DEBUG_PRINTF("Sheng64 failed, no HS_CPU_FEATURES_AVX512VBMI!\n"); - return nullptr; - } - - sheng_build_strat strat(raw, rm, only_accel_init); - dfa_info info(strat); - - DEBUG_PRINTF("Trying to compile a %zu state Sheng\n", raw.states.size()); - - DEBUG_PRINTF("Anchored start state id: %u, floating start state id: %u\n", - raw.start_anchored, raw.start_floating); - - DEBUG_PRINTF("This DFA %s die so effective number of states is %zu\n", - info.can_die ? "can" : "cannot", info.size()); - assert(info.size() > 32); - if (info.size() > 64) { - DEBUG_PRINTF("Too many states\n"); - return nullptr; - } - vector<dstate> old_states; - old_states = info.states; - auto nfa = shengCompile_int<sheng64>(raw, cc, accel_states, strat, info); - if (!nfa) { - info.states = old_states; - } - return nfa; -} - } // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfa/shengcompile.h b/contrib/libs/hyperscan/src/nfa/shengcompile.h index 175bf6a86f..256f4a4e50 100644 --- a/contrib/libs/hyperscan/src/nfa/shengcompile.h +++ b/contrib/libs/hyperscan/src/nfa/shengcompile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Intel Corporation + * Copyright (c) 2016-2020, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -61,7 +61,7 @@ public: u32 max_allowed_offset_accel() const override; u32 max_stop_char() const override; u32 max_floating_stop_char() const override; - DfaType getType() const override { return Sheng; } + DfaType getType() const override { return Sheng; } private: raw_dfa &rdfa; @@ -71,14 +71,14 @@ bytecode_ptr<NFA> shengCompile(raw_dfa &raw, const CompileContext &cc, const ReportManager &rm, bool only_accel_init, std::set<dstate_id_t> *accel_states = nullptr); -bytecode_ptr<NFA> sheng32Compile(raw_dfa &raw, const CompileContext &cc, - const ReportManager &rm, bool only_accel_init, - std::set<dstate_id_t> *accel_states = nullptr); - -bytecode_ptr<NFA> sheng64Compile(raw_dfa &raw, const CompileContext &cc, - const ReportManager &rm, bool only_accel_init, - std::set<dstate_id_t> *accel_states = nullptr); - +bytecode_ptr<NFA> sheng32Compile(raw_dfa &raw, const CompileContext &cc, + const ReportManager &rm, bool only_accel_init, + std::set<dstate_id_t> *accel_states = nullptr); + +bytecode_ptr<NFA> sheng64Compile(raw_dfa &raw, const CompileContext &cc, + const ReportManager &rm, bool only_accel_init, + std::set<dstate_id_t> *accel_states = nullptr); + struct sheng_escape_info { CharReach outs; CharReach outs2_single; diff --git a/contrib/libs/hyperscan/src/nfa/vermicelli.h b/contrib/libs/hyperscan/src/nfa/vermicelli.h index 7b35deb8d6..ed797d83f9 100644 --- a/contrib/libs/hyperscan/src/nfa/vermicelli.h +++ b/contrib/libs/hyperscan/src/nfa/vermicelli.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2020, Intel Corporation + * Copyright (c) 2015-2020, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -46,20 +46,20 @@ const u8 *vermicelliExec(char c, char nocase, const u8 *buf, nocase ? "nocase " : "", c, (size_t)(buf_end - buf)); assert(buf < buf_end); - VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */ - + VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */ + // Handle small scans. -#ifdef HAVE_AVX512 - if (buf_end - buf <= VERM_BOUNDARY) { - const u8 *ptr = nocase - ? vermMiniNocase(chars, buf, buf_end, 0) - : vermMini(chars, buf, buf_end, 0); - if (ptr) { - return ptr; - } - return buf_end; - } -#else +#ifdef HAVE_AVX512 + if (buf_end - buf <= VERM_BOUNDARY) { + const u8 *ptr = nocase + ? vermMiniNocase(chars, buf, buf_end, 0) + : vermMini(chars, buf, buf_end, 0); + if (ptr) { + return ptr; + } + return buf_end; + } +#else if (buf_end - buf < VERM_BOUNDARY) { for (; buf < buf_end; buf++) { char cur = (char)*buf; @@ -72,7 +72,7 @@ const u8 *vermicelliExec(char c, char nocase, const u8 *buf, } return buf; } -#endif +#endif uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY; if (min) { @@ -112,20 +112,20 @@ const u8 *nvermicelliExec(char c, char nocase, const u8 *buf, nocase ? "nocase " : "", c, (size_t)(buf_end - buf)); assert(buf < buf_end); - VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */ - + VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */ + // Handle small scans. -#ifdef HAVE_AVX512 - if (buf_end - buf <= VERM_BOUNDARY) { - const u8 *ptr = nocase - ? vermMiniNocase(chars, buf, buf_end, 1) - : vermMini(chars, buf, buf_end, 1); - if (ptr) { - return ptr; - } - return buf_end; - } -#else +#ifdef HAVE_AVX512 + if (buf_end - buf <= VERM_BOUNDARY) { + const u8 *ptr = nocase + ? vermMiniNocase(chars, buf, buf_end, 1) + : vermMini(chars, buf, buf_end, 1); + if (ptr) { + return ptr; + } + return buf_end; + } +#else if (buf_end - buf < VERM_BOUNDARY) { for (; buf < buf_end; buf++) { char cur = (char)*buf; @@ -138,7 +138,7 @@ const u8 *nvermicelliExec(char c, char nocase, const u8 *buf, } return buf; } -#endif +#endif size_t min = (size_t)buf % VERM_BOUNDARY; if (min) { @@ -179,28 +179,28 @@ const u8 *vermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf, VERM_TYPE chars1 = VERM_SET_FN(c1); /* nocase already uppercase */ VERM_TYPE chars2 = VERM_SET_FN(c2); /* nocase already uppercase */ -#ifdef HAVE_AVX512 - if (buf_end - buf <= VERM_BOUNDARY) { - const u8 *ptr = nocase - ? dvermMiniNocase(chars1, chars2, buf, buf_end) - : dvermMini(chars1, chars2, buf, buf_end); - if (ptr) { - return ptr; - } - - /* check for partial match at end */ - u8 mask = nocase ? CASE_CLEAR : 0xff; - if ((buf_end[-1] & mask) == (u8)c1) { - DEBUG_PRINTF("partial!!!\n"); - return buf_end - 1; - } - - return buf_end; - } -#endif - - assert((buf_end - buf) >= VERM_BOUNDARY); - uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY; +#ifdef HAVE_AVX512 + if (buf_end - buf <= VERM_BOUNDARY) { + const u8 *ptr = nocase + ? dvermMiniNocase(chars1, chars2, buf, buf_end) + : dvermMini(chars1, chars2, buf, buf_end); + if (ptr) { + return ptr; + } + + /* check for partial match at end */ + u8 mask = nocase ? CASE_CLEAR : 0xff; + if ((buf_end[-1] & mask) == (u8)c1) { + DEBUG_PRINTF("partial!!!\n"); + return buf_end - 1; + } + + return buf_end; + } +#endif + + assert((buf_end - buf) >= VERM_BOUNDARY); + uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY; if (min) { // Input isn't aligned, so we need to run one iteration with an // unaligned load, then skip buf forward to the next aligned address. @@ -257,26 +257,26 @@ const u8 *vermicelliDoubleMaskedExec(char c1, char c2, char m1, char m2, VERM_TYPE mask1 = VERM_SET_FN(m1); VERM_TYPE mask2 = VERM_SET_FN(m2); -#ifdef HAVE_AVX512 - if (buf_end - buf <= VERM_BOUNDARY) { - const u8 *ptr = dvermMiniMasked(chars1, chars2, mask1, mask2, buf, - buf_end); - if (ptr) { - return ptr; - } - - /* check for partial match at end */ - if ((buf_end[-1] & m1) == (u8)c1) { - DEBUG_PRINTF("partial!!!\n"); - return buf_end - 1; - } - - return buf_end; - } -#endif - - assert((buf_end - buf) >= VERM_BOUNDARY); - uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY; +#ifdef HAVE_AVX512 + if (buf_end - buf <= VERM_BOUNDARY) { + const u8 *ptr = dvermMiniMasked(chars1, chars2, mask1, mask2, buf, + buf_end); + if (ptr) { + return ptr; + } + + /* check for partial match at end */ + if ((buf_end[-1] & m1) == (u8)c1) { + DEBUG_PRINTF("partial!!!\n"); + return buf_end - 1; + } + + return buf_end; + } +#endif + + assert((buf_end - buf) >= VERM_BOUNDARY); + uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY; if (min) { // Input isn't aligned, so we need to run one iteration with an // unaligned load, then skip buf forward to the next aligned address. @@ -308,7 +308,7 @@ const u8 *vermicelliDoubleMaskedExec(char c1, char c2, char m1, char m2, /* check for partial match at end */ if ((buf_end[-1] & m1) == (u8)c1) { - DEBUG_PRINTF("partial!!!\n"); + DEBUG_PRINTF("partial!!!\n"); return buf_end - 1; } @@ -324,20 +324,20 @@ const u8 *rvermicelliExec(char c, char nocase, const u8 *buf, nocase ? "nocase " : "", c, (size_t)(buf_end - buf)); assert(buf < buf_end); - VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */ - + VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */ + // Handle small scans. -#ifdef HAVE_AVX512 - if (buf_end - buf <= VERM_BOUNDARY) { - const u8 *ptr = nocase - ? rvermMiniNocase(chars, buf, buf_end, 0) - : rvermMini(chars, buf, buf_end, 0); - if (ptr) { - return ptr; - } - return buf - 1; - } -#else +#ifdef HAVE_AVX512 + if (buf_end - buf <= VERM_BOUNDARY) { + const u8 *ptr = nocase + ? rvermMiniNocase(chars, buf, buf_end, 0) + : rvermMini(chars, buf, buf_end, 0); + if (ptr) { + return ptr; + } + return buf - 1; + } +#else if (buf_end - buf < VERM_BOUNDARY) { for (buf_end--; buf_end >= buf; buf_end--) { char cur = (char)*buf_end; @@ -350,7 +350,7 @@ const u8 *rvermicelliExec(char c, char nocase, const u8 *buf, } return buf_end; } -#endif +#endif size_t min = (size_t)buf_end % VERM_BOUNDARY; if (min) { @@ -358,14 +358,14 @@ const u8 *rvermicelliExec(char c, char nocase, const u8 *buf, // unaligned load, then skip buf backward to the next aligned address. // There's some small overlap here, but we don't mind scanning it twice // if we can do it quickly, do we? - const u8 *ptr = nocase ? rvermUnalignNocase(chars, - buf_end - VERM_BOUNDARY, - 0) - : rvermUnalign(chars, buf_end - VERM_BOUNDARY, - 0); - - if (ptr) { - return ptr; + const u8 *ptr = nocase ? rvermUnalignNocase(chars, + buf_end - VERM_BOUNDARY, + 0) + : rvermUnalign(chars, buf_end - VERM_BOUNDARY, + 0); + + if (ptr) { + return ptr; } buf_end -= min; @@ -396,20 +396,20 @@ const u8 *rnvermicelliExec(char c, char nocase, const u8 *buf, nocase ? "nocase " : "", c, (size_t)(buf_end - buf)); assert(buf < buf_end); - VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */ - + VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */ + // Handle small scans. -#ifdef HAVE_AVX512 - if (buf_end - buf <= VERM_BOUNDARY) { - const u8 *ptr = nocase - ? rvermMiniNocase(chars, buf, buf_end, 1) - : rvermMini(chars, buf, buf_end, 1); - if (ptr) { - return ptr; - } - return buf - 1; - } -#else +#ifdef HAVE_AVX512 + if (buf_end - buf <= VERM_BOUNDARY) { + const u8 *ptr = nocase + ? rvermMiniNocase(chars, buf, buf_end, 1) + : rvermMini(chars, buf, buf_end, 1); + if (ptr) { + return ptr; + } + return buf - 1; + } +#else if (buf_end - buf < VERM_BOUNDARY) { for (buf_end--; buf_end >= buf; buf_end--) { char cur = (char)*buf_end; @@ -422,7 +422,7 @@ const u8 *rnvermicelliExec(char c, char nocase, const u8 *buf, } return buf_end; } -#endif +#endif size_t min = (size_t)buf_end % VERM_BOUNDARY; if (min) { @@ -430,14 +430,14 @@ const u8 *rnvermicelliExec(char c, char nocase, const u8 *buf, // unaligned load, then skip buf backward to the next aligned address. // There's some small overlap here, but we don't mind scanning it twice // if we can do it quickly, do we? - const u8 *ptr = nocase ? rvermUnalignNocase(chars, - buf_end - VERM_BOUNDARY, - 1) - : rvermUnalign(chars, buf_end - VERM_BOUNDARY, - 1); - - if (ptr) { - return ptr; + const u8 *ptr = nocase ? rvermUnalignNocase(chars, + buf_end - VERM_BOUNDARY, + 1) + : rvermUnalign(chars, buf_end - VERM_BOUNDARY, + 1); + + if (ptr) { + return ptr; } buf_end -= min; @@ -470,32 +470,32 @@ const u8 *rvermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf, VERM_TYPE chars1 = VERM_SET_FN(c1); /* nocase already uppercase */ VERM_TYPE chars2 = VERM_SET_FN(c2); /* nocase already uppercase */ -#ifdef HAVE_AVX512 - if (buf_end - buf <= VERM_BOUNDARY) { - const u8 *ptr = nocase - ? rdvermMiniNocase(chars1, chars2, buf, buf_end) - : rdvermMini(chars1, chars2, buf, buf_end); - - if (ptr) { - return ptr; - } - - // check for partial match at end ??? - return buf - 1; - } -#endif - - assert((buf_end - buf) >= VERM_BOUNDARY); - size_t min = (size_t)buf_end % VERM_BOUNDARY; +#ifdef HAVE_AVX512 + if (buf_end - buf <= VERM_BOUNDARY) { + const u8 *ptr = nocase + ? rdvermMiniNocase(chars1, chars2, buf, buf_end) + : rdvermMini(chars1, chars2, buf, buf_end); + + if (ptr) { + return ptr; + } + + // check for partial match at end ??? + return buf - 1; + } +#endif + + assert((buf_end - buf) >= VERM_BOUNDARY); + size_t min = (size_t)buf_end % VERM_BOUNDARY; if (min) { // input not aligned, so we need to run one iteration with an unaligned // load, then skip buf forward to the next aligned address. There's // some small overlap here, but we don't mind scanning it twice if we // can do it quickly, do we? - const u8 *ptr = nocase ? rdvermPreconditionNocase(chars1, chars2, - buf_end - VERM_BOUNDARY) - : rdvermPrecondition(chars1, chars2, - buf_end - VERM_BOUNDARY); + const u8 *ptr = nocase ? rdvermPreconditionNocase(chars1, chars2, + buf_end - VERM_BOUNDARY) + : rdvermPrecondition(chars1, chars2, + buf_end - VERM_BOUNDARY); if (ptr) { return ptr; diff --git a/contrib/libs/hyperscan/src/nfa/vermicelli_sse.h b/contrib/libs/hyperscan/src/nfa/vermicelli_sse.h index 3ec28dbf77..3307486cff 100644 --- a/contrib/libs/hyperscan/src/nfa/vermicelli_sse.h +++ b/contrib/libs/hyperscan/src/nfa/vermicelli_sse.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2020, Intel Corporation + * Copyright (c) 2015-2020, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -32,8 +32,8 @@ * (users should include vermicelli.h) */ -#if !defined(HAVE_AVX512) - +#if !defined(HAVE_AVX512) + #define VERM_BOUNDARY 16 #define VERM_TYPE m128 #define VERM_SET_FN set16x8 @@ -393,497 +393,497 @@ const u8 *rdvermPreconditionNocase(m128 chars1, m128 chars2, const u8 *buf) { return NULL; } - -#else // HAVE_AVX512 - -#define VERM_BOUNDARY 64 -#define VERM_TYPE m512 -#define VERM_SET_FN set64x8 - -static really_inline -const u8 *vermMini(m512 chars, const u8 *buf, const u8 *buf_end, char negate) { - uintptr_t len = buf_end - buf; - __mmask64 mask = (~0ULL) >> (64 - len); - m512 data = loadu_maskz_m512(mask, buf); - - u64a z = eq512mask(chars, data); - - if (negate) { - z = ~z & mask; - } - z &= mask; - if (unlikely(z)) { - return buf + ctz64(z); - } - return NULL; -} - -static really_inline -const u8 *vermMiniNocase(m512 chars, const u8 *buf, const u8 *buf_end, - char negate) { - uintptr_t len = buf_end - buf; - __mmask64 mask = (~0ULL) >> (64 - len); - m512 data = loadu_maskz_m512(mask, buf); - m512 casemask = set64x8(CASE_CLEAR); - m512 v = and512(casemask, data); - - u64a z = eq512mask(chars, v); - - if (negate) { - z = ~z & mask; - } - z &= mask; - if (unlikely(z)) { - return buf + ctz64(z); - } - return NULL; -} - -static really_inline -const u8 *vermSearchAligned(m512 chars, const u8 *buf, const u8 *buf_end, - char negate) { - assert((size_t)buf % 64 == 0); - for (; buf + 63 < buf_end; buf += 64) { - m512 data = load512(buf); - u64a z = eq512mask(chars, data); - if (negate) { - z = ~z & ~0ULL; - } - if (unlikely(z)) { - u64a pos = ctz64(z); - return buf + pos; - } - } - return NULL; -} - -static really_inline -const u8 *vermSearchAlignedNocase(m512 chars, const u8 *buf, - const u8 *buf_end, char negate) { - assert((size_t)buf % 64 == 0); - m512 casemask = set64x8(CASE_CLEAR); - - for (; buf + 63 < buf_end; buf += 64) { - m512 data = load512(buf); - u64a z = eq512mask(chars, and512(casemask, data)); - if (negate) { - z = ~z & ~0ULL; - } - if (unlikely(z)) { - u64a pos = ctz64(z); - return buf + pos; - } - } - return NULL; -} - -// returns NULL if not found -static really_inline -const u8 *vermUnalign(m512 chars, const u8 *buf, char negate) { - m512 data = loadu512(buf); // unaligned - u64a z = eq512mask(chars, data); - if (negate) { - z = ~z & ~0ULL; - } - if (unlikely(z)) { - return buf + ctz64(z); - } - return NULL; -} - -// returns NULL if not found -static really_inline -const u8 *vermUnalignNocase(m512 chars, const u8 *buf, char negate) { - m512 casemask = set64x8(CASE_CLEAR); - m512 data = loadu512(buf); // unaligned - u64a z = eq512mask(chars, and512(casemask, data)); - if (negate) { - z = ~z & ~0ULL; - } - if (unlikely(z)) { - return buf + ctz64(z); - } - return NULL; -} - -static really_inline -const u8 *dvermMini(m512 chars1, m512 chars2, const u8 *buf, - const u8 *buf_end) { - uintptr_t len = buf_end - buf; - __mmask64 mask = (~0ULL) >> (64 - len); - m512 data = loadu_maskz_m512(mask, buf); - - u64a z = eq512mask(chars1, data) & (eq512mask(chars2, data) >> 1); - - z &= mask; - if (unlikely(z)) { - u64a pos = ctz64(z); - return buf + pos; - } - return NULL; -} - -static really_inline -const u8 *dvermMiniNocase(m512 chars1, m512 chars2, const u8 *buf, - const u8 *buf_end) { - uintptr_t len = buf_end - buf; - __mmask64 mask = (~0ULL) >> (64 - len); - m512 data = loadu_maskz_m512(mask, buf); - m512 casemask = set64x8(CASE_CLEAR); - m512 v = and512(casemask, data); - - u64a z = eq512mask(chars1, v) & (eq512mask(chars2, v) >> 1); - - z &= mask; - if (unlikely(z)) { - u64a pos = ctz64(z); - return buf + pos; - } - return NULL; -} - -static really_inline -const u8 *dvermMiniMasked(m512 chars1, m512 chars2, m512 mask1, m512 mask2, - const u8 *buf, const u8 *buf_end) { - uintptr_t len = buf_end - buf; - __mmask64 mask = (~0ULL) >> (64 - len); - m512 data = loadu_maskz_m512(mask, buf); - m512 v1 = and512(data, mask1); - m512 v2 = and512(data, mask2); - - u64a z = eq512mask(chars1, v1) & (eq512mask(chars2, v2) >> 1); - - z &= mask; - if (unlikely(z)) { - u64a pos = ctz64(z); - return buf + pos; - } - return NULL; -} - -static really_inline -const u8 *dvermSearchAligned(m512 chars1, m512 chars2, u8 c1, u8 c2, - const u8 *buf, const u8 *buf_end) { - for (; buf + 64 < buf_end; buf += 64) { - m512 data = load512(buf); - u64a z = eq512mask(chars1, data) & (eq512mask(chars2, data) >> 1); - if (buf[63] == c1 && buf[64] == c2) { - z |= (1ULL << 63); - } - if (unlikely(z)) { - u64a pos = ctz64(z); - return buf + pos; - } - } - - return NULL; -} - -static really_inline -const u8 *dvermSearchAlignedNocase(m512 chars1, m512 chars2, u8 c1, u8 c2, - const u8 *buf, const u8 *buf_end) { - assert((size_t)buf % 64 == 0); - m512 casemask = set64x8(CASE_CLEAR); - - for (; buf + 64 < buf_end; buf += 64) { - m512 data = load512(buf); - m512 v = and512(casemask, data); - u64a z = eq512mask(chars1, v) & (eq512mask(chars2, v) >> 1); - if ((buf[63] & CASE_CLEAR) == c1 && (buf[64] & CASE_CLEAR) == c2) { - z |= (1ULL << 63); - } - if (unlikely(z)) { - u64a pos = ctz64(z); - return buf + pos; - } - } - - return NULL; -} - -static really_inline -const u8 *dvermSearchAlignedMasked(m512 chars1, m512 chars2, - m512 mask1, m512 mask2, u8 c1, u8 c2, u8 m1, - u8 m2, const u8 *buf, const u8 *buf_end) { - assert((size_t)buf % 64 == 0); - - for (; buf + 64 < buf_end; buf += 64) { - m512 data = load512(buf); - m512 v1 = and512(data, mask1); - m512 v2 = and512(data, mask2); - u64a z = eq512mask(chars1, v1) & (eq512mask(chars2, v2) >> 1); - - if ((buf[63] & m1) == c1 && (buf[64] & m2) == c2) { - z |= (1ULL << 63); - } - if (unlikely(z)) { - u64a pos = ctz64(z); - return buf + pos; - } - } - - return NULL; -} - -// returns NULL if not found -static really_inline -const u8 *dvermPrecondition(m512 chars1, m512 chars2, const u8 *buf) { - m512 data = loadu512(buf); // unaligned - u64a z = eq512mask(chars1, data) & (eq512mask(chars2, data) >> 1); - - /* no fixup of the boundary required - the aligned run will pick it up */ - if (unlikely(z)) { - u64a pos = ctz64(z); - return buf + pos; - } - return NULL; -} - -// returns NULL if not found -static really_inline -const u8 *dvermPreconditionNocase(m512 chars1, m512 chars2, const u8 *buf) { - /* due to laziness, nonalphas and nocase having interesting behaviour */ - m512 casemask = set64x8(CASE_CLEAR); - m512 data = loadu512(buf); // unaligned - m512 v = and512(casemask, data); - u64a z = eq512mask(chars1, v) & (eq512mask(chars2, v) >> 1); - - /* no fixup of the boundary required - the aligned run will pick it up */ - if (unlikely(z)) { - u64a pos = ctz64(z); - return buf + pos; - } - return NULL; -} - -// returns NULL if not found -static really_inline -const u8 *dvermPreconditionMasked(m512 chars1, m512 chars2, - m512 mask1, m512 mask2, const u8 *buf) { - m512 data = loadu512(buf); // unaligned - m512 v1 = and512(data, mask1); - m512 v2 = and512(data, mask2); - u64a z = eq512mask(chars1, v1) & (eq512mask(chars2, v2) >> 1); - - /* no fixup of the boundary required - the aligned run will pick it up */ - if (unlikely(z)) { - u64a pos = ctz64(z); - return buf + pos; - } - return NULL; -} - -static really_inline -const u8 *lastMatchOffset(const u8 *buf_end, u64a z) { - assert(z); - return buf_end - 64 + 63 - clz64(z); -} - -static really_inline -const u8 *rvermMini(m512 chars, const u8 *buf, const u8 *buf_end, char negate) { - uintptr_t len = buf_end - buf; - __mmask64 mask = (~0ULL) >> (64 - len); - m512 data = loadu_maskz_m512(mask, buf); - - u64a z = eq512mask(chars, data); - - if (negate) { - z = ~z & mask; - } - z &= mask; - if (unlikely(z)) { - return lastMatchOffset(buf + 64, z); - } - return NULL; -} - -static really_inline -const u8 *rvermMiniNocase(m512 chars, const u8 *buf, const u8 *buf_end, - char negate) { - uintptr_t len = buf_end - buf; - __mmask64 mask = (~0ULL) >> (64 - len); - m512 data = loadu_maskz_m512(mask, buf); - m512 casemask = set64x8(CASE_CLEAR); - m512 v = and512(casemask, data); - - u64a z = eq512mask(chars, v); - - if (negate) { - z = ~z & mask; - } - z &= mask; - if (unlikely(z)) { - return lastMatchOffset(buf + 64, z); - } - return NULL; -} - -static really_inline -const u8 *rvermSearchAligned(m512 chars, const u8 *buf, const u8 *buf_end, - char negate) { - assert((size_t)buf_end % 64 == 0); - for (; buf + 63 < buf_end; buf_end -= 64) { - m512 data = load512(buf_end - 64); - u64a z = eq512mask(chars, data); - if (negate) { - z = ~z & ~0ULL; - } - if (unlikely(z)) { - return lastMatchOffset(buf_end, z); - } - } - return NULL; -} - -static really_inline -const u8 *rvermSearchAlignedNocase(m512 chars, const u8 *buf, - const u8 *buf_end, char negate) { - assert((size_t)buf_end % 64 == 0); - m512 casemask = set64x8(CASE_CLEAR); - - for (; buf + 63 < buf_end; buf_end -= 64) { - m512 data = load512(buf_end - 64); - u64a z = eq512mask(chars, and512(casemask, data)); - if (negate) { - z = ~z & ~0ULL; - } - if (unlikely(z)) { - return lastMatchOffset(buf_end, z); - } - } - return NULL; -} - -// returns NULL if not found -static really_inline -const u8 *rvermUnalign(m512 chars, const u8 *buf, char negate) { - m512 data = loadu512(buf); // unaligned - u64a z = eq512mask(chars, data); - if (negate) { - z = ~z & ~0ULL; - } - if (unlikely(z)) { - return lastMatchOffset(buf + 64, z); - } - return NULL; -} - -// returns NULL if not found -static really_inline -const u8 *rvermUnalignNocase(m512 chars, const u8 *buf, char negate) { - m512 casemask = set64x8(CASE_CLEAR); - m512 data = loadu512(buf); // unaligned - u64a z = eq512mask(chars, and512(casemask, data)); - if (negate) { - z = ~z & ~0ULL; - } - if (unlikely(z)) { - return lastMatchOffset(buf + 64, z); - } - return NULL; -} - -static really_inline -const u8 *rdvermMini(m512 chars1, m512 chars2, const u8 *buf, - const u8 *buf_end) { - uintptr_t len = buf_end - buf; - __mmask64 mask = (~0ULL) >> (64 - len); - m512 data = loadu_maskz_m512(mask, buf); - - u64a z = eq512mask(chars2, data) & (eq512mask(chars1, data) << 1); - - z &= mask; - if (unlikely(z)) { - return lastMatchOffset(buf + 64, z); - } - return NULL; -} - -static really_inline -const u8 *rdvermMiniNocase(m512 chars1, m512 chars2, const u8 *buf, - const u8 *buf_end) { - uintptr_t len = buf_end - buf; - __mmask64 mask = (~0ULL) >> (64 - len); - m512 data = loadu_maskz_m512(mask, buf); - m512 casemask = set64x8(CASE_CLEAR); - m512 v = and512(casemask, data); - - u64a z = eq512mask(chars2, v) & (eq512mask(chars1, v) << 1); - - z &= mask; - if (unlikely(z)) { - return lastMatchOffset(buf + 64, z); - } - return NULL; -} - -static really_inline -const u8 *rdvermSearchAligned(m512 chars1, m512 chars2, u8 c1, u8 c2, - const u8 *buf, const u8 *buf_end) { - assert((size_t)buf_end % 64 == 0); - - for (; buf + 64 < buf_end; buf_end -= 64) { - m512 data = load512(buf_end - 64); - u64a z = eq512mask(chars2, data) & (eq512mask(chars1, data) << 1); - if (buf_end[-65] == c1 && buf_end[-64] == c2) { - z |= 1; - } - if (unlikely(z)) { - return lastMatchOffset(buf_end, z); - } - } - return buf_end; -} - -static really_inline -const u8 *rdvermSearchAlignedNocase(m512 chars1, m512 chars2, u8 c1, u8 c2, - const u8 *buf, const u8 *buf_end) { - assert((size_t)buf_end % 64 == 0); - m512 casemask = set64x8(CASE_CLEAR); - - for (; buf + 64 < buf_end; buf_end -= 64) { - m512 data = load512(buf_end - 64); - m512 v = and512(casemask, data); - u64a z = eq512mask(chars2, v) & (eq512mask(chars1, v) << 1); - if ((buf_end[-65] & CASE_CLEAR) == c1 - && (buf_end[-64] & CASE_CLEAR) == c2) { - z |= 1; - } - if (unlikely(z)) { - return lastMatchOffset(buf_end, z); - } - } - return buf_end; -} - -// returns NULL if not found -static really_inline -const u8 *rdvermPrecondition(m512 chars1, m512 chars2, const u8 *buf) { - m512 data = loadu512(buf); - u64a z = eq512mask(chars2, data) & (eq512mask(chars1, data) << 1); - - // no fixup of the boundary required - the aligned run will pick it up - if (unlikely(z)) { - return lastMatchOffset(buf + 64, z); - } - - return NULL; -} - -// returns NULL if not found -static really_inline -const u8 *rdvermPreconditionNocase(m512 chars1, m512 chars2, const u8 *buf) { - // due to laziness, nonalphas and nocase having interesting behaviour - m512 casemask = set64x8(CASE_CLEAR); - m512 data = loadu512(buf); - m512 v = and512(casemask, data); - u64a z = eq512mask(chars2, v) & (eq512mask(chars1, v) << 1); - // no fixup of the boundary required - the aligned run will pick it up - if (unlikely(z)) { - return lastMatchOffset(buf + 64, z); - } - - return NULL; -} - -#endif // HAVE_AVX512 + +#else // HAVE_AVX512 + +#define VERM_BOUNDARY 64 +#define VERM_TYPE m512 +#define VERM_SET_FN set64x8 + +static really_inline +const u8 *vermMini(m512 chars, const u8 *buf, const u8 *buf_end, char negate) { + uintptr_t len = buf_end - buf; + __mmask64 mask = (~0ULL) >> (64 - len); + m512 data = loadu_maskz_m512(mask, buf); + + u64a z = eq512mask(chars, data); + + if (negate) { + z = ~z & mask; + } + z &= mask; + if (unlikely(z)) { + return buf + ctz64(z); + } + return NULL; +} + +static really_inline +const u8 *vermMiniNocase(m512 chars, const u8 *buf, const u8 *buf_end, + char negate) { + uintptr_t len = buf_end - buf; + __mmask64 mask = (~0ULL) >> (64 - len); + m512 data = loadu_maskz_m512(mask, buf); + m512 casemask = set64x8(CASE_CLEAR); + m512 v = and512(casemask, data); + + u64a z = eq512mask(chars, v); + + if (negate) { + z = ~z & mask; + } + z &= mask; + if (unlikely(z)) { + return buf + ctz64(z); + } + return NULL; +} + +static really_inline +const u8 *vermSearchAligned(m512 chars, const u8 *buf, const u8 *buf_end, + char negate) { + assert((size_t)buf % 64 == 0); + for (; buf + 63 < buf_end; buf += 64) { + m512 data = load512(buf); + u64a z = eq512mask(chars, data); + if (negate) { + z = ~z & ~0ULL; + } + if (unlikely(z)) { + u64a pos = ctz64(z); + return buf + pos; + } + } + return NULL; +} + +static really_inline +const u8 *vermSearchAlignedNocase(m512 chars, const u8 *buf, + const u8 *buf_end, char negate) { + assert((size_t)buf % 64 == 0); + m512 casemask = set64x8(CASE_CLEAR); + + for (; buf + 63 < buf_end; buf += 64) { + m512 data = load512(buf); + u64a z = eq512mask(chars, and512(casemask, data)); + if (negate) { + z = ~z & ~0ULL; + } + if (unlikely(z)) { + u64a pos = ctz64(z); + return buf + pos; + } + } + return NULL; +} + +// returns NULL if not found +static really_inline +const u8 *vermUnalign(m512 chars, const u8 *buf, char negate) { + m512 data = loadu512(buf); // unaligned + u64a z = eq512mask(chars, data); + if (negate) { + z = ~z & ~0ULL; + } + if (unlikely(z)) { + return buf + ctz64(z); + } + return NULL; +} + +// returns NULL if not found +static really_inline +const u8 *vermUnalignNocase(m512 chars, const u8 *buf, char negate) { + m512 casemask = set64x8(CASE_CLEAR); + m512 data = loadu512(buf); // unaligned + u64a z = eq512mask(chars, and512(casemask, data)); + if (negate) { + z = ~z & ~0ULL; + } + if (unlikely(z)) { + return buf + ctz64(z); + } + return NULL; +} + +static really_inline +const u8 *dvermMini(m512 chars1, m512 chars2, const u8 *buf, + const u8 *buf_end) { + uintptr_t len = buf_end - buf; + __mmask64 mask = (~0ULL) >> (64 - len); + m512 data = loadu_maskz_m512(mask, buf); + + u64a z = eq512mask(chars1, data) & (eq512mask(chars2, data) >> 1); + + z &= mask; + if (unlikely(z)) { + u64a pos = ctz64(z); + return buf + pos; + } + return NULL; +} + +static really_inline +const u8 *dvermMiniNocase(m512 chars1, m512 chars2, const u8 *buf, + const u8 *buf_end) { + uintptr_t len = buf_end - buf; + __mmask64 mask = (~0ULL) >> (64 - len); + m512 data = loadu_maskz_m512(mask, buf); + m512 casemask = set64x8(CASE_CLEAR); + m512 v = and512(casemask, data); + + u64a z = eq512mask(chars1, v) & (eq512mask(chars2, v) >> 1); + + z &= mask; + if (unlikely(z)) { + u64a pos = ctz64(z); + return buf + pos; + } + return NULL; +} + +static really_inline +const u8 *dvermMiniMasked(m512 chars1, m512 chars2, m512 mask1, m512 mask2, + const u8 *buf, const u8 *buf_end) { + uintptr_t len = buf_end - buf; + __mmask64 mask = (~0ULL) >> (64 - len); + m512 data = loadu_maskz_m512(mask, buf); + m512 v1 = and512(data, mask1); + m512 v2 = and512(data, mask2); + + u64a z = eq512mask(chars1, v1) & (eq512mask(chars2, v2) >> 1); + + z &= mask; + if (unlikely(z)) { + u64a pos = ctz64(z); + return buf + pos; + } + return NULL; +} + +static really_inline +const u8 *dvermSearchAligned(m512 chars1, m512 chars2, u8 c1, u8 c2, + const u8 *buf, const u8 *buf_end) { + for (; buf + 64 < buf_end; buf += 64) { + m512 data = load512(buf); + u64a z = eq512mask(chars1, data) & (eq512mask(chars2, data) >> 1); + if (buf[63] == c1 && buf[64] == c2) { + z |= (1ULL << 63); + } + if (unlikely(z)) { + u64a pos = ctz64(z); + return buf + pos; + } + } + + return NULL; +} + +static really_inline +const u8 *dvermSearchAlignedNocase(m512 chars1, m512 chars2, u8 c1, u8 c2, + const u8 *buf, const u8 *buf_end) { + assert((size_t)buf % 64 == 0); + m512 casemask = set64x8(CASE_CLEAR); + + for (; buf + 64 < buf_end; buf += 64) { + m512 data = load512(buf); + m512 v = and512(casemask, data); + u64a z = eq512mask(chars1, v) & (eq512mask(chars2, v) >> 1); + if ((buf[63] & CASE_CLEAR) == c1 && (buf[64] & CASE_CLEAR) == c2) { + z |= (1ULL << 63); + } + if (unlikely(z)) { + u64a pos = ctz64(z); + return buf + pos; + } + } + + return NULL; +} + +static really_inline +const u8 *dvermSearchAlignedMasked(m512 chars1, m512 chars2, + m512 mask1, m512 mask2, u8 c1, u8 c2, u8 m1, + u8 m2, const u8 *buf, const u8 *buf_end) { + assert((size_t)buf % 64 == 0); + + for (; buf + 64 < buf_end; buf += 64) { + m512 data = load512(buf); + m512 v1 = and512(data, mask1); + m512 v2 = and512(data, mask2); + u64a z = eq512mask(chars1, v1) & (eq512mask(chars2, v2) >> 1); + + if ((buf[63] & m1) == c1 && (buf[64] & m2) == c2) { + z |= (1ULL << 63); + } + if (unlikely(z)) { + u64a pos = ctz64(z); + return buf + pos; + } + } + + return NULL; +} + +// returns NULL if not found +static really_inline +const u8 *dvermPrecondition(m512 chars1, m512 chars2, const u8 *buf) { + m512 data = loadu512(buf); // unaligned + u64a z = eq512mask(chars1, data) & (eq512mask(chars2, data) >> 1); + + /* no fixup of the boundary required - the aligned run will pick it up */ + if (unlikely(z)) { + u64a pos = ctz64(z); + return buf + pos; + } + return NULL; +} + +// returns NULL if not found +static really_inline +const u8 *dvermPreconditionNocase(m512 chars1, m512 chars2, const u8 *buf) { + /* due to laziness, nonalphas and nocase having interesting behaviour */ + m512 casemask = set64x8(CASE_CLEAR); + m512 data = loadu512(buf); // unaligned + m512 v = and512(casemask, data); + u64a z = eq512mask(chars1, v) & (eq512mask(chars2, v) >> 1); + + /* no fixup of the boundary required - the aligned run will pick it up */ + if (unlikely(z)) { + u64a pos = ctz64(z); + return buf + pos; + } + return NULL; +} + +// returns NULL if not found +static really_inline +const u8 *dvermPreconditionMasked(m512 chars1, m512 chars2, + m512 mask1, m512 mask2, const u8 *buf) { + m512 data = loadu512(buf); // unaligned + m512 v1 = and512(data, mask1); + m512 v2 = and512(data, mask2); + u64a z = eq512mask(chars1, v1) & (eq512mask(chars2, v2) >> 1); + + /* no fixup of the boundary required - the aligned run will pick it up */ + if (unlikely(z)) { + u64a pos = ctz64(z); + return buf + pos; + } + return NULL; +} + +static really_inline +const u8 *lastMatchOffset(const u8 *buf_end, u64a z) { + assert(z); + return buf_end - 64 + 63 - clz64(z); +} + +static really_inline +const u8 *rvermMini(m512 chars, const u8 *buf, const u8 *buf_end, char negate) { + uintptr_t len = buf_end - buf; + __mmask64 mask = (~0ULL) >> (64 - len); + m512 data = loadu_maskz_m512(mask, buf); + + u64a z = eq512mask(chars, data); + + if (negate) { + z = ~z & mask; + } + z &= mask; + if (unlikely(z)) { + return lastMatchOffset(buf + 64, z); + } + return NULL; +} + +static really_inline +const u8 *rvermMiniNocase(m512 chars, const u8 *buf, const u8 *buf_end, + char negate) { + uintptr_t len = buf_end - buf; + __mmask64 mask = (~0ULL) >> (64 - len); + m512 data = loadu_maskz_m512(mask, buf); + m512 casemask = set64x8(CASE_CLEAR); + m512 v = and512(casemask, data); + + u64a z = eq512mask(chars, v); + + if (negate) { + z = ~z & mask; + } + z &= mask; + if (unlikely(z)) { + return lastMatchOffset(buf + 64, z); + } + return NULL; +} + +static really_inline +const u8 *rvermSearchAligned(m512 chars, const u8 *buf, const u8 *buf_end, + char negate) { + assert((size_t)buf_end % 64 == 0); + for (; buf + 63 < buf_end; buf_end -= 64) { + m512 data = load512(buf_end - 64); + u64a z = eq512mask(chars, data); + if (negate) { + z = ~z & ~0ULL; + } + if (unlikely(z)) { + return lastMatchOffset(buf_end, z); + } + } + return NULL; +} + +static really_inline +const u8 *rvermSearchAlignedNocase(m512 chars, const u8 *buf, + const u8 *buf_end, char negate) { + assert((size_t)buf_end % 64 == 0); + m512 casemask = set64x8(CASE_CLEAR); + + for (; buf + 63 < buf_end; buf_end -= 64) { + m512 data = load512(buf_end - 64); + u64a z = eq512mask(chars, and512(casemask, data)); + if (negate) { + z = ~z & ~0ULL; + } + if (unlikely(z)) { + return lastMatchOffset(buf_end, z); + } + } + return NULL; +} + +// returns NULL if not found +static really_inline +const u8 *rvermUnalign(m512 chars, const u8 *buf, char negate) { + m512 data = loadu512(buf); // unaligned + u64a z = eq512mask(chars, data); + if (negate) { + z = ~z & ~0ULL; + } + if (unlikely(z)) { + return lastMatchOffset(buf + 64, z); + } + return NULL; +} + +// returns NULL if not found +static really_inline +const u8 *rvermUnalignNocase(m512 chars, const u8 *buf, char negate) { + m512 casemask = set64x8(CASE_CLEAR); + m512 data = loadu512(buf); // unaligned + u64a z = eq512mask(chars, and512(casemask, data)); + if (negate) { + z = ~z & ~0ULL; + } + if (unlikely(z)) { + return lastMatchOffset(buf + 64, z); + } + return NULL; +} + +static really_inline +const u8 *rdvermMini(m512 chars1, m512 chars2, const u8 *buf, + const u8 *buf_end) { + uintptr_t len = buf_end - buf; + __mmask64 mask = (~0ULL) >> (64 - len); + m512 data = loadu_maskz_m512(mask, buf); + + u64a z = eq512mask(chars2, data) & (eq512mask(chars1, data) << 1); + + z &= mask; + if (unlikely(z)) { + return lastMatchOffset(buf + 64, z); + } + return NULL; +} + +static really_inline +const u8 *rdvermMiniNocase(m512 chars1, m512 chars2, const u8 *buf, + const u8 *buf_end) { + uintptr_t len = buf_end - buf; + __mmask64 mask = (~0ULL) >> (64 - len); + m512 data = loadu_maskz_m512(mask, buf); + m512 casemask = set64x8(CASE_CLEAR); + m512 v = and512(casemask, data); + + u64a z = eq512mask(chars2, v) & (eq512mask(chars1, v) << 1); + + z &= mask; + if (unlikely(z)) { + return lastMatchOffset(buf + 64, z); + } + return NULL; +} + +static really_inline +const u8 *rdvermSearchAligned(m512 chars1, m512 chars2, u8 c1, u8 c2, + const u8 *buf, const u8 *buf_end) { + assert((size_t)buf_end % 64 == 0); + + for (; buf + 64 < buf_end; buf_end -= 64) { + m512 data = load512(buf_end - 64); + u64a z = eq512mask(chars2, data) & (eq512mask(chars1, data) << 1); + if (buf_end[-65] == c1 && buf_end[-64] == c2) { + z |= 1; + } + if (unlikely(z)) { + return lastMatchOffset(buf_end, z); + } + } + return buf_end; +} + +static really_inline +const u8 *rdvermSearchAlignedNocase(m512 chars1, m512 chars2, u8 c1, u8 c2, + const u8 *buf, const u8 *buf_end) { + assert((size_t)buf_end % 64 == 0); + m512 casemask = set64x8(CASE_CLEAR); + + for (; buf + 64 < buf_end; buf_end -= 64) { + m512 data = load512(buf_end - 64); + m512 v = and512(casemask, data); + u64a z = eq512mask(chars2, v) & (eq512mask(chars1, v) << 1); + if ((buf_end[-65] & CASE_CLEAR) == c1 + && (buf_end[-64] & CASE_CLEAR) == c2) { + z |= 1; + } + if (unlikely(z)) { + return lastMatchOffset(buf_end, z); + } + } + return buf_end; +} + +// returns NULL if not found +static really_inline +const u8 *rdvermPrecondition(m512 chars1, m512 chars2, const u8 *buf) { + m512 data = loadu512(buf); + u64a z = eq512mask(chars2, data) & (eq512mask(chars1, data) << 1); + + // no fixup of the boundary required - the aligned run will pick it up + if (unlikely(z)) { + return lastMatchOffset(buf + 64, z); + } + + return NULL; +} + +// returns NULL if not found +static really_inline +const u8 *rdvermPreconditionNocase(m512 chars1, m512 chars2, const u8 *buf) { + // due to laziness, nonalphas and nocase having interesting behaviour + m512 casemask = set64x8(CASE_CLEAR); + m512 data = loadu512(buf); + m512 v = and512(casemask, data); + u64a z = eq512mask(chars2, v) & (eq512mask(chars1, v) << 1); + // no fixup of the boundary required - the aligned run will pick it up + if (unlikely(z)) { + return lastMatchOffset(buf + 64, z); + } + + return NULL; +} + +#endif // HAVE_AVX512 |