author     Ivan Blinkov <ivan@blinkov.ru>                 2022-02-10 16:47:10 +0300
committer  Daniil Cherednik <dcherednik@yandex-team.ru>   2022-02-10 16:47:10 +0300
commit     1aeb9a455974457866f78722ad98114bafc84e8a (patch)
tree       e4340eaf1668684d83a0a58c36947c5def5350ad /contrib/libs/hyperscan/src/nfa
parent     bd5ef432f5cfb1e18851381329d94665a4c22470 (diff)
download   ydb-1aeb9a455974457866f78722ad98114bafc84e8a.tar.gz
Restoring authorship annotation for Ivan Blinkov <ivan@blinkov.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/hyperscan/src/nfa')
102 files changed, 12082 insertions, 12082 deletions
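The first two files in the diff below, accel.c and accel.h, define the runtime half of Hyperscan's acceleration framework: AccelAux is a union tagged by its leading accel_type byte, and run_accel() switches on that tag to pick a specialised scanner (vermicelli for a single literal byte, shufti/truffle for character classes, ACCEL_RED_TAPE for dead-end states), then compensates for the scheme's offset before returning the position at which matching must resume. The sketch below shows only that dispatch shape; the simplified types, the memchr-based scan, and the name run_accel_sketch are illustrative stand-ins, not Hyperscan's actual SIMD implementation.

```cpp
#include <algorithm>
#include <cstdint>
#include <cstring>

using u8 = std::uint8_t;

// Tag values mirror a subset of enum AccelType in accel.h below.
enum AccelType : u8 { ACCEL_NONE, ACCEL_VERM, ACCEL_RED_TAPE };

// Simplified stand-in for union AccelAux: every variant begins with the
// accel_type tag, so the tag can be read through any member.
union AccelAux {
    u8 accel_type;
    struct {
        u8 accel_type;
        u8 offset;
    } generic;
    struct {
        u8 accel_type;
        u8 offset;
        u8 c; // the single "stop" character to look for
    } verm;
};

// Skip ahead to the first position where a match could begin, as run_accel()
// does; callers resume normal engine execution from the returned pointer.
const u8 *run_accel_sketch(const union AccelAux *accel, const u8 *c,
                           const u8 *c_end) {
    const u8 *rv = c;

    switch (accel->accel_type) {
    case ACCEL_VERM: // single-byte scan (vermicelliExec in the real code)
        rv = static_cast<const u8 *>(
            std::memchr(c, accel->verm.c, static_cast<size_t>(c_end - c)));
        if (!rv) {
            rv = c_end;
        }
        break;
    case ACCEL_RED_TAPE: // dead-end state: nothing in the buffer can escape
        rv = c_end;
        break;
    default: // ACCEL_NONE: no acceleration available
        break;
    }

    // Same offset compensation as the tail of run_accel() in accel.c.
    rv = std::max(c + accel->generic.offset, rv);
    rv -= accel->generic.offset;
    return rv;
}
```

Setting aux.verm = {ACCEL_VERM, 0, 'x'} and calling the sketch on a buffer then skips directly to the first 'x', which is the whole point of the framework: spend the common case in a tight byte scan instead of the general automaton loop.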
diff --git a/contrib/libs/hyperscan/src/nfa/accel.c b/contrib/libs/hyperscan/src/nfa/accel.c
index 2bc60945f9..3260b7bd3a 100644
--- a/contrib/libs/hyperscan/src/nfa/accel.c
+++ b/contrib/libs/hyperscan/src/nfa/accel.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -81,18 +81,18 @@ const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) {
                                   c_end - 1);
         break;
-    case ACCEL_DVERM_MASKED:
-        DEBUG_PRINTF("accel dverm masked %p %p\n", c, c_end);
-        if (c + 16 + 1 >= c_end) {
-            return c;
-        }
-
-        /* need to stop one early to get an accurate end state */
-        rv = vermicelliDoubleMaskedExec(accel->dverm.c1, accel->dverm.c2,
-                                        accel->dverm.m1, accel->dverm.m2,
-                                        c, c_end - 1);
-        break;
-
+    case ACCEL_DVERM_MASKED:
+        DEBUG_PRINTF("accel dverm masked %p %p\n", c, c_end);
+        if (c + 16 + 1 >= c_end) {
+            return c;
+        }
+
+        /* need to stop one early to get an accurate end state */
+        rv = vermicelliDoubleMaskedExec(accel->dverm.c1, accel->dverm.c2,
+                                        accel->dverm.m1, accel->dverm.m2,
+                                        c, c_end - 1);
+        break;
+
     case ACCEL_SHUFTI:
         DEBUG_PRINTF("accel shufti %p %p\n", c, c_end);
         if (c + 15 >= c_end) {
@@ -129,7 +129,7 @@ const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) {
         rv = c_end;
         break;
-
+
     default:
         assert(!"not here");
         return c;
@@ -140,7 +140,7 @@ const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) {
     rv = MAX(c + accel->generic.offset, rv);
     rv -= accel->generic.offset;
 
-    DEBUG_PRINTF("advanced %zd\n", rv - c);
-
+    DEBUG_PRINTF("advanced %zd\n", rv - c);
+
     return rv;
 }
diff --git a/contrib/libs/hyperscan/src/nfa/accel.h b/contrib/libs/hyperscan/src/nfa/accel.h
index 3a03d05967..a91abe0d2c 100644
--- a/contrib/libs/hyperscan/src/nfa/accel.h
+++ b/contrib/libs/hyperscan/src/nfa/accel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -60,8 +60,8 @@ enum AccelType {
     ACCEL_SHUFTI,
     ACCEL_DSHUFTI,
     ACCEL_TRUFFLE,
-    ACCEL_RED_TAPE,
-    ACCEL_DVERM_MASKED,
+    ACCEL_RED_TAPE,
+    ACCEL_DVERM_MASKED,
 };
 
 /** \brief Structure for accel framework.
 */
@@ -81,25 +81,25 @@ union AccelAux {
         u8 offset;
         u8 c1; // uppercase if nocase
         u8 c2; // uppercase if nocase
-        u8 m1; // masked variant
-        u8 m2; // masked variant
+        u8 m1; // masked variant
+        u8 m2; // masked variant
     } dverm;
     struct {
         u8 accel_type;
         u8 offset;
-        u8 c; // uppercase if nocase
-        u8 len;
-    } mverm;
-    struct {
-        u8 accel_type;
-        u8 offset;
-        u8 c; // uppercase if nocase
-        u8 len1;
-        u8 len2;
-    } mdverm;
-    struct {
-        u8 accel_type;
-        u8 offset;
+        u8 c; // uppercase if nocase
+        u8 len;
+    } mverm;
+    struct {
+        u8 accel_type;
+        u8 offset;
+        u8 c; // uppercase if nocase
+        u8 len1;
+        u8 len2;
+    } mdverm;
+    struct {
+        u8 accel_type;
+        u8 offset;
         m128 lo;
         m128 hi;
     } shufti;
diff --git a/contrib/libs/hyperscan/src/nfa/accel_dfa_build_strat.cpp b/contrib/libs/hyperscan/src/nfa/accel_dfa_build_strat.cpp
index ae71e141a2..dbd31e6033 100644
--- a/contrib/libs/hyperscan/src/nfa/accel_dfa_build_strat.cpp
+++ b/contrib/libs/hyperscan/src/nfa/accel_dfa_build_strat.cpp
@@ -1,607 +1,607 @@
-/*
- * Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- *  * Redistributions of source code must retain the above copyright notice,
- *    this list of conditions and the following disclaimer.
- *  * Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *  * Neither the name of Intel Corporation nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */ - -#include "accel_dfa_build_strat.h" - -#include "accel.h" -#include "grey.h" -#include "nfagraph/ng_limex_accel.h" -#include "shufticompile.h" -#include "trufflecompile.h" -#include "util/accel_scheme.h" -#include "util/charreach.h" -#include "util/container.h" -#include "util/dump_charclass.h" -#include "util/small_vector.h" -#include "util/verify_types.h" - -#include <sstream> -#include <unordered_map> -#include <unordered_set> -#include <vector> - -#define PATHS_LIMIT 500 - -using namespace std; - -namespace ue2 { - -namespace { -struct path { - small_vector<CharReach, MAX_ACCEL_DEPTH + 1> reach; - dstate_id_t dest = DEAD_STATE; - explicit path(dstate_id_t base) : dest(base) {} -}; -}; - -template<typename Container> -void dump_paths(const Container &paths) { - for (UNUSED const path &p : paths) { - DEBUG_PRINTF("[%s] -> %u\n", describeClasses(p.reach).c_str(), p.dest); - } - DEBUG_PRINTF("%zu paths\n", paths.size()); -} - -static -vector<CharReach> reverse_alpha_remapping(const raw_dfa &rdfa) { - vector<CharReach> rv(rdfa.alpha_size - 1); /* TOP not required */ - - for (u32 i = 0; i < N_CHARS; i++) { - rv.at(rdfa.alpha_remap[i]).set(i); - } - - return rv; -} - -static -bool is_useful_path(const vector<path> &good, const path &p) { - for (const auto &g : good) { - assert(g.dest == p.dest); - assert(g.reach.size() <= p.reach.size()); - auto git = g.reach.rbegin(); - auto pit = p.reach.rbegin(); - - for (; git != g.reach.rend(); ++git, ++pit) { - if (!pit->isSubsetOf(*git)) { - goto next; - } - } - DEBUG_PRINTF("better: [%s] -> %u\n", describeClasses(g.reach).c_str(), - g.dest); - - return false; - next:; - } - - return true; -} - -static -path append(const path &orig, const CharReach &cr, u32 new_dest) { - path p(new_dest); - p.reach = orig.reach; - p.reach.push_back(cr); - - return p; -} - -static -void extend(const raw_dfa &rdfa, const vector<CharReach> &rev_map, - const path &p, unordered_map<u32, vector<path>> &all, - vector<path> &out) { - const dstate &s = rdfa.states[p.dest]; - - if (!p.reach.empty() && p.reach.back().none()) { - out.push_back(p); - return; - } - - if (!s.reports.empty()) { - if (generates_callbacks(rdfa.kind)) { - out.push_back(p); - return; - } else { - path pp = append(p, CharReach(), p.dest); - all[p.dest].push_back(pp); - out.push_back(move(pp)); - } - } - - if (!s.reports_eod.empty()) { - path pp = append(p, CharReach(), p.dest); - all[p.dest].push_back(pp); - out.push_back(move(pp)); - } - - flat_map<u32, CharReach> dest; - for (u32 i = 0; i < rev_map.size(); i++) { - u32 succ = s.next[i]; - dest[succ] |= rev_map[i]; - } - - for (const auto &e : dest) { - path pp = append(p, e.second, e.first); - if (!is_useful_path(all[e.first], pp)) { - DEBUG_PRINTF("not useful: [%s] -> %u\n", - describeClasses(pp.reach).c_str(), pp.dest); - continue; - } - - DEBUG_PRINTF("----good: [%s] -> %u\n", - describeClasses(pp.reach).c_str(), pp.dest); - all[e.first].push_back(pp); - out.push_back(move(pp)); - } -} - -static -vector<vector<CharReach>> generate_paths(const raw_dfa &rdfa, - dstate_id_t base, u32 len) { - const vector<CharReach> rev_map = reverse_alpha_remapping(rdfa); - vector<path> paths{path(base)}; - unordered_map<u32, vector<path>> all; - all[base].push_back(path(base)); - for (u32 i = 0; i < len && paths.size() < PATHS_LIMIT; i++) { - vector<path> next_gen; - for (const auto &p : paths) { - extend(rdfa, rev_map, p, all, next_gen); - } - - paths = move(next_gen); - } - - dump_paths(paths); - - vector<vector<CharReach>> rv; - rv.reserve(paths.size()); - 
for (auto &p : paths) { - rv.push_back(vector<CharReach>(std::make_move_iterator(p.reach.begin()), - std::make_move_iterator(p.reach.end()))); - } - return rv; -} - -static -AccelScheme look_for_offset_accel(const raw_dfa &rdfa, dstate_id_t base, - u32 max_allowed_accel_offset) { - DEBUG_PRINTF("looking for accel for %hu\n", base); - vector<vector<CharReach>> paths = - generate_paths(rdfa, base, max_allowed_accel_offset + 1); - AccelScheme as = findBestAccelScheme(paths, CharReach(), true); - DEBUG_PRINTF("found %s + %u\n", describeClass(as.cr).c_str(), as.offset); - return as; -} - -static UNUSED -bool better(const AccelScheme &a, const AccelScheme &b) { - if (!a.double_byte.empty() && b.double_byte.empty()) { - return true; - } - - if (!b.double_byte.empty()) { - return false; - } - - return a.cr.count() < b.cr.count(); -} - -static -bool double_byte_ok(const AccelScheme &info) { - return !info.double_byte.empty() && - info.double_cr.count() < info.double_byte.size() && +/* + * Copyright (c) 2015-2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "accel_dfa_build_strat.h" + +#include "accel.h" +#include "grey.h" +#include "nfagraph/ng_limex_accel.h" +#include "shufticompile.h" +#include "trufflecompile.h" +#include "util/accel_scheme.h" +#include "util/charreach.h" +#include "util/container.h" +#include "util/dump_charclass.h" +#include "util/small_vector.h" +#include "util/verify_types.h" + +#include <sstream> +#include <unordered_map> +#include <unordered_set> +#include <vector> + +#define PATHS_LIMIT 500 + +using namespace std; + +namespace ue2 { + +namespace { +struct path { + small_vector<CharReach, MAX_ACCEL_DEPTH + 1> reach; + dstate_id_t dest = DEAD_STATE; + explicit path(dstate_id_t base) : dest(base) {} +}; +}; + +template<typename Container> +void dump_paths(const Container &paths) { + for (UNUSED const path &p : paths) { + DEBUG_PRINTF("[%s] -> %u\n", describeClasses(p.reach).c_str(), p.dest); + } + DEBUG_PRINTF("%zu paths\n", paths.size()); +} + +static +vector<CharReach> reverse_alpha_remapping(const raw_dfa &rdfa) { + vector<CharReach> rv(rdfa.alpha_size - 1); /* TOP not required */ + + for (u32 i = 0; i < N_CHARS; i++) { + rv.at(rdfa.alpha_remap[i]).set(i); + } + + return rv; +} + +static +bool is_useful_path(const vector<path> &good, const path &p) { + for (const auto &g : good) { + assert(g.dest == p.dest); + assert(g.reach.size() <= p.reach.size()); + auto git = g.reach.rbegin(); + auto pit = p.reach.rbegin(); + + for (; git != g.reach.rend(); ++git, ++pit) { + if (!pit->isSubsetOf(*git)) { + goto next; + } + } + DEBUG_PRINTF("better: [%s] -> %u\n", describeClasses(g.reach).c_str(), + g.dest); + + return false; + next:; + } + + return true; +} + +static +path append(const path &orig, const CharReach &cr, u32 new_dest) { + path p(new_dest); + p.reach = orig.reach; + p.reach.push_back(cr); + + return p; +} + +static +void extend(const raw_dfa &rdfa, const vector<CharReach> &rev_map, + const path &p, unordered_map<u32, vector<path>> &all, + vector<path> &out) { + const dstate &s = rdfa.states[p.dest]; + + if (!p.reach.empty() && p.reach.back().none()) { + out.push_back(p); + return; + } + + if (!s.reports.empty()) { + if (generates_callbacks(rdfa.kind)) { + out.push_back(p); + return; + } else { + path pp = append(p, CharReach(), p.dest); + all[p.dest].push_back(pp); + out.push_back(move(pp)); + } + } + + if (!s.reports_eod.empty()) { + path pp = append(p, CharReach(), p.dest); + all[p.dest].push_back(pp); + out.push_back(move(pp)); + } + + flat_map<u32, CharReach> dest; + for (u32 i = 0; i < rev_map.size(); i++) { + u32 succ = s.next[i]; + dest[succ] |= rev_map[i]; + } + + for (const auto &e : dest) { + path pp = append(p, e.second, e.first); + if (!is_useful_path(all[e.first], pp)) { + DEBUG_PRINTF("not useful: [%s] -> %u\n", + describeClasses(pp.reach).c_str(), pp.dest); + continue; + } + + DEBUG_PRINTF("----good: [%s] -> %u\n", + describeClasses(pp.reach).c_str(), pp.dest); + all[e.first].push_back(pp); + out.push_back(move(pp)); + } +} + +static +vector<vector<CharReach>> generate_paths(const raw_dfa &rdfa, + dstate_id_t base, u32 len) { + const vector<CharReach> rev_map = reverse_alpha_remapping(rdfa); + vector<path> paths{path(base)}; + unordered_map<u32, vector<path>> all; + all[base].push_back(path(base)); + for (u32 i = 0; i < len && paths.size() < PATHS_LIMIT; i++) { + vector<path> next_gen; + for (const auto &p : paths) { + extend(rdfa, rev_map, p, all, next_gen); + } + + paths = move(next_gen); + } + + dump_paths(paths); + + vector<vector<CharReach>> rv; + rv.reserve(paths.size()); + 
for (auto &p : paths) { + rv.push_back(vector<CharReach>(std::make_move_iterator(p.reach.begin()), + std::make_move_iterator(p.reach.end()))); + } + return rv; +} + +static +AccelScheme look_for_offset_accel(const raw_dfa &rdfa, dstate_id_t base, + u32 max_allowed_accel_offset) { + DEBUG_PRINTF("looking for accel for %hu\n", base); + vector<vector<CharReach>> paths = + generate_paths(rdfa, base, max_allowed_accel_offset + 1); + AccelScheme as = findBestAccelScheme(paths, CharReach(), true); + DEBUG_PRINTF("found %s + %u\n", describeClass(as.cr).c_str(), as.offset); + return as; +} + +static UNUSED +bool better(const AccelScheme &a, const AccelScheme &b) { + if (!a.double_byte.empty() && b.double_byte.empty()) { + return true; + } + + if (!b.double_byte.empty()) { + return false; + } + + return a.cr.count() < b.cr.count(); +} + +static +bool double_byte_ok(const AccelScheme &info) { + return !info.double_byte.empty() && + info.double_cr.count() < info.double_byte.size() && info.double_cr.count() <= 2; -} - -static -bool has_self_loop(dstate_id_t s, const raw_dfa &raw) { - u16 top_remap = raw.alpha_remap[TOP]; - for (u32 i = 0; i < raw.states[s].next.size(); i++) { - if (i != top_remap && raw.states[s].next[i] == s) { - return true; - } - } - return false; -} - -static -flat_set<u16> find_nonexit_symbols(const raw_dfa &rdfa, - const CharReach &escape) { - flat_set<u16> rv; - CharReach nonexit = ~escape; - for (auto i = nonexit.find_first(); i != nonexit.npos; - i = nonexit.find_next(i)) { - rv.insert(rdfa.alpha_remap[i]); - } - - return rv; -} - -static -dstate_id_t get_sds_or_proxy(const raw_dfa &raw) { - if (raw.start_floating != DEAD_STATE) { - DEBUG_PRINTF("has floating start\n"); - return raw.start_floating; - } - - DEBUG_PRINTF("looking for SDS proxy\n"); - - dstate_id_t s = raw.start_anchored; - - if (has_self_loop(s, raw)) { - return s; - } - - u16 top_remap = raw.alpha_remap[TOP]; - - std::unordered_set<dstate_id_t> seen; - while (true) { - seen.insert(s); - DEBUG_PRINTF("basis %hu\n", s); - - /* check if we are connected to a state with a self loop */ - for (u32 i = 0; i < raw.states[s].next.size(); i++) { - dstate_id_t t = raw.states[s].next[i]; - if (i != top_remap && t != DEAD_STATE && has_self_loop(t, raw)) { - return t; - } - } - - /* find a neighbour to use as a basis for looking for the sds proxy */ - dstate_id_t t = DEAD_STATE; - for (u32 i = 0; i < raw.states[s].next.size(); i++) { - dstate_id_t tt = raw.states[s].next[i]; - if (i != top_remap && tt != DEAD_STATE && !contains(seen, tt)) { - t = tt; - break; - } - } - - if (t == DEAD_STATE) { - /* we were unable to find a state to use as a SDS proxy */ - return DEAD_STATE; - } - - s = t; - } -} - -static -set<dstate_id_t> find_region(const raw_dfa &rdfa, dstate_id_t base, - const AccelScheme &ei) { - DEBUG_PRINTF("looking for region around %hu\n", base); - - set<dstate_id_t> region = {base}; - - if (!ei.double_byte.empty()) { - return region; - } - - DEBUG_PRINTF("accel %s+%u\n", describeClass(ei.cr).c_str(), ei.offset); - - const CharReach &escape = ei.cr; - auto nonexit_symbols = find_nonexit_symbols(rdfa, escape); - - vector<dstate_id_t> pending = {base}; - while (!pending.empty()) { - dstate_id_t curr = pending.back(); - pending.pop_back(); - for (auto s : nonexit_symbols) { - dstate_id_t t = rdfa.states[curr].next[s]; - if (contains(region, t)) { - continue; - } - - DEBUG_PRINTF(" %hu is in region\n", t); - region.insert(t); - pending.push_back(t); - } - } - - return region; -} - -AccelScheme 
-accel_dfa_build_strat::find_escape_strings(dstate_id_t this_idx) const { - AccelScheme rv; - const raw_dfa &rdfa = get_raw(); - rv.cr.clear(); - rv.offset = 0; - const dstate &raw = rdfa.states[this_idx]; - const vector<CharReach> rev_map = reverse_alpha_remapping(rdfa); - bool outs2_broken = false; - flat_map<dstate_id_t, CharReach> succs; - - for (u32 i = 0; i < rev_map.size(); i++) { - if (raw.next[i] == this_idx) { - continue; - } - - const CharReach &cr_i = rev_map.at(i); - - rv.cr |= cr_i; - dstate_id_t next_id = raw.next[i]; - - DEBUG_PRINTF("next is %hu\n", next_id); - const dstate &raw_next = rdfa.states[next_id]; - - if (outs2_broken) { - continue; - } - - if (!raw_next.reports.empty() && generates_callbacks(rdfa.kind)) { - DEBUG_PRINTF("leads to report\n"); - outs2_broken = true; /* cannot accelerate over reports */ - continue; - } - succs[next_id] |= cr_i; - } - - if (!outs2_broken) { - for (const auto &e : succs) { - const CharReach &cr_i = e.second; - const dstate &raw_next = rdfa.states[e.first]; - - CharReach cr_all_j; - for (u32 j = 0; j < rev_map.size(); j++) { - if (raw_next.next[j] == raw.next[j]) { - continue; - } - - DEBUG_PRINTF("state %hu: adding sym %u -> %hu to 2 \n", e.first, - j, raw_next.next[j]); - cr_all_j |= rev_map.at(j); - } - - if (cr_i.count() * cr_all_j.count() > 8) { - DEBUG_PRINTF("adding %zu to double_cr\n", cr_i.count()); - rv.double_cr |= cr_i; - } else { - for (auto ii = cr_i.find_first(); ii != CharReach::npos; - ii = cr_i.find_next(ii)) { - for (auto jj = cr_all_j.find_first(); jj != CharReach::npos; - jj = cr_all_j.find_next(jj)) { - rv.double_byte.emplace((u8)ii, (u8)jj); - if (rv.double_byte.size() > 8) { - DEBUG_PRINTF("outs2 too big\n"); - outs2_broken = true; - goto done; - } - } - } - } - } - - done: - assert(outs2_broken || rv.double_byte.size() <= 8); - if (outs2_broken) { - rv.double_byte.clear(); - } - } - - DEBUG_PRINTF("this %u, sds proxy %hu\n", this_idx, get_sds_or_proxy(rdfa)); - DEBUG_PRINTF("broken %d\n", outs2_broken); - if (!double_byte_ok(rv) && !is_triggered(rdfa.kind) && - this_idx == rdfa.start_floating && this_idx != DEAD_STATE) { - DEBUG_PRINTF("looking for offset accel at %u\n", this_idx); - auto offset = - look_for_offset_accel(rdfa, this_idx, max_allowed_offset_accel()); - DEBUG_PRINTF("width %zu vs %zu\n", offset.cr.count(), rv.cr.count()); - if (double_byte_ok(offset) || offset.cr.count() < rv.cr.count()) { - DEBUG_PRINTF("using offset accel\n"); - rv = offset; - } - } - - return rv; -} - -void -accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx, - const AccelScheme &info, - void *accel_out) { - AccelAux *accel = (AccelAux *)accel_out; - - DEBUG_PRINTF("accelerations scheme has offset s%u/d%u\n", info.offset, - info.double_offset); - accel->generic.offset = verify_u8(info.offset); - - if (double_byte_ok(info) && info.double_cr.none() && - info.double_byte.size() == 1) { - accel->accel_type = ACCEL_DVERM; - accel->dverm.c1 = info.double_byte.begin()->first; - accel->dverm.c2 = info.double_byte.begin()->second; - accel->dverm.offset = verify_u8(info.double_offset); - DEBUG_PRINTF("state %hu is double vermicelli\n", this_idx); - return; - } - - if (double_byte_ok(info) && info.double_cr.none() && - (info.double_byte.size() == 2 || info.double_byte.size() == 4)) { - bool ok = true; - - assert(!info.double_byte.empty()); - u8 firstC = info.double_byte.begin()->first & CASE_CLEAR; - u8 secondC = info.double_byte.begin()->second & CASE_CLEAR; - - for (const pair<u8, u8> &p : info.double_byte) { - if ((p.first 
& CASE_CLEAR) != firstC || - (p.second & CASE_CLEAR) != secondC) { - ok = false; - break; - } - } - - if (ok) { - accel->accel_type = ACCEL_DVERM_NOCASE; - accel->dverm.c1 = firstC; - accel->dverm.c2 = secondC; - accel->dverm.offset = verify_u8(info.double_offset); - DEBUG_PRINTF("state %hu is nc double vermicelli\n", this_idx); - return; - } - - u8 m1; - u8 m2; - if (buildDvermMask(info.double_byte, &m1, &m2)) { - accel->accel_type = ACCEL_DVERM_MASKED; - accel->dverm.offset = verify_u8(info.double_offset); - accel->dverm.c1 = info.double_byte.begin()->first & m1; - accel->dverm.c2 = info.double_byte.begin()->second & m2; - accel->dverm.m1 = m1; - accel->dverm.m2 = m2; - DEBUG_PRINTF( - "building maskeddouble-vermicelli for 0x%02hhx%02hhx\n", - accel->dverm.c1, accel->dverm.c2); - return; - } - } - - if (double_byte_ok(info) && - shuftiBuildDoubleMasks( - info.double_cr, info.double_byte, (u8 *)&accel->dshufti.lo1, - (u8 *)&accel->dshufti.hi1, (u8 *)&accel->dshufti.lo2, - (u8 *)&accel->dshufti.hi2)) { - accel->accel_type = ACCEL_DSHUFTI; - accel->dshufti.offset = verify_u8(info.double_offset); - DEBUG_PRINTF("state %hu is double shufti\n", this_idx); - return; - } - - if (info.cr.none()) { - accel->accel_type = ACCEL_RED_TAPE; - DEBUG_PRINTF("state %hu is a dead end full of bureaucratic red tape" - " from which there is no escape\n", - this_idx); - return; - } - - if (info.cr.count() == 1) { - accel->accel_type = ACCEL_VERM; - accel->verm.c = info.cr.find_first(); - DEBUG_PRINTF("state %hu is vermicelli\n", this_idx); - return; - } - - if (info.cr.count() == 2 && info.cr.isCaselessChar()) { - accel->accel_type = ACCEL_VERM_NOCASE; - accel->verm.c = info.cr.find_first() & CASE_CLEAR; - DEBUG_PRINTF("state %hu is caseless vermicelli\n", this_idx); - return; - } - - if (info.cr.count() > max_floating_stop_char()) { - accel->accel_type = ACCEL_NONE; - DEBUG_PRINTF("state %hu is too broad\n", this_idx); - return; - } - - accel->accel_type = ACCEL_SHUFTI; - if (-1 != shuftiBuildMasks(info.cr, (u8 *)&accel->shufti.lo, - (u8 *)&accel->shufti.hi)) { - DEBUG_PRINTF("state %hu is shufti\n", this_idx); - return; - } - - assert(!info.cr.none()); - accel->accel_type = ACCEL_TRUFFLE; - truffleBuildMasks(info.cr, (u8 *)&accel->truffle.mask1, - (u8 *)&accel->truffle.mask2); - DEBUG_PRINTF("state %hu is truffle\n", this_idx); -} - -map<dstate_id_t, AccelScheme> -accel_dfa_build_strat::getAccelInfo(const Grey &grey) { - map<dstate_id_t, AccelScheme> rv; - raw_dfa &rdfa = get_raw(); - if (!grey.accelerateDFA) { - return rv; - } - - dstate_id_t sds_proxy = get_sds_or_proxy(rdfa); - DEBUG_PRINTF("sds %hu\n", sds_proxy); - - /* Find accel info for a single state. */ - auto do_state = [&](size_t i) { - if (i == DEAD_STATE) { - return; - } - - /* Note on report acceleration states: While we can't accelerate while - * we are spamming out callbacks, the QR code paths don't raise reports - * during scanning so they can accelerate report states. */ - if (generates_callbacks(rdfa.kind) && !rdfa.states[i].reports.empty()) { - return; - } - - size_t single_limit = - i == sds_proxy ? 
max_floating_stop_char() : max_stop_char(); - DEBUG_PRINTF("inspecting %zu/%hu: %zu\n", i, sds_proxy, single_limit); - - AccelScheme ei = find_escape_strings(i); - if (ei.cr.count() > single_limit) { - DEBUG_PRINTF("state %zu is not accelerable has %zu\n", i, - ei.cr.count()); - return; - } - - DEBUG_PRINTF("state %zu should be accelerable %zu\n", i, ei.cr.count()); - - rv[i] = ei; - }; - - if (only_accel_init) { - DEBUG_PRINTF("only computing accel for init states\n"); - do_state(rdfa.start_anchored); - if (rdfa.start_floating != rdfa.start_anchored) { - do_state(rdfa.start_floating); - } - } else { - DEBUG_PRINTF("computing accel for all states\n"); - for (size_t i = 0; i < rdfa.states.size(); i++) { - do_state(i); - } - } - - /* provide acceleration states to states in the region of sds */ - if (contains(rv, sds_proxy)) { - AccelScheme sds_ei = rv[sds_proxy]; - sds_ei.double_byte.clear(); /* region based on single byte scheme - * may differ from double byte */ - DEBUG_PRINTF("looking to expand offset accel to nearby states, %zu\n", - sds_ei.cr.count()); - auto sds_region = find_region(rdfa, sds_proxy, sds_ei); - for (auto s : sds_region) { - if (!contains(rv, s) || better(sds_ei, rv[s])) { - rv[s] = sds_ei; - } - } - } - - return rv; -} -}; +} + +static +bool has_self_loop(dstate_id_t s, const raw_dfa &raw) { + u16 top_remap = raw.alpha_remap[TOP]; + for (u32 i = 0; i < raw.states[s].next.size(); i++) { + if (i != top_remap && raw.states[s].next[i] == s) { + return true; + } + } + return false; +} + +static +flat_set<u16> find_nonexit_symbols(const raw_dfa &rdfa, + const CharReach &escape) { + flat_set<u16> rv; + CharReach nonexit = ~escape; + for (auto i = nonexit.find_first(); i != nonexit.npos; + i = nonexit.find_next(i)) { + rv.insert(rdfa.alpha_remap[i]); + } + + return rv; +} + +static +dstate_id_t get_sds_or_proxy(const raw_dfa &raw) { + if (raw.start_floating != DEAD_STATE) { + DEBUG_PRINTF("has floating start\n"); + return raw.start_floating; + } + + DEBUG_PRINTF("looking for SDS proxy\n"); + + dstate_id_t s = raw.start_anchored; + + if (has_self_loop(s, raw)) { + return s; + } + + u16 top_remap = raw.alpha_remap[TOP]; + + std::unordered_set<dstate_id_t> seen; + while (true) { + seen.insert(s); + DEBUG_PRINTF("basis %hu\n", s); + + /* check if we are connected to a state with a self loop */ + for (u32 i = 0; i < raw.states[s].next.size(); i++) { + dstate_id_t t = raw.states[s].next[i]; + if (i != top_remap && t != DEAD_STATE && has_self_loop(t, raw)) { + return t; + } + } + + /* find a neighbour to use as a basis for looking for the sds proxy */ + dstate_id_t t = DEAD_STATE; + for (u32 i = 0; i < raw.states[s].next.size(); i++) { + dstate_id_t tt = raw.states[s].next[i]; + if (i != top_remap && tt != DEAD_STATE && !contains(seen, tt)) { + t = tt; + break; + } + } + + if (t == DEAD_STATE) { + /* we were unable to find a state to use as a SDS proxy */ + return DEAD_STATE; + } + + s = t; + } +} + +static +set<dstate_id_t> find_region(const raw_dfa &rdfa, dstate_id_t base, + const AccelScheme &ei) { + DEBUG_PRINTF("looking for region around %hu\n", base); + + set<dstate_id_t> region = {base}; + + if (!ei.double_byte.empty()) { + return region; + } + + DEBUG_PRINTF("accel %s+%u\n", describeClass(ei.cr).c_str(), ei.offset); + + const CharReach &escape = ei.cr; + auto nonexit_symbols = find_nonexit_symbols(rdfa, escape); + + vector<dstate_id_t> pending = {base}; + while (!pending.empty()) { + dstate_id_t curr = pending.back(); + pending.pop_back(); + for (auto s : nonexit_symbols) { + 
dstate_id_t t = rdfa.states[curr].next[s]; + if (contains(region, t)) { + continue; + } + + DEBUG_PRINTF(" %hu is in region\n", t); + region.insert(t); + pending.push_back(t); + } + } + + return region; +} + +AccelScheme +accel_dfa_build_strat::find_escape_strings(dstate_id_t this_idx) const { + AccelScheme rv; + const raw_dfa &rdfa = get_raw(); + rv.cr.clear(); + rv.offset = 0; + const dstate &raw = rdfa.states[this_idx]; + const vector<CharReach> rev_map = reverse_alpha_remapping(rdfa); + bool outs2_broken = false; + flat_map<dstate_id_t, CharReach> succs; + + for (u32 i = 0; i < rev_map.size(); i++) { + if (raw.next[i] == this_idx) { + continue; + } + + const CharReach &cr_i = rev_map.at(i); + + rv.cr |= cr_i; + dstate_id_t next_id = raw.next[i]; + + DEBUG_PRINTF("next is %hu\n", next_id); + const dstate &raw_next = rdfa.states[next_id]; + + if (outs2_broken) { + continue; + } + + if (!raw_next.reports.empty() && generates_callbacks(rdfa.kind)) { + DEBUG_PRINTF("leads to report\n"); + outs2_broken = true; /* cannot accelerate over reports */ + continue; + } + succs[next_id] |= cr_i; + } + + if (!outs2_broken) { + for (const auto &e : succs) { + const CharReach &cr_i = e.second; + const dstate &raw_next = rdfa.states[e.first]; + + CharReach cr_all_j; + for (u32 j = 0; j < rev_map.size(); j++) { + if (raw_next.next[j] == raw.next[j]) { + continue; + } + + DEBUG_PRINTF("state %hu: adding sym %u -> %hu to 2 \n", e.first, + j, raw_next.next[j]); + cr_all_j |= rev_map.at(j); + } + + if (cr_i.count() * cr_all_j.count() > 8) { + DEBUG_PRINTF("adding %zu to double_cr\n", cr_i.count()); + rv.double_cr |= cr_i; + } else { + for (auto ii = cr_i.find_first(); ii != CharReach::npos; + ii = cr_i.find_next(ii)) { + for (auto jj = cr_all_j.find_first(); jj != CharReach::npos; + jj = cr_all_j.find_next(jj)) { + rv.double_byte.emplace((u8)ii, (u8)jj); + if (rv.double_byte.size() > 8) { + DEBUG_PRINTF("outs2 too big\n"); + outs2_broken = true; + goto done; + } + } + } + } + } + + done: + assert(outs2_broken || rv.double_byte.size() <= 8); + if (outs2_broken) { + rv.double_byte.clear(); + } + } + + DEBUG_PRINTF("this %u, sds proxy %hu\n", this_idx, get_sds_or_proxy(rdfa)); + DEBUG_PRINTF("broken %d\n", outs2_broken); + if (!double_byte_ok(rv) && !is_triggered(rdfa.kind) && + this_idx == rdfa.start_floating && this_idx != DEAD_STATE) { + DEBUG_PRINTF("looking for offset accel at %u\n", this_idx); + auto offset = + look_for_offset_accel(rdfa, this_idx, max_allowed_offset_accel()); + DEBUG_PRINTF("width %zu vs %zu\n", offset.cr.count(), rv.cr.count()); + if (double_byte_ok(offset) || offset.cr.count() < rv.cr.count()) { + DEBUG_PRINTF("using offset accel\n"); + rv = offset; + } + } + + return rv; +} + +void +accel_dfa_build_strat::buildAccel(UNUSED dstate_id_t this_idx, + const AccelScheme &info, + void *accel_out) { + AccelAux *accel = (AccelAux *)accel_out; + + DEBUG_PRINTF("accelerations scheme has offset s%u/d%u\n", info.offset, + info.double_offset); + accel->generic.offset = verify_u8(info.offset); + + if (double_byte_ok(info) && info.double_cr.none() && + info.double_byte.size() == 1) { + accel->accel_type = ACCEL_DVERM; + accel->dverm.c1 = info.double_byte.begin()->first; + accel->dverm.c2 = info.double_byte.begin()->second; + accel->dverm.offset = verify_u8(info.double_offset); + DEBUG_PRINTF("state %hu is double vermicelli\n", this_idx); + return; + } + + if (double_byte_ok(info) && info.double_cr.none() && + (info.double_byte.size() == 2 || info.double_byte.size() == 4)) { + bool ok = true; + + 
assert(!info.double_byte.empty()); + u8 firstC = info.double_byte.begin()->first & CASE_CLEAR; + u8 secondC = info.double_byte.begin()->second & CASE_CLEAR; + + for (const pair<u8, u8> &p : info.double_byte) { + if ((p.first & CASE_CLEAR) != firstC || + (p.second & CASE_CLEAR) != secondC) { + ok = false; + break; + } + } + + if (ok) { + accel->accel_type = ACCEL_DVERM_NOCASE; + accel->dverm.c1 = firstC; + accel->dverm.c2 = secondC; + accel->dverm.offset = verify_u8(info.double_offset); + DEBUG_PRINTF("state %hu is nc double vermicelli\n", this_idx); + return; + } + + u8 m1; + u8 m2; + if (buildDvermMask(info.double_byte, &m1, &m2)) { + accel->accel_type = ACCEL_DVERM_MASKED; + accel->dverm.offset = verify_u8(info.double_offset); + accel->dverm.c1 = info.double_byte.begin()->first & m1; + accel->dverm.c2 = info.double_byte.begin()->second & m2; + accel->dverm.m1 = m1; + accel->dverm.m2 = m2; + DEBUG_PRINTF( + "building maskeddouble-vermicelli for 0x%02hhx%02hhx\n", + accel->dverm.c1, accel->dverm.c2); + return; + } + } + + if (double_byte_ok(info) && + shuftiBuildDoubleMasks( + info.double_cr, info.double_byte, (u8 *)&accel->dshufti.lo1, + (u8 *)&accel->dshufti.hi1, (u8 *)&accel->dshufti.lo2, + (u8 *)&accel->dshufti.hi2)) { + accel->accel_type = ACCEL_DSHUFTI; + accel->dshufti.offset = verify_u8(info.double_offset); + DEBUG_PRINTF("state %hu is double shufti\n", this_idx); + return; + } + + if (info.cr.none()) { + accel->accel_type = ACCEL_RED_TAPE; + DEBUG_PRINTF("state %hu is a dead end full of bureaucratic red tape" + " from which there is no escape\n", + this_idx); + return; + } + + if (info.cr.count() == 1) { + accel->accel_type = ACCEL_VERM; + accel->verm.c = info.cr.find_first(); + DEBUG_PRINTF("state %hu is vermicelli\n", this_idx); + return; + } + + if (info.cr.count() == 2 && info.cr.isCaselessChar()) { + accel->accel_type = ACCEL_VERM_NOCASE; + accel->verm.c = info.cr.find_first() & CASE_CLEAR; + DEBUG_PRINTF("state %hu is caseless vermicelli\n", this_idx); + return; + } + + if (info.cr.count() > max_floating_stop_char()) { + accel->accel_type = ACCEL_NONE; + DEBUG_PRINTF("state %hu is too broad\n", this_idx); + return; + } + + accel->accel_type = ACCEL_SHUFTI; + if (-1 != shuftiBuildMasks(info.cr, (u8 *)&accel->shufti.lo, + (u8 *)&accel->shufti.hi)) { + DEBUG_PRINTF("state %hu is shufti\n", this_idx); + return; + } + + assert(!info.cr.none()); + accel->accel_type = ACCEL_TRUFFLE; + truffleBuildMasks(info.cr, (u8 *)&accel->truffle.mask1, + (u8 *)&accel->truffle.mask2); + DEBUG_PRINTF("state %hu is truffle\n", this_idx); +} + +map<dstate_id_t, AccelScheme> +accel_dfa_build_strat::getAccelInfo(const Grey &grey) { + map<dstate_id_t, AccelScheme> rv; + raw_dfa &rdfa = get_raw(); + if (!grey.accelerateDFA) { + return rv; + } + + dstate_id_t sds_proxy = get_sds_or_proxy(rdfa); + DEBUG_PRINTF("sds %hu\n", sds_proxy); + + /* Find accel info for a single state. */ + auto do_state = [&](size_t i) { + if (i == DEAD_STATE) { + return; + } + + /* Note on report acceleration states: While we can't accelerate while + * we are spamming out callbacks, the QR code paths don't raise reports + * during scanning so they can accelerate report states. */ + if (generates_callbacks(rdfa.kind) && !rdfa.states[i].reports.empty()) { + return; + } + + size_t single_limit = + i == sds_proxy ? 
max_floating_stop_char() : max_stop_char(); + DEBUG_PRINTF("inspecting %zu/%hu: %zu\n", i, sds_proxy, single_limit); + + AccelScheme ei = find_escape_strings(i); + if (ei.cr.count() > single_limit) { + DEBUG_PRINTF("state %zu is not accelerable has %zu\n", i, + ei.cr.count()); + return; + } + + DEBUG_PRINTF("state %zu should be accelerable %zu\n", i, ei.cr.count()); + + rv[i] = ei; + }; + + if (only_accel_init) { + DEBUG_PRINTF("only computing accel for init states\n"); + do_state(rdfa.start_anchored); + if (rdfa.start_floating != rdfa.start_anchored) { + do_state(rdfa.start_floating); + } + } else { + DEBUG_PRINTF("computing accel for all states\n"); + for (size_t i = 0; i < rdfa.states.size(); i++) { + do_state(i); + } + } + + /* provide acceleration states to states in the region of sds */ + if (contains(rv, sds_proxy)) { + AccelScheme sds_ei = rv[sds_proxy]; + sds_ei.double_byte.clear(); /* region based on single byte scheme + * may differ from double byte */ + DEBUG_PRINTF("looking to expand offset accel to nearby states, %zu\n", + sds_ei.cr.count()); + auto sds_region = find_region(rdfa, sds_proxy, sds_ei); + for (auto s : sds_region) { + if (!contains(rv, s) || better(sds_ei, rv[s])) { + rv[s] = sds_ei; + } + } + } + + return rv; +} +}; diff --git a/contrib/libs/hyperscan/src/nfa/accel_dfa_build_strat.h b/contrib/libs/hyperscan/src/nfa/accel_dfa_build_strat.h index 53a6f35b3d..934f422d73 100644 --- a/contrib/libs/hyperscan/src/nfa/accel_dfa_build_strat.h +++ b/contrib/libs/hyperscan/src/nfa/accel_dfa_build_strat.h @@ -1,69 +1,69 @@ -/* +/* * Copyright (c) 2015-2018, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef ACCEL_DFA_BUILD_STRAT_H -#define ACCEL_DFA_BUILD_STRAT_H - -#include "rdfa.h" -#include "dfa_build_strat.h" -#include "ue2common.h" -#include "util/accel_scheme.h" - -#include <map> - -namespace ue2 { - -class ReportManager; -struct Grey; + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ACCEL_DFA_BUILD_STRAT_H +#define ACCEL_DFA_BUILD_STRAT_H + +#include "rdfa.h" +#include "dfa_build_strat.h" +#include "ue2common.h" +#include "util/accel_scheme.h" + +#include <map> + +namespace ue2 { + +class ReportManager; +struct Grey; enum DfaType { McClellan, Sheng, Gough }; - -class accel_dfa_build_strat : public dfa_build_strat { -public: - accel_dfa_build_strat(const ReportManager &rm_in, bool only_accel_init_in) - : dfa_build_strat(rm_in), only_accel_init(only_accel_init_in) {} - virtual AccelScheme find_escape_strings(dstate_id_t this_idx) const; - virtual size_t accelSize(void) const = 0; - virtual u32 max_allowed_offset_accel() const = 0; - virtual u32 max_stop_char() const = 0; - virtual u32 max_floating_stop_char() const = 0; - virtual void buildAccel(dstate_id_t this_idx, const AccelScheme &info, - void *accel_out); - virtual std::map<dstate_id_t, AccelScheme> getAccelInfo(const Grey &grey); + +class accel_dfa_build_strat : public dfa_build_strat { +public: + accel_dfa_build_strat(const ReportManager &rm_in, bool only_accel_init_in) + : dfa_build_strat(rm_in), only_accel_init(only_accel_init_in) {} + virtual AccelScheme find_escape_strings(dstate_id_t this_idx) const; + virtual size_t accelSize(void) const = 0; + virtual u32 max_allowed_offset_accel() const = 0; + virtual u32 max_stop_char() const = 0; + virtual u32 max_floating_stop_char() const = 0; + virtual void buildAccel(dstate_id_t this_idx, const AccelScheme &info, + void *accel_out); + virtual std::map<dstate_id_t, AccelScheme> getAccelInfo(const Grey &grey); virtual DfaType getType() const = 0; -private: - bool only_accel_init; -}; - -} // namespace ue2 - -#endif // ACCEL_DFA_BUILD_STRAT_H +private: + bool only_accel_init; +}; + +} // namespace ue2 + +#endif // 
ACCEL_DFA_BUILD_STRAT_H
diff --git a/contrib/libs/hyperscan/src/nfa/accelcompile.cpp b/contrib/libs/hyperscan/src/nfa/accelcompile.cpp
index a224410dc9..092388e6a7 100644
--- a/contrib/libs/hyperscan/src/nfa/accelcompile.cpp
+++ b/contrib/libs/hyperscan/src/nfa/accelcompile.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2017, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
@@ -72,8 +72,8 @@ void buildAccelSingle(const AccelInfo &info, AccelAux *aux) {
     }
 
     DEBUG_PRINTF("attempting shufti for %zu chars\n", outs);
-    if (-1 != shuftiBuildMasks(info.single_stops, (u8 *)&aux->shufti.lo,
-                               (u8 *)&aux->shufti.hi)) {
+    if (-1 != shuftiBuildMasks(info.single_stops, (u8 *)&aux->shufti.lo,
+                               (u8 *)&aux->shufti.hi)) {
         aux->accel_type = ACCEL_SHUFTI;
         aux->shufti.offset = offset;
         DEBUG_PRINTF("shufti built OK\n");
@@ -86,55 +86,55 @@ void buildAccelSingle(const AccelInfo &info, AccelAux *aux) {
         DEBUG_PRINTF("building Truffle for %zu chars\n", outs);
         aux->accel_type = ACCEL_TRUFFLE;
         aux->truffle.offset = offset;
-        truffleBuildMasks(info.single_stops, (u8 *)&aux->truffle.mask1,
-                          (u8 *)&aux->truffle.mask2);
+        truffleBuildMasks(info.single_stops, (u8 *)&aux->truffle.mask1,
+                          (u8 *)&aux->truffle.mask2);
         return;
     }
 
     DEBUG_PRINTF("unable to accelerate case with %zu outs\n", outs);
 }
 
-bool buildDvermMask(const flat_set<pair<u8, u8>> &escape_set, u8 *m1_out,
-                    u8 *m2_out) {
-    u8 a1 = 0xff;
-    u8 a2 = 0xff;
-    u8 b1 = 0xff;
-    u8 b2 = 0xff;
-
-    for (const auto &e : escape_set) {
-        DEBUG_PRINTF("%0hhx %0hhx\n", e.first, e.second);
-        a1 &= e.first;
-        b1 &= ~e.first;
-        a2 &= e.second;
-        b2 &= ~e.second;
-    }
-
-    u8 m1 = a1 | b1;
-    u8 m2 = a2 | b2;
-
-    u32 holes1 = 8 - popcount32(m1);
-    u32 holes2 = 8 - popcount32(m2);
-
-    DEBUG_PRINTF("aaaa %0hhx %0hhx\n", a1, a2);
-    DEBUG_PRINTF("bbbb %0hhx %0hhx\n", b1, b2);
-    DEBUG_PRINTF("mask %0hhx %0hhx\n", m1, m2);
-
-    assert(holes1 <= 8 && holes2 <= 8);
-    assert(escape_set.size() <= 1U << (holes1 + holes2));
-    if (escape_set.size() != 1U << (holes1 + holes2)) {
-        return false;
-    }
-
-    if (m1_out) {
-        *m1_out = m1;
-    }
-    if (m2_out) {
-        *m2_out = m2;
-    }
-
-    return true;
-}
-
+bool buildDvermMask(const flat_set<pair<u8, u8>> &escape_set, u8 *m1_out,
+                    u8 *m2_out) {
+    u8 a1 = 0xff;
+    u8 a2 = 0xff;
+    u8 b1 = 0xff;
+    u8 b2 = 0xff;
+
+    for (const auto &e : escape_set) {
+        DEBUG_PRINTF("%0hhx %0hhx\n", e.first, e.second);
+        a1 &= e.first;
+        b1 &= ~e.first;
+        a2 &= e.second;
+        b2 &= ~e.second;
+    }
+
+    u8 m1 = a1 | b1;
+    u8 m2 = a2 | b2;
+
+    u32 holes1 = 8 - popcount32(m1);
+    u32 holes2 = 8 - popcount32(m2);
+
+    DEBUG_PRINTF("aaaa %0hhx %0hhx\n", a1, a2);
+    DEBUG_PRINTF("bbbb %0hhx %0hhx\n", b1, b2);
+    DEBUG_PRINTF("mask %0hhx %0hhx\n", m1, m2);
+
+    assert(holes1 <= 8 && holes2 <= 8);
+    assert(escape_set.size() <= 1U << (holes1 + holes2));
+    if (escape_set.size() != 1U << (holes1 + holes2)) {
+        return false;
+    }
+
+    if (m1_out) {
+        *m1_out = m1;
+    }
+    if (m2_out) {
+        *m2_out = m2;
+    }
+
+    return true;
+}
+
 static
 bool isCaselessDouble(const flat_set<pair<u8, u8>> &stop) {
     // test for vector containing <A,Z> <A,z> <a,Z> <a,z>
@@ -190,36 +190,36 @@ void buildAccelDouble(const AccelInfo &info, AccelAux *aux) {
         return;
     }
 
-    if (outs1 == 0) {
-        u8 m1;
-        u8 m2;
-
-        if (buildDvermMask(info.double_stop2, &m1, &m2)) {
-            aux->accel_type = ACCEL_DVERM_MASKED;
-            aux->dverm.offset = offset;
-            aux->dverm.c1 = info.double_stop2.begin()->first &
m1; - aux->dverm.c2 = info.double_stop2.begin()->second & m2; - aux->dverm.m1 = m1; - aux->dverm.m2 = m2; - DEBUG_PRINTF("building maskeddouble-vermicelli for 0x%02hhx%02hhx\n", - aux->dverm.c1, aux->dverm.c2); - return; - } - } - - if (outs1 < outs2 && outs1 <= 2) { // Heuristic from UE-438. - DEBUG_PRINTF("building double-shufti for %zu one-byte and %zu" - " two-byte literals\n", outs1, outs2); - aux->accel_type = ACCEL_DSHUFTI; - aux->dshufti.offset = offset; - if (shuftiBuildDoubleMasks( - info.double_stop1, info.double_stop2, (u8 *)&aux->dshufti.lo1, - (u8 *)&aux->dshufti.hi1, (u8 *)&aux->dshufti.lo2, - (u8 *)&aux->dshufti.hi2)) { + if (outs1 == 0) { + u8 m1; + u8 m2; + + if (buildDvermMask(info.double_stop2, &m1, &m2)) { + aux->accel_type = ACCEL_DVERM_MASKED; + aux->dverm.offset = offset; + aux->dverm.c1 = info.double_stop2.begin()->first & m1; + aux->dverm.c2 = info.double_stop2.begin()->second & m2; + aux->dverm.m1 = m1; + aux->dverm.m2 = m2; + DEBUG_PRINTF("building maskeddouble-vermicelli for 0x%02hhx%02hhx\n", + aux->dverm.c1, aux->dverm.c2); return; } } + if (outs1 < outs2 && outs1 <= 2) { // Heuristic from UE-438. + DEBUG_PRINTF("building double-shufti for %zu one-byte and %zu" + " two-byte literals\n", outs1, outs2); + aux->accel_type = ACCEL_DSHUFTI; + aux->dshufti.offset = offset; + if (shuftiBuildDoubleMasks( + info.double_stop1, info.double_stop2, (u8 *)&aux->dshufti.lo1, + (u8 *)&aux->dshufti.hi1, (u8 *)&aux->dshufti.lo2, + (u8 *)&aux->dshufti.hi2)) { + return; + } + } + // drop back to attempt single-byte accel DEBUG_PRINTF("dropping back to single-byte acceleration\n"); aux->accel_type = ACCEL_NONE; @@ -231,8 +231,8 @@ bool buildAccelAux(const AccelInfo &info, AccelAux *aux) { DEBUG_PRINTF("picked red tape\n"); aux->accel_type = ACCEL_RED_TAPE; aux->generic.offset = info.single_offset; - } - if (aux->accel_type == ACCEL_NONE) { + } + if (aux->accel_type == ACCEL_NONE) { buildAccelDouble(info, aux); } if (aux->accel_type == ACCEL_NONE) { diff --git a/contrib/libs/hyperscan/src/nfa/accelcompile.h b/contrib/libs/hyperscan/src/nfa/accelcompile.h index d0b3cdc74f..c1930d2247 100644 --- a/contrib/libs/hyperscan/src/nfa/accelcompile.h +++ b/contrib/libs/hyperscan/src/nfa/accelcompile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -31,7 +31,7 @@ #include "ue2common.h" #include "util/charreach.h" -#include "util/flat_containers.h" +#include "util/flat_containers.h" union AccelAux; @@ -51,10 +51,10 @@ struct AccelInfo { bool buildAccelAux(const AccelInfo &info, AccelAux *aux); -/* returns true is the escape set can be handled with a masked double_verm */ -bool buildDvermMask(const flat_set<std::pair<u8, u8>> &escape_set, - u8 *m1_out = nullptr, u8 *m2_out = nullptr); - +/* returns true is the escape set can be handled with a masked double_verm */ +bool buildDvermMask(const flat_set<std::pair<u8, u8>> &escape_set, + u8 *m1_out = nullptr, u8 *m2_out = nullptr); + } // namespace ue2 #endif diff --git a/contrib/libs/hyperscan/src/nfa/callback.h b/contrib/libs/hyperscan/src/nfa/callback.h index 9bdaa8d141..8550e33c84 100644 --- a/contrib/libs/hyperscan/src/nfa/callback.h +++ b/contrib/libs/hyperscan/src/nfa/callback.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source 
and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -37,26 +37,26 @@ /** \brief The type for an NFA callback. * - * This is a function that takes as arguments the current start and end offsets - * where the match occurs, the id of the match and the context pointer that was - * passed into the NFA API function that executed the NFA. + * This is a function that takes as arguments the current start and end offsets + * where the match occurs, the id of the match and the context pointer that was + * passed into the NFA API function that executed the NFA. * - * The start offset is the "start of match" (SOM) offset for the match. It is - * only provided by engines that natively support SOM tracking (e.g. Gough). - * - * The end offset will be the offset after the character that caused the match. - * Thus, if we have a buffer containing 'abc', then a pattern that matches an - * empty string will have an offset of 0, a pattern that matches 'a' will have - * an offset of 1, and a pattern that matches 'abc' will have an offset of 3, - * which will be a value that is 'beyond' the size of the buffer. That is, if - * we have n characters in the buffer, there are n+1 different potential - * offsets for matches. + * The start offset is the "start of match" (SOM) offset for the match. It is + * only provided by engines that natively support SOM tracking (e.g. Gough). * + * The end offset will be the offset after the character that caused the match. + * Thus, if we have a buffer containing 'abc', then a pattern that matches an + * empty string will have an offset of 0, a pattern that matches 'a' will have + * an offset of 1, and a pattern that matches 'abc' will have an offset of 3, + * which will be a value that is 'beyond' the size of the buffer. That is, if + * we have n characters in the buffer, there are n+1 different potential + * offsets for matches. + * * This function should return an int - currently the possible return values * are 0, which means 'stop running the engine' or non-zero, which means * 'continue matching'. 
*/ -typedef int (*NfaCallback)(u64a start, u64a end, ReportID id, void *context); +typedef int (*NfaCallback)(u64a start, u64a end, ReportID id, void *context); /** * standard \ref NfaCallback return value indicating that engine execution diff --git a/contrib/libs/hyperscan/src/nfa/castle.c b/contrib/libs/hyperscan/src/nfa/castle.c index 7c158b31c0..175a709ee1 100644 --- a/contrib/libs/hyperscan/src/nfa/castle.c +++ b/contrib/libs/hyperscan/src/nfa/castle.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -96,9 +96,9 @@ char subCastleReportCurrent(const struct Castle *c, struct mq *q, repeatHasMatch(info, rctrl, rstate, offset); DEBUG_PRINTF("repeatHasMatch returned %d\n", match); if (match == REPEAT_MATCH) { - DEBUG_PRINTF("firing match at %llu for sub %u, report %u\n", offset, - subIdx, sub->report); - if (q->cb(0, offset, sub->report, q->context) == MO_HALT_MATCHING) { + DEBUG_PRINTF("firing match at %llu for sub %u, report %u\n", offset, + subIdx, sub->report); + if (q->cb(0, offset, sub->report, q->context) == MO_HALT_MATCHING) { return MO_HALT_MATCHING; } } @@ -112,22 +112,22 @@ int castleReportCurrent(const struct Castle *c, struct mq *q) { DEBUG_PRINTF("offset=%llu\n", offset); if (c->exclusive) { - u8 *active = (u8 *)q->streamState; - u8 *groups = active + c->groupIterOffset; - for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); - i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { - u8 *cur = active + i * c->activeIdxSize; - const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); - DEBUG_PRINTF("subcastle %u\n", activeIdx); - if (subCastleReportCurrent(c, q, - offset, activeIdx) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } + u8 *active = (u8 *)q->streamState; + u8 *groups = active + c->groupIterOffset; + for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); + i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { + u8 *cur = active + i * c->activeIdxSize; + const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); + DEBUG_PRINTF("subcastle %u\n", activeIdx); + if (subCastleReportCurrent(c, q, + offset, activeIdx) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } } } - if (c->exclusive != PURE_EXCLUSIVE) { - const u8 *active = (const u8 *)q->streamState + c->activeOffset; + if (c->exclusive != PURE_EXCLUSIVE) { + const u8 *active = (const u8 *)q->streamState + c->activeOffset; for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID); i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) { DEBUG_PRINTF("subcastle %u\n", i); @@ -168,18 +168,18 @@ static really_inline char castleInAccept(const struct Castle *c, struct mq *q, const ReportID report, const u64a offset) { DEBUG_PRINTF("offset=%llu\n", offset); - /* ignore when just catching up due to full queue */ - if (report == MO_INVALID_IDX) { - return 0; - } + /* ignore when just catching up due to full queue */ + if (report == MO_INVALID_IDX) { + return 0; + } if (c->exclusive) { - u8 *active = (u8 *)q->streamState; - u8 *groups = active + c->groupIterOffset; - for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); - i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { - u8 *cur = active + i * c->activeIdxSize; - const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); + u8 *active = (u8 *)q->streamState; + u8 
*groups = active + c->groupIterOffset; + for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); + i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { + u8 *cur = active + i * c->activeIdxSize; + const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); DEBUG_PRINTF("subcastle %u\n", activeIdx); if (subCastleInAccept(c, q, report, offset, activeIdx)) { return 1; @@ -187,10 +187,10 @@ char castleInAccept(const struct Castle *c, struct mq *q, } } - if (c->exclusive != PURE_EXCLUSIVE) { - const u8 *active = (const u8 *)q->streamState + c->activeOffset; + if (c->exclusive != PURE_EXCLUSIVE) { + const u8 *active = (const u8 *)q->streamState + c->activeOffset; for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID); - i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) { + i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) { DEBUG_PRINTF("subcastle %u\n", i); if (subCastleInAccept(c, q, report, offset, i)) { return 1; @@ -214,13 +214,13 @@ void subCastleDeactivateStaleSubs(const struct Castle *c, const u64a offset, if (repeatHasMatch(info, rctrl, rstate, offset) == REPEAT_STALE) { DEBUG_PRINTF("sub %u is stale at offset %llu\n", subIdx, offset); - if (sub->exclusiveId < c->numRepeats) { - u8 *active = (u8 *)stream_state; - u8 *groups = active + c->groupIterOffset; - mmbit_unset(groups, c->numGroups, sub->exclusiveId); + if (sub->exclusiveId < c->numRepeats) { + u8 *active = (u8 *)stream_state; + u8 *groups = active + c->groupIterOffset; + mmbit_unset(groups, c->numGroups, sub->exclusiveId); } else { - u8 *active = (u8 *)stream_state + c->activeOffset; - mmbit_unset(active, c->numRepeats, subIdx); + u8 *active = (u8 *)stream_state + c->activeOffset; + mmbit_unset(active, c->numRepeats, subIdx); } } } @@ -230,47 +230,47 @@ void castleDeactivateStaleSubs(const struct Castle *c, const u64a offset, void *full_state, void *stream_state) { DEBUG_PRINTF("offset=%llu\n", offset); - if (!c->staleIterOffset) { - DEBUG_PRINTF("{no repeats can go stale}\n"); - return; /* no subcastle can ever go stale */ - } - + if (!c->staleIterOffset) { + DEBUG_PRINTF("{no repeats can go stale}\n"); + return; /* no subcastle can ever go stale */ + } + if (c->exclusive) { - u8 *active = (u8 *)stream_state; - u8 *groups = active + c->groupIterOffset; - for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); - i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { - u8 *cur = active + i * c->activeIdxSize; - const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); + u8 *active = (u8 *)stream_state; + u8 *groups = active + c->groupIterOffset; + for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); + i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { + u8 *cur = active + i * c->activeIdxSize; + const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); DEBUG_PRINTF("subcastle %u\n", activeIdx); subCastleDeactivateStaleSubs(c, offset, full_state, stream_state, activeIdx); } } - if (c->exclusive != PURE_EXCLUSIVE) { - const u8 *active = (const u8 *)stream_state + c->activeOffset; - const struct mmbit_sparse_iter *it - = (const void *)((const char *)c + c->staleIterOffset); - - struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES]; - u32 numRepeats = c->numRepeats; - u32 idx = 0; - - u32 i = mmbit_sparse_iter_begin(active, numRepeats, &idx, it, si_state); - while(i != MMB_INVALID) { + if (c->exclusive != PURE_EXCLUSIVE) { + const u8 *active = (const u8 *)stream_state + c->activeOffset; + const struct mmbit_sparse_iter *it + = 
(const void *)((const char *)c + c->staleIterOffset); + + struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES]; + u32 numRepeats = c->numRepeats; + u32 idx = 0; + + u32 i = mmbit_sparse_iter_begin(active, numRepeats, &idx, it, si_state); + while(i != MMB_INVALID) { DEBUG_PRINTF("subcastle %u\n", i); - subCastleDeactivateStaleSubs(c, offset, full_state, stream_state, i); - i = mmbit_sparse_iter_next(active, numRepeats, i, &idx, it, - si_state); + subCastleDeactivateStaleSubs(c, offset, full_state, stream_state, i); + i = mmbit_sparse_iter_next(active, numRepeats, i, &idx, it, + si_state); } } } static really_inline void castleProcessTop(const struct Castle *c, const u32 top, const u64a offset, - void *full_state, void *stream_state, - UNUSED char stale_checked) { + void *full_state, void *stream_state, + UNUSED char stale_checked) { assert(top < c->numRepeats); const struct SubCastle *sub = getSubCastle(c, top); @@ -280,20 +280,20 @@ void castleProcessTop(const struct Castle *c, const u32 top, const u64a offset, info->packedCtrlSize; char is_alive = 0; - u8 *active = (u8 *)stream_state; - if (sub->exclusiveId < c->numRepeats) { - u8 *groups = active + c->groupIterOffset; - active += sub->exclusiveId * c->activeIdxSize; - if (mmbit_set(groups, c->numGroups, sub->exclusiveId)) { - const u32 activeIdx = partial_load_u32(active, c->activeIdxSize); - is_alive = (activeIdx == top); - } - - if (!is_alive) { - partial_store_u32(active, top, c->activeIdxSize); - } + u8 *active = (u8 *)stream_state; + if (sub->exclusiveId < c->numRepeats) { + u8 *groups = active + c->groupIterOffset; + active += sub->exclusiveId * c->activeIdxSize; + if (mmbit_set(groups, c->numGroups, sub->exclusiveId)) { + const u32 activeIdx = partial_load_u32(active, c->activeIdxSize); + is_alive = (activeIdx == top); + } + + if (!is_alive) { + partial_store_u32(active, top, c->activeIdxSize); + } } else { - active += c->activeOffset; + active += c->activeOffset; is_alive = mmbit_set(active, c->numRepeats, top); } @@ -302,8 +302,8 @@ void castleProcessTop(const struct Castle *c, const u32 top, const u64a offset, } else { DEBUG_PRINTF("repeat %u is already alive\n", top); // Caller should ensure we're not stale. - assert(!stale_checked - || repeatHasMatch(info, rctrl, rstate, offset) != REPEAT_STALE); + assert(!stale_checked + || repeatHasMatch(info, rctrl, rstate, offset) != REPEAT_STALE); // Ignore duplicate top events. 
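    /* (A duplicate top is one arriving at the same offset as the last top
     * recorded for this repeat; repeatLastTop() below exposes that offset
     * so the event can be dropped without disturbing the control block.) */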
u64a last = repeatLastTop(info, rctrl, rstate); @@ -331,11 +331,11 @@ void subCastleFindMatch(const struct Castle *c, const u64a begin, u64a match = repeatNextMatch(info, rctrl, rstate, begin); if (match == 0) { DEBUG_PRINTF("no more matches for sub %u\n", subIdx); - if (sub->exclusiveId < c->numRepeats) { - u8 *groups = (u8 *)stream_state + c->groupIterOffset; - mmbit_unset(groups, c->numGroups, sub->exclusiveId); + if (sub->exclusiveId < c->numRepeats) { + u8 *groups = (u8 *)stream_state + c->groupIterOffset; + mmbit_unset(groups, c->numGroups, sub->exclusiveId); } else { - u8 *active = (u8 *)stream_state + c->activeOffset; + u8 *active = (u8 *)stream_state + c->activeOffset; mmbit_unset(active, c->numRepeats, subIdx); } return; @@ -368,20 +368,20 @@ char castleFindMatch(const struct Castle *c, const u64a begin, const u64a end, *mloc = 0; if (c->exclusive) { - u8 *active = (u8 *)stream_state; - u8 *groups = active + c->groupIterOffset; - for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); - i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { - u8 *cur = active + i * c->activeIdxSize; - const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); + u8 *active = (u8 *)stream_state; + u8 *groups = active + c->groupIterOffset; + for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); + i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { + u8 *cur = active + i * c->activeIdxSize; + const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); DEBUG_PRINTF("subcastle %u\n", activeIdx); subCastleFindMatch(c, begin, end, full_state, stream_state, mloc, &found, activeIdx); } } - if (c->exclusive != PURE_EXCLUSIVE) { - u8 *active = (u8 *)stream_state + c->activeOffset; + if (c->exclusive != PURE_EXCLUSIVE) { + u8 *active = (u8 *)stream_state + c->activeOffset; for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID); i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) { @@ -411,37 +411,37 @@ u64a subCastleNextMatch(const struct Castle *c, void *full_state, } static really_inline -void set_matching(const struct Castle *c, const u64a match, u8 *active, - u8 *matching, const u32 active_size, const u32 active_id, - const u32 matching_id, u64a *offset, const u64a end) { - if (match == 0) { - DEBUG_PRINTF("no more matches\n"); - mmbit_unset(active, active_size, active_id); - } else if (match > end) { - // If we had a local copy of the active mmbit, we could skip - // looking at this repeat again. But we don't, so we just move - // on. - } else if (match == *offset) { - mmbit_set(matching, c->numRepeats, matching_id); - } else if (match < *offset) { - // New minimum offset. - *offset = match; - mmbit_clear(matching, c->numRepeats); - mmbit_set(matching, c->numRepeats, matching_id); - } -} - -static really_inline +void set_matching(const struct Castle *c, const u64a match, u8 *active, + u8 *matching, const u32 active_size, const u32 active_id, + const u32 matching_id, u64a *offset, const u64a end) { + if (match == 0) { + DEBUG_PRINTF("no more matches\n"); + mmbit_unset(active, active_size, active_id); + } else if (match > end) { + // If we had a local copy of the active mmbit, we could skip + // looking at this repeat again. But we don't, so we just move + // on. + } else if (match == *offset) { + mmbit_set(matching, c->numRepeats, matching_id); + } else if (match < *offset) { + // New minimum offset. 
+ *offset = match; + mmbit_clear(matching, c->numRepeats); + mmbit_set(matching, c->numRepeats, matching_id); + } +} + +static really_inline void subCastleMatchLoop(const struct Castle *c, void *full_state, void *stream_state, const u64a end, const u64a loc, u64a *offset) { - u8 *active = (u8 *)stream_state + c->activeOffset; + u8 *active = (u8 *)stream_state + c->activeOffset; u8 *matching = full_state; for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID); i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) { u64a match = subCastleNextMatch(c, full_state, stream_state, loc, i); - set_matching(c, match, active, matching, c->numRepeats, i, - i, offset, end); + set_matching(c, match, active, matching, c->numRepeats, i, + i, offset, end); } } @@ -457,7 +457,7 @@ char subCastleFireMatch(const struct Castle *c, const void *full_state, i = mmbit_iterate(matching, c->numRepeats, i)) { const struct SubCastle *sub = getSubCastle(c, i); DEBUG_PRINTF("firing match at %llu for sub %u\n", offset, i); - if (cb(0, offset, sub->report, ctx) == MO_HALT_MATCHING) { + if (cb(0, offset, sub->report, ctx) == MO_HALT_MATCHING) { DEBUG_PRINTF("caller told us to halt\n"); return MO_HALT_MATCHING; } @@ -485,36 +485,36 @@ char castleMatchLoop(const struct Castle *c, const u64a begin, const u64a end, u64a offset = end; // min offset of next match u32 activeIdx = 0; - mmbit_clear(matching, c->numRepeats); + mmbit_clear(matching, c->numRepeats); if (c->exclusive) { - u8 *active = (u8 *)stream_state; - u8 *groups = active + c->groupIterOffset; - for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); - i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { - u8 *cur = active + i * c->activeIdxSize; - activeIdx = partial_load_u32(cur, c->activeIdxSize); + u8 *active = (u8 *)stream_state; + u8 *groups = active + c->groupIterOffset; + for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); + i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { + u8 *cur = active + i * c->activeIdxSize; + activeIdx = partial_load_u32(cur, c->activeIdxSize); u64a match = subCastleNextMatch(c, full_state, stream_state, - loc, activeIdx); - set_matching(c, match, groups, matching, c->numGroups, i, - activeIdx, &offset, end); + loc, activeIdx); + set_matching(c, match, groups, matching, c->numGroups, i, + activeIdx, &offset, end); } } - if (c->exclusive != PURE_EXCLUSIVE) { + if (c->exclusive != PURE_EXCLUSIVE) { subCastleMatchLoop(c, full_state, stream_state, - end, loc, &offset); + end, loc, &offset); } - DEBUG_PRINTF("offset=%llu\n", offset); - if (!mmbit_any(matching, c->numRepeats)) { - DEBUG_PRINTF("no more matches\n"); + DEBUG_PRINTF("offset=%llu\n", offset); + if (!mmbit_any(matching, c->numRepeats)) { + DEBUG_PRINTF("no more matches\n"); break; } - - if (subCastleFireMatch(c, full_state, stream_state, - cb, ctx, offset) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - loc = offset; + + if (subCastleFireMatch(c, full_state, stream_state, + cb, ctx, offset) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + loc = offset; } return MO_CONTINUE_MATCHING; @@ -573,8 +573,8 @@ char castleScanShufti(const struct Castle *c, const u8 *buf, const size_t begin, static really_inline char castleScanTruffle(const struct Castle *c, const u8 *buf, const size_t begin, const size_t end, size_t *loc) { - const u8 *ptr = truffleExec(c->u.truffle.mask1, c->u.truffle.mask2, - buf + begin, buf + end); + const u8 *ptr = truffleExec(c->u.truffle.mask1, c->u.truffle.mask2, + buf + begin, buf + 
end); if (ptr == buf + end) { DEBUG_PRINTF("no escape found\n"); return 0; @@ -616,103 +616,103 @@ char castleScan(const struct Castle *c, const u8 *buf, const size_t begin, } static really_inline -char castleRevScanVerm(const struct Castle *c, const u8 *buf, - const size_t begin, const size_t end, size_t *loc) { - const u8 *ptr = rvermicelliExec(c->u.verm.c, 0, buf + begin, buf + end); - if (ptr == buf + begin - 1) { - DEBUG_PRINTF("no escape found\n"); - return 0; - } - - assert(loc); - assert(ptr >= buf && ptr < buf + end); - *loc = (size_t)(ptr - buf); - DEBUG_PRINTF("escape found at offset %zu\n", *loc); - return 1; -} - -static really_inline -char castleRevScanNVerm(const struct Castle *c, const u8 *buf, - const size_t begin, const size_t end, size_t *loc) { - const u8 *ptr = rnvermicelliExec(c->u.verm.c, 0, buf + begin, buf + end); - if (ptr == buf + begin - 1) { - DEBUG_PRINTF("no escape found\n"); - return 0; - } - - assert(loc); - assert(ptr >= buf && ptr < buf + end); - *loc = (size_t)(ptr - buf); - DEBUG_PRINTF("escape found at offset %zu\n", *loc); - return 1; -} - -static really_inline -char castleRevScanShufti(const struct Castle *c, const u8 *buf, - const size_t begin, const size_t end, size_t *loc) { - const m128 mask_lo = c->u.shuf.mask_lo; - const m128 mask_hi = c->u.shuf.mask_hi; - const u8 *ptr = rshuftiExec(mask_lo, mask_hi, buf + begin, buf + end); - if (ptr == buf + begin - 1) { - DEBUG_PRINTF("no escape found\n"); - return 0; - } - - assert(loc); - assert(ptr >= buf && ptr < buf + end); - *loc = (size_t)(ptr - buf); - DEBUG_PRINTF("escape found at offset %zu\n", *loc); - return 1; -} - -static really_inline -char castleRevScanTruffle(const struct Castle *c, const u8 *buf, - const size_t begin, const size_t end, size_t *loc) { - const u8 *ptr = rtruffleExec(c->u.truffle.mask1, c->u.truffle.mask2, - buf + begin, buf + end); - if (ptr == buf + begin - 1) { - DEBUG_PRINTF("no escape found\n"); - return 0; - } - - assert(loc); - assert(ptr >= buf && ptr < buf + end); - *loc = (size_t)(ptr - buf); - DEBUG_PRINTF("escape found at offset %zu\n", *loc); - return 1; -} - -static really_inline -char castleRevScan(const struct Castle *c, const u8 *buf, const size_t begin, - const size_t end, size_t *loc) { - assert(begin <= end); - DEBUG_PRINTF("scanning backwards over (%zu,%zu]\n", begin, end); - if (begin == end) { - return 0; - } - - switch (c->type) { - case CASTLE_DOT: - // Nothing can stop a dot scan! 
- return 0; - case CASTLE_VERM: - return castleRevScanVerm(c, buf, begin, end, loc); - case CASTLE_NVERM: - return castleRevScanNVerm(c, buf, begin, end, loc); - case CASTLE_SHUFTI: - return castleRevScanShufti(c, buf, begin, end, loc); - case CASTLE_TRUFFLE: - return castleRevScanTruffle(c, buf, begin, end, loc); - default: - DEBUG_PRINTF("unknown scan type!\n"); - assert(0); - return 0; - } -} - -static really_inline -void castleHandleEvent(const struct Castle *c, struct mq *q, const u64a sp, - char stale_checked) { +char castleRevScanVerm(const struct Castle *c, const u8 *buf, + const size_t begin, const size_t end, size_t *loc) { + const u8 *ptr = rvermicelliExec(c->u.verm.c, 0, buf + begin, buf + end); + if (ptr == buf + begin - 1) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + assert(ptr >= buf && ptr < buf + end); + *loc = (size_t)(ptr - buf); + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + return 1; +} + +static really_inline +char castleRevScanNVerm(const struct Castle *c, const u8 *buf, + const size_t begin, const size_t end, size_t *loc) { + const u8 *ptr = rnvermicelliExec(c->u.verm.c, 0, buf + begin, buf + end); + if (ptr == buf + begin - 1) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + assert(ptr >= buf && ptr < buf + end); + *loc = (size_t)(ptr - buf); + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + return 1; +} + +static really_inline +char castleRevScanShufti(const struct Castle *c, const u8 *buf, + const size_t begin, const size_t end, size_t *loc) { + const m128 mask_lo = c->u.shuf.mask_lo; + const m128 mask_hi = c->u.shuf.mask_hi; + const u8 *ptr = rshuftiExec(mask_lo, mask_hi, buf + begin, buf + end); + if (ptr == buf + begin - 1) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + assert(ptr >= buf && ptr < buf + end); + *loc = (size_t)(ptr - buf); + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + return 1; +} + +static really_inline +char castleRevScanTruffle(const struct Castle *c, const u8 *buf, + const size_t begin, const size_t end, size_t *loc) { + const u8 *ptr = rtruffleExec(c->u.truffle.mask1, c->u.truffle.mask2, + buf + begin, buf + end); + if (ptr == buf + begin - 1) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + assert(ptr >= buf && ptr < buf + end); + *loc = (size_t)(ptr - buf); + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + return 1; +} + +static really_inline +char castleRevScan(const struct Castle *c, const u8 *buf, const size_t begin, + const size_t end, size_t *loc) { + assert(begin <= end); + DEBUG_PRINTF("scanning backwards over (%zu,%zu]\n", begin, end); + if (begin == end) { + return 0; + } + + switch (c->type) { + case CASTLE_DOT: + // Nothing can stop a dot scan! 
+ return 0; + case CASTLE_VERM: + return castleRevScanVerm(c, buf, begin, end, loc); + case CASTLE_NVERM: + return castleRevScanNVerm(c, buf, begin, end, loc); + case CASTLE_SHUFTI: + return castleRevScanShufti(c, buf, begin, end, loc); + case CASTLE_TRUFFLE: + return castleRevScanTruffle(c, buf, begin, end, loc); + default: + DEBUG_PRINTF("unknown scan type!\n"); + assert(0); + return 0; + } +} + +static really_inline +void castleHandleEvent(const struct Castle *c, struct mq *q, const u64a sp, + char stale_checked) { const u32 event = q->items[q->cur].type; switch (event) { case MQE_TOP: @@ -726,29 +726,29 @@ void castleHandleEvent(const struct Castle *c, struct mq *q, const u64a sp, assert(event < MQE_INVALID); u32 top = event - MQE_TOP_FIRST; DEBUG_PRINTF("top %u at offset %llu\n", top, sp); - castleProcessTop(c, top, sp, q->state, q->streamState, stale_checked); + castleProcessTop(c, top, sp, q->state, q->streamState, stale_checked); break; } } static really_inline -void clear_repeats(const struct Castle *c, const struct mq *q, u8 *active) { - DEBUG_PRINTF("clearing active repeats due to escape\n"); - if (c->exclusive) { - u8 *groups = (u8 *)q->streamState + c->groupIterOffset; - mmbit_clear(groups, c->numGroups); - } - - if (c->exclusive != PURE_EXCLUSIVE) { - mmbit_clear(active, c->numRepeats); - } -} - -static really_inline -char nfaExecCastle_Q_i(const struct NFA *n, struct mq *q, s64a end, - enum MatchMode mode) { +void clear_repeats(const struct Castle *c, const struct mq *q, u8 *active) { + DEBUG_PRINTF("clearing active repeats due to escape\n"); + if (c->exclusive) { + u8 *groups = (u8 *)q->streamState + c->groupIterOffset; + mmbit_clear(groups, c->numGroups); + } + + if (c->exclusive != PURE_EXCLUSIVE) { + mmbit_clear(active, c->numRepeats); + } +} + +static really_inline +char nfaExecCastle_Q_i(const struct NFA *n, struct mq *q, s64a end, + enum MatchMode mode) { assert(n && q); - assert(n->type == CASTLE_NFA); + assert(n->type == CASTLE_NFA); DEBUG_PRINTF("state=%p, streamState=%p\n", q->state, q->streamState); @@ -766,7 +766,7 @@ char nfaExecCastle_Q_i(const struct NFA *n, struct mq *q, s64a end, return 1; } - u8 *active = (u8 *)q->streamState + c->activeOffset;// active multibit + u8 *active = (u8 *)q->streamState + c->activeOffset;// active multibit assert(q->cur + 1 < q->end); // require at least two items assert(q_cur_type(q) == MQE_START); @@ -780,8 +780,8 @@ char nfaExecCastle_Q_i(const struct NFA *n, struct mq *q, s64a end, char found = 0; if (c->exclusive) { - u8 *groups = (u8 *)q->streamState + c->groupIterOffset; - found = mmbit_any(groups, c->numGroups); + u8 *groups = (u8 *)q->streamState + c->groupIterOffset; + found = mmbit_any(groups, c->numGroups); } if (!found && !mmbit_any(active, c->numRepeats)) { @@ -828,7 +828,7 @@ char nfaExecCastle_Q_i(const struct NFA *n, struct mq *q, s64a end, } if (escape_found) { - clear_repeats(c, q, active); + clear_repeats(c, q, active); } } @@ -842,63 +842,63 @@ char nfaExecCastle_Q_i(const struct NFA *n, struct mq *q, s64a end, } sp = q_cur_offset(q); - castleHandleEvent(c, q, sp, 1); + castleHandleEvent(c, q, sp, 1); q->cur++; } if (c->exclusive) { - u8 *groups = (u8 *)q->streamState + c->groupIterOffset; - if (mmbit_any_precise(groups, c->numGroups)) { - return 1; + u8 *groups = (u8 *)q->streamState + c->groupIterOffset; + if (mmbit_any_precise(groups, c->numGroups)) { + return 1; } } return mmbit_any_precise(active, c->numRepeats); } -char nfaExecCastle_Q(const struct NFA *n, struct mq *q, s64a end) { +char 
nfaExecCastle_Q(const struct NFA *n, struct mq *q, s64a end) { DEBUG_PRINTF("entry\n"); - return nfaExecCastle_Q_i(n, q, end, CALLBACK_OUTPUT); + return nfaExecCastle_Q_i(n, q, end, CALLBACK_OUTPUT); } -char nfaExecCastle_Q2(const struct NFA *n, struct mq *q, s64a end) { +char nfaExecCastle_Q2(const struct NFA *n, struct mq *q, s64a end) { DEBUG_PRINTF("entry\n"); - return nfaExecCastle_Q_i(n, q, end, STOP_AT_MATCH); + return nfaExecCastle_Q_i(n, q, end, STOP_AT_MATCH); } -static -s64a castleLastKillLoc(const struct Castle *c, struct mq *q) { - assert(q_cur_type(q) == MQE_START); - assert(q_last_type(q) == MQE_END); - s64a sp = q_cur_loc(q); - s64a ep = q_last_loc(q); - - DEBUG_PRINTF("finding final squash in (%lld, %lld]\n", sp, ep); +static +s64a castleLastKillLoc(const struct Castle *c, struct mq *q) { + assert(q_cur_type(q) == MQE_START); + assert(q_last_type(q) == MQE_END); + s64a sp = q_cur_loc(q); + s64a ep = q_last_loc(q); - size_t loc; + DEBUG_PRINTF("finding final squash in (%lld, %lld]\n", sp, ep); - if (ep > 0) { - if (castleRevScan(c, q->buffer, sp > 0 ? sp : 0, ep, &loc)) { - return (s64a)loc; + size_t loc; + + if (ep > 0) { + if (castleRevScan(c, q->buffer, sp > 0 ? sp : 0, ep, &loc)) { + return (s64a)loc; } - ep = 0; - } - - if (sp < 0) { - s64a hlen = q->hlength; - - if (castleRevScan(c, q->history, sp + hlen, ep + hlen, &loc)) { - return (s64a)loc - hlen; + ep = 0; + } + + if (sp < 0) { + s64a hlen = q->hlength; + + if (castleRevScan(c, q->history, sp + hlen, ep + hlen, &loc)) { + return (s64a)loc - hlen; } - ep = 0; + ep = 0; } - - return sp - 1; /* the repeats are never killed */ + + return sp - 1; /* the repeats are never killed */ } -char nfaExecCastle_QR(const struct NFA *n, struct mq *q, ReportID report) { +char nfaExecCastle_QR(const struct NFA *n, struct mq *q, ReportID report) { assert(n && q); - assert(n->type == CASTLE_NFA); + assert(n->type == CASTLE_NFA); DEBUG_PRINTF("entry\n"); if (q->cur == q->end) { @@ -909,42 +909,42 @@ char nfaExecCastle_QR(const struct NFA *n, struct mq *q, ReportID report) { assert(q_cur_type(q) == MQE_START); const struct Castle *c = getImplNfa(n); - u8 *active = (u8 *)q->streamState + c->activeOffset; - - u64a end_offset = q_last_loc(q) + q->offset; - s64a last_kill_loc = castleLastKillLoc(c, q); - DEBUG_PRINTF("all repeats killed at %lld (exec range %lld, %lld)\n", - last_kill_loc, q_cur_loc(q), q_last_loc(q)); - assert(last_kill_loc < q_last_loc(q)); - - if (last_kill_loc != q_cur_loc(q) - 1) { - clear_repeats(c, q, active); - } - - q->cur++; /* skip start event */ - - /* skip events prior to the repeats being squashed */ - while (q_cur_loc(q) <= last_kill_loc) { - DEBUG_PRINTF("skipping moot event at %lld\n", q_cur_loc(q)); - q->cur++; - assert(q->cur < q->end); - } - - while (q->cur < q->end) { - DEBUG_PRINTF("q item type=%d offset=%llu\n", q_cur_type(q), - q_cur_offset(q)); - u64a sp = q_cur_offset(q); - castleHandleEvent(c, q, sp, 0); + u8 *active = (u8 *)q->streamState + c->activeOffset; + + u64a end_offset = q_last_loc(q) + q->offset; + s64a last_kill_loc = castleLastKillLoc(c, q); + DEBUG_PRINTF("all repeats killed at %lld (exec range %lld, %lld)\n", + last_kill_loc, q_cur_loc(q), q_last_loc(q)); + assert(last_kill_loc < q_last_loc(q)); + + if (last_kill_loc != q_cur_loc(q) - 1) { + clear_repeats(c, q, active); + } + + q->cur++; /* skip start event */ + + /* skip events prior to the repeats being squashed */ + while (q_cur_loc(q) <= last_kill_loc) { + DEBUG_PRINTF("skipping moot event at %lld\n", q_cur_loc(q)); + q->cur++; 
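        /* (Skipping is sound here: the escape found by castleLastKillLoc()
         * kills every repeat live at or before it, and _QR only has to
         * answer for state at the end of the queue, so events at or before
         * last_kill_loc cannot affect the final answer.) */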
+ assert(q->cur < q->end); + } + + while (q->cur < q->end) { + DEBUG_PRINTF("q item type=%d offset=%llu\n", q_cur_type(q), + q_cur_offset(q)); + u64a sp = q_cur_offset(q); + castleHandleEvent(c, q, sp, 0); q->cur++; } - castleDeactivateStaleSubs(c, end_offset, q->state, q->streamState); - - char found = 0; + castleDeactivateStaleSubs(c, end_offset, q->state, q->streamState); + + char found = 0; if (c->exclusive) { - u8 *groups = (u8 *)q->streamState + c->groupIterOffset; - found = mmbit_any_precise(groups, c->numGroups); - + u8 *groups = (u8 *)q->streamState + c->groupIterOffset; + found = mmbit_any_precise(groups, c->numGroups); + } if (!found && !mmbit_any_precise(active, c->numRepeats)) { @@ -952,16 +952,16 @@ char nfaExecCastle_QR(const struct NFA *n, struct mq *q, ReportID report) { return 0; } - if (castleInAccept(c, q, report, end_offset)) { + if (castleInAccept(c, q, report, end_offset)) { return MO_MATCHES_PENDING; } return 1; } -char nfaExecCastle_reportCurrent(const struct NFA *n, struct mq *q) { +char nfaExecCastle_reportCurrent(const struct NFA *n, struct mq *q) { assert(n && q); - assert(n->type == CASTLE_NFA); + assert(n->type == CASTLE_NFA); DEBUG_PRINTF("entry\n"); const struct Castle *c = getImplNfa(n); @@ -969,89 +969,89 @@ char nfaExecCastle_reportCurrent(const struct NFA *n, struct mq *q) { return 0; } -char nfaExecCastle_inAccept(const struct NFA *n, ReportID report, - struct mq *q) { +char nfaExecCastle_inAccept(const struct NFA *n, ReportID report, + struct mq *q) { assert(n && q); - assert(n->type == CASTLE_NFA); + assert(n->type == CASTLE_NFA); DEBUG_PRINTF("entry\n"); const struct Castle *c = getImplNfa(n); return castleInAccept(c, q, report, q_cur_offset(q)); } -char nfaExecCastle_inAnyAccept(const struct NFA *n, struct mq *q) { - assert(n && q); - assert(n->type == CASTLE_NFA); - DEBUG_PRINTF("entry\n"); - - const struct Castle *c = getImplNfa(n); - const u64a offset = q_cur_offset(q); - DEBUG_PRINTF("offset=%llu\n", offset); - - if (c->exclusive) { - u8 *active = (u8 *)q->streamState; - u8 *groups = active + c->groupIterOffset; - for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); - i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { - u8 *cur = active + i * c->activeIdxSize; - const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); - DEBUG_PRINTF("subcastle %u\n", activeIdx); - const struct SubCastle *sub = getSubCastle(c, activeIdx); - if (subCastleInAccept(c, q, sub->report, offset, activeIdx)) { - return 1; - } - } - } - - if (c->exclusive != PURE_EXCLUSIVE) { - const u8 *active = (const u8 *)q->streamState + c->activeOffset; - for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID); - i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) { - DEBUG_PRINTF("subcastle %u\n", i); - const struct SubCastle *sub = getSubCastle(c, i); - if (subCastleInAccept(c, q, sub->report, offset, i)) { - return 1; - } - } - } - - return 0; -} - - -char nfaExecCastle_queueInitState(UNUSED const struct NFA *n, struct mq *q) { +char nfaExecCastle_inAnyAccept(const struct NFA *n, struct mq *q) { assert(n && q); - assert(n->type == CASTLE_NFA); + assert(n->type == CASTLE_NFA); DEBUG_PRINTF("entry\n"); const struct Castle *c = getImplNfa(n); + const u64a offset = q_cur_offset(q); + DEBUG_PRINTF("offset=%llu\n", offset); + + if (c->exclusive) { + u8 *active = (u8 *)q->streamState; + u8 *groups = active + c->groupIterOffset; + for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); + i != MMB_INVALID; i = mmbit_iterate(groups, 
c->numGroups, i)) { + u8 *cur = active + i * c->activeIdxSize; + const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); + DEBUG_PRINTF("subcastle %u\n", activeIdx); + const struct SubCastle *sub = getSubCastle(c, activeIdx); + if (subCastleInAccept(c, q, sub->report, offset, activeIdx)) { + return 1; + } + } + } + + if (c->exclusive != PURE_EXCLUSIVE) { + const u8 *active = (const u8 *)q->streamState + c->activeOffset; + for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID); + i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) { + DEBUG_PRINTF("subcastle %u\n", i); + const struct SubCastle *sub = getSubCastle(c, i); + if (subCastleInAccept(c, q, sub->report, offset, i)) { + return 1; + } + } + } + + return 0; +} + + +char nfaExecCastle_queueInitState(UNUSED const struct NFA *n, struct mq *q) { + assert(n && q); + assert(n->type == CASTLE_NFA); + DEBUG_PRINTF("entry\n"); + + const struct Castle *c = getImplNfa(n); assert(q->streamState); if (c->exclusive) { - u8 *groups = (u8 *)q->streamState + c->groupIterOffset; - mmbit_clear(groups, c->numGroups); + u8 *groups = (u8 *)q->streamState + c->groupIterOffset; + mmbit_clear(groups, c->numGroups); } - if (c->exclusive != PURE_EXCLUSIVE) { - u8 *active = (u8 *)q->streamState + c->activeOffset; + if (c->exclusive != PURE_EXCLUSIVE) { + u8 *active = (u8 *)q->streamState + c->activeOffset; mmbit_clear(active, c->numRepeats); } return 0; } -char nfaExecCastle_initCompressedState(const struct NFA *n, UNUSED u64a offset, - void *state, UNUSED u8 key) { +char nfaExecCastle_initCompressedState(const struct NFA *n, UNUSED u64a offset, + void *state, UNUSED u8 key) { assert(n && state); - assert(n->type == CASTLE_NFA); + assert(n->type == CASTLE_NFA); DEBUG_PRINTF("entry\n"); const struct Castle *c = getImplNfa(n); if (c->exclusive) { - u8 *groups = (u8 *)state + c->groupIterOffset; - mmbit_clear(groups, c->numGroups); + u8 *groups = (u8 *)state + c->groupIterOffset; + mmbit_clear(groups, c->numGroups); } - if (c->exclusive != PURE_EXCLUSIVE) { - u8 *active = (u8 *)state + c->activeOffset; + if (c->exclusive != PURE_EXCLUSIVE) { + u8 *active = (u8 *)state + c->activeOffset; mmbit_clear(active, c->numRepeats); } return 0; @@ -1070,10 +1070,10 @@ void subCastleQueueCompressState(const struct Castle *c, const u32 subIdx, repeatPack(packed, info, rctrl, offset); } -char nfaExecCastle_queueCompressState(const struct NFA *n, const struct mq *q, - s64a loc) { +char nfaExecCastle_queueCompressState(const struct NFA *n, const struct mq *q, + s64a loc) { assert(n && q); - assert(n->type == CASTLE_NFA); + assert(n->type == CASTLE_NFA); DEBUG_PRINTF("entry, loc=%lld\n", loc); const struct Castle *c = getImplNfa(n); @@ -1082,19 +1082,19 @@ char nfaExecCastle_queueCompressState(const struct NFA *n, const struct mq *q, const u64a offset = q->offset + loc; DEBUG_PRINTF("offset=%llu\n", offset); if (c->exclusive) { - u8 *active = (u8 *)q->streamState; - u8 *groups = active + c->groupIterOffset; - for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); - i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { - u8 *cur = active + i * c->activeIdxSize; - const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); + u8 *active = (u8 *)q->streamState; + u8 *groups = active + c->groupIterOffset; + for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); + i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { + u8 *cur = active + i * c->activeIdxSize; + const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); 
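            /* partial_load_u32()/partial_store_u32() move an integer through
             * only c->activeIdxSize bytes of stream state, which is how an
             * exclusive group records which subcastle currently owns its
             * slot without spending a full u32. A minimal sketch of the idea
             * (illustrative helper names; little-endian layout assumed and
             * size no larger than sizeof(u32)):
             *
             *     static u32 sketch_partial_load_u32(const u8 *p, u32 sz) {
             *         u32 v = 0;
             *         memcpy(&v, p, sz);   // low sz bytes hold the value
             *         return v;
             *     }
             *     static void sketch_partial_store_u32(u8 *p, u32 v, u32 sz) {
             *         memcpy(p, &v, sz);   // truncate to the low sz bytes
             *     }
             */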
DEBUG_PRINTF("packing state for sub %u\n", activeIdx); subCastleQueueCompressState(c, activeIdx, q, offset); } } - if (c->exclusive != PURE_EXCLUSIVE) { - const u8 *active = (const u8 *)q->streamState + c->activeOffset; + if (c->exclusive != PURE_EXCLUSIVE) { + const u8 *active = (const u8 *)q->streamState + c->activeOffset; for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID); i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) { DEBUG_PRINTF("packing state for sub %u\n", i); @@ -1118,28 +1118,28 @@ void subCastleExpandState(const struct Castle *c, const u32 subIdx, packed + info->packedCtrlSize, offset)); } -char nfaExecCastle_expandState(const struct NFA *n, void *dest, const void *src, - u64a offset, UNUSED u8 key) { +char nfaExecCastle_expandState(const struct NFA *n, void *dest, const void *src, + u64a offset, UNUSED u8 key) { assert(n && dest && src); - assert(n->type == CASTLE_NFA); + assert(n->type == CASTLE_NFA); DEBUG_PRINTF("entry, src=%p, dest=%p, offset=%llu\n", src, dest, offset); const struct Castle *c = getImplNfa(n); if (c->exclusive) { - const u8 *active = (const u8 *)src; - const u8 *groups = active + c->groupIterOffset; - for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); - i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { - const u8 *cur = active + i * c->activeIdxSize; - const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); + const u8 *active = (const u8 *)src; + const u8 *groups = active + c->groupIterOffset; + for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); + i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { + const u8 *cur = active + i * c->activeIdxSize; + const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); subCastleExpandState(c, activeIdx, dest, src, offset); } } - if (c->exclusive != PURE_EXCLUSIVE) { + if (c->exclusive != PURE_EXCLUSIVE) { // Unpack state for all active repeats. 
- const u8 *active = (const u8 *)src + c->activeOffset; + const u8 *active = (const u8 *)src + c->activeOffset; for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID); i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) { subCastleExpandState(c, i, dest, src, offset); diff --git a/contrib/libs/hyperscan/src/nfa/castle.h b/contrib/libs/hyperscan/src/nfa/castle.h index cc7496ca71..83f0b6fb79 100644 --- a/contrib/libs/hyperscan/src/nfa/castle.h +++ b/contrib/libs/hyperscan/src/nfa/castle.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -38,24 +38,24 @@ extern "C" { struct mq; struct NFA; -char nfaExecCastle_Q(const struct NFA *n, struct mq *q, s64a end); -char nfaExecCastle_Q2(const struct NFA *n, struct mq *q, s64a end); -char nfaExecCastle_QR(const struct NFA *n, struct mq *q, ReportID report); -char nfaExecCastle_reportCurrent(const struct NFA *n, struct mq *q); -char nfaExecCastle_inAccept(const struct NFA *n, ReportID report, - struct mq *q); -char nfaExecCastle_inAnyAccept(const struct NFA *n, struct mq *q); -char nfaExecCastle_queueInitState(const struct NFA *n, struct mq *q); -char nfaExecCastle_initCompressedState(const struct NFA *n, u64a offset, - void *state, u8 key); -char nfaExecCastle_queueCompressState(const struct NFA *nfa, const struct mq *q, - s64a loc); -char nfaExecCastle_expandState(const struct NFA *nfa, void *dest, - const void *src, u64a offset, u8 key); +char nfaExecCastle_Q(const struct NFA *n, struct mq *q, s64a end); +char nfaExecCastle_Q2(const struct NFA *n, struct mq *q, s64a end); +char nfaExecCastle_QR(const struct NFA *n, struct mq *q, ReportID report); +char nfaExecCastle_reportCurrent(const struct NFA *n, struct mq *q); +char nfaExecCastle_inAccept(const struct NFA *n, ReportID report, + struct mq *q); +char nfaExecCastle_inAnyAccept(const struct NFA *n, struct mq *q); +char nfaExecCastle_queueInitState(const struct NFA *n, struct mq *q); +char nfaExecCastle_initCompressedState(const struct NFA *n, u64a offset, + void *state, u8 key); +char nfaExecCastle_queueCompressState(const struct NFA *nfa, const struct mq *q, + s64a loc); +char nfaExecCastle_expandState(const struct NFA *nfa, void *dest, + const void *src, u64a offset, u8 key); -#define nfaExecCastle_testEOD NFA_API_NO_IMPL -#define nfaExecCastle_B_Reverse NFA_API_NO_IMPL -#define nfaExecCastle_zombie_status NFA_API_ZOMBIE_NO_IMPL +#define nfaExecCastle_testEOD NFA_API_NO_IMPL +#define nfaExecCastle_B_Reverse NFA_API_NO_IMPL +#define nfaExecCastle_zombie_status NFA_API_ZOMBIE_NO_IMPL #ifdef __cplusplus } diff --git a/contrib/libs/hyperscan/src/nfa/castle_internal.h b/contrib/libs/hyperscan/src/nfa/castle_internal.h index 429c232ff8..19c353b4dd 100644 --- a/contrib/libs/hyperscan/src/nfa/castle_internal.h +++ b/contrib/libs/hyperscan/src/nfa/castle_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -42,9 +42,9 @@ struct SubCastle { u32 streamStateOffset; //!< offset within stream state u32 repeatInfoOffset; //!< offset of RepeatInfo structure // relative to the start of SubCastle - u32 exclusiveId; //!< exclusive group id of this SubCastle, - // 
set to the number of SubCastles in Castle - // if it is not exclusive + u32 exclusiveId; //!< exclusive group id of this SubCastle, + // set to the number of SubCastles in Castle + // if it is not exclusive }; #define CASTLE_DOT 0 @@ -53,12 +53,12 @@ struct SubCastle { #define CASTLE_SHUFTI 3 #define CASTLE_TRUFFLE 4 -enum ExclusiveType { - NOT_EXCLUSIVE, //!< no subcastles are exclusive - EXCLUSIVE, //!< a subset of subcastles are exclusive - PURE_EXCLUSIVE //!< all subcastles are exclusive -}; - +enum ExclusiveType { + NOT_EXCLUSIVE, //!< no subcastles are exclusive + EXCLUSIVE, //!< a subset of subcastles are exclusive + PURE_EXCLUSIVE //!< all subcastles are exclusive +}; + /** * \brief Castle engine structure. * @@ -71,60 +71,60 @@ enum ExclusiveType { * - struct Castle * - struct SubCastle[numRepeats] * - tables for sparse model repeats - * - sparse iterator for subcastles that may be stale + * - sparse iterator for subcastles that may be stale * * Castle stores an "active repeats" multibit in stream state, followed by the - * packed repeat state for each SubCastle. If there are both exclusive and - * non-exclusive SubCastle groups, we use an active id for each exclusive group - * and a multibit for the non-exclusive group. We also store an "active - * exclusive groups" multibit for exclusive groups. If all SubCastles are mutual - * exclusive, we remove "active repeats" multibit from stream state. - * * Castle stream state: - * * - * * |---| - * * | | active subengine id for exclusive group 1 - * * |---| - * * | | active subengine id for exclusive group 2(if necessary) - * * |---| - * * ... - * * |---| - * * | | "active repeats" multibit for non-exclusive subcastles - * * | | (if not all subcastles are exclusive) - * * |---| - * * | | active multibit for exclusive groups - * * | | - * * |---| - * * ||-|| common pool of stream state for exclusive group 1 - * * ||-|| - * * |---| - * * ||-|| common pool of stream state for exclusive group 2(if necessary) - * * ||-|| - * * |---| - * * ... - * * |---| - * * | | stream state for each non-exclusive subcastles - * * ... - * * | | - * * |---| + * packed repeat state for each SubCastle. If there are both exclusive and + * non-exclusive SubCastle groups, we use an active id for each exclusive group + * and a multibit for the non-exclusive group. We also store an "active + * exclusive groups" multibit for exclusive groups. If all SubCastles are mutual + * exclusive, we remove "active repeats" multibit from stream state. + * * Castle stream state: + * * + * * |---| + * * | | active subengine id for exclusive group 1 + * * |---| + * * | | active subengine id for exclusive group 2(if necessary) + * * |---| + * * ... + * * |---| + * * | | "active repeats" multibit for non-exclusive subcastles + * * | | (if not all subcastles are exclusive) + * * |---| + * * | | active multibit for exclusive groups + * * | | + * * |---| + * * ||-|| common pool of stream state for exclusive group 1 + * * ||-|| + * * |---| + * * ||-|| common pool of stream state for exclusive group 2(if necessary) + * * ||-|| + * * |---| + * * ... + * * |---| + * * | | stream state for each non-exclusive subcastles + * * ... + * * | | + * * |---| * * In full state (stored in scratch space) it stores a temporary multibit over * the repeats (used by \ref castleMatchLoop), followed by the repeat control - * blocks for each SubCastle. + * blocks for each SubCastle. 
*/ struct ALIGN_AVX_DIRECTIVE Castle { - u32 numRepeats; //!< number of repeats in Castle - u32 numGroups; //!< number of exclusive groups - u8 type; //!< tells us which scanning mechanism (below) to use - u8 exclusive; //!< tells us if there are mutual exclusive SubCastles - u8 activeIdxSize; //!< number of bytes in stream state to store - // active SubCastle id for exclusive mode - u32 activeOffset; //!< offset to active multibit for non-exclusive - // SubCastles - u32 staleIterOffset; //!< offset to a sparse iterator to check for stale - // sub castles - u32 groupIterOffset; //!< offset to a iterator to check the aliveness of - // exclusive groups - + u32 numRepeats; //!< number of repeats in Castle + u32 numGroups; //!< number of exclusive groups + u8 type; //!< tells us which scanning mechanism (below) to use + u8 exclusive; //!< tells us if there are mutual exclusive SubCastles + u8 activeIdxSize; //!< number of bytes in stream state to store + // active SubCastle id for exclusive mode + u32 activeOffset; //!< offset to active multibit for non-exclusive + // SubCastles + u32 staleIterOffset; //!< offset to a sparse iterator to check for stale + // sub castles + u32 groupIterOffset; //!< offset to a iterator to check the aliveness of + // exclusive groups + union { struct { char c; diff --git a/contrib/libs/hyperscan/src/nfa/castlecompile.cpp b/contrib/libs/hyperscan/src/nfa/castlecompile.cpp index d4c361337a..ac3d514a77 100644 --- a/contrib/libs/hyperscan/src/nfa/castlecompile.cpp +++ b/contrib/libs/hyperscan/src/nfa/castlecompile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,15 +26,15 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** - * \file +/** + * \file * \brief Castle: multi-tenant repeat engine, compiler code. 
*/ - + #include "castlecompile.h" #include "castle_internal.h" -#include "limex_limits.h" +#include "limex_limits.h" #include "nfa_internal.h" #include "repeatcompile.h" #include "shufticompile.h" @@ -48,18 +48,18 @@ #include "util/compile_context.h" #include "util/container.h" #include "util/dump_charclass.h" -#include "util/flat_containers.h" +#include "util/flat_containers.h" #include "util/graph.h" #include "util/make_unique.h" -#include "util/multibit_build.h" -#include "util/report_manager.h" +#include "util/multibit_build.h" +#include "util/report_manager.h" #include "util/verify_types.h" #include "grey.h" #include <stack> #include <cassert> -#include <boost/graph/adjacency_list.hpp> +#include <boost/graph/adjacency_list.hpp> #include <boost/range/adaptor/map.hpp> using namespace std; @@ -102,15 +102,15 @@ void writeCastleScanEngine(const CharReach &cr, Castle *c) { return; } - if (shuftiBuildMasks(negated, (u8 *)&c->u.shuf.mask_lo, - (u8 *)&c->u.shuf.mask_hi) != -1) { + if (shuftiBuildMasks(negated, (u8 *)&c->u.shuf.mask_lo, + (u8 *)&c->u.shuf.mask_hi) != -1) { c->type = CASTLE_SHUFTI; return; } c->type = CASTLE_TRUFFLE; - truffleBuildMasks(negated, (u8 *)(u8 *)&c->u.truffle.mask1, - (u8 *)&c->u.truffle.mask2); + truffleBuildMasks(negated, (u8 *)(u8 *)&c->u.truffle.mask1, + (u8 *)&c->u.truffle.mask2); } static @@ -156,7 +156,7 @@ void getNeighborInfo(const CliqueGraph &g, vector<u32> &neighbor, // find neighbors for cv for (const auto &v : adjacent_vertices_range(cv, g)) { - if (g[v].stateId != id && contains(group, g[v].stateId)) { + if (g[v].stateId != id && contains(group, g[v].stateId)) { neighbor.push_back(g[v].stateId); DEBUG_PRINTF("Neighbor:%u\n", g[v].stateId); } @@ -208,7 +208,7 @@ bool graph_empty(const Graph &g) { static vector<u32> removeClique(CliqueGraph &cg) { vector<vector<u32>> cliquesVec(1); - DEBUG_PRINTF("graph size:%zu\n", num_vertices(cg)); + DEBUG_PRINTF("graph size:%zu\n", num_vertices(cg)); findCliqueGroup(cg, cliquesVec[0]); while (!graph_empty(cg)) { const vector<u32> &c = cliquesVec.back(); @@ -240,7 +240,7 @@ vector<u32> removeClique(CliqueGraph &cg) { } } - DEBUG_PRINTF("clique size:%zu\n", cliquesVec[id].size()); + DEBUG_PRINTF("clique size:%zu\n", cliquesVec[id].size()); return cliquesVec[id]; } @@ -248,18 +248,18 @@ vector<u32> removeClique(CliqueGraph &cg) { // the end locations where it overlaps with other literals, // then the literals are mutual exclusive static -bool findExclusivePair(const size_t id1, const size_t id2, - const size_t lower, +bool findExclusivePair(const size_t id1, const size_t id2, + const size_t lower, const vector<vector<size_t>> &min_reset_dist, const vector<vector<vector<CharReach>>> &triggers) { const auto &triggers1 = triggers[id1]; const auto &triggers2 = triggers[id2]; - for (size_t i = 0; i < triggers1.size(); ++i) { - for (size_t j = 0; j < triggers2.size(); ++j) { + for (size_t i = 0; i < triggers1.size(); ++i) { + for (size_t j = 0; j < triggers2.size(); ++j) { if (!literalOverlap(triggers1[i], triggers2[j], - min_reset_dist[id2 - lower][j]) || + min_reset_dist[id2 - lower][j]) || !literalOverlap(triggers2[j], triggers1[i], - min_reset_dist[id1 - lower][i])) { + min_reset_dist[id1 - lower][i])) { return false; } } @@ -268,92 +268,92 @@ bool findExclusivePair(const size_t id1, const size_t id2, } static -vector<vector<u32>> checkExclusion(u32 &streamStateSize, - const CharReach &cr, - const vector<vector<vector<CharReach>>> &triggers, - enum ExclusiveType &exclusive, - const size_t numRepeats) { - 
vector<vector<u32>> groups; - size_t trigSize = triggers.size(); - DEBUG_PRINTF("trigSize %zu\n", trigSize); - - size_t lower = 0; - size_t total = 0; - while (lower < trigSize) { - vector<CliqueVertex> vertices; - unique_ptr<CliqueGraph> cg = std::make_unique<CliqueGraph>(); - - vector<vector<size_t>> min_reset_dist; - size_t upper = min(lower + CLIQUE_GRAPH_MAX_SIZE, trigSize); - // get min reset distance for each repeat - for (size_t i = lower; i < upper; i++) { - CliqueVertex v = add_vertex(CliqueVertexProps(i), *cg); - vertices.push_back(v); - - const vector<size_t> &tmp_dist = - minResetDistToEnd(triggers[i], cr); - min_reset_dist.push_back(tmp_dist); - } - - // find exclusive pair for each repeat - for (size_t i = lower; i < upper; i++) { - CliqueVertex s = vertices[i - lower]; - for (size_t j = i + 1; j < upper; j++) { - if (findExclusivePair(i, j, lower, min_reset_dist, - triggers)) { - CliqueVertex d = vertices[j - lower]; - add_edge(s, d, *cg); - } +vector<vector<u32>> checkExclusion(u32 &streamStateSize, + const CharReach &cr, + const vector<vector<vector<CharReach>>> &triggers, + enum ExclusiveType &exclusive, + const size_t numRepeats) { + vector<vector<u32>> groups; + size_t trigSize = triggers.size(); + DEBUG_PRINTF("trigSize %zu\n", trigSize); + + size_t lower = 0; + size_t total = 0; + while (lower < trigSize) { + vector<CliqueVertex> vertices; + unique_ptr<CliqueGraph> cg = std::make_unique<CliqueGraph>(); + + vector<vector<size_t>> min_reset_dist; + size_t upper = min(lower + CLIQUE_GRAPH_MAX_SIZE, trigSize); + // get min reset distance for each repeat + for (size_t i = lower; i < upper; i++) { + CliqueVertex v = add_vertex(CliqueVertexProps(i), *cg); + vertices.push_back(v); + + const vector<size_t> &tmp_dist = + minResetDistToEnd(triggers[i], cr); + min_reset_dist.push_back(tmp_dist); + } + + // find exclusive pair for each repeat + for (size_t i = lower; i < upper; i++) { + CliqueVertex s = vertices[i - lower]; + for (size_t j = i + 1; j < upper; j++) { + if (findExclusivePair(i, j, lower, min_reset_dist, + triggers)) { + CliqueVertex d = vertices[j - lower]; + add_edge(s, d, *cg); + } } } - - // find the largest exclusive group - auto clique = removeClique(*cg); - size_t cliqueSize = clique.size(); - if (cliqueSize > 1) { - groups.push_back(clique); - exclusive = EXCLUSIVE; - total += cliqueSize; - } - - lower += CLIQUE_GRAPH_MAX_SIZE; - } - DEBUG_PRINTF("clique size %zu, num of repeats %zu\n", - total, numRepeats); - if (total == numRepeats) { - exclusive = PURE_EXCLUSIVE; - streamStateSize = 0; - }; - - return groups; -} - -namespace { -struct ExclusiveInfo { - - /** Mapping between top and exclusive group id */ - map<u32, u32> groupId; - - /** Number of exclusive groups */ - u32 numGroups = 0; -}; + + // find the largest exclusive group + auto clique = removeClique(*cg); + size_t cliqueSize = clique.size(); + if (cliqueSize > 1) { + groups.push_back(clique); + exclusive = EXCLUSIVE; + total += cliqueSize; + } + + lower += CLIQUE_GRAPH_MAX_SIZE; + } + DEBUG_PRINTF("clique size %zu, num of repeats %zu\n", + total, numRepeats); + if (total == numRepeats) { + exclusive = PURE_EXCLUSIVE; + streamStateSize = 0; + }; + + return groups; } +namespace { +struct ExclusiveInfo { + + /** Mapping between top and exclusive group id */ + map<u32, u32> groupId; + + /** Number of exclusive groups */ + u32 numGroups = 0; +}; +} + static void buildSubcastles(const CastleProto &proto, vector<SubCastle> &subs, vector<RepeatInfo> &infos, vector<u64a> &patchSize, const 
vector<pair<depth, bool>> &repeatInfoPair, u32 &scratchStateSize, u32 &streamStateSize, u32 &tableSize, vector<u64a> &tables, u32 &sparseRepeats, - const ExclusiveInfo &exclusiveInfo, - vector<u32> &may_stale, const ReportManager &rm) { - const bool remap_reports = has_managed_reports(proto.kind); - + const ExclusiveInfo &exclusiveInfo, + vector<u32> &may_stale, const ReportManager &rm) { + const bool remap_reports = has_managed_reports(proto.kind); + u32 i = 0; - const auto &groupId = exclusiveInfo.groupId; - const auto &numGroups = exclusiveInfo.numGroups; - vector<u32> maxStreamSize(numGroups, 0); - + const auto &groupId = exclusiveInfo.groupId; + const auto &numGroups = exclusiveInfo.numGroups; + vector<u32> maxStreamSize(numGroups, 0); + for (auto it = proto.repeats.begin(), ite = proto.repeats.end(); it != ite; ++it, ++i) { const PureRepeat &pr = it->second; @@ -361,7 +361,7 @@ void buildSubcastles(const CastleProto &proto, vector<SubCastle> &subs, bool is_reset = repeatInfoPair[i].second; enum RepeatType rtype = chooseRepeatType(pr.bounds.min, pr.bounds.max, - min_period, is_reset, true); + min_period, is_reset, true); RepeatStateInfo rsi(rtype, pr.bounds.min, pr.bounds.max, min_period); DEBUG_PRINTF("sub %u: selected %s model for %s repeat\n", i, @@ -370,26 +370,26 @@ void buildSubcastles(const CastleProto &proto, vector<SubCastle> &subs, SubCastle &sub = subs[i]; RepeatInfo &info = infos[i]; - info.packedCtrlSize = rsi.packedCtrlSize; - u32 subStreamStateSize = verify_u32(rsi.packedCtrlSize + rsi.stateSize); - - // Handle stream/scratch space alloc for exclusive case differently. - if (contains(groupId, i)) { - u32 id = groupId.at(i); - maxStreamSize[id] = max(maxStreamSize[id], subStreamStateSize); - // SubCastle full/stream state offsets are written in for the group - // below. + info.packedCtrlSize = rsi.packedCtrlSize; + u32 subStreamStateSize = verify_u32(rsi.packedCtrlSize + rsi.stateSize); + + // Handle stream/scratch space alloc for exclusive case differently. + if (contains(groupId, i)) { + u32 id = groupId.at(i); + maxStreamSize[id] = max(maxStreamSize[id], subStreamStateSize); + // SubCastle full/stream state offsets are written in for the group + // below. } else { sub.fullStateOffset = scratchStateSize; sub.streamStateOffset = streamStateSize; - scratchStateSize += verify_u32(sizeof(RepeatControl)); + scratchStateSize += verify_u32(sizeof(RepeatControl)); streamStateSize += subStreamStateSize; } - if (pr.bounds.max.is_finite()) { - may_stale.push_back(i); - } - + if (pr.bounds.max.is_finite()) { + may_stale.push_back(i); + } + info.type = verify_u8(rtype); info.repeatMin = depth_to_u32(pr.bounds.min); info.repeatMax = depth_to_u32(pr.bounds.max); @@ -405,44 +405,44 @@ void buildSubcastles(const CastleProto &proto, vector<SubCastle> &subs, info.encodingSize = rsi.encodingSize; info.patchesOffset = rsi.patchesOffset; - assert(pr.reports.size() == 1); - ReportID id = *pr.reports.begin(); - sub.report = remap_reports ? rm.getProgramOffset(id) : id; + assert(pr.reports.size() == 1); + ReportID id = *pr.reports.begin(); + sub.report = remap_reports ? 
rm.getProgramOffset(id) : id; if (rtype == REPEAT_SPARSE_OPTIMAL_P) { - for (u32 j = 0; j < rsi.patchSize; j++) { - tables.push_back(rsi.table[j]); - } - sparseRepeats++; - patchSize[i] = rsi.patchSize; - tableSize += rsi.patchSize; - } - } - - vector<u32> scratchOffset(numGroups, 0); - vector<u32> streamOffset(numGroups, 0); - for (const auto &j : groupId) { - u32 top = j.first; - u32 id = j.second; - SubCastle &sub = subs[top]; - if (!scratchOffset[id]) { + for (u32 j = 0; j < rsi.patchSize; j++) { + tables.push_back(rsi.table[j]); + } + sparseRepeats++; + patchSize[i] = rsi.patchSize; + tableSize += rsi.patchSize; + } + } + + vector<u32> scratchOffset(numGroups, 0); + vector<u32> streamOffset(numGroups, 0); + for (const auto &j : groupId) { + u32 top = j.first; + u32 id = j.second; + SubCastle &sub = subs[top]; + if (!scratchOffset[id]) { sub.fullStateOffset = scratchStateSize; sub.streamStateOffset = streamStateSize; - scratchOffset[id] = scratchStateSize; - streamOffset[id] = streamStateSize; - scratchStateSize += verify_u32(sizeof(RepeatControl)); - streamStateSize += maxStreamSize[id]; - } else { - sub.fullStateOffset = scratchOffset[id]; - sub.streamStateOffset = streamOffset[id]; + scratchOffset[id] = scratchStateSize; + streamOffset[id] = streamStateSize; + scratchStateSize += verify_u32(sizeof(RepeatControl)); + streamStateSize += maxStreamSize[id]; + } else { + sub.fullStateOffset = scratchOffset[id]; + sub.streamStateOffset = streamOffset[id]; } } } -bytecode_ptr<NFA> +bytecode_ptr<NFA> buildCastle(const CastleProto &proto, const map<u32, vector<vector<CharReach>>> &triggers, - const CompileContext &cc, const ReportManager &rm) { + const CompileContext &cc, const ReportManager &rm) { assert(cc.grey.allowCastle); const size_t numRepeats = proto.repeats.size(); @@ -474,8 +474,8 @@ buildCastle(const CastleProto &proto, depth maxWidth(0); u32 i = 0; - ExclusiveInfo exclusiveInfo; - vector<vector<vector<CharReach>>> candidateTriggers; + ExclusiveInfo exclusiveInfo; + vector<vector<vector<CharReach>>> candidateTriggers; vector<u32> candidateRepeats; vector<pair<depth, bool>> repeatInfoPair; for (auto it = proto.repeats.begin(), ite = proto.repeats.end(); @@ -501,7 +501,7 @@ buildCastle(const CastleProto &proto, // possibly means that we've got a repeat that we can't trigger. We do // need to cope with it though. 
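        // (min_period is a compile-time lower bound on the distance between
        // successive triggers for this top; buildSubcastles() feeds it,
        // together with is_reset, into chooseRepeatType() so that a cheaper
        // repeat model can be selected when triggers are guaranteed to
        // arrive slowly.)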
if (contains(triggers, top)) { - min_period = depth(minPeriod(triggers.at(top), cr, &is_reset)); + min_period = depth(minPeriod(triggers.at(top), cr, &is_reset)); } if (min_period > pr.bounds.max) { @@ -511,60 +511,60 @@ buildCastle(const CastleProto &proto, repeatInfoPair.push_back(make_pair(min_period, is_reset)); - candidateTriggers.push_back(triggers.at(top)); - candidateRepeats.push_back(i); + candidateTriggers.push_back(triggers.at(top)); + candidateRepeats.push_back(i); } // Case 1: exclusive repeats - enum ExclusiveType exclusive = NOT_EXCLUSIVE; + enum ExclusiveType exclusive = NOT_EXCLUSIVE; u32 activeIdxSize = 0; - u32 groupIterOffset = 0; + u32 groupIterOffset = 0; if (cc.grey.castleExclusive) { - auto cliqueGroups = - checkExclusion(streamStateSize, cr, candidateTriggers, - exclusive, numRepeats); - for (const auto &group : cliqueGroups) { - // mutual exclusive repeats group found, - // update state sizes + auto cliqueGroups = + checkExclusion(streamStateSize, cr, candidateTriggers, + exclusive, numRepeats); + for (const auto &group : cliqueGroups) { + // mutual exclusive repeats group found, + // update state sizes activeIdxSize = calcPackedBytes(numRepeats + 1); streamStateSize += activeIdxSize; // replace with top values - for (const auto &val : group) { - const u32 top = candidateRepeats[val]; - exclusiveInfo.groupId[top] = exclusiveInfo.numGroups; + for (const auto &val : group) { + const u32 top = candidateRepeats[val]; + exclusiveInfo.groupId[top] = exclusiveInfo.numGroups; } - exclusiveInfo.numGroups++; + exclusiveInfo.numGroups++; } - - if (exclusive) { - groupIterOffset = streamStateSize; - streamStateSize += mmbit_size(exclusiveInfo.numGroups); - } - - DEBUG_PRINTF("num of groups:%u\n", exclusiveInfo.numGroups); + + if (exclusive) { + groupIterOffset = streamStateSize; + streamStateSize += mmbit_size(exclusiveInfo.numGroups); + } + + DEBUG_PRINTF("num of groups:%u\n", exclusiveInfo.numGroups); } - candidateRepeats.clear(); + candidateRepeats.clear(); DEBUG_PRINTF("reach %s exclusive %u\n", describeClass(cr).c_str(), exclusive); u32 tableSize = 0; u32 sparseRepeats = 0; - vector<u32> may_stale; /* sub castles that may go stale */ - + vector<u32> may_stale; /* sub castles that may go stale */ + buildSubcastles(proto, subs, infos, patchSize, repeatInfoPair, scratchStateSize, streamStateSize, tableSize, - tables, sparseRepeats, exclusiveInfo, may_stale, rm); - - DEBUG_PRINTF("%zu subcastles may go stale\n", may_stale.size()); - vector<mmbit_sparse_iter> stale_iter; - if (!may_stale.empty()) { - stale_iter = mmbBuildSparseIterator(may_stale, numRepeats); - } - - - size_t total_size = + tables, sparseRepeats, exclusiveInfo, may_stale, rm); + + DEBUG_PRINTF("%zu subcastles may go stale\n", may_stale.size()); + vector<mmbit_sparse_iter> stale_iter; + if (!may_stale.empty()) { + stale_iter = mmbBuildSparseIterator(may_stale, numRepeats); + } + + + size_t total_size = sizeof(NFA) + // initial NFA structure sizeof(Castle) + // Castle structure sizeof(SubCastle) * subs.size() + // SubCastles themselves @@ -574,11 +574,11 @@ buildCastle(const CastleProto &proto, sizeof(u64a) * sparseRepeats; // paddings for // REPEAT_SPARSE_OPTIMAL_P tables - total_size = ROUNDUP_N(total_size, alignof(mmbit_sparse_iter)); - total_size += byte_length(stale_iter); // stale sparse iter - - auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size); - nfa->type = verify_u8(CASTLE_NFA); + total_size = ROUNDUP_N(total_size, alignof(mmbit_sparse_iter)); + total_size += byte_length(stale_iter); // stale 
sparse iter + + auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size); + nfa->type = verify_u8(CASTLE_NFA); nfa->length = verify_u32(total_size); nfa->nPositions = verify_u32(subs.size()); nfa->streamStateSize = streamStateSize; @@ -586,15 +586,15 @@ buildCastle(const CastleProto &proto, nfa->minWidth = verify_u32(minWidth); nfa->maxWidth = maxWidth.is_finite() ? verify_u32(maxWidth) : 0; - char * const base_ptr = (char *)nfa.get() + sizeof(NFA); - char *ptr = base_ptr; + char * const base_ptr = (char *)nfa.get() + sizeof(NFA); + char *ptr = base_ptr; Castle *c = (Castle *)ptr; c->numRepeats = verify_u32(subs.size()); - c->numGroups = exclusiveInfo.numGroups; - c->exclusive = verify_s8(exclusive); + c->numGroups = exclusiveInfo.numGroups; + c->exclusive = verify_s8(exclusive); c->activeIdxSize = verify_u8(activeIdxSize); - c->activeOffset = verify_u32(c->numGroups * activeIdxSize); - c->groupIterOffset = groupIterOffset; + c->activeOffset = verify_u32(c->numGroups * activeIdxSize); + c->groupIterOffset = groupIterOffset; writeCastleScanEngine(cr, c); @@ -628,22 +628,22 @@ buildCastle(const CastleProto &proto, } // set exclusive group info - if (contains(exclusiveInfo.groupId, i)) { - sub->exclusiveId = exclusiveInfo.groupId[i]; + if (contains(exclusiveInfo.groupId, i)) { + sub->exclusiveId = exclusiveInfo.groupId[i]; } else { - sub->exclusiveId = numRepeats; - } - } - - ptr = base_ptr + total_size - sizeof(NFA) - byte_length(stale_iter); - - assert(ptr + byte_length(stale_iter) == base_ptr + total_size - sizeof(NFA)); - if (!stale_iter.empty()) { - c->staleIterOffset = verify_u32(ptr - base_ptr); - copy_bytes(ptr, stale_iter); - ptr += byte_length(stale_iter); - } - + sub->exclusiveId = numRepeats; + } + } + + ptr = base_ptr + total_size - sizeof(NFA) - byte_length(stale_iter); + + assert(ptr + byte_length(stale_iter) == base_ptr + total_size - sizeof(NFA)); + if (!stale_iter.empty()) { + c->staleIterOffset = verify_u32(ptr - base_ptr); + copy_bytes(ptr, stale_iter); + ptr += byte_length(stale_iter); + } + return nfa; } @@ -687,7 +687,7 @@ depth findMaxWidth(const CastleProto &proto, u32 top) { return proto.repeats.at(top).bounds.max; } -CastleProto::CastleProto(nfa_kind k, const PureRepeat &pr) : kind(k) { +CastleProto::CastleProto(nfa_kind k, const PureRepeat &pr) : kind(k) { assert(pr.reach.any()); assert(pr.reports.size() == 1); u32 top = 0; @@ -749,7 +749,7 @@ u32 CastleProto::merge(const PureRepeat &pr) { bool mergeCastle(CastleProto &c1, const CastleProto &c2, map<u32, u32> &top_map) { assert(&c1 != &c2); - assert(c1.kind == c2.kind); + assert(c1.kind == c2.kind); DEBUG_PRINTF("c1 has %zu repeats, c2 has %zu repeats\n", c1.repeats.size(), c2.repeats.size()); @@ -770,7 +770,7 @@ bool mergeCastle(CastleProto &c1, const CastleProto &c2, const u32 top = m.first; const PureRepeat &pr = m.second; DEBUG_PRINTF("top %u\n", top); - u32 new_top = c1.merge(pr); + u32 new_top = c1.merge(pr); top_map[top] = new_top; DEBUG_PRINTF("adding repeat: map %u->%u\n", top, new_top); } @@ -823,7 +823,7 @@ bool is_equal(const CastleProto &c1, ReportID report1, const CastleProto &c2, ReportID report2) { assert(!c1.repeats.empty()); assert(!c2.repeats.empty()); - assert(c1.kind == c2.kind); + assert(c1.kind == c2.kind); if (c1.reach() != c2.reach()) { DEBUG_PRINTF("different reach\n"); @@ -870,7 +870,7 @@ bool is_equal(const CastleProto &c1, ReportID report1, const CastleProto &c2, bool is_equal(const CastleProto &c1, const CastleProto &c2) { assert(!c1.repeats.empty()); assert(!c2.repeats.empty()); - 
assert(c1.kind == c2.kind); + assert(c1.kind == c2.kind); if (c1.reach() != c2.reach()) { DEBUG_PRINTF("different reach\n"); @@ -881,7 +881,7 @@ bool is_equal(const CastleProto &c1, const CastleProto &c2) { } bool requiresDedupe(const CastleProto &proto, - const flat_set<ReportID> &reports) { + const flat_set<ReportID> &reports) { for (const auto &report : reports) { auto it = proto.report_map.find(report); if (it == end(proto.report_map)) { @@ -905,8 +905,8 @@ void addToHolder(NGHolder &g, u32 top, const PureRepeat &pr) { u32 min_bound = pr.bounds.min; // always finite if (min_bound == 0) { // Vacuous case, we can only do this once. assert(!edge(g.start, g.accept, g).second); - NFAEdge e = add_edge(g.start, g.accept, g); - g[e].tops.insert(top); + NFAEdge e = add_edge(g.start, g.accept, g); + g[e].tops.insert(top); g[u].reports.insert(pr.reports.begin(), pr.reports.end()); min_bound = 1; } @@ -914,9 +914,9 @@ void addToHolder(NGHolder &g, u32 top, const PureRepeat &pr) { for (u32 i = 0; i < min_bound; i++) { NFAVertex v = add_vertex(g); g[v].char_reach = pr.reach; - NFAEdge e = add_edge(u, v, g); + NFAEdge e = add_edge(u, v, g); if (u == g.start) { - g[e].tops.insert(top); + g[e].tops.insert(top); } u = v; } @@ -933,9 +933,9 @@ void addToHolder(NGHolder &g, u32 top, const PureRepeat &pr) { if (head != u) { add_edge(head, v, g); } - NFAEdge e = add_edge(u, v, g); + NFAEdge e = add_edge(u, v, g); if (u == g.start) { - g[e].tops.insert(top); + g[e].tops.insert(top); } u = v; } @@ -964,7 +964,7 @@ bool hasZeroMinBound(const CastleProto &proto) { return false; } -unique_ptr<NGHolder> makeHolder(const CastleProto &proto, +unique_ptr<NGHolder> makeHolder(const CastleProto &proto, const CompileContext &cc) { assert(!proto.repeats.empty()); @@ -977,13 +977,13 @@ unique_ptr<NGHolder> makeHolder(const CastleProto &proto, } } - auto g = ue2::make_unique<NGHolder>(proto.kind); + auto g = ue2::make_unique<NGHolder>(proto.kind); for (const auto &m : proto.repeats) { addToHolder(*g, m.first, m.second); } - //dumpGraph("castle_holder.dot", *g); + //dumpGraph("castle_holder.dot", *g); // Sanity checks. assert(allMatchStatesHaveReports(*g)); diff --git a/contrib/libs/hyperscan/src/nfa/castlecompile.h b/contrib/libs/hyperscan/src/nfa/castlecompile.h index ea5f06dabc..cd830eb3a0 100644 --- a/contrib/libs/hyperscan/src/nfa/castlecompile.h +++ b/contrib/libs/hyperscan/src/nfa/castlecompile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,8 +26,8 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** - * \file +/** + * \file * \brief Castle: multi-tenant repeat engine, compiler code. 
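[Annotation] The addToHolder code in the preceding hunk unrolls a PureRepeat into plain graph structure: min_bound mandatory vertices in a chain, then the optional tail for counts up to the max. Below is a self-contained sketch of that unrolling idea using a toy adjacency list instead of the NGHolder/BGL machinery; ToyGraph, unrollRepeat, and the start/accept sentinels are illustrative stand-ins, not the library's API, and the real code additionally wires tops, reports, and the infinite-max cycle case.

#include <cstddef>
#include <iostream>
#include <vector>

// Toy graph: plain adjacency list, vertices identified by index.
struct ToyGraph {
    std::vector<std::vector<size_t>> out;
    size_t add_vertex() { out.emplace_back(); return out.size() - 1; }
    void add_edge(size_t u, size_t v) { out[u].push_back(v); }
};

// Unroll a {min,max} repeat (finite max, min >= 1 for brevity): min
// mandatory vertices in a chain, then (max - min) optional vertices, each
// able to reach accept so the match may end at any count in [min, max].
void unrollRepeat(ToyGraph &g, size_t start, size_t accept,
                  size_t min_bound, size_t max_bound) {
    size_t u = start;
    for (size_t i = 0; i < min_bound; i++) { // mandatory part
        size_t v = g.add_vertex();
        g.add_edge(u, v);
        u = v;
    }
    g.add_edge(u, accept); // may accept once min_bound is reached
    for (size_t i = min_bound; i < max_bound; i++) { // optional part
        size_t v = g.add_vertex();
        g.add_edge(u, v);
        g.add_edge(v, accept); // ...or at any later count up to max_bound
        u = v;
    }
}

int main() {
    ToyGraph g;
    size_t start = g.add_vertex(), accept = g.add_vertex();
    unrollRepeat(g, start, accept, 2, 4); // a {2,4} repeat
    for (size_t v = 0; v < g.out.size(); v++) {
        for (size_t w : g.out[v]) std::cout << v << " -> " << w << "\n";
    }
}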
*/ @@ -37,14 +37,14 @@ #include "nfa_kind.h" #include "ue2common.h" #include "nfagraph/ng_repeat.h" -#include "util/bytecode_ptr.h" +#include "util/bytecode_ptr.h" #include "util/depth.h" -#include "util/flat_containers.h" +#include "util/flat_containers.h" #include <map> #include <memory> #include <set> -#include <unordered_map> +#include <unordered_map> #include <vector> struct NFA; @@ -53,7 +53,7 @@ namespace ue2 { class CharReach; class NGHolder; -class ReportManager; +class ReportManager; struct CompileContext; /** @@ -68,7 +68,7 @@ struct CompileContext; */ struct CastleProto { static constexpr size_t max_occupancy = 65536; // arbitrary limit - CastleProto(nfa_kind k, const PureRepeat &pr); + CastleProto(nfa_kind k, const PureRepeat &pr); const CharReach &reach() const; /** \brief Add a new repeat. */ @@ -90,16 +90,16 @@ struct CastleProto { std::map<u32, PureRepeat> repeats; /** \brief Mapping from report to associated tops. */ - std::unordered_map<ReportID, flat_set<u32>> report_map; + std::unordered_map<ReportID, flat_set<u32>> report_map; /** * \brief Next top id to use. Repeats may be removed without top remapping, * so we track this explicitly instead of using repeats.size(). */ u32 next_top = 1; - - /** \brief Kind for this engine. */ - nfa_kind kind; + + /** \brief Kind for this engine. */ + nfa_kind kind; }; std::set<ReportID> all_reports(const CastleProto &proto); @@ -122,15 +122,15 @@ void remapCastleTops(CastleProto &proto, std::map<u32, u32> &top_map); * NOTE: Tops must be contiguous, i.e. \ref remapCastleTops must have been run * first. */ -bytecode_ptr<NFA> +bytecode_ptr<NFA> buildCastle(const CastleProto &proto, const std::map<u32, std::vector<std::vector<CharReach>>> &triggers, - const CompileContext &cc, const ReportManager &rm); + const CompileContext &cc, const ReportManager &rm); /** - * \brief Merge two CastleProto prototypes together, if possible. If a - * particular repeat from c2 is already in c1, then it will be reused rather - * than adding a duplicate repeat. + * \brief Merge two CastleProto prototypes together, if possible. If a + * particular repeat from c2 is already in c1, then it will be reused rather + * than adding a duplicate repeat. * * Returns true if merge of all repeats in c2 into c1 succeeds, and fills * mapping with the repeat indices. @@ -158,12 +158,12 @@ bool is_equal(const CastleProto &c1, const CastleProto &c2); * of the reports in the given set. */ bool requiresDedupe(const CastleProto &proto, - const flat_set<ReportID> &reports); + const flat_set<ReportID> &reports); /** * \brief Build an NGHolder from a CastleProto. */ -std::unique_ptr<NGHolder> makeHolder(const CastleProto &castle, +std::unique_ptr<NGHolder> makeHolder(const CastleProto &castle, const CompileContext &cc); } // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfa/dfa_build_strat.cpp b/contrib/libs/hyperscan/src/nfa/dfa_build_strat.cpp index b6b7a7fb9f..378ad692c2 100644 --- a/contrib/libs/hyperscan/src/nfa/dfa_build_strat.cpp +++ b/contrib/libs/hyperscan/src/nfa/dfa_build_strat.cpp @@ -1,38 +1,38 @@ -/* - * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
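[Annotation] The mergeCastle documentation in the castlecompile.h hunk above promises that a repeat from c2 already present in c1 is reused rather than duplicated, with top_map recording where each of c2's tops landed. A simplified sketch of that reuse-or-insert contract over a plain std::map follows; Repeat, merge_one, and the top allocation are toy stand-ins for the real CastleProto internals, which also enforce max_occupancy.

#include <cstdint>
#include <iostream>
#include <map>

struct Repeat { uint32_t min, max; };
bool operator==(const Repeat &a, const Repeat &b) {
    return a.min == b.min && a.max == b.max;
}

struct Proto {
    std::map<uint32_t, Repeat> repeats;
    uint32_t next_top = 0;

    // Insert pr, reusing an existing identical repeat if one is present.
    uint32_t merge_one(const Repeat &pr) {
        for (const auto &m : repeats) {
            if (m.second == pr) return m.first; // reuse existing top
        }
        repeats.emplace(next_top, pr);
        return next_top++;
    }
};

// Merge all of c2 into c1, filling top_map: c2 top -> c1 top.
void merge(Proto &c1, const Proto &c2,
           std::map<uint32_t, uint32_t> &top_map) {
    for (const auto &m : c2.repeats) {
        top_map[m.first] = c1.merge_one(m.second);
    }
}

int main() {
    Proto c1, c2;
    c1.merge_one({1, 3});
    c2.repeats = {{0, {1, 3}}, {1, {2, 5}}};
    c2.next_top = 2;
    std::map<uint32_t, uint32_t> top_map;
    merge(c1, c2, top_map);
    for (auto &m : top_map) std::cout << m.first << " -> " << m.second << "\n";
    // {1,3} is reused as c1's top 0; {2,5} becomes a new top 1 in c1.
}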
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "dfa_build_strat.h" - -namespace ue2 { - -// prevent weak vtables for raw_report_info, dfa_build_strat -raw_report_info::~raw_report_info() {} - -dfa_build_strat::~dfa_build_strat() {} - -} // namespace ue2 +/* + * Copyright (c) 2015-2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
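[Annotation] The whole point of dfa_build_strat.cpp, per its own comment, is the pair of empty out-of-line destructors: if every virtual member of a class is defined inline, the compiler has to emit a weak copy of the vtable in every translation unit that uses the class (what Clang's -Wweak-vtables warns about). Defining one virtual function, conventionally the destructor, out of line anchors the vtable to a single object file. A minimal sketch of the idiom outside Hyperscan, with illustrative names:

// widget.h -- other virtuals may stay inline; the destructor is declared
// but deliberately not defined (and not "= default") in the header.
struct Widget {
    virtual ~Widget();
    virtual int weight() const { return 1; }
};

// widget.cpp -- the single out-of-line definition anchors the vtable
// (and typeinfo) to this translation unit instead of duplicating it.
Widget::~Widget() {}

int main() { Widget w; return w.weight() - 1; } // uses the anchored vtable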
+ */ + +#include "dfa_build_strat.h" + +namespace ue2 { + +// prevent weak vtables for raw_report_info, dfa_build_strat +raw_report_info::~raw_report_info() {} + +dfa_build_strat::~dfa_build_strat() {} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfa/dfa_build_strat.h b/contrib/libs/hyperscan/src/nfa/dfa_build_strat.h index cda001623c..552e35573a 100644 --- a/contrib/libs/hyperscan/src/nfa/dfa_build_strat.h +++ b/contrib/libs/hyperscan/src/nfa/dfa_build_strat.h @@ -1,68 +1,68 @@ -/* - * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef DFA_BUILD_STRAT_H -#define DFA_BUILD_STRAT_H - -#include "rdfa.h" -#include "ue2common.h" - -#include <memory> -#include <vector> - -struct NFA; - -namespace ue2 { - -class ReportManager; - -struct raw_report_info { - virtual ~raw_report_info(); - virtual u32 getReportListSize() const = 0; /* in bytes */ - virtual size_t size() const = 0; /* number of lists */ - virtual void fillReportLists(NFA *n, size_t base_offset, - std::vector<u32> &ro /* out */) const = 0; -}; - -class dfa_build_strat { -public: - explicit dfa_build_strat(const ReportManager &rm_in) : rm(rm_in) {} - virtual ~dfa_build_strat(); - virtual raw_dfa &get_raw() const = 0; - virtual std::unique_ptr<raw_report_info> gatherReports( - std::vector<u32> &reports /* out */, - std::vector<u32> &reports_eod /* out */, - u8 *isSingleReport /* out */, - ReportID *arbReport /* out */) const = 0; -protected: - const ReportManager &rm; -}; - -} // namespace ue2 - -#endif // DFA_BUILD_STRAT_H +/* + * Copyright (c) 2015-2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef DFA_BUILD_STRAT_H +#define DFA_BUILD_STRAT_H + +#include "rdfa.h" +#include "ue2common.h" + +#include <memory> +#include <vector> + +struct NFA; + +namespace ue2 { + +class ReportManager; + +struct raw_report_info { + virtual ~raw_report_info(); + virtual u32 getReportListSize() const = 0; /* in bytes */ + virtual size_t size() const = 0; /* number of lists */ + virtual void fillReportLists(NFA *n, size_t base_offset, + std::vector<u32> &ro /* out */) const = 0; +}; + +class dfa_build_strat { +public: + explicit dfa_build_strat(const ReportManager &rm_in) : rm(rm_in) {} + virtual ~dfa_build_strat(); + virtual raw_dfa &get_raw() const = 0; + virtual std::unique_ptr<raw_report_info> gatherReports( + std::vector<u32> &reports /* out */, + std::vector<u32> &reports_eod /* out */, + u8 *isSingleReport /* out */, + ReportID *arbReport /* out */) const = 0; +protected: + const ReportManager &rm; +}; + +} // namespace ue2 + +#endif // DFA_BUILD_STRAT_H diff --git a/contrib/libs/hyperscan/src/nfa/dfa_min.cpp b/contrib/libs/hyperscan/src/nfa/dfa_min.cpp index 1a07e8a7d3..68b7680b78 100644 --- a/contrib/libs/hyperscan/src/nfa/dfa_min.cpp +++ b/contrib/libs/hyperscan/src/nfa/dfa_min.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,14 +26,14 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** - * \file - * \brief Build code for DFA minimization. - */ +/** + * \file + * \brief Build code for DFA minimization. 
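[Annotation] The gatherReports hook declared in the dfa_build_strat interface above exists because many DFA states share identical report lists: a strategy stores each distinct list once and has every state point at the shared copy. A self-contained sketch of that dedup pattern follows; the state and report types are simplified placeholders, not the library's raw_report_info machinery.

#include <cstdint>
#include <iostream>
#include <map>
#include <set>
#include <vector>

using ReportList = std::set<uint32_t>;

// For each state's report list, return an index into a deduplicated table.
std::vector<uint32_t> gather(const std::vector<ReportList> &per_state,
                             std::vector<ReportList> &table /* out */) {
    std::map<ReportList, uint32_t> rev; // list -> index in table
    std::vector<uint32_t> index(per_state.size());
    for (size_t i = 0; i < per_state.size(); i++) {
        auto it = rev.find(per_state[i]);
        if (it == rev.end()) {
            it = rev.emplace(per_state[i], (uint32_t)table.size()).first;
            table.push_back(per_state[i]);
        }
        index[i] = it->second;
    }
    return index;
}

int main() {
    std::vector<ReportList> per_state = {{1, 2}, {}, {1, 2}, {3}};
    std::vector<ReportList> table;
    for (uint32_t i : gather(per_state, table)) std::cout << i << " ";
    std::cout << "\n" << table.size() << " unique lists\n"; // 0 1 0 2 / 3
}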
+ */ /** - * /Summary of the Hopcroft minimisation algorithm/ - * + * /Summary of the Hopcroft minimisation algorithm/ + * * partition := {F, Q \ F}; * work_queue := {F}; * while (work_queue is not empty) do @@ -59,19 +59,19 @@ #include "dfa_min.h" #include "grey.h" -#include "mcclellancompile_util.h" -#include "rdfa.h" +#include "mcclellancompile_util.h" +#include "rdfa.h" #include "ue2common.h" -#include "util/container.h" -#include "util/flat_containers.h" -#include "util/noncopyable.h" +#include "util/container.h" +#include "util/flat_containers.h" +#include "util/noncopyable.h" #include "util/partitioned_set.h" #include <algorithm> #include <functional> -#include <iterator> +#include <iterator> #include <map> -#include <queue> +#include <queue> #include <set> #include <vector> @@ -82,77 +82,77 @@ namespace ue2 { namespace { struct hopcroft_state_info { - explicit hopcroft_state_info(size_t alpha_size) : prev(alpha_size) {} - - /** \brief Mapping from symbol to a list of predecessors that transition to - * this state on that symbol. */ - vector<vector<dstate_id_t>> prev; + explicit hopcroft_state_info(size_t alpha_size) : prev(alpha_size) {} + + /** \brief Mapping from symbol to a list of predecessors that transition to + * this state on that symbol. */ + vector<vector<dstate_id_t>> prev; }; -struct HopcroftInfo : noncopyable { - size_t alpha_size; //!< Size of DFA alphabet. - queue<size_t> work_queue; //!< Hopcroft work queue of partition indices. - partitioned_set<dstate_id_t> partition; //!< Partition set of DFA states. - vector<hopcroft_state_info> states; //!< Pre-calculated state info (preds) +struct HopcroftInfo : noncopyable { + size_t alpha_size; //!< Size of DFA alphabet. + queue<size_t> work_queue; //!< Hopcroft work queue of partition indices. + partitioned_set<dstate_id_t> partition; //!< Partition set of DFA states. + vector<hopcroft_state_info> states; //!< Pre-calculated state info (preds) - explicit HopcroftInfo(const raw_dfa &rdfa); + explicit HopcroftInfo(const raw_dfa &rdfa); }; -} // namespace +} // namespace /** - * \brief Create an initial partitioning and work_queue. + * \brief Create an initial partitioning and work_queue. * - * Initial partition contains {accepting states..., Non-accepting states} - * Initial work_queue contains accepting state subsets + * Initial partition contains {accepting states..., Non-accepting states} + * Initial work_queue contains accepting state subsets * - * The initial partitioning needs to distinguish between the different - * reporting behaviours (unlike standard Hopcroft) --> more than one subset - * possible for the accepting states. - * - * Look for accepting states in both reports and reports_eod. - * Creates a map with a key(reports, reports_eod) and an id. - * Reports of each state are searched against the map and - * added to the corresponding id -> partition[id] and work_queue[id]. - * Non Accept states are added to partition[id+1]. + * The initial partitioning needs to distinguish between the different + * reporting behaviours (unlike standard Hopcroft) --> more than one subset + * possible for the accepting states. + * + * Look for accepting states in both reports and reports_eod. + * Creates a map with a key(reports, reports_eod) and an id. + * Reports of each state are searched against the map and + * added to the corresponding id -> partition[id] and work_queue[id]. + * Non Accept states are added to partition[id+1]. 
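[Annotation] The create_map comment above describes the one Hyperscan-specific twist on textbook Hopcroft: accepting states are pre-split by their (reports, reports_eod) signature rather than lumped into a single accepting class, since states with different reporting behaviour can never be merged. A standalone sketch of that keying step, with state and report types simplified:

#include <cstdint>
#include <iostream>
#include <map>
#include <set>
#include <utility>
#include <vector>

using Reports = std::set<uint32_t>;

struct State { Reports reports, reports_eod; };

// Subset id per state: one subset per distinct accept signature, plus one
// shared subset for all non-accepting states.
std::vector<size_t> initial_partition(const std::vector<State> &states) {
    std::map<std::pair<Reports, Reports>, size_t> subset_map;
    const size_t INVALID = ~size_t(0);
    std::vector<size_t> state_to_subset(states.size(), INVALID);
    for (size_t i = 0; i < states.size(); i++) {
        const State &s = states[i];
        if (s.reports.empty() && s.reports_eod.empty()) continue;
        auto key = std::make_pair(s.reports, s.reports_eod);
        auto it = subset_map.emplace(key, subset_map.size()).first;
        state_to_subset[i] = it->second;
    }
    size_t non_accept = subset_map.size();
    for (auto &sub : state_to_subset) {
        if (sub == INVALID) sub = non_accept;
    }
    return state_to_subset;
}

int main() {
    std::vector<State> st = {{{}, {}}, {{7}, {}}, {{7}, {}}, {{9}, {9}}};
    for (size_t sub : initial_partition(st)) std::cout << sub << " ";
    std::cout << "\n"; // 2 0 0 1 -- two accept classes, one non-accept class
}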
*/ static -vector<size_t> create_map(const raw_dfa &rdfa, queue<size_t> &work_queue) { +vector<size_t> create_map(const raw_dfa &rdfa, queue<size_t> &work_queue) { using ReportKey = pair<flat_set<ReportID>, flat_set<ReportID>>; map<ReportKey, size_t> subset_map; vector<size_t> state_to_subset(rdfa.states.size(), INVALID_SUBSET); for (size_t i = 0; i < rdfa.states.size(); i++) { - const auto &ds = rdfa.states[i]; - if (!ds.reports.empty() || !ds.reports_eod.empty()) { - ReportKey key(ds.reports, ds.reports_eod); + const auto &ds = rdfa.states[i]; + if (!ds.reports.empty() || !ds.reports_eod.empty()) { + ReportKey key(ds.reports, ds.reports_eod); if (contains(subset_map, key)) { state_to_subset[i] = subset_map[key]; } else { size_t sub = subset_map.size(); - subset_map.emplace(std::move(key), sub); + subset_map.emplace(std::move(key), sub); state_to_subset[i] = sub; - work_queue.push(sub); + work_queue.push(sub); } } } - /* Give non-accept states their own subset. */ + /* Give non-accept states their own subset. */ size_t non_accept_sub = subset_map.size(); - replace(state_to_subset.begin(), state_to_subset.end(), INVALID_SUBSET, - non_accept_sub); + replace(state_to_subset.begin(), state_to_subset.end(), INVALID_SUBSET, + non_accept_sub); return state_to_subset; } -HopcroftInfo::HopcroftInfo(const raw_dfa &rdfa) - : alpha_size(rdfa.alpha_size), partition(create_map(rdfa, work_queue)), - states(rdfa.states.size(), hopcroft_state_info(alpha_size)) { - /* Construct predecessor lists for each state, indexed by symbol. */ - for (size_t i = 0; i < states.size(); i++) { // i is the previous state - for (size_t sym = 0; sym < alpha_size; sym++) { - dstate_id_t present_state = rdfa.states[i].next[sym]; - states[present_state].prev[sym].push_back(i); +HopcroftInfo::HopcroftInfo(const raw_dfa &rdfa) + : alpha_size(rdfa.alpha_size), partition(create_map(rdfa, work_queue)), + states(rdfa.states.size(), hopcroft_state_info(alpha_size)) { + /* Construct predecessor lists for each state, indexed by symbol. */ + for (size_t i = 0; i < states.size(); i++) { // i is the previous state + for (size_t sym = 0; sym < alpha_size; sym++) { + dstate_id_t present_state = rdfa.states[i].next[sym]; + states[present_state].prev[sym].push_back(i); } } } @@ -170,14 +170,14 @@ HopcroftInfo::HopcroftInfo(const raw_dfa &rdfa) * - replace S in work_queue by the smaller of the two sets. */ static -void split_and_replace_set(const size_t part_index, HopcroftInfo &info, - const flat_set<dstate_id_t> &splitter) { +void split_and_replace_set(const size_t part_index, HopcroftInfo &info, + const flat_set<dstate_id_t> &splitter) { /* singleton sets cannot be split */ - if (info.partition[part_index].size() == 1) { + if (info.partition[part_index].size() == 1) { return; } - size_t small_index = info.partition.split(part_index, splitter); + size_t small_index = info.partition.split(part_index, splitter); if (small_index == INVALID_SUBSET) { /* the set could not be split */ @@ -187,56 +187,56 @@ void split_and_replace_set(const size_t part_index, HopcroftInfo &info, /* larger subset remains at the input subset index, if the input subset was * already in the work queue then the larger subset will remain there. */ - info.work_queue.push(small_index); + info.work_queue.push(small_index); } /** - * \brief Core of the Hopcroft minimisation algorithm. + * \brief Core of the Hopcroft minimisation algorithm. 
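[Annotation] For intuition about what the worklist loop below computes: the same equivalence classes can be obtained with the simpler Moore-style fixpoint refinement, where each state's signature is its current class plus the classes of its successors, iterated until stable. This is a deliberately naive sketch over a toy DFA, not the Hopcroft worklist algorithm the file actually implements (which gets better complexity from the smaller-half trick); both converge to the same partition.

#include <cstddef>
#include <iostream>
#include <map>
#include <vector>

int main() {
    // Toy DFA: states 0..3 over alphabet {0,1}; state 3 accepts.
    // States 1 and 2 behave identically and should collapse.
    const size_t NSYM = 2;
    std::vector<std::vector<size_t>> next = {{1, 2}, {3, 3}, {3, 3}, {3, 3}};
    std::vector<size_t> cls = {0, 0, 0, 1}; // initial: non-accept vs accept

    for (;;) {
        // Signature of a state: (current class, classes of its successors).
        std::map<std::vector<size_t>, size_t> sig_to_new;
        std::vector<size_t> new_cls(cls.size());
        for (size_t s = 0; s < cls.size(); s++) {
            std::vector<size_t> sig = {cls[s]};
            for (size_t sym = 0; sym < NSYM; sym++) {
                sig.push_back(cls[next[s][sym]]);
            }
            new_cls[s] = sig_to_new.emplace(sig, sig_to_new.size()).first->second;
        }
        if (new_cls == cls) break; // fixpoint: partition is stable
        cls = new_cls;
    }

    for (size_t s = 0; s < cls.size(); s++) {
        std::cout << "state " << s << " -> class " << cls[s] << "\n";
    } // states 1 and 2 end up in the same class
}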
*/ static -void dfa_min(HopcroftInfo &info) { - flat_set<dstate_id_t> curr, sym_preds; +void dfa_min(HopcroftInfo &info) { + flat_set<dstate_id_t> curr, sym_preds; vector<size_t> cand_subsets; - while (!info.work_queue.empty()) { - /* Choose and remove a set of states (curr, or A in the description - * above) from the work queue. Note that we copy the set because the - * partition may be split by the loop below. */ - curr.clear(); - insert(&curr, info.partition[info.work_queue.front()]); - info.work_queue.pop(); - - for (size_t sym = 0; sym < info.alpha_size; sym++) { - /* Find the set of states sym_preds for which a transition on the - * given symbol leads to a state in curr. */ - sym_preds.clear(); - for (dstate_id_t s : curr) { - insert(&sym_preds, info.states[s].prev[sym]); - } - - if (sym_preds.empty()) { + while (!info.work_queue.empty()) { + /* Choose and remove a set of states (curr, or A in the description + * above) from the work queue. Note that we copy the set because the + * partition may be split by the loop below. */ + curr.clear(); + insert(&curr, info.partition[info.work_queue.front()]); + info.work_queue.pop(); + + for (size_t sym = 0; sym < info.alpha_size; sym++) { + /* Find the set of states sym_preds for which a transition on the + * given symbol leads to a state in curr. */ + sym_preds.clear(); + for (dstate_id_t s : curr) { + insert(&sym_preds, info.states[s].prev[sym]); + } + + if (sym_preds.empty()) { continue; } - /* we only need to consider subsets with at least one member in - * sym_preds for splitting */ + /* we only need to consider subsets with at least one member in + * sym_preds for splitting */ cand_subsets.clear(); - info.partition.find_overlapping(sym_preds, &cand_subsets); + info.partition.find_overlapping(sym_preds, &cand_subsets); for (size_t sub : cand_subsets) { - split_and_replace_set(sub, info, sym_preds); + split_and_replace_set(sub, info, sym_preds); } } } } /** - * \brief Build the new DFA state table. + * \brief Build the new DFA state table. */ static -void mapping_new_states(const HopcroftInfo &info, - vector<dstate_id_t> &old_to_new, raw_dfa &rdfa) { - const size_t num_partitions = info.partition.size(); +void mapping_new_states(const HopcroftInfo &info, + vector<dstate_id_t> &old_to_new, raw_dfa &rdfa) { + const size_t num_partitions = info.partition.size(); // Mapping from equiv class's first state to equiv class index. 
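[Annotation] mapping_new_states/renumber_new_states (this hunk and the next) keep one representative per equivalence class, order the new states by each class's first member, then rewrite every transition through an old_to_new table. A compact sketch of that renumbering step, continuing the toy representation from the previous annotation (`cls` maps each old state to its class, and classes were numbered in first-member order):

#include <cstddef>
#include <iostream>
#include <vector>

int main() {
    // From a minimisation pass: old state -> equivalence class.
    std::vector<size_t> cls = {0, 1, 1, 2};
    std::vector<std::vector<size_t>> next = {{1, 2}, {3, 3}, {3, 3}, {3, 3}};
    size_t num_classes = 3;

    // Representative of each class = its first (lowest-numbered) member,
    // so the new state order follows the old one.
    std::vector<size_t> rep(num_classes, ~size_t(0));
    for (size_t s = 0; s < cls.size(); s++) {
        if (rep[cls[s]] == ~size_t(0)) rep[cls[s]] = s;
    }

    // old_to_new is simply the class id here, because classes were already
    // numbered in first-member order; remap each kept state's transitions.
    std::vector<std::vector<size_t>> new_next(num_classes);
    for (size_t c = 0; c < num_classes; c++) {
        for (size_t t : next[rep[c]]) new_next[c].push_back(cls[t]);
    }

    for (size_t c = 0; c < num_classes; c++) {
        std::cout << c << ": " << new_next[c][0] << "," << new_next[c][1] << "\n";
    }
    // 0: 1,1   1: 2,2   2: 2,2  -- the minimised 3-state DFA
}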
map<dstate_id_t, size_t> ordering; @@ -245,7 +245,7 @@ void mapping_new_states(const HopcroftInfo &info, vector<dstate_id_t> eq_state(num_partitions); for (size_t i = 0; i < num_partitions; i++) { - ordering[*info.partition[i].begin()] = i; + ordering[*info.partition[i].begin()] = i; } dstate_id_t new_id = 0; @@ -253,28 +253,28 @@ void mapping_new_states(const HopcroftInfo &info, eq_state[m.second] = new_id++; } - for (size_t t = 0; t < info.partition.size(); t++) { - for (dstate_id_t id : info.partition[t]) { + for (size_t t = 0; t < info.partition.size(); t++) { + for (dstate_id_t id : info.partition[t]) { old_to_new[id] = eq_state[t]; } } vector<dstate> new_states; new_states.reserve(num_partitions); - - for (const auto &m : ordering) { - new_states.push_back(rdfa.states[m.first]); + + for (const auto &m : ordering) { + new_states.push_back(rdfa.states[m.first]); } - rdfa.states = std::move(new_states); + rdfa.states = std::move(new_states); } static -void renumber_new_states(const HopcroftInfo &info, - const vector<dstate_id_t> &old_to_new, raw_dfa &rdfa) { - for (size_t i = 0; i < info.partition.size(); i++) { - for (size_t sym = 0; sym < info.alpha_size; sym++) { - dstate_id_t output = rdfa.states[i].next[sym]; - rdfa.states[i].next[sym] = old_to_new[output]; +void renumber_new_states(const HopcroftInfo &info, + const vector<dstate_id_t> &old_to_new, raw_dfa &rdfa) { + for (size_t i = 0; i < info.partition.size(); i++) { + for (size_t sym = 0; sym < info.alpha_size; sym++) { + dstate_id_t output = rdfa.states[i].next[sym]; + rdfa.states[i].next[sym] = old_to_new[output]; } dstate_id_t dad = rdfa.states[i].daddy; rdfa.states[i].daddy = old_to_new[dad]; @@ -285,14 +285,14 @@ void renumber_new_states(const HopcroftInfo &info, } static -void new_dfa(raw_dfa &rdfa, const HopcroftInfo &info) { - if (info.partition.size() == info.states.size()) { - return; +void new_dfa(raw_dfa &rdfa, const HopcroftInfo &info) { + if (info.partition.size() == info.states.size()) { + return; } - - vector<dstate_id_t> old_to_new(info.states.size()); - mapping_new_states(info, old_to_new, rdfa); - renumber_new_states(info, old_to_new, rdfa); + + vector<dstate_id_t> old_to_new(info.states.size()); + mapping_new_states(info, old_to_new, rdfa); + renumber_new_states(info, old_to_new, rdfa); } void minimize_hopcroft(raw_dfa &rdfa, const Grey &grey) { @@ -300,16 +300,16 @@ void minimize_hopcroft(raw_dfa &rdfa, const Grey &grey) { return; } - if (is_dead(rdfa)) { - DEBUG_PRINTF("dfa is empty\n"); - } - + if (is_dead(rdfa)) { + DEBUG_PRINTF("dfa is empty\n"); + } + UNUSED const size_t states_before = rdfa.states.size(); - HopcroftInfo info(rdfa); + HopcroftInfo info(rdfa); - dfa_min(info); - new_dfa(rdfa, info); + dfa_min(info); + new_dfa(rdfa, info); DEBUG_PRINTF("reduced from %zu to %zu states\n", states_before, rdfa.states.size()); diff --git a/contrib/libs/hyperscan/src/nfa/dfa_min.h b/contrib/libs/hyperscan/src/nfa/dfa_min.h index 61ca6c21a4..efad545f1e 100644 --- a/contrib/libs/hyperscan/src/nfa/dfa_min.h +++ b/contrib/libs/hyperscan/src/nfa/dfa_min.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,9 +26,9 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** - * \file - * \brief Build code for DFA minimization. +/** + * \file + * \brief Build code for DFA minimization. 
*/ #ifndef DFA_MIN_H diff --git a/contrib/libs/hyperscan/src/nfa/gough.c b/contrib/libs/hyperscan/src/nfa/gough.c index 44acd4c286..1dde71e9ba 100644 --- a/contrib/libs/hyperscan/src/nfa/gough.c +++ b/contrib/libs/hyperscan/src/nfa/gough.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -110,7 +110,7 @@ u64a expandSomValue(u32 comp_slot_width, u64a curr_offset, } static really_inline -char doReports(NfaCallback cb, void *ctxt, const struct mcclellan *m, +char doReports(NfaCallback cb, void *ctxt, const struct mcclellan *m, const struct gough_som_info *som, u16 s, u64a loc, char eod, u16 * const cached_accept_state, u32 * const cached_accept_id, u32 * const cached_accept_som) { @@ -307,7 +307,7 @@ u16 goughEnableStarts(const struct mcclellan *m, u16 s, u64a som_offset, static really_inline char goughExec16_i(const struct mcclellan *m, struct gough_som_info *som, u16 *state, const u8 *buf, size_t len, u64a offAdj, - NfaCallback cb, void *ctxt, const u8 **c_final, + NfaCallback cb, void *ctxt, const u8 **c_final, enum MatchMode mode) { assert(ISALIGNED_N(state, 2)); @@ -461,7 +461,7 @@ with_accel: static really_inline char goughExec8_i(const struct mcclellan *m, struct gough_som_info *som, u8 *state, const u8 *buf, size_t len, u64a offAdj, - NfaCallback cb, void *ctxt, const u8 **c_final, + NfaCallback cb, void *ctxt, const u8 **c_final, enum MatchMode mode) { u8 s = *state; const u8 *c = buf, *c_end = buf + len; @@ -595,7 +595,7 @@ with_accel: static never_inline char goughExec8_i_ni(const struct mcclellan *m, struct gough_som_info *som, u8 *state, const u8 *buf, size_t len, u64a offAdj, - NfaCallback cb, void *ctxt, const u8 **final_point, + NfaCallback cb, void *ctxt, const u8 **final_point, enum MatchMode mode) { return goughExec8_i(m, som, state, buf, len, offAdj, cb, ctxt, final_point, mode); @@ -604,7 +604,7 @@ char goughExec8_i_ni(const struct mcclellan *m, struct gough_som_info *som, static never_inline char goughExec16_i_ni(const struct mcclellan *m, struct gough_som_info *som, u16 *state, const u8 *buf, size_t len, u64a offAdj, - NfaCallback cb, void *ctxt, const u8 **final_point, + NfaCallback cb, void *ctxt, const u8 **final_point, enum MatchMode mode) { return goughExec16_i(m, som, state, buf, len, offAdj, cb, ctxt, final_point, mode); @@ -622,7 +622,7 @@ const struct gough_som_info *getSomInfoConst(const char *state_base) { static really_inline char nfaExecGough8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, - const u8 *hend, NfaCallback cb, void *context, + const u8 *hend, NfaCallback cb, void *context, struct mq *q, s64a end, enum MatchMode mode) { DEBUG_PRINTF("enter\n"); struct gough_som_info *som = getSomInfo(q->state); @@ -685,7 +685,7 @@ char nfaExecGough8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, const u8 *final_look; if (goughExec8_i_ni(m, som, &s, cur_buf + sp, local_ep - sp, - offset + sp, cb, context, &final_look, mode) + offset + sp, cb, context, &final_look, mode) == MO_HALT_MATCHING) { *(u8 *)q->state = 0; return 0; @@ -747,7 +747,7 @@ char nfaExecGough8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, static really_inline char nfaExecGough16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, - const u8 *hend, NfaCallback cb, void *context, + const u8 *hend, NfaCallback cb, void *context, struct mq *q, s64a end, 
enum MatchMode mode) { struct gough_som_info *som = getSomInfo(q->state); assert(n->type == GOUGH_NFA_16); @@ -808,7 +808,7 @@ char nfaExecGough16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, /* do main buffer region */ const u8 *final_look; if (goughExec16_i_ni(m, som, &s, cur_buf + sp, local_ep - sp, - offset + sp, cb, context, &final_look, mode) + offset + sp, cb, context, &final_look, mode) == MO_HALT_MATCHING) { *(u16 *)q->state = 0; return 0; @@ -870,7 +870,7 @@ char nfaExecGough16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, char nfaExecGough8_Q(const struct NFA *n, struct mq *q, s64a end) { u64a offset = q->offset; const u8 *buffer = q->buffer; - NfaCallback cb = q->cb; + NfaCallback cb = q->cb; void *context = q->context; assert(n->type == GOUGH_NFA_8); const u8 *hend = q->history + q->hlength; @@ -882,7 +882,7 @@ char nfaExecGough8_Q(const struct NFA *n, struct mq *q, s64a end) { char nfaExecGough16_Q(const struct NFA *n, struct mq *q, s64a end) { u64a offset = q->offset; const u8 *buffer = q->buffer; - NfaCallback cb = q->cb; + NfaCallback cb = q->cb; void *context = q->context; assert(n->type == GOUGH_NFA_16); const u8 *hend = q->history + q->hlength; @@ -894,7 +894,7 @@ char nfaExecGough16_Q(const struct NFA *n, struct mq *q, s64a end) { char nfaExecGough8_Q2(const struct NFA *n, struct mq *q, s64a end) { u64a offset = q->offset; const u8 *buffer = q->buffer; - NfaCallback cb = q->cb; + NfaCallback cb = q->cb; void *context = q->context; assert(n->type == GOUGH_NFA_8); const u8 *hend = q->history + q->hlength; @@ -906,7 +906,7 @@ char nfaExecGough8_Q2(const struct NFA *n, struct mq *q, s64a end) { char nfaExecGough16_Q2(const struct NFA *n, struct mq *q, s64a end) { u64a offset = q->offset; const u8 *buffer = q->buffer; - NfaCallback cb = q->cb; + NfaCallback cb = q->cb; void *context = q->context; assert(n->type == GOUGH_NFA_16); const u8 *hend = q->history + q->hlength; @@ -918,7 +918,7 @@ char nfaExecGough16_Q2(const struct NFA *n, struct mq *q, s64a end) { char nfaExecGough8_QR(const struct NFA *n, struct mq *q, ReportID report) { u64a offset = q->offset; const u8 *buffer = q->buffer; - NfaCallback cb = q->cb; + NfaCallback cb = q->cb; void *context = q->context; assert(n->type == GOUGH_NFA_8); const u8 *hend = q->history + q->hlength; @@ -935,7 +935,7 @@ char nfaExecGough8_QR(const struct NFA *n, struct mq *q, ReportID report) { char nfaExecGough16_QR(const struct NFA *n, struct mq *q, ReportID report) { u64a offset = q->offset; const u8 *buffer = q->buffer; - NfaCallback cb = q->cb; + NfaCallback cb = q->cb; void *context = q->context; assert(n->type == GOUGH_NFA_16); const u8 *hend = q->history + q->hlength; @@ -977,7 +977,7 @@ char nfaExecGough16_initCompressedState(const struct NFA *nfa, u64a offset, char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q) { const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n); - NfaCallback cb = q->cb; + NfaCallback cb = q->cb; void *ctxt = q->context; u8 s = *(u8 *)q->state; u64a offset = q_cur_offset(q); @@ -999,7 +999,7 @@ char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q) { char nfaExecGough16_reportCurrent(const struct NFA *n, struct mq *q) { const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n); - NfaCallback cb = q->cb; + NfaCallback cb = q->cb; void *ctxt = q->context; u16 s = *(u16 *)q->state; const struct mstate_aux *aux = get_aux(m, s); @@ -1031,42 +1031,42 @@ char nfaExecGough16_inAccept(const struct NFA *n, ReportID report, return 
nfaExecMcClellan16_inAccept(n, report, q); } -char nfaExecGough8_inAnyAccept(const struct NFA *n, struct mq *q) { - return nfaExecMcClellan8_inAnyAccept(n, q); -} - -char nfaExecGough16_inAnyAccept(const struct NFA *n, struct mq *q) { - return nfaExecMcClellan16_inAnyAccept(n, q); -} - +char nfaExecGough8_inAnyAccept(const struct NFA *n, struct mq *q) { + return nfaExecMcClellan8_inAnyAccept(n, q); +} + +char nfaExecGough16_inAnyAccept(const struct NFA *n, struct mq *q) { + return nfaExecMcClellan16_inAnyAccept(n, q); +} + static -char goughCheckEOD(const struct NFA *nfa, u16 s, +char goughCheckEOD(const struct NFA *nfa, u16 s, const struct gough_som_info *som, - u64a offset, NfaCallback cb, void *ctxt) { + u64a offset, NfaCallback cb, void *ctxt) { const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa); const struct mstate_aux *aux = get_aux(m, s); - if (!aux->accept_eod) { - return MO_CONTINUE_MATCHING; + if (!aux->accept_eod) { + return MO_CONTINUE_MATCHING; } - return doReports(cb, ctxt, m, som, s, offset, 1, NULL, NULL, NULL); + return doReports(cb, ctxt, m, som, s, offset, 1, NULL, NULL, NULL); } char nfaExecGough8_testEOD(const struct NFA *nfa, const char *state, - UNUSED const char *streamState, u64a offset, - NfaCallback callback, void *context) { + UNUSED const char *streamState, u64a offset, + NfaCallback callback, void *context) { const struct gough_som_info *som = getSomInfoConst(state); - return goughCheckEOD(nfa, *(const u8 *)state, som, offset, callback, - context); + return goughCheckEOD(nfa, *(const u8 *)state, som, offset, callback, + context); } char nfaExecGough16_testEOD(const struct NFA *nfa, const char *state, - UNUSED const char *streamState, u64a offset, - NfaCallback callback, void *context) { + UNUSED const char *streamState, u64a offset, + NfaCallback callback, void *context) { assert(ISALIGNED_N(state, 8)); const struct gough_som_info *som = getSomInfoConst(state); - return goughCheckEOD(nfa, *(const u16 *)state, som, offset, callback, - context); + return goughCheckEOD(nfa, *(const u16 *)state, som, offset, callback, + context); } char nfaExecGough8_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) { diff --git a/contrib/libs/hyperscan/src/nfa/gough.h b/contrib/libs/hyperscan/src/nfa/gough.h index a7f4889232..e3d9f64190 100644 --- a/contrib/libs/hyperscan/src/nfa/gough.h +++ b/contrib/libs/hyperscan/src/nfa/gough.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -39,13 +39,13 @@ struct mq; char nfaExecGough8_testEOD(const struct NFA *nfa, const char *state, const char *streamState, u64a offset, - NfaCallback callback, void *context); + NfaCallback callback, void *context); char nfaExecGough8_Q(const struct NFA *n, struct mq *q, s64a end); char nfaExecGough8_Q2(const struct NFA *n, struct mq *q, s64a end); char nfaExecGough8_QR(const struct NFA *n, struct mq *q, ReportID report); char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q); char nfaExecGough8_inAccept(const struct NFA *n, ReportID report, struct mq *q); -char nfaExecGough8_inAnyAccept(const struct NFA *n, struct mq *q); +char nfaExecGough8_inAnyAccept(const struct NFA *n, struct mq *q); char nfaExecGough8_queueInitState(const struct NFA *n, struct mq *q); char nfaExecGough8_initCompressedState(const struct NFA *n, u64a offset, void *state, u8 key); 
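[Annotation] Every Gough entry point in this header comes in an _8 and a _16 flavour because the packed DFA state is one byte when the state count fits in 8 bits and two bytes otherwise, and callers dispatch on the engine type (GOUGH_NFA_8 vs GOUGH_NFA_16). A toy sketch of that width dispatch; the enum and buffer handling are simplified stand-ins for the real NFA type field and stream state.

#include <cstdint>
#include <cstring>
#include <iostream>

enum EngineType { ENGINE_8, ENGINE_16 }; // stand-in for GOUGH_NFA_8/16

// Read the current packed state out of an opaque stream-state buffer.
uint16_t read_state(EngineType t, const char *state) {
    if (t == ENGINE_8) {
        uint8_t s;                 // one byte of stream state
        std::memcpy(&s, state, 1);
        return s;
    }
    uint16_t s;                    // two bytes for the wide variant
    std::memcpy(&s, state, 2);
    return s;
}

int main() {
    char buf[2] = {0x2a, 0x01};
    std::cout << read_state(ENGINE_8, buf) << "\n";  // 42
    std::cout << read_state(ENGINE_16, buf) << "\n"; // 298 on little-endian
}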
@@ -61,13 +61,13 @@ char nfaExecGough8_expandState(const struct NFA *nfa, void *dest, char nfaExecGough16_testEOD(const struct NFA *nfa, const char *state, const char *streamState, u64a offset, - NfaCallback callback, void *context); + NfaCallback callback, void *context); char nfaExecGough16_Q(const struct NFA *n, struct mq *q, s64a end); char nfaExecGough16_Q2(const struct NFA *n, struct mq *q, s64a end); char nfaExecGough16_QR(const struct NFA *n, struct mq *q, ReportID report); char nfaExecGough16_reportCurrent(const struct NFA *n, struct mq *q); char nfaExecGough16_inAccept(const struct NFA *n, ReportID report, struct mq *q); -char nfaExecGough16_inAnyAccept(const struct NFA *n, struct mq *q); +char nfaExecGough16_inAnyAccept(const struct NFA *n, struct mq *q); char nfaExecGough16_queueInitState(const struct NFA *n, struct mq *q); char nfaExecGough16_initCompressedState(const struct NFA *n, u64a offset, void *state, u8 key); diff --git a/contrib/libs/hyperscan/src/nfa/goughcompile.cpp b/contrib/libs/hyperscan/src/nfa/goughcompile.cpp index d41c6f4235..cd127cdba0 100644 --- a/contrib/libs/hyperscan/src/nfa/goughcompile.cpp +++ b/contrib/libs/hyperscan/src/nfa/goughcompile.cpp @@ -37,11 +37,11 @@ #include "nfa_internal.h" #include "util/compile_context.h" #include "util/container.h" -#include "util/flat_containers.h" +#include "util/flat_containers.h" #include "util/graph_range.h" #include "util/make_unique.h" #include "util/order_check.h" -#include "util/report_manager.h" +#include "util/report_manager.h" #include "util/verify_types.h" #include "ue2common.h" @@ -77,20 +77,20 @@ namespace { class gough_build_strat : public mcclellan_build_strat { public: - gough_build_strat( - raw_som_dfa &r, const GoughGraph &g, const ReportManager &rm_in, - const map<dstate_id_t, gough_accel_state_info> &accel_info) - : mcclellan_build_strat(r, rm_in, false), rdfa(r), gg(g), + gough_build_strat( + raw_som_dfa &r, const GoughGraph &g, const ReportManager &rm_in, + const map<dstate_id_t, gough_accel_state_info> &accel_info) + : mcclellan_build_strat(r, rm_in, false), rdfa(r), gg(g), accel_gough_info(accel_info) {} unique_ptr<raw_report_info> gatherReports(vector<u32> &reports /* out */, vector<u32> &reports_eod /* out */, u8 *isSingleReport /* out */, ReportID *arbReport /* out */) const override; - AccelScheme find_escape_strings(dstate_id_t this_idx) const override; + AccelScheme find_escape_strings(dstate_id_t this_idx) const override; size_t accelSize(void) const override { return sizeof(gough_accel); } - void buildAccel(dstate_id_t this_idx, const AccelScheme &info, - void *accel_out) override; - u32 max_allowed_offset_accel() const override { return 0; } + void buildAccel(dstate_id_t this_idx, const AccelScheme &info, + void *accel_out) override; + u32 max_allowed_offset_accel() const override { return 0; } DfaType getType() const override { return Gough; } raw_som_dfa &rdfa; @@ -1036,9 +1036,9 @@ void update_accel_prog_offset(const gough_build_strat &gbs, } } -bytecode_ptr<NFA> goughCompile(raw_som_dfa &raw, u8 somPrecision, - const CompileContext &cc, - const ReportManager &rm) { +bytecode_ptr<NFA> goughCompile(raw_som_dfa &raw, u8 somPrecision, + const CompileContext &cc, + const ReportManager &rm) { assert(somPrecision == 2 || somPrecision == 4 || somPrecision == 8 || !cc.streaming); @@ -1070,8 +1070,8 @@ bytecode_ptr<NFA> goughCompile(raw_som_dfa &raw, u8 somPrecision, map<dstate_id_t, gough_accel_state_info> accel_allowed; find_allowed_accel_states(*cfg, blocks, &accel_allowed); - 
gough_build_strat gbs(raw, *cfg, rm, accel_allowed); - auto basic_dfa = mcclellanCompile_i(raw, gbs, cc); + gough_build_strat gbs(raw, *cfg, rm, accel_allowed); + auto basic_dfa = mcclellanCompile_i(raw, gbs, cc); assert(basic_dfa); if (!basic_dfa) { return nullptr; @@ -1117,7 +1117,7 @@ bytecode_ptr<NFA> goughCompile(raw_som_dfa &raw, u8 somPrecision, gi.stream_som_loc_width = somPrecision; u32 gough_size = ROUNDUP_N(curr_offset, 16); - auto gough_dfa = make_zeroed_bytecode_ptr<NFA>(gough_size); + auto gough_dfa = make_zeroed_bytecode_ptr<NFA>(gough_size); memcpy(gough_dfa.get(), basic_dfa.get(), basic_dfa->length); memcpy((char *)gough_dfa.get() + haig_offset, &gi, sizeof(gi)); @@ -1149,44 +1149,44 @@ bytecode_ptr<NFA> goughCompile(raw_som_dfa &raw, u8 somPrecision, return gough_dfa; } -AccelScheme gough_build_strat::find_escape_strings(dstate_id_t this_idx) const { - AccelScheme rv; +AccelScheme gough_build_strat::find_escape_strings(dstate_id_t this_idx) const { + AccelScheme rv; if (!contains(accel_gough_info, this_idx)) { - rv.cr = CharReach::dot(); - rv.double_byte.clear(); - return rv; + rv.cr = CharReach::dot(); + rv.double_byte.clear(); + return rv; } - rv = mcclellan_build_strat::find_escape_strings(this_idx); - - assert(!rv.offset || rv.cr.all()); /* should have been limited by strat */ - if (rv.offset) { - rv.cr = CharReach::dot(); - rv.double_byte.clear(); - return rv; - } + rv = mcclellan_build_strat::find_escape_strings(this_idx); - if (rv.double_offset - || !accel_gough_info.at(this_idx).two_byte) { - rv.double_byte.clear(); + assert(!rv.offset || rv.cr.all()); /* should have been limited by strat */ + if (rv.offset) { + rv.cr = CharReach::dot(); + rv.double_byte.clear(); + return rv; } - - return rv; + + if (rv.double_offset + || !accel_gough_info.at(this_idx).two_byte) { + rv.double_byte.clear(); + } + + return rv; } -void gough_build_strat::buildAccel(dstate_id_t this_idx, const AccelScheme &info, - void *accel_out) { +void gough_build_strat::buildAccel(dstate_id_t this_idx, const AccelScheme &info, + void *accel_out) { assert(mcclellan_build_strat::accelSize() == sizeof(AccelAux)); gough_accel *accel = (gough_accel *)accel_out; /* build a plain accelaux so we can work out where we can get to */ - mcclellan_build_strat::buildAccel(this_idx, info, &accel->accel); + mcclellan_build_strat::buildAccel(this_idx, info, &accel->accel); DEBUG_PRINTF("state %hu is accel with type %hhu\n", this_idx, accel->accel.accel_type); if (accel->accel.accel_type == ACCEL_NONE) { return; } - assert(!accel->accel.generic.offset); + assert(!accel->accel.generic.offset); assert(contains(accel_gough_info, this_idx)); accel->margin_dist = verify_u8(accel_gough_info.at(this_idx).margin); built_accel[accel] = this_idx; @@ -1198,11 +1198,11 @@ namespace { struct raw_gough_report_list { set<som_report> reports; - raw_gough_report_list( - const vector<pair<ReportID, GoughSSAVar *>> &raw_reports, - const ReportManager &rm, bool do_remap) { + raw_gough_report_list( + const vector<pair<ReportID, GoughSSAVar *>> &raw_reports, + const ReportManager &rm, bool do_remap) { for (const auto &m : raw_reports) { - ReportID r = do_remap ? rm.getProgramOffset(m.first) : m.first; + ReportID r = do_remap ? 
rm.getProgramOffset(m.first) : m.first; u32 impl_slot = INVALID_SLOT; if (m.second) { impl_slot = m.second->slot; @@ -1233,11 +1233,11 @@ unique_ptr<raw_report_info> gough_build_strat::gatherReports( ReportID *arbReport) const { DEBUG_PRINTF("gathering reports\n"); - const bool remap_reports = has_managed_reports(rdfa.kind); - - auto ri = ue2::make_unique<raw_gough_report_info_impl>(); - map<raw_gough_report_list, u32> rev; - + const bool remap_reports = has_managed_reports(rdfa.kind); + + auto ri = ue2::make_unique<raw_gough_report_info_impl>(); + map<raw_gough_report_list, u32> rev; + assert(!rdfa.states.empty()); vector<GoughVertex> verts(rdfa.states.size()); @@ -1256,7 +1256,7 @@ unique_ptr<raw_report_info> gough_build_strat::gatherReports( continue; } - raw_gough_report_list rrl(gg[v].reports, rm, remap_reports); + raw_gough_report_list rrl(gg[v].reports, rm, remap_reports); DEBUG_PRINTF("non empty r %zu\n", reports.size()); if (rev.find(rrl) != rev.end()) { reports.push_back(rev[rrl]); @@ -1275,7 +1275,7 @@ unique_ptr<raw_report_info> gough_build_strat::gatherReports( } DEBUG_PRINTF("non empty r eod\n"); - raw_gough_report_list rrl(gg[v].reports_eod, rm, remap_reports); + raw_gough_report_list rrl(gg[v].reports_eod, rm, remap_reports); if (rev.find(rrl) != rev.end()) { reports_eod.push_back(rev[rrl]); continue; diff --git a/contrib/libs/hyperscan/src/nfa/goughcompile.h b/contrib/libs/hyperscan/src/nfa/goughcompile.h index 00da1891ec..de29ce23e2 100644 --- a/contrib/libs/hyperscan/src/nfa/goughcompile.h +++ b/contrib/libs/hyperscan/src/nfa/goughcompile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -32,8 +32,8 @@ #include "mcclellancompile.h" #include "nfa_kind.h" #include "ue2common.h" -#include "util/bytecode_ptr.h" -#include "util/flat_containers.h" +#include "util/bytecode_ptr.h" +#include "util/flat_containers.h" #include "util/order_check.h" #include <map> @@ -88,10 +88,10 @@ struct raw_som_dfa : public raw_dfa { * som */ }; -bytecode_ptr<NFA> goughCompile(raw_som_dfa &raw, u8 somPrecision, - const CompileContext &cc, - const ReportManager &rm); +bytecode_ptr<NFA> goughCompile(raw_som_dfa &raw, u8 somPrecision, + const CompileContext &cc, + const ReportManager &rm); } // namespace ue2 -#endif // GOUGHCOMPILE_H +#endif // GOUGHCOMPILE_H diff --git a/contrib/libs/hyperscan/src/nfa/goughcompile_internal.h b/contrib/libs/hyperscan/src/nfa/goughcompile_internal.h index e64540523b..404b828dca 100644 --- a/contrib/libs/hyperscan/src/nfa/goughcompile_internal.h +++ b/contrib/libs/hyperscan/src/nfa/goughcompile_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,8 +33,8 @@ #include "mcclellancompile.h" #include "ue2common.h" #include "util/charreach.h" -#include "util/flat_containers.h" -#include "util/noncopyable.h" +#include "util/flat_containers.h" +#include "util/noncopyable.h" #include "util/order_check.h" #include <map> @@ -103,13 +103,13 @@ struct GoughSSAVarWithInputs; struct GoughSSAVarMin; struct GoughSSAVarJoin; -struct GoughSSAVar : noncopyable { +struct GoughSSAVar : noncopyable { GoughSSAVar(void) : seen(false), slot(INVALID_SLOT) {} 
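[Annotation] GoughSSAVar, whose definition the goughcompile_internal.h hunk above touches, keeps symmetric def-use links: each variable records both the variables it reads (inputs) and the variables that read it (outputs), so every edit must update both sides to keep the sets consistent. A minimal sketch of that invariant with illustrative names, using plain std::set in place of flat_set:

#include <iostream>
#include <set>
#include <string>
#include <utility>

struct Var {
    std::string name;
    std::set<Var *> inputs;  // variables this one reads
    std::set<Var *> outputs; // variables that read this one
    explicit Var(std::string n) : name(std::move(n)) {}

    // Maintain both directions of the def-use link together.
    void add_input(Var *v) {
        inputs.insert(v);
        v->outputs.insert(this);
    }
    void remove_input(Var *v) {
        inputs.erase(v);
        v->outputs.erase(this);
    }
};

int main() {
    Var a("a"), b("b"), c("c");
    c.add_input(&a); // c = f(a, b)
    c.add_input(&b);
    c.remove_input(&a);
    std::cout << "c reads " << c.inputs.size() << " var(s); "
              << "a feeds " << a.outputs.size() << " var(s)\n"; // 1; 0
}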
virtual ~GoughSSAVar(); - const flat_set<GoughSSAVar *> &get_inputs() const { + const flat_set<GoughSSAVar *> &get_inputs() const { return inputs; } - const flat_set<GoughSSAVarWithInputs *> &get_outputs() const { + const flat_set<GoughSSAVarWithInputs *> &get_outputs() const { return outputs; } virtual void replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) = 0; @@ -127,8 +127,8 @@ struct GoughSSAVar : noncopyable { clear_outputs(); } protected: - flat_set<GoughSSAVar *> inputs; - flat_set<GoughSSAVarWithInputs *> outputs; + flat_set<GoughSSAVar *> inputs; + flat_set<GoughSSAVarWithInputs *> outputs; friend struct GoughSSAVarWithInputs; friend struct GoughSSAVarMin; friend struct GoughSSAVarJoin; @@ -184,14 +184,14 @@ struct GoughSSAVarJoin : public GoughSSAVarWithInputs { void add_input(GoughSSAVar *v, GoughEdge prev); - const flat_set<GoughEdge> &get_edges_for_input(GoughSSAVar *input) const; - const std::map<GoughSSAVar *, flat_set<GoughEdge>> &get_input_map() const; + const flat_set<GoughEdge> &get_edges_for_input(GoughSSAVar *input) const; + const std::map<GoughSSAVar *, flat_set<GoughEdge>> &get_input_map() const; protected: void remove_input_raw(GoughSSAVar *v) override; private: - std::map<GoughSSAVar *, flat_set<GoughEdge>> input_map; + std::map<GoughSSAVar *, flat_set<GoughEdge>> input_map; }; struct gough_accel_state_info { diff --git a/contrib/libs/hyperscan/src/nfa/goughcompile_reg.cpp b/contrib/libs/hyperscan/src/nfa/goughcompile_reg.cpp index 48e515b9ad..df7c74aeb3 100644 --- a/contrib/libs/hyperscan/src/nfa/goughcompile_reg.cpp +++ b/contrib/libs/hyperscan/src/nfa/goughcompile_reg.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -32,7 +32,7 @@ #include "gough_internal.h" #include "grey.h" #include "util/container.h" -#include "util/flat_containers.h" +#include "util/flat_containers.h" #include "util/graph.h" #include "util/graph_range.h" #include "util/order_check.h" @@ -235,7 +235,7 @@ void handle_pending_vertices(GoughSSAVar *def, const GoughGraph &g, if (contains(aux.containing_v, def)) { def_v = aux.containing_v.at(def); } - unordered_set<GoughVertex> done; + unordered_set<GoughVertex> done; while (!pending_vertex.empty()) { GoughVertex current = *pending_vertex.begin(); pending_vertex.erase(current); diff --git a/contrib/libs/hyperscan/src/nfa/lbr.c b/contrib/libs/hyperscan/src/nfa/lbr.c index d403733a65..80f5186d01 100644 --- a/contrib/libs/hyperscan/src/nfa/lbr.c +++ b/contrib/libs/hyperscan/src/nfa/lbr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -77,7 +77,7 @@ void lbrExpandState(const struct lbr_common *l, u64a offset, const struct RepeatInfo *info = getRepeatInfo(l); repeatUnpack(stream_state, info, offset, &lstate->ctrl); - lstate->lastEscape = 0; + lstate->lastEscape = 0; } static really_inline @@ -131,9 +131,9 @@ char repeatIsDead(const struct RepeatInfo *info, return lstate->ctrl.ring.offset == REPEAT_DEAD; case REPEAT_TRAILER: return lstate->ctrl.trailer.offset == REPEAT_DEAD; - case REPEAT_ALWAYS: - assert(!"REPEAT_ALWAYS should only be used by Castle"); - return 0; + case REPEAT_ALWAYS: + assert(!"REPEAT_ALWAYS should 
only be used by Castle"); + return 0; } assert(0); @@ -294,7 +294,7 @@ char lbrMatchLoop(const struct lbr_common *l, const u64a begin, const u64a end, } DEBUG_PRINTF("firing match at %llu\n", i); - if (cb(0, i, l->report, ctx) == MO_HALT_MATCHING) { + if (cb(0, i, l->report, ctx) == MO_HALT_MATCHING) { return MO_HALT_MATCHING; } } @@ -308,7 +308,7 @@ char lbrRevScanDot(UNUSED const struct NFA *nfa, UNUSED const u8 *buf, UNUSED size_t begin, UNUSED size_t end, UNUSED size_t *loc) { assert(begin <= end); - assert(nfa->type == LBR_NFA_DOT); + assert(nfa->type == LBR_NFA_DOT); // Nothing can kill a dot! return 0; } @@ -317,7 +317,7 @@ static really_inline char lbrRevScanVerm(const struct NFA *nfa, const u8 *buf, size_t begin, size_t end, size_t *loc) { assert(begin <= end); - assert(nfa->type == LBR_NFA_VERM); + assert(nfa->type == LBR_NFA_VERM); const struct lbr_verm *l = getImplNfa(nfa); if (begin == end) { @@ -341,7 +341,7 @@ static really_inline char lbrRevScanNVerm(const struct NFA *nfa, const u8 *buf, size_t begin, size_t end, size_t *loc) { assert(begin <= end); - assert(nfa->type == LBR_NFA_NVERM); + assert(nfa->type == LBR_NFA_NVERM); const struct lbr_verm *l = getImplNfa(nfa); if (begin == end) { @@ -366,7 +366,7 @@ char lbrRevScanShuf(const struct NFA *nfa, const u8 *buf, size_t begin, size_t end, size_t *loc) { assert(begin <= end); - assert(nfa->type == LBR_NFA_SHUF); + assert(nfa->type == LBR_NFA_SHUF); const struct lbr_shuf *l = getImplNfa(nfa); if (begin == end) { @@ -390,7 +390,7 @@ char lbrRevScanTruf(const struct NFA *nfa, const u8 *buf, size_t begin, size_t end, size_t *loc) { assert(begin <= end); - assert(nfa->type == LBR_NFA_TRUF); + assert(nfa->type == LBR_NFA_TRUF); const struct lbr_truf *l = getImplNfa(nfa); if (begin == end) { @@ -414,7 +414,7 @@ char lbrFwdScanDot(UNUSED const struct NFA *nfa, UNUSED const u8 *buf, UNUSED size_t begin, UNUSED size_t end, UNUSED size_t *loc) { assert(begin <= end); - assert(nfa->type == LBR_NFA_DOT); + assert(nfa->type == LBR_NFA_DOT); // Nothing can kill a dot! 
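[Annotation] The lbrRevScan*/lbrFwdScan* family in this file all answer the same question for different character-class representations: is there a byte in [begin, end) outside the repeat's class (an "escape"), and if so where -- with the dot variant trivially answering no, since nothing can kill a dot. A scalar sketch of the single-character (Verm-style) reverse case; the function name is illustrative, and the real code uses vectorised vermicelli/shufti/truffle scans rather than a byte loop.

#include <cstddef>
#include <cstdint>
#include <iostream>

// Reverse scan: find the last byte in [begin, end) that is NOT c.
// Returns true and sets *loc if an escape is found.
bool revScanEscape(const uint8_t *buf, size_t begin, size_t end,
                   uint8_t c, size_t *loc) {
    for (size_t i = end; i > begin; i--) {
        if (buf[i - 1] != c) {
            *loc = i - 1;
            return true;
        }
    }
    return false; // the whole range extends the repeat
}

int main() {
    const uint8_t buf[] = {'a', 'a', 'a', 'a', 'X', 'a', 'a'};
    size_t loc;
    if (revScanEscape(buf, 0, 7, 'a', &loc)) {
        std::cout << "last escape at " << loc << "\n"; // 4
    }
}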
return 0; } @@ -423,7 +423,7 @@ static really_inline char lbrFwdScanVerm(const struct NFA *nfa, const u8 *buf, size_t begin, size_t end, size_t *loc) { assert(begin <= end); - assert(nfa->type == LBR_NFA_VERM); + assert(nfa->type == LBR_NFA_VERM); const struct lbr_verm *l = getImplNfa(nfa); if (begin == end) { @@ -447,7 +447,7 @@ static really_inline char lbrFwdScanNVerm(const struct NFA *nfa, const u8 *buf, size_t begin, size_t end, size_t *loc) { assert(begin <= end); - assert(nfa->type == LBR_NFA_NVERM); + assert(nfa->type == LBR_NFA_NVERM); const struct lbr_verm *l = getImplNfa(nfa); if (begin == end) { @@ -472,7 +472,7 @@ char lbrFwdScanShuf(const struct NFA *nfa, const u8 *buf, size_t begin, size_t end, size_t *loc) { assert(begin <= end); - assert(nfa->type == LBR_NFA_SHUF); + assert(nfa->type == LBR_NFA_SHUF); const struct lbr_shuf *l = getImplNfa(nfa); if (begin == end) { @@ -496,7 +496,7 @@ char lbrFwdScanTruf(const struct NFA *nfa, const u8 *buf, size_t begin, size_t end, size_t *loc) { assert(begin <= end); - assert(nfa->type == LBR_NFA_TRUF); + assert(nfa->type == LBR_NFA_TRUF); const struct lbr_truf *l = getImplNfa(nfa); if (begin == end) { diff --git a/contrib/libs/hyperscan/src/nfa/lbr.h b/contrib/libs/hyperscan/src/nfa/lbr.h index a9e42046db..3f89d77850 100644 --- a/contrib/libs/hyperscan/src/nfa/lbr.h +++ b/contrib/libs/hyperscan/src/nfa/lbr.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -46,7 +46,7 @@ char nfaExecLbrDot_Q2(const struct NFA *n, struct mq *q, s64a end); char nfaExecLbrDot_QR(const struct NFA *n, struct mq *q, ReportID report); char nfaExecLbrDot_reportCurrent(const struct NFA *n, struct mq *q); char nfaExecLbrDot_inAccept(const struct NFA *n, ReportID report, struct mq *q); -char nfaExecLbrDot_inAnyAccept(const struct NFA *n, struct mq *q); +char nfaExecLbrDot_inAnyAccept(const struct NFA *n, struct mq *q); char nfaExecLbrDot_queueInitState(const struct NFA *n, struct mq *q); char nfaExecLbrDot_initCompressedState(const struct NFA *n, u64a offset, void *state, u8 key); @@ -67,7 +67,7 @@ char nfaExecLbrVerm_QR(const struct NFA *n, struct mq *q, ReportID report); char nfaExecLbrVerm_reportCurrent(const struct NFA *n, struct mq *q); char nfaExecLbrVerm_inAccept(const struct NFA *n, ReportID report, struct mq *q); -char nfaExecLbrVerm_inAnyAccept(const struct NFA *n, struct mq *q); +char nfaExecLbrVerm_inAnyAccept(const struct NFA *n, struct mq *q); char nfaExecLbrVerm_queueInitState(const struct NFA *n, struct mq *q); char nfaExecLbrVerm_initCompressedState(const struct NFA *n, u64a offset, void *state, u8 key); @@ -88,7 +88,7 @@ char nfaExecLbrNVerm_QR(const struct NFA *n, struct mq *q, ReportID report); char nfaExecLbrNVerm_reportCurrent(const struct NFA *n, struct mq *q); char nfaExecLbrNVerm_inAccept(const struct NFA *n, ReportID report, struct mq *q); -char nfaExecLbrNVerm_inAnyAccept(const struct NFA *n, struct mq *q); +char nfaExecLbrNVerm_inAnyAccept(const struct NFA *n, struct mq *q); char nfaExecLbrNVerm_queueInitState(const struct NFA *n, struct mq *q); char nfaExecLbrNVerm_initCompressedState(const struct NFA *n, u64a offset, void *state, u8 key); @@ -109,7 +109,7 @@ char nfaExecLbrShuf_QR(const struct NFA *n, struct mq *q, ReportID report); char nfaExecLbrShuf_reportCurrent(const struct NFA *n, struct mq *q); char 
nfaExecLbrShuf_inAccept(const struct NFA *n, ReportID report, struct mq *q); -char nfaExecLbrShuf_inAnyAccept(const struct NFA *n, struct mq *q); +char nfaExecLbrShuf_inAnyAccept(const struct NFA *n, struct mq *q); char nfaExecLbrShuf_queueInitState(const struct NFA *n, struct mq *q); char nfaExecLbrShuf_initCompressedState(const struct NFA *n, u64a offset, void *state, u8 key); @@ -130,7 +130,7 @@ char nfaExecLbrTruf_QR(const struct NFA *n, struct mq *q, ReportID report); char nfaExecLbrTruf_reportCurrent(const struct NFA *n, struct mq *q); char nfaExecLbrTruf_inAccept(const struct NFA *n, ReportID report, struct mq *q); -char nfaExecLbrTruf_inAnyAccept(const struct NFA *n, struct mq *q); +char nfaExecLbrTruf_inAnyAccept(const struct NFA *n, struct mq *q); char nfaExecLbrTruf_queueInitState(const struct NFA *n, struct mq *q); char nfaExecLbrTruf_initCompressedState(const struct NFA *n, u64a offset, void *state, u8 key); diff --git a/contrib/libs/hyperscan/src/nfa/lbr_common_impl.h b/contrib/libs/hyperscan/src/nfa/lbr_common_impl.h index 5ae35431e4..7f309b6b10 100644 --- a/contrib/libs/hyperscan/src/nfa/lbr_common_impl.h +++ b/contrib/libs/hyperscan/src/nfa/lbr_common_impl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -72,7 +72,7 @@ char JOIN(ENGINE_EXEC_NAME, _reportCurrent)(const struct NFA *nfa, const struct lbr_common *l = getImplNfa(nfa); u64a offset = q_cur_offset(q); DEBUG_PRINTF("firing match %u at %llu\n", l->report, offset); - q->cb(0, offset, l->report, q->context); + q->cb(0, offset, l->report, q->context); return 0; } @@ -94,15 +94,15 @@ char JOIN(ENGINE_EXEC_NAME, _inAccept)(const struct NFA *nfa, return lbrInAccept(l, lstate, q->streamState, offset, report); } -char JOIN(ENGINE_EXEC_NAME, _inAnyAccept)(const struct NFA *nfa, struct mq *q) { - assert(nfa && q); - assert(isLbrType(nfa->type)); - DEBUG_PRINTF("entry\n"); - - const struct lbr_common *l = getImplNfa(nfa); - return JOIN(ENGINE_EXEC_NAME, _inAccept)(nfa, l->report, q); -} - +char JOIN(ENGINE_EXEC_NAME, _inAnyAccept)(const struct NFA *nfa, struct mq *q) { + assert(nfa && q); + assert(isLbrType(nfa->type)); + DEBUG_PRINTF("entry\n"); + + const struct lbr_common *l = getImplNfa(nfa); + return JOIN(ENGINE_EXEC_NAME, _inAccept)(nfa, l->report, q); +} + char JOIN(ENGINE_EXEC_NAME, _queueInitState)(const struct NFA *nfa, struct mq *q) { assert(nfa && q); @@ -215,7 +215,7 @@ char JOIN(ENGINE_EXEC_NAME, _Q_i)(const struct NFA *nfa, struct mq *q, if (q->report_current) { DEBUG_PRINTF("report_current: fire match at %llu\n", q_cur_offset(q)); - int rv = q->cb(0, q_cur_offset(q), l->report, q->context); + int rv = q->cb(0, q_cur_offset(q), l->report, q->context); q->report_current = 0; if (rv == MO_HALT_MATCHING) { return MO_HALT_MATCHING; diff --git a/contrib/libs/hyperscan/src/nfa/limex.h b/contrib/libs/hyperscan/src/nfa/limex.h index 0223604dae..0beb3a807a 100644 --- a/contrib/libs/hyperscan/src/nfa/limex.h +++ b/contrib/libs/hyperscan/src/nfa/limex.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,7 +30,7 @@ #define LIMEX_H #ifdef __cplusplus -#include <string> +#include <string> extern "C" 
{ #endif @@ -41,7 +41,7 @@ extern "C" #define GENERATE_NFA_DUMP_DECL(gf_name) \ } /* extern "C" */ \ namespace ue2 { \ - void gf_name##_dump(const struct NFA *nfa, const std::string &base); \ + void gf_name##_dump(const struct NFA *nfa, const std::string &base); \ } /* namespace ue2 */ \ extern "C" { @@ -52,34 +52,34 @@ extern "C" #define GENERATE_NFA_DECL(gf_name) \ char gf_name##_testEOD(const struct NFA *nfa, const char *state, \ const char *streamState, u64a offset, \ - NfaCallback callback, void *context); \ + NfaCallback callback, void *context); \ char gf_name##_Q(const struct NFA *n, struct mq *q, s64a end); \ char gf_name##_Q2(const struct NFA *n, struct mq *q, s64a end); \ char gf_name##_QR(const struct NFA *n, struct mq *q, ReportID report); \ char gf_name##_reportCurrent(const struct NFA *n, struct mq *q); \ char gf_name##_inAccept(const struct NFA *n, ReportID report, \ struct mq *q); \ - char gf_name##_inAnyAccept(const struct NFA *n, struct mq *q); \ + char gf_name##_inAnyAccept(const struct NFA *n, struct mq *q); \ char gf_name##_queueInitState(const struct NFA *n, struct mq *q); \ char gf_name##_initCompressedState(const struct NFA *n, u64a offset, \ void *state, u8 key); \ char gf_name##_B_Reverse(const struct NFA *n, u64a offset, const u8 *buf, \ size_t buflen, const u8 *hbuf, size_t hlen, \ - NfaCallback cb, void *context); \ + NfaCallback cb, void *context); \ char gf_name##_queueCompressState(const struct NFA *nfa, \ const struct mq *q, s64a loc); \ char gf_name##_expandState(const struct NFA *nfa, void *dest, \ const void *src, u64a offset, u8 key); \ - enum nfa_zombie_status gf_name##_zombie_status(const struct NFA *nfa, \ - struct mq *q, s64a loc); \ + enum nfa_zombie_status gf_name##_zombie_status(const struct NFA *nfa, \ + struct mq *q, s64a loc); \ GENERATE_NFA_DUMP_DECL(gf_name) -GENERATE_NFA_DECL(nfaExecLimEx32) -GENERATE_NFA_DECL(nfaExecLimEx64) -GENERATE_NFA_DECL(nfaExecLimEx128) -GENERATE_NFA_DECL(nfaExecLimEx256) -GENERATE_NFA_DECL(nfaExecLimEx384) -GENERATE_NFA_DECL(nfaExecLimEx512) +GENERATE_NFA_DECL(nfaExecLimEx32) +GENERATE_NFA_DECL(nfaExecLimEx64) +GENERATE_NFA_DECL(nfaExecLimEx128) +GENERATE_NFA_DECL(nfaExecLimEx256) +GENERATE_NFA_DECL(nfaExecLimEx384) +GENERATE_NFA_DECL(nfaExecLimEx512) #undef GENERATE_NFA_DECL #undef GENERATE_NFA_DUMP_DECL diff --git a/contrib/libs/hyperscan/src/nfa/limex_64.c b/contrib/libs/hyperscan/src/nfa/limex_64.c index e8f0880b27..877891078b 100644 --- a/contrib/libs/hyperscan/src/nfa/limex_64.c +++ b/contrib/libs/hyperscan/src/nfa/limex_64.c @@ -1,73 +1,73 @@ -/* - * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief LimEx NFA: 64-bit SIMD runtime implementations. - */ - -/* Limex64 is unusual in that on 32-bit platforms, at runtime it uses an m128 for - * state calculations. - */ - -//#define DEBUG_INPUT -//#define DEBUG_EXCEPTIONS - -#include "limex.h" - -#include "accel.h" -#include "limex_internal.h" -#include "nfa_internal.h" -#include "ue2common.h" -#include "util/bitutils.h" -#include "util/simd_utils.h" - -// Common code -#define STATE_ON_STACK -#define ESTATE_ON_STACK - -#include "limex_runtime.h" - -#define SIZE 64 -#define ENG_STATE_T u64a - -#ifdef ARCH_64_BIT -#define STATE_T u64a -#define LOAD_FROM_ENG load_u64a -#else -#define STATE_T m128 -#define LOAD_FROM_ENG load_m128_from_u64a -#endif - -#include "limex_exceptional.h" - -#include "limex_state_impl.h" - -#define INLINE_ATTR really_inline -#include "limex_common_impl.h" - -#include "limex_runtime_impl.h" +/* + * Copyright (c) 2015-2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief LimEx NFA: 64-bit SIMD runtime implementations. + */ + +/* Limex64 is unusual in that on 32-bit platforms, at runtime it uses an m128 for + * state calculations.
+ */ + +//#define DEBUG_INPUT +//#define DEBUG_EXCEPTIONS + +#include "limex.h" + +#include "accel.h" +#include "limex_internal.h" +#include "nfa_internal.h" +#include "ue2common.h" +#include "util/bitutils.h" +#include "util/simd_utils.h" + +// Common code +#define STATE_ON_STACK +#define ESTATE_ON_STACK + +#include "limex_runtime.h" + +#define SIZE 64 +#define ENG_STATE_T u64a + +#ifdef ARCH_64_BIT +#define STATE_T u64a +#define LOAD_FROM_ENG load_u64a +#else +#define STATE_T m128 +#define LOAD_FROM_ENG load_m128_from_u64a +#endif + +#include "limex_exceptional.h" + +#include "limex_state_impl.h" + +#define INLINE_ATTR really_inline +#include "limex_common_impl.h" + +#include "limex_runtime_impl.h" diff --git a/contrib/libs/hyperscan/src/nfa/limex_accel.c b/contrib/libs/hyperscan/src/nfa/limex_accel.c index 4834b6a547..407c06208c 100644 --- a/contrib/libs/hyperscan/src/nfa/limex_accel.c +++ b/contrib/libs/hyperscan/src/nfa/limex_accel.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,13 +35,13 @@ #include "accel.h" #include "limex_internal.h" #include "limex_limits.h" -#include "limex_shuffle.h" +#include "limex_shuffle.h" #include "nfa_internal.h" #include "shufti.h" #include "truffle.h" #include "ue2common.h" #include "vermicelli.h" -#include "util/arch.h" +#include "util/arch.h" #include "util/bitutils.h" #include "util/simd_utils.h" @@ -65,7 +65,7 @@ size_t accelScanWrapper(const u8 *accelTable, const union AccelAux *aux, } aux = aux + aux_idx; - const u8 *ptr = run_accel(aux, &input[i], &input[end]); + const u8 *ptr = run_accel(aux, &input[i], &input[end]); assert(ptr >= &input[i]); size_t j = (size_t)(ptr - input); DEBUG_PRINTF("accel skipped %zu of %zu chars\n", (j - i), (end - i)); @@ -76,26 +76,26 @@ size_t accelScanWrapper(const u8 *accelTable, const union AccelAux *aux, size_t doAccel32(u32 s, u32 accel, const u8 *accelTable, const union AccelAux *aux, const u8 *input, size_t i, size_t end) { - u32 idx = pext32(s, accel); + u32 idx = pext32(s, accel); return accelScanWrapper(accelTable, aux, input, idx, i, end); } -#ifdef ARCH_64_BIT -size_t doAccel64(u64a s, u64a accel, const u8 *accelTable, - const union AccelAux *aux, const u8 *input, size_t i, - size_t end) { - u32 idx = pext64(s, accel); - return accelScanWrapper(accelTable, aux, input, idx, i, end); -} -#else -size_t doAccel64(m128 s, m128 accel, const u8 *accelTable, - const union AccelAux *aux, const u8 *input, size_t i, - size_t end) { - u32 idx = pext64(movq(s), movq(accel)); - return accelScanWrapper(accelTable, aux, input, idx, i, end); -} -#endif - +#ifdef ARCH_64_BIT +size_t doAccel64(u64a s, u64a accel, const u8 *accelTable, + const union AccelAux *aux, const u8 *input, size_t i, + size_t end) { + u32 idx = pext64(s, accel); + return accelScanWrapper(accelTable, aux, input, idx, i, end); +} +#else +size_t doAccel64(m128 s, m128 accel, const u8 *accelTable, + const union AccelAux *aux, const u8 *input, size_t i, + size_t end) { + u32 idx = pext64(movq(s), movq(accel)); + return accelScanWrapper(accelTable, aux, input, idx, i, end); +} +#endif + size_t doAccel128(const m128 *state, const struct LimExNFA128 *limex, const u8 *accelTable, const union AccelAux *aux, const u8 *input, size_t i, size_t end) { @@ -104,7 +104,7 @@ size_t doAccel128(const m128 *state, const struct LimExNFA128 *limex, 
DEBUG_PRINTF("using PSHUFB for 128-bit shuffle\n"); m128 accelPerm = limex->accelPermute; m128 accelComp = limex->accelCompare; - idx = packedExtract128(s, accelPerm, accelComp); + idx = packedExtract128(s, accelPerm, accelComp); return accelScanWrapper(accelTable, aux, input, idx, i, end); } @@ -116,13 +116,13 @@ size_t doAccel256(const m256 *state, const struct LimExNFA256 *limex, DEBUG_PRINTF("using PSHUFB for 256-bit shuffle\n"); m256 accelPerm = limex->accelPermute; m256 accelComp = limex->accelCompare; -#if !defined(HAVE_AVX2) - u32 idx1 = packedExtract128(s.lo, accelPerm.lo, accelComp.lo); - u32 idx2 = packedExtract128(s.hi, accelPerm.hi, accelComp.hi); - assert((idx1 & idx2) == 0); // should be no shared bits - idx = idx1 | idx2; +#if !defined(HAVE_AVX2) + u32 idx1 = packedExtract128(s.lo, accelPerm.lo, accelComp.lo); + u32 idx2 = packedExtract128(s.hi, accelPerm.hi, accelComp.hi); + assert((idx1 & idx2) == 0); // should be no shared bits + idx = idx1 | idx2; #else - idx = packedExtract256(s, accelPerm, accelComp); + idx = packedExtract256(s, accelPerm, accelComp); #endif return accelScanWrapper(accelTable, aux, input, idx, i, end); } @@ -135,9 +135,9 @@ size_t doAccel384(const m384 *state, const struct LimExNFA384 *limex, DEBUG_PRINTF("using PSHUFB for 384-bit shuffle\n"); m384 accelPerm = limex->accelPermute; m384 accelComp = limex->accelCompare; - u32 idx1 = packedExtract128(s.lo, accelPerm.lo, accelComp.lo); - u32 idx2 = packedExtract128(s.mid, accelPerm.mid, accelComp.mid); - u32 idx3 = packedExtract128(s.hi, accelPerm.hi, accelComp.hi); + u32 idx1 = packedExtract128(s.lo, accelPerm.lo, accelComp.lo); + u32 idx2 = packedExtract128(s.mid, accelPerm.mid, accelComp.mid); + u32 idx3 = packedExtract128(s.hi, accelPerm.hi, accelComp.hi); assert((idx1 & idx2 & idx3) == 0); // should be no shared bits idx = idx1 | idx2 | idx3; return accelScanWrapper(accelTable, aux, input, idx, i, end); @@ -151,20 +151,20 @@ size_t doAccel512(const m512 *state, const struct LimExNFA512 *limex, DEBUG_PRINTF("using PSHUFB for 512-bit shuffle\n"); m512 accelPerm = limex->accelPermute; m512 accelComp = limex->accelCompare; -#if defined(HAVE_AVX512) - idx = packedExtract512(s, accelPerm, accelComp); -#elif defined(HAVE_AVX2) - u32 idx1 = packedExtract256(s.lo, accelPerm.lo, accelComp.lo); - u32 idx2 = packedExtract256(s.hi, accelPerm.hi, accelComp.hi); - assert((idx1 & idx2) == 0); // should be no shared bits - idx = idx1 | idx2; +#if defined(HAVE_AVX512) + idx = packedExtract512(s, accelPerm, accelComp); +#elif defined(HAVE_AVX2) + u32 idx1 = packedExtract256(s.lo, accelPerm.lo, accelComp.lo); + u32 idx2 = packedExtract256(s.hi, accelPerm.hi, accelComp.hi); + assert((idx1 & idx2) == 0); // should be no shared bits + idx = idx1 | idx2; #else - u32 idx1 = packedExtract128(s.lo.lo, accelPerm.lo.lo, accelComp.lo.lo); - u32 idx2 = packedExtract128(s.lo.hi, accelPerm.lo.hi, accelComp.lo.hi); - u32 idx3 = packedExtract128(s.hi.lo, accelPerm.hi.lo, accelComp.hi.lo); - u32 idx4 = packedExtract128(s.hi.hi, accelPerm.hi.hi, accelComp.hi.hi); + u32 idx1 = packedExtract128(s.lo.lo, accelPerm.lo.lo, accelComp.lo.lo); + u32 idx2 = packedExtract128(s.lo.hi, accelPerm.lo.hi, accelComp.lo.hi); + u32 idx3 = packedExtract128(s.hi.lo, accelPerm.hi.lo, accelComp.hi.lo); + u32 idx4 = packedExtract128(s.hi.hi, accelPerm.hi.hi, accelComp.hi.hi); assert((idx1 & idx2 & idx3 & idx4) == 0); // should be no shared bits idx = idx1 | idx2 | idx3 | idx4; -#endif +#endif return accelScanWrapper(accelTable, aux, input, idx, i, end); } 
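The doAccel32/doAccel64 paths above compress the sparse set of live accelerable state bits into a dense table index: pext32()/pext64() (from util/bitutils.h, wrapping the BMI2 PEXT instruction where available) gather exactly the state bits selected by the accel mask into the low bits of the result, and accelScanWrapper() uses that value to choose one of the 2^popcount(mask) precomputed AccelAux entries. The wider engines compute a per-128-bit-lane index with packedExtract128() and OR the lane results together, which is what asserts such as (idx1 & idx2) == 0 document: each accelerable state owns a distinct bit of the index. A minimal sketch of the extract step in portable C++; pext32_sw is an illustrative stand-in, not Hyperscan's implementation:

    #include <cstdint>

    // Gather the bits of `value` selected by `mask` into the low-order bits
    // of the result. BMI2's PEXT does this in one instruction; this loop
    // form exists only to illustrate the semantics.
    static uint32_t pext32_sw(uint32_t value, uint32_t mask) {
        uint32_t out = 0;
        uint32_t bit = 0;
        while (mask != 0) {
            uint32_t lowest = mask & (~mask + 1); // isolate lowest set bit
            if (value & lowest) {
                out |= 1u << bit;
            }
            bit++;
            mask &= mask - 1; // clear lowest set bit
        }
        return out;
    }

    // Example: live states 0b1000 against accel mask 0b1010 yield index
    // 0b10 (2), i.e. the third of the four accel-table entries that exist
    // for a two-state mask.

Because the extract is effectively free on BMI2 hardware, the build side can afford one precomputed accel entry per combination of accelerable states and the runtime pays a single instruction to select it.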
diff --git a/contrib/libs/hyperscan/src/nfa/limex_accel.h b/contrib/libs/hyperscan/src/nfa/limex_accel.h index e5c94e82ad..0150081609 100644 --- a/contrib/libs/hyperscan/src/nfa/limex_accel.h +++ b/contrib/libs/hyperscan/src/nfa/limex_accel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -40,7 +40,7 @@ #include "util/simd_utils.h" // for m128 etc union AccelAux; -struct LimExNFA64; +struct LimExNFA64; struct LimExNFA128; struct LimExNFA256; struct LimExNFA384; @@ -50,16 +50,16 @@ size_t doAccel32(u32 s, u32 accel, const u8 *accelTable, const union AccelAux *aux, const u8 *input, size_t i, size_t end); -#ifdef ARCH_64_BIT -size_t doAccel64(u64a s, u64a accel, const u8 *accelTable, - const union AccelAux *aux, const u8 *input, size_t i, - size_t end); -#else -size_t doAccel64(m128 s, m128 accel, const u8 *accelTable, - const union AccelAux *aux, const u8 *input, size_t i, - size_t end); -#endif - +#ifdef ARCH_64_BIT +size_t doAccel64(u64a s, u64a accel, const u8 *accelTable, + const union AccelAux *aux, const u8 *input, size_t i, + size_t end); +#else +size_t doAccel64(m128 s, m128 accel, const u8 *accelTable, + const union AccelAux *aux, const u8 *input, size_t i, + size_t end); +#endif + size_t doAccel128(const m128 *s, const struct LimExNFA128 *limex, const u8 *accelTable, const union AccelAux *aux, const u8 *input, size_t i, size_t end); diff --git a/contrib/libs/hyperscan/src/nfa/limex_common_impl.h b/contrib/libs/hyperscan/src/nfa/limex_common_impl.h index e441945d70..6d72ee464c 100644 --- a/contrib/libs/hyperscan/src/nfa/limex_common_impl.h +++ b/contrib/libs/hyperscan/src/nfa/limex_common_impl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -31,22 +31,22 @@ /* impl of limex functions which depend only on state size */ -#if !defined(SIZE) || !defined(STATE_T) || !defined(LOAD_FROM_ENG) \ - || !defined(INLINE_ATTR) -# error Must define SIZE, STATE_T, LOAD_FROM_ENG and INLINE_ATTR in includer. +#if !defined(SIZE) || !defined(STATE_T) || !defined(LOAD_FROM_ENG) \ + || !defined(INLINE_ATTR) +# error Must define SIZE, STATE_T, LOAD_FROM_ENG and INLINE_ATTR in includer. 
#endif #define IMPL_NFA_T JOIN(struct LimExNFA, SIZE) #define TESTEOD_FN JOIN(moNfaTestEod, SIZE) #define LIMEX_INACCEPT_FN JOIN(limexInAccept, SIZE) -#define LIMEX_INANYACCEPT_FN JOIN(limexInAnyAccept, SIZE) +#define LIMEX_INANYACCEPT_FN JOIN(limexInAnyAccept, SIZE) #define EXPIRE_ESTATE_FN JOIN(limexExpireExtendedState, SIZE) #define REPORTCURRENT_FN JOIN(moNfaReportCurrent, SIZE) #define INITIAL_FN JOIN(moNfaInitial, SIZE) #define TOP_FN JOIN(moNfaTop, SIZE) #define TOPN_FN JOIN(moNfaTopN, SIZE) -#define PROCESS_ACCEPTS_IMPL_FN JOIN(moProcessAcceptsImpl, SIZE) +#define PROCESS_ACCEPTS_IMPL_FN JOIN(moProcessAcceptsImpl, SIZE) #define PROCESS_ACCEPTS_FN JOIN(moProcessAccepts, SIZE) #define PROCESS_ACCEPTS_NOSQUASH_FN JOIN(moProcessAcceptsNoSquash, SIZE) #define CONTEXT_T JOIN(NFAContext, SIZE) @@ -61,20 +61,20 @@ #define SQUASH_UNTUG_BR_FN JOIN(lazyTug, SIZE) #define GET_NFA_REPEAT_INFO_FN JOIN(getNfaRepeatInfo, SIZE) -#if defined(ARCH_64_BIT) && (SIZE >= 64) -#define CHUNK_T u64a -#define FIND_AND_CLEAR_FN findAndClearLSB_64 -#define POPCOUNT_FN popcount64 -#define RANK_IN_MASK_FN rank_in_mask64 -#else -#define CHUNK_T u32 -#define FIND_AND_CLEAR_FN findAndClearLSB_32 -#define POPCOUNT_FN popcount32 -#define RANK_IN_MASK_FN rank_in_mask32 -#endif - -#define NUM_STATE_CHUNKS (sizeof(STATE_T) / sizeof(CHUNK_T)) - +#if defined(ARCH_64_BIT) && (SIZE >= 64) +#define CHUNK_T u64a +#define FIND_AND_CLEAR_FN findAndClearLSB_64 +#define POPCOUNT_FN popcount64 +#define RANK_IN_MASK_FN rank_in_mask64 +#else +#define CHUNK_T u32 +#define FIND_AND_CLEAR_FN findAndClearLSB_32 +#define POPCOUNT_FN popcount32 +#define RANK_IN_MASK_FN rank_in_mask32 +#endif + +#define NUM_STATE_CHUNKS (sizeof(STATE_T) / sizeof(CHUNK_T)) + static really_inline void SQUASH_UNTUG_BR_FN(const IMPL_NFA_T *limex, const union RepeatControl *repeat_ctrl, @@ -96,7 +96,7 @@ void SQUASH_UNTUG_BR_FN(const IMPL_NFA_T *limex, const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i); u32 cyclicState = info->cyclicState; - if (!TESTBIT_STATE(*accstate, cyclicState)) { + if (!TESTBIT_STATE(*accstate, cyclicState)) { continue; } @@ -113,85 +113,85 @@ void SQUASH_UNTUG_BR_FN(const IMPL_NFA_T *limex, } } -static really_inline -char PROCESS_ACCEPTS_IMPL_FN(const IMPL_NFA_T *limex, const STATE_T *s, - STATE_T *squash, const STATE_T *acceptMask, - const struct NFAAccept *acceptTable, u64a offset, - NfaCallback callback, void *context) { +static really_inline +char PROCESS_ACCEPTS_IMPL_FN(const IMPL_NFA_T *limex, const STATE_T *s, + STATE_T *squash, const STATE_T *acceptMask, + const struct NFAAccept *acceptTable, u64a offset, + NfaCallback callback, void *context) { assert(s); assert(limex); assert(callback); - const STATE_T accept_mask = *acceptMask; - STATE_T accepts = AND_STATE(*s, accept_mask); - - // Caller must ensure that we have at least one accept state on. - assert(ISNONZERO_STATE(accepts)); - - CHUNK_T chunks[NUM_STATE_CHUNKS]; - memcpy(chunks, &accepts, sizeof(accepts)); - - CHUNK_T mask_chunks[NUM_STATE_CHUNKS]; - memcpy(mask_chunks, &accept_mask, sizeof(accept_mask)); - - u32 base_index = 0; // Cumulative sum of mask popcount up to current chunk. 
- for (u32 i = 0; i < NUM_STATE_CHUNKS; i++) { - CHUNK_T chunk = chunks[i]; - while (chunk != 0) { - u32 bit = FIND_AND_CLEAR_FN(&chunk); - u32 local_idx = RANK_IN_MASK_FN(mask_chunks[i], bit); - u32 idx = local_idx + base_index; - const struct NFAAccept *a = &acceptTable[idx]; - DEBUG_PRINTF("state %u: firing report list=%u, offset=%llu\n", - bit + i * (u32)sizeof(chunk) * 8, a->reports, offset); - int rv = limexRunAccept((const char *)limex, a, callback, context, - offset); + const STATE_T accept_mask = *acceptMask; + STATE_T accepts = AND_STATE(*s, accept_mask); + + // Caller must ensure that we have at least one accept state on. + assert(ISNONZERO_STATE(accepts)); + + CHUNK_T chunks[NUM_STATE_CHUNKS]; + memcpy(chunks, &accepts, sizeof(accepts)); + + CHUNK_T mask_chunks[NUM_STATE_CHUNKS]; + memcpy(mask_chunks, &accept_mask, sizeof(accept_mask)); + + u32 base_index = 0; // Cumulative sum of mask popcount up to current chunk. + for (u32 i = 0; i < NUM_STATE_CHUNKS; i++) { + CHUNK_T chunk = chunks[i]; + while (chunk != 0) { + u32 bit = FIND_AND_CLEAR_FN(&chunk); + u32 local_idx = RANK_IN_MASK_FN(mask_chunks[i], bit); + u32 idx = local_idx + base_index; + const struct NFAAccept *a = &acceptTable[idx]; + DEBUG_PRINTF("state %u: firing report list=%u, offset=%llu\n", + bit + i * (u32)sizeof(chunk) * 8, a->reports, offset); + int rv = limexRunAccept((const char *)limex, a, callback, context, + offset); if (unlikely(rv == MO_HALT_MATCHING)) { return 1; } - if (squash != NULL && a->squash != MO_INVALID_IDX) { - DEBUG_PRINTF("applying squash mask at offset %u\n", a->squash); - const ENG_STATE_T *sq = - (const ENG_STATE_T *)((const char *)limex + a->squash); - *squash = AND_STATE(*squash, LOAD_FROM_ENG(sq)); + if (squash != NULL && a->squash != MO_INVALID_IDX) { + DEBUG_PRINTF("applying squash mask at offset %u\n", a->squash); + const ENG_STATE_T *sq = + (const ENG_STATE_T *)((const char *)limex + a->squash); + *squash = AND_STATE(*squash, LOAD_FROM_ENG(sq)); } } - base_index += POPCOUNT_FN(mask_chunks[i]); + base_index += POPCOUNT_FN(mask_chunks[i]); } return 0; } static never_inline -char PROCESS_ACCEPTS_FN(const IMPL_NFA_T *limex, STATE_T *s, - const STATE_T *acceptMask, - const struct NFAAccept *acceptTable, u64a offset, - NfaCallback callback, void *context) { - // We have squash masks we might have to apply after firing reports. - STATE_T squash = ONES_STATE; - char rv = PROCESS_ACCEPTS_IMPL_FN(limex, s, &squash, acceptMask, acceptTable, - offset, callback, context); - - *s = AND_STATE(*s, squash); - return rv; -} - -static never_inline -char PROCESS_ACCEPTS_NOSQUASH_FN(const IMPL_NFA_T *limex, const STATE_T *s, - const STATE_T *acceptMask, - const struct NFAAccept *acceptTable, - u64a offset, NfaCallback callback, - void *context) { - STATE_T *squash = NULL; - return PROCESS_ACCEPTS_IMPL_FN(limex, s, squash, acceptMask, acceptTable, - offset, callback, context); +char PROCESS_ACCEPTS_FN(const IMPL_NFA_T *limex, STATE_T *s, + const STATE_T *acceptMask, + const struct NFAAccept *acceptTable, u64a offset, + NfaCallback callback, void *context) { + // We have squash masks we might have to apply after firing reports. + STATE_T squash = ONES_STATE; + char rv = PROCESS_ACCEPTS_IMPL_FN(limex, s, &squash, acceptMask, acceptTable, + offset, callback, context); + + *s = AND_STATE(*s, squash); + return rv; } -// Run EOD accepts. Note that repeat_ctrl and repeat_state may be NULL if this -// LimEx contains no repeat structures.
+static never_inline +char PROCESS_ACCEPTS_NOSQUASH_FN(const IMPL_NFA_T *limex, const STATE_T *s, + const STATE_T *acceptMask, + const struct NFAAccept *acceptTable, + u64a offset, NfaCallback callback, + void *context) { + STATE_T *squash = NULL; + return PROCESS_ACCEPTS_IMPL_FN(limex, s, squash, acceptMask, acceptTable, + offset, callback, context); +} + +// Run EOD accepts. Note that repeat_ctrl and repeat_state may be NULL if this +// LimEx contains no repeat structures. static really_inline char TESTEOD_FN(const IMPL_NFA_T *limex, const STATE_T *s, const union RepeatControl *repeat_ctrl, - const char *repeat_state, u64a offset, + const char *repeat_state, u64a offset, NfaCallback callback, void *context) { assert(limex && s); @@ -200,16 +200,16 @@ char TESTEOD_FN(const IMPL_NFA_T *limex, const STATE_T *s, return MO_CONTINUE_MATCHING; } - const STATE_T acceptEodMask = LOAD_FROM_ENG(&limex->acceptAtEOD); - STATE_T foundAccepts = AND_STATE(*s, acceptEodMask); + const STATE_T acceptEodMask = LOAD_FROM_ENG(&limex->acceptAtEOD); + STATE_T foundAccepts = AND_STATE(*s, acceptEodMask); - SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state, - offset + 1 /* EOD 'symbol' */, &foundAccepts); + SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state, + offset + 1 /* EOD 'symbol' */, &foundAccepts); if (unlikely(ISNONZERO_STATE(foundAccepts))) { const struct NFAAccept *acceptEodTable = getAcceptEodTable(limex); - if (PROCESS_ACCEPTS_NOSQUASH_FN(limex, &foundAccepts, &acceptEodMask, - acceptEodTable, offset, callback, + if (PROCESS_ACCEPTS_NOSQUASH_FN(limex, &foundAccepts, &acceptEodMask, + acceptEodTable, offset, callback, context)) { return MO_HALT_MATCHING; } @@ -225,8 +225,8 @@ char REPORTCURRENT_FN(const IMPL_NFA_T *limex, const struct mq *q) { assert(q->state); assert(q_cur_type(q) == MQE_START); - STATE_T s = *(STATE_T *)q->state; - STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept); + STATE_T s = *(STATE_T *)q->state; + STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept); STATE_T foundAccepts = AND_STATE(s, acceptMask); if (unlikely(ISNONZERO_STATE(foundAccepts))) { @@ -235,8 +235,8 @@ char REPORTCURRENT_FN(const IMPL_NFA_T *limex, const struct mq *q) { const struct NFAAccept *acceptTable = getAcceptTable(limex); u64a offset = q_cur_offset(q); - if (PROCESS_ACCEPTS_NOSQUASH_FN(limex, &foundAccepts, &acceptMask, - acceptTable, offset, q->cb, + if (PROCESS_ACCEPTS_NOSQUASH_FN(limex, &foundAccepts, &acceptMask, + acceptTable, offset, q->cb, q->context)) { return MO_HALT_MATCHING; } @@ -247,7 +247,7 @@ char REPORTCURRENT_FN(const IMPL_NFA_T *limex, const struct mq *q) { static really_inline STATE_T INITIAL_FN(const IMPL_NFA_T *impl, char onlyDs) { - return LOAD_FROM_ENG(onlyDs ? &impl->initDS : &impl->init); + return LOAD_FROM_ENG(onlyDs ? 
&impl->initDS : &impl->init); } static really_inline @@ -258,9 +258,9 @@ STATE_T TOP_FN(const IMPL_NFA_T *impl, char onlyDs, STATE_T state) { static really_inline STATE_T TOPN_FN(const IMPL_NFA_T *limex, STATE_T state, u32 n) { assert(n < limex->topCount); - const ENG_STATE_T *topsptr = - (const ENG_STATE_T *)((const char *)limex + limex->topOffset); - STATE_T top = LOAD_FROM_ENG(&topsptr[n]); + const ENG_STATE_T *topsptr = + (const ENG_STATE_T *)((const char *)limex + limex->topOffset); + STATE_T top = LOAD_FROM_ENG(&topsptr[n]); return OR_STATE(top, state); } @@ -276,8 +276,8 @@ void EXPIRE_ESTATE_FN(const IMPL_NFA_T *limex, struct CONTEXT_T *ctx, DEBUG_PRINTF("expire estate at offset %llu\n", offset); - const STATE_T cyclics - = AND_STATE(ctx->s, LOAD_FROM_ENG(&limex->repeatCyclicMask)); + const STATE_T cyclics + = AND_STATE(ctx->s, LOAD_FROM_ENG(&limex->repeatCyclicMask)); if (ISZERO_STATE(cyclics)) { DEBUG_PRINTF("no cyclic states are on\n"); return; @@ -287,7 +287,7 @@ void EXPIRE_ESTATE_FN(const IMPL_NFA_T *limex, struct CONTEXT_T *ctx, const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i); u32 cyclicState = info->cyclicState; - if (!TESTBIT_STATE(cyclics, cyclicState)) { + if (!TESTBIT_STATE(cyclics, cyclicState)) { continue; } @@ -307,14 +307,14 @@ void EXPIRE_ESTATE_FN(const IMPL_NFA_T *limex, struct CONTEXT_T *ctx, last_top, repeat->repeatMax); u64a adj = 0; /* if the cycle's tugs are active at repeat max, it is still alive */ - if (TESTBIT_STATE(LOAD_FROM_ENG(&limex->accept), cyclicState) || - TESTBIT_STATE(LOAD_FROM_ENG(&limex->acceptAtEOD), cyclicState)) { + if (TESTBIT_STATE(LOAD_FROM_ENG(&limex->accept), cyclicState) || + TESTBIT_STATE(LOAD_FROM_ENG(&limex->acceptAtEOD), cyclicState)) { DEBUG_PRINTF("lazy tug possible - may still be inspected\n"); adj = 1; } else { - const ENG_STATE_T *tug_mask = - (const ENG_STATE_T *)((const char *)info + info->tugMaskOffset); - if (ISNONZERO_STATE(AND_STATE(ctx->s, LOAD_FROM_ENG(tug_mask)))) { + const ENG_STATE_T *tug_mask = + (const ENG_STATE_T *)((const char *)info + info->tugMaskOffset); + if (ISNONZERO_STATE(AND_STATE(ctx->s, LOAD_FROM_ENG(tug_mask)))) { DEBUG_PRINTF("tug possible - may still be inspected\n"); adj = 1; } @@ -336,75 +336,75 @@ char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state, u64a offset, ReportID report) { assert(limex); - const STATE_T accept_mask = LOAD_FROM_ENG(&limex->accept); - STATE_T accepts = AND_STATE(state, accept_mask); + const STATE_T accept_mask = LOAD_FROM_ENG(&limex->accept); + STATE_T accepts = AND_STATE(state, accept_mask); // Are we in an accept state? - if (ISZERO_STATE(accepts)) { + if (ISZERO_STATE(accepts)) { DEBUG_PRINTF("no accept states are on\n"); return 0; } - SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state, offset, &accepts); + SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state, offset, &accepts); DEBUG_PRINTF("looking for report %u\n", report); - const struct NFAAccept *acceptTable = getAcceptTable(limex); - - CHUNK_T chunks[NUM_STATE_CHUNKS]; - memcpy(chunks, &accepts, sizeof(accepts)); - - CHUNK_T mask_chunks[NUM_STATE_CHUNKS]; - memcpy(mask_chunks, &accept_mask, sizeof(accept_mask)); - - u32 base_index = 0; // Cumulative sum of mask popcount up to current chunk. 
- for (u32 i = 0; i < NUM_STATE_CHUNKS; i++) { - CHUNK_T chunk = chunks[i]; - while (chunk != 0) { - u32 bit = FIND_AND_CLEAR_FN(&chunk); - u32 local_idx = RANK_IN_MASK_FN(mask_chunks[i], bit); - u32 idx = local_idx + base_index; - assert(idx < limex->acceptCount); - const struct NFAAccept *a = &acceptTable[idx]; - DEBUG_PRINTF("state %u is on, report list at %u\n", - bit + i * (u32)sizeof(chunk) * 8, a->reports); - - if (limexAcceptHasReport((const char *)limex, a, report)) { - DEBUG_PRINTF("report %u is on\n", report); - return 1; - } + const struct NFAAccept *acceptTable = getAcceptTable(limex); + + CHUNK_T chunks[NUM_STATE_CHUNKS]; + memcpy(chunks, &accepts, sizeof(accepts)); + + CHUNK_T mask_chunks[NUM_STATE_CHUNKS]; + memcpy(mask_chunks, &accept_mask, sizeof(accept_mask)); + + u32 base_index = 0; // Cumulative sum of mask popcount up to current chunk. + for (u32 i = 0; i < NUM_STATE_CHUNKS; i++) { + CHUNK_T chunk = chunks[i]; + while (chunk != 0) { + u32 bit = FIND_AND_CLEAR_FN(&chunk); + u32 local_idx = RANK_IN_MASK_FN(mask_chunks[i], bit); + u32 idx = local_idx + base_index; + assert(idx < limex->acceptCount); + const struct NFAAccept *a = &acceptTable[idx]; + DEBUG_PRINTF("state %u is on, report list at %u\n", + bit + i * (u32)sizeof(chunk) * 8, a->reports); + + if (limexAcceptHasReport((const char *)limex, a, report)) { + DEBUG_PRINTF("report %u is on\n", report); + return 1; + } } - base_index += POPCOUNT_FN(mask_chunks[i]); + base_index += POPCOUNT_FN(mask_chunks[i]); } return 0; } -static really_inline -char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state, - union RepeatControl *repeat_ctrl, char *repeat_state, - u64a offset) { - assert(limex); - - const STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept); - STATE_T accstate = AND_STATE(state, acceptMask); - - // Are we in an accept state? - if (ISZERO_STATE(accstate)) { - DEBUG_PRINTF("no accept states are on\n"); - return 0; - } - - SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state, offset, &accstate); - - return ISNONZERO_STATE(accstate); -} - +static really_inline +char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state, + union RepeatControl *repeat_ctrl, char *repeat_state, + u64a offset) { + assert(limex); + + const STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept); + STATE_T accstate = AND_STATE(state, acceptMask); + + // Are we in an accept state? 
+ if (ISZERO_STATE(accstate)) { + DEBUG_PRINTF("no accept states are on\n"); + return 0; + } + + SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state, offset, &accstate); + + return ISNONZERO_STATE(accstate); +} + #undef TESTEOD_FN #undef REPORTCURRENT_FN #undef EXPIRE_ESTATE_FN #undef LIMEX_INACCEPT_FN -#undef LIMEX_INANYACCEPT_FN +#undef LIMEX_INANYACCEPT_FN #undef INITIAL_FN #undef TOP_FN #undef TOPN_FN @@ -418,14 +418,14 @@ char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state, #undef TESTBIT_STATE #undef ISNONZERO_STATE #undef ISZERO_STATE -#undef PROCESS_ACCEPTS_IMPL_FN +#undef PROCESS_ACCEPTS_IMPL_FN #undef PROCESS_ACCEPTS_FN #undef PROCESS_ACCEPTS_NOSQUASH_FN #undef SQUASH_UNTUG_BR_FN #undef GET_NFA_REPEAT_INFO_FN -#undef CHUNK_T -#undef FIND_AND_CLEAR_FN -#undef POPCOUNT_FN -#undef RANK_IN_MASK_FN -#undef NUM_STATE_CHUNKS +#undef CHUNK_T +#undef FIND_AND_CLEAR_FN +#undef POPCOUNT_FN +#undef RANK_IN_MASK_FN +#undef NUM_STATE_CHUNKS diff --git a/contrib/libs/hyperscan/src/nfa/limex_compile.cpp b/contrib/libs/hyperscan/src/nfa/limex_compile.cpp index 9233ae515e..16985ec6e6 100644 --- a/contrib/libs/hyperscan/src/nfa/limex_compile.cpp +++ b/contrib/libs/hyperscan/src/nfa/limex_compile.cpp @@ -26,11 +26,11 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** - * \file +/** + * \file * \brief Main NFA build code. */ - + #include "limex_compile.h" #include "accel.h" @@ -39,7 +39,7 @@ #include "limex_internal.h" #include "limex_limits.h" #include "nfa_build_util.h" -#include "nfagraph/ng_dominators.h" +#include "nfagraph/ng_dominators.h" #include "nfagraph/ng_holder.h" #include "nfagraph/ng_limex_accel.h" #include "nfagraph/ng_repeat.h" @@ -49,16 +49,16 @@ #include "repeatcompile.h" #include "util/alloc.h" #include "util/bitutils.h" -#include "util/bytecode_ptr.h" +#include "util/bytecode_ptr.h" #include "util/charreach.h" #include "util/compile_context.h" #include "util/container.h" -#include "util/flat_containers.h" +#include "util/flat_containers.h" #include "util/graph.h" #include "util/graph_range.h" -#include "util/graph_small_color_map.h" +#include "util/graph_small_color_map.h" #include "util/order_check.h" -#include "util/unordered.h" +#include "util/unordered.h" #include "util/verify_types.h" #include <algorithm> @@ -69,22 +69,22 @@ #include <map> #include <set> #include <vector> - + #include <boost/graph/breadth_first_search.hpp> -#include <boost/graph/depth_first_search.hpp> -#include <boost/range/adaptor/map.hpp> +#include <boost/graph/depth_first_search.hpp> +#include <boost/range/adaptor/map.hpp> using namespace std; -using boost::adaptors::map_values; +using boost::adaptors::map_values; namespace ue2 { -/** - * \brief Special state index value meaning that the vertex will not - * participate in an (NFA/DFA/etc) implementation. - */ -static constexpr u32 NO_STATE = ~0; - +/** + * \brief Special state index value meaning that the vertex will not + * participate in an (NFA/DFA/etc) implementation. 
+ */ +static constexpr u32 NO_STATE = ~0; + /* Maximum number of states taken as a small NFA */ static constexpr u32 MAX_SMALL_NFA_STATES = 64; @@ -109,21 +109,21 @@ struct precalcAccel { u32 double_offset; }; -struct limex_accel_info { - unordered_set<NFAVertex> accelerable; +struct limex_accel_info { + unordered_set<NFAVertex> accelerable; map<NFAStateSet, precalcAccel> precalc; - unordered_map<NFAVertex, flat_set<NFAVertex>> friends; - unordered_map<NFAVertex, AccelScheme> accel_map; + unordered_map<NFAVertex, flat_set<NFAVertex>> friends; + unordered_map<NFAVertex, AccelScheme> accel_map; }; static -unordered_map<NFAVertex, NFAStateSet> -reindexByStateId(const unordered_map<NFAVertex, NFAStateSet> &in, - const NGHolder &g, - const unordered_map<NFAVertex, u32> &state_ids, +unordered_map<NFAVertex, NFAStateSet> +reindexByStateId(const unordered_map<NFAVertex, NFAStateSet> &in, + const NGHolder &g, + const unordered_map<NFAVertex, u32> &state_ids, const u32 num_states) { - unordered_map<NFAVertex, NFAStateSet> out; - out.reserve(in.size()); + unordered_map<NFAVertex, NFAStateSet> out; + out.reserve(in.size()); vector<u32> indexToState(num_vertices(g), NO_STATE); for (const auto &m : state_ids) { @@ -153,20 +153,20 @@ reindexByStateId(const unordered_map<NFAVertex, NFAStateSet> &in, struct build_info { build_info(NGHolder &hi, - const unordered_map<NFAVertex, u32> &states_in, + const unordered_map<NFAVertex, u32> &states_in, const vector<BoundedRepeatData> &ri, - const unordered_map<NFAVertex, NFAStateSet> &rsmi, - const unordered_map<NFAVertex, NFAStateSet> &smi, - const map<u32, set<NFAVertex>> &ti, const set<NFAVertex> &zi, - bool dai, bool sci, const CompileContext &cci, u32 nsi) - : h(hi), state_ids(states_in), repeats(ri), tops(ti), tugs(nsi), - zombies(zi), do_accel(dai), stateCompression(sci), cc(cci), + const unordered_map<NFAVertex, NFAStateSet> &rsmi, + const unordered_map<NFAVertex, NFAStateSet> &smi, + const map<u32, set<NFAVertex>> &ti, const set<NFAVertex> &zi, + bool dai, bool sci, const CompileContext &cci, u32 nsi) + : h(hi), state_ids(states_in), repeats(ri), tops(ti), tugs(nsi), + zombies(zi), do_accel(dai), stateCompression(sci), cc(cci), num_states(nsi) { for (const auto &br : repeats) { - for (auto v : br.tug_triggers) { - assert(state_ids.at(v) != NO_STATE); - tugs.set(state_ids.at(v)); - } + for (auto v : br.tug_triggers) { + assert(state_ids.at(v) != NO_STATE); + tugs.set(state_ids.at(v)); + } br_cyclic[br.cyclic] = BoundedRepeatSummary(br.repeatMin, br.repeatMax); } @@ -178,28 +178,28 @@ struct build_info { } NGHolder &h; - const unordered_map<NFAVertex, u32> &state_ids; + const unordered_map<NFAVertex, u32> &state_ids; const vector<BoundedRepeatData> &repeats; // Squash maps; state sets are indexed by state_id. 
- unordered_map<NFAVertex, NFAStateSet> reportSquashMap; - unordered_map<NFAVertex, NFAStateSet> squashMap; + unordered_map<NFAVertex, NFAStateSet> reportSquashMap; + unordered_map<NFAVertex, NFAStateSet> squashMap; - const map<u32, set<NFAVertex>> &tops; - NFAStateSet tugs; + const map<u32, set<NFAVertex>> &tops; + NFAStateSet tugs; map<NFAVertex, BoundedRepeatSummary> br_cyclic; const set<NFAVertex> &zombies; bool do_accel; bool stateCompression; const CompileContext &cc; u32 num_states; - limex_accel_info accel; + limex_accel_info accel; }; -#define LAST_LIMEX_NFA LIMEX_NFA_512 - +#define LAST_LIMEX_NFA LIMEX_NFA_512 + // Constants for scoring mechanism -const int SHIFT_COST = 10; // limex: cost per shift mask +const int SHIFT_COST = 10; // limex: cost per shift mask const int EXCEPTION_COST = 4; // limex: per exception template<NFAEngineType t> struct NFATraits { }; @@ -256,7 +256,7 @@ bool isLimitedTransition(int from, int to, int maxshift) { // Fill a bit mask template<class Mask> -void maskFill(Mask &m, u8 c) { +void maskFill(Mask &m, u8 c) { memset(&m, c, sizeof(m)); } @@ -288,17 +288,17 @@ void maskSetBits(Mask &m, const NFAStateSet &bits) { } } -template<class Mask> -bool isMaskZero(Mask &m) { - u8 *m8 = (u8 *)&m; - for (u32 i = 0; i < sizeof(m); i++) { - if (m8[i]) { - return false; - } - } - return true; -} - +template<class Mask> +bool isMaskZero(Mask &m) { + u8 *m8 = (u8 *)&m; + for (u32 i = 0; i < sizeof(m); i++) { + if (m8[i]) { + return false; + } + } + return true; +} + // Sets an entire byte in a mask to the given value template<class Mask> void maskSetByte(Mask &m, const unsigned int idx, const char val) { @@ -374,7 +374,7 @@ void buildReachMapping(const build_info &args, vector<NFAStateSet> &reach, } struct AccelBuild { - AccelBuild() : v(NGHolder::null_vertex()), state(0), offset(0) {} + AccelBuild() : v(NGHolder::null_vertex()), state(0), offset(0) {} NFAVertex v; u32 state; u32 offset; // offset correction to apply @@ -496,7 +496,7 @@ bool allow_wide_accel(const vector<NFAVertex> &vv, const NGHolder &g, static void nfaFindAccelSchemes(const NGHolder &g, const map<NFAVertex, BoundedRepeatSummary> &br_cyclic, - unordered_map<NFAVertex, AccelScheme> *out) { + unordered_map<NFAVertex, AccelScheme> *out) { vector<CharReach> refined_cr = reduced_cr(g, br_cyclic); NFAVertex sds_or_proxy = get_sds_or_proxy(g); @@ -505,7 +505,7 @@ void nfaFindAccelSchemes(const NGHolder &g, // We want to skip any vertices that don't lead to at least one other // (self-loops don't count) vertex. 
if (!has_proper_successor(v, g)) { - DEBUG_PRINTF("skipping vertex %zu\n", g[v].index); + DEBUG_PRINTF("skipping vertex %zu\n", g[v].index); continue; } @@ -513,7 +513,7 @@ void nfaFindAccelSchemes(const NGHolder &g, AccelScheme as; if (nfaCheckAccel(g, v, refined_cr, br_cyclic, &as, allow_wide)) { - DEBUG_PRINTF("graph vertex %zu is accelerable with offset %u.\n", + DEBUG_PRINTF("graph vertex %zu is accelerable with offset %u.\n", g[v].index, as.offset); (*out)[v] = as; } @@ -521,11 +521,11 @@ void nfaFindAccelSchemes(const NGHolder &g, } struct fas_visitor : public boost::default_bfs_visitor { - fas_visitor(const unordered_map<NFAVertex, AccelScheme> &am_in, - unordered_map<NFAVertex, AccelScheme> *out_in) + fas_visitor(const unordered_map<NFAVertex, AccelScheme> &am_in, + unordered_map<NFAVertex, AccelScheme> *out_in) : accel_map(am_in), out(out_in) {} - void discover_vertex(NFAVertex v, const NGHolder &) { + void discover_vertex(NFAVertex v, const NGHolder &) { if (accel_map.find(v) != accel_map.end()) { (*out)[v] = accel_map.find(v)->second; } @@ -533,50 +533,50 @@ struct fas_visitor : public boost::default_bfs_visitor { throw this; /* done */ } } - const unordered_map<NFAVertex, AccelScheme> &accel_map; - unordered_map<NFAVertex, AccelScheme> *out; + const unordered_map<NFAVertex, AccelScheme> &accel_map; + unordered_map<NFAVertex, AccelScheme> *out; }; static -void filterAccelStates(NGHolder &g, const map<u32, set<NFAVertex>> &tops, - unordered_map<NFAVertex, AccelScheme> *accel_map) { +void filterAccelStates(NGHolder &g, const map<u32, set<NFAVertex>> &tops, + unordered_map<NFAVertex, AccelScheme> *accel_map) { /* We want the NFA_MAX_ACCEL_STATES best acceleration states, everything * else should be ditched. We use a simple BFS to choose accel states near * the start. */ - vector<NFAEdge> tempEdges; - for (const auto &vv : tops | map_values) { - for (NFAVertex v : vv) { - if (!edge(g.start, v, g).second) { - tempEdges.push_back(add_edge(g.start, v, g).first); - } - } - } + vector<NFAEdge> tempEdges; + for (const auto &vv : tops | map_values) { + for (NFAVertex v : vv) { + if (!edge(g.start, v, g).second) { + tempEdges.push_back(add_edge(g.start, v, g).first); + } + } + } // Similarly, connect (start, startDs) if necessary. if (!edge(g.start, g.startDs, g).second) { - NFAEdge e = add_edge(g.start, g.startDs, g); - tempEdges.push_back(e); // Remove edge later. + NFAEdge e = add_edge(g.start, g.startDs, g); + tempEdges.push_back(e); // Remove edge later. 
} - unordered_map<NFAVertex, AccelScheme> out; + unordered_map<NFAVertex, AccelScheme> out; try { - boost::breadth_first_search(g, g.start, - visitor(fas_visitor(*accel_map, &out)) - .color_map(make_small_color_map(g))); + boost::breadth_first_search(g, g.start, + visitor(fas_visitor(*accel_map, &out)) + .color_map(make_small_color_map(g))); } catch (fas_visitor *) { ; /* found max accel_states */ } - remove_edges(tempEdges, g); + remove_edges(tempEdges, g); assert(out.size() <= NFA_MAX_ACCEL_STATES); accel_map->swap(out); } static -bool containsBadSubset(const limex_accel_info &accel, +bool containsBadSubset(const limex_accel_info &accel, const NFAStateSet &state_set, const u32 effective_sds) { NFAStateSet subset(state_set.size()); for (size_t j = state_set.find_first(); j != state_set.npos; @@ -597,28 +597,28 @@ bool containsBadSubset(const limex_accel_info &accel, } static -bool is_too_wide(const AccelScheme &as) { - return as.cr.count() > MAX_MERGED_ACCEL_STOPS; -} - -static -void fillAccelInfo(build_info &bi) { - if (!bi.do_accel) { - return; - } - - NGHolder &g = bi.h; - limex_accel_info &accel = bi.accel; - unordered_map<NFAVertex, AccelScheme> &accel_map = accel.accel_map; - const map<NFAVertex, BoundedRepeatSummary> &br_cyclic = bi.br_cyclic; - const unordered_map<NFAVertex, u32> &state_ids = bi.state_ids; - const u32 num_states = bi.num_states; - - nfaFindAccelSchemes(g, br_cyclic, &accel_map); - filterAccelStates(g, bi.tops, &accel_map); - - assert(accel_map.size() <= NFA_MAX_ACCEL_STATES); - +bool is_too_wide(const AccelScheme &as) { + return as.cr.count() > MAX_MERGED_ACCEL_STOPS; +} + +static +void fillAccelInfo(build_info &bi) { + if (!bi.do_accel) { + return; + } + + NGHolder &g = bi.h; + limex_accel_info &accel = bi.accel; + unordered_map<NFAVertex, AccelScheme> &accel_map = accel.accel_map; + const map<NFAVertex, BoundedRepeatSummary> &br_cyclic = bi.br_cyclic; + const unordered_map<NFAVertex, u32> &state_ids = bi.state_ids; + const u32 num_states = bi.num_states; + + nfaFindAccelSchemes(g, br_cyclic, &accel_map); + filterAccelStates(g, bi.tops, &accel_map); + + assert(accel_map.size() <= NFA_MAX_ACCEL_STATES); + vector<CharReach> refined_cr = reduced_cr(g, br_cyclic); vector<NFAVertex> astates; @@ -635,7 +635,7 @@ void fillAccelInfo(build_info &bi) { /* for each subset of the accel keys need to find an accel scheme */ assert(astates.size() < 32); - sort(astates.begin(), astates.end()); + sort(astates.begin(), astates.end()); for (u32 i = 1, i_end = 1U << astates.size(); i < i_end; i++) { DEBUG_PRINTF("saving info for accel %u\n", i); @@ -649,7 +649,7 @@ void fillAccelInfo(build_info &bi) { } } - if (containsBadSubset(accel, state_set, effective_sds)) { + if (containsBadSubset(accel, state_set, effective_sds)) { DEBUG_PRINTF("accel %u has bad subset\n", i); continue; /* if a subset failed to build we would too */ } @@ -657,27 +657,27 @@ void fillAccelInfo(build_info &bi) { const bool allow_wide = allow_wide_accel(states, g, sds_or_proxy); AccelScheme as = nfaFindAccel(g, states, refined_cr, br_cyclic, - allow_wide, true); - if (is_too_wide(as)) { + allow_wide, true); + if (is_too_wide(as)) { DEBUG_PRINTF("accel %u too wide (%zu, %d)\n", i, as.cr.count(), MAX_MERGED_ACCEL_STOPS); continue; } - DEBUG_PRINTF("accel %u ok with offset s%u, d%u\n", i, as.offset, - as.double_offset); + DEBUG_PRINTF("accel %u ok with offset s%u, d%u\n", i, as.offset, + as.double_offset); - precalcAccel &pa = accel.precalc[state_set]; + precalcAccel &pa = accel.precalc[state_set]; pa.single_offset 
= as.offset; pa.single_cr = as.cr; - if (as.double_byte.size() != 0) { - pa.double_offset = as.double_offset; - pa.double_lits = as.double_byte; - pa.double_cr = as.double_cr; + if (as.double_byte.size() != 0) { + pa.double_offset = as.double_offset; + pa.double_lits = as.double_byte; + pa.double_cr = as.double_cr; } - - useful |= state_set; + + useful |= state_set; } for (const auto &m : accel_map) { @@ -694,169 +694,169 @@ void fillAccelInfo(build_info &bi) { state_set.reset(); state_set.set(state_id); - accel.accelerable.insert(v); - findAccelFriends(g, v, br_cyclic, offset, &accel.friends[v]); + accel.accelerable.insert(v); + findAccelFriends(g, v, br_cyclic, offset, &accel.friends[v]); } } -/** The AccelAux structure has large alignment specified, and this makes some - * compilers do odd things unless we specify a custom allocator. */ -typedef vector<AccelAux, AlignedAllocator<AccelAux, alignof(AccelAux)>> - AccelAuxVector; - -#define IMPOSSIBLE_ACCEL_MASK (~0U) - +/** The AccelAux structure has large alignment specified, and this makes some + * compilers do odd things unless we specify a custom allocator. */ +typedef vector<AccelAux, AlignedAllocator<AccelAux, alignof(AccelAux)>> + AccelAuxVector; + +#define IMPOSSIBLE_ACCEL_MASK (~0U) + static -u32 getEffectiveAccelStates(const build_info &args, - const unordered_map<NFAVertex, NFAVertex> &dom_map, - u32 active_accel_mask, - const vector<AccelBuild> &accelStates) { - /* accelStates is indexed by the acceleration bit index and contains a - * reference to the original vertex & state_id */ - - /* Cases to consider: - * - * 1: Accel states a and b are on and b can squash a - * --> we can ignore a. This will result in a no longer being accurately - * modelled - we may miss escapes turning it off and we may also miss - * its successors being activated. - * - * 2: Accel state b is on but accel state a is off and a is .* and must be - * seen before b is reached (and would not be covered by (1)) - * --> if a is squashable (or may die unexpectedly) we should continue - * as is - * --> if a is not squashable we can treat this as a+b or as a no accel, - * impossible case - * --> this case could be extended to handle non dot reaches by - * effectively creating something similar to squash masks for the - * reverse graph - * - * - * Other cases: - * - * 3: Accel states a and b are on but have incompatible reaches - * --> we should treat this as an impossible case. Actually, this case - * is unlikely to arise as we pick states with wide reaches to - * accelerate so an empty intersection is unlikely. - * - * Note: we need to be careful when dealing with accel states corresponding - * to bounded repeat cyclics - they may 'turn off' based on a max bound and - * so we may still require on earlier states to be accurately modelled. 
- */ - const NGHolder &h = args.h; - - /* map from accel_id to mask of accel_ids that it is dominated by */ - vector<u32> dominated_by(accelStates.size()); - - map<NFAVertex, u32> accel_id_map; - for (u32 accel_id = 0; accel_id < accelStates.size(); accel_id++) { - NFAVertex v = accelStates[accel_id].v; - accel_id_map[v] = accel_id; - } - - /* Note: we want a slightly less strict defn of dominate as skip edges - * prevent .* 'truly' dominating */ - for (u32 local_accel_mask = active_accel_mask; local_accel_mask; ) { - u32 accel_id = findAndClearLSB_32(&local_accel_mask); - assert(accel_id < accelStates.size()); - NFAVertex v = accelStates[accel_id].v; - while (contains(dom_map, v) && dom_map.at(v)) { - v = dom_map.at(v); - if (contains(accel_id_map, v)) { - dominated_by[accel_id] |= 1U << accel_id_map[v]; - } - /* TODO: could also look at inv_adj vertices to handle fan-in */ - for (NFAVertex a : adjacent_vertices_range(v, h)) { - if (a == v || !contains(accel_id_map, a) - || a == accelStates[accel_id].v /* not likely */) { - continue; - } - if (!is_subset_of(h[v].reports, h[a].reports)) { - continue; - } - auto v_succ = succs(v, h); - auto a_succ = succs(a, h); - if (is_subset_of(v_succ, a_succ)) { - dominated_by[accel_id] |= 1U << accel_id_map[a]; - } - } - } - } - - u32 may_turn_off = 0; /* BR with max bound, non-dots, squashed, etc */ - for (u32 local_accel_mask = active_accel_mask; local_accel_mask; ) { - u32 accel_id = findAndClearLSB_32(&local_accel_mask); - NFAVertex v = accelStates[accel_id].v; - u32 state_id = accelStates[accel_id].state; - assert(contains(args.accel.accelerable, v)); - if (!h[v].char_reach.all()) { - may_turn_off |= 1U << accel_id; - continue; - } - if (contains(args.br_cyclic, v) - && args.br_cyclic.at(v).repeatMax != depth::infinity()) { - may_turn_off |= 1U << accel_id; - continue; - } - for (const auto &s_mask : args.squashMap | map_values) { - if (!s_mask.test(state_id)) { - may_turn_off |= 1U << accel_id; - break; - } - } - for (const auto &s_mask : args.reportSquashMap | map_values) { - if (!s_mask.test(state_id)) { - may_turn_off |= 1U << accel_id; - break; - } - } - } - - /* Case 1: */ - u32 ignored = 0; - for (u32 local_accel_mask = active_accel_mask; local_accel_mask; ) { - u32 accel_id_b = findAndClearLSB_32(&local_accel_mask); - NFAVertex v = accelStates[accel_id_b].v; - if (!contains(args.squashMap, v)) { - continue; - } - assert(!contains(args.br_cyclic, v) - || args.br_cyclic.at(v).repeatMax == depth::infinity()); - NFAStateSet squashed = args.squashMap.at(v); - squashed.flip(); /* default sense for mask of survivors */ - - for (u32 local_accel_mask2 = active_accel_mask; local_accel_mask2; ) { - u32 accel_id_a = findAndClearLSB_32(&local_accel_mask2); - if (squashed.test(accelStates[accel_id_a].state)) { - ignored |= 1U << accel_id_a; - } - } - } - - /* Case 2: */ - for (u32 local_accel_mask = active_accel_mask; local_accel_mask; ) { - u32 accel_id = findAndClearLSB_32(&local_accel_mask); - - u32 stuck_dominators = dominated_by[accel_id] & ~may_turn_off; - if ((stuck_dominators & active_accel_mask) != stuck_dominators) { - DEBUG_PRINTF("only %08x on, but we require %08x\n", - active_accel_mask, stuck_dominators); - return IMPOSSIBLE_ACCEL_MASK; - } - } - - if (ignored) { - DEBUG_PRINTF("in %08x, ignoring %08x\n", active_accel_mask, ignored); - } - - return active_accel_mask & ~ignored; +u32 getEffectiveAccelStates(const build_info &args, + const unordered_map<NFAVertex, NFAVertex> &dom_map, + u32 active_accel_mask, + const vector<AccelBuild> 
&accelStates) { + /* accelStates is indexed by the acceleration bit index and contains a + * reference to the original vertex & state_id */ + + /* Cases to consider: + * + * 1: Accel states a and b are on and b can squash a + * --> we can ignore a. This will result in a no longer being accurately + * modelled - we may miss escapes turning it off and we may also miss + * its successors being activated. + * + * 2: Accel state b is on but accel state a is off and a is .* and must be + * seen before b is reached (and would not be covered by (1)) + * --> if a is squashable (or may die unexpectedly) we should continue + * as is + * --> if a is not squashable we can treat this as a+b or as a no accel, + * impossible case + * --> this case could be extended to handle non dot reaches by + * effectively creating something similar to squash masks for the + * reverse graph + * + * + * Other cases: + * + * 3: Accel states a and b are on but have incompatible reaches + * --> we should treat this as an impossible case. Actually, this case + * is unlikely to arise as we pick states with wide reaches to + * accelerate so an empty intersection is unlikely. + * + * Note: we need to be careful when dealing with accel states corresponding + * to bounded repeat cyclics - they may 'turn off' based on a max bound and + * so we may still require on earlier states to be accurately modelled. + */ + const NGHolder &h = args.h; + + /* map from accel_id to mask of accel_ids that it is dominated by */ + vector<u32> dominated_by(accelStates.size()); + + map<NFAVertex, u32> accel_id_map; + for (u32 accel_id = 0; accel_id < accelStates.size(); accel_id++) { + NFAVertex v = accelStates[accel_id].v; + accel_id_map[v] = accel_id; + } + + /* Note: we want a slightly less strict defn of dominate as skip edges + * prevent .* 'truly' dominating */ + for (u32 local_accel_mask = active_accel_mask; local_accel_mask; ) { + u32 accel_id = findAndClearLSB_32(&local_accel_mask); + assert(accel_id < accelStates.size()); + NFAVertex v = accelStates[accel_id].v; + while (contains(dom_map, v) && dom_map.at(v)) { + v = dom_map.at(v); + if (contains(accel_id_map, v)) { + dominated_by[accel_id] |= 1U << accel_id_map[v]; + } + /* TODO: could also look at inv_adj vertices to handle fan-in */ + for (NFAVertex a : adjacent_vertices_range(v, h)) { + if (a == v || !contains(accel_id_map, a) + || a == accelStates[accel_id].v /* not likely */) { + continue; + } + if (!is_subset_of(h[v].reports, h[a].reports)) { + continue; + } + auto v_succ = succs(v, h); + auto a_succ = succs(a, h); + if (is_subset_of(v_succ, a_succ)) { + dominated_by[accel_id] |= 1U << accel_id_map[a]; + } + } + } + } + + u32 may_turn_off = 0; /* BR with max bound, non-dots, squashed, etc */ + for (u32 local_accel_mask = active_accel_mask; local_accel_mask; ) { + u32 accel_id = findAndClearLSB_32(&local_accel_mask); + NFAVertex v = accelStates[accel_id].v; + u32 state_id = accelStates[accel_id].state; + assert(contains(args.accel.accelerable, v)); + if (!h[v].char_reach.all()) { + may_turn_off |= 1U << accel_id; + continue; + } + if (contains(args.br_cyclic, v) + && args.br_cyclic.at(v).repeatMax != depth::infinity()) { + may_turn_off |= 1U << accel_id; + continue; + } + for (const auto &s_mask : args.squashMap | map_values) { + if (!s_mask.test(state_id)) { + may_turn_off |= 1U << accel_id; + break; + } + } + for (const auto &s_mask : args.reportSquashMap | map_values) { + if (!s_mask.test(state_id)) { + may_turn_off |= 1U << accel_id; + break; + } + } + } + + /* Case 1: */ + u32 
ignored = 0; + for (u32 local_accel_mask = active_accel_mask; local_accel_mask; ) { + u32 accel_id_b = findAndClearLSB_32(&local_accel_mask); + NFAVertex v = accelStates[accel_id_b].v; + if (!contains(args.squashMap, v)) { + continue; + } + assert(!contains(args.br_cyclic, v) + || args.br_cyclic.at(v).repeatMax == depth::infinity()); + NFAStateSet squashed = args.squashMap.at(v); + squashed.flip(); /* default sense for mask of survivors */ + + for (u32 local_accel_mask2 = active_accel_mask; local_accel_mask2; ) { + u32 accel_id_a = findAndClearLSB_32(&local_accel_mask2); + if (squashed.test(accelStates[accel_id_a].state)) { + ignored |= 1U << accel_id_a; + } + } + } + + /* Case 2: */ + for (u32 local_accel_mask = active_accel_mask; local_accel_mask; ) { + u32 accel_id = findAndClearLSB_32(&local_accel_mask); + + u32 stuck_dominators = dominated_by[accel_id] & ~may_turn_off; + if ((stuck_dominators & active_accel_mask) != stuck_dominators) { + DEBUG_PRINTF("only %08x on, but we require %08x\n", + active_accel_mask, stuck_dominators); + return IMPOSSIBLE_ACCEL_MASK; + } + } + + if (ignored) { + DEBUG_PRINTF("in %08x, ignoring %08x\n", active_accel_mask, ignored); + } + + return active_accel_mask & ~ignored; } static void buildAccel(const build_info &args, NFAStateSet &accelMask, NFAStateSet &accelFriendsMask, AccelAuxVector &auxvec, vector<u8> &accelTable) { - const limex_accel_info &accel = args.accel; + const limex_accel_info &accel = args.accel; // Init, all zeroes. accelMask.resize(args.num_states); @@ -874,8 +874,8 @@ void buildAccel(const build_info &args, NFAStateSet &accelMask, return; } - const auto dom_map = findDominators(args.h); - + const auto dom_map = findDominators(args.h); + // We have 2^n different accel entries, one for each possible // combination of accelerable states. assert(accelStates.size() < 32); @@ -885,24 +885,24 @@ void buildAccel(const build_info &args, NFAStateSet &accelMask, // Set up a unioned AccelBuild for every possible combination of the set // bits in accelStates. 
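
The comment just above ("one entry for each possible combination of accelerable states", with the `assert(accelStates.size() < 32)` guard) describes a table indexed by a subset bitmask. A minimal, self-contained sketch of that subset-union idea — `CharClass` and `buildCombinedStops` are illustrative stand-ins, not Hyperscan's `CharReach`/`AccelBuild` types:

```cpp
#include <bitset>
#include <cassert>
#include <cstdint>
#include <vector>

using CharClass = std::bitset<256>; // stand-in for CharReach

std::vector<CharClass>
buildCombinedStops(const std::vector<CharClass> &perState) {
    const std::size_t n = perState.size(); // number of accel states
    assert(n < 32);                        // mirrors the assert in the real code
    std::vector<CharClass> table(std::size_t{1} << n);
    for (std::uint32_t mask = 1; mask < table.size(); mask++) {
        for (std::size_t j = 0; j < n; j++) {
            if (mask & (std::uint32_t{1} << j)) {
                table[mask] |= perState[j]; // union stop classes of set bits
            }
        }
    }
    return table;
}
```

Entry `i` is then usable whenever exactly the accel states in bitmask `i` are live; the real loop additionally routes each mask through getEffectiveAccelStates() first.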
vector<AccelBuild> accelOuts(accelCount); - vector<u32> effective_accel_set; - effective_accel_set.push_back(0); /* empty is effectively empty */ - + vector<u32> effective_accel_set; + effective_accel_set.push_back(0); /* empty is effectively empty */ + for (u32 i = 1; i < accelCount; i++) { - u32 effective_i = getEffectiveAccelStates(args, dom_map, i, - accelStates); - effective_accel_set.push_back(effective_i); - - if (effective_i == IMPOSSIBLE_ACCEL_MASK) { - DEBUG_PRINTF("this combination of accel states is not possible\n"); - accelOuts[i].stop1 = CharReach::dot(); - continue; - } - - while (effective_i) { - u32 base_accel_state = findAndClearLSB_32(&effective_i); - combineAccel(accelStates[base_accel_state], accelOuts[i]); - } + u32 effective_i = getEffectiveAccelStates(args, dom_map, i, + accelStates); + effective_accel_set.push_back(effective_i); + + if (effective_i == IMPOSSIBLE_ACCEL_MASK) { + DEBUG_PRINTF("this combination of accel states is not possible\n"); + accelOuts[i].stop1 = CharReach::dot(); + continue; + } + + while (effective_i) { + u32 base_accel_state = findAndClearLSB_32(&effective_i); + combineAccel(accelStates[base_accel_state], accelOuts[i]); + } minimiseAccel(accelOuts[i]); } @@ -921,25 +921,25 @@ void buildAccel(const build_info &args, NFAStateSet &accelMask, for (u32 i = 1; i < accelCount; i++) { memset(&aux, 0, sizeof(aux)); - NFAStateSet effective_states(args.num_states); - u32 effective_i = effective_accel_set[i]; + NFAStateSet effective_states(args.num_states); + u32 effective_i = effective_accel_set[i]; AccelInfo ainfo; ainfo.double_offset = accelOuts[i].offset; ainfo.double_stop1 = accelOuts[i].stop1; ainfo.double_stop2 = accelOuts[i].stop2; - if (effective_i != IMPOSSIBLE_ACCEL_MASK) { - while (effective_i) { - u32 base_accel_id = findAndClearLSB_32(&effective_i); - effective_states.set(accelStates[base_accel_id].state); - } - - if (contains(accel.precalc, effective_states)) { - const auto &precalc = accel.precalc.at(effective_states); - ainfo.single_offset = precalc.single_offset; - ainfo.single_stops = precalc.single_cr; - } + if (effective_i != IMPOSSIBLE_ACCEL_MASK) { + while (effective_i) { + u32 base_accel_id = findAndClearLSB_32(&effective_i); + effective_states.set(accelStates[base_accel_id].state); + } + + if (contains(accel.precalc, effective_states)) { + const auto &precalc = accel.precalc.at(effective_states); + ainfo.single_offset = precalc.single_offset; + ainfo.single_stops = precalc.single_cr; + } } buildAccelAux(ainfo, &aux); @@ -981,105 +981,105 @@ void buildAccel(const build_info &args, NFAStateSet &accelMask, } static -u32 addSquashMask(const build_info &args, const NFAVertex &v, - vector<NFAStateSet> &squash) { - auto sit = args.reportSquashMap.find(v); - if (sit == args.reportSquashMap.end()) { - return MO_INVALID_IDX; - } - - // This state has a squash mask. Paw through the existing vector to - // see if we've already seen it, otherwise add a new one. - auto it = find(squash.begin(), squash.end(), sit->second); - if (it != squash.end()) { +u32 addSquashMask(const build_info &args, const NFAVertex &v, + vector<NFAStateSet> &squash) { + auto sit = args.reportSquashMap.find(v); + if (sit == args.reportSquashMap.end()) { + return MO_INVALID_IDX; + } + + // This state has a squash mask. Paw through the existing vector to + // see if we've already seen it, otherwise add a new one. 
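
addSquashMask above dedups squash masks by scanning the existing vector before appending. A sketch of that interning idiom in isolation — `internMask` and `Mask` are illustrative names, not the real API:

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

template <typename Mask>
std::uint32_t internMask(const Mask &m, std::vector<Mask> &pool) {
    auto it = std::find(pool.begin(), pool.end(), m);
    if (it != pool.end()) { // already present: reuse its index
        return static_cast<std::uint32_t>(std::distance(pool.begin(), it));
    }
    pool.push_back(m);      // miss: append and return the new index
    return static_cast<std::uint32_t>(pool.size() - 1);
}
```

Linear search is fine here because the pool of distinct squash masks stays small in practice.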
+ auto it = find(squash.begin(), squash.end(), sit->second); + if (it != squash.end()) { return verify_u32(std::distance(squash.begin(), it)); - } - u32 idx = verify_u32(squash.size()); - squash.push_back(sit->second); - return idx; -} - -using ReportListCache = ue2_unordered_map<vector<ReportID>, u32>; - -static -u32 addReports(const flat_set<ReportID> &r, vector<ReportID> &reports, - ReportListCache &reports_cache) { - assert(!r.empty()); - - vector<ReportID> my_reports(begin(r), end(r)); - my_reports.push_back(MO_INVALID_IDX); // sentinel - - auto cache_it = reports_cache.find(my_reports); - if (cache_it != end(reports_cache)) { - u32 offset = cache_it->second; - DEBUG_PRINTF("reusing cached report list at %u\n", offset); - return offset; - } - - auto it = search(begin(reports), end(reports), begin(my_reports), - end(my_reports)); - if (it != end(reports)) { + } + u32 idx = verify_u32(squash.size()); + squash.push_back(sit->second); + return idx; +} + +using ReportListCache = ue2_unordered_map<vector<ReportID>, u32>; + +static +u32 addReports(const flat_set<ReportID> &r, vector<ReportID> &reports, + ReportListCache &reports_cache) { + assert(!r.empty()); + + vector<ReportID> my_reports(begin(r), end(r)); + my_reports.push_back(MO_INVALID_IDX); // sentinel + + auto cache_it = reports_cache.find(my_reports); + if (cache_it != end(reports_cache)) { + u32 offset = cache_it->second; + DEBUG_PRINTF("reusing cached report list at %u\n", offset); + return offset; + } + + auto it = search(begin(reports), end(reports), begin(my_reports), + end(my_reports)); + if (it != end(reports)) { u32 offset = verify_u32(std::distance(begin(reports), it)); - DEBUG_PRINTF("reusing found report list at %u\n", offset); - return offset; - } - - u32 offset = verify_u32(reports.size()); - insert(&reports, reports.end(), my_reports); - reports_cache.emplace(move(my_reports), offset); - return offset; -} - -static -void buildAcceptsList(const build_info &args, ReportListCache &reports_cache, - vector<NFAVertex> &verts, vector<NFAAccept> &accepts, - vector<ReportID> &reports, vector<NFAStateSet> &squash) { - if (verts.empty()) { - return; - } - - DEBUG_PRINTF("building accept lists for %zu states\n", verts.size()); - - auto cmp_state_id = [&args](NFAVertex a, NFAVertex b) { - u32 a_state = args.state_ids.at(a); - u32 b_state = args.state_ids.at(b); - assert(a_state != b_state || a == b); - return a_state < b_state; - }; - - sort(begin(verts), end(verts), cmp_state_id); - + DEBUG_PRINTF("reusing found report list at %u\n", offset); + return offset; + } + + u32 offset = verify_u32(reports.size()); + insert(&reports, reports.end(), my_reports); + reports_cache.emplace(move(my_reports), offset); + return offset; +} + +static +void buildAcceptsList(const build_info &args, ReportListCache &reports_cache, + vector<NFAVertex> &verts, vector<NFAAccept> &accepts, + vector<ReportID> &reports, vector<NFAStateSet> &squash) { + if (verts.empty()) { + return; + } + + DEBUG_PRINTF("building accept lists for %zu states\n", verts.size()); + + auto cmp_state_id = [&args](NFAVertex a, NFAVertex b) { + u32 a_state = args.state_ids.at(a); + u32 b_state = args.state_ids.at(b); + assert(a_state != b_state || a == b); + return a_state < b_state; + }; + + sort(begin(verts), end(verts), cmp_state_id); + const NGHolder &h = args.h; - for (const auto &v : verts) { - DEBUG_PRINTF("state=%u, reports: [%s]\n", args.state_ids.at(v), - as_string_list(h[v].reports).c_str()); - NFAAccept a; - memset(&a, 0, sizeof(a)); - assert(!h[v].reports.empty()); - 
if (h[v].reports.size() == 1) { - a.single_report = 1; - a.reports = *h[v].reports.begin(); - } else { - a.single_report = 0; - a.reports = addReports(h[v].reports, reports, reports_cache); - } - a.squash = addSquashMask(args, v, squash); - accepts.push_back(move(a)); - } -} - -static -void buildAccepts(const build_info &args, ReportListCache &reports_cache, - NFAStateSet &acceptMask, NFAStateSet &acceptEodMask, - vector<NFAAccept> &accepts, vector<NFAAccept> &acceptsEod, - vector<ReportID> &reports, vector<NFAStateSet> &squash) { - const NGHolder &h = args.h; - + for (const auto &v : verts) { + DEBUG_PRINTF("state=%u, reports: [%s]\n", args.state_ids.at(v), + as_string_list(h[v].reports).c_str()); + NFAAccept a; + memset(&a, 0, sizeof(a)); + assert(!h[v].reports.empty()); + if (h[v].reports.size() == 1) { + a.single_report = 1; + a.reports = *h[v].reports.begin(); + } else { + a.single_report = 0; + a.reports = addReports(h[v].reports, reports, reports_cache); + } + a.squash = addSquashMask(args, v, squash); + accepts.push_back(move(a)); + } +} + +static +void buildAccepts(const build_info &args, ReportListCache &reports_cache, + NFAStateSet &acceptMask, NFAStateSet &acceptEodMask, + vector<NFAAccept> &accepts, vector<NFAAccept> &acceptsEod, + vector<ReportID> &reports, vector<NFAStateSet> &squash) { + const NGHolder &h = args.h; + acceptMask.resize(args.num_states); acceptEodMask.resize(args.num_states); - vector<NFAVertex> verts_accept, verts_accept_eod; - + vector<NFAVertex> verts_accept, verts_accept_eod; + for (auto v : vertices_range(h)) { u32 state_id = args.state_ids.at(v); @@ -1089,18 +1089,18 @@ void buildAccepts(const build_info &args, ReportListCache &reports_cache, if (edge(v, h.accept, h).second) { acceptMask.set(state_id); - verts_accept.push_back(v); + verts_accept.push_back(v); } else { assert(edge(v, h.acceptEod, h).second); acceptEodMask.set(state_id); - verts_accept_eod.push_back(v); + verts_accept_eod.push_back(v); } - } + } - buildAcceptsList(args, reports_cache, verts_accept, accepts, reports, - squash); - buildAcceptsList(args, reports_cache, verts_accept_eod, acceptsEod, reports, - squash); + buildAcceptsList(args, reports_cache, verts_accept, accepts, reports, + squash); + buildAcceptsList(args, reports_cache, verts_accept_eod, acceptsEod, reports, + squash); } static @@ -1116,15 +1116,15 @@ void buildTopMasks(const build_info &args, vector<NFAStateSet> &topMasks) { for (const auto &m : args.tops) { u32 mask_idx = m.first; - for (NFAVertex v : m.second) { - u32 state_id = args.state_ids.at(v); - DEBUG_PRINTF("state %u is in top mask %u\n", state_id, mask_idx); + for (NFAVertex v : m.second) { + u32 state_id = args.state_ids.at(v); + DEBUG_PRINTF("state %u is in top mask %u\n", state_id, mask_idx); - assert(mask_idx < numMasks); - assert(state_id != NO_STATE); + assert(mask_idx < numMasks); + assert(state_id != NO_STATE); - topMasks[mask_idx].set(state_id); - } + topMasks[mask_idx].set(state_id); + } } } @@ -1136,7 +1136,7 @@ u32 uncompressedStateSize(u32 num_states) { static u32 compressedStateSize(const NGHolder &h, const NFAStateSet &maskedStates, - const unordered_map<NFAVertex, u32> &state_ids) { + const unordered_map<NFAVertex, u32> &state_ids) { // Shrink state requirement to enough to fit the compressed largest reach. 
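
Before the real implementation continues below, a hedged sketch of the computation that comment introduces: for every input byte, count how many (unmasked) states can be live after consuming it; the compressed state vector only needs as many bits as the largest such count. Types here are illustrative simplifications:

```cpp
#include <algorithm>
#include <bitset>
#include <cstdint>
#include <iterator>
#include <vector>

constexpr int N_CHARS = 256;

std::uint32_t
compressedBitsNeeded(const std::vector<std::bitset<N_CHARS>> &reachByState) {
    std::uint32_t allreach[N_CHARS] = {0};
    for (const auto &cr : reachByState) { // one reach bitset per state
        for (int c = 0; c < N_CHARS; c++) {
            allreach[c] += cr.test(c);    // states that survive byte c
        }
    }
    // widest per-byte survivor set bounds the compressed state width
    return *std::max_element(std::begin(allreach), std::end(allreach));
}
```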
vector<u32> allreach(N_CHARS, 0); @@ -1207,7 +1207,7 @@ bool hasSquashableInitDs(const build_info &args) { static bool hasInitDsStates(const NGHolder &h, - const unordered_map<NFAVertex, u32> &state_ids) { + const unordered_map<NFAVertex, u32> &state_ids) { if (state_ids.at(h.startDs) != NO_STATE) { return true; } @@ -1236,8 +1236,8 @@ void findMaskedCompressionStates(const build_info &args, // Suffixes and outfixes can mask out leaf states, which should all be // accepts. Right now we can only do this when there is nothing in initDs, // as we switch that on unconditionally in the expand call. - if (!inspects_states_for_accepts(h) - && !hasInitDsStates(h, args.state_ids)) { + if (!inspects_states_for_accepts(h) + && !hasInitDsStates(h, args.state_ids)) { NFAStateSet nonleaf(args.num_states); for (const auto &e : edges_range(h)) { u32 from = args.state_ids.at(source(e, h)); @@ -1375,16 +1375,16 @@ struct ExceptionProto { }; static -u32 buildExceptionMap(const build_info &args, ReportListCache &reports_cache, - const unordered_set<NFAEdge> &exceptional, - map<ExceptionProto, vector<u32>> &exceptionMap, - vector<ReportID> &reportList) { +u32 buildExceptionMap(const build_info &args, ReportListCache &reports_cache, + const unordered_set<NFAEdge> &exceptional, + map<ExceptionProto, vector<u32>> &exceptionMap, + vector<ReportID> &reportList) { const NGHolder &h = args.h; const u32 num_states = args.num_states; - u32 exceptionCount = 0; + u32 exceptionCount = 0; - unordered_map<NFAVertex, u32> pos_trigger; - unordered_map<NFAVertex, u32> tug_trigger; + unordered_map<NFAVertex, u32> pos_trigger; + unordered_map<NFAVertex, u32> tug_trigger; for (u32 i = 0; i < args.repeats.size(); i++) { const BoundedRepeatData &br = args.repeats[i]; @@ -1414,12 +1414,12 @@ u32 buildExceptionMap(const build_info &args, ReportListCache &reports_cache, DEBUG_PRINTF("state %u is exceptional due to accept " "(%zu reports)\n", i, reports.size()); - if (reports.empty()) { - e.reports_index = MO_INVALID_IDX; - } else { - e.reports_index = - addReports(reports, reportList, reports_cache); - } + if (reports.empty()) { + e.reports_index = MO_INVALID_IDX; + } else { + e.reports_index = + addReports(reports, reportList, reports_cache); + } // We may be applying a report squash too. auto mi = args.reportSquashMap.find(v); @@ -1511,13 +1511,13 @@ u32 buildExceptionMap(const build_info &args, ReportListCache &reports_cache, assert(e.succ_states.size() == num_states); assert(e.squash_states.size() == num_states); exceptionMap[e].push_back(i); - exceptionCount++; + exceptionCount++; } } - DEBUG_PRINTF("%u exceptions found (%zu unique)\n", exceptionCount, - exceptionMap.size()); - return exceptionCount; + DEBUG_PRINTF("%u exceptions found (%zu unique)\n", exceptionCount, + exceptionMap.size()); + return exceptionCount; } static @@ -1532,164 +1532,164 @@ u32 depth_to_u32(const depth &d) { return d_val; } -static -bool isExceptionalTransition(u32 from, u32 to, const build_info &args, - u32 maxShift) { - if (!isLimitedTransition(from, to, maxShift)) { - return true; - } - - // All transitions out of a tug trigger are exceptional. 
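
isLimitedTransition() is defined elsewhere in this file and not shown in this excerpt; a plausible sketch consistent with how it is used here: a transition is "limited" when the target state id is ahead of the source by at most the maximum shift, so one of the vector shift masks can implement it. Everything else (including tug-trigger transitions, per the comment above) must become an exception:

```cpp
// sketch only: the real predicate lives in limex_compile.cpp
static bool isLimitedTransitionSketch(unsigned from, unsigned to,
                                      unsigned maxShift) {
    return to >= from && to - from <= maxShift;
}
```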
- if (args.tugs.test(from)) { - return true; - } - return false; -} - -static -u32 findMaxVarShift(const build_info &args, u32 nShifts) { - const NGHolder &h = args.h; - u32 shiftMask = 0; - for (const auto &e : edges_range(h)) { - u32 from = args.state_ids.at(source(e, h)); - u32 to = args.state_ids.at(target(e, h)); - if (from == NO_STATE || to == NO_STATE) { - continue; - } - if (!isExceptionalTransition(from, to, args, MAX_SHIFT_AMOUNT)) { - shiftMask |= (1UL << (to - from)); - } - } - - u32 maxVarShift = 0; - for (u32 shiftCnt = 0; shiftMask != 0 && shiftCnt < nShifts; shiftCnt++) { - maxVarShift = findAndClearLSB_32(&shiftMask); - } - - return maxVarShift; -} - -static -int getLimexScore(const build_info &args, u32 nShifts) { - const NGHolder &h = args.h; - u32 maxVarShift = nShifts; - int score = 0; - - score += SHIFT_COST * nShifts; - maxVarShift = findMaxVarShift(args, nShifts); - - NFAStateSet exceptionalStates(args.num_states); - for (const auto &e : edges_range(h)) { - u32 from = args.state_ids.at(source(e, h)); - u32 to = args.state_ids.at(target(e, h)); - if (from == NO_STATE || to == NO_STATE) { - continue; - } - if (isExceptionalTransition(from, to, args, maxVarShift)) { - exceptionalStates.set(from); - } - } - score += EXCEPTION_COST * exceptionalStates.count(); - return score; -} - -// This function finds the best shift scheme with highest score -// Returns number of shifts and score calculated for appropriate scheme -// Returns zero if no appropriate scheme was found -static -u32 findBestNumOfVarShifts(const build_info &args, - int *bestScoreRet = nullptr) { - u32 bestNumOfVarShifts = 0; - int bestScore = INT_MAX; - for (u32 shiftCount = 1; shiftCount <= MAX_SHIFT_COUNT; shiftCount++) { - int score = getLimexScore(args, shiftCount); - if (score < bestScore) { - bestScore = score; - bestNumOfVarShifts = shiftCount; - } - } - if (bestScoreRet != nullptr) { - *bestScoreRet = bestScore; - } - return bestNumOfVarShifts; -} - -static -bool cannotDie(const build_info &args, const set<NFAVertex> &tops) { - const auto &h = args.h; - - // When this top is activated, all of the vertices in 'tops' are switched - // on. If any of those lead to a graph that cannot die, then this top - // cannot die. - - // For each top, we use a depth-first search to traverse the graph from the - // top, looking for a cyclic path consisting of vertices of dot reach. If - // one exists, than the NFA cannot die after this top is triggered. - - auto colour_map = make_small_color_map(h); - - struct CycleFound {}; - struct CannotDieVisitor : public boost::default_dfs_visitor { - void back_edge(const NFAEdge &e, const NGHolder &g) const { - DEBUG_PRINTF("back-edge %zu,%zu\n", g[source(e, g)].index, - g[target(e, g)].index); - if (g[target(e, g)].char_reach.all()) { - assert(g[source(e, g)].char_reach.all()); - throw CycleFound(); - } - } - }; - - try { - for (const auto &top : tops) { - DEBUG_PRINTF("checking top vertex %zu\n", h[top].index); - - // Constrain the search to the top vertices and any dot vertices it - // can reach. - auto term_func = [&](NFAVertex v, const NGHolder &g) { - if (v == top) { - return false; - } - if (!g[v].char_reach.all()) { - return true; - } - if (contains(args.br_cyclic, v) && - args.br_cyclic.at(v).repeatMax != depth::infinity()) { - // Bounded repeat vertices without inf max can be turned - // off. 
- return true; - } - return false; - }; - - boost::depth_first_visit(h, top, CannotDieVisitor(), colour_map, - term_func); - } - } catch (const CycleFound &) { - DEBUG_PRINTF("cycle found\n"); - return true; - } - - return false; -} - -/** \brief True if this NFA cannot ever be in no states at all. */ -static -bool cannotDie(const build_info &args) { - const auto &h = args.h; - const auto &state_ids = args.state_ids; - - // If we have a startDs we're actually using, we can't die. - if (state_ids.at(h.startDs) != NO_STATE) { - DEBUG_PRINTF("is using startDs\n"); - return true; - } - - return all_of_in(args.tops | map_values, [&](const set<NFAVertex> &verts) { - return cannotDie(args, verts); - }); -} - +static +bool isExceptionalTransition(u32 from, u32 to, const build_info &args, + u32 maxShift) { + if (!isLimitedTransition(from, to, maxShift)) { + return true; + } + + // All transitions out of a tug trigger are exceptional. + if (args.tugs.test(from)) { + return true; + } + return false; +} + +static +u32 findMaxVarShift(const build_info &args, u32 nShifts) { + const NGHolder &h = args.h; + u32 shiftMask = 0; + for (const auto &e : edges_range(h)) { + u32 from = args.state_ids.at(source(e, h)); + u32 to = args.state_ids.at(target(e, h)); + if (from == NO_STATE || to == NO_STATE) { + continue; + } + if (!isExceptionalTransition(from, to, args, MAX_SHIFT_AMOUNT)) { + shiftMask |= (1UL << (to - from)); + } + } + + u32 maxVarShift = 0; + for (u32 shiftCnt = 0; shiftMask != 0 && shiftCnt < nShifts; shiftCnt++) { + maxVarShift = findAndClearLSB_32(&shiftMask); + } + + return maxVarShift; +} + +static +int getLimexScore(const build_info &args, u32 nShifts) { + const NGHolder &h = args.h; + u32 maxVarShift = nShifts; + int score = 0; + + score += SHIFT_COST * nShifts; + maxVarShift = findMaxVarShift(args, nShifts); + + NFAStateSet exceptionalStates(args.num_states); + for (const auto &e : edges_range(h)) { + u32 from = args.state_ids.at(source(e, h)); + u32 to = args.state_ids.at(target(e, h)); + if (from == NO_STATE || to == NO_STATE) { + continue; + } + if (isExceptionalTransition(from, to, args, maxVarShift)) { + exceptionalStates.set(from); + } + } + score += EXCEPTION_COST * exceptionalStates.count(); + return score; +} + +// This function finds the best shift scheme with highest score +// Returns number of shifts and score calculated for appropriate scheme +// Returns zero if no appropriate scheme was found +static +u32 findBestNumOfVarShifts(const build_info &args, + int *bestScoreRet = nullptr) { + u32 bestNumOfVarShifts = 0; + int bestScore = INT_MAX; + for (u32 shiftCount = 1; shiftCount <= MAX_SHIFT_COUNT; shiftCount++) { + int score = getLimexScore(args, shiftCount); + if (score < bestScore) { + bestScore = score; + bestNumOfVarShifts = shiftCount; + } + } + if (bestScoreRet != nullptr) { + *bestScoreRet = bestScore; + } + return bestNumOfVarShifts; +} + +static +bool cannotDie(const build_info &args, const set<NFAVertex> &tops) { + const auto &h = args.h; + + // When this top is activated, all of the vertices in 'tops' are switched + // on. If any of those lead to a graph that cannot die, then this top + // cannot die. + + // For each top, we use a depth-first search to traverse the graph from the + // top, looking for a cyclic path consisting of vertices of dot reach. If + // one exists, than the NFA cannot die after this top is triggered. 
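
The cannot-die check described in that comment is a cycle search restricted to full-reach ("dot") vertices. The real code uses a Boost DFS visitor that throws CycleFound on a back edge; a hedged, dependency-free sketch of the same idea on a toy adjacency-list graph:

```cpp
#include <vector>

// colours: 0 = white (unseen), 1 = grey (on stack), 2 = black (done)
static bool dotCycleFrom(int v, const std::vector<std::vector<int>> &adj,
                         const std::vector<bool> &isDot,
                         std::vector<int> &colour) {
    colour[v] = 1;
    for (int s : adj[v]) {
        if (!isDot[s]) continue;           // constrain search to dot vertices
        if (colour[s] == 1) return true;   // back edge: cycle of dots found
        if (colour[s] == 0 && dotCycleFrom(s, adj, isDot, colour)) return true;
    }
    colour[v] = 2;
    return false;
}

bool cannotDieFromTop(int top, const std::vector<std::vector<int>> &adj,
                      const std::vector<bool> &isDot) {
    std::vector<int> colour(adj.size(), 0);
    return dotCycleFrom(top, adj, isDot, colour); // cycle => NFA cannot die
}
```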
+ + auto colour_map = make_small_color_map(h); + + struct CycleFound {}; + struct CannotDieVisitor : public boost::default_dfs_visitor { + void back_edge(const NFAEdge &e, const NGHolder &g) const { + DEBUG_PRINTF("back-edge %zu,%zu\n", g[source(e, g)].index, + g[target(e, g)].index); + if (g[target(e, g)].char_reach.all()) { + assert(g[source(e, g)].char_reach.all()); + throw CycleFound(); + } + } + }; + + try { + for (const auto &top : tops) { + DEBUG_PRINTF("checking top vertex %zu\n", h[top].index); + + // Constrain the search to the top vertices and any dot vertices it + // can reach. + auto term_func = [&](NFAVertex v, const NGHolder &g) { + if (v == top) { + return false; + } + if (!g[v].char_reach.all()) { + return true; + } + if (contains(args.br_cyclic, v) && + args.br_cyclic.at(v).repeatMax != depth::infinity()) { + // Bounded repeat vertices without inf max can be turned + // off. + return true; + } + return false; + }; + + boost::depth_first_visit(h, top, CannotDieVisitor(), colour_map, + term_func); + } + } catch (const CycleFound &) { + DEBUG_PRINTF("cycle found\n"); + return true; + } + + return false; +} + +/** \brief True if this NFA cannot ever be in no states at all. */ +static +bool cannotDie(const build_info &args) { + const auto &h = args.h; + const auto &state_ids = args.state_ids; + + // If we have a startDs we're actually using, we can't die. + if (state_ids.at(h.startDs) != NO_STATE) { + DEBUG_PRINTF("is using startDs\n"); + return true; + } + + return all_of_in(args.tops | map_values, [&](const set<NFAVertex> &verts) { + return cannotDie(args, verts); + }); +} + template<NFAEngineType dtype> struct Factory { // typedefs for readability, for types derived from traits @@ -1713,8 +1713,8 @@ struct Factory { sizeof(limex->init), stateSize, repeatscratchStateSize, repeatStreamState); - size_t scratchStateSize = NFATraits<dtype>::scratch_state_size; - + size_t scratchStateSize = NFATraits<dtype>::scratch_state_size; + if (repeatscratchStateSize) { scratchStateSize = ROUNDUP_N(scratchStateSize, alignof(RepeatControl)); @@ -1753,8 +1753,8 @@ struct Factory { static void buildRepeats(const build_info &args, - vector<bytecode_ptr<NFARepeatInfo>> &out, - u32 *scratchStateSize, u32 *streamState) { + vector<bytecode_ptr<NFARepeatInfo>> &out, + u32 *scratchStateSize, u32 *streamState) { out.reserve(args.repeats.size()); u32 repeat_idx = 0; @@ -1765,7 +1765,7 @@ struct Factory { u32 tableOffset, tugMaskOffset; size_t len = repeatAllocSize(br, &tableOffset, &tugMaskOffset); - auto info = make_zeroed_bytecode_ptr<NFARepeatInfo>(len); + auto info = make_zeroed_bytecode_ptr<NFARepeatInfo>(len); char *info_ptr = (char *)info.get(); // Collect state space info. 
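
make_zeroed_bytecode_ptr<T>(len), used just above for the NFARepeatInfo blob, is Hyperscan's owning-buffer helper. As a rough sketch it behaves like "allocate len bytes, suitably aligned, zero them, hand back an owning typed pointer"; the 64-byte alignment and the name `makeZeroedBlob` below are assumptions of this sketch, and it treats the blob as raw bytes (no destructor runs):

```cpp
#include <cstdlib>
#include <cstring>
#include <memory>
#include <new>

template <typename T>
std::unique_ptr<T, void (*)(void *)> makeZeroedBlob(std::size_t len) {
    void *p = nullptr;
    // POSIX allocator; 64 bytes stands in for cache-line alignment
    if (posix_memalign(&p, 64, len) != 0) {
        throw std::bad_alloc();
    }
    std::memset(p, 0, len); // zeroed, like the real helper
    return {static_cast<T *>(p), &std::free};
}
```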
@@ -1819,7 +1819,7 @@ struct Factory { *streamState += streamStateLen; *scratchStateSize += sizeof(RepeatControl); - out.emplace_back(move(info)); + out.emplace_back(move(info)); } } @@ -1856,19 +1856,19 @@ struct Factory { assert(cyclic != NO_STATE); maskSetBit(limex->repeatCyclicMask, cyclic); } - /* also include tugs in repeat cyclic mask */ - for (size_t i = args.tugs.find_first(); i != args.tugs.npos; - i = args.tugs.find_next(i)) { - maskSetBit(limex->repeatCyclicMask, i); - } + /* also include tugs in repeat cyclic mask */ + for (size_t i = args.tugs.find_first(); i != args.tugs.npos; + i = args.tugs.find_next(i)) { + maskSetBit(limex->repeatCyclicMask, i); + } } static void writeShiftMasks(const build_info &args, implNFA_t *limex) { const NGHolder &h = args.h; - u32 maxShift = findMaxVarShift(args, limex->shiftCount); - u32 shiftMask = 0; - int shiftMaskIdx = 0; + u32 maxShift = findMaxVarShift(args, limex->shiftCount); + u32 shiftMask = 0; + int shiftMaskIdx = 0; for (const auto &e : edges_range(h)) { u32 from = args.state_ids.at(source(e, h)); @@ -1880,32 +1880,32 @@ struct Factory { // We check for exceptional transitions here, as we don't want tug // trigger transitions emitted as limited transitions (even if they // could be in this model). - if (!isExceptionalTransition(from, to, args, maxShift)) { - u32 shift = to - from; - if ((shiftMask & (1UL << shift)) == 0UL) { - shiftMask |= (1UL << shift); - limex->shiftAmount[shiftMaskIdx++] = (u8)shift; - } - assert(limex->shiftCount <= MAX_SHIFT_COUNT); - for (u32 i = 0; i < limex->shiftCount; i++) { - if (limex->shiftAmount[i] == (u8)shift) { - maskSetBit(limex->shift[i], from); - break; - } - } - } - } - if (maxShift && limex->shiftCount > 1) { - for (u32 i = 0; i < limex->shiftCount; i++) { - assert(!isMaskZero(limex->shift[i])); + if (!isExceptionalTransition(from, to, args, maxShift)) { + u32 shift = to - from; + if ((shiftMask & (1UL << shift)) == 0UL) { + shiftMask |= (1UL << shift); + limex->shiftAmount[shiftMaskIdx++] = (u8)shift; + } + assert(limex->shiftCount <= MAX_SHIFT_COUNT); + for (u32 i = 0; i < limex->shiftCount; i++) { + if (limex->shiftAmount[i] == (u8)shift) { + maskSetBit(limex->shift[i], from); + break; + } + } } } + if (maxShift && limex->shiftCount > 1) { + for (u32 i = 0; i < limex->shiftCount; i++) { + assert(!isMaskZero(limex->shift[i])); + } + } } static void findExceptionalTransitions(const build_info &args, - unordered_set<NFAEdge> &exceptional, - u32 maxShift) { + unordered_set<NFAEdge> &exceptional, + u32 maxShift) { const NGHolder &h = args.h; for (const auto &e : edges_range(h)) { @@ -1915,7 +1915,7 @@ struct Factory { continue; } - if (isExceptionalTransition(from, to, args, maxShift)) { + if (isExceptionalTransition(from, to, args, maxShift)) { exceptional.insert(e); } } @@ -1924,41 +1924,41 @@ struct Factory { static void writeExceptions(const build_info &args, const map<ExceptionProto, vector<u32>> &exceptionMap, - const vector<u32> &repeatOffsets, implNFA_t *limex, - const u32 exceptionsOffset, - const u32 reportListOffset) { + const vector<u32> &repeatOffsets, implNFA_t *limex, + const u32 exceptionsOffset, + const u32 reportListOffset) { DEBUG_PRINTF("exceptionsOffset=%u\n", exceptionsOffset); exception_t *etable = (exception_t *)((char *)limex + exceptionsOffset); assert(ISALIGNED(etable)); - map<u32, ExceptionProto> exception_by_state; + map<u32, ExceptionProto> exception_by_state; for (const auto &m : exceptionMap) { const ExceptionProto &proto = m.first; const vector<u32> &states = 
m.second; - for (u32 i : states) { - assert(!contains(exception_by_state, i)); - exception_by_state.emplace(i, proto); - } - } - - u32 ecount = 0; - for (const auto &m : exception_by_state) { - const ExceptionProto &proto = m.second; - u32 state_id = m.first; - DEBUG_PRINTF("exception %u, triggered by state %u\n", ecount, - state_id); - + for (u32 i : states) { + assert(!contains(exception_by_state, i)); + exception_by_state.emplace(i, proto); + } + } + + u32 ecount = 0; + for (const auto &m : exception_by_state) { + const ExceptionProto &proto = m.second; + u32 state_id = m.first; + DEBUG_PRINTF("exception %u, triggered by state %u\n", ecount, + state_id); + // Write the exception entry. exception_t &e = etable[ecount]; maskSetBits(e.squash, proto.squash_states); maskSetBits(e.successors, proto.succ_states); - if (proto.reports_index == MO_INVALID_IDX) { - e.reports = MO_INVALID_IDX; - } else { - e.reports = reportListOffset + - proto.reports_index * sizeof(ReportID); - } + if (proto.reports_index == MO_INVALID_IDX) { + e.reports = MO_INVALID_IDX; + } else { + e.reports = reportListOffset + + proto.reports_index * sizeof(ReportID); + } e.hasSquash = verify_u8(proto.squash); e.trigger = verify_u8(proto.trigger); u32 repeat_offset = proto.repeat_index == MO_INVALID_IDX @@ -1966,10 +1966,10 @@ struct Factory { : repeatOffsets[proto.repeat_index]; e.repeatOffset = repeat_offset; - // for the state that can switch it on - // set this bit in the exception mask - maskSetBit(limex->exceptionMask, state_id); - + // for the state that can switch it on + // set this bit in the exception mask + maskSetBit(limex->exceptionMask, state_id); + ecount++; } @@ -2130,9 +2130,9 @@ struct Factory { const vector<NFAAccept> &acceptsEod, const vector<NFAStateSet> &squash, implNFA_t *limex, const u32 acceptsOffset, const u32 acceptsEodOffset, - const u32 squashOffset, const u32 reportListOffset) { - char *limex_base = (char *)limex; - + const u32 squashOffset, const u32 reportListOffset) { + char *limex_base = (char *)limex; + DEBUG_PRINTF("acceptsOffset=%u, acceptsEodOffset=%u, squashOffset=%u\n", acceptsOffset, acceptsEodOffset, squashOffset); @@ -2140,39 +2140,39 @@ struct Factory { maskSetBits(limex->accept, acceptMask); maskSetBits(limex->acceptAtEOD, acceptEodMask); - // Transforms the indices (report list, squash mask) into offsets - // relative to the base of the limex. - auto transform_offset_fn = [&](NFAAccept a) { - if (!a.single_report) { - a.reports = reportListOffset + a.reports * sizeof(ReportID); - } - a.squash = squashOffset + a.squash * sizeof(tableRow_t); - return a; - }; - + // Transforms the indices (report list, squash mask) into offsets + // relative to the base of the limex. + auto transform_offset_fn = [&](NFAAccept a) { + if (!a.single_report) { + a.reports = reportListOffset + a.reports * sizeof(ReportID); + } + a.squash = squashOffset + a.squash * sizeof(tableRow_t); + return a; + }; + // Write accept table. limex->acceptOffset = acceptsOffset; limex->acceptCount = verify_u32(accepts.size()); DEBUG_PRINTF("NFA has %zu accepts\n", accepts.size()); - NFAAccept *acceptsTable = (NFAAccept *)(limex_base + acceptsOffset); + NFAAccept *acceptsTable = (NFAAccept *)(limex_base + acceptsOffset); assert(ISALIGNED(acceptsTable)); - transform(accepts.begin(), accepts.end(), acceptsTable, - transform_offset_fn); + transform(accepts.begin(), accepts.end(), acceptsTable, + transform_offset_fn); // Write eod accept table. 
limex->acceptEodOffset = acceptsEodOffset; limex->acceptEodCount = verify_u32(acceptsEod.size()); DEBUG_PRINTF("NFA has %zu EOD accepts\n", acceptsEod.size()); - NFAAccept *acceptsEodTable = (NFAAccept *)(limex_base + acceptsEodOffset); + NFAAccept *acceptsEodTable = (NFAAccept *)(limex_base + acceptsEodOffset); assert(ISALIGNED(acceptsEodTable)); - transform(acceptsEod.begin(), acceptsEod.end(), acceptsEodTable, - transform_offset_fn); + transform(acceptsEod.begin(), acceptsEod.end(), acceptsEodTable, + transform_offset_fn); // Write squash mask table. limex->squashCount = verify_u32(squash.size()); limex->squashOffset = squashOffset; DEBUG_PRINTF("NFA has %zu report squash masks\n", squash.size()); - tableRow_t *mask = (tableRow_t *)(limex_base + squashOffset); + tableRow_t *mask = (tableRow_t *)(limex_base + squashOffset); assert(ISALIGNED(mask)); for (size_t i = 0, end = squash.size(); i < end; i++) { maskSetBits(mask[i], squash[i]); @@ -2180,7 +2180,7 @@ struct Factory { } static - void writeRepeats(const vector<bytecode_ptr<NFARepeatInfo>> &repeats, + void writeRepeats(const vector<bytecode_ptr<NFARepeatInfo>> &repeats, vector<u32> &repeatOffsets, implNFA_t *limex, const u32 repeatOffsetsOffset, const u32 repeatOffset) { const u32 num_repeats = verify_u32(repeats.size()); @@ -2193,9 +2193,9 @@ struct Factory { for (u32 i = 0; i < num_repeats; i++) { repeatOffsets[i] = offset; - assert(repeats[i]); - memcpy((char *)limex + offset, repeats[i].get(), repeats[i].size()); - offset += repeats[i].size(); + assert(repeats[i]); + memcpy((char *)limex + offset, repeats[i].get(), repeats[i].size()); + offset += repeats[i].size(); } // Write repeat offset lookup table. @@ -2207,48 +2207,48 @@ struct Factory { } static - void writeReportList(const vector<ReportID> &reports, implNFA_t *limex, - const u32 reportListOffset) { - DEBUG_PRINTF("reportListOffset=%u\n", reportListOffset); - assert(ISALIGNED_N((char *)limex + reportListOffset, + void writeReportList(const vector<ReportID> &reports, implNFA_t *limex, + const u32 reportListOffset) { + DEBUG_PRINTF("reportListOffset=%u\n", reportListOffset); + assert(ISALIGNED_N((char *)limex + reportListOffset, alignof(ReportID))); - copy_bytes((char *)limex + reportListOffset, reports); + copy_bytes((char *)limex + reportListOffset, reports); } static - bytecode_ptr<NFA> generateNfa(const build_info &args) { + bytecode_ptr<NFA> generateNfa(const build_info &args) { if (args.num_states > NFATraits<dtype>::maxStates) { return nullptr; } // Build bounded repeat structures. - vector<bytecode_ptr<NFARepeatInfo>> repeats; + vector<bytecode_ptr<NFARepeatInfo>> repeats; u32 repeats_full_state = 0; u32 repeats_stream_state = 0; buildRepeats(args, repeats, &repeats_full_state, &repeats_stream_state); size_t repeatSize = 0; for (size_t i = 0; i < repeats.size(); i++) { - repeatSize += repeats[i].size(); + repeatSize += repeats[i].size(); } - // We track report lists that have already been written into the global - // list in case we can reuse them. - ReportListCache reports_cache; - - unordered_set<NFAEdge> exceptional; - u32 shiftCount = findBestNumOfVarShifts(args); - assert(shiftCount); - u32 maxShift = findMaxVarShift(args, shiftCount); - findExceptionalTransitions(args, exceptional, maxShift); - - map<ExceptionProto, vector<u32>> exceptionMap; - vector<ReportID> reportList; + // We track report lists that have already been written into the global + // list in case we can reuse them. 
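
A sketch of the reuse scheme that comment describes: report lists are appended to one flat array, and a cache keyed by the list's contents lets identical lists share a single offset. Simplified, assumed types below; the real ReportListCache is an unordered map over sentinel-terminated lists and also falls back to a substring search of the flat array (the `search` call in addReports above):

```cpp
#include <cstdint>
#include <map>
#include <vector>

using ReportID = std::uint32_t;

std::uint32_t
internReportList(const std::vector<ReportID> &list,
                 std::vector<ReportID> &flat,
                 std::map<std::vector<ReportID>, std::uint32_t> &cache) {
    auto it = cache.find(list);
    if (it != cache.end()) {
        return it->second;                          // reuse cached offset
    }
    auto offset = static_cast<std::uint32_t>(flat.size());
    flat.insert(flat.end(), list.begin(), list.end()); // append once
    cache.emplace(list, offset);
    return offset;
}
```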
+ ReportListCache reports_cache; - u32 exceptionCount = buildExceptionMap(args, reports_cache, exceptional, - exceptionMap, reportList); + unordered_set<NFAEdge> exceptional; + u32 shiftCount = findBestNumOfVarShifts(args); + assert(shiftCount); + u32 maxShift = findMaxVarShift(args, shiftCount); + findExceptionalTransitions(args, exceptional, maxShift); - assert(exceptionCount <= args.num_states); + map<ExceptionProto, vector<u32>> exceptionMap; + vector<ReportID> reportList; + u32 exceptionCount = buildExceptionMap(args, reports_cache, exceptional, + exceptionMap, reportList); + + assert(exceptionCount <= args.num_states); + // Build reach table and character mapping. vector<NFAStateSet> reach; vector<u8> reachMap; @@ -2262,8 +2262,8 @@ struct Factory { NFAStateSet acceptMask, acceptEodMask; vector<NFAAccept> accepts, acceptsEod; vector<NFAStateSet> squash; - buildAccepts(args, reports_cache, acceptMask, acceptEodMask, accepts, - acceptsEod, reportList, squash); + buildAccepts(args, reports_cache, acceptMask, acceptEodMask, accepts, + acceptsEod, reportList, squash); // Build all our accel info. NFAStateSet accelMask, accelFriendsMask; @@ -2302,10 +2302,10 @@ struct Factory { offset = ROUNDUP_CL(offset); const u32 exceptionsOffset = offset; - offset += sizeof(exception_t) * exceptionCount; + offset += sizeof(exception_t) * exceptionCount; - const u32 reportListOffset = offset; - offset += sizeof(ReportID) * reportList.size(); + const u32 reportListOffset = offset; + offset += sizeof(ReportID) * reportList.size(); const u32 repeatOffsetsOffset = offset; offset += sizeof(u32) * args.repeats.size(); @@ -2318,7 +2318,7 @@ struct Factory { size_t nfaSize = sizeof(NFA) + offset; DEBUG_PRINTF("nfa size %zu\n", nfaSize); - auto nfa = make_zeroed_bytecode_ptr<NFA>(nfaSize); + auto nfa = make_zeroed_bytecode_ptr<NFA>(nfaSize); assert(nfa); // otherwise we would have thrown std::bad_alloc implNFA_t *limex = (implNFA_t *)getMutableImplNfa(nfa.get()); @@ -2332,21 +2332,21 @@ struct Factory { limex, accelTableOffset, accelAuxOffset); writeAccepts(acceptMask, acceptEodMask, accepts, acceptsEod, squash, - limex, acceptsOffset, acceptsEodOffset, squashOffset, - reportListOffset); + limex, acceptsOffset, acceptsEodOffset, squashOffset, + reportListOffset); - limex->shiftCount = shiftCount; + limex->shiftCount = shiftCount; writeShiftMasks(args, limex); - if (cannotDie(args)) { - DEBUG_PRINTF("nfa cannot die\n"); - setLimexFlag(limex, LIMEX_FLAG_CANNOT_DIE); - } - + if (cannotDie(args)) { + DEBUG_PRINTF("nfa cannot die\n"); + setLimexFlag(limex, LIMEX_FLAG_CANNOT_DIE); + } + // Determine the state required for our state vector. findStateSize(args, limex); - writeReportList(reportList, limex, reportListOffset); + writeReportList(reportList, limex, reportListOffset); // Repeat structures and offset table. vector<u32> repeatOffsets; @@ -2354,7 +2354,7 @@ struct Factory { repeatsOffset); writeExceptions(args, exceptionMap, repeatOffsets, limex, exceptionsOffset, - reportListOffset); + reportListOffset); writeLimexMasks(args, limex); @@ -2393,10 +2393,10 @@ struct Factory { // We are of the right size, calculate a score based on the number // of exceptions and the number of shifts used by this LimEx. 
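
Note that despite the header comment's "highest score" wording, getLimexScore/findBestNumOfVarShifts treat lower as better: bestScore starts at INT_MAX and shrinks, and generate() later sorts candidates "lowest score first". The cost model boils down to the sketch below; the weights are illustrative only, the real SHIFT_COST/EXCEPTION_COST constants live elsewhere in this file:

```cpp
#include <climits>
#include <cstdint>

constexpr int SHIFT_COST = 10;     // illustrative weight
constexpr int EXCEPTION_COST = 40; // illustrative weight

// score = fixed cost per shift mask + cost per exceptional state
int limexScoreSketch(std::uint32_t nShifts, std::uint32_t nExceptional) {
    return SHIFT_COST * static_cast<int>(nShifts) +
           EXCEPTION_COST * static_cast<int>(nExceptional);
}

// pick the shift count in 1..maxCount minimising the score;
// exceptionalByCount[n] = exceptional states if n shift masks are allowed
std::uint32_t bestShiftCountSketch(std::uint32_t maxCount,
                                   const std::uint32_t *exceptionalByCount) {
    int best = INT_MAX;
    std::uint32_t bestCount = 0;
    for (std::uint32_t n = 1; n <= maxCount; n++) {
        int s = limexScoreSketch(n, exceptionalByCount[n]);
        if (s < best) { best = s; bestCount = n; }
    }
    return bestCount;
}
```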
- int score; - u32 shiftCount = findBestNumOfVarShifts(args, &score); - if (shiftCount == 0) { - return -1; + int score; + u32 shiftCount = findBestNumOfVarShifts(args, &score); + if (shiftCount == 0) { + return -1; } return score; } @@ -2404,7 +2404,7 @@ struct Factory { template<NFAEngineType dtype> struct generateNfa { - static bytecode_ptr<NFA> call(const build_info &args) { + static bytecode_ptr<NFA> call(const build_info &args) { return Factory<dtype>::generateNfa(args); } }; @@ -2416,40 +2416,40 @@ struct scoreNfa { } }; -#define MAKE_LIMEX_TRAITS(mlt_size) \ - template<> struct NFATraits<LIMEX_NFA_##mlt_size> { \ - typedef LimExNFA##mlt_size implNFA_t; \ - typedef u_##mlt_size tableRow_t; \ - typedef NFAException##mlt_size exception_t; \ - static const size_t maxStates = mlt_size; \ - static const size_t scratch_state_size = mlt_size == 64 ? sizeof(m128) \ - : sizeof(tableRow_t); \ - }; - -MAKE_LIMEX_TRAITS(32) -MAKE_LIMEX_TRAITS(64) -MAKE_LIMEX_TRAITS(128) -MAKE_LIMEX_TRAITS(256) -MAKE_LIMEX_TRAITS(384) -MAKE_LIMEX_TRAITS(512) +#define MAKE_LIMEX_TRAITS(mlt_size) \ + template<> struct NFATraits<LIMEX_NFA_##mlt_size> { \ + typedef LimExNFA##mlt_size implNFA_t; \ + typedef u_##mlt_size tableRow_t; \ + typedef NFAException##mlt_size exception_t; \ + static const size_t maxStates = mlt_size; \ + static const size_t scratch_state_size = mlt_size == 64 ? sizeof(m128) \ + : sizeof(tableRow_t); \ + }; + +MAKE_LIMEX_TRAITS(32) +MAKE_LIMEX_TRAITS(64) +MAKE_LIMEX_TRAITS(128) +MAKE_LIMEX_TRAITS(256) +MAKE_LIMEX_TRAITS(384) +MAKE_LIMEX_TRAITS(512) } // namespace #ifndef NDEBUG // Some sanity tests, called by an assertion in generate(). static UNUSED -bool isSane(const NGHolder &h, const map<u32, set<NFAVertex>> &tops, - const unordered_map<NFAVertex, u32> &state_ids, +bool isSane(const NGHolder &h, const map<u32, set<NFAVertex>> &tops, + const unordered_map<NFAVertex, u32> &state_ids, u32 num_states) { - unordered_set<u32> seen; - unordered_set<NFAVertex> top_starts; - for (const auto &vv : tops | map_values) { - insert(&top_starts, vv); + unordered_set<u32> seen; + unordered_set<NFAVertex> top_starts; + for (const auto &vv : tops | map_values) { + insert(&top_starts, vv); } for (auto v : vertices_range(h)) { if (!contains(state_ids, v)) { - DEBUG_PRINTF("no entry for vertex %zu in state map\n", h[v].index); + DEBUG_PRINTF("no entry for vertex %zu in state map\n", h[v].index); return false; } const u32 i = state_ids.at(v); @@ -2457,7 +2457,7 @@ bool isSane(const NGHolder &h, const map<u32, set<NFAVertex>> &tops, continue; } - DEBUG_PRINTF("checking vertex %zu (state %u)\n", h[v].index, i); + DEBUG_PRINTF("checking vertex %zu (state %u)\n", h[v].index, i); if (i >= num_states || contains(seen, i)) { DEBUG_PRINTF("vertex %u/%u has invalid state\n", i, num_states); @@ -2467,7 +2467,7 @@ bool isSane(const NGHolder &h, const map<u32, set<NFAVertex>> &tops, // All our states should be reachable and have a state assigned. if (h[v].char_reach.none()) { - DEBUG_PRINTF("vertex %zu has empty reachability\n", h[v].index); + DEBUG_PRINTF("vertex %zu has empty reachability\n", h[v].index); return false; } @@ -2475,7 +2475,7 @@ bool isSane(const NGHolder &h, const map<u32, set<NFAVertex>> &tops, // must have at least one predecessor that is not itself. 
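
The sanity condition being checked in isSane just below is simple but easy to misread: every assigned state other than the starts (or a top-triggered entry) needs an in-edge from some vertex other than itself. A toy adjacency-list sketch of that predicate (names are illustrative):

```cpp
#include <vector>

// true if v has at least one predecessor that is not a self-loop
bool hasProperPred(int v, const std::vector<std::vector<int>> &preds) {
    for (int p : preds[v]) {
        if (p != v) return true;
    }
    return false;
}
```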
if (v != h.start && v != h.startDs && !contains(top_starts, v) && !proper_in_degree(v, h)) { - DEBUG_PRINTF("vertex %zu has no pred\n", h[v].index); + DEBUG_PRINTF("vertex %zu has no pred\n", h[v].index); return false; } } @@ -2551,7 +2551,7 @@ bool isFast(const build_info &args) { } static -u32 max_state(const unordered_map<NFAVertex, u32> &state_ids) { +u32 max_state(const unordered_map<NFAVertex, u32> &state_ids) { u32 rv = 0; for (const auto &m : state_ids) { DEBUG_PRINTF("state %u\n", m.second); @@ -2563,15 +2563,15 @@ u32 max_state(const unordered_map<NFAVertex, u32> &state_ids) { return rv; } -bytecode_ptr<NFA> generate(NGHolder &h, - const unordered_map<NFAVertex, u32> &states, - const vector<BoundedRepeatData> &repeats, - const unordered_map<NFAVertex, NFAStateSet> &reportSquashMap, - const unordered_map<NFAVertex, NFAStateSet> &squashMap, - const map<u32, set<NFAVertex>> &tops, - const set<NFAVertex> &zombies, bool do_accel, +bytecode_ptr<NFA> generate(NGHolder &h, + const unordered_map<NFAVertex, u32> &states, + const vector<BoundedRepeatData> &repeats, + const unordered_map<NFAVertex, NFAStateSet> &reportSquashMap, + const unordered_map<NFAVertex, NFAStateSet> &squashMap, + const map<u32, set<NFAVertex>> &tops, + const set<NFAVertex> &zombies, bool do_accel, bool stateCompression, bool &fast, u32 hint, - const CompileContext &cc) { + const CompileContext &cc) { const u32 num_states = max_state(states) + 1; DEBUG_PRINTF("total states: %u\n", num_states); @@ -2594,18 +2594,18 @@ bytecode_ptr<NFA> generate(NGHolder &h, // Acceleration analysis. fillAccelInfo(arg); - vector<pair<int, NFAEngineType>> scores; + vector<pair<int, NFAEngineType>> scores; if (hint != INVALID_NFA) { // The caller has told us what to (attempt to) build. - scores.emplace_back(0, (NFAEngineType)hint); + scores.emplace_back(0, (NFAEngineType)hint); } else { for (size_t i = 0; i <= LAST_LIMEX_NFA; i++) { NFAEngineType ntype = (NFAEngineType)i; int score = DISPATCH_BY_LIMEX_TYPE(ntype, scoreNfa, arg); if (score >= 0) { DEBUG_PRINTF("%s scores %d\n", nfa_type_name(ntype), score); - scores.emplace_back(score, ntype); + scores.emplace_back(score, ntype); } } } @@ -2615,39 +2615,39 @@ bytecode_ptr<NFA> generate(NGHolder &h, return nullptr; } - // Sort acceptable models in priority order, lowest score first. - sort(scores.begin(), scores.end()); + // Sort acceptable models in priority order, lowest score first. 
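
The selection loop that follows implements "try the cheapest viable model first, fall through on build failure". A compact sketch of the same pattern; the enum, `tryBuild`, and the string stand-in for bytecode are all assumptions replacing DISPATCH_BY_LIMEX_TYPE and bytecode_ptr<NFA>:

```cpp
#include <algorithm>
#include <utility>
#include <vector>

enum class Model { Limex32, Limex64, Limex128 }; // illustrative subset

const char *tryBuild(Model m) {
    // stand-in for the per-model generateNfa dispatch: non-null on success
    return m == Model::Limex32 ? "nfa" : nullptr;
}

const char *pickModel(std::vector<std::pair<int, Model>> scores) {
    std::sort(scores.begin(), scores.end()); // lowest score first
    for (const auto &entry : scores) {
        if (const char *nfa = tryBuild(entry.second)) {
            return nfa; // first (cheapest) model that actually builds wins
        }
    }
    return nullptr;     // every candidate failed
}
```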
+ sort(scores.begin(), scores.end()); - for (const auto &elem : scores) { - assert(elem.first >= 0); - NFAEngineType limex_model = elem.second; - auto nfa = DISPATCH_BY_LIMEX_TYPE(limex_model, generateNfa, arg); - if (nfa) { - DEBUG_PRINTF("successful build with NFA engine: %s\n", - nfa_type_name(limex_model)); + for (const auto &elem : scores) { + assert(elem.first >= 0); + NFAEngineType limex_model = elem.second; + auto nfa = DISPATCH_BY_LIMEX_TYPE(limex_model, generateNfa, arg); + if (nfa) { + DEBUG_PRINTF("successful build with NFA engine: %s\n", + nfa_type_name(limex_model)); fast = isFast(arg); - return nfa; - } + return nfa; + } } - DEBUG_PRINTF("NFA build failed.\n"); - return nullptr; + DEBUG_PRINTF("NFA build failed.\n"); + return nullptr; } u32 countAccelStates(NGHolder &h, - const unordered_map<NFAVertex, u32> &states, - const vector<BoundedRepeatData> &repeats, - const unordered_map<NFAVertex, NFAStateSet> &reportSquashMap, - const unordered_map<NFAVertex, NFAStateSet> &squashMap, - const map<u32, set<NFAVertex>> &tops, - const set<NFAVertex> &zombies, - const CompileContext &cc) { + const unordered_map<NFAVertex, u32> &states, + const vector<BoundedRepeatData> &repeats, + const unordered_map<NFAVertex, NFAStateSet> &reportSquashMap, + const unordered_map<NFAVertex, NFAStateSet> &squashMap, + const map<u32, set<NFAVertex>> &tops, + const set<NFAVertex> &zombies, + const CompileContext &cc) { const u32 num_states = max_state(states) + 1; DEBUG_PRINTF("total states: %u\n", num_states); if (!cc.grey.allowLimExNFA) { DEBUG_PRINTF("limex not allowed\n"); - return 0; + return 0; } // Sanity check the input data. @@ -2661,11 +2661,11 @@ u32 countAccelStates(NGHolder &h, do_accel, state_compression, cc, num_states); // Acceleration analysis. - nfaFindAccelSchemes(bi.h, bi.br_cyclic, &bi.accel.accel_map); + nfaFindAccelSchemes(bi.h, bi.br_cyclic, &bi.accel.accel_map); - u32 num_accel = verify_u32(bi.accel.accel_map.size()); + u32 num_accel = verify_u32(bi.accel.accel_map.size()); DEBUG_PRINTF("found %u accel states\n", num_accel); - return num_accel; + return num_accel; } } // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfa/limex_compile.h b/contrib/libs/hyperscan/src/nfa/limex_compile.h index 4afdcdb3e4..e657d6f44e 100644 --- a/contrib/libs/hyperscan/src/nfa/limex_compile.h +++ b/contrib/libs/hyperscan/src/nfa/limex_compile.h @@ -26,23 +26,23 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** - * \file +/** + * \file * \brief Main NFA build code. */ #ifndef LIMEX_COMPILE_H #define LIMEX_COMPILE_H -#include "nfagraph/ng_holder.h" -#include "nfagraph/ng_squash.h" // for NFAStateSet -#include "ue2common.h" -#include "util/bytecode_ptr.h" - -#include <set> +#include "nfagraph/ng_holder.h" +#include "nfagraph/ng_squash.h" // for NFAStateSet +#include "ue2common.h" +#include "util/bytecode_ptr.h" + +#include <set> #include <map> #include <memory> -#include <unordered_map> +#include <unordered_map> #include <vector> struct NFA; @@ -52,8 +52,8 @@ namespace ue2 { struct BoundedRepeatData; struct CompileContext; -/** - * \brief Construct a LimEx NFA from an NGHolder. +/** + * \brief Construct a LimEx NFA from an NGHolder. * * \param g Input NFA graph. Must have state IDs assigned. * \param repeats Bounded repeat information, if any. @@ -69,33 +69,33 @@ struct CompileContext; * \return a built NFA, or nullptr if no NFA could be constructed for this * graph. 
*/ -bytecode_ptr<NFA> generate(NGHolder &g, - const std::unordered_map<NFAVertex, u32> &states, - const std::vector<BoundedRepeatData> &repeats, - const std::unordered_map<NFAVertex, NFAStateSet> &reportSquashMap, - const std::unordered_map<NFAVertex, NFAStateSet> &squashMap, - const std::map<u32, std::set<NFAVertex>> &tops, - const std::set<NFAVertex> &zombies, - bool do_accel, - bool stateCompression, +bytecode_ptr<NFA> generate(NGHolder &g, + const std::unordered_map<NFAVertex, u32> &states, + const std::vector<BoundedRepeatData> &repeats, + const std::unordered_map<NFAVertex, NFAStateSet> &reportSquashMap, + const std::unordered_map<NFAVertex, NFAStateSet> &squashMap, + const std::map<u32, std::set<NFAVertex>> &tops, + const std::set<NFAVertex> &zombies, + bool do_accel, + bool stateCompression, bool &fast, - u32 hint, - const CompileContext &cc); + u32 hint, + const CompileContext &cc); /** - * \brief For a given graph, count the number of accelerable states it has. + * \brief For a given graph, count the number of accelerable states it has. * - * Note that this number may be greater than the number that are actually - * implementable. + * Note that this number may be greater than the number that are actually + * implementable. */ u32 countAccelStates(NGHolder &h, - const std::unordered_map<NFAVertex, u32> &states, - const std::vector<BoundedRepeatData> &repeats, - const std::unordered_map<NFAVertex, NFAStateSet> &reportSquashMap, - const std::unordered_map<NFAVertex, NFAStateSet> &squashMap, - const std::map<u32, std::set<NFAVertex>> &tops, - const std::set<NFAVertex> &zombies, - const CompileContext &cc); + const std::unordered_map<NFAVertex, u32> &states, + const std::vector<BoundedRepeatData> &repeats, + const std::unordered_map<NFAVertex, NFAStateSet> &reportSquashMap, + const std::unordered_map<NFAVertex, NFAStateSet> &squashMap, + const std::map<u32, std::set<NFAVertex>> &tops, + const std::set<NFAVertex> &zombies, + const CompileContext &cc); } // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfa/limex_context.h b/contrib/libs/hyperscan/src/nfa/limex_context.h index 60d2087935..25972bcd13 100644 --- a/contrib/libs/hyperscan/src/nfa/limex_context.h +++ b/contrib/libs/hyperscan/src/nfa/limex_context.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -39,16 +39,16 @@ // Runtime context structures. -/* Note: The size of the context structures may vary from platform to platform - * (notably, for the Limex64 structure). As a result, information based on the - * size and other detail of these structures should not be written into the - * bytecode -- really, the details of the structure should not be accessed by - * the ue2 compile side at all. - */ -#ifdef __cplusplus -#error ue2 runtime only file -#endif - +/* Note: The size of the context structures may vary from platform to platform + * (notably, for the Limex64 structure). As a result, information based on the + * size and other detail of these structures should not be written into the + * bytecode -- really, the details of the structure should not be accessed by + * the ue2 compile side at all. + */ +#ifdef __cplusplus +#error ue2 runtime only file +#endif + /* cached_estate/esucc etc... 
* * If the exception state matches the cached_estate we will apply @@ -76,11 +76,11 @@ struct ALIGN_CL_DIRECTIVE NFAContext##nsize { \ }; GEN_CONTEXT_STRUCT(32, u32) -#ifdef ARCH_64_BIT -GEN_CONTEXT_STRUCT(64, u64a) -#else -GEN_CONTEXT_STRUCT(64, m128) -#endif +#ifdef ARCH_64_BIT +GEN_CONTEXT_STRUCT(64, u64a) +#else +GEN_CONTEXT_STRUCT(64, m128) +#endif GEN_CONTEXT_STRUCT(128, m128) GEN_CONTEXT_STRUCT(256, m256) GEN_CONTEXT_STRUCT(384, m384) diff --git a/contrib/libs/hyperscan/src/nfa/limex_exceptional.h b/contrib/libs/hyperscan/src/nfa/limex_exceptional.h index 6c7335f1b9..fce8b2ca98 100644 --- a/contrib/libs/hyperscan/src/nfa/limex_exceptional.h +++ b/contrib/libs/hyperscan/src/nfa/limex_exceptional.h @@ -32,8 +32,8 @@ * X-macro generic impl, included into the various LimEx model implementations. */ -#if !defined(SIZE) || !defined(STATE_T) || !defined(LOAD_FROM_ENG) -# error Must define SIZE, STATE_T, LOAD_FROM_ENG in includer. +#if !defined(SIZE) || !defined(STATE_T) || !defined(LOAD_FROM_ENG) +# error Must define SIZE, STATE_T, LOAD_FROM_ENG in includer. #endif #include "config.h" @@ -59,7 +59,7 @@ #define ESTATE_ARG STATE_T estate #else #define ESTATE_ARG const STATE_T *estatep -#define estate (*estatep) +#define estate (*estatep) #endif #ifdef STATE_ON_STACK @@ -79,13 +79,13 @@ #ifdef ARCH_64_BIT #define CHUNK_T u64a #define FIND_AND_CLEAR_FN findAndClearLSB_64 -#define POPCOUNT_FN popcount64 -#define RANK_IN_MASK_FN rank_in_mask64 +#define POPCOUNT_FN popcount64 +#define RANK_IN_MASK_FN rank_in_mask64 #else #define CHUNK_T u32 #define FIND_AND_CLEAR_FN findAndClearLSB_32 -#define POPCOUNT_FN popcount32 -#define RANK_IN_MASK_FN rank_in_mask32 +#define POPCOUNT_FN popcount32 +#define RANK_IN_MASK_FN rank_in_mask32 #endif /** \brief Process a single exception. Returns 1 if exception handling should @@ -132,7 +132,7 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG, char *repeat_state = ctx->repeat_state + info->stateOffset; if (e->trigger == LIMEX_TRIGGER_POS) { - char cyclic_on = TESTBIT_STATE(*STATE_ARG_P, info->cyclicState); + char cyclic_on = TESTBIT_STATE(*STATE_ARG_P, info->cyclicState); processPosTrigger(repeat, repeat_ctrl, repeat_state, offset, cyclic_on); *cacheable = DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES; @@ -148,7 +148,7 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG, *cacheable = DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES; DEBUG_PRINTF("stale history, squashing cyclic state\n"); assert(e->hasSquash == LIMEX_SQUASH_TUG); - *succ = AND_STATE(*succ, LOAD_FROM_ENG(&e->squash)); + *succ = AND_STATE(*succ, LOAD_FROM_ENG(&e->squash)); return 1; // continue } else if (rv == TRIGGER_SUCCESS_CACHE) { new_cache->br = 1; @@ -162,8 +162,8 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG, // Some exceptions fire accepts. if (e->reports != MO_INVALID_IDX) { if (flags & CALLBACK_OUTPUT) { - const ReportID *reports = - (const ReportID *)((const char *)limex + e->reports); + const ReportID *reports = + (const ReportID *)((const char *)limex + e->reports); if (unlikely(limexRunReports(reports, ctx->callback, ctx->context, offset) == MO_HALT_MATCHING)) { @@ -187,16 +187,16 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG, // Most exceptions have a set of successors to switch on. `local_succ' is // ORed into `succ' at the end of the caller's loop. 
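
A sketch of the accumulation discipline that comment describes: successors contributed by each exception are OR'd into a scratch local_succ, while squashes are applied directly to succ, so one exception's squash cannot cancel another exception's freshly enabled successors. `StateMask` below is a scalar stand-in for the wide STATE_T vectors:

```cpp
#include <cstdint>

using StateMask = std::uint64_t; // stand-in for u32/m128/.../m512 STATE_T

void runExceptionSketch(StateMask squashMask, StateMask successors,
                        StateMask *succ, StateMask *local_succ) {
    *local_succ |= successors; // successors accumulate locally
    *succ &= squashMask;       // squash hits the main successor set only
}

// caller's loop epilogue, once all exceptions have run:
//     succ |= local_succ;
```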
#ifndef BIG_MODEL - *local_succ = OR_STATE(*local_succ, LOAD_FROM_ENG(&e->successors)); + *local_succ = OR_STATE(*local_succ, LOAD_FROM_ENG(&e->successors)); #else - ctx->local_succ = OR_STATE(ctx->local_succ, LOAD_FROM_ENG(&e->successors)); + ctx->local_succ = OR_STATE(ctx->local_succ, LOAD_FROM_ENG(&e->successors)); #endif // Some exceptions squash states behind them. Note that we squash states in // 'succ', not local_succ. - if (e->hasSquash == LIMEX_SQUASH_CYCLIC - || e->hasSquash == LIMEX_SQUASH_REPORT) { - *succ = AND_STATE(*succ, LOAD_FROM_ENG(&e->squash)); + if (e->hasSquash == LIMEX_SQUASH_CYCLIC + || e->hasSquash == LIMEX_SQUASH_REPORT) { + *succ = AND_STATE(*succ, LOAD_FROM_ENG(&e->squash)); if (*cacheable == CACHE_RESULT) { *cacheable = DO_NOT_CACHE_RESULT; } @@ -207,17 +207,17 @@ int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG, #ifndef RUN_EXCEPTION_FN_ONLY -/** \brief Process all of the exceptions associated with the states in the \a - * estate. */ +/** \brief Process all of the exceptions associated with the states in the \a + * estate. */ static really_inline int PE_FN(STATE_ARG, ESTATE_ARG, UNUSED u32 diffmask, STATE_T *succ, - const struct IMPL_NFA_T *limex, const EXCEPTION_T *exceptions, + const struct IMPL_NFA_T *limex, const EXCEPTION_T *exceptions, u64a offset, struct CONTEXT_T *ctx, char in_rev, char flags) { assert(diffmask > 0); // guaranteed by caller macro - if (EQ_STATE(estate, ctx->cached_estate)) { + if (EQ_STATE(estate, ctx->cached_estate)) { DEBUG_PRINTF("using cached succ from previous state\n"); - *succ = OR_STATE(*succ, ctx->cached_esucc); + *succ = OR_STATE(*succ, ctx->cached_esucc); if (ctx->cached_reports && (flags & CALLBACK_OUTPUT)) { DEBUG_PRINTF("firing cached reports from previous state\n"); if (unlikely(limexRunReports(ctx->cached_reports, ctx->callback, @@ -232,7 +232,7 @@ int PE_FN(STATE_ARG, ESTATE_ARG, UNUSED u32 diffmask, STATE_T *succ, #ifndef BIG_MODEL STATE_T local_succ = ZERO_STATE; #else - ctx->local_succ = ZERO_STATE; + ctx->local_succ = ZERO_STATE; #endif struct proto_cache new_cache = {0, NULL}; @@ -303,20 +303,20 @@ int PE_FN(STATE_ARG, ESTATE_ARG, UNUSED u32 diffmask, STATE_T *succ, #else // A copy of the estate as an array of GPR-sized chunks. 
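
The chunked loop that follows finds each exception's table index without any per-state lookup table: the index of state bit b is the number of exception-mask bits strictly below b (its "rank") plus a per-chunk base computed from popcounts. A scalar sketch, assuming 64-bit chunks and the GCC/Clang popcount builtin:

```cpp
#include <cstdint>

// rank of bit within mask: set bits of mask strictly below 'bit' (bit < 64)
static unsigned rankInMask64(std::uint64_t mask, unsigned bit) {
    return static_cast<unsigned>(
        __builtin_popcountll(mask & ((1ULL << bit) - 1)));
}

// exception index = per-chunk base + rank of the bit in that chunk's mask
unsigned exceptionIndex(const std::uint64_t *emaskChunks,
                        const unsigned *baseIndex,
                        unsigned chunk, unsigned bit) {
    return baseIndex[chunk] + rankInMask64(emaskChunks[chunk], bit);
}
```

This is exactly why base_index[] above is built as a running popcount over emask_chunks: exceptions are stored densely in mask-bit order.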
CHUNK_T chunks[sizeof(STATE_T) / sizeof(CHUNK_T)]; - CHUNK_T emask_chunks[sizeof(STATE_T) / sizeof(CHUNK_T)]; + CHUNK_T emask_chunks[sizeof(STATE_T) / sizeof(CHUNK_T)]; #ifdef ESTATE_ON_STACK memcpy(chunks, &estate, sizeof(STATE_T)); #else memcpy(chunks, estatep, sizeof(STATE_T)); #endif - memcpy(emask_chunks, &limex->exceptionMask, sizeof(STATE_T)); - - u32 base_index[sizeof(STATE_T) / sizeof(CHUNK_T)]; - base_index[0] = 0; - for (s32 i = 0; i < (s32)ARRAY_LENGTH(base_index) - 1; i++) { - base_index[i + 1] = base_index[i] + POPCOUNT_FN(emask_chunks[i]); - } - + memcpy(emask_chunks, &limex->exceptionMask, sizeof(STATE_T)); + + u32 base_index[sizeof(STATE_T) / sizeof(CHUNK_T)]; + base_index[0] = 0; + for (s32 i = 0; i < (s32)ARRAY_LENGTH(base_index) - 1; i++) { + base_index[i + 1] = base_index[i] + POPCOUNT_FN(emask_chunks[i]); + } + do { u32 t = findAndClearLSB_32(&diffmask); #ifdef ARCH_64_BIT @@ -326,17 +326,17 @@ int PE_FN(STATE_ARG, ESTATE_ARG, UNUSED u32 diffmask, STATE_T *succ, CHUNK_T word = chunks[t]; assert(word != 0); do { - u32 bit = FIND_AND_CLEAR_FN(&word); - u32 local_index = RANK_IN_MASK_FN(emask_chunks[t], bit); - u32 idx = local_index + base_index[t]; + u32 bit = FIND_AND_CLEAR_FN(&word); + u32 local_index = RANK_IN_MASK_FN(emask_chunks[t], bit); + u32 idx = local_index + base_index[t]; const EXCEPTION_T *e = &exceptions[idx]; if (!RUN_EXCEPTION_FN(e, STATE_ARG_NAME, succ, #ifndef BIG_MODEL &local_succ, #endif - limex, offset, ctx, &new_cache, &cacheable, - in_rev, flags)) { + limex, offset, ctx, &new_cache, &cacheable, + in_rev, flags)) { return PE_RV_HALT; } } while (word); @@ -344,23 +344,23 @@ int PE_FN(STATE_ARG, ESTATE_ARG, UNUSED u32 diffmask, STATE_T *succ, #endif #ifndef BIG_MODEL - *succ = OR_STATE(*succ, local_succ); + *succ = OR_STATE(*succ, local_succ); #else - *succ = OR_STATE(*succ, ctx->local_succ); + *succ = OR_STATE(*succ, ctx->local_succ); #endif if (cacheable == CACHE_RESULT) { - ctx->cached_estate = estate; + ctx->cached_estate = estate; #ifndef BIG_MODEL ctx->cached_esucc = local_succ; #else - ctx->cached_esucc = ctx->local_succ; + ctx->cached_esucc = ctx->local_succ; #endif ctx->cached_reports = new_cache.reports; ctx->cached_br = new_cache.br; } else if (cacheable == DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES) { if (ctx->cached_br) { - ctx->cached_estate = ZERO_STATE; + ctx->cached_estate = ZERO_STATE; } } @@ -393,9 +393,9 @@ int PE_FN(STATE_ARG, ESTATE_ARG, UNUSED u32 diffmask, STATE_T *succ, #undef STATE_ARG_NAME #undef STATE_ARG_P -#undef IMPL_NFA_T - +#undef IMPL_NFA_T + #undef CHUNK_T #undef FIND_AND_CLEAR_FN -#undef POPCOUNT_FN -#undef RANK_IN_MASK_FN +#undef POPCOUNT_FN +#undef RANK_IN_MASK_FN diff --git a/contrib/libs/hyperscan/src/nfa/limex_internal.h b/contrib/libs/hyperscan/src/nfa/limex_internal.h index 23b1bd9707..2b0ba02a77 100644 --- a/contrib/libs/hyperscan/src/nfa/limex_internal.h +++ b/contrib/libs/hyperscan/src/nfa/limex_internal.h @@ -68,9 +68,9 @@ The value of NFA.stateSize gives the total state size in bytes (the sum of all the above). - Number of shifts should be always greater or equal to 1 - Number of shifts 0 means that no appropriate NFA engine was found. - + Number of shifts should be always greater or equal to 1 + Number of shifts 0 means that no appropriate NFA engine was found. 
+ */ #ifndef LIMEX_INTERNAL_H @@ -80,12 +80,12 @@ #include "repeat_internal.h" // Constants -#define MAX_SHIFT_COUNT 8 /**< largest number of shifts used by a LimEx NFA */ -#define MAX_SHIFT_AMOUNT 16 /**< largest shift amount used by a LimEx NFA */ +#define MAX_SHIFT_COUNT 8 /**< largest number of shifts used by a LimEx NFA */ +#define MAX_SHIFT_AMOUNT 16 /**< largest shift amount used by a LimEx NFA */ #define LIMEX_FLAG_COMPRESS_STATE 1 /**< pack state into stream state */ #define LIMEX_FLAG_COMPRESS_MASKED 2 /**< use reach mask-based compression */ -#define LIMEX_FLAG_CANNOT_DIE 4 /**< limex cannot have no states on */ +#define LIMEX_FLAG_CANNOT_DIE 4 /**< limex cannot have no states on */ #define LIMEX_FLAG_EXTRACT_EXP 8 /**< use limex exception bit extraction */ enum LimExTrigger { @@ -121,7 +121,7 @@ struct NFAException##size { \ u8 trigger; /**< from enum LimExTrigger */ \ }; \ \ -struct LimExNFA##size { \ +struct LimExNFA##size { \ u8 reachMap[N_CHARS]; /**< map of char -> entry in reach[] */ \ u32 reachSize; /**< number of reach masks */ \ u32 accelCount; /**< number of entries in accel table */ \ @@ -153,18 +153,18 @@ struct LimExNFA##size { \ * followers */ \ u_##size compressMask; /**< switch off before compress */ \ u_##size exceptionMask; \ - u_##size repeatCyclicMask; /**< also includes tug states */ \ + u_##size repeatCyclicMask; /**< also includes tug states */ \ u_##size zombieMask; /**< zombie if in any of the set states */ \ - u_##size shift[MAX_SHIFT_COUNT]; \ - u32 shiftCount; /**< number of shift masks used */ \ - u8 shiftAmount[MAX_SHIFT_COUNT]; /**< shift amount for each mask */ \ + u_##size shift[MAX_SHIFT_COUNT]; \ + u32 shiftCount; /**< number of shift masks used */ \ + u8 shiftAmount[MAX_SHIFT_COUNT]; /**< shift amount for each mask */ \ m512 exceptionShufMask; /**< exception byte shuffle mask */ \ m512 exceptionBitMask; /**< exception bit mask */ \ m512 exceptionAndMask; /**< exception and mask */ \ }; CREATE_NFA_LIMEX(32) -CREATE_NFA_LIMEX(64) +CREATE_NFA_LIMEX(64) CREATE_NFA_LIMEX(128) CREATE_NFA_LIMEX(256) CREATE_NFA_LIMEX(384) @@ -188,16 +188,16 @@ struct NFARepeatInfo { }; struct NFAAccept { - u8 single_report; //!< If true, 'reports' is report id. - - /** - * \brief If single report is true, this is the report id to fire. - * Otherwise, it is the offset (relative to the start of the LimExNFA - * structure) of a list of reports, terminated with MO_INVALID_IDX. - */ - u32 reports; - - u32 squash; //!< Offset (from LimEx) into squash masks, or MO_INVALID_IDX. + u8 single_report; //!< If true, 'reports' is report id. + + /** + * \brief If single report is true, this is the report id to fire. + * Otherwise, it is the offset (relative to the start of the LimExNFA + * structure) of a list of reports, terminated with MO_INVALID_IDX. + */ + u32 reports; + + u32 squash; //!< Offset (from LimEx) into squash masks, or MO_INVALID_IDX. 
}; #endif diff --git a/contrib/libs/hyperscan/src/nfa/limex_limits.h b/contrib/libs/hyperscan/src/nfa/limex_limits.h index f4df54a4b0..37a496dee9 100644 --- a/contrib/libs/hyperscan/src/nfa/limex_limits.h +++ b/contrib/libs/hyperscan/src/nfa/limex_limits.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: diff --git a/contrib/libs/hyperscan/src/nfa/limex_native.c b/contrib/libs/hyperscan/src/nfa/limex_native.c index f6f5809c36..5a5e92d240 100644 --- a/contrib/libs/hyperscan/src/nfa/limex_native.c +++ b/contrib/libs/hyperscan/src/nfa/limex_native.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -49,11 +49,11 @@ #include "limex_runtime.h" // Other implementation code from X-Macro impl. -#define SIZE 32 -#define STATE_T u32 -#define ENG_STATE_T u32 -#define LOAD_FROM_ENG load_u32 - +#define SIZE 32 +#define STATE_T u32 +#define ENG_STATE_T u32 +#define LOAD_FROM_ENG load_u32 + #include "limex_state_impl.h" #define INLINE_ATTR really_inline @@ -73,7 +73,7 @@ static really_inline int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ, const struct LimExNFA32 *limex, - const struct NFAException32 *exceptions, u64a offset, + const struct NFAException32 *exceptions, u64a offset, struct NFAContext32 *ctx, char in_rev, char flags) { assert(estate != 0); // guaranteed by calling macro @@ -101,10 +101,10 @@ int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ, do { u32 bit = findAndClearLSB_32(&estate); - u32 idx = rank_in_mask32(limex->exceptionMask, bit); + u32 idx = rank_in_mask32(limex->exceptionMask, bit); const struct NFAException32 *e = &exceptions[idx]; - if (!runException32(e, s, succ, &local_succ, limex, offset, ctx, - &new_cache, &cacheable, in_rev, flags)) { + if (!runException32(e, s, succ, &local_succ, limex, offset, ctx, + &new_cache, &cacheable, in_rev, flags)) { return PE_RV_HALT; } } while (estate != 0); diff --git a/contrib/libs/hyperscan/src/nfa/limex_runtime.h b/contrib/libs/hyperscan/src/nfa/limex_runtime.h index 6109d382d8..f307b21909 100644 --- a/contrib/libs/hyperscan/src/nfa/limex_runtime.h +++ b/contrib/libs/hyperscan/src/nfa/limex_runtime.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,8 +30,8 @@ \brief Limex Execution Engine Or: How I Learned To Stop Worrying And Love The Preprocessor - This file includes utility functions which do not depend on the size of the - state or shift masks directly. + This file includes utility functions which do not depend on the size of the + state or shift masks directly. 
*/ #ifndef LIMEX_RUNTIME_H @@ -95,7 +95,7 @@ int limexRunReports(const ReportID *reports, NfaCallback callback, for (; *reports != MO_INVALID_IDX; ++reports) { DEBUG_PRINTF("firing report for id %u at offset %llu\n", *reports, offset); - int rv = callback(0, offset, *reports, context); + int rv = callback(0, offset, *reports, context); if (rv == MO_HALT_MATCHING) { return MO_HALT_MATCHING; } @@ -103,38 +103,38 @@ int limexRunReports(const ReportID *reports, NfaCallback callback, return MO_CONTINUE_MATCHING; // continue } -static really_inline -int limexRunAccept(const char *limex_base, const struct NFAAccept *accept, - NfaCallback callback, void *context, u64a offset) { - if (accept->single_report) { - const ReportID report = accept->reports; - DEBUG_PRINTF("firing single report for id %u at offset %llu\n", report, - offset); - return callback(0, offset, report, context); - } - const ReportID *reports = (const ReportID *)(limex_base + accept->reports); - return limexRunReports(reports, callback, context, offset); -} - -static really_inline -int limexAcceptHasReport(const char *limex_base, const struct NFAAccept *accept, - ReportID report) { - if (accept->single_report) { - return accept->reports == report; - } - - const ReportID *reports = (const ReportID *)(limex_base + accept->reports); - assert(*reports != MO_INVALID_IDX); - do { - if (*reports == report) { - return 1; - } - reports++; - } while (*reports != MO_INVALID_IDX); - - return 0; -} - +static really_inline +int limexRunAccept(const char *limex_base, const struct NFAAccept *accept, + NfaCallback callback, void *context, u64a offset) { + if (accept->single_report) { + const ReportID report = accept->reports; + DEBUG_PRINTF("firing single report for id %u at offset %llu\n", report, + offset); + return callback(0, offset, report, context); + } + const ReportID *reports = (const ReportID *)(limex_base + accept->reports); + return limexRunReports(reports, callback, context, offset); +} + +static really_inline +int limexAcceptHasReport(const char *limex_base, const struct NFAAccept *accept, + ReportID report) { + if (accept->single_report) { + return accept->reports == report; + } + + const ReportID *reports = (const ReportID *)(limex_base + accept->reports); + assert(*reports != MO_INVALID_IDX); + do { + if (*reports == report) { + return 1; + } + reports++; + } while (*reports != MO_INVALID_IDX); + + return 0; +} + /** \brief Return a (correctly typed) pointer to the exception table. */ #define getExceptionTable(exc_type, lim) \ ((const exc_type *)((const char *)(lim) + (lim)->exceptionOffset)) @@ -163,7 +163,7 @@ int limexAcceptHasReport(const char *limex_base, const struct NFAAccept *accept, } MAKE_GET_NFA_REPEAT_INFO(32) -MAKE_GET_NFA_REPEAT_INFO(64) +MAKE_GET_NFA_REPEAT_INFO(64) MAKE_GET_NFA_REPEAT_INFO(128) MAKE_GET_NFA_REPEAT_INFO(256) MAKE_GET_NFA_REPEAT_INFO(384) diff --git a/contrib/libs/hyperscan/src/nfa/limex_runtime_impl.h b/contrib/libs/hyperscan/src/nfa/limex_runtime_impl.h index 7b89182bea..6486ffe8d7 100644 --- a/contrib/libs/hyperscan/src/nfa/limex_runtime_impl.h +++ b/contrib/libs/hyperscan/src/nfa/limex_runtime_impl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,12 +36,12 @@ * Version 2.0: now with X-Macros, so you get line numbers in your debugger. 
*/ - -#if !defined(SIZE) || !defined(STATE_T) || !defined(LOAD_FROM_ENG) -# error Must define SIZE, STATE_T, LOAD_FROM_ENG in includer. + +#if !defined(SIZE) || !defined(STATE_T) || !defined(LOAD_FROM_ENG) +# error Must define SIZE, STATE_T, LOAD_FROM_ENG in includer. #endif -#define LIMEX_API_ROOT JOIN(nfaExecLimEx, SIZE) +#define LIMEX_API_ROOT JOIN(nfaExecLimEx, SIZE) #define IMPL_NFA_T JOIN(struct LimExNFA, SIZE) @@ -60,7 +60,7 @@ #define RUN_ACCEL_FN JOIN(LIMEX_API_ROOT, _Run_Accel) #define RUN_EXCEPTIONS_FN JOIN(LIMEX_API_ROOT, _Run_Exceptions) #define REV_STREAM_FN JOIN(LIMEX_API_ROOT, _Rev_Stream) -#define LOOP_NOACCEL_FN JOIN(LIMEX_API_ROOT, _Loop_No_Accel) +#define LOOP_NOACCEL_FN JOIN(LIMEX_API_ROOT, _Loop_No_Accel) #define STREAM_FN JOIN(LIMEX_API_ROOT, _Stream) #define STREAMCB_FN JOIN(LIMEX_API_ROOT, _Stream_CB) #define STREAMFIRST_FN JOIN(LIMEX_API_ROOT, _Stream_First) @@ -70,9 +70,9 @@ #define AND_STATE JOIN(and_, STATE_T) #define ANDNOT_STATE JOIN(andnot_, STATE_T) #define OR_STATE JOIN(or_, STATE_T) -#define LSHIFT_STATE JOIN(lshift_, STATE_T) +#define LSHIFT_STATE JOIN(lshift_, STATE_T) #define TESTBIT_STATE JOIN(testbit_, STATE_T) -#define CLEARBIT_STATE JOIN(clearbit_, STATE_T) +#define CLEARBIT_STATE JOIN(clearbit_, STATE_T) #define ZERO_STATE JOIN(zero_, STATE_T) #define ISNONZERO_STATE JOIN(isNonZero_, STATE_T) #define ISZERO_STATE JOIN(isZero_, STATE_T) @@ -95,9 +95,9 @@ #define ACCEL_AND_FRIENDS_MASK accel_and_friendsMask #define EXCEPTION_MASK exceptionMask #else -#define ACCEL_MASK LOAD_FROM_ENG(&limex->accel) -#define ACCEL_AND_FRIENDS_MASK LOAD_FROM_ENG(&limex->accel_and_friends) -#define EXCEPTION_MASK LOAD_FROM_ENG(&limex->exceptionMask) +#define ACCEL_MASK LOAD_FROM_ENG(&limex->accel) +#define ACCEL_AND_FRIENDS_MASK LOAD_FROM_ENG(&limex->accel_and_friends) +#define EXCEPTION_MASK LOAD_FROM_ENG(&limex->exceptionMask) #endif // Run exception processing, if necessary. Returns 0 if scanning should @@ -115,13 +115,13 @@ char RUN_EXCEPTIONS_FN(const IMPL_NFA_T *limex, const EXCEPTION_T *exceptions, } if (first_match && i) { - STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept); + STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept); STATE_T foundAccepts = AND_STATE(s, acceptMask); if (unlikely(ISNONZERO_STATE(foundAccepts))) { DEBUG_PRINTF("first match at %zu\n", i); DEBUG_PRINTF("for nfa %p\n", limex); assert(final_loc); - ctx->s = s; + ctx->s = s; *final_loc = i; return 1; // Halt matching. } @@ -131,8 +131,8 @@ char RUN_EXCEPTIONS_FN(const IMPL_NFA_T *limex, const EXCEPTION_T *exceptions, char localflags = (!i && !in_rev) ? NO_OUTPUT | FIRST_BYTE : flags; int rv = JOIN(processExceptional, SIZE)( - pass_state, pass_estate, diffmask, succ, limex, exceptions, - callback_offset, ctx, in_rev, localflags); + pass_state, pass_estate, diffmask, succ, limex, exceptions, + callback_offset, ctx, in_rev, localflags); if (rv == PE_RV_HALT) { return 1; // Halt matching. } @@ -159,107 +159,107 @@ size_t RUN_ACCEL_FN(const STATE_T s, UNUSED const STATE_T accelMask, return j; } -// Shift macros for Limited NFAs. Defined in terms of uniform ops. -// LimExNFAxxx ptr in 'limex' and the current state in 's' -#define NFA_EXEC_LIM_SHIFT(limex_m, curr_m, shift_idx) \ - LSHIFT_STATE(AND_STATE(curr_m, LOAD_FROM_ENG(&limex_m->shift[shift_idx])), \ - limex_m->shiftAmount[shift_idx]) - -// Calculate the (limited model) successors for a number of variable shifts. -// Assumes current state in 'curr_m' and places the successors in 'succ_m'. 
-#define NFA_EXEC_GET_LIM_SUCC(limex_m, curr_m, succ_m) \ - do { \ - succ_m = NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 0); \ - switch (limex_m->shiftCount) { \ - case 8: \ - succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 7)); \ - /* fallthrough */ \ - case 7: \ - succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 6)); \ - /* fallthrough */ \ - case 6: \ - succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 5)); \ - /* fallthrough */ \ - case 5: \ - succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 4)); \ - /* fallthrough */ \ - case 4: \ - succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 3)); \ - /* fallthrough */ \ - case 3: \ - succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 2)); \ - /* fallthrough */ \ - case 2: \ - succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 1)); \ - /* fallthrough */ \ - case 1: \ - /* fallthrough */ \ - case 0: \ - ; \ - } \ - } while (0) - -/** - * \brief LimEx NFAS inner loop without accel. - * - * Note that the "all zeroes" early death check is only performed if can_die is - * true. - * - */ -static really_inline -char LOOP_NOACCEL_FN(const IMPL_NFA_T *limex, const u8 *input, size_t *loc, - size_t length, STATE_T *s_ptr, struct CONTEXT_T *ctx, - u64a offset, const char flags, u64a *final_loc, - const char first_match, const char can_die) { - const ENG_STATE_T *reach = get_reach_table(limex); -#if SIZE < 256 - const STATE_T exceptionMask = LOAD_FROM_ENG(&limex->exceptionMask); -#endif - const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex); - STATE_T s = *s_ptr; - - size_t i = *loc; - for (; i != length; i++) { - DUMP_INPUT(i); - if (can_die && ISZERO_STATE(s)) { - DEBUG_PRINTF("no states are switched on, early exit\n"); - break; - } - - STATE_T succ; - NFA_EXEC_GET_LIM_SUCC(limex, s, succ); - - if (RUN_EXCEPTIONS_FN(limex, exceptions, s, EXCEPTION_MASK, i, offset, - &succ, final_loc, ctx, flags, 0, first_match)) { - return MO_HALT_MATCHING; - } - - u8 c = input[i]; - s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]])); - } - - *loc = i; - *s_ptr = s; - return MO_CONTINUE_MATCHING; -} - +// Shift macros for Limited NFAs. Defined in terms of uniform ops. +// LimExNFAxxx ptr in 'limex' and the current state in 's' +#define NFA_EXEC_LIM_SHIFT(limex_m, curr_m, shift_idx) \ + LSHIFT_STATE(AND_STATE(curr_m, LOAD_FROM_ENG(&limex_m->shift[shift_idx])), \ + limex_m->shiftAmount[shift_idx]) + +// Calculate the (limited model) successors for a number of variable shifts. +// Assumes current state in 'curr_m' and places the successors in 'succ_m'. 
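/*
 * Scalar model of the successor computation defined by the macro re-added
 * just below, written as a loop instead of the unrolled fallthrough switch.
 * Illustrative sketch only (helper name invented); it relies on the
 * invariant noted in limex_internal.h that shiftCount is at least 1:
 */
static inline uint32_t lim_succ_sketch(const uint32_t *shift_masks,
                                       const uint8_t *shift_amounts,
                                       uint32_t shift_count, uint32_t curr) {
    uint32_t succ = 0;
    for (uint32_t i = 0; i < shift_count; i++) {
        /* only the states selected by the i-th mask take part in the
         * i-th shift; OR the shifted results together */
        succ |= (curr & shift_masks[i]) << shift_amounts[i];
    }
    return succ;
}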
+#define NFA_EXEC_GET_LIM_SUCC(limex_m, curr_m, succ_m) \ + do { \ + succ_m = NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 0); \ + switch (limex_m->shiftCount) { \ + case 8: \ + succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 7)); \ + /* fallthrough */ \ + case 7: \ + succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 6)); \ + /* fallthrough */ \ + case 6: \ + succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 5)); \ + /* fallthrough */ \ + case 5: \ + succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 4)); \ + /* fallthrough */ \ + case 4: \ + succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 3)); \ + /* fallthrough */ \ + case 3: \ + succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 2)); \ + /* fallthrough */ \ + case 2: \ + succ_m = OR_STATE(succ_m, NFA_EXEC_LIM_SHIFT(limex_m, curr_m, 1)); \ + /* fallthrough */ \ + case 1: \ + /* fallthrough */ \ + case 0: \ + ; \ + } \ + } while (0) + +/** + * \brief LimEx NFAS inner loop without accel. + * + * Note that the "all zeroes" early death check is only performed if can_die is + * true. + * + */ static really_inline +char LOOP_NOACCEL_FN(const IMPL_NFA_T *limex, const u8 *input, size_t *loc, + size_t length, STATE_T *s_ptr, struct CONTEXT_T *ctx, + u64a offset, const char flags, u64a *final_loc, + const char first_match, const char can_die) { + const ENG_STATE_T *reach = get_reach_table(limex); +#if SIZE < 256 + const STATE_T exceptionMask = LOAD_FROM_ENG(&limex->exceptionMask); +#endif + const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex); + STATE_T s = *s_ptr; + + size_t i = *loc; + for (; i != length; i++) { + DUMP_INPUT(i); + if (can_die && ISZERO_STATE(s)) { + DEBUG_PRINTF("no states are switched on, early exit\n"); + break; + } + + STATE_T succ; + NFA_EXEC_GET_LIM_SUCC(limex, s, succ); + + if (RUN_EXCEPTIONS_FN(limex, exceptions, s, EXCEPTION_MASK, i, offset, + &succ, final_loc, ctx, flags, 0, first_match)) { + return MO_HALT_MATCHING; + } + + u8 c = input[i]; + s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]])); + } + + *loc = i; + *s_ptr = s; + return MO_CONTINUE_MATCHING; +} + +static really_inline char STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, struct CONTEXT_T *ctx, u64a offset, const char flags, u64a *final_loc, const char first_match) { - const ENG_STATE_T *reach = get_reach_table(limex); + const ENG_STATE_T *reach = get_reach_table(limex); #if SIZE < 256 - const STATE_T accelMask = LOAD_FROM_ENG(&limex->accel); - const STATE_T accel_and_friendsMask - = LOAD_FROM_ENG(&limex->accel_and_friends); - const STATE_T exceptionMask = LOAD_FROM_ENG(&limex->exceptionMask); + const STATE_T accelMask = LOAD_FROM_ENG(&limex->accel); + const STATE_T accel_and_friendsMask + = LOAD_FROM_ENG(&limex->accel_and_friends); + const STATE_T exceptionMask = LOAD_FROM_ENG(&limex->exceptionMask); #endif - const u8 *accelTable = - (const u8 *)((const char *)limex + limex->accelTableOffset); + const u8 *accelTable = + (const u8 *)((const char *)limex + limex->accelTableOffset); const union AccelAux *accelAux = (const union AccelAux *)((const char *)limex + limex->accelAuxOffset); const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex); - STATE_T s = ctx->s; + STATE_T s = ctx->s; /* assert(ISALIGNED_16(exceptions)); */ /* assert(ISALIGNED_16(reach)); */ @@ -274,18 +274,18 @@ char STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, } without_accel: - if (limex->flags & LIMEX_FLAG_CANNOT_DIE) { - const char can_die = 
0; - if (LOOP_NOACCEL_FN(limex, input, &i, min_accel_offset, &s, ctx, offset, - flags, final_loc, first_match, - can_die) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; + if (limex->flags & LIMEX_FLAG_CANNOT_DIE) { + const char can_die = 0; + if (LOOP_NOACCEL_FN(limex, input, &i, min_accel_offset, &s, ctx, offset, + flags, final_loc, first_match, + can_die) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; } - } else { - const char can_die = 1; - if (LOOP_NOACCEL_FN(limex, input, &i, min_accel_offset, &s, ctx, offset, - flags, final_loc, first_match, - can_die) == MO_HALT_MATCHING) { + } else { + const char can_die = 1; + if (LOOP_NOACCEL_FN(limex, input, &i, min_accel_offset, &s, ctx, offset, + flags, final_loc, first_match, + can_die) == MO_HALT_MATCHING) { return MO_HALT_MATCHING; } } @@ -329,31 +329,31 @@ with_accel: } STATE_T succ; - NFA_EXEC_GET_LIM_SUCC(limex, s, succ); + NFA_EXEC_GET_LIM_SUCC(limex, s, succ); - if (RUN_EXCEPTIONS_FN(limex, exceptions, s, EXCEPTION_MASK, i, offset, - &succ, final_loc, ctx, flags, 0, first_match)) { + if (RUN_EXCEPTIONS_FN(limex, exceptions, s, EXCEPTION_MASK, i, offset, + &succ, final_loc, ctx, flags, 0, first_match)) { return MO_HALT_MATCHING; } - u8 c = input[i]; - s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]])); + u8 c = input[i]; + s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]])); } - ctx->s = s; + ctx->s = s; if ((first_match || (flags & CALLBACK_OUTPUT)) && limex->acceptCount) { - STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept); + STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept); const struct NFAAccept *acceptTable = getAcceptTable(limex); STATE_T foundAccepts = AND_STATE(s, acceptMask); if (unlikely(ISNONZERO_STATE(foundAccepts))) { if (first_match) { - ctx->s = s; + ctx->s = s; assert(final_loc); *final_loc = length; return MO_HALT_MATCHING; - } else if (PROCESS_ACCEPTS_FN(limex, &ctx->s, &acceptMask, - acceptTable, offset + length, + } else if (PROCESS_ACCEPTS_FN(limex, &ctx->s, &acceptMask, + acceptTable, offset + length, ctx->callback, ctx->context)) { return MO_HALT_MATCHING; } @@ -369,12 +369,12 @@ with_accel: static never_inline char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, struct CONTEXT_T *ctx, u64a offset) { - const ENG_STATE_T *reach = get_reach_table(limex); + const ENG_STATE_T *reach = get_reach_table(limex); #if SIZE < 256 - const STATE_T exceptionMask = LOAD_FROM_ENG(&limex->exceptionMask); + const STATE_T exceptionMask = LOAD_FROM_ENG(&limex->exceptionMask); #endif const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex); - STATE_T s = ctx->s; + STATE_T s = ctx->s; /* assert(ISALIGNED_16(exceptions)); */ /* assert(ISALIGNED_16(reach)); */ @@ -382,36 +382,36 @@ char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, u64a *final_loc = NULL; for (size_t i = length; i != 0; i--) { - DUMP_INPUT(i - 1); + DUMP_INPUT(i - 1); if (ISZERO_STATE(s)) { DEBUG_PRINTF("no states are switched on, early exit\n"); - ctx->s = s; + ctx->s = s; return MO_CONTINUE_MATCHING; } STATE_T succ; - NFA_EXEC_GET_LIM_SUCC(limex, s, succ); + NFA_EXEC_GET_LIM_SUCC(limex, s, succ); - if (RUN_EXCEPTIONS_FN(limex, exceptions, s, EXCEPTION_MASK, i, offset, - &succ, final_loc, ctx, flags, 1, 0)) { + if (RUN_EXCEPTIONS_FN(limex, exceptions, s, EXCEPTION_MASK, i, offset, + &succ, final_loc, ctx, flags, 1, 0)) { return MO_HALT_MATCHING; } - u8 c = input[i - 1]; - s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]])); + u8 c = input[i - 1]; + s = 
AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]])); } - ctx->s = s; + ctx->s = s; - STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept); + STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept); const struct NFAAccept *acceptTable = getAcceptTable(limex); const u32 acceptCount = limex->acceptCount; assert(flags & CALLBACK_OUTPUT); if (acceptCount) { STATE_T foundAccepts = AND_STATE(s, acceptMask); if (unlikely(ISNONZERO_STATE(foundAccepts))) { - if (PROCESS_ACCEPTS_NOSQUASH_FN(limex, &ctx->s, &acceptMask, - acceptTable, offset, ctx->callback, + if (PROCESS_ACCEPTS_NOSQUASH_FN(limex, &ctx->s, &acceptMask, + acceptTable, offset, ctx->callback, ctx->context)) { return MO_HALT_MATCHING; } @@ -421,53 +421,53 @@ char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, } static really_inline -void COMPRESS_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, void *src, +void COMPRESS_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, void *src, u64a offset) { if (!limex->repeatCount) { return; } - STATE_T s = *(STATE_T *)src; - - if (ISZERO_STATE(AND_STATE(LOAD_FROM_ENG(&limex->repeatCyclicMask), s))) { - DEBUG_PRINTF("no cyclics are on\n"); - return; - } + STATE_T s = *(STATE_T *)src; + if (ISZERO_STATE(AND_STATE(LOAD_FROM_ENG(&limex->repeatCyclicMask), s))) { + DEBUG_PRINTF("no cyclics are on\n"); + return; + } + const union RepeatControl *ctrl = getRepeatControlBaseConst((const char *)src, sizeof(STATE_T)); char *state_base = (char *)dest + limex->stateSize; for (u32 i = 0; i < limex->repeatCount; i++) { - DEBUG_PRINTF("repeat %u\n", i); + DEBUG_PRINTF("repeat %u\n", i); const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i); - - const ENG_STATE_T *tug_mask = - (const ENG_STATE_T *)((const char *)info + info->tugMaskOffset); - /* repeat may still be inspected if its tug state is on */ - if (!TESTBIT_STATE(s, info->cyclicState) - && ISZERO_STATE(AND_STATE(s, LOAD_FROM_ENG(tug_mask)))) { - DEBUG_PRINTF("is dead\n"); - continue; - } - + + const ENG_STATE_T *tug_mask = + (const ENG_STATE_T *)((const char *)info + info->tugMaskOffset); + /* repeat may still be inspected if its tug state is on */ + if (!TESTBIT_STATE(s, info->cyclicState) + && ISZERO_STATE(AND_STATE(s, LOAD_FROM_ENG(tug_mask)))) { + DEBUG_PRINTF("is dead\n"); + continue; + } + const struct RepeatInfo *repeat = getRepeatInfo(info); - DEBUG_PRINTF("packing state (packedCtrlOffset=%u)\n", - info->packedCtrlOffset); + DEBUG_PRINTF("packing state (packedCtrlOffset=%u)\n", + info->packedCtrlOffset); repeatPack(state_base + info->packedCtrlOffset, repeat, &ctrl[i], offset); } - - *(STATE_T *)src = s; + + *(STATE_T *)src = s; } char JOIN(LIMEX_API_ROOT, _queueCompressState)(const struct NFA *n, - const struct mq *q, s64a loc) { + const struct mq *q, s64a loc) { void *dest = q->streamState; - void *src = q->state; + void *src = q->state; u8 key = queue_prev_byte(q, loc); const IMPL_NFA_T *limex = getImplNfa(n); - COMPRESS_REPEATS_FN(limex, dest, src, q->offset + loc); + COMPRESS_REPEATS_FN(limex, dest, src, q->offset + loc); COMPRESS_FN(limex, dest, src, key); return 0; } @@ -479,32 +479,32 @@ void EXPAND_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, const void *src, return; } - // Note: state has already been expanded into 'dest'. - const STATE_T cyclics = - AND_STATE(*(STATE_T *)dest, LOAD_FROM_ENG(&limex->repeatCyclicMask)); - if (ISZERO_STATE(cyclics)) { - DEBUG_PRINTF("no cyclics are on\n"); - return; - } + // Note: state has already been expanded into 'dest'. 
+ const STATE_T cyclics = + AND_STATE(*(STATE_T *)dest, LOAD_FROM_ENG(&limex->repeatCyclicMask)); + if (ISZERO_STATE(cyclics)) { + DEBUG_PRINTF("no cyclics are on\n"); + return; + } union RepeatControl *ctrl = getRepeatControlBase((char *)dest, sizeof(STATE_T)); const char *state_base = (const char *)src + limex->stateSize; for (u32 i = 0; i < limex->repeatCount; i++) { - DEBUG_PRINTF("repeat %u\n", i); + DEBUG_PRINTF("repeat %u\n", i); const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i); - const ENG_STATE_T *tug_mask = - (const ENG_STATE_T *)((const char *)info + info->tugMaskOffset); - - if (!TESTBIT_STATE(cyclics, info->cyclicState) - && ISZERO_STATE(AND_STATE(cyclics, LOAD_FROM_ENG(tug_mask)))) { - DEBUG_PRINTF("is dead\n"); - continue; - } - - DEBUG_PRINTF("unpacking state (packedCtrlOffset=%u)\n", - info->packedCtrlOffset); + const ENG_STATE_T *tug_mask = + (const ENG_STATE_T *)((const char *)info + info->tugMaskOffset); + + if (!TESTBIT_STATE(cyclics, info->cyclicState) + && ISZERO_STATE(AND_STATE(cyclics, LOAD_FROM_ENG(tug_mask)))) { + DEBUG_PRINTF("is dead\n"); + continue; + } + + DEBUG_PRINTF("unpacking state (packedCtrlOffset=%u)\n", + info->packedCtrlOffset); const struct RepeatInfo *repeat = getRepeatInfo(info); repeatUnpack(state_base + info->packedCtrlOffset, repeat, offset, &ctrl[i]); @@ -520,8 +520,8 @@ char JOIN(LIMEX_API_ROOT, _expandState)(const struct NFA *n, void *dest, return 0; } -char JOIN(LIMEX_API_ROOT, _queueInitState)(const struct NFA *n, struct mq *q) { - *(STATE_T *)q->state = ZERO_STATE; +char JOIN(LIMEX_API_ROOT, _queueInitState)(const struct NFA *n, struct mq *q) { + *(STATE_T *)q->state = ZERO_STATE; // Zero every bounded repeat control block in state. const IMPL_NFA_T *limex = getImplNfa(n); @@ -601,7 +601,7 @@ void JOIN(LIMEX_API_ROOT, _HandleEvent)(const IMPL_NFA_T *limex, u32 e = q->items[q->cur].type; switch (e) { DEFINE_CASE(MQE_TOP) - ctx->s = TOP_FN(limex, !!sp, ctx->s); + ctx->s = TOP_FN(limex, !!sp, ctx->s); break; DEFINE_CASE(MQE_START) break; @@ -611,7 +611,7 @@ void JOIN(LIMEX_API_ROOT, _HandleEvent)(const IMPL_NFA_T *limex, assert(e >= MQE_TOP_FIRST); assert(e < MQE_INVALID); DEBUG_PRINTF("MQE_TOP + %d\n", ((int)e - MQE_TOP_FIRST)); - ctx->s = TOPN_FN(limex, ctx->s, e - MQE_TOP_FIRST); + ctx->s = TOPN_FN(limex, ctx->s, e - MQE_TOP_FIRST); } #undef DEFINE_CASE } @@ -636,17 +636,17 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) { assert(q->cur + 1 < q->end); /* require at least two items */ - struct CONTEXT_T ctx; - ctx.repeat_ctrl = getRepeatControlBase(q->state, sizeof(STATE_T)); - ctx.repeat_state = q->streamState + limex->stateSize; - ctx.callback = q->cb; - ctx.context = q->context; - ctx.cached_estate = ZERO_STATE; - ctx.cached_br = 0; + struct CONTEXT_T ctx; + ctx.repeat_ctrl = getRepeatControlBase(q->state, sizeof(STATE_T)); + ctx.repeat_state = q->streamState + limex->stateSize; + ctx.callback = q->cb; + ctx.context = q->context; + ctx.cached_estate = ZERO_STATE; + ctx.cached_br = 0; assert(q->items[q->cur].location >= 0); DEBUG_PRINTF("LOAD STATE\n"); - ctx.s = *(STATE_T *)q->state; + ctx.s = *(STATE_T *)q->state; assert(q->items[q->cur].type == MQE_START); u64a offset = q->offset; @@ -668,9 +668,9 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) { /* do main buffer region */ DEBUG_PRINTF("MAIN BUFFER SCAN\n"); assert(ep - offset <= q->length); - if (STREAMCB_FN(limex, q->buffer + sp - offset, ep - sp, &ctx, sp) + if (STREAMCB_FN(limex, q->buffer + sp - 
offset, ep - sp, &ctx, sp) == MO_HALT_MATCHING) { - *(STATE_T *)q->state = ZERO_STATE; + *(STATE_T *)q->state = ZERO_STATE; return 0; } @@ -687,19 +687,19 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) { q->items[q->cur].type = MQE_START; q->items[q->cur].location = sp - offset; DEBUG_PRINTF("bailing q->cur %u q->end %u\n", q->cur, q->end); - *(STATE_T *)q->state = ctx.s; + *(STATE_T *)q->state = ctx.s; return MO_ALIVE; } - JOIN(LIMEX_API_ROOT, _HandleEvent)(limex, q, &ctx, sp); + JOIN(LIMEX_API_ROOT, _HandleEvent)(limex, q, &ctx, sp); q->cur++; } - EXPIRE_ESTATE_FN(limex, &ctx, sp); + EXPIRE_ESTATE_FN(limex, &ctx, sp); DEBUG_PRINTF("END\n"); - *(STATE_T *)q->state = ctx.s; + *(STATE_T *)q->state = ctx.s; if (q->cur != q->end) { q->cur--; @@ -708,7 +708,7 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) { return MO_ALIVE; } - return ISNONZERO_STATE(ctx.s); + return ISNONZERO_STATE(ctx.s); } /* used by suffix execution in Rose */ @@ -731,16 +731,16 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) { assert(q->cur + 1 < q->end); /* require at least two items */ - struct CONTEXT_T ctx; - ctx.repeat_ctrl = getRepeatControlBase(q->state, sizeof(STATE_T)); - ctx.repeat_state = q->streamState + limex->stateSize; - ctx.callback = q->cb; - ctx.context = q->context; - ctx.cached_estate = ZERO_STATE; - ctx.cached_br = 0; + struct CONTEXT_T ctx; + ctx.repeat_ctrl = getRepeatControlBase(q->state, sizeof(STATE_T)); + ctx.repeat_state = q->streamState + limex->stateSize; + ctx.callback = q->cb; + ctx.context = q->context; + ctx.cached_estate = ZERO_STATE; + ctx.cached_br = 0; DEBUG_PRINTF("LOAD STATE\n"); - ctx.s = *(STATE_T *)q->state; + ctx.s = *(STATE_T *)q->state; assert(q->items[q->cur].type == MQE_START); u64a offset = q->offset; @@ -755,28 +755,28 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) { ep = MIN(ep, end_abs); assert(ep >= sp); - if (sp < offset) { - DEBUG_PRINTF("HISTORY BUFFER SCAN\n"); - assert(offset - sp <= q->hlength); - u64a local_ep = MIN(offset, ep); - u64a final_look = 0; - /* we are starting inside the history buffer */ - if (STREAMFIRST_FN(limex, q->history + q->hlength + sp - offset, - local_ep - sp, &ctx, sp, - &final_look) == MO_HALT_MATCHING) { - DEBUG_PRINTF("final_look:%llu sp:%llu end_abs:%llu " - "offset:%llu\n", final_look, sp, end_abs, offset); - assert(q->cur); - q->cur--; - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = sp + final_look - offset; - *(STATE_T *)q->state = ctx.s; - return MO_MATCHES_PENDING; - } - - sp = local_ep; - } - + if (sp < offset) { + DEBUG_PRINTF("HISTORY BUFFER SCAN\n"); + assert(offset - sp <= q->hlength); + u64a local_ep = MIN(offset, ep); + u64a final_look = 0; + /* we are starting inside the history buffer */ + if (STREAMFIRST_FN(limex, q->history + q->hlength + sp - offset, + local_ep - sp, &ctx, sp, + &final_look) == MO_HALT_MATCHING) { + DEBUG_PRINTF("final_look:%llu sp:%llu end_abs:%llu " + "offset:%llu\n", final_look, sp, end_abs, offset); + assert(q->cur); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = sp + final_look - offset; + *(STATE_T *)q->state = ctx.s; + return MO_MATCHES_PENDING; + } + + sp = local_ep; + } + if (sp >= ep) { goto scan_done; } @@ -784,7 +784,7 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) { /* do main buffer region */ u64a final_look = 0; assert(ep - offset <= q->length); - if (STREAMFIRST_FN(limex, q->buffer 
+ sp - offset, ep - sp, &ctx, sp, + if (STREAMFIRST_FN(limex, q->buffer + sp - offset, ep - sp, &ctx, sp, &final_look) == MO_HALT_MATCHING) { DEBUG_PRINTF("final_look:%llu sp:%llu end_abs:%llu offset:%llu\n", final_look, sp, end_abs, offset); @@ -792,7 +792,7 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) { q->cur--; q->items[q->cur].type = MQE_START; q->items[q->cur].location = sp + final_look - offset; - *(STATE_T *)q->state = ctx.s; + *(STATE_T *)q->state = ctx.s; return MO_MATCHES_PENDING; } @@ -808,19 +808,19 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) { q->items[q->cur].type = MQE_START; q->items[q->cur].location = sp - offset; DEBUG_PRINTF("bailing q->cur %u q->end %u\n", q->cur, q->end); - *(STATE_T *)q->state = ctx.s; + *(STATE_T *)q->state = ctx.s; return MO_ALIVE; } - JOIN(LIMEX_API_ROOT, _HandleEvent)(limex, q, &ctx, sp); + JOIN(LIMEX_API_ROOT, _HandleEvent)(limex, q, &ctx, sp); q->cur++; } - EXPIRE_ESTATE_FN(limex, &ctx, sp); + EXPIRE_ESTATE_FN(limex, &ctx, sp); DEBUG_PRINTF("END\n"); - *(STATE_T *)q->state = ctx.s; + *(STATE_T *)q->state = ctx.s; if (q->cur != q->end) { q->cur--; @@ -829,7 +829,7 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) { return MO_ALIVE; } - return ISNONZERO_STATE(ctx.s); + return ISNONZERO_STATE(ctx.s); } // Used for execution Rose prefix/infixes. @@ -843,16 +843,16 @@ char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q, assert(q->cur + 1 < q->end); /* require at least two items */ - struct CONTEXT_T ctx; - ctx.repeat_ctrl = getRepeatControlBase(q->state, sizeof(STATE_T)); - ctx.repeat_state = q->streamState + limex->stateSize; - ctx.callback = NULL; - ctx.context = NULL; - ctx.cached_estate = ZERO_STATE; - ctx.cached_br = 0; + struct CONTEXT_T ctx; + ctx.repeat_ctrl = getRepeatControlBase(q->state, sizeof(STATE_T)); + ctx.repeat_state = q->streamState + limex->stateSize; + ctx.callback = NULL; + ctx.context = NULL; + ctx.cached_estate = ZERO_STATE; + ctx.cached_br = 0; DEBUG_PRINTF("LOAD STATE\n"); - ctx.s = *(STATE_T *)q->state; + ctx.s = *(STATE_T *)q->state; assert(q->items[q->cur].type == MQE_START); u64a offset = q->offset; @@ -864,7 +864,7 @@ char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q, if (n->maxWidth) { if (ep - sp > n->maxWidth) { sp = ep - n->maxWidth; - ctx.s = INITIAL_FN(limex, !!sp); + ctx.s = INITIAL_FN(limex, !!sp); } } assert(ep >= sp); @@ -875,7 +875,7 @@ char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q, u64a local_ep = MIN(offset, ep); /* we are starting inside the history buffer */ STREAMSILENT_FN(limex, q->history + q->hlength + sp - offset, - local_ep - sp, &ctx, sp); + local_ep - sp, &ctx, sp); sp = local_ep; } @@ -887,35 +887,35 @@ char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q, /* do main buffer region */ DEBUG_PRINTF("MAIN BUFFER SCAN\n"); assert(ep - offset <= q->length); - STREAMSILENT_FN(limex, q->buffer + sp - offset, ep - sp, &ctx, sp); + STREAMSILENT_FN(limex, q->buffer + sp - offset, ep - sp, &ctx, sp); DEBUG_PRINTF("SCAN DONE\n"); scan_done: sp = ep; - JOIN(LIMEX_API_ROOT, _HandleEvent)(limex, q, &ctx, sp); + JOIN(LIMEX_API_ROOT, _HandleEvent)(limex, q, &ctx, sp); q->cur++; } - EXPIRE_ESTATE_FN(limex, &ctx, sp); + EXPIRE_ESTATE_FN(limex, &ctx, sp); DEBUG_PRINTF("END, nfa is %s\n", - ISNONZERO_STATE(ctx.s) ? "still alive" : "dead"); + ISNONZERO_STATE(ctx.s) ? 
"still alive" : "dead"); - *(STATE_T *)q->state = ctx.s; + *(STATE_T *)q->state = ctx.s; - if (JOIN(limexInAccept, SIZE)(limex, ctx.s, ctx.repeat_ctrl, - ctx.repeat_state, sp + 1, report)) { + if (JOIN(limexInAccept, SIZE)(limex, ctx.s, ctx.repeat_ctrl, + ctx.repeat_state, sp + 1, report)) { return MO_MATCHES_PENDING; } - return ISNONZERO_STATE(ctx.s); + return ISNONZERO_STATE(ctx.s); } char JOIN(LIMEX_API_ROOT, _testEOD)(const struct NFA *n, const char *state, - const char *streamState, u64a offset, - NfaCallback callback, void *context) { + const char *streamState, u64a offset, + NfaCallback callback, void *context) { assert(n && state); const IMPL_NFA_T *limex = getImplNfa(n); @@ -923,8 +923,8 @@ char JOIN(LIMEX_API_ROOT, _testEOD)(const struct NFA *n, const char *state, const union RepeatControl *repeat_ctrl = getRepeatControlBaseConst(state, sizeof(STATE_T)); const char *repeat_state = streamState + limex->stateSize; - return TESTEOD_FN(limex, sptr, repeat_ctrl, repeat_state, offset, callback, - context); + return TESTEOD_FN(limex, sptr, repeat_ctrl, repeat_state, offset, callback, + context); } char JOIN(LIMEX_API_ROOT, _reportCurrent)(const struct NFA *n, struct mq *q) { @@ -935,43 +935,43 @@ char JOIN(LIMEX_API_ROOT, _reportCurrent)(const struct NFA *n, struct mq *q) { // Block mode reverse scan. char JOIN(LIMEX_API_ROOT, _B_Reverse)(const struct NFA *n, u64a offset, - const u8 *buf, size_t buflen, - const u8 *hbuf, size_t hlen, - NfaCallback cb, void *context) { + const u8 *buf, size_t buflen, + const u8 *hbuf, size_t hlen, + NfaCallback cb, void *context) { assert(buf || hbuf); assert(buflen || hlen); - struct CONTEXT_T ctx; - ctx.repeat_ctrl = NULL; - ctx.repeat_state = NULL; - ctx.callback = cb; - ctx.context = context; - ctx.cached_estate = ZERO_STATE; - ctx.cached_br = 0; + struct CONTEXT_T ctx; + ctx.repeat_ctrl = NULL; + ctx.repeat_state = NULL; + ctx.callback = cb; + ctx.context = context; + ctx.cached_estate = ZERO_STATE; + ctx.cached_br = 0; const IMPL_NFA_T *limex = getImplNfa(n); - ctx.s = INITIAL_FN(limex, 0); // always anchored + ctx.s = INITIAL_FN(limex, 0); // always anchored // 'buf' may be null, for example when we're scanning at EOD time. if (buflen) { assert(buf); DEBUG_PRINTF("MAIN BUFFER SCAN, %zu bytes\n", buflen); offset -= buflen; - REV_STREAM_FN(limex, buf, buflen, &ctx, offset); + REV_STREAM_FN(limex, buf, buflen, &ctx, offset); } if (hlen) { assert(hbuf); DEBUG_PRINTF("HISTORY BUFFER SCAN, %zu bytes\n", hlen); offset -= hlen; - REV_STREAM_FN(limex, hbuf, hlen, &ctx, offset); + REV_STREAM_FN(limex, hbuf, hlen, &ctx, offset); } - if (offset == 0 && limex->acceptEodCount && ISNONZERO_STATE(ctx.s)) { - const union RepeatControl *repeat_ctrl = NULL; - const char *repeat_state = NULL; - TESTEOD_FN(limex, &ctx.s, repeat_ctrl, repeat_state, offset, cb, - context); + if (offset == 0 && limex->acceptEodCount && ISNONZERO_STATE(ctx.s)) { + const union RepeatControl *repeat_ctrl = NULL; + const char *repeat_state = NULL; + TESTEOD_FN(limex, &ctx.s, repeat_ctrl, repeat_state, offset, cb, + context); } // NOTE: return value is unused. 
@@ -987,36 +987,36 @@ char JOIN(LIMEX_API_ROOT, _inAccept)(const struct NFA *nfa, union RepeatControl *repeat_ctrl = getRepeatControlBase(q->state, sizeof(STATE_T)); char *repeat_state = q->streamState + limex->stateSize; - STATE_T state = *(STATE_T *)q->state; + STATE_T state = *(STATE_T *)q->state; u64a offset = q->offset + q_last_loc(q) + 1; return JOIN(limexInAccept, SIZE)(limex, state, repeat_ctrl, repeat_state, offset, report); } -char JOIN(LIMEX_API_ROOT, _inAnyAccept)(const struct NFA *nfa, struct mq *q) { - assert(nfa && q); - assert(q->state && q->streamState); - - const IMPL_NFA_T *limex = getImplNfa(nfa); - union RepeatControl *repeat_ctrl = - getRepeatControlBase(q->state, sizeof(STATE_T)); - char *repeat_state = q->streamState + limex->stateSize; - STATE_T state = *(STATE_T *)q->state; - u64a offset = q->offset + q_last_loc(q) + 1; - - return JOIN(limexInAnyAccept, SIZE)(limex, state, repeat_ctrl, repeat_state, - offset); -} - +char JOIN(LIMEX_API_ROOT, _inAnyAccept)(const struct NFA *nfa, struct mq *q) { + assert(nfa && q); + assert(q->state && q->streamState); + + const IMPL_NFA_T *limex = getImplNfa(nfa); + union RepeatControl *repeat_ctrl = + getRepeatControlBase(q->state, sizeof(STATE_T)); + char *repeat_state = q->streamState + limex->stateSize; + STATE_T state = *(STATE_T *)q->state; + u64a offset = q->offset + q_last_loc(q) + 1; + + return JOIN(limexInAnyAccept, SIZE)(limex, state, repeat_ctrl, repeat_state, + offset); +} + enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)( const struct NFA *nfa, struct mq *q, s64a loc) { assert(nfa->flags & NFA_ZOMBIE); const IMPL_NFA_T *limex = getImplNfa(nfa); - STATE_T state = *(STATE_T *)q->state; - STATE_T zmask = LOAD_FROM_ENG(&limex->zombieMask); + STATE_T state = *(STATE_T *)q->state; + STATE_T zmask = LOAD_FROM_ENG(&limex->zombieMask); if (limex->repeatCount) { u64a offset = q->offset + loc + 1; @@ -1048,7 +1048,7 @@ enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)( #undef RUN_ACCEL_FN #undef RUN_EXCEPTIONS_FN #undef REV_STREAM_FN -#undef LOOP_NOACCEL_FN +#undef LOOP_NOACCEL_FN #undef STREAM_FN #undef STREAMCB_FN #undef STREAMFIRST_FN @@ -1058,9 +1058,9 @@ enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)( #undef AND_STATE #undef ANDNOT_STATE #undef OR_STATE -#undef LSHIFT_STATE +#undef LSHIFT_STATE #undef TESTBIT_STATE -#undef CLEARBIT_STATE +#undef CLEARBIT_STATE #undef ZERO_STATE #undef ISNONZERO_STATE #undef ISZERO_STATE diff --git a/contrib/libs/hyperscan/src/nfa/limex_shuffle.h b/contrib/libs/hyperscan/src/nfa/limex_shuffle.h index 365d47296e..7786ed8b07 100644 --- a/contrib/libs/hyperscan/src/nfa/limex_shuffle.h +++ b/contrib/libs/hyperscan/src/nfa/limex_shuffle.h @@ -1,78 +1,78 @@ -/* - * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Naive dynamic shuffles. - * - * These are written with the assumption that the provided masks are sparsely - * populated and never contain more than 32 on bits. Other implementations will - * be faster and actually correct if these assumptions don't hold true. - */ - -#ifndef LIMEX_SHUFFLE_H -#define LIMEX_SHUFFLE_H - -#include "ue2common.h" -#include "util/arch.h" -#include "util/bitutils.h" -#include "util/simd_utils.h" - -static really_inline -u32 packedExtract128(m128 s, const m128 permute, const m128 compare) { - m128 shuffled = pshufb_m128(s, permute); - m128 compared = and128(shuffled, compare); - u16 rv = ~movemask128(eq128(compared, shuffled)); - return (u32)rv; -} - -#if defined(HAVE_AVX2) -static really_inline -u32 packedExtract256(m256 s, const m256 permute, const m256 compare) { - // vpshufb doesn't cross lanes, so this is a bit of a cheat - m256 shuffled = pshufb_m256(s, permute); - m256 compared = and256(shuffled, compare); - u32 rv = ~movemask256(eq256(compared, shuffled)); - // stitch the lane-wise results back together - return (u32)((rv >> 16) | (rv & 0xffffU)); -} -#endif // AVX2 - -#if defined(HAVE_AVX512) -static really_inline -u32 packedExtract512(m512 s, const m512 permute, const m512 compare) { - // vpshufb doesn't cross lanes, so this is a bit of a cheat - m512 shuffled = pshufb_m512(s, permute); - m512 compared = and512(shuffled, compare); - u64a rv = ~eq512mask(compared, shuffled); - // stitch the lane-wise results back together - rv = rv >> 32 | rv; - return (u32)(((rv >> 16) | rv) & 0xffffU); -} -#endif // AVX512 - -#endif // LIMEX_SHUFFLE_H +/* + * Copyright (c) 2015-2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Naive dynamic shuffles. + * + * These are written with the assumption that the provided masks are sparsely + * populated and never contain more than 32 on bits. Other implementations will + * be faster and actually correct if these assumptions don't hold true. + */ + +#ifndef LIMEX_SHUFFLE_H +#define LIMEX_SHUFFLE_H + +#include "ue2common.h" +#include "util/arch.h" +#include "util/bitutils.h" +#include "util/simd_utils.h" + +static really_inline +u32 packedExtract128(m128 s, const m128 permute, const m128 compare) { + m128 shuffled = pshufb_m128(s, permute); + m128 compared = and128(shuffled, compare); + u16 rv = ~movemask128(eq128(compared, shuffled)); + return (u32)rv; +} + +#if defined(HAVE_AVX2) +static really_inline +u32 packedExtract256(m256 s, const m256 permute, const m256 compare) { + // vpshufb doesn't cross lanes, so this is a bit of a cheat + m256 shuffled = pshufb_m256(s, permute); + m256 compared = and256(shuffled, compare); + u32 rv = ~movemask256(eq256(compared, shuffled)); + // stitch the lane-wise results back together + return (u32)((rv >> 16) | (rv & 0xffffU)); +} +#endif // AVX2 + +#if defined(HAVE_AVX512) +static really_inline +u32 packedExtract512(m512 s, const m512 permute, const m512 compare) { + // vpshufb doesn't cross lanes, so this is a bit of a cheat + m512 shuffled = pshufb_m512(s, permute); + m512 compared = and512(shuffled, compare); + u64a rv = ~eq512mask(compared, shuffled); + // stitch the lane-wise results back together + rv = rv >> 32 | rv; + return (u32)(((rv >> 16) | rv) & 0xffffU); +} +#endif // AVX512 + +#endif // LIMEX_SHUFFLE_H diff --git a/contrib/libs/hyperscan/src/nfa/limex_simd128.c b/contrib/libs/hyperscan/src/nfa/limex_simd128.c index c5f2b33e3e..f6f86ac27b 100644 --- a/contrib/libs/hyperscan/src/nfa/limex_simd128.c +++ b/contrib/libs/hyperscan/src/nfa/limex_simd128.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -48,11 +48,11 @@ #include "limex_runtime.h" -#define SIZE 128 -#define STATE_T m128 -#define ENG_STATE_T m128 -#define LOAD_FROM_ENG load_m128 - +#define SIZE 128 +#define STATE_T m128 +#define ENG_STATE_T m128 +#define LOAD_FROM_ENG load_m128 + #include "limex_exceptional.h" #include "limex_state_impl.h" diff --git a/contrib/libs/hyperscan/src/nfa/limex_simd256.c b/contrib/libs/hyperscan/src/nfa/limex_simd256.c index cc23290810..2de8c162cd 100644 --- a/contrib/libs/hyperscan/src/nfa/limex_simd256.c +++ b/contrib/libs/hyperscan/src/nfa/limex_simd256.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -45,11 +45,11 @@ // Common code #include 
"limex_runtime.h" -#define SIZE 256 -#define STATE_T m256 -#define ENG_STATE_T m256 -#define LOAD_FROM_ENG load_m256 - +#define SIZE 256 +#define STATE_T m256 +#define ENG_STATE_T m256 +#define LOAD_FROM_ENG load_m256 + #include "limex_exceptional.h" #include "limex_state_impl.h" diff --git a/contrib/libs/hyperscan/src/nfa/limex_simd384.c b/contrib/libs/hyperscan/src/nfa/limex_simd384.c index 7e596e48b0..6a7e5c94ac 100644 --- a/contrib/libs/hyperscan/src/nfa/limex_simd384.c +++ b/contrib/libs/hyperscan/src/nfa/limex_simd384.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -45,11 +45,11 @@ // Common code #include "limex_runtime.h" -#define SIZE 384 -#define STATE_T m384 -#define ENG_STATE_T m384 -#define LOAD_FROM_ENG load_m384 - +#define SIZE 384 +#define STATE_T m384 +#define ENG_STATE_T m384 +#define LOAD_FROM_ENG load_m384 + #include "limex_exceptional.h" #include "limex_state_impl.h" diff --git a/contrib/libs/hyperscan/src/nfa/limex_simd512.c b/contrib/libs/hyperscan/src/nfa/limex_simd512.c index f779f335d2..a85da3138c 100644 --- a/contrib/libs/hyperscan/src/nfa/limex_simd512.c +++ b/contrib/libs/hyperscan/src/nfa/limex_simd512.c @@ -1,60 +1,60 @@ -/* - * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief LimEx NFA: 512-bit SIMD runtime implementations. 
- */ - -//#define DEBUG_INPUT -//#define DEBUG_EXCEPTIONS - -#include "limex.h" - -#include "accel.h" -#include "limex_internal.h" -#include "nfa_internal.h" -#include "ue2common.h" -#include "util/bitutils.h" -#include "util/simd_utils.h" - -// Common code -#include "limex_runtime.h" - -#define SIZE 512 -#define STATE_T m512 -#define ENG_STATE_T m512 -#define LOAD_FROM_ENG load_m512 - -#include "limex_exceptional.h" - -#include "limex_state_impl.h" - -#define INLINE_ATTR really_inline -#include "limex_common_impl.h" - -#include "limex_runtime_impl.h" +/* + * Copyright (c) 2015-2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief LimEx NFA: 512-bit SIMD runtime implementations. + */ + +//#define DEBUG_INPUT +//#define DEBUG_EXCEPTIONS + +#include "limex.h" + +#include "accel.h" +#include "limex_internal.h" +#include "nfa_internal.h" +#include "ue2common.h" +#include "util/bitutils.h" +#include "util/simd_utils.h" + +// Common code +#include "limex_runtime.h" + +#define SIZE 512 +#define STATE_T m512 +#define ENG_STATE_T m512 +#define LOAD_FROM_ENG load_m512 + +#include "limex_exceptional.h" + +#include "limex_state_impl.h" + +#define INLINE_ATTR really_inline +#include "limex_common_impl.h" + +#include "limex_runtime_impl.h" diff --git a/contrib/libs/hyperscan/src/nfa/limex_state_impl.h b/contrib/libs/hyperscan/src/nfa/limex_state_impl.h index 81153f7171..7d1721432d 100644 --- a/contrib/libs/hyperscan/src/nfa/limex_state_impl.h +++ b/contrib/libs/hyperscan/src/nfa/limex_state_impl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,8 +35,8 @@ #include "util/state_compress.h" #include <string.h> -#if !defined(SIZE) || !defined(STATE_T) || !defined(LOAD_FROM_ENG) -# error Must define SIZE, STATE_T, LOAD_FROM_ENG in includer. 
+#if !defined(SIZE) || !defined(STATE_T) || !defined(LOAD_FROM_ENG) +# error Must define SIZE, STATE_T, LOAD_FROM_ENG in includer. #endif #define IMPL_NFA_T JOIN(struct LimExNFA, SIZE) @@ -44,8 +44,8 @@ #define REACHMASK_FN JOIN(moNfaReachMask, SIZE) #define COMPRESS_FN JOIN(moNfaCompressState, SIZE) #define EXPAND_FN JOIN(moNfaExpandState, SIZE) -#define COMPRESSED_STORE_FN JOIN(store_compressed_, STATE_T) -#define COMPRESSED_LOAD_FN JOIN(load_compressed_, STATE_T) +#define COMPRESSED_STORE_FN JOIN(store_compressed_, STATE_T) +#define COMPRESSED_LOAD_FN JOIN(load_compressed_, STATE_T) #define PARTIAL_STORE_FN JOIN(partial_store_, STATE_T) #define PARTIAL_LOAD_FN JOIN(partial_load_, STATE_T) #define OR_STATE JOIN(or_, STATE_T) @@ -53,24 +53,24 @@ #define ISZERO_STATE JOIN(isZero_, STATE_T) static really_inline -const ENG_STATE_T *get_reach_table(const IMPL_NFA_T *limex) { - const ENG_STATE_T *reach - = (const ENG_STATE_T *)((const char *)limex + sizeof(*limex)); - assert(ISALIGNED_N(reach, alignof(ENG_STATE_T))); - return reach; -} - -static really_inline -STATE_T REACHMASK_FN(const IMPL_NFA_T *limex, const u8 key) { - const ENG_STATE_T *reach = get_reach_table(limex); - return LOAD_FROM_ENG(&reach[limex->reachMap[key]]); +const ENG_STATE_T *get_reach_table(const IMPL_NFA_T *limex) { + const ENG_STATE_T *reach + = (const ENG_STATE_T *)((const char *)limex + sizeof(*limex)); + assert(ISALIGNED_N(reach, alignof(ENG_STATE_T))); + return reach; } static really_inline +STATE_T REACHMASK_FN(const IMPL_NFA_T *limex, const u8 key) { + const ENG_STATE_T *reach = get_reach_table(limex); + return LOAD_FROM_ENG(&reach[limex->reachMap[key]]); +} + +static really_inline void COMPRESS_FN(const IMPL_NFA_T *limex, u8 *dest, const STATE_T *src, u8 key) { assert(ISALIGNED_N(src, alignof(STATE_T))); - STATE_T a_src = *src; + STATE_T a_src = *src; DEBUG_PRINTF("compress state: %p -> %p\n", src, dest); @@ -81,30 +81,30 @@ void COMPRESS_FN(const IMPL_NFA_T *limex, u8 *dest, const STATE_T *src, } else { DEBUG_PRINTF("compress state, key=%hhx\n", key); - STATE_T reachmask = REACHMASK_FN(limex, key); + STATE_T reachmask = REACHMASK_FN(limex, key); // Masked compression means that we mask off the initDs states and // provide a shortcut for the all-zeroes case. Note that these must be // switched on in the EXPAND call below. 
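/*
 * A minimal sketch of the masked compress/expand round trip that the
 * comment above describes, on a plain uint64_t instead of an m128..m512
 * state. The mask and state values are hypothetical, and this simplifies
 * the real code, which also folds the per-character reach mask into the
 * store mask. Bits covered by the compress mask are packed densely for
 * storage; expand scatters them back and ORs the always-on initDs-style
 * bits back in.
 */
#include <assert.h>
#include <stdint.h>

static uint64_t toy_compress(uint64_t state, uint64_t mask) {
    uint64_t out = 0, out_bit = 1;
    for (uint64_t bit = 1; bit; bit <<= 1) {
        if (mask & bit) {
            if (state & bit) {
                out |= out_bit; /* pack surviving bit densely */
            }
            out_bit <<= 1;
        }
    }
    return out;
}

static uint64_t toy_expand(uint64_t packed, uint64_t mask, uint64_t init_ds) {
    uint64_t out = 0, in_bit = 1;
    for (uint64_t bit = 1; bit; bit <<= 1) {
        if (mask & bit) {
            if (packed & in_bit) {
                out |= bit; /* scatter back to original position */
            }
            in_bit <<= 1;
        }
    }
    return out | init_ds; /* re-enable the always-on states */
}

int main(void) {
    uint64_t mask = 0x00f0f0f0f0f0f0f0ull; /* hypothetical compressMask */
    uint64_t init_ds = 0x1ull;             /* hypothetical initDs bit */
    uint64_t state = (0x0030405060708090ull & mask) | init_ds;
    uint64_t packed = toy_compress(state & mask, mask);
    assert(toy_expand(packed, mask, init_ds) == state);
    return 0;
}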
if (limex->flags & LIMEX_FLAG_COMPRESS_MASKED) { - STATE_T s = AND_STATE(LOAD_FROM_ENG(&limex->compressMask), a_src); + STATE_T s = AND_STATE(LOAD_FROM_ENG(&limex->compressMask), a_src); if (ISZERO_STATE(s)) { DEBUG_PRINTF("after compression mask, all states are zero\n"); memset(dest, 0, limex->stateSize); return; } - STATE_T mask = AND_STATE(LOAD_FROM_ENG(&limex->compressMask), - reachmask); + STATE_T mask = AND_STATE(LOAD_FROM_ENG(&limex->compressMask), + reachmask); COMPRESSED_STORE_FN(dest, &s, &mask, limex->stateSize); } else { - COMPRESSED_STORE_FN(dest, src, &reachmask, limex->stateSize); + COMPRESSED_STORE_FN(dest, src, &reachmask, limex->stateSize); } } } static really_inline -void EXPAND_FN(const IMPL_NFA_T *limex, STATE_T *dest, const u8 *src, u8 key) { +void EXPAND_FN(const IMPL_NFA_T *limex, STATE_T *dest, const u8 *src, u8 key) { assert(ISALIGNED_N(dest, alignof(STATE_T))); DEBUG_PRINTF("expand state: %p -> %p\n", src, dest); @@ -114,15 +114,15 @@ void EXPAND_FN(const IMPL_NFA_T *limex, STATE_T *dest, const u8 *src, u8 key) { *dest = PARTIAL_LOAD_FN(src, limex->stateSize); } else { DEBUG_PRINTF("expand state, key=%hhx\n", key); - STATE_T reachmask = REACHMASK_FN(limex, key); + STATE_T reachmask = REACHMASK_FN(limex, key); if (limex->flags & LIMEX_FLAG_COMPRESS_MASKED) { - STATE_T mask = AND_STATE(LOAD_FROM_ENG(&limex->compressMask), - reachmask); + STATE_T mask = AND_STATE(LOAD_FROM_ENG(&limex->compressMask), + reachmask); COMPRESSED_LOAD_FN(dest, src, &mask, limex->stateSize); - *dest = OR_STATE(LOAD_FROM_ENG(&limex->initDS), *dest); + *dest = OR_STATE(LOAD_FROM_ENG(&limex->initDS), *dest); } else { - COMPRESSED_LOAD_FN(dest, src, &reachmask, limex->stateSize); + COMPRESSED_LOAD_FN(dest, src, &reachmask, limex->stateSize); } } } diff --git a/contrib/libs/hyperscan/src/nfa/mcclellan.c b/contrib/libs/hyperscan/src/nfa/mcclellan.c index 71f71e3275..9f79ae1fc0 100644 --- a/contrib/libs/hyperscan/src/nfa/mcclellan.c +++ b/contrib/libs/hyperscan/src/nfa/mcclellan.c @@ -42,13 +42,13 @@ static really_inline char doComplexReport(NfaCallback cb, void *ctxt, const struct mcclellan *m, - u32 s, u64a loc, char eod, u32 *cached_accept_state, - u32 *cached_accept_id) { - DEBUG_PRINTF("reporting state = %u, loc=%llu, eod %hhu\n", - s & STATE_MASK, loc, eod); + u32 s, u64a loc, char eod, u32 *cached_accept_state, + u32 *cached_accept_id) { + DEBUG_PRINTF("reporting state = %u, loc=%llu, eod %hhu\n", + s & STATE_MASK, loc, eod); if (!eod && s == *cached_accept_state) { - if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) { + if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) { return MO_HALT_MATCHING; /* termination requested */ } @@ -71,7 +71,7 @@ char doComplexReport(NfaCallback cb, void *ctxt, const struct mcclellan *m, *cached_accept_id = rl->report[0]; DEBUG_PRINTF("reporting %u\n", rl->report[0]); - if (cb(0, loc, rl->report[0], ctxt) == MO_HALT_MATCHING) { + if (cb(0, loc, rl->report[0], ctxt) == MO_HALT_MATCHING) { return MO_HALT_MATCHING; /* termination requested */ } @@ -80,7 +80,7 @@ char doComplexReport(NfaCallback cb, void *ctxt, const struct mcclellan *m, for (u32 i = 0; i < count; i++) { DEBUG_PRINTF("reporting %u\n", rl->report[i]); - if (cb(0, loc, rl->report[i], ctxt) == MO_HALT_MATCHING) { + if (cb(0, loc, rl->report[i], ctxt) == MO_HALT_MATCHING) { return MO_HALT_MATCHING; /* termination requested */ } } @@ -89,84 +89,84 @@ char doComplexReport(NfaCallback cb, void *ctxt, const struct mcclellan *m, } static really_inline -const u8 
*run_mcclellan_accel(const struct mcclellan *m, - const struct mstate_aux *aux, u32 s, - const u8 **min_accel_offset, - const u8 *c, const u8 *c_end) { - DEBUG_PRINTF("skipping\n"); - u32 accel_offset = aux[s].accel_offset; - - assert(aux[s].accel_offset); - assert(accel_offset >= m->aux_offset); - assert(!m->sherman_offset || accel_offset < m->sherman_offset); - - const union AccelAux *aaux = (const void *)((const char *)m + accel_offset); - const u8 *c2 = run_accel(aaux, c, c_end); - - if (c2 < *min_accel_offset + BAD_ACCEL_DIST) { - *min_accel_offset = c2 + BIG_ACCEL_PENALTY; - } else { - *min_accel_offset = c2 + SMALL_ACCEL_PENALTY; - } - - if (*min_accel_offset >= c_end - ACCEL_MIN_LEN) { - *min_accel_offset = c_end; - } - - DEBUG_PRINTF("advanced %zd, next accel chance in %zd/%zd\n", - c2 - c, *min_accel_offset - c2, c_end - c2); - - return c2; -} - -static really_inline -u32 doNormal16(const struct mcclellan *m, const u8 **c_inout, const u8 *end, - u32 s, char do_accel, enum MatchMode mode) { - const u8 *c = *c_inout; - - const u16 *succ_table - = (const u16 *)((const char *)m + sizeof(struct mcclellan)); +const u8 *run_mcclellan_accel(const struct mcclellan *m, + const struct mstate_aux *aux, u32 s, + const u8 **min_accel_offset, + const u8 *c, const u8 *c_end) { + DEBUG_PRINTF("skipping\n"); + u32 accel_offset = aux[s].accel_offset; + + assert(aux[s].accel_offset); + assert(accel_offset >= m->aux_offset); + assert(!m->sherman_offset || accel_offset < m->sherman_offset); + + const union AccelAux *aaux = (const void *)((const char *)m + accel_offset); + const u8 *c2 = run_accel(aaux, c, c_end); + + if (c2 < *min_accel_offset + BAD_ACCEL_DIST) { + *min_accel_offset = c2 + BIG_ACCEL_PENALTY; + } else { + *min_accel_offset = c2 + SMALL_ACCEL_PENALTY; + } + + if (*min_accel_offset >= c_end - ACCEL_MIN_LEN) { + *min_accel_offset = c_end; + } + + DEBUG_PRINTF("advanced %zd, next accel chance in %zd/%zd\n", + c2 - c, *min_accel_offset - c2, c_end - c2); + + return c2; +} + +static really_inline +u32 doNormal16(const struct mcclellan *m, const u8 **c_inout, const u8 *end, + u32 s, char do_accel, enum MatchMode mode) { + const u8 *c = *c_inout; + + const u16 *succ_table + = (const u16 *)((const char *)m + sizeof(struct mcclellan)); assert(ISALIGNED_N(succ_table, 2)); - u32 sherman_base = m->sherman_limit; + u32 sherman_base = m->sherman_limit; const char *sherman_base_offset = (const char *)m - sizeof(struct NFA) + m->sherman_offset; - u32 as = m->alphaShift; + u32 as = m->alphaShift; s &= STATE_MASK; - while (c < end && s) { - u8 cprime = m->remap[*c]; - DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx (s=%u)\n", *c, - ourisprint(*c) ? *c : '?', cprime, s); - if (s < sherman_base) { - DEBUG_PRINTF("doing normal\n"); - assert(s < m->state_count); - s = succ_table[(s << as) + cprime]; - } else { - const char *sherman_state - = findShermanState(m, sherman_base_offset, sherman_base, s); - DEBUG_PRINTF("doing sherman (%u)\n", s); - s = doSherman16(sherman_state, cprime, succ_table, as); - } - - DEBUG_PRINTF("s: %u (%u)\n", s, s & STATE_MASK); - c++; - - if (do_accel && (s & ACCEL_FLAG)) { - break; - } - if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) { - break; - } - - s &= STATE_MASK; - } - - *c_inout = c; - return s; -} - -static really_inline + while (c < end && s) { + u8 cprime = m->remap[*c]; + DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx (s=%u)\n", *c, + ourisprint(*c) ? 
*c : '?', cprime, s); + if (s < sherman_base) { + DEBUG_PRINTF("doing normal\n"); + assert(s < m->state_count); + s = succ_table[(s << as) + cprime]; + } else { + const char *sherman_state + = findShermanState(m, sherman_base_offset, sherman_base, s); + DEBUG_PRINTF("doing sherman (%u)\n", s); + s = doSherman16(sherman_state, cprime, succ_table, as); + } + + DEBUG_PRINTF("s: %u (%u)\n", s, s & STATE_MASK); + c++; + + if (do_accel && (s & ACCEL_FLAG)) { + break; + } + if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) { + break; + } + + s &= STATE_MASK; + } + + *c_inout = c; + return s; +} + +static really_inline u32 doNormalWide16(const struct mcclellan *m, const u8 **c_inout, const u8 *end, u32 s, char *qstate, u16 *offset, char do_accel, enum MatchMode mode) { @@ -229,28 +229,28 @@ char mcclellanExec16_i(const struct mcclellan *m, u32 *state, char *qstate, const u8 *buf, size_t len, u64a offAdj, NfaCallback cb, void *ctxt, char single, const u8 **c_final, enum MatchMode mode) { - assert(ISALIGNED_N(state, 2)); - if (!len) { - if (mode == STOP_AT_MATCH) { - *c_final = buf; - } - return MO_ALIVE; - } - - u32 s = *state; + assert(ISALIGNED_N(state, 2)); + if (!len) { + if (mode == STOP_AT_MATCH) { + *c_final = buf; + } + return MO_ALIVE; + } + + u32 s = *state; u16 offset = 0; - const u8 *c = buf; - const u8 *c_end = buf + len; - const struct mstate_aux *aux - = (const struct mstate_aux *)((const char *)m + m->aux_offset - - sizeof(struct NFA)); - - s &= STATE_MASK; - + const u8 *c = buf; + const u8 *c_end = buf + len; + const struct mstate_aux *aux + = (const struct mstate_aux *)((const char *)m + m->aux_offset + - sizeof(struct NFA)); + + s &= STATE_MASK; + u32 cached_accept_id = 0; - u32 cached_accept_state = 0; + u32 cached_accept_state = 0; - DEBUG_PRINTF("s: %u, len %zu\n", s, len); + DEBUG_PRINTF("s: %u, len %zu\n", s, len); const u8 *min_accel_offset = c; if (!m->has_accel || len < ACCEL_MIN_LEN) { @@ -261,10 +261,10 @@ char mcclellanExec16_i(const struct mcclellan *m, u32 *state, char *qstate, goto with_accel; without_accel: - do { - assert(c < min_accel_offset); - if (!s) { - goto exit; + do { + assert(c < min_accel_offset); + if (!s) { + goto exit; } if (unlikely(m->has_wide)) { @@ -273,96 +273,96 @@ without_accel: } else { s = doNormal16(m, &c, min_accel_offset, s, 0, mode); } - + if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) { if (mode == STOP_AT_MATCH) { *state = s & STATE_MASK; *c_final = c - 1; - return MO_MATCHES_PENDING; + return MO_MATCHES_PENDING; } u64a loc = (c - 1) - buf + offAdj + 1; if (single) { DEBUG_PRINTF("reporting %u\n", m->arb_report); - if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { - return MO_DEAD; /* termination requested */ + if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { + return MO_DEAD; /* termination requested */ } } else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0, - &cached_accept_state, &cached_accept_id) - == MO_HALT_MATCHING) { - return MO_DEAD; + &cached_accept_state, &cached_accept_id) + == MO_HALT_MATCHING) { + return MO_DEAD; } } - assert(c <= min_accel_offset); - } while (c < min_accel_offset); - - s &= STATE_MASK; - - if (c == c_end) { - goto exit; - } else { - goto with_accel; + assert(c <= min_accel_offset); + } while (c < min_accel_offset); + + s &= STATE_MASK; + + if (c == c_end) { + goto exit; + } else { + goto with_accel; } with_accel: - do { - assert(c < c_end); - if (!s) { - goto exit; - } - - if (s & ACCEL_FLAG) { - DEBUG_PRINTF("skipping\n"); - s &= STATE_MASK; - c = run_mcclellan_accel(m, aux, s, 
&min_accel_offset, c, c_end); - if (c == c_end) { - goto exit; - } else { - goto without_accel; - } + do { + assert(c < c_end); + if (!s) { + goto exit; } + if (s & ACCEL_FLAG) { + DEBUG_PRINTF("skipping\n"); + s &= STATE_MASK; + c = run_mcclellan_accel(m, aux, s, &min_accel_offset, c, c_end); + if (c == c_end) { + goto exit; + } else { + goto without_accel; + } + } + if (unlikely(m->has_wide)) { s = doNormalWide16(m, &c, c_end, s, qstate, &offset, 1, mode); } else { s = doNormal16(m, &c, c_end, s, 1, mode); } - + if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) { if (mode == STOP_AT_MATCH) { *state = s & STATE_MASK; *c_final = c - 1; - return MO_MATCHES_PENDING; + return MO_MATCHES_PENDING; } u64a loc = (c - 1) - buf + offAdj + 1; if (single) { DEBUG_PRINTF("reporting %u\n", m->arb_report); - if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { - return MO_DEAD; /* termination requested */ + if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { + return MO_DEAD; /* termination requested */ } } else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0, - &cached_accept_state, &cached_accept_id) - == MO_HALT_MATCHING) { - return MO_DEAD; + &cached_accept_state, &cached_accept_id) + == MO_HALT_MATCHING) { + return MO_DEAD; } - } + } - assert(c <= c_end); - } while (c < c_end); + assert(c <= c_end); + } while (c < c_end); -exit: - s &= STATE_MASK; +exit: + s &= STATE_MASK; if (mode == STOP_AT_MATCH) { *c_final = c_end; } *state = s; - return MO_ALIVE; + return MO_ALIVE; } static never_inline @@ -404,69 +404,69 @@ char mcclellanExec16_i_ni(const struct mcclellan *m, u32 *state, char *qstate, return mcclellanExec16_i_sam(m, state, qstate, buf, len, offAdj, cb, ctxt, single, final_point); } else { - assert(mode == NO_MATCHES); + assert(mode == NO_MATCHES); return mcclellanExec16_i_nm(m, state, qstate, buf, len, offAdj, cb, ctxt, single, final_point); } } static really_inline -u32 doNormal8(const struct mcclellan *m, const u8 **c_inout, const u8 *end, - u32 s, char do_accel, enum MatchMode mode) { - const u8 *c = *c_inout; - u32 accel_limit = m->accel_limit_8; - u32 accept_limit = m->accept_limit_8; - - const u32 as = m->alphaShift; +u32 doNormal8(const struct mcclellan *m, const u8 **c_inout, const u8 *end, + u32 s, char do_accel, enum MatchMode mode) { + const u8 *c = *c_inout; + u32 accel_limit = m->accel_limit_8; + u32 accept_limit = m->accept_limit_8; + + const u32 as = m->alphaShift; const u8 *succ_table = (const u8 *)((const char *)m + sizeof(struct mcclellan)); - while (c < end && s) { - u8 cprime = m->remap[*c]; - DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx\n", *c, - ourisprint(*c) ? *c : '?', cprime); - s = succ_table[(s << as) + cprime]; - - DEBUG_PRINTF("s: %u\n", s); - c++; - if (do_accel) { - if (s >= accel_limit) { - break; - } - } else { - if (mode != NO_MATCHES && s >= accept_limit) { - break; - } - } - } - *c_inout = c; - return s; -} - -static really_inline -char mcclellanExec8_i(const struct mcclellan *m, u32 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **c_final, enum MatchMode mode) { - if (!len) { - if (mode == STOP_AT_MATCH) { - *c_final = buf; - } - return MO_ALIVE; - } - u32 s = *state; - const u8 *c = buf; - const u8 *c_end = buf + len; - - const struct mstate_aux *aux - = (const struct mstate_aux *)((const char *)m + m->aux_offset + while (c < end && s) { + u8 cprime = m->remap[*c]; + DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx\n", *c, + ourisprint(*c) ? 
*c : '?', cprime); + s = succ_table[(s << as) + cprime]; + + DEBUG_PRINTF("s: %u\n", s); + c++; + if (do_accel) { + if (s >= accel_limit) { + break; + } + } else { + if (mode != NO_MATCHES && s >= accept_limit) { + break; + } + } + } + *c_inout = c; + return s; +} + +static really_inline +char mcclellanExec8_i(const struct mcclellan *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **c_final, enum MatchMode mode) { + if (!len) { + if (mode == STOP_AT_MATCH) { + *c_final = buf; + } + return MO_ALIVE; + } + u32 s = *state; + const u8 *c = buf; + const u8 *c_end = buf + len; + + const struct mstate_aux *aux + = (const struct mstate_aux *)((const char *)m + m->aux_offset - sizeof(struct NFA)); - u32 accept_limit = m->accept_limit_8; + u32 accept_limit = m->accept_limit_8; u32 cached_accept_id = 0; - u32 cached_accept_state = 0; + u32 cached_accept_state = 0; - DEBUG_PRINTF("accel %hu, accept %u\n", m->accel_limit_8, accept_limit); + DEBUG_PRINTF("accel %hu, accept %u\n", m->accel_limit_8, accept_limit); - DEBUG_PRINTF("s: %u, len %zu\n", s, len); + DEBUG_PRINTF("s: %u, len %zu\n", s, len); const u8 *min_accel_offset = c; if (!m->has_accel || len < ACCEL_MIN_LEN) { @@ -477,119 +477,119 @@ char mcclellanExec8_i(const struct mcclellan *m, u32 *state, const u8 *buf, goto with_accel; without_accel: - do { - assert(c < min_accel_offset); - if (!s) { - goto exit; - } - - s = doNormal8(m, &c, min_accel_offset, s, 0, mode); - + do { + assert(c < min_accel_offset); + if (!s) { + goto exit; + } + + s = doNormal8(m, &c, min_accel_offset, s, 0, mode); + if (mode != NO_MATCHES && s >= accept_limit) { if (mode == STOP_AT_MATCH) { DEBUG_PRINTF("match - pausing\n"); *state = s; *c_final = c - 1; - return MO_MATCHES_PENDING; + return MO_MATCHES_PENDING; } u64a loc = (c - 1) - buf + offAdj + 1; if (single) { DEBUG_PRINTF("reporting %u\n", m->arb_report); - if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { - return MO_DEAD; + if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { + return MO_DEAD; } } else if (doComplexReport(cb, ctxt, m, s, loc, 0, - &cached_accept_state, &cached_accept_id) + &cached_accept_state, &cached_accept_id) == MO_HALT_MATCHING) { - return MO_DEAD; + return MO_DEAD; } } - - assert(c <= min_accel_offset); - } while (c < min_accel_offset); - - if (c == c_end) { - goto exit; + + assert(c <= min_accel_offset); + } while (c < min_accel_offset); + + if (c == c_end) { + goto exit; } with_accel: - do { - u32 accel_limit = m->accel_limit_8; - assert(c < c_end); - - if (!s) { - goto exit; - } - - if (s >= accel_limit && aux[s].accel_offset) { - c = run_mcclellan_accel(m, aux, s, &min_accel_offset, c, c_end); - if (c == c_end) { - goto exit; - } else { - goto without_accel; - } - } - s = doNormal8(m, &c, c_end, s, 1, mode); - - if (mode != NO_MATCHES && s >= accept_limit) { - if (mode == STOP_AT_MATCH) { - DEBUG_PRINTF("match - pausing\n"); - *state = s; - *c_final = c - 1; - return MO_MATCHES_PENDING; - } - - u64a loc = (c - 1) - buf + offAdj + 1; - if (single) { - DEBUG_PRINTF("reporting %u\n", m->arb_report); - if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { - return MO_DEAD; + do { + u32 accel_limit = m->accel_limit_8; + assert(c < c_end); + + if (!s) { + goto exit; + } + + if (s >= accel_limit && aux[s].accel_offset) { + c = run_mcclellan_accel(m, aux, s, &min_accel_offset, c, c_end); + if (c == c_end) { + goto exit; + } else { + goto without_accel; + } + } + s = doNormal8(m, &c, c_end, s, 1, mode); + + if 
(mode != NO_MATCHES && s >= accept_limit) { + if (mode == STOP_AT_MATCH) { + DEBUG_PRINTF("match - pausing\n"); + *state = s; + *c_final = c - 1; + return MO_MATCHES_PENDING; + } + + u64a loc = (c - 1) - buf + offAdj + 1; + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { + return MO_DEAD; } - } else if (doComplexReport(cb, ctxt, m, s, loc, 0, - &cached_accept_state, &cached_accept_id) - == MO_HALT_MATCHING) { - return MO_DEAD; + } else if (doComplexReport(cb, ctxt, m, s, loc, 0, + &cached_accept_state, &cached_accept_id) + == MO_HALT_MATCHING) { + return MO_DEAD; } } - assert(c <= c_end); - } while (c < c_end); - -exit: + assert(c <= c_end); + } while (c < c_end); + +exit: *state = s; if (mode == STOP_AT_MATCH) { *c_final = c_end; } - return MO_ALIVE; + return MO_ALIVE; } static never_inline -char mcclellanExec8_i_cb(const struct mcclellan *m, u32 *state, const u8 *buf, +char mcclellanExec8_i_cb(const struct mcclellan *m, u32 *state, const u8 *buf, size_t len, u64a offAdj, NfaCallback cb, void *ctxt, char single, const u8 **final_point) { return mcclellanExec8_i(m, state, buf, len, offAdj, cb, ctxt, single, - final_point, CALLBACK_OUTPUT); + final_point, CALLBACK_OUTPUT); } static never_inline -char mcclellanExec8_i_sam(const struct mcclellan *m, u32 *state, const u8 *buf, +char mcclellanExec8_i_sam(const struct mcclellan *m, u32 *state, const u8 *buf, size_t len, u64a offAdj, NfaCallback cb, void *ctxt, char single, const u8 **final_point) { return mcclellanExec8_i(m, state, buf, len, offAdj, cb, ctxt, single, - final_point, STOP_AT_MATCH); + final_point, STOP_AT_MATCH); } static never_inline -char mcclellanExec8_i_nm(const struct mcclellan *m, u32 *state, const u8 *buf, +char mcclellanExec8_i_nm(const struct mcclellan *m, u32 *state, const u8 *buf, size_t len, u64a offAdj, NfaCallback cb, void *ctxt, char single, const u8 **final_point) { return mcclellanExec8_i(m, state, buf, len, offAdj, cb, ctxt, single, - final_point, NO_MATCHES); + final_point, NO_MATCHES); } static really_inline -char mcclellanExec8_i_ni(const struct mcclellan *m, u32 *state, const u8 *buf, +char mcclellanExec8_i_ni(const struct mcclellan *m, u32 *state, const u8 *buf, size_t len, u64a offAdj, NfaCallback cb, void *ctxt, char single, const u8 **final_point, enum MatchMode mode) { @@ -607,7 +607,7 @@ char mcclellanExec8_i_ni(const struct mcclellan *m, u32 *state, const u8 *buf, } static really_inline -char mcclellanCheckEOD(const struct NFA *nfa, u32 s, u64a offset, +char mcclellanCheckEOD(const struct NFA *nfa, u32 s, u64a offset, NfaCallback cb, void *ctxt) { const struct mcclellan *m = getImplNfa(nfa); const struct mstate_aux *aux = get_aux(m, s); @@ -616,10 +616,10 @@ char mcclellanCheckEOD(const struct NFA *nfa, u32 s, u64a offset, return MO_CONTINUE_MATCHING; } - if (!aux->accept_eod) { - return MO_CONTINUE_MATCHING; + if (!aux->accept_eod) { + return MO_CONTINUE_MATCHING; } - return doComplexReport(cb, ctxt, m, s, offset, 1, NULL, NULL); + return doComplexReport(cb, ctxt, m, s, offset, 1, NULL, NULL); } static really_inline @@ -632,7 +632,7 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, s64a sp; assert(ISALIGNED_N(q->state, 2)); - u32 s = *(u16 *)q->state; + u32 s = *(u16 *)q->state; if (q->report_current) { assert(s); @@ -641,10 +641,10 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, int rv; if (single) { DEBUG_PRINTF("reporting %u\n", m->arb_report); - rv = cb(0, 
q_cur_offset(q), m->arb_report, context); + rv = cb(0, q_cur_offset(q), m->arb_report, context); } else { u32 cached_accept_id = 0; - u32 cached_accept_state = 0; + u32 cached_accept_state = 0; rv = doComplexReport(cb, context, m, s, q_cur_offset(q), 0, &cached_accept_state, &cached_accept_id); @@ -653,7 +653,7 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, q->report_current = 0; if (rv == MO_HALT_MATCHING) { - return MO_DEAD; + return MO_DEAD; } } @@ -691,17 +691,17 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, char rv = mcclellanExec16_i_ni(m, &s, q->state, cur_buf + sp, local_ep - sp, offset + sp, cb, context, single, &final_look, mode); - if (rv == MO_DEAD) { + if (rv == MO_DEAD) { *(u16 *)q->state = 0; - return MO_DEAD; + return MO_DEAD; } - if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) { + if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) { DEBUG_PRINTF("this is as far as we go\n"); - DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf); - + DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf); + assert(q->cur); - assert(final_look != cur_buf + local_ep); - + assert(final_look != cur_buf + local_ep); + q->cur--; q->items[q->cur].type = MQE_START; q->items[q->cur].location = final_look - cur_buf + 1; /* due to @@ -710,7 +710,7 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, return MO_MATCHES_PENDING; } - assert(rv == MO_ALIVE); + assert(rv == MO_ALIVE); assert(q->cur); if (mode != NO_MATCHES && q->items[q->cur].location > end) { DEBUG_PRINTF("this is as far as we go\n"); @@ -743,7 +743,7 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, case MQE_END: *(u16 *)q->state = s; q->cur++; - return s ? MO_ALIVE : MO_DEAD; + return s ? MO_ALIVE : MO_DEAD; default: assert(!"invalid queue event"); } @@ -752,18 +752,18 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, } } -static really_inline -char nfaExecMcClellan16_Bi(const struct NFA *n, u64a offset, const u8 *buffer, - size_t length, NfaCallback cb, void *context, - char single) { +static really_inline +char nfaExecMcClellan16_Bi(const struct NFA *n, u64a offset, const u8 *buffer, + size_t length, NfaCallback cb, void *context, + char single) { assert(n->type == MCCLELLAN_NFA_16); const struct mcclellan *m = getImplNfa(n); - u32 s = m->start_anchored; + u32 s = m->start_anchored; if (mcclellanExec16_i(m, &s, NULL, buffer, length, offset, cb, context, single, NULL, CALLBACK_OUTPUT) - == MO_DEAD) { - return s ? MO_ALIVE : MO_DEAD; + == MO_DEAD) { + return s ? 
MO_ALIVE : MO_DEAD; } if (m->has_wide == 1 && s >= m->wide_limit) { @@ -776,19 +776,19 @@ char nfaExecMcClellan16_Bi(const struct NFA *n, u64a offset, const u8 *buffer, doComplexReport(cb, context, m, s, offset + length, 1, NULL, NULL); } - return MO_ALIVE; + return MO_ALIVE; } static really_inline char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, - const u8 *hend, NfaCallback cb, void *context, - struct mq *q, char single, s64a end, - enum MatchMode mode) { + const u8 *hend, NfaCallback cb, void *context, + struct mq *q, char single, s64a end, + enum MatchMode mode) { assert(n->type == MCCLELLAN_NFA_8); const struct mcclellan *m = getImplNfa(n); s64a sp; - u32 s = *(u8 *)q->state; + u32 s = *(u8 *)q->state; if (q->report_current) { assert(s); @@ -797,10 +797,10 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, int rv; if (single) { DEBUG_PRINTF("reporting %u\n", m->arb_report); - rv = cb(0, q_cur_offset(q), m->arb_report, context); + rv = cb(0, q_cur_offset(q), m->arb_report, context); } else { u32 cached_accept_id = 0; - u32 cached_accept_state = 0; + u32 cached_accept_state = 0; rv = doComplexReport(cb, context, m, s, q_cur_offset(q), 0, &cached_accept_state, &cached_accept_id); @@ -809,7 +809,7 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, q->report_current = 0; if (rv == MO_HALT_MATCHING) { - return MO_DEAD; + return MO_DEAD; } } @@ -845,21 +845,21 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, } const u8 *final_look; - char rv = mcclellanExec8_i_ni(m, &s, cur_buf + sp, local_ep - sp, - offset + sp, cb, context, single, - &final_look, mode); + char rv = mcclellanExec8_i_ni(m, &s, cur_buf + sp, local_ep - sp, + offset + sp, cb, context, single, + &final_look, mode); - if (rv == MO_HALT_MATCHING) { + if (rv == MO_HALT_MATCHING) { *(u8 *)q->state = 0; - return MO_DEAD; + return MO_DEAD; } - if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) { - DEBUG_PRINTF("this is as far as we go\n"); - DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf); - + if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) { + DEBUG_PRINTF("this is as far as we go\n"); + DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf); + assert(q->cur); - assert(final_look != cur_buf + local_ep); - + assert(final_look != cur_buf + local_ep); + q->cur--; q->items[q->cur].type = MQE_START; q->items[q->cur].location = final_look - cur_buf + 1; /* due to @@ -868,7 +868,7 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, return MO_MATCHES_PENDING; } - assert(rv == MO_ALIVE); + assert(rv == MO_ALIVE); assert(q->cur); if (mode != NO_MATCHES && q->items[q->cur].location > end) { DEBUG_PRINTF("this is as far as we go\n"); @@ -902,7 +902,7 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, case MQE_END: *(u8 *)q->state = s; q->cur++; - return s ? MO_ALIVE : MO_DEAD; + return s ? 
MO_ALIVE : MO_DEAD; default: assert(!"invalid queue event"); } @@ -911,18 +911,18 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, } } -static really_inline +static really_inline char nfaExecMcClellan8_Bi(const struct NFA *n, u64a offset, const u8 *buffer, size_t length, NfaCallback cb, void *context, char single) { assert(n->type == MCCLELLAN_NFA_8); const struct mcclellan *m = getImplNfa(n); - u32 s = m->start_anchored; + u32 s = m->start_anchored; if (mcclellanExec8_i(m, &s, buffer, length, offset, cb, context, single, NULL, CALLBACK_OUTPUT) - == MO_DEAD) { - return MO_DEAD; + == MO_DEAD) { + return MO_DEAD; } const struct mstate_aux *aux = get_aux(m, s); @@ -931,7 +931,7 @@ char nfaExecMcClellan8_Bi(const struct NFA *n, u64a offset, const u8 *buffer, doComplexReport(cb, context, m, s, offset + length, 1, NULL, NULL); } - return s ? MO_ALIVE : MO_DEAD; + return s ? MO_ALIVE : MO_DEAD; } char nfaExecMcClellan8_B(const struct NFA *n, u64a offset, const u8 *buffer, @@ -990,7 +990,7 @@ char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q) { const struct mcclellan *m = getImplNfa(n); NfaCallback cb = q->cb; void *ctxt = q->context; - u32 s = *(u8 *)q->state; + u32 s = *(u8 *)q->state; u8 single = m->flags & MCCLELLAN_FLAG_SINGLE; u64a offset = q_cur_offset(q); assert(q_cur_type(q) == MQE_START); @@ -999,10 +999,10 @@ char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q) { if (s >= m->accept_limit_8) { if (single) { DEBUG_PRINTF("reporting %u\n", m->arb_report); - cb(0, offset, m->arb_report, ctxt); + cb(0, offset, m->arb_report, ctxt); } else { u32 cached_accept_id = 0; - u32 cached_accept_state = 0; + u32 cached_accept_state = 0; doComplexReport(cb, ctxt, m, s, offset, 0, &cached_accept_state, &cached_accept_id); @@ -1013,24 +1013,24 @@ char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q) { } char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q) { - const struct mcclellan *m = getImplNfa(n); + const struct mcclellan *m = getImplNfa(n); NfaCallback cb = q->cb; void *ctxt = q->context; - u32 s = *(u16 *)q->state; + u32 s = *(u16 *)q->state; const struct mstate_aux *aux = get_aux(m, s); u8 single = m->flags & MCCLELLAN_FLAG_SINGLE; u64a offset = q_cur_offset(q); assert(q_cur_type(q) == MQE_START); - DEBUG_PRINTF("state %u\n", s); + DEBUG_PRINTF("state %u\n", s); assert(s); if (aux->accept) { if (single) { DEBUG_PRINTF("reporting %u\n", m->arb_report); - cb(0, offset, m->arb_report, ctxt); + cb(0, offset, m->arb_report, ctxt); } else { u32 cached_accept_id = 0; - u32 cached_accept_state = 0; + u32 cached_accept_state = 0; doComplexReport(cb, ctxt, m, s, offset, 0, &cached_accept_state, &cached_accept_id); @@ -1068,7 +1068,7 @@ char nfaExecMcClellan8_inAccept(const struct NFA *n, ReportID report, struct mq *q) { assert(n && q); - const struct mcclellan *m = getImplNfa(n); + const struct mcclellan *m = getImplNfa(n); u8 s = *(u8 *)q->state; DEBUG_PRINTF("checking accepts for %hhu\n", s); if (s < m->accept_limit_8) { @@ -1078,22 +1078,22 @@ char nfaExecMcClellan8_inAccept(const struct NFA *n, ReportID report, return mcclellanHasAccept(m, get_aux(m, s), report); } -char nfaExecMcClellan8_inAnyAccept(const struct NFA *n, struct mq *q) { - assert(n && q); - - const struct mcclellan *m = getImplNfa(n); - u8 s = *(u8 *)q->state; - DEBUG_PRINTF("checking accepts for %hhu\n", s); - assert(s < m->accept_limit_8 || get_aux(m, s)->accept); - - return s >= m->accept_limit_8; -} - +char 
nfaExecMcClellan8_inAnyAccept(const struct NFA *n, struct mq *q) { + assert(n && q); + + const struct mcclellan *m = getImplNfa(n); + u8 s = *(u8 *)q->state; + DEBUG_PRINTF("checking accepts for %hhu\n", s); + assert(s < m->accept_limit_8 || get_aux(m, s)->accept); + + return s >= m->accept_limit_8; +} + char nfaExecMcClellan16_inAccept(const struct NFA *n, ReportID report, struct mq *q) { assert(n && q); - const struct mcclellan *m = getImplNfa(n); + const struct mcclellan *m = getImplNfa(n); u16 s = *(u16 *)q->state; DEBUG_PRINTF("checking accepts for %hu\n", s); @@ -1101,24 +1101,24 @@ char nfaExecMcClellan16_inAccept(const struct NFA *n, ReportID report, 0 : mcclellanHasAccept(m, get_aux(m, s), report); } -char nfaExecMcClellan16_inAnyAccept(const struct NFA *n, struct mq *q) { - assert(n && q); - - const struct mcclellan *m = getImplNfa(n); - u16 s = *(u16 *)q->state; - DEBUG_PRINTF("checking accepts for %hu\n", s); - +char nfaExecMcClellan16_inAnyAccept(const struct NFA *n, struct mq *q) { + assert(n && q); + + const struct mcclellan *m = getImplNfa(n); + u16 s = *(u16 *)q->state; + DEBUG_PRINTF("checking accepts for %hu\n", s); + return (m->has_wide == 1 && s >= m->wide_limit) ? 0 : !!get_aux(m, s)->accept; -} - +} + char nfaExecMcClellan8_Q2(const struct NFA *n, struct mq *q, s64a end) { u64a offset = q->offset; const u8 *buffer = q->buffer; NfaCallback cb = q->cb; void *context = q->context; assert(n->type == MCCLELLAN_NFA_8); - const struct mcclellan *m = getImplNfa(n); + const struct mcclellan *m = getImplNfa(n); const u8 *hend = q->history + q->hlength; return nfaExecMcClellan8_Q2i(n, offset, buffer, hend, cb, context, q, @@ -1132,7 +1132,7 @@ char nfaExecMcClellan16_Q2(const struct NFA *n, struct mq *q, s64a end) { NfaCallback cb = q->cb; void *context = q->context; assert(n->type == MCCLELLAN_NFA_16); - const struct mcclellan *m = getImplNfa(n); + const struct mcclellan *m = getImplNfa(n); const u8 *hend = q->history + q->hlength; return nfaExecMcClellan16_Q2i(n, offset, buffer, hend, cb, context, q, @@ -1146,7 +1146,7 @@ char nfaExecMcClellan8_QR(const struct NFA *n, struct mq *q, ReportID report) { NfaCallback cb = q->cb; void *context = q->context; assert(n->type == MCCLELLAN_NFA_8); - const struct mcclellan *m = getImplNfa(n); + const struct mcclellan *m = getImplNfa(n); const u8 *hend = q->history + q->hlength; char rv = nfaExecMcClellan8_Q2i(n, offset, buffer, hend, cb, context, q, @@ -1165,7 +1165,7 @@ char nfaExecMcClellan16_QR(const struct NFA *n, struct mq *q, ReportID report) { NfaCallback cb = q->cb; void *context = q->context; assert(n->type == MCCLELLAN_NFA_16); - const struct mcclellan *m = getImplNfa(n); + const struct mcclellan *m = getImplNfa(n); const u8 *hend = q->history + q->hlength; char rv = nfaExecMcClellan16_Q2i(n, offset, buffer, hend, cb, context, q, @@ -1181,7 +1181,7 @@ char nfaExecMcClellan16_QR(const struct NFA *n, struct mq *q, ReportID report) { char nfaExecMcClellan8_initCompressedState(const struct NFA *nfa, u64a offset, void *state, UNUSED u8 key) { - const struct mcclellan *m = getImplNfa(nfa); + const struct mcclellan *m = getImplNfa(nfa); u8 s = offset ? m->start_floating : m->start_anchored; if (s) { *(u8 *)state = s; @@ -1192,7 +1192,7 @@ char nfaExecMcClellan8_initCompressedState(const struct NFA *nfa, u64a offset, char nfaExecMcClellan16_initCompressedState(const struct NFA *nfa, u64a offset, void *state, UNUSED u8 key) { - const struct mcclellan *m = getImplNfa(nfa); + const struct mcclellan *m = getImplNfa(nfa); u16 s = offset ? 
m->start_floating : m->start_anchored; // new byte @@ -1210,30 +1210,30 @@ char nfaExecMcClellan16_initCompressedState(const struct NFA *nfa, u64a offset, void nfaExecMcClellan8_SimpStream(const struct NFA *nfa, char *state, const u8 *buf, char top, size_t start_off, size_t len, NfaCallback cb, void *ctxt) { - const struct mcclellan *m = getImplNfa(nfa); - - u32 s = top ? m->start_anchored : *(u8 *)state; + const struct mcclellan *m = getImplNfa(nfa); + u32 s = top ? m->start_anchored : *(u8 *)state; + if (m->flags & MCCLELLAN_FLAG_SINGLE) { - mcclellanExec8_i(m, &s, buf + start_off, len - start_off, + mcclellanExec8_i(m, &s, buf + start_off, len - start_off, start_off, cb, ctxt, 1, NULL, CALLBACK_OUTPUT); } else { - mcclellanExec8_i(m, &s, buf + start_off, len - start_off, + mcclellanExec8_i(m, &s, buf + start_off, len - start_off, start_off, cb, ctxt, 0, NULL, CALLBACK_OUTPUT); } - - *(u8 *)state = s; + + *(u8 *)state = s; } void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state, const u8 *buf, char top, size_t start_off, size_t len, NfaCallback cb, void *ctxt) { - const struct mcclellan *m = getImplNfa(nfa); + const struct mcclellan *m = getImplNfa(nfa); u32 s; if (top) { s = m->start_anchored; - + // new byte if (m->has_wide) { unaligned_store_u16((u16 *)state + 1, 0); @@ -1244,39 +1244,39 @@ void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state, if (m->flags & MCCLELLAN_FLAG_SINGLE) { mcclellanExec16_i(m, &s, state, buf + start_off, len - start_off, - start_off, cb, ctxt, 1, NULL, CALLBACK_OUTPUT); + start_off, cb, ctxt, 1, NULL, CALLBACK_OUTPUT); } else { mcclellanExec16_i(m, &s, state, buf + start_off, len - start_off, - start_off, cb, ctxt, 0, NULL, CALLBACK_OUTPUT); + start_off, cb, ctxt, 0, NULL, CALLBACK_OUTPUT); } - - unaligned_store_u16(state, s); + + unaligned_store_u16(state, s); } char nfaExecMcClellan8_testEOD(const struct NFA *nfa, const char *state, - UNUSED const char *streamState, u64a offset, - NfaCallback callback, void *context) { - return mcclellanCheckEOD(nfa, *(const u8 *)state, offset, callback, - context); + UNUSED const char *streamState, u64a offset, + NfaCallback callback, void *context) { + return mcclellanCheckEOD(nfa, *(const u8 *)state, offset, callback, + context); } char nfaExecMcClellan16_testEOD(const struct NFA *nfa, const char *state, - UNUSED const char *streamState, u64a offset, - NfaCallback callback, void *context) { + UNUSED const char *streamState, u64a offset, + NfaCallback callback, void *context) { assert(ISALIGNED_N(state, 2)); - return mcclellanCheckEOD(nfa, *(const u16 *)state, offset, callback, - context); + return mcclellanCheckEOD(nfa, *(const u16 *)state, offset, callback, + context); } -char nfaExecMcClellan8_queueInitState(UNUSED const struct NFA *nfa, - struct mq *q) { +char nfaExecMcClellan8_queueInitState(UNUSED const struct NFA *nfa, + struct mq *q) { assert(nfa->scratchStateSize == 1); *(u8 *)q->state = 0; return 0; } -char nfaExecMcClellan16_queueInitState(UNUSED const struct NFA *nfa, - struct mq *q) { +char nfaExecMcClellan16_queueInitState(UNUSED const struct NFA *nfa, + struct mq *q) { const struct mcclellan *m = getImplNfa(nfa); assert(m->has_wide == 1 ? 
nfa->scratchStateSize == 4 : nfa->scratchStateSize == 2); diff --git a/contrib/libs/hyperscan/src/nfa/mcclellan.h b/contrib/libs/hyperscan/src/nfa/mcclellan.h index 9c6b3eecb1..c26a7944f9 100644 --- a/contrib/libs/hyperscan/src/nfa/mcclellan.h +++ b/contrib/libs/hyperscan/src/nfa/mcclellan.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -39,14 +39,14 @@ struct NFA; char nfaExecMcClellan8_testEOD(const struct NFA *nfa, const char *state, const char *streamState, u64a offset, - NfaCallback callback, void *context); + NfaCallback callback, void *context); char nfaExecMcClellan8_Q(const struct NFA *n, struct mq *q, s64a end); char nfaExecMcClellan8_Q2(const struct NFA *n, struct mq *q, s64a end); char nfaExecMcClellan8_QR(const struct NFA *n, struct mq *q, ReportID report); char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q); char nfaExecMcClellan8_inAccept(const struct NFA *n, ReportID report, struct mq *q); -char nfaExecMcClellan8_inAnyAccept(const struct NFA *n, struct mq *q); +char nfaExecMcClellan8_inAnyAccept(const struct NFA *n, struct mq *q); char nfaExecMcClellan8_queueInitState(const struct NFA *n, struct mq *q); char nfaExecMcClellan8_initCompressedState(const struct NFA *n, u64a offset, void *state, u8 key); @@ -62,14 +62,14 @@ char nfaExecMcClellan8_expandState(const struct NFA *nfa, void *dest, char nfaExecMcClellan16_testEOD(const struct NFA *nfa, const char *state, const char *streamState, u64a offset, - NfaCallback callback, void *context); + NfaCallback callback, void *context); char nfaExecMcClellan16_Q(const struct NFA *n, struct mq *q, s64a end); char nfaExecMcClellan16_Q2(const struct NFA *n, struct mq *q, s64a end); char nfaExecMcClellan16_QR(const struct NFA *n, struct mq *q, ReportID report); char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q); char nfaExecMcClellan16_inAccept(const struct NFA *n, ReportID report, struct mq *q); -char nfaExecMcClellan16_inAnyAccept(const struct NFA *n, struct mq *q); +char nfaExecMcClellan16_inAnyAccept(const struct NFA *n, struct mq *q); char nfaExecMcClellan16_queueInitState(const struct NFA *n, struct mq *q); char nfaExecMcClellan16_initCompressedState(const struct NFA *n, u64a offset, void *state, u8 key); diff --git a/contrib/libs/hyperscan/src/nfa/mcclellan_common_impl.h b/contrib/libs/hyperscan/src/nfa/mcclellan_common_impl.h index 7b0e7f48cd..ad55579627 100644 --- a/contrib/libs/hyperscan/src/nfa/mcclellan_common_impl.h +++ b/contrib/libs/hyperscan/src/nfa/mcclellan_common_impl.h @@ -33,7 +33,7 @@ enum MatchMode { }; static really_inline -const struct mstate_aux *get_aux(const struct mcclellan *m, u32 s) { +const struct mstate_aux *get_aux(const struct mcclellan *m, u32 s) { const char *nfa = (const char *)m - sizeof(struct NFA); const struct mstate_aux *aux = s + (const struct mstate_aux *)(nfa + m->aux_offset); @@ -43,15 +43,15 @@ const struct mstate_aux *get_aux(const struct mcclellan *m, u32 s) { } static really_inline -u32 mcclellanEnableStarts(const struct mcclellan *m, u32 s) { +u32 mcclellanEnableStarts(const struct mcclellan *m, u32 s) { const struct mstate_aux *aux = get_aux(m, s); - DEBUG_PRINTF("enabling starts %u->%hu\n", s, aux->top); + DEBUG_PRINTF("enabling starts %u->%hu\n", s, aux->top); return aux->top; } static really_inline -u32 doSherman16(const char 
*sherman_state, u8 cprime, const u16 *succ_table, +u32 doSherman16(const char *sherman_state, u8 cprime, const u16 *succ_table, u32 as) { assert(ISALIGNED_N(sherman_state, 16)); @@ -70,17 +70,17 @@ u32 doSherman16(const char *sherman_state, u8 cprime, const u16 *succ_table, if (z) { u32 i = ctz32(z & ~0xf) - 4; - u32 s_out = unaligned_load_u16((const u8 *)sherman_state + u32 s_out = unaligned_load_u16((const u8 *)sherman_state + SHERMAN_STATES_OFFSET(len) + sizeof(u16) * i); - DEBUG_PRINTF("found sherman match at %u/%u for c'=%hhu s=%u\n", i, - len, cprime, s_out); + DEBUG_PRINTF("found sherman match at %u/%u for c'=%hhu s=%u\n", i, + len, cprime, s_out); return s_out; } } - u32 daddy = *(const u16 *)(sherman_state + SHERMAN_DADDY_OFFSET); - return succ_table[(daddy << as) + cprime]; + u32 daddy = *(const u16 *)(sherman_state + SHERMAN_DADDY_OFFSET); + return succ_table[(daddy << as) + cprime]; } static really_inline diff --git a/contrib/libs/hyperscan/src/nfa/mcclellan_internal.h b/contrib/libs/hyperscan/src/nfa/mcclellan_internal.h index 482fdb1bc9..c2571a0468 100644 --- a/contrib/libs/hyperscan/src/nfa/mcclellan_internal.h +++ b/contrib/libs/hyperscan/src/nfa/mcclellan_internal.h @@ -81,18 +81,18 @@ struct mcclellan { u16 start_floating; /**< floating start state */ u32 aux_offset; /**< offset of the aux structures relative to the start of * the nfa structure */ - u32 sherman_offset; /**< offset of array of sherman state offsets the - * state_info structures relative to the start of the - * nfa structure */ - u32 sherman_end; /**< offset of the end of the state_info structures - * relative to the start of the nfa structure */ + u32 sherman_offset; /**< offset of array of sherman state offsets the + * state_info structures relative to the start of the + * nfa structure */ + u32 sherman_end; /**< offset of the end of the state_info structures + * relative to the start of the nfa structure */ u16 accel_limit_8; /**< 8 bit, lowest accelerable state */ u16 accept_limit_8; /**< 8 bit, lowest accept state */ u16 sherman_limit; /**< lowest sherman state */ u16 wide_limit; /**< 8/16 bit, lowest wide head state */ u8 alphaShift; u8 flags; - u8 has_accel; /**< 1 iff there are any accel plans */ + u8 has_accel; /**< 1 iff there are any accel plans */ u8 has_wide; /**< 1 iff there exists any wide state */ u8 remap[256]; /**< remaps characters to a smaller alphabet */ ReportID arb_report; /**< one of the accepts that this dfa may raise */ @@ -104,8 +104,8 @@ struct mcclellan { static really_inline const char *findShermanState(UNUSED const struct mcclellan *m, - const char *sherman_base_offset, u32 sherman_base, - u32 s) { + const char *sherman_base_offset, u32 sherman_base, + u32 s) { const char *rv = sherman_base_offset + SHERMAN_FIXED_SIZE * (s - sherman_base); assert(rv < (const char *)m + m->length - sizeof(struct NFA)); @@ -116,7 +116,7 @@ const char *findShermanState(UNUSED const struct mcclellan *m, static really_inline char *findMutableShermanState(char *sherman_base_offset, u16 sherman_base, - u32 s) { + u32 s) { return sherman_base_offset + SHERMAN_FIXED_SIZE * (s - sherman_base); } diff --git a/contrib/libs/hyperscan/src/nfa/mcclellancompile.cpp b/contrib/libs/hyperscan/src/nfa/mcclellancompile.cpp index 27ec1716e9..ecfd636bbd 100644 --- a/contrib/libs/hyperscan/src/nfa/mcclellancompile.cpp +++ b/contrib/libs/hyperscan/src/nfa/mcclellancompile.cpp @@ -29,10 +29,10 @@ #include "mcclellancompile.h" #include "accel.h" -#include "accelcompile.h" +#include "accelcompile.h" #include "grey.h" 
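/*
 * A minimal sketch of the Sherman lookup that doSherman16() above
 * implements: a Sherman state stores only a short list of explicit
 * (character, successor) pairs plus a "daddy" state id, and any other
 * character falls through to the daddy's full row in the dense successor
 * table. The layout and toy alphabet below are hypothetical -- the real
 * format is packed and the list is matched with SIMD, not a scalar loop.
 */
#include <stdint.h>
#include <stdio.h>

#define TOY_ALPHA_SHIFT 2 /* toy alphabet of 1 << 2 = 4 symbols */

struct toy_sherman {
    uint8_t len;      /* number of explicit transitions */
    uint8_t chars[4]; /* remapped characters handled here */
    uint16_t next[4]; /* corresponding successor states */
    uint16_t daddy;   /* fallback state */
};

static uint16_t toy_sherman_step(const struct toy_sherman *sh,
                                 const uint16_t *succ_table, uint8_t cprime) {
    for (uint8_t i = 0; i < sh->len; i++) {
        if (sh->chars[i] == cprime) {
            return sh->next[i]; /* explicit edge stored in this state */
        }
    }
    /* otherwise resolve through the daddy's dense row, as above */
    return succ_table[((uint32_t)sh->daddy << TOY_ALPHA_SHIFT) + cprime];
}

int main(void) {
    const uint16_t succ_table[] = {0, 0, 0, 0,  /* state 0: dead */
                                   1, 2, 1, 0}; /* state 1: the daddy */
    struct toy_sherman sh = {1, {3}, {2}, 1};   /* differs on symbol 3 only */
    printf("%u %u\n", (unsigned)toy_sherman_step(&sh, succ_table, 3),
           (unsigned)toy_sherman_step(&sh, succ_table, 0));
    return 0;
}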
#include "mcclellan_internal.h" -#include "mcclellancompile_util.h" +#include "mcclellancompile_util.h" #include "nfa_internal.h" #include "shufticompile.h" #include "trufflecompile.h" @@ -45,8 +45,8 @@ #include "util/container.h" #include "util/make_unique.h" #include "util/order_check.h" -#include "util/report_manager.h" -#include "util/flat_containers.h" +#include "util/report_manager.h" +#include "util/flat_containers.h" #include "util/unaligned.h" #include "util/verify_types.h" @@ -60,40 +60,40 @@ #include <set> #include <vector> -#include <boost/range/adaptor/map.hpp> - +#include <boost/range/adaptor/map.hpp> + #include "mcclellandump.h" #include "util/dump_util.h" #include "util/dump_charclass.h" using namespace std; -using boost::adaptors::map_keys; +using boost::adaptors::map_keys; using boost::dynamic_bitset; -#define ACCEL_DFA_MAX_OFFSET_DEPTH 4 - -/** Maximum tolerated number of escape character from an accel state. - * This is larger than nfa, as we don't have a budget and the nfa cheats on stop - * characters for sets of states */ -#define ACCEL_DFA_MAX_STOP_CHAR 160 +#define ACCEL_DFA_MAX_OFFSET_DEPTH 4 -/** Maximum tolerated number of escape character from a sds accel state. Larger - * than normal states as accelerating sds is important. Matches NFA value */ -#define ACCEL_DFA_MAX_FLOATING_STOP_CHAR 192 +/** Maximum tolerated number of escape character from an accel state. + * This is larger than nfa, as we don't have a budget and the nfa cheats on stop + * characters for sets of states */ +#define ACCEL_DFA_MAX_STOP_CHAR 160 -namespace ue2 { +/** Maximum tolerated number of escape character from a sds accel state. Larger + * than normal states as accelerating sds is important. Matches NFA value */ +#define ACCEL_DFA_MAX_FLOATING_STOP_CHAR 192 +namespace ue2 { + namespace /* anon */ { struct dstate_extra { - u16 daddytaken = 0; - bool shermanState = false; + u16 daddytaken = 0; + bool shermanState = false; bool wideState = false; bool wideHead = false; }; struct dfa_info { - accel_dfa_build_strat &strat; + accel_dfa_build_strat &strat; raw_dfa &raw; vector<dstate> &states; vector<dstate_extra> extra; @@ -105,7 +105,7 @@ struct dfa_info { u8 getAlphaShift() const; - explicit dfa_info(accel_dfa_build_strat &s) + explicit dfa_info(accel_dfa_build_strat &s) : strat(s), raw(s.get_raw()), states(raw.states), @@ -292,16 +292,16 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) { } } -u32 mcclellan_build_strat::max_allowed_offset_accel() const { - return ACCEL_DFA_MAX_OFFSET_DEPTH; +u32 mcclellan_build_strat::max_allowed_offset_accel() const { + return ACCEL_DFA_MAX_OFFSET_DEPTH; } -u32 mcclellan_build_strat::max_stop_char() const { - return ACCEL_DFA_MAX_STOP_CHAR; +u32 mcclellan_build_strat::max_stop_char() const { + return ACCEL_DFA_MAX_STOP_CHAR; } -u32 mcclellan_build_strat::max_floating_stop_char() const { - return ACCEL_DFA_MAX_FLOATING_STOP_CHAR; +u32 mcclellan_build_strat::max_floating_stop_char() const { + return ACCEL_DFA_MAX_FLOATING_STOP_CHAR; } static @@ -359,16 +359,16 @@ namespace { struct raw_report_list { flat_set<ReportID> reports; - raw_report_list(const flat_set<ReportID> &reports_in, - const ReportManager &rm, bool do_remap) { - if (do_remap) { - for (auto &id : reports_in) { - reports.insert(rm.getProgramOffset(id)); - } - } else { - reports = reports_in; - } - } + raw_report_list(const flat_set<ReportID> &reports_in, + const ReportManager &rm, bool do_remap) { + if (do_remap) { + for (auto &id : reports_in) { + 
reports.insert(rm.getProgramOffset(id)); + } + } else { + reports = reports_in; + } + } bool operator<(const raw_report_list &b) const { return reports < b.reports; @@ -391,8 +391,8 @@ unique_ptr<raw_report_info> mcclellan_build_strat::gatherReports( ReportID *arbReport) const { DEBUG_PRINTF("gathering reports\n"); - const bool remap_reports = has_managed_reports(rdfa.kind); - + const bool remap_reports = has_managed_reports(rdfa.kind); + auto ri = ue2::make_unique<raw_report_info_impl>(); map<raw_report_list, u32> rev; @@ -402,14 +402,14 @@ unique_ptr<raw_report_info> mcclellan_build_strat::gatherReports( continue; } - raw_report_list rrl(s.reports, rm, remap_reports); + raw_report_list rrl(s.reports, rm, remap_reports); DEBUG_PRINTF("non empty r\n"); - auto it = rev.find(rrl); - if (it != rev.end()) { - reports.push_back(it->second); + auto it = rev.find(rrl); + if (it != rev.end()) { + reports.push_back(it->second); } else { DEBUG_PRINTF("adding to rl %zu\n", ri->size()); - rev.emplace(rrl, ri->size()); + rev.emplace(rrl, ri->size()); reports.push_back(ri->size()); ri->rl.push_back(rrl); } @@ -422,15 +422,15 @@ unique_ptr<raw_report_info> mcclellan_build_strat::gatherReports( } DEBUG_PRINTF("non empty r eod\n"); - raw_report_list rrl(s.reports_eod, rm, remap_reports); - auto it = rev.find(rrl); - if (it != rev.end()) { - reports_eod.push_back(it->second); + raw_report_list rrl(s.reports_eod, rm, remap_reports); + auto it = rev.find(rrl); + if (it != rev.end()) { + reports_eod.push_back(it->second); continue; } DEBUG_PRINTF("adding to rl eod %zu\n", s.reports_eod.size()); - rev.emplace(rrl, ri->size()); + rev.emplace(rrl, ri->size()); reports_eod.push_back(ri->size()); ri->rl.push_back(rrl); } @@ -445,7 +445,7 @@ unique_ptr<raw_report_info> mcclellan_build_strat::gatherReports( /* if we have only a single report id generated from all accepts (not eod) * we can take some short cuts */ - flat_set<ReportID> reps; + flat_set<ReportID> reps; for (u32 rl_index : reports) { if (rl_index == MO_INVALID_IDX) { @@ -500,14 +500,14 @@ void raw_report_info_impl::fillReportLists(NFA *n, size_t base_offset, } static -void fillAccelOut(const map<dstate_id_t, AccelScheme> &accel_escape_info, - set<dstate_id_t> *accel_states) { - for (dstate_id_t i : accel_escape_info | map_keys) { - accel_states->insert(i); - } -} - -static +void fillAccelOut(const map<dstate_id_t, AccelScheme> &accel_escape_info, + set<dstate_id_t> *accel_states) { + for (dstate_id_t i : accel_escape_info | map_keys) { + accel_states->insert(i); + } +} + +static size_t calcShermanRegionSize(const dfa_info &info) { size_t rv = 0; @@ -550,7 +550,7 @@ void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info, : info.raw.start_floating); } -/* returns false on error */ +/* returns false on error */ static bool allocateFSN16(dfa_info &info, dstate_id_t *sherman_base, dstate_id_t *wide_limit) { @@ -564,7 +564,7 @@ bool allocateFSN16(dfa_info &info, dstate_id_t *sherman_base, if (info.size() > (1 << 16)) { DEBUG_PRINTF("too many states\n"); *wide_limit = 0; - return false; + return false; } for (u32 i = 1; i < info.size(); i++) { @@ -609,8 +609,8 @@ bool allocateFSN16(dfa_info &info, dstate_id_t *sherman_base, } static -bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc, - set<dstate_id_t> *accel_states) { +bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc, + set<dstate_id_t> *accel_states) { DEBUG_PRINTF("building mcclellan 16\n"); vector<u32> reports; /* index in ri for the 
appropriate report list */ @@ -632,9 +632,9 @@ bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc, DEBUG_PRINTF("count_real_states: %d\n", count_real_states); DEBUG_PRINTF("non_wide_states: %d\n", wide_limit); - auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb); - map<dstate_id_t, AccelScheme> accel_escape_info - = info.strat.getAccelInfo(cc.grey); + auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb); + map<dstate_id_t, AccelScheme> accel_escape_info + = info.strat.getAccelInfo(cc.grey); size_t tran_size = (1 << info.getAlphaShift()) * sizeof(u16) * count_real_states; @@ -642,7 +642,7 @@ bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc, size_t aux_size = sizeof(mstate_aux) * wide_limit; size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcclellan) + tran_size); - size_t accel_size = info.strat.accelSize() * accel_escape_info.size(); + size_t accel_size = info.strat.accelSize() * accel_escape_info.size(); size_t accel_offset = ROUNDUP_N(aux_offset + aux_size + ri->getReportListSize(), 32); size_t sherman_offset = ROUNDUP_16(accel_offset + accel_size); @@ -665,11 +665,11 @@ bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc, DEBUG_PRINTF("wide_size %zu\n", wide_size); DEBUG_PRINTF("total_size %zu\n", total_size); - auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size); + auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size); char *nfa_base = (char *)nfa.get(); populateBasicInfo(sizeof(u16), info, total_size, aux_offset, accel_offset, - accel_escape_info.size(), arb, single, nfa.get()); + accel_escape_info.size(), arb, single, nfa.get()); vector<u32> reportOffsets; @@ -705,12 +705,12 @@ bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc, fillInAux(&aux[fs], i, info, reports, reports_eod, reportOffsets); - if (contains(accel_escape_info, i)) { + if (contains(accel_escape_info, i)) { this_aux->accel_offset = accel_offset; accel_offset += info.strat.accelSize(); assert(accel_offset + sizeof(NFA) <= sherman_offset); assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); - info.strat.buildAccel(i, accel_escape_info.at(i), + info.strat.buildAccel(i, accel_escape_info.at(i), (void *)((char *)m + this_aux->accel_offset)); } } @@ -735,12 +735,12 @@ bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc, fillInAux(this_aux, i, info, reports, reports_eod, reportOffsets); - if (contains(accel_escape_info, i)) { + if (contains(accel_escape_info, i)) { this_aux->accel_offset = accel_offset; accel_offset += info.strat.accelSize(); assert(accel_offset + sizeof(NFA) <= sherman_offset); assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); - info.strat.buildAccel(i, accel_escape_info.at(i), + info.strat.buildAccel(i, accel_escape_info.at(i), (void *)((char *)m + this_aux->accel_offset)); } @@ -838,10 +838,10 @@ bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc, markEdges(nfa.get(), succ_table, info); - if (accel_states && nfa) { - fillAccelOut(accel_escape_info, accel_states); - } - + if (accel_states && nfa) { + fillAccelOut(accel_escape_info, accel_states); + } + return nfa; } @@ -880,9 +880,9 @@ void fillInBasicState8(const dfa_info &info, mstate_aux *aux, u8 *succ_table, } static -void allocateFSN8(dfa_info &info, - const map<dstate_id_t, AccelScheme> &accel_escape_info, - u16 *accel_limit, u16 *accept_limit) { +void allocateFSN8(dfa_info &info, + const map<dstate_id_t, AccelScheme> 
&accel_escape_info, + u16 *accel_limit, u16 *accept_limit) { info.states[0].impl_id = 0; /* dead is always 0 */ vector<dstate_id_t> norm; @@ -894,7 +894,7 @@ void allocateFSN8(dfa_info &info, for (u32 i = 1; i < info.size(); i++) { if (!info.states[i].reports.empty()) { accept.push_back(i); - } else if (contains(accel_escape_info, i)) { + } else if (contains(accel_escape_info, i)) { accel.push_back(i); } else { norm.push_back(i); @@ -922,8 +922,8 @@ void allocateFSN8(dfa_info &info, } static -bytecode_ptr<NFA> mcclellanCompile8(dfa_info &info, const CompileContext &cc, - set<dstate_id_t> *accel_states) { +bytecode_ptr<NFA> mcclellanCompile8(dfa_info &info, const CompileContext &cc, + set<dstate_id_t> *accel_states) { DEBUG_PRINTF("building mcclellan 8\n"); vector<u32> reports; @@ -931,14 +931,14 @@ bytecode_ptr<NFA> mcclellanCompile8(dfa_info &info, const CompileContext &cc, ReportID arb; u8 single; - auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb); - map<dstate_id_t, AccelScheme> accel_escape_info - = info.strat.getAccelInfo(cc.grey); + auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb); + map<dstate_id_t, AccelScheme> accel_escape_info + = info.strat.getAccelInfo(cc.grey); size_t tran_size = sizeof(u8) * (1 << info.getAlphaShift()) * info.size(); size_t aux_size = sizeof(mstate_aux) * info.size(); size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcclellan) + tran_size); - size_t accel_size = info.strat.accelSize() * accel_escape_info.size(); + size_t accel_size = info.strat.accelSize() * accel_escape_info.size(); size_t accel_offset = ROUNDUP_N(aux_offset + aux_size + ri->getReportListSize(), 32); size_t total_size = accel_offset + accel_size; @@ -953,15 +953,15 @@ bytecode_ptr<NFA> mcclellanCompile8(dfa_info &info, const CompileContext &cc, accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */ assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); - auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size); + auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size); char *nfa_base = (char *)nfa.get(); mcclellan *m = (mcclellan *)getMutableImplNfa(nfa.get()); - allocateFSN8(info, accel_escape_info, &m->accel_limit_8, - &m->accept_limit_8); + allocateFSN8(info, accel_escape_info, &m->accel_limit_8, + &m->accept_limit_8); populateBasicInfo(sizeof(u8), info, total_size, aux_offset, accel_offset, - accel_escape_info.size(), arb, single, nfa.get()); + accel_escape_info.size(), arb, single, nfa.get()); vector<u32> reportOffsets; @@ -972,14 +972,14 @@ bytecode_ptr<NFA> mcclellanCompile8(dfa_info &info, const CompileContext &cc, mstate_aux *aux = (mstate_aux *)(nfa_base + aux_offset); for (size_t i = 0; i < info.size(); i++) { - if (contains(accel_escape_info, i)) { + if (contains(accel_escape_info, i)) { u32 j = info.implId(i); aux[j].accel_offset = accel_offset; accel_offset += info.strat.accelSize(); - info.strat.buildAccel(i, accel_escape_info.at(i), - (void *)((char *)m + aux[j].accel_offset)); + info.strat.buildAccel(i, accel_escape_info.at(i), + (void *)((char *)m + aux[j].accel_offset)); } fillInBasicState8(info, aux, succ_table, reportOffsets, reports, @@ -990,17 +990,17 @@ bytecode_ptr<NFA> mcclellanCompile8(dfa_info &info, const CompileContext &cc, DEBUG_PRINTF("rl size %zu\n", ri->size()); - if (accel_states && nfa) { - fillAccelOut(accel_escape_info, accel_states); - } - + if (accel_states && nfa) { + fillAccelOut(accel_escape_info, accel_states); + } + return nfa; } #define MAX_SHERMAN_LIST_LEN 9 static -void 
addIfEarlier(flat_set<dstate_id_t> &dest, dstate_id_t candidate, +void addIfEarlier(flat_set<dstate_id_t> &dest, dstate_id_t candidate, dstate_id_t max) { if (candidate < max) { dest.insert(candidate); @@ -1008,41 +1008,41 @@ void addIfEarlier(flat_set<dstate_id_t> &dest, dstate_id_t candidate, } static -void addSuccessors(flat_set<dstate_id_t> &dest, const dstate &source, +void addSuccessors(flat_set<dstate_id_t> &dest, const dstate &source, u16 alphasize, dstate_id_t curr_id) { for (symbol_t s = 0; s < alphasize; s++) { addIfEarlier(dest, source.next[s], curr_id); } } -/* \brief Returns a set of states to search for a better daddy. */ -static -flat_set<dstate_id_t> find_daddy_candidates(const dfa_info &info, - dstate_id_t curr_id) { - flat_set<dstate_id_t> hinted; - - addIfEarlier(hinted, 0, curr_id); - addIfEarlier(hinted, info.raw.start_anchored, curr_id); - addIfEarlier(hinted, info.raw.start_floating, curr_id); - - // Add existing daddy and his successors, then search back one generation. - const u16 alphasize = info.impl_alpha_size; - dstate_id_t daddy = info.states[curr_id].daddy; - for (u32 level = 0; daddy && level < 2; level++) { - addIfEarlier(hinted, daddy, curr_id); - addSuccessors(hinted, info.states[daddy], alphasize, curr_id); - daddy = info.states[daddy].daddy; - } - - return hinted; -} - +/* \brief Returns a set of states to search for a better daddy. */ +static +flat_set<dstate_id_t> find_daddy_candidates(const dfa_info &info, + dstate_id_t curr_id) { + flat_set<dstate_id_t> hinted; + + addIfEarlier(hinted, 0, curr_id); + addIfEarlier(hinted, info.raw.start_anchored, curr_id); + addIfEarlier(hinted, info.raw.start_floating, curr_id); + + // Add existing daddy and his successors, then search back one generation. + const u16 alphasize = info.impl_alpha_size; + dstate_id_t daddy = info.states[curr_id].daddy; + for (u32 level = 0; daddy && level < 2; level++) { + addIfEarlier(hinted, daddy, curr_id); + addSuccessors(hinted, info.states[daddy], alphasize, curr_id); + daddy = info.states[daddy].daddy; + } + + return hinted; +} + #define MAX_SHERMAN_SELF_LOOP 20 static -void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit, - bool any_cyclic_near_anchored_state, - bool trust_daddy_states, const Grey &grey) { +void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit, + bool any_cyclic_near_anchored_state, + bool trust_daddy_states, const Grey &grey) { if (!grey.allowShermanStates) { return; } @@ -1077,25 +1077,25 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit, dstate_id_t best_daddy = 0; dstate &currState = info.states[curr_id]; - flat_set<dstate_id_t> hinted; - if (trust_daddy_states) { - // Use the daddy already set for this state so long as it isn't already - // a Sherman state. + flat_set<dstate_id_t> hinted; + if (trust_daddy_states) { + // Use the daddy already set for this state so long as it isn't already + // a Sherman state. dstate_id_t daddy = currState.daddy; if (!info.is_sherman(daddy) && !info.is_widestate(daddy)) { - hinted.insert(currState.daddy); - } else { - // Fall back to granddaddy, which has already been processed (due - // to BFS ordering) and cannot be a Sherman state. - dstate_id_t granddaddy = info.states[currState.daddy].daddy; + hinted.insert(currState.daddy); + } else { + // Fall back to granddaddy, which has already been processed (due + // to BFS ordering) and cannot be a Sherman state. 
+ dstate_id_t granddaddy = info.states[currState.daddy].daddy; if (info.is_widestate(granddaddy)) { return; } - assert(!info.is_sherman(granddaddy)); - hinted.insert(granddaddy); + assert(!info.is_sherman(granddaddy)); + hinted.insert(granddaddy); } - } else { - hinted = find_daddy_candidates(info, curr_id); + } else { + hinted = find_daddy_candidates(info, curr_id); } for (const dstate_id_t &donor : hinted) { @@ -1139,7 +1139,7 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit, } u32 self_loop_width = 0; - const dstate &curr_raw = info.states[curr_id]; + const dstate &curr_raw = info.states[curr_id]; for (unsigned i = 0; i < N_CHARS; i++) { if (curr_raw.next[info.alpha_remap[i]] == curr_id) { self_loop_width++; @@ -1148,7 +1148,7 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit, if (self_loop_width > MAX_SHERMAN_SELF_LOOP) { DEBUG_PRINTF("%hu is banned wide self loop (%u)\n", curr_id, - self_loop_width); + self_loop_width); return; } @@ -1459,11 +1459,11 @@ void find_wide_state(dfa_info &info) { generate_symbol_chain(info, chain_tail); } -bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat, - const CompileContext &cc, - bool trust_daddy_states, - set<dstate_id_t> *accel_states) { - assert(!is_dead(raw)); +bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat, + const CompileContext &cc, + bool trust_daddy_states, + set<dstate_id_t> *accel_states) { + assert(!is_dead(raw)); dfa_info info(strat); bool using8bit = cc.grey.allowMcClellan8 && info.size() <= 256; @@ -1475,17 +1475,17 @@ bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat, bool has_eod_reports = raw.hasEodReports(); - bytecode_ptr<NFA> nfa; - if (!using8bit) { + bytecode_ptr<NFA> nfa; + if (!using8bit) { // Wide state optimization if (cc.grey.allowWideStates && strat.getType() == McClellan && !is_triggered(raw.kind)) { find_wide_state(info); } - u16 total_daddy = 0; - bool any_cyclic_near_anchored_state - = is_cyclic_near(raw, raw.start_anchored); + u16 total_daddy = 0; + bool any_cyclic_near_anchored_state + = is_cyclic_near(raw, raw.start_anchored); // Sherman optimization if (info.impl_alpha_size > 16) { @@ -1502,11 +1502,11 @@ bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat, DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy, info.size() * info.impl_alpha_size, info.size(), info.impl_alpha_size); - } + } - nfa = mcclellanCompile16(info, cc, accel_states); + nfa = mcclellanCompile16(info, cc, accel_states); } else { - nfa = mcclellanCompile8(info, cc, accel_states); + nfa = mcclellanCompile8(info, cc, accel_states); } if (has_eod_reports) { @@ -1517,13 +1517,13 @@ bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat, return nfa; } -bytecode_ptr<NFA> mcclellanCompile(raw_dfa &raw, const CompileContext &cc, - const ReportManager &rm, - bool only_accel_init, - bool trust_daddy_states, - set<dstate_id_t> *accel_states) { - mcclellan_build_strat mbs(raw, rm, only_accel_init); - return mcclellanCompile_i(raw, mbs, cc, trust_daddy_states, accel_states); +bytecode_ptr<NFA> mcclellanCompile(raw_dfa &raw, const CompileContext &cc, + const ReportManager &rm, + bool only_accel_init, + bool trust_daddy_states, + set<dstate_id_t> *accel_states) { + mcclellan_build_strat mbs(raw, rm, only_accel_init); + return mcclellanCompile_i(raw, mbs, cc, trust_daddy_states, accel_states); } size_t mcclellan_build_strat::accelSize(void) const { 
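/* Editor's note: an illustrative sketch, not part of this commit. It restates
 * in scalar form the Sherman-state compression that find_better_daddy() above
 * selects candidates for: a compressed state stores only the characters on
 * which it differs from its "daddy" state and defers to the daddy's full
 * transition row for everything else (the real 16-bit runtime does the same
 * comparison with one SIMD equality over the stored characters). All names
 * below are hypothetical. */
struct sherman_sketch {
    unsigned char len;        /* number of stored exception characters */
    unsigned char chars[9];   /* exceptions (cf. MAX_SHERMAN_LIST_LEN) */
    unsigned short succ[9];   /* successor state for each exception */
    unsigned short daddy;     /* fallback state owning a full row */
};

static unsigned short
sherman_next_sketch(const struct sherman_sketch *ss,
                    const unsigned short *succ_table, unsigned alpha_shift,
                    unsigned char cprime) {
    for (unsigned i = 0; i < ss->len; i++) {
        if (ss->chars[i] == cprime) {
            return ss->succ[i];              /* exception transition */
        }
    }
    /* no exception matched: fall back to the daddy state's full row */
    return succ_table[((unsigned)ss->daddy << alpha_shift) + cprime];
}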
@@ -1548,7 +1548,7 @@ u32 mcclellanStartReachSize(const raw_dfa *raw) { return out.count(); } -bool has_accel_mcclellan(const NFA *nfa) { +bool has_accel_mcclellan(const NFA *nfa) { const mcclellan *m = (const mcclellan *)getImplNfa(nfa); return m->has_accel; } diff --git a/contrib/libs/hyperscan/src/nfa/mcclellancompile.h b/contrib/libs/hyperscan/src/nfa/mcclellancompile.h index 73cb9fd775..f1c5ea0888 100644 --- a/contrib/libs/hyperscan/src/nfa/mcclellancompile.h +++ b/contrib/libs/hyperscan/src/nfa/mcclellancompile.h @@ -29,10 +29,10 @@ #ifndef MCCLELLANCOMPILE_H #define MCCLELLANCOMPILE_H -#include "accel_dfa_build_strat.h" +#include "accel_dfa_build_strat.h" #include "rdfa.h" #include "ue2common.h" -#include "util/bytecode_ptr.h" +#include "util/bytecode_ptr.h" #include <memory> #include <vector> @@ -42,54 +42,54 @@ struct NFA; namespace ue2 { -class ReportManager; +class ReportManager; struct CompileContext; -class mcclellan_build_strat : public accel_dfa_build_strat { +class mcclellan_build_strat : public accel_dfa_build_strat { public: - mcclellan_build_strat(raw_dfa &rdfa_in, const ReportManager &rm_in, - bool only_accel_init_in) - : accel_dfa_build_strat(rm_in, only_accel_init_in), rdfa(rdfa_in) {} + mcclellan_build_strat(raw_dfa &rdfa_in, const ReportManager &rm_in, + bool only_accel_init_in) + : accel_dfa_build_strat(rm_in, only_accel_init_in), rdfa(rdfa_in) {} raw_dfa &get_raw() const override { return rdfa; } std::unique_ptr<raw_report_info> gatherReports( - std::vector<u32> &reports /* out */, - std::vector<u32> &reports_eod /* out */, - u8 *isSingleReport /* out */, - ReportID *arbReport /* out */) const override; + std::vector<u32> &reports /* out */, + std::vector<u32> &reports_eod /* out */, + u8 *isSingleReport /* out */, + ReportID *arbReport /* out */) const override; size_t accelSize(void) const override; - u32 max_allowed_offset_accel() const override; - u32 max_stop_char() const override; - u32 max_floating_stop_char() const override; + u32 max_allowed_offset_accel() const override; + u32 max_stop_char() const override; + u32 max_floating_stop_char() const override; DfaType getType() const override { return McClellan; } private: raw_dfa &rdfa; }; -/** - * \brief Construct an implementation DFA. - * - * \param raw the raw dfa to construct from - * \param cc compile context - * \param rm report manger - * \param only_accel_init if true, only the init states will be examined for - * acceleration opportunities - * \param trust_daddy_states if true, trust the daddy state set in the raw dfa - * rather than conducting a search for a better daddy (for Sherman - * states) - * \param accel_states (optional) success, is filled with the set of - * accelerable states - */ -bytecode_ptr<NFA> +/** + * \brief Construct an implementation DFA. 
+ * + * \param raw the raw dfa to construct from + * \param cc compile context + * \param rm report manger + * \param only_accel_init if true, only the init states will be examined for + * acceleration opportunities + * \param trust_daddy_states if true, trust the daddy state set in the raw dfa + * rather than conducting a search for a better daddy (for Sherman + * states) + * \param accel_states (optional) success, is filled with the set of + * accelerable states + */ +bytecode_ptr<NFA> mcclellanCompile(raw_dfa &raw, const CompileContext &cc, - const ReportManager &rm, bool only_accel_init, - bool trust_daddy_states = false, + const ReportManager &rm, bool only_accel_init, + bool trust_daddy_states = false, std::set<dstate_id_t> *accel_states = nullptr); /* used internally by mcclellan/haig/gough compile process */ -bytecode_ptr<NFA> -mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat, - const CompileContext &cc, bool trust_daddy_states = false, +bytecode_ptr<NFA> +mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat, + const CompileContext &cc, bool trust_daddy_states = false, std::set<dstate_id_t> *accel_states = nullptr); /** @@ -99,8 +99,8 @@ u32 mcclellanStartReachSize(const raw_dfa *raw); std::set<ReportID> all_reports(const raw_dfa &rdfa); -bool has_accel_mcclellan(const NFA *nfa); +bool has_accel_mcclellan(const NFA *nfa); } // namespace ue2 -#endif // MCCLELLANCOMPILE_H +#endif // MCCLELLANCOMPILE_H diff --git a/contrib/libs/hyperscan/src/nfa/mcclellancompile_util.cpp b/contrib/libs/hyperscan/src/nfa/mcclellancompile_util.cpp index 3e299b81e2..5a2ac16cf3 100644 --- a/contrib/libs/hyperscan/src/nfa/mcclellancompile_util.cpp +++ b/contrib/libs/hyperscan/src/nfa/mcclellancompile_util.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,11 +30,11 @@ #include "rdfa.h" #include "util/container.h" -#include "util/hash.h" +#include "util/hash.h" #include "ue2common.h" #include <deque> -#include <map> +#include <map> using namespace std; @@ -43,12 +43,12 @@ namespace ue2 { #define INIT_STATE 1 static -bool state_has_reports(const raw_dfa &raw, dstate_id_t s) { - const auto &ds = raw.states[s]; - return !ds.reports.empty() || !ds.reports_eod.empty(); -} - -static +bool state_has_reports(const raw_dfa &raw, dstate_id_t s) { + const auto &ds = raw.states[s]; + return !ds.reports.empty() || !ds.reports_eod.empty(); +} + +static u32 count_dots(const raw_dfa &raw) { assert(raw.start_anchored == INIT_STATE); @@ -65,7 +65,7 @@ u32 count_dots(const raw_dfa &raw) { } } - if (state_has_reports(raw, raw.states[i].next[0])) { + if (state_has_reports(raw, raw.states[i].next[0])) { goto validate; } @@ -126,11 +126,11 @@ u32 remove_leading_dots(raw_dfa &raw) { static never_inline u32 calc_min_dist_from_bob(raw_dfa &raw, vector<u32> *dist_in) { vector<u32> &dist = *dist_in; - dist.assign(raw.states.size(), ~0U); + dist.assign(raw.states.size(), ~0U); assert(raw.start_anchored != DEAD_STATE); - deque<dstate_id_t> to_visit = { raw.start_anchored }; + deque<dstate_id_t> to_visit = { raw.start_anchored }; dist[raw.start_anchored] = 0; u32 last_d = 0; @@ -145,7 +145,7 @@ u32 calc_min_dist_from_bob(raw_dfa &raw, vector<u32> *dist_in) { assert(d >= last_d); assert(d != ~0U); - for (dstate_id_t t : raw.states[s].next) { + for (dstate_id_t t : raw.states[s].next) { if (t == 
DEAD_STATE) { continue; } @@ -163,41 +163,41 @@ u32 calc_min_dist_from_bob(raw_dfa &raw, vector<u32> *dist_in) { return last_d; } -bool clear_deeper_reports(raw_dfa &raw, u32 max_offset) { - DEBUG_PRINTF("clearing reports on states deeper than %u\n", max_offset); +bool clear_deeper_reports(raw_dfa &raw, u32 max_offset) { + DEBUG_PRINTF("clearing reports on states deeper than %u\n", max_offset); vector<u32> bob_dist; u32 max_min_dist_bob = calc_min_dist_from_bob(raw, &bob_dist); if (max_min_dist_bob <= max_offset) { - return false; + return false; } - bool changed = false; + bool changed = false; for (u32 s = DEAD_STATE + 1; s < raw.states.size(); s++) { - if (bob_dist[s] > max_offset && state_has_reports(raw, s)) { - DEBUG_PRINTF("clearing reports on %u (depth %u)\n", s, bob_dist[s]); - auto &ds = raw.states[s]; - ds.reports.clear(); - ds.reports_eod.clear(); - changed = true; + if (bob_dist[s] > max_offset && state_has_reports(raw, s)) { + DEBUG_PRINTF("clearing reports on %u (depth %u)\n", s, bob_dist[s]); + auto &ds = raw.states[s]; + ds.reports.clear(); + ds.reports_eod.clear(); + changed = true; } } - if (!changed) { - return false; + if (!changed) { + return false; + } + + // We may have cleared all reports from the DFA, in which case it should + // become empty. + if (all_of_in(raw.states, [](const dstate &ds) { + return ds.reports.empty() && ds.reports_eod.empty(); + })) { + DEBUG_PRINTF("no reports left at all, dfa is dead\n"); + raw.start_anchored = DEAD_STATE; + raw.start_floating = DEAD_STATE; } - // We may have cleared all reports from the DFA, in which case it should - // become empty. - if (all_of_in(raw.states, [](const dstate &ds) { - return ds.reports.empty() && ds.reports_eod.empty(); - })) { - DEBUG_PRINTF("no reports left at all, dfa is dead\n"); - raw.start_anchored = DEAD_STATE; - raw.start_floating = DEAD_STATE; - } - - return true; + return true; } set<ReportID> all_reports(const raw_dfa &rdfa) { @@ -230,10 +230,10 @@ bool has_non_eod_accepts(const raw_dfa &rdfa) { size_t hash_dfa_no_reports(const raw_dfa &rdfa) { size_t v = 0; hash_combine(v, rdfa.alpha_size); - hash_combine(v, rdfa.alpha_remap); + hash_combine(v, rdfa.alpha_remap); for (const auto &ds : rdfa.states) { - hash_combine(v, ds.next); + hash_combine(v, ds.next); } return v; @@ -246,41 +246,41 @@ size_t hash_dfa(const raw_dfa &rdfa) { return v; } -static -bool can_die_early(const raw_dfa &raw, dstate_id_t s, - map<dstate_id_t, u32> &visited, u32 age_limit) { - if (contains(visited, s) && visited[s] >= age_limit) { - /* we have already visited (or are in the process of visiting) here with - * a looser limit. */ - return false; - } - visited[s] = age_limit; - - if (s == DEAD_STATE) { - return true; - } - - if (age_limit == 0) { - return false; - } - - for (const auto &next : raw.states[s].next) { - if (can_die_early(raw, next, visited, age_limit - 1)) { - return true; - } - } - - return false; -} - -bool can_die_early(const raw_dfa &raw, u32 age_limit) { - map<dstate_id_t, u32> visited; - return can_die_early(raw, raw.start_anchored, visited, age_limit); -} - -bool is_dead(const raw_dfa &rdfa) { - return rdfa.start_anchored == DEAD_STATE && - rdfa.start_floating == DEAD_STATE; -} - +static +bool can_die_early(const raw_dfa &raw, dstate_id_t s, + map<dstate_id_t, u32> &visited, u32 age_limit) { + if (contains(visited, s) && visited[s] >= age_limit) { + /* we have already visited (or are in the process of visiting) here with + * a looser limit. 
*/ + return false; + } + visited[s] = age_limit; + + if (s == DEAD_STATE) { + return true; + } + + if (age_limit == 0) { + return false; + } + + for (const auto &next : raw.states[s].next) { + if (can_die_early(raw, next, visited, age_limit - 1)) { + return true; + } + } + + return false; +} + +bool can_die_early(const raw_dfa &raw, u32 age_limit) { + map<dstate_id_t, u32> visited; + return can_die_early(raw, raw.start_anchored, visited, age_limit); +} + +bool is_dead(const raw_dfa &rdfa) { + return rdfa.start_anchored == DEAD_STATE && + rdfa.start_floating == DEAD_STATE; +} + } // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfa/mcclellancompile_util.h b/contrib/libs/hyperscan/src/nfa/mcclellancompile_util.h index bc730cddea..a489496133 100644 --- a/contrib/libs/hyperscan/src/nfa/mcclellancompile_util.h +++ b/contrib/libs/hyperscan/src/nfa/mcclellancompile_util.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -29,23 +29,23 @@ #ifndef MCCLELLAN_COMPILE_UTIL_H #define MCCLELLAN_COMPILE_UTIL_H -#include "rdfa.h" +#include "rdfa.h" #include "ue2common.h" #include <set> namespace ue2 { -u32 remove_leading_dots(raw_dfa &raw); - -/** - * \brief Clear reports on any states that are deeper than \a max_offset from - * start of stream. - * - * Returns false if no changes are made to the DFA. - */ -bool clear_deeper_reports(raw_dfa &raw, u32 max_offset); +u32 remove_leading_dots(raw_dfa &raw); +/** + * \brief Clear reports on any states that are deeper than \a max_offset from + * start of stream. + * + * Returns false if no changes are made to the DFA. + */ +bool clear_deeper_reports(raw_dfa &raw, u32 max_offset); + std::set<ReportID> all_reports(const raw_dfa &rdfa); bool has_eod_accepts(const raw_dfa &rdfa); bool has_non_eod_accepts(const raw_dfa &rdfa); @@ -57,15 +57,15 @@ size_t hash_dfa_no_reports(const raw_dfa &rdfa); /** \brief Compute a simple hash of this raw_dfa, including its reports. */ size_t hash_dfa(const raw_dfa &rdfa); -bool can_die_early(const raw_dfa &raw, u32 age_limit); - -/** - * \brief Returns true if this DFA cannot match, i.e. its start state is - * DEAD_STATE. - */ -bool is_dead(const raw_dfa &rdfa); - - +bool can_die_early(const raw_dfa &raw, u32 age_limit); + +/** + * \brief Returns true if this DFA cannot match, i.e. its start state is + * DEAD_STATE. + */ +bool is_dead(const raw_dfa &rdfa); + + } // namespace ue2 #endif diff --git a/contrib/libs/hyperscan/src/nfa/mcsheng.c b/contrib/libs/hyperscan/src/nfa/mcsheng.c index 22cac119fb..901385573e 100644 --- a/contrib/libs/hyperscan/src/nfa/mcsheng.c +++ b/contrib/libs/hyperscan/src/nfa/mcsheng.c @@ -1,1410 +1,1410 @@ -/* +/* * Copyright (c) 2016-2020, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "mcsheng.h" - -#include "accel.h" -#include "mcsheng_internal.h" -#include "nfa_api.h" -#include "nfa_api_queue.h" -#include "nfa_internal.h" -#include "util/arch.h" -#include "util/bitutils.h" -#include "util/compare.h" -#include "util/simd_utils.h" -#include "ue2common.h" - -enum MatchMode { - CALLBACK_OUTPUT, - STOP_AT_MATCH, - NO_MATCHES -}; - -static really_inline -const struct mstate_aux *get_aux(const struct mcsheng *m, u32 s) { - const char *nfa = (const char *)m - sizeof(struct NFA); - const struct mstate_aux *aux - = s + (const struct mstate_aux *)(nfa + m->aux_offset); - - assert(ISALIGNED(aux)); - return aux; -} - -static really_inline -u32 mcshengEnableStarts(const struct mcsheng *m, u32 s) { - const struct mstate_aux *aux = get_aux(m, s); - - DEBUG_PRINTF("enabling starts %u->%hu\n", s, aux->top); - return aux->top; -} - -static really_inline -u32 doSherman16(const char *sherman_state, u8 cprime, const u16 *succ_table, - u32 as) { - assert(ISALIGNED_N(sherman_state, 16)); - - u8 len = *(const u8 *)(sherman_state + SHERMAN_LEN_OFFSET); - - if (len) { - m128 ss_char = load128(sherman_state); - m128 cur_char = set16x8(cprime); - - u32 z = movemask128(eq128(ss_char, cur_char)); - - /* remove header cruft: type 1, len 1, daddy 2*/ - z &= ~0xf; - z &= (1U << (len + 4)) - 1; - - if (z) { - u32 i = ctz32(z & ~0xf) - 4; - - u32 s_out = unaligned_load_u16((const u8 *)sherman_state - + SHERMAN_STATES_OFFSET(len) - + sizeof(u16) * i); - DEBUG_PRINTF("found sherman match at %u/%u for c'=%hhu s=%u\n", i, - len, cprime, s_out); - return s_out; - } - } - - u32 daddy = *(const u16 *)(sherman_state + SHERMAN_DADDY_OFFSET); - return succ_table[(daddy << as) + cprime]; -} - -static really_inline -char doComplexReport(NfaCallback cb, void *ctxt, const struct mcsheng *m, - u32 s, u64a loc, char eod, u32 *cached_accept_state, - u32 *cached_accept_id) { - DEBUG_PRINTF("reporting state = %u, loc=%llu, eod %hhu\n", - s & STATE_MASK, loc, eod); - - if (!eod && s == *cached_accept_state) { - if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; /* termination requested */ - } - - return MO_CONTINUE_MATCHING; /* continue execution */ - } - - const struct mstate_aux *aux = get_aux(m, s); - size_t offset = eod ? 
aux->accept_eod : aux->accept; - - assert(offset); - const struct report_list *rl - = (const void *)((const char *)m + offset - sizeof(struct NFA)); - assert(ISALIGNED(rl)); - - DEBUG_PRINTF("report list size %u\n", rl->count); - u32 count = rl->count; - - if (!eod && count == 1) { - *cached_accept_state = s; - *cached_accept_id = rl->report[0]; - - DEBUG_PRINTF("reporting %u\n", rl->report[0]); - if (cb(0, loc, rl->report[0], ctxt) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; /* termination requested */ - } - - return MO_CONTINUE_MATCHING; /* continue execution */ - } - - for (u32 i = 0; i < count; i++) { - DEBUG_PRINTF("reporting %u\n", rl->report[i]); - if (cb(0, loc, rl->report[i], ctxt) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; /* termination requested */ - } - } - - return MO_CONTINUE_MATCHING; /* continue execution */ -} - -#define SHENG_CHUNK 8 - -static really_inline -u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end, - const u8 *hard_c_end, u32 s_in, char do_accel) { - assert(s_in < m->sheng_end); - assert(s_in); /* should not already be dead */ - assert(soft_c_end <= hard_c_end); - DEBUG_PRINTF("s_in = %u (adjusted %u)\n", s_in, s_in - 1); - m128 s = set16x8(s_in - 1); - const u8 *c = *c_inout; - const u8 *c_end = hard_c_end - SHENG_CHUNK + 1; - if (!do_accel) { - c_end = MIN(soft_c_end, hard_c_end - SHENG_CHUNK + 1); - } - const m128 *masks = m->sheng_masks; - u8 sheng_limit = m->sheng_end - 1; /* - 1: no dead state */ - u8 sheng_stop_limit = do_accel ? m->sheng_accel_limit : sheng_limit; - - /* When we use movd to get a u32 containing our state, it will have 4 lanes - * all duplicating the state. We can create versions of our limits with 4 - * copies to directly compare against, this prevents us generating code to - * extract a single copy of the state from the u32 for checking. */ - u32 sheng_stop_limit_x4 = sheng_stop_limit * 0x01010101; - -#if defined(HAVE_BMI2) && defined(ARCH_64_BIT) - u32 sheng_limit_x4 = sheng_limit * 0x01010101; - m128 simd_stop_limit = set4x32(sheng_stop_limit_x4); - m128 accel_delta = set16x8(sheng_limit - sheng_stop_limit); + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "mcsheng.h" + +#include "accel.h" +#include "mcsheng_internal.h" +#include "nfa_api.h" +#include "nfa_api_queue.h" +#include "nfa_internal.h" +#include "util/arch.h" +#include "util/bitutils.h" +#include "util/compare.h" +#include "util/simd_utils.h" +#include "ue2common.h" + +enum MatchMode { + CALLBACK_OUTPUT, + STOP_AT_MATCH, + NO_MATCHES +}; + +static really_inline +const struct mstate_aux *get_aux(const struct mcsheng *m, u32 s) { + const char *nfa = (const char *)m - sizeof(struct NFA); + const struct mstate_aux *aux + = s + (const struct mstate_aux *)(nfa + m->aux_offset); + + assert(ISALIGNED(aux)); + return aux; +} + +static really_inline +u32 mcshengEnableStarts(const struct mcsheng *m, u32 s) { + const struct mstate_aux *aux = get_aux(m, s); + + DEBUG_PRINTF("enabling starts %u->%hu\n", s, aux->top); + return aux->top; +} + +static really_inline +u32 doSherman16(const char *sherman_state, u8 cprime, const u16 *succ_table, + u32 as) { + assert(ISALIGNED_N(sherman_state, 16)); + + u8 len = *(const u8 *)(sherman_state + SHERMAN_LEN_OFFSET); + + if (len) { + m128 ss_char = load128(sherman_state); + m128 cur_char = set16x8(cprime); + + u32 z = movemask128(eq128(ss_char, cur_char)); + + /* remove header cruft: type 1, len 1, daddy 2*/ + z &= ~0xf; + z &= (1U << (len + 4)) - 1; + + if (z) { + u32 i = ctz32(z & ~0xf) - 4; + + u32 s_out = unaligned_load_u16((const u8 *)sherman_state + + SHERMAN_STATES_OFFSET(len) + + sizeof(u16) * i); + DEBUG_PRINTF("found sherman match at %u/%u for c'=%hhu s=%u\n", i, + len, cprime, s_out); + return s_out; + } + } + + u32 daddy = *(const u16 *)(sherman_state + SHERMAN_DADDY_OFFSET); + return succ_table[(daddy << as) + cprime]; +} + +static really_inline +char doComplexReport(NfaCallback cb, void *ctxt, const struct mcsheng *m, + u32 s, u64a loc, char eod, u32 *cached_accept_state, + u32 *cached_accept_id) { + DEBUG_PRINTF("reporting state = %u, loc=%llu, eod %hhu\n", + s & STATE_MASK, loc, eod); + + if (!eod && s == *cached_accept_state) { + if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; /* termination requested */ + } + + return MO_CONTINUE_MATCHING; /* continue execution */ + } + + const struct mstate_aux *aux = get_aux(m, s); + size_t offset = eod ? 
aux->accept_eod : aux->accept; + + assert(offset); + const struct report_list *rl + = (const void *)((const char *)m + offset - sizeof(struct NFA)); + assert(ISALIGNED(rl)); + + DEBUG_PRINTF("report list size %u\n", rl->count); + u32 count = rl->count; + + if (!eod && count == 1) { + *cached_accept_state = s; + *cached_accept_id = rl->report[0]; + + DEBUG_PRINTF("reporting %u\n", rl->report[0]); + if (cb(0, loc, rl->report[0], ctxt) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; /* termination requested */ + } + + return MO_CONTINUE_MATCHING; /* continue execution */ + } + + for (u32 i = 0; i < count; i++) { + DEBUG_PRINTF("reporting %u\n", rl->report[i]); + if (cb(0, loc, rl->report[i], ctxt) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; /* termination requested */ + } + } + + return MO_CONTINUE_MATCHING; /* continue execution */ +} + +#define SHENG_CHUNK 8 + +static really_inline +u32 doSheng(const struct mcsheng *m, const u8 **c_inout, const u8 *soft_c_end, + const u8 *hard_c_end, u32 s_in, char do_accel) { + assert(s_in < m->sheng_end); + assert(s_in); /* should not already be dead */ + assert(soft_c_end <= hard_c_end); + DEBUG_PRINTF("s_in = %u (adjusted %u)\n", s_in, s_in - 1); + m128 s = set16x8(s_in - 1); + const u8 *c = *c_inout; + const u8 *c_end = hard_c_end - SHENG_CHUNK + 1; + if (!do_accel) { + c_end = MIN(soft_c_end, hard_c_end - SHENG_CHUNK + 1); + } + const m128 *masks = m->sheng_masks; + u8 sheng_limit = m->sheng_end - 1; /* - 1: no dead state */ + u8 sheng_stop_limit = do_accel ? m->sheng_accel_limit : sheng_limit; + + /* When we use movd to get a u32 containing our state, it will have 4 lanes + * all duplicating the state. We can create versions of our limits with 4 + * copies to directly compare against, this prevents us generating code to + * extract a single copy of the state from the u32 for checking. 
*/ + u32 sheng_stop_limit_x4 = sheng_stop_limit * 0x01010101; + +#if defined(HAVE_BMI2) && defined(ARCH_64_BIT) + u32 sheng_limit_x4 = sheng_limit * 0x01010101; + m128 simd_stop_limit = set4x32(sheng_stop_limit_x4); + m128 accel_delta = set16x8(sheng_limit - sheng_stop_limit); DEBUG_PRINTF("end %hhu, accel %hu --> limit %hhu\n", sheng_limit, - m->sheng_accel_limit, sheng_stop_limit); -#endif - -#define SHENG_SINGLE_ITER do { \ - m128 shuffle_mask = masks[*(c++)]; \ - s = pshufb_m128(shuffle_mask, s); \ - u32 s_gpr_x4 = movd(s); /* convert to u8 */ \ + m->sheng_accel_limit, sheng_stop_limit); +#endif + +#define SHENG_SINGLE_ITER do { \ + m128 shuffle_mask = masks[*(c++)]; \ + s = pshufb_m128(shuffle_mask, s); \ + u32 s_gpr_x4 = movd(s); /* convert to u8 */ \ DEBUG_PRINTF("c %hhu (%c) --> s %u\n", c[-1], c[-1], s_gpr_x4); \ - if (s_gpr_x4 >= sheng_stop_limit_x4) { \ - s_gpr = s_gpr_x4; \ - goto exit; \ - } \ - } while (0) - - u8 s_gpr; - while (c < c_end) { -#if defined(HAVE_BMI2) && defined(ARCH_64_BIT) + if (s_gpr_x4 >= sheng_stop_limit_x4) { \ + s_gpr = s_gpr_x4; \ + goto exit; \ + } \ + } while (0) + + u8 s_gpr; + while (c < c_end) { +#if defined(HAVE_BMI2) && defined(ARCH_64_BIT) /* This version uses pext for efficiently bitbashing out scaled - * versions of the bytes to process from a u64a */ - - u64a data_bytes = unaligned_load_u64a(c); - u64a cc0 = pdep64(data_bytes, 0xff0); /* extract scaled low byte */ - data_bytes &= ~0xffULL; /* clear low bits for scale space */ - m128 shuffle_mask0 = load128((const char *)masks + cc0); - s = pshufb_m128(shuffle_mask0, s); - m128 s_max = s; - m128 s_max0 = s_max; + * versions of the bytes to process from a u64a */ + + u64a data_bytes = unaligned_load_u64a(c); + u64a cc0 = pdep64(data_bytes, 0xff0); /* extract scaled low byte */ + data_bytes &= ~0xffULL; /* clear low bits for scale space */ + m128 shuffle_mask0 = load128((const char *)masks + cc0); + s = pshufb_m128(shuffle_mask0, s); + m128 s_max = s; + m128 s_max0 = s_max; DEBUG_PRINTF("c %02llx --> s %u\n", cc0 >> 4, movd(s)); - -#define SHENG_SINGLE_UNROLL_ITER(iter) \ - assert(iter); \ - u64a cc##iter = pext64(data_bytes, mcsheng_pext_mask[iter]); \ - assert(cc##iter == (u64a)c[iter] << 4); \ - m128 shuffle_mask##iter = load128((const char *)masks + cc##iter); \ - s = pshufb_m128(shuffle_mask##iter, s); \ - if (do_accel && iter == 7) { \ - /* in the final iteration we also have to check against accel */ \ - m128 s_temp = sadd_u8_m128(s, accel_delta); \ - s_max = max_u8_m128(s_max, s_temp); \ - } else { \ - s_max = max_u8_m128(s_max, s); \ - } \ - m128 s_max##iter = s_max; \ + +#define SHENG_SINGLE_UNROLL_ITER(iter) \ + assert(iter); \ + u64a cc##iter = pext64(data_bytes, mcsheng_pext_mask[iter]); \ + assert(cc##iter == (u64a)c[iter] << 4); \ + m128 shuffle_mask##iter = load128((const char *)masks + cc##iter); \ + s = pshufb_m128(shuffle_mask##iter, s); \ + if (do_accel && iter == 7) { \ + /* in the final iteration we also have to check against accel */ \ + m128 s_temp = sadd_u8_m128(s, accel_delta); \ + s_max = max_u8_m128(s_max, s_temp); \ + } else { \ + s_max = max_u8_m128(s_max, s); \ + } \ + m128 s_max##iter = s_max; \ DEBUG_PRINTF("c %02llx --> s %u max %u\n", cc##iter >> 4, \ - movd(s), movd(s_max)); - - SHENG_SINGLE_UNROLL_ITER(1); - - SHENG_SINGLE_UNROLL_ITER(2); - SHENG_SINGLE_UNROLL_ITER(3); - - SHENG_SINGLE_UNROLL_ITER(4); - SHENG_SINGLE_UNROLL_ITER(5); - - SHENG_SINGLE_UNROLL_ITER(6); - SHENG_SINGLE_UNROLL_ITER(7); - - if (movd(s_max7) >= sheng_limit_x4) { - DEBUG_PRINTF("exit 
found\n"); - - /* Explicitly check the last byte as it is more likely as it also - * checks for acceleration. */ - if (movd(s_max6) < sheng_limit_x4) { - c += SHENG_CHUNK; - s_gpr = movq(s); - assert(s_gpr >= sheng_stop_limit); - goto exit; - } - - /* use shift-xor to create a register containing all of the max - * values */ - m128 blended = rshift64_m128(s_max0, 56); - blended = xor128(blended, rshift64_m128(s_max1, 48)); - blended = xor128(blended, rshift64_m128(s_max2, 40)); - blended = xor128(blended, rshift64_m128(s_max3, 32)); - blended = xor128(blended, rshift64_m128(s_max4, 24)); - blended = xor128(blended, rshift64_m128(s_max5, 16)); - blended = xor128(blended, rshift64_m128(s_max6, 8)); - blended = xor128(blended, s); - blended = xor128(blended, rshift64_m128(blended, 8)); - DEBUG_PRINTF("blended %016llx\n", movq(blended)); - - m128 final = min_u8_m128(blended, simd_stop_limit); - m128 cmp = sub_u8_m128(final, simd_stop_limit); - u64a stops = ~movemask128(cmp); - assert(stops); - u32 earliest = ctz32(stops); - DEBUG_PRINTF("stops %02llx, earliest %u\n", stops, earliest); - assert(earliest < 8); - c += earliest + 1; - s_gpr = movq(blended) >> (earliest * 8); - assert(s_gpr >= sheng_stop_limit); - goto exit; - } else { - c += SHENG_CHUNK; - } -#else - SHENG_SINGLE_ITER; - SHENG_SINGLE_ITER; - SHENG_SINGLE_ITER; - SHENG_SINGLE_ITER; - - SHENG_SINGLE_ITER; - SHENG_SINGLE_ITER; - SHENG_SINGLE_ITER; - SHENG_SINGLE_ITER; -#endif - } - - assert(c_end - c < SHENG_CHUNK); - if (c < soft_c_end) { - assert(soft_c_end - c < SHENG_CHUNK); - switch (soft_c_end - c) { - case 7: - SHENG_SINGLE_ITER; // fallthrough - case 6: - SHENG_SINGLE_ITER; // fallthrough - case 5: - SHENG_SINGLE_ITER; // fallthrough - case 4: - SHENG_SINGLE_ITER; // fallthrough - case 3: - SHENG_SINGLE_ITER; // fallthrough - case 2: - SHENG_SINGLE_ITER; // fallthrough - case 1: - SHENG_SINGLE_ITER; // fallthrough - } - } - - assert(c >= soft_c_end); - - s_gpr = movd(s); -exit: - assert(c <= hard_c_end); - DEBUG_PRINTF("%zu from end; s %hhu\n", c_end - c, s_gpr); - assert(c >= soft_c_end || s_gpr >= sheng_stop_limit); - /* undo state adjustment to match mcclellan view */ - if (s_gpr == sheng_limit) { - s_gpr = 0; - } else if (s_gpr < sheng_limit) { - s_gpr++; - } - - *c_inout = c; - return s_gpr; -} - -static really_inline -const char *findShermanState(UNUSED const struct mcsheng *m, - const char *sherman_base_offset, u32 sherman_base, - u32 s) { - const char *rv - = sherman_base_offset + SHERMAN_FIXED_SIZE * (s - sherman_base); - assert(rv < (const char *)m + m->length - sizeof(struct NFA)); - UNUSED u8 type = *(const u8 *)(rv + SHERMAN_TYPE_OFFSET); - assert(type == SHERMAN_STATE); - return rv; -} - -static really_inline -const u8 *run_mcsheng_accel(const struct mcsheng *m, - const struct mstate_aux *aux, u32 s, - const u8 **min_accel_offset, - const u8 *c, const u8 *c_end) { - DEBUG_PRINTF("skipping\n"); - u32 accel_offset = aux[s].accel_offset; - - assert(aux[s].accel_offset); - assert(accel_offset >= m->aux_offset); - assert(!m->sherman_offset || accel_offset < m->sherman_offset); - - const union AccelAux *aaux = (const void *)((const char *)m + accel_offset); - const u8 *c2 = run_accel(aaux, c, c_end); - - if (c2 < *min_accel_offset + BAD_ACCEL_DIST) { - *min_accel_offset = c2 + BIG_ACCEL_PENALTY; - } else { - *min_accel_offset = c2 + SMALL_ACCEL_PENALTY; - } - - if (*min_accel_offset >= c_end - ACCEL_MIN_LEN) { - *min_accel_offset = c_end; - } - - DEBUG_PRINTF("advanced %zd, next accel chance in %zd/%zd\n", - c2 - c, 
*min_accel_offset - c2, c_end - c2); - - return c2; -} - -static really_inline -u32 doNormal16(const struct mcsheng *m, const u8 **c_inout, const u8 *end, - u32 s, char do_accel, enum MatchMode mode) { - const u8 *c = *c_inout; - - const u16 *succ_table - = (const u16 *)((const char *)m + sizeof(struct mcsheng)); - assert(ISALIGNED_N(succ_table, 2)); - u32 sheng_end = m->sheng_end; - u32 sherman_base = m->sherman_limit; - const char *sherman_base_offset - = (const char *)m - sizeof(struct NFA) + m->sherman_offset; - u32 as = m->alphaShift; - - /* Adjust start of succ table so we can index into using state id (rather - * than adjust to normal id). As we will not be processing states with low - * state ids, we will not be accessing data before the succ table. Note: due - * to the size of the sheng tables, the succ_table pointer will still be - * inside the engine.*/ - succ_table -= sheng_end << as; - - s &= STATE_MASK; - - while (c < end && s >= sheng_end) { - u8 cprime = m->remap[*c]; - DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx (s=%u)\n", *c, - ourisprint(*c) ? *c : '?', cprime, s); - if (s < sherman_base) { - DEBUG_PRINTF("doing normal\n"); - assert(s < m->state_count); - s = succ_table[(s << as) + cprime]; - } else { - const char *sherman_state - = findShermanState(m, sherman_base_offset, sherman_base, s); - DEBUG_PRINTF("doing sherman (%u)\n", s); - s = doSherman16(sherman_state, cprime, succ_table, as); - } - - DEBUG_PRINTF("s: %u (%u)\n", s, s & STATE_MASK); - c++; - - if (do_accel && (s & ACCEL_FLAG)) { - break; - } - if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) { - break; - } - - s &= STATE_MASK; - } - - *c_inout = c; - return s; -} - -static really_inline -char mcshengExec16_i(const struct mcsheng *m, u32 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **c_final, enum MatchMode mode) { - assert(ISALIGNED_N(state, 2)); - if (!len) { - if (mode == STOP_AT_MATCH) { - *c_final = buf; - } - return MO_ALIVE; - } - - u32 s = *state; - const u8 *c = buf; - const u8 *c_end = buf + len; - const u8 sheng_end = m->sheng_end; - const struct mstate_aux *aux - = (const struct mstate_aux *)((const char *)m + m->aux_offset - - sizeof(struct NFA)); - - s &= STATE_MASK; - - u32 cached_accept_id = 0; - u32 cached_accept_state = 0; - - DEBUG_PRINTF("s: %u, len %zu\n", s, len); - - const u8 *min_accel_offset = c; - if (!m->has_accel || len < ACCEL_MIN_LEN) { - min_accel_offset = c_end; - goto without_accel; - } - - goto with_accel; - -without_accel: - do { - assert(c < min_accel_offset); - int do_accept; - if (!s) { - goto exit; - } else if (s < sheng_end) { - s = doSheng(m, &c, min_accel_offset, c_end, s, 0); - do_accept = mode != NO_MATCHES && get_aux(m, s)->accept; - } else { - s = doNormal16(m, &c, min_accel_offset, s, 0, mode); - - do_accept = mode != NO_MATCHES && (s & ACCEPT_FLAG); - } - - if (do_accept) { - if (mode == STOP_AT_MATCH) { - *state = s & STATE_MASK; - *c_final = c - 1; - return MO_MATCHES_PENDING; - } - - u64a loc = (c - 1) - buf + offAdj + 1; - - if (single) { - DEBUG_PRINTF("reporting %u\n", m->arb_report); - if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { - return MO_DEAD; /* termination requested */ - } - } else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0, - &cached_accept_state, &cached_accept_id) - == MO_HALT_MATCHING) { - return MO_DEAD; - } - } - - assert(c <= c_end); /* sheng is fuzzy for min_accel_offset */ - } while (c < min_accel_offset); - - if (c == c_end) { - goto exit; - } - -with_accel: - do { 
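/* Editor's note: illustrative sketch, not part of this commit. It captures
 * the self-throttling policy of run_mcsheng_accel() above: an acceleration
 * probe that advances less than BAD_ACCEL_DIST bytes is judged unprofitable,
 * so the next probe is deferred by BIG_ACCEL_PENALTY rather than
 * SMALL_ACCEL_PENALTY. The helper name and the unsigned distance parameters
 * are hypothetical. */
static const unsigned char *
next_accel_probe_sketch(const unsigned char *c2,
                        const unsigned char *prev_min_accel_offset,
                        unsigned bad_accel_dist, unsigned big_penalty,
                        unsigned small_penalty) {
    if (c2 < prev_min_accel_offset + bad_accel_dist) {
        return c2 + big_penalty;   /* barely advanced: back off hard */
    }
    return c2 + small_penalty;     /* good advance: probe again soon */
}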
- assert(c < c_end); - int do_accept; - - if (!s) { - goto exit; - } else if (s < sheng_end) { - if (s > m->sheng_accel_limit) { - c = run_mcsheng_accel(m, aux, s, &min_accel_offset, c, c_end); - if (c == c_end) { - goto exit; - } else { - goto without_accel; - } - } - s = doSheng(m, &c, c_end, c_end, s, 1); - do_accept = mode != NO_MATCHES && get_aux(m, s)->accept; - } else { - if (s & ACCEL_FLAG) { - DEBUG_PRINTF("skipping\n"); - s &= STATE_MASK; - c = run_mcsheng_accel(m, aux, s, &min_accel_offset, c, c_end); - if (c == c_end) { - goto exit; - } else { - goto without_accel; - } - } - - s = doNormal16(m, &c, c_end, s, 1, mode); - do_accept = mode != NO_MATCHES && (s & ACCEPT_FLAG); - } - - if (do_accept) { - if (mode == STOP_AT_MATCH) { - *state = s & STATE_MASK; - *c_final = c - 1; - return MO_MATCHES_PENDING; - } - - u64a loc = (c - 1) - buf + offAdj + 1; - - if (single) { - DEBUG_PRINTF("reporting %u\n", m->arb_report); - if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { - return MO_DEAD; /* termination requested */ - } - } else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0, - &cached_accept_state, &cached_accept_id) - == MO_HALT_MATCHING) { - return MO_DEAD; - } - } - - assert(c <= c_end); - } while (c < c_end); - -exit: - s &= STATE_MASK; - - if (mode == STOP_AT_MATCH) { - *c_final = c_end; - } - *state = s; - - return MO_ALIVE; -} - -static never_inline -char mcshengExec16_i_cb(const struct mcsheng *m, u32 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **final_point) { - return mcshengExec16_i(m, state, buf, len, offAdj, cb, ctxt, single, - final_point, CALLBACK_OUTPUT); -} - -static never_inline -char mcshengExec16_i_sam(const struct mcsheng *m, u32 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **final_point) { - return mcshengExec16_i(m, state, buf, len, offAdj, cb, ctxt, single, - final_point, STOP_AT_MATCH); -} - -static never_inline -char mcshengExec16_i_nm(const struct mcsheng *m, u32 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **final_point) { - return mcshengExec16_i(m, state, buf, len, offAdj, cb, ctxt, single, - final_point, NO_MATCHES); -} - -static really_inline -char mcshengExec16_i_ni(const struct mcsheng *m, u32 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **final_point, - enum MatchMode mode) { - if (mode == CALLBACK_OUTPUT) { - return mcshengExec16_i_cb(m, state, buf, len, offAdj, cb, ctxt, - single, final_point); - } else if (mode == STOP_AT_MATCH) { - return mcshengExec16_i_sam(m, state, buf, len, offAdj, cb, ctxt, - single, final_point); - } else { - assert (mode == NO_MATCHES); - return mcshengExec16_i_nm(m, state, buf, len, offAdj, cb, ctxt, - single, final_point); - } -} - -static really_inline -u32 doNormal8(const struct mcsheng *m, const u8 **c_inout, const u8 *end, u32 s, - char do_accel, enum MatchMode mode) { - const u8 *c = *c_inout; - u32 sheng_end = m->sheng_end; - u32 accel_limit = m->accel_limit_8; - u32 accept_limit = m->accept_limit_8; - - const u32 as = m->alphaShift; - const u8 *succ_table = (const u8 *)((const char *)m - + sizeof(struct mcsheng)); - /* Adjust start of succ table so we can index into using state id (rather - * than adjust to normal id). As we will not be processing states with low - * state ids, we will not be accessing data before the succ table. 
Note: due - * to the size of the sheng tables, the succ_table pointer will still be - * inside the engine.*/ - succ_table -= sheng_end << as; - - assert(s >= sheng_end); - - while (c < end && s >= sheng_end) { - u8 cprime = m->remap[*c]; - DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx\n", *c, - ourisprint(*c) ? *c : '?', cprime); - s = succ_table[(s << as) + cprime]; - - DEBUG_PRINTF("s: %u\n", s); - c++; - if (do_accel) { - if (s >= accel_limit) { - break; - } - } else { - if (mode != NO_MATCHES && s >= accept_limit) { - break; - } - } - } - *c_inout = c; - return s; -} - -static really_inline -char mcshengExec8_i(const struct mcsheng *m, u32 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **c_final, enum MatchMode mode) { - if (!len) { - *c_final = buf; - return MO_ALIVE; - } - u32 s = *state; - const u8 *c = buf; - const u8 *c_end = buf + len; - const u8 sheng_end = m->sheng_end; - - const struct mstate_aux *aux - = (const struct mstate_aux *)((const char *)m + m->aux_offset - - sizeof(struct NFA)); - u32 accept_limit = m->accept_limit_8; - - u32 cached_accept_id = 0; - u32 cached_accept_state = 0; - - DEBUG_PRINTF("accel %hu, accept %u\n", m->accel_limit_8, accept_limit); - - DEBUG_PRINTF("s: %u, len %zu\n", s, len); - - const u8 *min_accel_offset = c; - if (!m->has_accel || len < ACCEL_MIN_LEN) { - min_accel_offset = c_end; - goto without_accel; - } - - goto with_accel; - -without_accel: - do { - assert(c < min_accel_offset); - if (!s) { - goto exit; - } else if (s < sheng_end) { - s = doSheng(m, &c, min_accel_offset, c_end, s, 0); - } else { - s = doNormal8(m, &c, min_accel_offset, s, 0, mode); - assert(c <= min_accel_offset); - } - - if (mode != NO_MATCHES && s >= accept_limit) { - if (mode == STOP_AT_MATCH) { - DEBUG_PRINTF("match - pausing\n"); - *state = s; - *c_final = c - 1; - return MO_MATCHES_PENDING; - } - - u64a loc = (c - 1) - buf + offAdj + 1; - if (single) { - DEBUG_PRINTF("reporting %u\n", m->arb_report); - if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { - return MO_DEAD; - } - } else if (doComplexReport(cb, ctxt, m, s, loc, 0, - &cached_accept_state, &cached_accept_id) - == MO_HALT_MATCHING) { - return MO_DEAD; - } - } - - assert(c <= c_end); /* sheng is fuzzy for min_accel_offset */ - } while (c < min_accel_offset); - - if (c == c_end) { - goto exit; - } - -with_accel: - do { - u32 accel_limit = m->accel_limit_8; - - assert(c < c_end); - if (!s) { - goto exit; - } else if (s < sheng_end) { - if (s > m->sheng_accel_limit) { - c = run_mcsheng_accel(m, aux, s, &min_accel_offset, c, c_end); - if (c == c_end) { - goto exit; - } else { - goto without_accel; - } - } - s = doSheng(m, &c, c_end, c_end, s, 1); - } else { - if (s >= accel_limit && aux[s].accel_offset) { - c = run_mcsheng_accel(m, aux, s, &min_accel_offset, c, c_end); - if (c == c_end) { - goto exit; - } else { - goto without_accel; - } - } - s = doNormal8(m, &c, c_end, s, 1, mode); - } - - if (mode != NO_MATCHES && s >= accept_limit) { - if (mode == STOP_AT_MATCH) { - DEBUG_PRINTF("match - pausing\n"); - *state = s; - *c_final = c - 1; - return MO_MATCHES_PENDING; - } - - u64a loc = (c - 1) - buf + offAdj + 1; - if (single) { - DEBUG_PRINTF("reporting %u\n", m->arb_report); - if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { - return MO_DEAD; - } - } else if (doComplexReport(cb, ctxt, m, s, loc, 0, - &cached_accept_state, &cached_accept_id) - == MO_HALT_MATCHING) { - return MO_DEAD; - } - } - - assert(c <= c_end); - } while (c < c_end); - -exit: 
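/* Editor's note: illustrative sketch, not part of this commit. The 8-bit
 * engine numbers implementation states so that whole classes are contiguous
 * (ordinary states first, then accelerable, then accept; see allocateFSN8 in
 * the compiler diff above), which is why the loops here can test a state's
 * class with plain unsigned comparisons against accel_limit_8 and
 * accept_limit_8. Helper names are hypothetical. */
static inline int sketch_is_accel(unsigned s, unsigned accel_limit_8) {
    return s >= accel_limit_8;     /* accelerable (or accept) state */
}
static inline int sketch_is_accept(unsigned s, unsigned accept_limit_8) {
    return s >= accept_limit_8;    /* accept states occupy the top range */
}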
- *state = s; - if (mode == STOP_AT_MATCH) { - *c_final = c_end; - } - return MO_ALIVE; -} - -static never_inline -char mcshengExec8_i_cb(const struct mcsheng *m, u32 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **final_point) { - return mcshengExec8_i(m, state, buf, len, offAdj, cb, ctxt, single, - final_point, CALLBACK_OUTPUT); -} - -static never_inline -char mcshengExec8_i_sam(const struct mcsheng *m, u32 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **final_point) { - return mcshengExec8_i(m, state, buf, len, offAdj, cb, ctxt, single, - final_point, STOP_AT_MATCH); -} - -static never_inline -char mcshengExec8_i_nm(const struct mcsheng *m, u32 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **final_point) { - return mcshengExec8_i(m, state, buf, len, offAdj, cb, ctxt, single, - final_point, NO_MATCHES); -} - -static really_inline -char mcshengExec8_i_ni(const struct mcsheng *m, u32 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **final_point, - enum MatchMode mode) { - if (mode == CALLBACK_OUTPUT) { - return mcshengExec8_i_cb(m, state, buf, len, offAdj, cb, ctxt, single, - final_point); - } else if (mode == STOP_AT_MATCH) { - return mcshengExec8_i_sam(m, state, buf, len, offAdj, cb, ctxt, - single, final_point); - } else { - assert(mode == NO_MATCHES); - return mcshengExec8_i_nm(m, state, buf, len, offAdj, cb, ctxt, single, - final_point); - } -} - -static really_inline -char mcshengCheckEOD(const struct NFA *nfa, u32 s, u64a offset, - NfaCallback cb, void *ctxt) { - const struct mcsheng *m = getImplNfa(nfa); - const struct mstate_aux *aux = get_aux(m, s); - - if (!aux->accept_eod) { - return MO_CONTINUE_MATCHING; - } - return doComplexReport(cb, ctxt, m, s, offset, 1, NULL, NULL); -} - -static really_inline -char nfaExecMcSheng16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, - const u8 *hend, NfaCallback cb, void *context, - struct mq *q, char single, s64a end, - enum MatchMode mode) { - assert(n->type == MCSHENG_NFA_16); - const struct mcsheng *m = getImplNfa(n); - s64a sp; - - assert(ISALIGNED_N(q->state, 2)); - u32 s = *(u16 *)q->state; - - if (q->report_current) { - assert(s); - assert(get_aux(m, s)->accept); - - int rv; - if (single) { - DEBUG_PRINTF("reporting %u\n", m->arb_report); - rv = cb(0, q_cur_offset(q), m->arb_report, context); - } else { - u32 cached_accept_id = 0; - u32 cached_accept_state = 0; - - rv = doComplexReport(cb, context, m, s, q_cur_offset(q), 0, - &cached_accept_state, &cached_accept_id); - } - - q->report_current = 0; - - if (rv == MO_HALT_MATCHING) { - return MO_DEAD; - } - } - - sp = q_cur_loc(q); - q->cur++; - - const u8 *cur_buf = sp < 0 ? 
hend : buffer; - - assert(q->cur); - if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) { - DEBUG_PRINTF("this is as far as we go\n"); - q->cur--; - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = end; - *(u16 *)q->state = s; - return MO_ALIVE; - } - - while (1) { - assert(q->cur < q->end); - s64a ep = q->items[q->cur].location; - if (mode != NO_MATCHES) { - ep = MIN(ep, end); - } - - assert(ep >= sp); - - s64a local_ep = ep; - if (sp < 0) { - local_ep = MIN(0, ep); - } - - /* do main buffer region */ - const u8 *final_look; - char rv = mcshengExec16_i_ni(m, &s, cur_buf + sp, local_ep - sp, - offset + sp, cb, context, single, - &final_look, mode); - if (rv == MO_DEAD) { - *(u16 *)q->state = 0; - return MO_DEAD; - } - if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) { - DEBUG_PRINTF("this is as far as we go\n"); - DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf); - - assert(q->cur); - assert(final_look != cur_buf + local_ep); - - q->cur--; - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = final_look - cur_buf + 1; /* due to - * early -1 */ - *(u16 *)q->state = s; - return MO_MATCHES_PENDING; - } - - assert(rv == MO_ALIVE); - assert(q->cur); - if (mode != NO_MATCHES && q->items[q->cur].location > end) { - DEBUG_PRINTF("this is as far as we go\n"); - q->cur--; - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = end; - *(u16 *)q->state = s; - return MO_ALIVE; - } - - sp = local_ep; - - if (sp == 0) { - cur_buf = buffer; - } - - if (sp != ep) { - continue; - } - - switch (q->items[q->cur].type) { - case MQE_TOP: - assert(sp + offset || !s); - if (sp + offset == 0) { - s = m->start_anchored; - break; - } - s = mcshengEnableStarts(m, s); - break; - case MQE_END: - *(u16 *)q->state = s; - q->cur++; - return s ? MO_ALIVE : MO_DEAD; - default: - assert(!"invalid queue event"); - } - - q->cur++; - } -} - -static really_inline -char nfaExecMcSheng8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, - const u8 *hend, NfaCallback cb, void *context, - struct mq *q, char single, s64a end, - enum MatchMode mode) { - assert(n->type == MCSHENG_NFA_8); - const struct mcsheng *m = getImplNfa(n); - s64a sp; - - u32 s = *(u8 *)q->state; - - if (q->report_current) { - assert(s); - assert(s >= m->accept_limit_8); - - int rv; - if (single) { - DEBUG_PRINTF("reporting %u\n", m->arb_report); - rv = cb(0, q_cur_offset(q), m->arb_report, context); - } else { - u32 cached_accept_id = 0; - u32 cached_accept_state = 0; - - rv = doComplexReport(cb, context, m, s, q_cur_offset(q), 0, - &cached_accept_state, &cached_accept_id); - } - - q->report_current = 0; - - if (rv == MO_HALT_MATCHING) { - return MO_DEAD; - } - } - - sp = q_cur_loc(q); - q->cur++; - - const u8 *cur_buf = sp < 0 ? hend : buffer; - - if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) { - DEBUG_PRINTF("this is as far as we go\n"); - q->cur--; - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = end; - *(u8 *)q->state = s; - return MO_ALIVE; - } - - while (1) { - DEBUG_PRINTF("%s @ %llu\n", q->items[q->cur].type == MQE_TOP ? "TOP" : - q->items[q->cur].type == MQE_END ? 
"END" : "???", - q->items[q->cur].location + offset); - assert(q->cur < q->end); - s64a ep = q->items[q->cur].location; - if (mode != NO_MATCHES) { - ep = MIN(ep, end); - } - - assert(ep >= sp); - - s64a local_ep = ep; - if (sp < 0) { - local_ep = MIN(0, ep); - } - - const u8 *final_look; - char rv = mcshengExec8_i_ni(m, &s, cur_buf + sp, local_ep - sp, - offset + sp, cb, context, single, - &final_look, mode); - if (rv == MO_HALT_MATCHING) { - *(u8 *)q->state = 0; - return MO_DEAD; - } - if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) { - DEBUG_PRINTF("this is as far as we go\n"); - DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf); - - assert(q->cur); - assert(final_look != cur_buf + local_ep); - - q->cur--; - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = final_look - cur_buf + 1; /* due to - * early -1 */ - *(u8 *)q->state = s; - return MO_MATCHES_PENDING; - } - - assert(rv == MO_ALIVE); - assert(q->cur); - if (mode != NO_MATCHES && q->items[q->cur].location > end) { - DEBUG_PRINTF("this is as far as we go\n"); - assert(q->cur); - q->cur--; - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = end; - *(u8 *)q->state = s; - return MO_ALIVE; - } - - sp = local_ep; - - if (sp == 0) { - cur_buf = buffer; - } - - if (sp != ep) { - continue; - } - - switch (q->items[q->cur].type) { - case MQE_TOP: - assert(sp + offset || !s); - if (sp + offset == 0) { - s = (u8)m->start_anchored; - break; - } - s = mcshengEnableStarts(m, s); - break; - case MQE_END: - *(u8 *)q->state = s; - q->cur++; - return s ? MO_ALIVE : MO_DEAD; - default: - assert(!"invalid queue event"); - } - - q->cur++; - } -} - -char nfaExecMcSheng8_Q(const struct NFA *n, struct mq *q, s64a end) { - u64a offset = q->offset; - const u8 *buffer = q->buffer; - NfaCallback cb = q->cb; - void *context = q->context; - assert(n->type == MCSHENG_NFA_8); - const struct mcsheng *m = getImplNfa(n); - const u8 *hend = q->history + q->hlength; - - return nfaExecMcSheng8_Q2i(n, offset, buffer, hend, cb, context, q, - m->flags & MCSHENG_FLAG_SINGLE, end, - CALLBACK_OUTPUT); -} - -char nfaExecMcSheng16_Q(const struct NFA *n, struct mq *q, s64a end) { - u64a offset = q->offset; - const u8 *buffer = q->buffer; - NfaCallback cb = q->cb; - void *context = q->context; - assert(n->type == MCSHENG_NFA_16); - const struct mcsheng *m = getImplNfa(n); - const u8 *hend = q->history + q->hlength; - - return nfaExecMcSheng16_Q2i(n, offset, buffer, hend, cb, context, q, - m->flags & MCSHENG_FLAG_SINGLE, end, - CALLBACK_OUTPUT); -} - -char nfaExecMcSheng8_reportCurrent(const struct NFA *n, struct mq *q) { - const struct mcsheng *m = getImplNfa(n); - NfaCallback cb = q->cb; - void *ctxt = q->context; - u32 s = *(u8 *)q->state; - u8 single = m->flags & MCSHENG_FLAG_SINGLE; - u64a offset = q_cur_offset(q); - assert(q_cur_type(q) == MQE_START); - assert(s); - - if (s >= m->accept_limit_8) { - if (single) { - DEBUG_PRINTF("reporting %u\n", m->arb_report); - cb(0, offset, m->arb_report, ctxt); - } else { - u32 cached_accept_id = 0; - u32 cached_accept_state = 0; - - doComplexReport(cb, ctxt, m, s, offset, 0, &cached_accept_state, - &cached_accept_id); - } - } - - return 0; -} - -char nfaExecMcSheng16_reportCurrent(const struct NFA *n, struct mq *q) { - const struct mcsheng *m = getImplNfa(n); - NfaCallback cb = q->cb; - void *ctxt = q->context; - u32 s = *(u16 *)q->state; - const struct mstate_aux *aux = get_aux(m, s); - u8 single = m->flags & MCSHENG_FLAG_SINGLE; - u64a offset = q_cur_offset(q); - 
assert(q_cur_type(q) == MQE_START); - DEBUG_PRINTF("state %u\n", s); - assert(s); - - if (aux->accept) { - if (single) { - DEBUG_PRINTF("reporting %u\n", m->arb_report); - cb(0, offset, m->arb_report, ctxt); - } else { - u32 cached_accept_id = 0; - u32 cached_accept_state = 0; - - doComplexReport(cb, ctxt, m, s, offset, 0, &cached_accept_state, - &cached_accept_id); - } - } - - return 0; -} - -static -char mcshengHasAccept(const struct mcsheng *m, const struct mstate_aux *aux, + movd(s), movd(s_max)); + + SHENG_SINGLE_UNROLL_ITER(1); + + SHENG_SINGLE_UNROLL_ITER(2); + SHENG_SINGLE_UNROLL_ITER(3); + + SHENG_SINGLE_UNROLL_ITER(4); + SHENG_SINGLE_UNROLL_ITER(5); + + SHENG_SINGLE_UNROLL_ITER(6); + SHENG_SINGLE_UNROLL_ITER(7); + + if (movd(s_max7) >= sheng_limit_x4) { + DEBUG_PRINTF("exit found\n"); + + /* Explicitly check the last byte as it is more likely as it also + * checks for acceleration. */ + if (movd(s_max6) < sheng_limit_x4) { + c += SHENG_CHUNK; + s_gpr = movq(s); + assert(s_gpr >= sheng_stop_limit); + goto exit; + } + + /* use shift-xor to create a register containing all of the max + * values */ + m128 blended = rshift64_m128(s_max0, 56); + blended = xor128(blended, rshift64_m128(s_max1, 48)); + blended = xor128(blended, rshift64_m128(s_max2, 40)); + blended = xor128(blended, rshift64_m128(s_max3, 32)); + blended = xor128(blended, rshift64_m128(s_max4, 24)); + blended = xor128(blended, rshift64_m128(s_max5, 16)); + blended = xor128(blended, rshift64_m128(s_max6, 8)); + blended = xor128(blended, s); + blended = xor128(blended, rshift64_m128(blended, 8)); + DEBUG_PRINTF("blended %016llx\n", movq(blended)); + + m128 final = min_u8_m128(blended, simd_stop_limit); + m128 cmp = sub_u8_m128(final, simd_stop_limit); + u64a stops = ~movemask128(cmp); + assert(stops); + u32 earliest = ctz32(stops); + DEBUG_PRINTF("stops %02llx, earliest %u\n", stops, earliest); + assert(earliest < 8); + c += earliest + 1; + s_gpr = movq(blended) >> (earliest * 8); + assert(s_gpr >= sheng_stop_limit); + goto exit; + } else { + c += SHENG_CHUNK; + } +#else + SHENG_SINGLE_ITER; + SHENG_SINGLE_ITER; + SHENG_SINGLE_ITER; + SHENG_SINGLE_ITER; + + SHENG_SINGLE_ITER; + SHENG_SINGLE_ITER; + SHENG_SINGLE_ITER; + SHENG_SINGLE_ITER; +#endif + } + + assert(c_end - c < SHENG_CHUNK); + if (c < soft_c_end) { + assert(soft_c_end - c < SHENG_CHUNK); + switch (soft_c_end - c) { + case 7: + SHENG_SINGLE_ITER; // fallthrough + case 6: + SHENG_SINGLE_ITER; // fallthrough + case 5: + SHENG_SINGLE_ITER; // fallthrough + case 4: + SHENG_SINGLE_ITER; // fallthrough + case 3: + SHENG_SINGLE_ITER; // fallthrough + case 2: + SHENG_SINGLE_ITER; // fallthrough + case 1: + SHENG_SINGLE_ITER; // fallthrough + } + } + + assert(c >= soft_c_end); + + s_gpr = movd(s); +exit: + assert(c <= hard_c_end); + DEBUG_PRINTF("%zu from end; s %hhu\n", c_end - c, s_gpr); + assert(c >= soft_c_end || s_gpr >= sheng_stop_limit); + /* undo state adjustment to match mcclellan view */ + if (s_gpr == sheng_limit) { + s_gpr = 0; + } else if (s_gpr < sheng_limit) { + s_gpr++; + } + + *c_inout = c; + return s_gpr; +} + +static really_inline +const char *findShermanState(UNUSED const struct mcsheng *m, + const char *sherman_base_offset, u32 sherman_base, + u32 s) { + const char *rv + = sherman_base_offset + SHERMAN_FIXED_SIZE * (s - sherman_base); + assert(rv < (const char *)m + m->length - sizeof(struct NFA)); + UNUSED u8 type = *(const u8 *)(rv + SHERMAN_TYPE_OFFSET); + assert(type == SHERMAN_STATE); + return rv; +} + +static really_inline +const u8 
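/* The shift-xor sequence above packs the seven per-iteration running maxima
 * (s_max0..s_max6) plus the final state s into one vector, so that a single
 * unsigned min, subtract and movemask locate the first unrolled iteration
 * whose state crossed sheng_stop_limit, replacing eight scalar compares.
 * The exact byte placement is engine-specific; the generic SSE2 idiom it
 * builds on is sketched below (a hedged illustration, not the engine's
 * layout; first_byte_over() is a hypothetical name):
 *
 *     #include <emmintrin.h>
 *     static inline int first_byte_over(__m128i v, unsigned char limit) {
 *         __m128i bias = _mm_set1_epi8((char)0x80);
 *         __m128i gt = _mm_cmpgt_epi8(_mm_xor_si128(v, bias),
 *                                     _mm_set1_epi8((char)(limit ^ 0x80)));
 *         int mask = _mm_movemask_epi8(gt);       // one bit per byte lane
 *         return mask ? __builtin_ctz(mask) : -1; // earliest lane, or none
 *     }
 *
 * (The xor with 0x80 biases the bytes so the signed compare behaves as an
 * unsigned one.) */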
*run_mcsheng_accel(const struct mcsheng *m, + const struct mstate_aux *aux, u32 s, + const u8 **min_accel_offset, + const u8 *c, const u8 *c_end) { + DEBUG_PRINTF("skipping\n"); + u32 accel_offset = aux[s].accel_offset; + + assert(aux[s].accel_offset); + assert(accel_offset >= m->aux_offset); + assert(!m->sherman_offset || accel_offset < m->sherman_offset); + + const union AccelAux *aaux = (const void *)((const char *)m + accel_offset); + const u8 *c2 = run_accel(aaux, c, c_end); + + if (c2 < *min_accel_offset + BAD_ACCEL_DIST) { + *min_accel_offset = c2 + BIG_ACCEL_PENALTY; + } else { + *min_accel_offset = c2 + SMALL_ACCEL_PENALTY; + } + + if (*min_accel_offset >= c_end - ACCEL_MIN_LEN) { + *min_accel_offset = c_end; + } + + DEBUG_PRINTF("advanced %zd, next accel chance in %zd/%zd\n", + c2 - c, *min_accel_offset - c2, c_end - c2); + + return c2; +} + +static really_inline +u32 doNormal16(const struct mcsheng *m, const u8 **c_inout, const u8 *end, + u32 s, char do_accel, enum MatchMode mode) { + const u8 *c = *c_inout; + + const u16 *succ_table + = (const u16 *)((const char *)m + sizeof(struct mcsheng)); + assert(ISALIGNED_N(succ_table, 2)); + u32 sheng_end = m->sheng_end; + u32 sherman_base = m->sherman_limit; + const char *sherman_base_offset + = (const char *)m - sizeof(struct NFA) + m->sherman_offset; + u32 as = m->alphaShift; + + /* Adjust start of succ table so we can index into using state id (rather + * than adjust to normal id). As we will not be processing states with low + * state ids, we will not be accessing data before the succ table. Note: due + * to the size of the sheng tables, the succ_table pointer will still be + * inside the engine.*/ + succ_table -= sheng_end << as; + + s &= STATE_MASK; + + while (c < end && s >= sheng_end) { + u8 cprime = m->remap[*c]; + DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx (s=%u)\n", *c, + ourisprint(*c) ? 
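/* A worked example of the succ_table rebase above, with illustrative
 * numbers. With sheng_end = 17 and a 32-symbol implementation alphabet
 * (as = 5), the real table only stores rows for states 17, 18, ..., so a
 * direct lookup would need to subtract sheng_end on every byte:
 *
 *     next = real_rows[((s - sheng_end) << as) + cprime];
 *
 * Subtracting (sheng_end << as) = 544 entries from the base pointer once,
 * outside the loop, lets the hot loop index with the raw state id instead:
 *
 *     const u16 *succ = real_rows_base - (sheng_end << as);
 *     next = succ[(s << as) + cprime];   // valid whenever s >= sheng_end
 *
 * The pointer moves backwards into the space occupied by the sheng masks,
 * which is why the comment above notes it still lands inside the engine
 * allocation. */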
*c : '?', cprime, s); + if (s < sherman_base) { + DEBUG_PRINTF("doing normal\n"); + assert(s < m->state_count); + s = succ_table[(s << as) + cprime]; + } else { + const char *sherman_state + = findShermanState(m, sherman_base_offset, sherman_base, s); + DEBUG_PRINTF("doing sherman (%u)\n", s); + s = doSherman16(sherman_state, cprime, succ_table, as); + } + + DEBUG_PRINTF("s: %u (%u)\n", s, s & STATE_MASK); + c++; + + if (do_accel && (s & ACCEL_FLAG)) { + break; + } + if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) { + break; + } + + s &= STATE_MASK; + } + + *c_inout = c; + return s; +} + +static really_inline +char mcshengExec16_i(const struct mcsheng *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **c_final, enum MatchMode mode) { + assert(ISALIGNED_N(state, 2)); + if (!len) { + if (mode == STOP_AT_MATCH) { + *c_final = buf; + } + return MO_ALIVE; + } + + u32 s = *state; + const u8 *c = buf; + const u8 *c_end = buf + len; + const u8 sheng_end = m->sheng_end; + const struct mstate_aux *aux + = (const struct mstate_aux *)((const char *)m + m->aux_offset + - sizeof(struct NFA)); + + s &= STATE_MASK; + + u32 cached_accept_id = 0; + u32 cached_accept_state = 0; + + DEBUG_PRINTF("s: %u, len %zu\n", s, len); + + const u8 *min_accel_offset = c; + if (!m->has_accel || len < ACCEL_MIN_LEN) { + min_accel_offset = c_end; + goto without_accel; + } + + goto with_accel; + +without_accel: + do { + assert(c < min_accel_offset); + int do_accept; + if (!s) { + goto exit; + } else if (s < sheng_end) { + s = doSheng(m, &c, min_accel_offset, c_end, s, 0); + do_accept = mode != NO_MATCHES && get_aux(m, s)->accept; + } else { + s = doNormal16(m, &c, min_accel_offset, s, 0, mode); + + do_accept = mode != NO_MATCHES && (s & ACCEPT_FLAG); + } + + if (do_accept) { + if (mode == STOP_AT_MATCH) { + *state = s & STATE_MASK; + *c_final = c - 1; + return MO_MATCHES_PENDING; + } + + u64a loc = (c - 1) - buf + offAdj + 1; + + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { + return MO_DEAD; /* termination requested */ + } + } else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0, + &cached_accept_state, &cached_accept_id) + == MO_HALT_MATCHING) { + return MO_DEAD; + } + } + + assert(c <= c_end); /* sheng is fuzzy for min_accel_offset */ + } while (c < min_accel_offset); + + if (c == c_end) { + goto exit; + } + +with_accel: + do { + assert(c < c_end); + int do_accept; + + if (!s) { + goto exit; + } else if (s < sheng_end) { + if (s > m->sheng_accel_limit) { + c = run_mcsheng_accel(m, aux, s, &min_accel_offset, c, c_end); + if (c == c_end) { + goto exit; + } else { + goto without_accel; + } + } + s = doSheng(m, &c, c_end, c_end, s, 1); + do_accept = mode != NO_MATCHES && get_aux(m, s)->accept; + } else { + if (s & ACCEL_FLAG) { + DEBUG_PRINTF("skipping\n"); + s &= STATE_MASK; + c = run_mcsheng_accel(m, aux, s, &min_accel_offset, c, c_end); + if (c == c_end) { + goto exit; + } else { + goto without_accel; + } + } + + s = doNormal16(m, &c, c_end, s, 1, mode); + do_accept = mode != NO_MATCHES && (s & ACCEPT_FLAG); + } + + if (do_accept) { + if (mode == STOP_AT_MATCH) { + *state = s & STATE_MASK; + *c_final = c - 1; + return MO_MATCHES_PENDING; + } + + u64a loc = (c - 1) - buf + offAdj + 1; + + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { + return MO_DEAD; /* termination requested */ + } + } else if 
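/* On the match offset arithmetic above: doNormal16 breaks out of its loop
 * after c++ once an accept flag is seen, so the byte that caused the accept
 * is c - 1. With offAdj being the stream offset of the first scanned byte,
 * loc = (c - 1) - buf + offAdj + 1 is one past the accepting byte, which
 * matches the end-of-match offset convention of the callback. Illustrative
 * numbers:
 *
 *     offAdj = 100;   // buf[0] is stream position 100
 *     // accept triggered by buf[4]  =>  c == buf + 5 after the break
 *     // loc = 4 + 100 + 1 = 105: the match ends at offset 105,
 *     // i.e. the byte at stream position 104 is the last one consumed
 */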
(doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0, + &cached_accept_state, &cached_accept_id) + == MO_HALT_MATCHING) { + return MO_DEAD; + } + } + + assert(c <= c_end); + } while (c < c_end); + +exit: + s &= STATE_MASK; + + if (mode == STOP_AT_MATCH) { + *c_final = c_end; + } + *state = s; + + return MO_ALIVE; +} + +static never_inline +char mcshengExec16_i_cb(const struct mcsheng *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point) { + return mcshengExec16_i(m, state, buf, len, offAdj, cb, ctxt, single, + final_point, CALLBACK_OUTPUT); +} + +static never_inline +char mcshengExec16_i_sam(const struct mcsheng *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point) { + return mcshengExec16_i(m, state, buf, len, offAdj, cb, ctxt, single, + final_point, STOP_AT_MATCH); +} + +static never_inline +char mcshengExec16_i_nm(const struct mcsheng *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point) { + return mcshengExec16_i(m, state, buf, len, offAdj, cb, ctxt, single, + final_point, NO_MATCHES); +} + +static really_inline +char mcshengExec16_i_ni(const struct mcsheng *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point, + enum MatchMode mode) { + if (mode == CALLBACK_OUTPUT) { + return mcshengExec16_i_cb(m, state, buf, len, offAdj, cb, ctxt, + single, final_point); + } else if (mode == STOP_AT_MATCH) { + return mcshengExec16_i_sam(m, state, buf, len, offAdj, cb, ctxt, + single, final_point); + } else { + assert (mode == NO_MATCHES); + return mcshengExec16_i_nm(m, state, buf, len, offAdj, cb, ctxt, + single, final_point); + } +} + +static really_inline +u32 doNormal8(const struct mcsheng *m, const u8 **c_inout, const u8 *end, u32 s, + char do_accel, enum MatchMode mode) { + const u8 *c = *c_inout; + u32 sheng_end = m->sheng_end; + u32 accel_limit = m->accel_limit_8; + u32 accept_limit = m->accept_limit_8; + + const u32 as = m->alphaShift; + const u8 *succ_table = (const u8 *)((const char *)m + + sizeof(struct mcsheng)); + /* Adjust start of succ table so we can index into using state id (rather + * than adjust to normal id). As we will not be processing states with low + * state ids, we will not be accessing data before the succ table. Note: due + * to the size of the sheng tables, the succ_table pointer will still be + * inside the engine.*/ + succ_table -= sheng_end << as; + + assert(s >= sheng_end); + + while (c < end && s >= sheng_end) { + u8 cprime = m->remap[*c]; + DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx\n", *c, + ourisprint(*c) ? 
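/* The _cb/_sam/_nm/_ni quartet above is a specialization pattern rather
 * than four algorithms. The really_inline worker takes mode as a parameter;
 * each never_inline wrapper calls it with a compile-time constant, so the
 * compiler emits three bodies with the dead mode branches folded away, and
 * _ni turns a runtime mode back into a call to the right body. A generic
 * sketch of the same pattern:
 *
 *     static really_inline int work(int x, enum Mode m);  // m folds away
 *     static never_inline int work_a(int x) { return work(x, MODE_A); }
 *     static never_inline int work_b(int x) { return work(x, MODE_B); }
 *     static really_inline int work_ni(int x, enum Mode m) {
 *         return (m == MODE_A) ? work_a(x) : work_b(x);
 *     }
 */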
*c : '?', cprime); + s = succ_table[(s << as) + cprime]; + + DEBUG_PRINTF("s: %u\n", s); + c++; + if (do_accel) { + if (s >= accel_limit) { + break; + } + } else { + if (mode != NO_MATCHES && s >= accept_limit) { + break; + } + } + } + *c_inout = c; + return s; +} + +static really_inline +char mcshengExec8_i(const struct mcsheng *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **c_final, enum MatchMode mode) { + if (!len) { + *c_final = buf; + return MO_ALIVE; + } + u32 s = *state; + const u8 *c = buf; + const u8 *c_end = buf + len; + const u8 sheng_end = m->sheng_end; + + const struct mstate_aux *aux + = (const struct mstate_aux *)((const char *)m + m->aux_offset + - sizeof(struct NFA)); + u32 accept_limit = m->accept_limit_8; + + u32 cached_accept_id = 0; + u32 cached_accept_state = 0; + + DEBUG_PRINTF("accel %hu, accept %u\n", m->accel_limit_8, accept_limit); + + DEBUG_PRINTF("s: %u, len %zu\n", s, len); + + const u8 *min_accel_offset = c; + if (!m->has_accel || len < ACCEL_MIN_LEN) { + min_accel_offset = c_end; + goto without_accel; + } + + goto with_accel; + +without_accel: + do { + assert(c < min_accel_offset); + if (!s) { + goto exit; + } else if (s < sheng_end) { + s = doSheng(m, &c, min_accel_offset, c_end, s, 0); + } else { + s = doNormal8(m, &c, min_accel_offset, s, 0, mode); + assert(c <= min_accel_offset); + } + + if (mode != NO_MATCHES && s >= accept_limit) { + if (mode == STOP_AT_MATCH) { + DEBUG_PRINTF("match - pausing\n"); + *state = s; + *c_final = c - 1; + return MO_MATCHES_PENDING; + } + + u64a loc = (c - 1) - buf + offAdj + 1; + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { + return MO_DEAD; + } + } else if (doComplexReport(cb, ctxt, m, s, loc, 0, + &cached_accept_state, &cached_accept_id) + == MO_HALT_MATCHING) { + return MO_DEAD; + } + } + + assert(c <= c_end); /* sheng is fuzzy for min_accel_offset */ + } while (c < min_accel_offset); + + if (c == c_end) { + goto exit; + } + +with_accel: + do { + u32 accel_limit = m->accel_limit_8; + + assert(c < c_end); + if (!s) { + goto exit; + } else if (s < sheng_end) { + if (s > m->sheng_accel_limit) { + c = run_mcsheng_accel(m, aux, s, &min_accel_offset, c, c_end); + if (c == c_end) { + goto exit; + } else { + goto without_accel; + } + } + s = doSheng(m, &c, c_end, c_end, s, 1); + } else { + if (s >= accel_limit && aux[s].accel_offset) { + c = run_mcsheng_accel(m, aux, s, &min_accel_offset, c, c_end); + if (c == c_end) { + goto exit; + } else { + goto without_accel; + } + } + s = doNormal8(m, &c, c_end, s, 1, mode); + } + + if (mode != NO_MATCHES && s >= accept_limit) { + if (mode == STOP_AT_MATCH) { + DEBUG_PRINTF("match - pausing\n"); + *state = s; + *c_final = c - 1; + return MO_MATCHES_PENDING; + } + + u64a loc = (c - 1) - buf + offAdj + 1; + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { + return MO_DEAD; + } + } else if (doComplexReport(cb, ctxt, m, s, loc, 0, + &cached_accept_state, &cached_accept_id) + == MO_HALT_MATCHING) { + return MO_DEAD; + } + } + + assert(c <= c_end); + } while (c < c_end); + +exit: + *state = s; + if (mode == STOP_AT_MATCH) { + *c_final = c_end; + } + return MO_ALIVE; +} + +static never_inline +char mcshengExec8_i_cb(const struct mcsheng *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point) { + return 
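/* mcshengExec8_i above leans on the 8-bit engine's state-id ordering, which
 * encodes state properties in the id itself so the hot loop needs only
 * integer compares, not mstate_aux lookups. Roughly:
 *
 *     [1, sheng_end)              sheng-resident states
 *     [sheng_end, accel_limit_8)  plain mcclellan states
 *     [accel_limit_8, ...)        states with an acceleration scheme
 *     [accept_limit_8, 256)       accept states
 *                                 (accept_limit_8 >= accel_limit_8)
 *
 * Hence the single test s >= accept_limit in the loop stands in for
 * "is this an accept state". */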
mcshengExec8_i(m, state, buf, len, offAdj, cb, ctxt, single, + final_point, CALLBACK_OUTPUT); +} + +static never_inline +char mcshengExec8_i_sam(const struct mcsheng *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point) { + return mcshengExec8_i(m, state, buf, len, offAdj, cb, ctxt, single, + final_point, STOP_AT_MATCH); +} + +static never_inline +char mcshengExec8_i_nm(const struct mcsheng *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point) { + return mcshengExec8_i(m, state, buf, len, offAdj, cb, ctxt, single, + final_point, NO_MATCHES); +} + +static really_inline +char mcshengExec8_i_ni(const struct mcsheng *m, u32 *state, const u8 *buf, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point, + enum MatchMode mode) { + if (mode == CALLBACK_OUTPUT) { + return mcshengExec8_i_cb(m, state, buf, len, offAdj, cb, ctxt, single, + final_point); + } else if (mode == STOP_AT_MATCH) { + return mcshengExec8_i_sam(m, state, buf, len, offAdj, cb, ctxt, + single, final_point); + } else { + assert(mode == NO_MATCHES); + return mcshengExec8_i_nm(m, state, buf, len, offAdj, cb, ctxt, single, + final_point); + } +} + +static really_inline +char mcshengCheckEOD(const struct NFA *nfa, u32 s, u64a offset, + NfaCallback cb, void *ctxt) { + const struct mcsheng *m = getImplNfa(nfa); + const struct mstate_aux *aux = get_aux(m, s); + + if (!aux->accept_eod) { + return MO_CONTINUE_MATCHING; + } + return doComplexReport(cb, ctxt, m, s, offset, 1, NULL, NULL); +} + +static really_inline +char nfaExecMcSheng16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, + const u8 *hend, NfaCallback cb, void *context, + struct mq *q, char single, s64a end, + enum MatchMode mode) { + assert(n->type == MCSHENG_NFA_16); + const struct mcsheng *m = getImplNfa(n); + s64a sp; + + assert(ISALIGNED_N(q->state, 2)); + u32 s = *(u16 *)q->state; + + if (q->report_current) { + assert(s); + assert(get_aux(m, s)->accept); + + int rv; + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + rv = cb(0, q_cur_offset(q), m->arb_report, context); + } else { + u32 cached_accept_id = 0; + u32 cached_accept_state = 0; + + rv = doComplexReport(cb, context, m, s, q_cur_offset(q), 0, + &cached_accept_state, &cached_accept_id); + } + + q->report_current = 0; + + if (rv == MO_HALT_MATCHING) { + return MO_DEAD; + } + } + + sp = q_cur_loc(q); + q->cur++; + + const u8 *cur_buf = sp < 0 ? 
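/* The sp < 0 ternary here is the history/buffer switch. Queue locations are
 * relative to the start of the current buffer, and negative locations
 * address the history buffer from its end, so one base pointer serves both
 * regimes:
 *
 *     stream:    ... y z | a b c ...     (| = start of this block)
 *     location:   -2 -1    0 1 2
 *     cur_buf = (sp < 0) ? hend : buffer;  // hend == history + hlength
 *     byte    = cur_buf[sp];               // hend[-1] == z, buffer[1] == b
 *
 * local_ep = MIN(0, ep) further below caps each pass at the boundary, so a
 * single mcshengExec16_i_ni call never straddles the two buffers. */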
hend : buffer; + + assert(q->cur); + if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) { + DEBUG_PRINTF("this is as far as we go\n"); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = end; + *(u16 *)q->state = s; + return MO_ALIVE; + } + + while (1) { + assert(q->cur < q->end); + s64a ep = q->items[q->cur].location; + if (mode != NO_MATCHES) { + ep = MIN(ep, end); + } + + assert(ep >= sp); + + s64a local_ep = ep; + if (sp < 0) { + local_ep = MIN(0, ep); + } + + /* do main buffer region */ + const u8 *final_look; + char rv = mcshengExec16_i_ni(m, &s, cur_buf + sp, local_ep - sp, + offset + sp, cb, context, single, + &final_look, mode); + if (rv == MO_DEAD) { + *(u16 *)q->state = 0; + return MO_DEAD; + } + if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) { + DEBUG_PRINTF("this is as far as we go\n"); + DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf); + + assert(q->cur); + assert(final_look != cur_buf + local_ep); + + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = final_look - cur_buf + 1; /* due to + * early -1 */ + *(u16 *)q->state = s; + return MO_MATCHES_PENDING; + } + + assert(rv == MO_ALIVE); + assert(q->cur); + if (mode != NO_MATCHES && q->items[q->cur].location > end) { + DEBUG_PRINTF("this is as far as we go\n"); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = end; + *(u16 *)q->state = s; + return MO_ALIVE; + } + + sp = local_ep; + + if (sp == 0) { + cur_buf = buffer; + } + + if (sp != ep) { + continue; + } + + switch (q->items[q->cur].type) { + case MQE_TOP: + assert(sp + offset || !s); + if (sp + offset == 0) { + s = m->start_anchored; + break; + } + s = mcshengEnableStarts(m, s); + break; + case MQE_END: + *(u16 *)q->state = s; + q->cur++; + return s ? MO_ALIVE : MO_DEAD; + default: + assert(!"invalid queue event"); + } + + q->cur++; + } +} + +static really_inline +char nfaExecMcSheng8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, + const u8 *hend, NfaCallback cb, void *context, + struct mq *q, char single, s64a end, + enum MatchMode mode) { + assert(n->type == MCSHENG_NFA_8); + const struct mcsheng *m = getImplNfa(n); + s64a sp; + + u32 s = *(u8 *)q->state; + + if (q->report_current) { + assert(s); + assert(s >= m->accept_limit_8); + + int rv; + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + rv = cb(0, q_cur_offset(q), m->arb_report, context); + } else { + u32 cached_accept_id = 0; + u32 cached_accept_state = 0; + + rv = doComplexReport(cb, context, m, s, q_cur_offset(q), 0, + &cached_accept_state, &cached_accept_id); + } + + q->report_current = 0; + + if (rv == MO_HALT_MATCHING) { + return MO_DEAD; + } + } + + sp = q_cur_loc(q); + q->cur++; + + const u8 *cur_buf = sp < 0 ? hend : buffer; + + if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) { + DEBUG_PRINTF("this is as far as we go\n"); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = end; + *(u8 *)q->state = s; + return MO_ALIVE; + } + + while (1) { + DEBUG_PRINTF("%s @ %llu\n", q->items[q->cur].type == MQE_TOP ? "TOP" : + q->items[q->cur].type == MQE_END ? 
"END" : "???", + q->items[q->cur].location + offset); + assert(q->cur < q->end); + s64a ep = q->items[q->cur].location; + if (mode != NO_MATCHES) { + ep = MIN(ep, end); + } + + assert(ep >= sp); + + s64a local_ep = ep; + if (sp < 0) { + local_ep = MIN(0, ep); + } + + const u8 *final_look; + char rv = mcshengExec8_i_ni(m, &s, cur_buf + sp, local_ep - sp, + offset + sp, cb, context, single, + &final_look, mode); + if (rv == MO_HALT_MATCHING) { + *(u8 *)q->state = 0; + return MO_DEAD; + } + if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) { + DEBUG_PRINTF("this is as far as we go\n"); + DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf); + + assert(q->cur); + assert(final_look != cur_buf + local_ep); + + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = final_look - cur_buf + 1; /* due to + * early -1 */ + *(u8 *)q->state = s; + return MO_MATCHES_PENDING; + } + + assert(rv == MO_ALIVE); + assert(q->cur); + if (mode != NO_MATCHES && q->items[q->cur].location > end) { + DEBUG_PRINTF("this is as far as we go\n"); + assert(q->cur); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = end; + *(u8 *)q->state = s; + return MO_ALIVE; + } + + sp = local_ep; + + if (sp == 0) { + cur_buf = buffer; + } + + if (sp != ep) { + continue; + } + + switch (q->items[q->cur].type) { + case MQE_TOP: + assert(sp + offset || !s); + if (sp + offset == 0) { + s = (u8)m->start_anchored; + break; + } + s = mcshengEnableStarts(m, s); + break; + case MQE_END: + *(u8 *)q->state = s; + q->cur++; + return s ? MO_ALIVE : MO_DEAD; + default: + assert(!"invalid queue event"); + } + + q->cur++; + } +} + +char nfaExecMcSheng8_Q(const struct NFA *n, struct mq *q, s64a end) { + u64a offset = q->offset; + const u8 *buffer = q->buffer; + NfaCallback cb = q->cb; + void *context = q->context; + assert(n->type == MCSHENG_NFA_8); + const struct mcsheng *m = getImplNfa(n); + const u8 *hend = q->history + q->hlength; + + return nfaExecMcSheng8_Q2i(n, offset, buffer, hend, cb, context, q, + m->flags & MCSHENG_FLAG_SINGLE, end, + CALLBACK_OUTPUT); +} + +char nfaExecMcSheng16_Q(const struct NFA *n, struct mq *q, s64a end) { + u64a offset = q->offset; + const u8 *buffer = q->buffer; + NfaCallback cb = q->cb; + void *context = q->context; + assert(n->type == MCSHENG_NFA_16); + const struct mcsheng *m = getImplNfa(n); + const u8 *hend = q->history + q->hlength; + + return nfaExecMcSheng16_Q2i(n, offset, buffer, hend, cb, context, q, + m->flags & MCSHENG_FLAG_SINGLE, end, + CALLBACK_OUTPUT); +} + +char nfaExecMcSheng8_reportCurrent(const struct NFA *n, struct mq *q) { + const struct mcsheng *m = getImplNfa(n); + NfaCallback cb = q->cb; + void *ctxt = q->context; + u32 s = *(u8 *)q->state; + u8 single = m->flags & MCSHENG_FLAG_SINGLE; + u64a offset = q_cur_offset(q); + assert(q_cur_type(q) == MQE_START); + assert(s); + + if (s >= m->accept_limit_8) { + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + cb(0, offset, m->arb_report, ctxt); + } else { + u32 cached_accept_id = 0; + u32 cached_accept_state = 0; + + doComplexReport(cb, ctxt, m, s, offset, 0, &cached_accept_state, + &cached_accept_id); + } + } + + return 0; +} + +char nfaExecMcSheng16_reportCurrent(const struct NFA *n, struct mq *q) { + const struct mcsheng *m = getImplNfa(n); + NfaCallback cb = q->cb; + void *ctxt = q->context; + u32 s = *(u16 *)q->state; + const struct mstate_aux *aux = get_aux(m, s); + u8 single = m->flags & MCSHENG_FLAG_SINGLE; + u64a offset = q_cur_offset(q); + 
assert(q_cur_type(q) == MQE_START); + DEBUG_PRINTF("state %u\n", s); + assert(s); + + if (aux->accept) { + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); + cb(0, offset, m->arb_report, ctxt); + } else { + u32 cached_accept_id = 0; + u32 cached_accept_state = 0; + + doComplexReport(cb, ctxt, m, s, offset, 0, &cached_accept_state, + &cached_accept_id); + } + } + + return 0; +} + +static +char mcshengHasAccept(const struct mcsheng *m, const struct mstate_aux *aux, ReportID report) { - assert(m && aux); - - if (!aux->accept) { - return 0; - } - - const struct report_list *rl = (const struct report_list *) - ((const char *)m + aux->accept - sizeof(struct NFA)); - assert(ISALIGNED_N(rl, 4)); - - DEBUG_PRINTF("report list has %u entries\n", rl->count); - - for (u32 i = 0; i < rl->count; i++) { - if (rl->report[i] == report) { - return 1; - } - } - - return 0; -} - -char nfaExecMcSheng8_inAccept(const struct NFA *n, ReportID report, - struct mq *q) { - assert(n && q); - - const struct mcsheng *m = getImplNfa(n); - u8 s = *(u8 *)q->state; - DEBUG_PRINTF("checking accepts for %hhu\n", s); - - return mcshengHasAccept(m, get_aux(m, s), report); -} - -char nfaExecMcSheng8_inAnyAccept(const struct NFA *n, struct mq *q) { - assert(n && q); - - const struct mcsheng *m = getImplNfa(n); - u8 s = *(u8 *)q->state; - DEBUG_PRINTF("checking accepts for %hhu\n", s); - - return !!get_aux(m, s)->accept; -} - -char nfaExecMcSheng16_inAccept(const struct NFA *n, ReportID report, - struct mq *q) { - assert(n && q); - - const struct mcsheng *m = getImplNfa(n); - u16 s = *(u16 *)q->state; - DEBUG_PRINTF("checking accepts for %hu\n", s); - - return mcshengHasAccept(m, get_aux(m, s), report); -} - -char nfaExecMcSheng16_inAnyAccept(const struct NFA *n, struct mq *q) { - assert(n && q); - - const struct mcsheng *m = getImplNfa(n); - u16 s = *(u16 *)q->state; - DEBUG_PRINTF("checking accepts for %hu\n", s); - - return !!get_aux(m, s)->accept; -} - -char nfaExecMcSheng8_Q2(const struct NFA *n, struct mq *q, s64a end) { - u64a offset = q->offset; - const u8 *buffer = q->buffer; - NfaCallback cb = q->cb; - void *context = q->context; - assert(n->type == MCSHENG_NFA_8); - const struct mcsheng *m = getImplNfa(n); - const u8 *hend = q->history + q->hlength; - - return nfaExecMcSheng8_Q2i(n, offset, buffer, hend, cb, context, q, - m->flags & MCSHENG_FLAG_SINGLE, end, - STOP_AT_MATCH); -} - -char nfaExecMcSheng16_Q2(const struct NFA *n, struct mq *q, s64a end) { - u64a offset = q->offset; - const u8 *buffer = q->buffer; - NfaCallback cb = q->cb; - void *context = q->context; - assert(n->type == MCSHENG_NFA_16); - const struct mcsheng *m = getImplNfa(n); - const u8 *hend = q->history + q->hlength; - - return nfaExecMcSheng16_Q2i(n, offset, buffer, hend, cb, context, q, - m->flags & MCSHENG_FLAG_SINGLE, end, - STOP_AT_MATCH); -} - -char nfaExecMcSheng8_QR(const struct NFA *n, struct mq *q, ReportID report) { - u64a offset = q->offset; - const u8 *buffer = q->buffer; - NfaCallback cb = q->cb; - void *context = q->context; - assert(n->type == MCSHENG_NFA_8); - const struct mcsheng *m = getImplNfa(n); - const u8 *hend = q->history + q->hlength; - - char rv = nfaExecMcSheng8_Q2i(n, offset, buffer, hend, cb, context, q, - m->flags & MCSHENG_FLAG_SINGLE, 0 /* end */, - NO_MATCHES); - if (rv && nfaExecMcSheng8_inAccept(n, report, q)) { - return MO_MATCHES_PENDING; - } else { - return rv; - } -} - -char nfaExecMcSheng16_QR(const struct NFA *n, struct mq *q, ReportID report) { - u64a offset = q->offset; - const u8 *buffer = 
q->buffer; - NfaCallback cb = q->cb; - void *context = q->context; - assert(n->type == MCSHENG_NFA_16); - const struct mcsheng *m = getImplNfa(n); - const u8 *hend = q->history + q->hlength; - - char rv = nfaExecMcSheng16_Q2i(n, offset, buffer, hend, cb, context, q, - m->flags & MCSHENG_FLAG_SINGLE, 0 /* end */, - NO_MATCHES); - - if (rv && nfaExecMcSheng16_inAccept(n, report, q)) { - return MO_MATCHES_PENDING; - } else { - return rv; - } -} - -char nfaExecMcSheng8_initCompressedState(const struct NFA *nfa, u64a offset, - void *state, UNUSED u8 key) { - const struct mcsheng *m = getImplNfa(nfa); - u8 s = offset ? m->start_floating : m->start_anchored; - if (s) { - *(u8 *)state = s; - return 1; - } - return 0; -} - -char nfaExecMcSheng16_initCompressedState(const struct NFA *nfa, u64a offset, - void *state, UNUSED u8 key) { - const struct mcsheng *m = getImplNfa(nfa); - u16 s = offset ? m->start_floating : m->start_anchored; - if (s) { - unaligned_store_u16(state, s); - return 1; - } - return 0; -} - -char nfaExecMcSheng8_testEOD(const struct NFA *nfa, const char *state, - UNUSED const char *streamState, u64a offset, - NfaCallback callback, void *context) { - return mcshengCheckEOD(nfa, *(const u8 *)state, offset, callback, - context); -} - -char nfaExecMcSheng16_testEOD(const struct NFA *nfa, const char *state, - UNUSED const char *streamState, u64a offset, - NfaCallback callback, void *context) { - assert(ISALIGNED_N(state, 2)); - return mcshengCheckEOD(nfa, *(const u16 *)state, offset, callback, - context); -} - -char nfaExecMcSheng8_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) { - assert(nfa->scratchStateSize == 1); - *(u8 *)q->state = 0; - return 0; -} - -char nfaExecMcSheng16_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) { - assert(nfa->scratchStateSize == 2); - assert(ISALIGNED_N(q->state, 2)); - *(u16 *)q->state = 0; - return 0; -} - -char nfaExecMcSheng8_queueCompressState(UNUSED const struct NFA *nfa, - const struct mq *q, UNUSED s64a loc) { - void *dest = q->streamState; - const void *src = q->state; - assert(nfa->scratchStateSize == 1); - assert(nfa->streamStateSize == 1); - *(u8 *)dest = *(const u8 *)src; - return 0; -} - -char nfaExecMcSheng8_expandState(UNUSED const struct NFA *nfa, void *dest, - const void *src, UNUSED u64a offset, - UNUSED u8 key) { - assert(nfa->scratchStateSize == 1); - assert(nfa->streamStateSize == 1); - *(u8 *)dest = *(const u8 *)src; - return 0; -} - -char nfaExecMcSheng16_queueCompressState(UNUSED const struct NFA *nfa, - const struct mq *q, - UNUSED s64a loc) { - void *dest = q->streamState; - const void *src = q->state; - assert(nfa->scratchStateSize == 2); - assert(nfa->streamStateSize == 2); - assert(ISALIGNED_N(src, 2)); - unaligned_store_u16(dest, *(const u16 *)(src)); - return 0; -} - -char nfaExecMcSheng16_expandState(UNUSED const struct NFA *nfa, void *dest, - const void *src, UNUSED u64a offset, - UNUSED u8 key) { - assert(nfa->scratchStateSize == 2); - assert(nfa->streamStateSize == 2); - assert(ISALIGNED_N(dest, 2)); - *(u16 *)dest = unaligned_load_u16(src); - return 0; -} + assert(m && aux); + + if (!aux->accept) { + return 0; + } + + const struct report_list *rl = (const struct report_list *) + ((const char *)m + aux->accept - sizeof(struct NFA)); + assert(ISALIGNED_N(rl, 4)); + + DEBUG_PRINTF("report list has %u entries\n", rl->count); + + for (u32 i = 0; i < rl->count; i++) { + if (rl->report[i] == report) { + return 1; + } + } + + return 0; +} + +char nfaExecMcSheng8_inAccept(const struct NFA *n, ReportID 
report, + struct mq *q) { + assert(n && q); + + const struct mcsheng *m = getImplNfa(n); + u8 s = *(u8 *)q->state; + DEBUG_PRINTF("checking accepts for %hhu\n", s); + + return mcshengHasAccept(m, get_aux(m, s), report); +} + +char nfaExecMcSheng8_inAnyAccept(const struct NFA *n, struct mq *q) { + assert(n && q); + + const struct mcsheng *m = getImplNfa(n); + u8 s = *(u8 *)q->state; + DEBUG_PRINTF("checking accepts for %hhu\n", s); + + return !!get_aux(m, s)->accept; +} + +char nfaExecMcSheng16_inAccept(const struct NFA *n, ReportID report, + struct mq *q) { + assert(n && q); + + const struct mcsheng *m = getImplNfa(n); + u16 s = *(u16 *)q->state; + DEBUG_PRINTF("checking accepts for %hu\n", s); + + return mcshengHasAccept(m, get_aux(m, s), report); +} + +char nfaExecMcSheng16_inAnyAccept(const struct NFA *n, struct mq *q) { + assert(n && q); + + const struct mcsheng *m = getImplNfa(n); + u16 s = *(u16 *)q->state; + DEBUG_PRINTF("checking accepts for %hu\n", s); + + return !!get_aux(m, s)->accept; +} + +char nfaExecMcSheng8_Q2(const struct NFA *n, struct mq *q, s64a end) { + u64a offset = q->offset; + const u8 *buffer = q->buffer; + NfaCallback cb = q->cb; + void *context = q->context; + assert(n->type == MCSHENG_NFA_8); + const struct mcsheng *m = getImplNfa(n); + const u8 *hend = q->history + q->hlength; + + return nfaExecMcSheng8_Q2i(n, offset, buffer, hend, cb, context, q, + m->flags & MCSHENG_FLAG_SINGLE, end, + STOP_AT_MATCH); +} + +char nfaExecMcSheng16_Q2(const struct NFA *n, struct mq *q, s64a end) { + u64a offset = q->offset; + const u8 *buffer = q->buffer; + NfaCallback cb = q->cb; + void *context = q->context; + assert(n->type == MCSHENG_NFA_16); + const struct mcsheng *m = getImplNfa(n); + const u8 *hend = q->history + q->hlength; + + return nfaExecMcSheng16_Q2i(n, offset, buffer, hend, cb, context, q, + m->flags & MCSHENG_FLAG_SINGLE, end, + STOP_AT_MATCH); +} + +char nfaExecMcSheng8_QR(const struct NFA *n, struct mq *q, ReportID report) { + u64a offset = q->offset; + const u8 *buffer = q->buffer; + NfaCallback cb = q->cb; + void *context = q->context; + assert(n->type == MCSHENG_NFA_8); + const struct mcsheng *m = getImplNfa(n); + const u8 *hend = q->history + q->hlength; + + char rv = nfaExecMcSheng8_Q2i(n, offset, buffer, hend, cb, context, q, + m->flags & MCSHENG_FLAG_SINGLE, 0 /* end */, + NO_MATCHES); + if (rv && nfaExecMcSheng8_inAccept(n, report, q)) { + return MO_MATCHES_PENDING; + } else { + return rv; + } +} + +char nfaExecMcSheng16_QR(const struct NFA *n, struct mq *q, ReportID report) { + u64a offset = q->offset; + const u8 *buffer = q->buffer; + NfaCallback cb = q->cb; + void *context = q->context; + assert(n->type == MCSHENG_NFA_16); + const struct mcsheng *m = getImplNfa(n); + const u8 *hend = q->history + q->hlength; + + char rv = nfaExecMcSheng16_Q2i(n, offset, buffer, hend, cb, context, q, + m->flags & MCSHENG_FLAG_SINGLE, 0 /* end */, + NO_MATCHES); + + if (rv && nfaExecMcSheng16_inAccept(n, report, q)) { + return MO_MATCHES_PENDING; + } else { + return rv; + } +} + +char nfaExecMcSheng8_initCompressedState(const struct NFA *nfa, u64a offset, + void *state, UNUSED u8 key) { + const struct mcsheng *m = getImplNfa(nfa); + u8 s = offset ? m->start_floating : m->start_anchored; + if (s) { + *(u8 *)state = s; + return 1; + } + return 0; +} + +char nfaExecMcSheng16_initCompressedState(const struct NFA *nfa, u64a offset, + void *state, UNUSED u8 key) { + const struct mcsheng *m = getImplNfa(nfa); + u16 s = offset ? 
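/* Stream state for these engines is just the current state id, one byte for
 * MCSHENG_NFA_8 and two for MCSHENG_NFA_16, as the compress/expand pairs
 * below make explicit. An illustrative round trip over a block boundary
 * (call ordering simplified):
 *
 *     // seed a fresh stream: writes the 2-byte start state id
 *     nfaExecMcSheng16_initCompressedState(nfa, 0, q.streamState, 0);
 *     // end of block: pack scratch state (q.state) into q.streamState
 *     nfaExecMcSheng16_queueCompressState(nfa, &q, loc);
 *     // next block: unpack q.streamState back into scratch and resume
 *     nfaExecMcSheng16_expandState(nfa, q.state, q.streamState, offset, 0);
 */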
m->start_floating : m->start_anchored; + if (s) { + unaligned_store_u16(state, s); + return 1; + } + return 0; +} + +char nfaExecMcSheng8_testEOD(const struct NFA *nfa, const char *state, + UNUSED const char *streamState, u64a offset, + NfaCallback callback, void *context) { + return mcshengCheckEOD(nfa, *(const u8 *)state, offset, callback, + context); +} + +char nfaExecMcSheng16_testEOD(const struct NFA *nfa, const char *state, + UNUSED const char *streamState, u64a offset, + NfaCallback callback, void *context) { + assert(ISALIGNED_N(state, 2)); + return mcshengCheckEOD(nfa, *(const u16 *)state, offset, callback, + context); +} + +char nfaExecMcSheng8_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) { + assert(nfa->scratchStateSize == 1); + *(u8 *)q->state = 0; + return 0; +} + +char nfaExecMcSheng16_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) { + assert(nfa->scratchStateSize == 2); + assert(ISALIGNED_N(q->state, 2)); + *(u16 *)q->state = 0; + return 0; +} + +char nfaExecMcSheng8_queueCompressState(UNUSED const struct NFA *nfa, + const struct mq *q, UNUSED s64a loc) { + void *dest = q->streamState; + const void *src = q->state; + assert(nfa->scratchStateSize == 1); + assert(nfa->streamStateSize == 1); + *(u8 *)dest = *(const u8 *)src; + return 0; +} + +char nfaExecMcSheng8_expandState(UNUSED const struct NFA *nfa, void *dest, + const void *src, UNUSED u64a offset, + UNUSED u8 key) { + assert(nfa->scratchStateSize == 1); + assert(nfa->streamStateSize == 1); + *(u8 *)dest = *(const u8 *)src; + return 0; +} + +char nfaExecMcSheng16_queueCompressState(UNUSED const struct NFA *nfa, + const struct mq *q, + UNUSED s64a loc) { + void *dest = q->streamState; + const void *src = q->state; + assert(nfa->scratchStateSize == 2); + assert(nfa->streamStateSize == 2); + assert(ISALIGNED_N(src, 2)); + unaligned_store_u16(dest, *(const u16 *)(src)); + return 0; +} + +char nfaExecMcSheng16_expandState(UNUSED const struct NFA *nfa, void *dest, + const void *src, UNUSED u64a offset, + UNUSED u8 key) { + assert(nfa->scratchStateSize == 2); + assert(nfa->streamStateSize == 2); + assert(ISALIGNED_N(dest, 2)); + *(u16 *)dest = unaligned_load_u16(src); + return 0; +} #if defined(HAVE_AVX512VBMI) static really_inline diff --git a/contrib/libs/hyperscan/src/nfa/mcsheng.h b/contrib/libs/hyperscan/src/nfa/mcsheng.h index 0329e12128..11ab588d0c 100644 --- a/contrib/libs/hyperscan/src/nfa/mcsheng.h +++ b/contrib/libs/hyperscan/src/nfa/mcsheng.h @@ -1,85 +1,85 @@ -/* +/* * Copyright (c) 2016-2020, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef MCSHENG_H -#define MCSHENG_H - -#include "callback.h" -#include "ue2common.h" - -struct mq; -struct NFA; - -/* 8-bit Sheng-McClellan hybrid */ - -char nfaExecMcSheng8_testEOD(const struct NFA *nfa, const char *state, - const char *streamState, u64a offset, - NfaCallback callback, void *context); -char nfaExecMcSheng8_Q(const struct NFA *n, struct mq *q, s64a end); -char nfaExecMcSheng8_Q2(const struct NFA *n, struct mq *q, s64a end); -char nfaExecMcSheng8_QR(const struct NFA *n, struct mq *q, ReportID report); -char nfaExecMcSheng8_reportCurrent(const struct NFA *n, struct mq *q); -char nfaExecMcSheng8_inAccept(const struct NFA *n, ReportID report, - struct mq *q); -char nfaExecMcSheng8_inAnyAccept(const struct NFA *n, struct mq *q); -char nfaExecMcSheng8_queueInitState(const struct NFA *n, struct mq *q); -char nfaExecMcSheng8_initCompressedState(const struct NFA *n, u64a offset, - void *state, u8 key); -char nfaExecMcSheng8_queueCompressState(const struct NFA *nfa, - const struct mq *q, s64a loc); -char nfaExecMcSheng8_expandState(const struct NFA *nfa, void *dest, - const void *src, u64a offset, u8 key); - -#define nfaExecMcSheng8_B_Reverse NFA_API_NO_IMPL -#define nfaExecMcSheng8_zombie_status NFA_API_ZOMBIE_NO_IMPL - -/* 16-bit Sheng-McClellan hybrid */ - -char nfaExecMcSheng16_testEOD(const struct NFA *nfa, const char *state, - const char *streamState, u64a offset, - NfaCallback callback, void *context); -char nfaExecMcSheng16_Q(const struct NFA *n, struct mq *q, s64a end); -char nfaExecMcSheng16_Q2(const struct NFA *n, struct mq *q, s64a end); -char nfaExecMcSheng16_QR(const struct NFA *n, struct mq *q, ReportID report); -char nfaExecMcSheng16_reportCurrent(const struct NFA *n, struct mq *q); -char nfaExecMcSheng16_inAccept(const struct NFA *n, ReportID report, - struct mq *q); -char nfaExecMcSheng16_inAnyAccept(const struct NFA *n, struct mq *q); -char nfaExecMcSheng16_queueInitState(const struct NFA *n, struct mq *q); -char nfaExecMcSheng16_initCompressedState(const struct NFA *n, u64a offset, - void *state, u8 key); -char nfaExecMcSheng16_queueCompressState(const struct NFA *nfa, - const struct mq *q, s64a loc); -char nfaExecMcSheng16_expandState(const struct NFA *nfa, void *dest, - const void *src, u64a offset, u8 key); - -#define nfaExecMcSheng16_B_Reverse NFA_API_NO_IMPL -#define nfaExecMcSheng16_zombie_status NFA_API_ZOMBIE_NO_IMPL + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef MCSHENG_H +#define MCSHENG_H + +#include "callback.h" +#include "ue2common.h" + +struct mq; +struct NFA; + +/* 8-bit Sheng-McClellan hybrid */ + +char nfaExecMcSheng8_testEOD(const struct NFA *nfa, const char *state, + const char *streamState, u64a offset, + NfaCallback callback, void *context); +char nfaExecMcSheng8_Q(const struct NFA *n, struct mq *q, s64a end); +char nfaExecMcSheng8_Q2(const struct NFA *n, struct mq *q, s64a end); +char nfaExecMcSheng8_QR(const struct NFA *n, struct mq *q, ReportID report); +char nfaExecMcSheng8_reportCurrent(const struct NFA *n, struct mq *q); +char nfaExecMcSheng8_inAccept(const struct NFA *n, ReportID report, + struct mq *q); +char nfaExecMcSheng8_inAnyAccept(const struct NFA *n, struct mq *q); +char nfaExecMcSheng8_queueInitState(const struct NFA *n, struct mq *q); +char nfaExecMcSheng8_initCompressedState(const struct NFA *n, u64a offset, + void *state, u8 key); +char nfaExecMcSheng8_queueCompressState(const struct NFA *nfa, + const struct mq *q, s64a loc); +char nfaExecMcSheng8_expandState(const struct NFA *nfa, void *dest, + const void *src, u64a offset, u8 key); + +#define nfaExecMcSheng8_B_Reverse NFA_API_NO_IMPL +#define nfaExecMcSheng8_zombie_status NFA_API_ZOMBIE_NO_IMPL + +/* 16-bit Sheng-McClellan hybrid */ + +char nfaExecMcSheng16_testEOD(const struct NFA *nfa, const char *state, + const char *streamState, u64a offset, + NfaCallback callback, void *context); +char nfaExecMcSheng16_Q(const struct NFA *n, struct mq *q, s64a end); +char nfaExecMcSheng16_Q2(const struct NFA *n, struct mq *q, s64a end); +char nfaExecMcSheng16_QR(const struct NFA *n, struct mq *q, ReportID report); +char nfaExecMcSheng16_reportCurrent(const struct NFA *n, struct mq *q); +char nfaExecMcSheng16_inAccept(const struct NFA *n, ReportID report, + struct mq *q); +char nfaExecMcSheng16_inAnyAccept(const struct NFA *n, struct mq *q); +char nfaExecMcSheng16_queueInitState(const struct NFA *n, struct mq *q); +char nfaExecMcSheng16_initCompressedState(const struct NFA *n, u64a offset, + void *state, u8 key); +char nfaExecMcSheng16_queueCompressState(const struct NFA *nfa, + const struct mq *q, s64a loc); +char nfaExecMcSheng16_expandState(const struct NFA *nfa, void *dest, + const void *src, u64a offset, u8 key); + +#define nfaExecMcSheng16_B_Reverse NFA_API_NO_IMPL +#define nfaExecMcSheng16_zombie_status NFA_API_ZOMBIE_NO_IMPL #if defined(HAVE_AVX512VBMI) /* 64-8 bit Sheng-McClellan hybrid */ char nfaExecMcSheng64_8_testEOD(const struct NFA *nfa, const char *state, @@ -99,7 
+99,7 @@ char nfaExecMcSheng64_8_queueCompressState(const struct NFA *nfa, const struct mq *q, s64a loc); char nfaExecMcSheng64_8_expandState(const struct NFA *nfa, void *dest, const void *src, u64a offset, u8 key); - + #define nfaExecMcSheng64_8_B_Reverse NFA_API_NO_IMPL #define nfaExecMcSheng64_8_zombie_status NFA_API_ZOMBIE_NO_IMPL @@ -154,4 +154,4 @@ char nfaExecMcSheng64_16_expandState(const struct NFA *nfa, void *dest, #endif //end of HAVE_AVX512VBMI -#endif +#endif diff --git a/contrib/libs/hyperscan/src/nfa/mcsheng_compile.cpp b/contrib/libs/hyperscan/src/nfa/mcsheng_compile.cpp index fb75e49a35..4cb40c6435 100644 --- a/contrib/libs/hyperscan/src/nfa/mcsheng_compile.cpp +++ b/contrib/libs/hyperscan/src/nfa/mcsheng_compile.cpp @@ -1,249 +1,249 @@ -/* +/* * Copyright (c) 2016-2020, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "mcsheng_compile.h" - -#include "accel.h" -#include "accelcompile.h" -#include "grey.h" -#include "mcclellancompile.h" -#include "mcclellancompile_util.h" -#include "mcsheng_internal.h" -#include "nfa_internal.h" -#include "rdfa_graph.h" -#include "shufticompile.h" -#include "trufflecompile.h" -#include "ue2common.h" -#include "util/alloc.h" -#include "util/bitutils.h" -#include "util/charreach.h" -#include "util/compare.h" -#include "util/compile_context.h" -#include "util/container.h" -#include "util/flat_containers.h" -#include "util/graph.h" -#include "util/graph_range.h" -#include "util/make_unique.h" -#include "util/order_check.h" -#include "util/report_manager.h" -#include "util/unaligned.h" -#include "util/unordered.h" -#include "util/verify_types.h" - -#include <algorithm> -#include <cstdio> -#include <cstdlib> -#include <cstring> -#include <map> -#include <memory> -#include <set> -#include <deque> -#include <vector> -#include <boost/range/adaptor/map.hpp> - -using namespace std; -using boost::adaptors::map_keys; - -namespace ue2 { - -namespace /* anon */ { - -#define MIN_SHENG_SIZE 6 -#define INVALID_SHENG_ID 255 - -struct dstate_extra { - u16 daddytaken = 0; - bool shermanState = false; - bool sheng_succ = false; - u8 sheng_id = INVALID_SHENG_ID; -}; - -struct dfa_info { - accel_dfa_build_strat &strat; - raw_dfa &raw; - vector<dstate> &states; - vector<dstate_extra> extra; - const u16 alpha_size; /* including special symbols */ - const array<u16, ALPHABET_SIZE> &alpha_remap; - vector<CharReach> rev_alpha; - const u16 impl_alpha_size; - - u8 getAlphaShift() const; - - explicit dfa_info(accel_dfa_build_strat &s) - : strat(s), - raw(s.get_raw()), - states(raw.states), - extra(raw.states.size()), - alpha_size(raw.alpha_size), - alpha_remap(raw.alpha_remap), - impl_alpha_size(raw.getImplAlphaSize()) { - rev_alpha.resize(impl_alpha_size); - for (u32 i = 0; i < N_CHARS; i++) { - rev_alpha[alpha_remap[i]].set(i); - } - } - - dstate_id_t implId(dstate_id_t raw_id) const { - return states[raw_id].impl_id; - } - - bool is_sherman(dstate_id_t raw_id) const { - return extra[raw_id].shermanState; - } - - bool is_sheng(dstate_id_t raw_id) const { - return extra[raw_id].sheng_id != INVALID_SHENG_ID; - } - - bool is_sheng_succ(dstate_id_t raw_id) const { - return extra[raw_id].sheng_succ; - } - - /* states which use the normal transition/successor table */ - bool is_normal(dstate_id_t raw_id) const { - return raw_id != DEAD_STATE && !is_sheng(raw_id) && !is_sherman(raw_id); - } - size_t size(void) const { return states.size(); } -}; - -u8 dfa_info::getAlphaShift() const { - if (impl_alpha_size < 2) { - return 1; - } else { - /* log2 round up */ - return 32 - clz32(impl_alpha_size - 1); - } -} - -} // namespace - -static -mstate_aux *getAux(NFA *n, dstate_id_t i) { - mcsheng *m = (mcsheng *)getMutableImplNfa(n); - mstate_aux *aux_base = (mstate_aux *)((char *)n + m->aux_offset); - - mstate_aux *aux = aux_base + i; - assert((const char *)aux < (const char *)n + m->length); - return aux; -} - -static -void createShuffleMasks(mcsheng *m, const dfa_info &info, - dstate_id_t sheng_end, - const map<dstate_id_t, AccelScheme> &accel_escape_info) { - DEBUG_PRINTF("using first %hu states for a sheng\n", sheng_end); - assert(sheng_end > DEAD_STATE + 1); - assert(sheng_end <= sizeof(m128) + 1); - vector<array<u8, sizeof(m128)>> masks; - masks.resize(info.alpha_size); - /* -1 to avoid wasting a slot as we do not include dead state */ - vector<dstate_id_t> raw_ids; - raw_ids.resize(sheng_end - 
1); - for (dstate_id_t s = DEAD_STATE + 1; s < info.states.size(); s++) { - assert(info.implId(s)); /* should not map to DEAD_STATE */ - if (info.is_sheng(s)) { - raw_ids[info.extra[s].sheng_id] = s; - } - } - for (u32 i = 0; i < info.alpha_size; i++) { - if (i == info.alpha_remap[TOP]) { - continue; - } - auto &mask = masks[i]; - assert(sizeof(mask) == sizeof(m128)); - mask.fill(0); - - for (dstate_id_t sheng_id = 0; sheng_id < sheng_end - 1; sheng_id++) { - dstate_id_t raw_id = raw_ids[sheng_id]; - dstate_id_t next_id = info.implId(info.states[raw_id].next[i]); - if (next_id == DEAD_STATE) { - next_id = sheng_end - 1; - } else if (next_id < sheng_end) { - next_id--; - } - DEBUG_PRINTF("%hu: %u->next %hu\n", sheng_id, i, next_id); - mask[sheng_id] = verify_u8(next_id); - } - } - for (u32 i = 0; i < N_CHARS; i++) { - assert(info.alpha_remap[i] != info.alpha_remap[TOP]); - memcpy((u8 *)&m->sheng_masks[i], - (u8 *)masks[info.alpha_remap[i]].data(), sizeof(m128)); - } - m->sheng_end = sheng_end; - m->sheng_accel_limit = sheng_end - 1; - - for (dstate_id_t s : raw_ids) { - if (contains(accel_escape_info, s)) { - LIMIT_TO_AT_MOST(&m->sheng_accel_limit, info.extra[s].sheng_id); - } - } -} - -static -void populateBasicInfo(size_t state_size, const dfa_info &info, - u32 total_size, u32 aux_offset, u32 accel_offset, - u32 accel_count, ReportID arb, bool single, NFA *nfa) { - assert(state_size == sizeof(u16) || state_size == sizeof(u8)); - - nfa->length = total_size; - nfa->nPositions = info.states.size(); - - nfa->scratchStateSize = verify_u32(state_size); - nfa->streamStateSize = verify_u32(state_size); - - if (state_size == sizeof(u8)) { - nfa->type = MCSHENG_NFA_8; - } else { - nfa->type = MCSHENG_NFA_16; - } - - mcsheng *m = (mcsheng *)getMutableImplNfa(nfa); - for (u32 i = 0; i < 256; i++) { - m->remap[i] = verify_u8(info.alpha_remap[i]); - } - m->alphaShift = info.getAlphaShift(); - m->length = total_size; - m->aux_offset = aux_offset; - m->accel_offset = accel_offset; - m->arb_report = arb; - m->state_count = verify_u16(info.size()); - m->start_anchored = info.implId(info.raw.start_anchored); - m->start_floating = info.implId(info.raw.start_floating); - m->has_accel = accel_count ? 1 : 0; - - if (single) { - m->flags |= MCSHENG_FLAG_SINGLE; - } -} - -static + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "mcsheng_compile.h" + +#include "accel.h" +#include "accelcompile.h" +#include "grey.h" +#include "mcclellancompile.h" +#include "mcclellancompile_util.h" +#include "mcsheng_internal.h" +#include "nfa_internal.h" +#include "rdfa_graph.h" +#include "shufticompile.h" +#include "trufflecompile.h" +#include "ue2common.h" +#include "util/alloc.h" +#include "util/bitutils.h" +#include "util/charreach.h" +#include "util/compare.h" +#include "util/compile_context.h" +#include "util/container.h" +#include "util/flat_containers.h" +#include "util/graph.h" +#include "util/graph_range.h" +#include "util/make_unique.h" +#include "util/order_check.h" +#include "util/report_manager.h" +#include "util/unaligned.h" +#include "util/unordered.h" +#include "util/verify_types.h" + +#include <algorithm> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <map> +#include <memory> +#include <set> +#include <deque> +#include <vector> +#include <boost/range/adaptor/map.hpp> + +using namespace std; +using boost::adaptors::map_keys; + +namespace ue2 { + +namespace /* anon */ { + +#define MIN_SHENG_SIZE 6 +#define INVALID_SHENG_ID 255 + +struct dstate_extra { + u16 daddytaken = 0; + bool shermanState = false; + bool sheng_succ = false; + u8 sheng_id = INVALID_SHENG_ID; +}; + +struct dfa_info { + accel_dfa_build_strat &strat; + raw_dfa &raw; + vector<dstate> &states; + vector<dstate_extra> extra; + const u16 alpha_size; /* including special symbols */ + const array<u16, ALPHABET_SIZE> &alpha_remap; + vector<CharReach> rev_alpha; + const u16 impl_alpha_size; + + u8 getAlphaShift() const; + + explicit dfa_info(accel_dfa_build_strat &s) + : strat(s), + raw(s.get_raw()), + states(raw.states), + extra(raw.states.size()), + alpha_size(raw.alpha_size), + alpha_remap(raw.alpha_remap), + impl_alpha_size(raw.getImplAlphaSize()) { + rev_alpha.resize(impl_alpha_size); + for (u32 i = 0; i < N_CHARS; i++) { + rev_alpha[alpha_remap[i]].set(i); + } + } + + dstate_id_t implId(dstate_id_t raw_id) const { + return states[raw_id].impl_id; + } + + bool is_sherman(dstate_id_t raw_id) const { + return extra[raw_id].shermanState; + } + + bool is_sheng(dstate_id_t raw_id) const { + return extra[raw_id].sheng_id != INVALID_SHENG_ID; + } + + bool is_sheng_succ(dstate_id_t raw_id) const { + return extra[raw_id].sheng_succ; + } + + /* states which use the normal transition/successor table */ + bool is_normal(dstate_id_t raw_id) const { + return raw_id != DEAD_STATE && !is_sheng(raw_id) && !is_sherman(raw_id); + } + size_t size(void) const { return states.size(); } +}; + +u8 dfa_info::getAlphaShift() const { + if (impl_alpha_size < 2) { + return 1; + } else { + /* log2 round up */ + return 32 - clz32(impl_alpha_size - 1); + } +} + +} // namespace + +static +mstate_aux *getAux(NFA *n, dstate_id_t i) { + mcsheng *m = (mcsheng *)getMutableImplNfa(n); + mstate_aux *aux_base = (mstate_aux *)((char *)n + m->aux_offset); + + mstate_aux *aux = aux_base + i; + assert((const char *)aux < 
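/* Note (worked example, not part of this patch): getAlphaShift() above is a
 * log2-round-up. For impl_alpha_size == 40: clz32(39) == 26, so the shift is
 * 32 - 26 == 6 and a state's row in the successor table spans 1 << 6 == 64
 * slots; lookups then index succ_table[((size_t)state << alphaShift) + sym]. */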
(const char *)n + m->length); + return aux; +} + +static +void createShuffleMasks(mcsheng *m, const dfa_info &info, + dstate_id_t sheng_end, + const map<dstate_id_t, AccelScheme> &accel_escape_info) { + DEBUG_PRINTF("using first %hu states for a sheng\n", sheng_end); + assert(sheng_end > DEAD_STATE + 1); + assert(sheng_end <= sizeof(m128) + 1); + vector<array<u8, sizeof(m128)>> masks; + masks.resize(info.alpha_size); + /* -1 to avoid wasting a slot as we do not include dead state */ + vector<dstate_id_t> raw_ids; + raw_ids.resize(sheng_end - 1); + for (dstate_id_t s = DEAD_STATE + 1; s < info.states.size(); s++) { + assert(info.implId(s)); /* should not map to DEAD_STATE */ + if (info.is_sheng(s)) { + raw_ids[info.extra[s].sheng_id] = s; + } + } + for (u32 i = 0; i < info.alpha_size; i++) { + if (i == info.alpha_remap[TOP]) { + continue; + } + auto &mask = masks[i]; + assert(sizeof(mask) == sizeof(m128)); + mask.fill(0); + + for (dstate_id_t sheng_id = 0; sheng_id < sheng_end - 1; sheng_id++) { + dstate_id_t raw_id = raw_ids[sheng_id]; + dstate_id_t next_id = info.implId(info.states[raw_id].next[i]); + if (next_id == DEAD_STATE) { + next_id = sheng_end - 1; + } else if (next_id < sheng_end) { + next_id--; + } + DEBUG_PRINTF("%hu: %u->next %hu\n", sheng_id, i, next_id); + mask[sheng_id] = verify_u8(next_id); + } + } + for (u32 i = 0; i < N_CHARS; i++) { + assert(info.alpha_remap[i] != info.alpha_remap[TOP]); + memcpy((u8 *)&m->sheng_masks[i], + (u8 *)masks[info.alpha_remap[i]].data(), sizeof(m128)); + } + m->sheng_end = sheng_end; + m->sheng_accel_limit = sheng_end - 1; + + for (dstate_id_t s : raw_ids) { + if (contains(accel_escape_info, s)) { + LIMIT_TO_AT_MOST(&m->sheng_accel_limit, info.extra[s].sheng_id); + } + } +} + +static +void populateBasicInfo(size_t state_size, const dfa_info &info, + u32 total_size, u32 aux_offset, u32 accel_offset, + u32 accel_count, ReportID arb, bool single, NFA *nfa) { + assert(state_size == sizeof(u16) || state_size == sizeof(u8)); + + nfa->length = total_size; + nfa->nPositions = info.states.size(); + + nfa->scratchStateSize = verify_u32(state_size); + nfa->streamStateSize = verify_u32(state_size); + + if (state_size == sizeof(u8)) { + nfa->type = MCSHENG_NFA_8; + } else { + nfa->type = MCSHENG_NFA_16; + } + + mcsheng *m = (mcsheng *)getMutableImplNfa(nfa); + for (u32 i = 0; i < 256; i++) { + m->remap[i] = verify_u8(info.alpha_remap[i]); + } + m->alphaShift = info.getAlphaShift(); + m->length = total_size; + m->aux_offset = aux_offset; + m->accel_offset = accel_offset; + m->arb_report = arb; + m->state_count = verify_u16(info.size()); + m->start_anchored = info.implId(info.raw.start_anchored); + m->start_floating = info.implId(info.raw.start_floating); + m->has_accel = accel_count ? 
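/* Note (editorial sketch; the runtime lives in mcsheng.c, which is not in
 * this diff): the per-character masks built by createShuffleMasks() are
 * presumably consumed with one byte-shuffle per scanned character, roughly:
 *
 *     next_ids = pshufb(m->sheng_masks[*buf], cur_ids);
 *
 * where the current sheng id selects a lane of the mask -- hence the
 * 16-state limit on SSE targets and the 64-state AVX512VBMI variant below. */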
1 : 0; + + if (single) { + m->flags |= MCSHENG_FLAG_SINGLE; + } +} + +static mstate_aux *getAux64(NFA *n, dstate_id_t i) { mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(n); mstate_aux *aux_base = (mstate_aux *)((char *)n + m->aux_offset); @@ -344,379 +344,379 @@ void populateBasicInfo64(size_t state_size, const dfa_info &info, } static -size_t calcShermanRegionSize(const dfa_info &info) { - size_t rv = 0; - - for (size_t i = 0; i < info.size(); i++) { - if (info.is_sherman(i)) { - rv += SHERMAN_FIXED_SIZE; - } - } - - return ROUNDUP_16(rv); -} - -static -void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info, - const vector<u32> &reports, const vector<u32> &reports_eod, - const vector<u32> &reportOffsets) { - const dstate &raw_state = info.states[i]; - aux->accept = raw_state.reports.empty() ? 0 : reportOffsets[reports[i]]; - aux->accept_eod = raw_state.reports_eod.empty() ? 0 - : reportOffsets[reports_eod[i]]; - aux->top = info.implId(i ? raw_state.next[info.alpha_remap[TOP]] - : info.raw.start_floating); -} - -/* returns false on error */ -static -bool allocateImplId16(dfa_info &info, dstate_id_t sheng_end, +size_t calcShermanRegionSize(const dfa_info &info) { + size_t rv = 0; + + for (size_t i = 0; i < info.size(); i++) { + if (info.is_sherman(i)) { + rv += SHERMAN_FIXED_SIZE; + } + } + + return ROUNDUP_16(rv); +} + +static +void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info, + const vector<u32> &reports, const vector<u32> &reports_eod, + const vector<u32> &reportOffsets) { + const dstate &raw_state = info.states[i]; + aux->accept = raw_state.reports.empty() ? 0 : reportOffsets[reports[i]]; + aux->accept_eod = raw_state.reports_eod.empty() ? 0 + : reportOffsets[reports_eod[i]]; + aux->top = info.implId(i ? raw_state.next[info.alpha_remap[TOP]] + : info.raw.start_floating); +} + +/* returns false on error */ +static +bool allocateImplId16(dfa_info &info, dstate_id_t sheng_end, dstate_id_t *sherman_base) { - info.states[0].impl_id = 0; /* dead is always 0 */ - - vector<dstate_id_t> norm; - vector<dstate_id_t> sherm; - vector<dstate_id_t> norm_sheng_succ; - vector<dstate_id_t> sherm_sheng_succ; - - if (info.size() > (1 << 16)) { - DEBUG_PRINTF("too many states\n"); - *sherman_base = 0; - return false; - } - - for (u32 i = 1; i < info.size(); i++) { - if (info.is_sheng(i)) { - continue; /* sheng impl ids have already been allocated */ - } if (info.is_sherman(i)) { - if (info.is_sheng_succ(i)) { - sherm_sheng_succ.push_back(i); - } else { - sherm.push_back(i); - } - } else { - if (info.is_sheng_succ(i)) { - norm_sheng_succ.push_back(i); - } else { - norm.push_back(i); - } - } - } - - dstate_id_t next_norm = sheng_end; - for (dstate_id_t s : norm_sheng_succ) { - info.states[s].impl_id = next_norm++; - } - if (next_norm + norm.size() + sherm_sheng_succ.size() > UINT8_MAX) { - /* we need to give sheng_succs ids which fit into a u8 -- demote these - * to normal states */ - for (dstate_id_t s : sherm_sheng_succ) { - info.states[s].impl_id = next_norm++; - info.extra[s].shermanState = false; - } - sherm_sheng_succ.clear(); - } - for (dstate_id_t s : norm) { - info.states[s].impl_id = next_norm++; - } - - *sherman_base = next_norm; - dstate_id_t next_sherman = next_norm; - - for (dstate_id_t s : sherm_sheng_succ) { - info.states[s].impl_id = next_sherman++; - } - - for (dstate_id_t s : sherm) { - info.states[s].impl_id = next_sherman++; - } - - /* Check to see if we haven't over allocated our states */ - DEBUG_PRINTF("next sherman %u masked %u\n", next_sherman, - 
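/* Note (editorial, not part of this patch): the final check in
 * allocateImplId16() verifies that the highest allocated id still fits under
 * STATE_MASK; the bits above the mask are reserved for the ACCEPT_FLAG /
 * ACCEL_FLAG decorations that get_edge_flags() later ORs into each 16-bit
 * successor-table entry. */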
(dstate_id_t)(next_sherman & STATE_MASK)); - return (next_sherman - 1) == ((next_sherman - 1) & STATE_MASK); -} - -typedef RdfaGraph::vertex_descriptor RdfaVertex; - -static -bool mark_sheng_succs(const RdfaGraph &g, dfa_info &info, - const flat_set<RdfaVertex> &sheng_states) { - u32 exit_count = 0; - - for (auto v : sheng_states) { - dstate_id_t s = g[v].index; - for (u32 i = 0; i != info.alpha_size; i++) { - if (i == info.alpha_remap[TOP]) { - continue; - } - dstate_id_t next = info.states[s].next[i]; - if (!next || info.is_sheng(next) || info.is_sheng_succ(next)) { - continue; - } - exit_count++; - info.extra[next].sheng_succ = true; - } - } - - if (exit_count + sheng_states.size() < UINT8_MAX) { - return true; - } else { - DEBUG_PRINTF("fail: unable to fit %u exits in byte", exit_count); - return false; - } -} - -static -CharReach get_edge_reach(dstate_id_t u, dstate_id_t v, const dfa_info &info) { - CharReach rv; - for (u32 i = 0; i < info.impl_alpha_size; i++) { - if (info.raw.states[u].next[i] == v) { - assert(info.rev_alpha[i].any()); - rv |= info.rev_alpha[i]; - } - } - assert(rv.any()); - return rv; -} - -#define MAX_SHENG_STATES 16 + info.states[0].impl_id = 0; /* dead is always 0 */ + + vector<dstate_id_t> norm; + vector<dstate_id_t> sherm; + vector<dstate_id_t> norm_sheng_succ; + vector<dstate_id_t> sherm_sheng_succ; + + if (info.size() > (1 << 16)) { + DEBUG_PRINTF("too many states\n"); + *sherman_base = 0; + return false; + } + + for (u32 i = 1; i < info.size(); i++) { + if (info.is_sheng(i)) { + continue; /* sheng impl ids have already been allocated */ + } if (info.is_sherman(i)) { + if (info.is_sheng_succ(i)) { + sherm_sheng_succ.push_back(i); + } else { + sherm.push_back(i); + } + } else { + if (info.is_sheng_succ(i)) { + norm_sheng_succ.push_back(i); + } else { + norm.push_back(i); + } + } + } + + dstate_id_t next_norm = sheng_end; + for (dstate_id_t s : norm_sheng_succ) { + info.states[s].impl_id = next_norm++; + } + if (next_norm + norm.size() + sherm_sheng_succ.size() > UINT8_MAX) { + /* we need to give sheng_succs ids which fit into a u8 -- demote these + * to normal states */ + for (dstate_id_t s : sherm_sheng_succ) { + info.states[s].impl_id = next_norm++; + info.extra[s].shermanState = false; + } + sherm_sheng_succ.clear(); + } + for (dstate_id_t s : norm) { + info.states[s].impl_id = next_norm++; + } + + *sherman_base = next_norm; + dstate_id_t next_sherman = next_norm; + + for (dstate_id_t s : sherm_sheng_succ) { + info.states[s].impl_id = next_sherman++; + } + + for (dstate_id_t s : sherm) { + info.states[s].impl_id = next_sherman++; + } + + /* Check to see if we haven't over allocated our states */ + DEBUG_PRINTF("next sherman %u masked %u\n", next_sherman, + (dstate_id_t)(next_sherman & STATE_MASK)); + return (next_sherman - 1) == ((next_sherman - 1) & STATE_MASK); +} + +typedef RdfaGraph::vertex_descriptor RdfaVertex; + +static +bool mark_sheng_succs(const RdfaGraph &g, dfa_info &info, + const flat_set<RdfaVertex> &sheng_states) { + u32 exit_count = 0; + + for (auto v : sheng_states) { + dstate_id_t s = g[v].index; + for (u32 i = 0; i != info.alpha_size; i++) { + if (i == info.alpha_remap[TOP]) { + continue; + } + dstate_id_t next = info.states[s].next[i]; + if (!next || info.is_sheng(next) || info.is_sheng_succ(next)) { + continue; + } + exit_count++; + info.extra[next].sheng_succ = true; + } + } + + if (exit_count + sheng_states.size() < UINT8_MAX) { + return true; + } else { + DEBUG_PRINTF("fail: unable to fit %u exits in byte", exit_count); + return 
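/* Note (editorial summary, not part of this patch): allocateImplId16() above
 * parcels out the 16-bit impl-id space in bands:
 *
 *     0                   dead state
 *     1 .. sheng_end-1    sheng states (preassigned by find_sheng_states)
 *     sheng_end ..        normal states, sheng successors first -- those
 *                         ids must stay below 256 to fit the shuffle masks
 *     sherman_base ..     sherman (compressed) states
 *
 * and demotes sherman sheng-successors to plain normal states whenever that
 * u8 budget would otherwise overflow. */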
false; + } +} + +static +CharReach get_edge_reach(dstate_id_t u, dstate_id_t v, const dfa_info &info) { + CharReach rv; + for (u32 i = 0; i < info.impl_alpha_size; i++) { + if (info.raw.states[u].next[i] == v) { + assert(info.rev_alpha[i].any()); + rv |= info.rev_alpha[i]; + } + } + assert(rv.any()); + return rv; +} + +#define MAX_SHENG_STATES 16 #define MAX_SHENG64_STATES 64 -#define MAX_SHENG_LEAKINESS 0.05 - -using LeakinessCache = ue2_unordered_map<pair<RdfaVertex, u32>, double>; - -/** - * Returns the proportion of strings of length 'depth' which will leave the - * sheng region when starting at state 'u'. - */ -static -double leakiness(const RdfaGraph &g, dfa_info &info, - const flat_set<RdfaVertex> &sheng_states, RdfaVertex u, - u32 depth, LeakinessCache &cache) { - double rv = 0; - if (contains(cache, make_pair(u, depth))) { - return cache[make_pair(u, depth)]; - } - for (RdfaVertex v : adjacent_vertices_range(u, g)) { - if (g[v].index == DEAD_STATE) { - continue; - } - double width = get_edge_reach(g[u].index, g[v].index, info).count(); - width /= N_CHARS; - - double weight; - if (!contains(sheng_states, v)) { - weight = 1; - } else if (depth > 1) { - weight = leakiness(g, info, sheng_states, v, depth - 1, cache); - } else { - continue; /* weight = 0 */ - } - rv += width * weight; - } - - cache[make_pair(u, depth)] = rv; - DEBUG_PRINTF("%zu [%u] q = %g\n", g[u].index, depth, rv); - return rv; -} - -/** - * Returns the proportion of 8 byte strings which will leave the sheng region - * when starting at state 'u'. - */ -static -double leakiness(const RdfaGraph &g, dfa_info &info, - const flat_set<RdfaVertex> &sheng_states, RdfaVertex u) { - LeakinessCache cache; - double rv = leakiness(g, info, sheng_states, u, 8, cache); - return rv; -} - -static -dstate_id_t find_sheng_states(dfa_info &info, +#define MAX_SHENG_LEAKINESS 0.05 + +using LeakinessCache = ue2_unordered_map<pair<RdfaVertex, u32>, double>; + +/** + * Returns the proportion of strings of length 'depth' which will leave the + * sheng region when starting at state 'u'. + */ +static +double leakiness(const RdfaGraph &g, dfa_info &info, + const flat_set<RdfaVertex> &sheng_states, RdfaVertex u, + u32 depth, LeakinessCache &cache) { + double rv = 0; + if (contains(cache, make_pair(u, depth))) { + return cache[make_pair(u, depth)]; + } + for (RdfaVertex v : adjacent_vertices_range(u, g)) { + if (g[v].index == DEAD_STATE) { + continue; + } + double width = get_edge_reach(g[u].index, g[v].index, info).count(); + width /= N_CHARS; + + double weight; + if (!contains(sheng_states, v)) { + weight = 1; + } else if (depth > 1) { + weight = leakiness(g, info, sheng_states, v, depth - 1, cache); + } else { + continue; /* weight = 0 */ + } + rv += width * weight; + } + + cache[make_pair(u, depth)] = rv; + DEBUG_PRINTF("%zu [%u] q = %g\n", g[u].index, depth, rv); + return rv; +} + +/** + * Returns the proportion of 8 byte strings which will leave the sheng region + * when starting at state 'u'. 
+ */ +static +double leakiness(const RdfaGraph &g, dfa_info &info, + const flat_set<RdfaVertex> &sheng_states, RdfaVertex u) { + LeakinessCache cache; + double rv = leakiness(g, info, sheng_states, u, 8, cache); + return rv; +} + +static +dstate_id_t find_sheng_states(dfa_info &info, map<dstate_id_t, AccelScheme> &accel_escape_info, size_t max_sheng_states) { - RdfaGraph g(info.raw); - auto cyclics = find_vertices_in_cycles(g); - - auto base_cyclic = RdfaGraph::null_vertex(); - for (const auto &v : cyclics) { - if (g[v].index == DEAD_STATE) { - continue; - } - DEBUG_PRINTF("considering cyclic %zu\n", g[v].index); - /* get an estimate of stickness of the cyclic: assume any edges from - * states with larger state ids are back edges */ - CharReach est_back_reach; - for (const auto &u : inv_adjacent_vertices_range(v, g)) { - if (g[u].index < g[v].index) { - continue; - } - est_back_reach |= get_edge_reach(g[u].index, g[v].index, info); - } - - if (est_back_reach.count() < 30) { - continue; - } - base_cyclic = v; - break; - } - if (!base_cyclic) { - return DEAD_STATE; - } - - flat_set<RdfaVertex> sheng_states; - deque<RdfaVertex> to_consider = { base_cyclic }; - flat_set<dstate_id_t> considered = { DEAD_STATE }; - bool seen_back_edge = false; - while (!to_consider.empty() + RdfaGraph g(info.raw); + auto cyclics = find_vertices_in_cycles(g); + + auto base_cyclic = RdfaGraph::null_vertex(); + for (const auto &v : cyclics) { + if (g[v].index == DEAD_STATE) { + continue; + } + DEBUG_PRINTF("considering cyclic %zu\n", g[v].index); + /* get an estimate of stickness of the cyclic: assume any edges from + * states with larger state ids are back edges */ + CharReach est_back_reach; + for (const auto &u : inv_adjacent_vertices_range(v, g)) { + if (g[u].index < g[v].index) { + continue; + } + est_back_reach |= get_edge_reach(g[u].index, g[v].index, info); + } + + if (est_back_reach.count() < 30) { + continue; + } + base_cyclic = v; + break; + } + if (!base_cyclic) { + return DEAD_STATE; + } + + flat_set<RdfaVertex> sheng_states; + deque<RdfaVertex> to_consider = { base_cyclic }; + flat_set<dstate_id_t> considered = { DEAD_STATE }; + bool seen_back_edge = false; + while (!to_consider.empty() && sheng_states.size() < max_sheng_states) { - auto v = to_consider.front(); - to_consider.pop_front(); - if (!considered.insert(g[v].index).second) { - continue; - } - - assert(!contains(sheng_states, v)); - - if (generates_callbacks(info.raw.kind) - && !info.states[g[v].index].reports.empty()) { - /* cannot raise callbacks from sheng region */ - continue; - } - - sheng_states.insert(v); - for (const auto &t : adjacent_vertices_range(v, g)) { - if (!contains(considered, g[t].index)) { - to_consider.push_back(t); - } - if (t == base_cyclic) { - seen_back_edge = true; - } - } - } - - /* allocate normal ids */ - dstate_id_t sheng_end = DEAD_STATE + 1; - for (auto v : sheng_states) { - dstate_id_t s = g[v].index; - if (!contains(accel_escape_info, s)) { - info.states[s].impl_id = sheng_end++; - info.extra[s].sheng_id = info.states[s].impl_id - 1; - } - } - - /* allocate accel ids */ - for (auto v : sheng_states) { - dstate_id_t s = g[v].index; - if (contains(accel_escape_info, s)) { - assert(!info.states[s].impl_id); - info.states[s].impl_id = sheng_end++; - info.extra[s].sheng_id = info.states[s].impl_id - 1; - } - } - - if (sheng_states.size() < MIN_SHENG_SIZE) { - DEBUG_PRINTF("sheng region too small\n"); - return DEAD_STATE; - } - - if (!seen_back_edge) { - DEBUG_PRINTF("did not include cyclic\n"); - return 
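/* Note (editorial, not part of this patch): leakiness() above evaluates the
 * recurrence
 *
 *     q(u, 0) = 0
 *     q(u, d) = sum over successors v of (|reach(u,v)| / 256) *
 *               { 1 if v lies outside the sheng region, else q(v, d-1) }
 *
 * i.e. the probability that a uniformly random byte string of length d
 * escapes the region starting from u; a region with q(base_cyclic, 8) >
 * MAX_SHENG_LEAKINESS (5%) is rejected. */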
DEAD_STATE; - } - - double leak = leakiness(g, info, sheng_states, base_cyclic); - if (leak > MAX_SHENG_LEAKINESS) { - DEBUG_PRINTF("too leaky (%g)\n", leak); - return DEAD_STATE; - } - - if (!mark_sheng_succs(g, info, sheng_states)) { - return DEAD_STATE; - } - - /* TODO: ensure sufficiently 'sticky' */ - /* TODO: check not all states accel */ - DEBUG_PRINTF("sheng_end = %hu\n", sheng_end); - return sheng_end; -} - -static -void fill_in_aux_info(NFA *nfa, const dfa_info &info, - const map<dstate_id_t, AccelScheme> &accel_escape_info, - u32 accel_offset, UNUSED u32 accel_end_offset, - const vector<u32> &reports, - const vector<u32> &reports_eod, - u32 report_base_offset, - const raw_report_info &ri) { - mcsheng *m = (mcsheng *)getMutableImplNfa(nfa); - - vector<u32> reportOffsets; - - ri.fillReportLists(nfa, report_base_offset, reportOffsets); - - for (u32 i = 0; i < info.size(); i++) { - u16 impl_id = info.implId(i); - mstate_aux *this_aux = getAux(nfa, impl_id); - - fillInAux(this_aux, i, info, reports, reports_eod, reportOffsets); - if (contains(accel_escape_info, i)) { - this_aux->accel_offset = accel_offset; - accel_offset += info.strat.accelSize(); - assert(accel_offset <= accel_end_offset); - assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); - info.strat.buildAccel(i, accel_escape_info.at(i), - (void *)((char *)m + this_aux->accel_offset)); - } - } -} - -static -u16 get_edge_flags(NFA *nfa, dstate_id_t target_impl_id) { - mstate_aux *aux = getAux(nfa, target_impl_id); - u16 flags = 0; - - if (aux->accept) { - flags |= ACCEPT_FLAG; - } - - if (aux->accel_offset) { - flags |= ACCEL_FLAG; - } - - return flags; -} - -static -void fill_in_succ_table_16(NFA *nfa, const dfa_info &info, - dstate_id_t sheng_end, - UNUSED dstate_id_t sherman_base) { - u16 *succ_table = (u16 *)((char *)nfa + sizeof(NFA) + sizeof(mcsheng)); - - u8 alphaShift = info.getAlphaShift(); - assert(alphaShift <= 8); - - for (size_t i = 0; i < info.size(); i++) { - if (!info.is_normal(i)) { - assert(info.implId(i) < sheng_end || info.is_sherman(i)); - continue; - } - - assert(info.implId(i) < sherman_base); - u16 normal_id = verify_u16(info.implId(i) - sheng_end); - - for (size_t s = 0; s < info.impl_alpha_size; s++) { - dstate_id_t raw_succ = info.states[i].next[s]; - u16 &entry = succ_table[((size_t)normal_id << alphaShift) + s]; - - entry = info.implId(raw_succ); - entry |= get_edge_flags(nfa, entry); - } - } -} - + auto v = to_consider.front(); + to_consider.pop_front(); + if (!considered.insert(g[v].index).second) { + continue; + } + + assert(!contains(sheng_states, v)); + + if (generates_callbacks(info.raw.kind) + && !info.states[g[v].index].reports.empty()) { + /* cannot raise callbacks from sheng region */ + continue; + } + + sheng_states.insert(v); + for (const auto &t : adjacent_vertices_range(v, g)) { + if (!contains(considered, g[t].index)) { + to_consider.push_back(t); + } + if (t == base_cyclic) { + seen_back_edge = true; + } + } + } + + /* allocate normal ids */ + dstate_id_t sheng_end = DEAD_STATE + 1; + for (auto v : sheng_states) { + dstate_id_t s = g[v].index; + if (!contains(accel_escape_info, s)) { + info.states[s].impl_id = sheng_end++; + info.extra[s].sheng_id = info.states[s].impl_id - 1; + } + } + + /* allocate accel ids */ + for (auto v : sheng_states) { + dstate_id_t s = g[v].index; + if (contains(accel_escape_info, s)) { + assert(!info.states[s].impl_id); + info.states[s].impl_id = sheng_end++; + info.extra[s].sheng_id = info.states[s].impl_id - 1; + } + } + + if 
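/* Note (editorial summary, not part of this patch): the checks here give the
 * full acceptance criteria for a sheng region: at least MIN_SHENG_SIZE (6)
 * states, a back edge to the base cyclic actually seen, depth-8 leakiness
 * within bounds, every region exit numbered to fit a byte
 * (mark_sheng_succs), and -- filtered earlier in the BFS -- no
 * callback-raising states inside the region. */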
(sheng_states.size() < MIN_SHENG_SIZE) { + DEBUG_PRINTF("sheng region too small\n"); + return DEAD_STATE; + } + + if (!seen_back_edge) { + DEBUG_PRINTF("did not include cyclic\n"); + return DEAD_STATE; + } + + double leak = leakiness(g, info, sheng_states, base_cyclic); + if (leak > MAX_SHENG_LEAKINESS) { + DEBUG_PRINTF("too leaky (%g)\n", leak); + return DEAD_STATE; + } + + if (!mark_sheng_succs(g, info, sheng_states)) { + return DEAD_STATE; + } + + /* TODO: ensure sufficiently 'sticky' */ + /* TODO: check not all states accel */ + DEBUG_PRINTF("sheng_end = %hu\n", sheng_end); + return sheng_end; +} + +static +void fill_in_aux_info(NFA *nfa, const dfa_info &info, + const map<dstate_id_t, AccelScheme> &accel_escape_info, + u32 accel_offset, UNUSED u32 accel_end_offset, + const vector<u32> &reports, + const vector<u32> &reports_eod, + u32 report_base_offset, + const raw_report_info &ri) { + mcsheng *m = (mcsheng *)getMutableImplNfa(nfa); + + vector<u32> reportOffsets; + + ri.fillReportLists(nfa, report_base_offset, reportOffsets); + + for (u32 i = 0; i < info.size(); i++) { + u16 impl_id = info.implId(i); + mstate_aux *this_aux = getAux(nfa, impl_id); + + fillInAux(this_aux, i, info, reports, reports_eod, reportOffsets); + if (contains(accel_escape_info, i)) { + this_aux->accel_offset = accel_offset; + accel_offset += info.strat.accelSize(); + assert(accel_offset <= accel_end_offset); + assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); + info.strat.buildAccel(i, accel_escape_info.at(i), + (void *)((char *)m + this_aux->accel_offset)); + } + } +} + +static +u16 get_edge_flags(NFA *nfa, dstate_id_t target_impl_id) { + mstate_aux *aux = getAux(nfa, target_impl_id); + u16 flags = 0; + + if (aux->accept) { + flags |= ACCEPT_FLAG; + } + + if (aux->accel_offset) { + flags |= ACCEL_FLAG; + } + + return flags; +} + +static +void fill_in_succ_table_16(NFA *nfa, const dfa_info &info, + dstate_id_t sheng_end, + UNUSED dstate_id_t sherman_base) { + u16 *succ_table = (u16 *)((char *)nfa + sizeof(NFA) + sizeof(mcsheng)); + + u8 alphaShift = info.getAlphaShift(); + assert(alphaShift <= 8); + + for (size_t i = 0; i < info.size(); i++) { + if (!info.is_normal(i)) { + assert(info.implId(i) < sheng_end || info.is_sherman(i)); + continue; + } + + assert(info.implId(i) < sherman_base); + u16 normal_id = verify_u16(info.implId(i) - sheng_end); + + for (size_t s = 0; s < info.impl_alpha_size; s++) { + dstate_id_t raw_succ = info.states[i].next[s]; + u16 &entry = succ_table[((size_t)normal_id << alphaShift) + s]; + + entry = info.implId(raw_succ); + entry |= get_edge_flags(nfa, entry); + } + } +} + static void fill_in_aux_info64(NFA *nfa, const dfa_info &info, const map<dstate_id_t, AccelScheme> &accel_escape_info, @@ -791,232 +791,232 @@ void fill_in_succ_table_64_16(NFA *nfa, const dfa_info &info, } } -#define MAX_SHERMAN_LIST_LEN 8 - -static -void addIfEarlier(flat_set<dstate_id_t> &dest, dstate_id_t candidate, - dstate_id_t max) { - if (candidate < max) { - dest.insert(candidate); - } -} - -static -void addSuccessors(flat_set<dstate_id_t> &dest, const dstate &source, - u16 alphasize, dstate_id_t curr_id) { - for (symbol_t s = 0; s < alphasize; s++) { - addIfEarlier(dest, source.next[s], curr_id); - } -} - -/* \brief Returns a set of states to search for a better daddy. 
*/
-static
-flat_set<dstate_id_t> find_daddy_candidates(const dfa_info &info,
-                                            dstate_id_t curr_id) {
-    flat_set<dstate_id_t> hinted;
-
-    addIfEarlier(hinted, 0, curr_id);
-    addIfEarlier(hinted, info.raw.start_anchored, curr_id);
-    addIfEarlier(hinted, info.raw.start_floating, curr_id);
-
-    // Add existing daddy and his successors, then search back one generation.
-    const u16 alphasize = info.impl_alpha_size;
-    dstate_id_t daddy = info.states[curr_id].daddy;
-    for (u32 level = 0; daddy && level < 2; level++) {
-        addIfEarlier(hinted, daddy, curr_id);
-        addSuccessors(hinted, info.states[daddy], alphasize, curr_id);
-        daddy = info.states[daddy].daddy;
-    }
-
-    return hinted;
-}
-
-#define MAX_SHERMAN_SELF_LOOP 20
-
-static
-void find_better_daddy(dfa_info &info, dstate_id_t curr_id,
-                       bool any_cyclic_near_anchored_state, const Grey &grey) {
-    if (!grey.allowShermanStates) {
-        return;
-    }
-
-    const u16 width = sizeof(u16);
-    const u16 alphasize = info.impl_alpha_size;
-
-    if (info.raw.start_anchored != DEAD_STATE
-        && any_cyclic_near_anchored_state
-        && curr_id < alphasize * 3) {
-        /* crude attempt to prevent frequent states from being sherman'ed
-         * depends on the fact that state numbers are currently in BFS
-         * order */
-        DEBUG_PRINTF("%hu is banned\n", curr_id);
-        return;
-    }
-
-    if (info.raw.start_floating != DEAD_STATE
-        && curr_id >= info.raw.start_floating
-        && curr_id < info.raw.start_floating + alphasize * 3) {
-        /* crude attempt to prevent frequent states from being sherman'ed
-         * depends on the fact that state numbers are currently in BFS
-         * order */
-        DEBUG_PRINTF("%hu is banned (%hu)\n", curr_id, info.raw.start_floating);
-        return;
-    }
-
-    const u16 full_state_size = width * alphasize;
-    const u16 max_list_len = MIN(MAX_SHERMAN_LIST_LEN,
-                                 (full_state_size - 2)/(width + 1));
-    u16 best_score = 0;
-    dstate_id_t best_daddy = 0;
-    dstate &currState = info.states[curr_id];
-
-    flat_set<dstate_id_t> hinted = find_daddy_candidates(info, curr_id);
-
-    for (const dstate_id_t &donor : hinted) {
-        assert(donor < curr_id);
-        u32 score = 0;
-
-        if (!info.is_normal(donor)) {
-            continue;
-        }
-
-        const dstate &donorState = info.states[donor];
-        for (symbol_t s = 0; s < alphasize; s++) {
-            if (currState.next[s] == donorState.next[s]) {
-                score++;
-            }
-        }
-
-        /* prefer lower ids to provide some stability amongst potential
-         * siblings */
-        if (score > best_score || (score == best_score && donor < best_daddy)) {
-            best_daddy = donor;
-            best_score = score;
-
-            if (score == alphasize) {
-                break;
-            }
-        }
-    }
-
-    currState.daddy = best_daddy;
-    info.extra[curr_id].daddytaken = best_score;
-    DEBUG_PRINTF("%hu -> daddy %hu: %u/%u BF\n", curr_id, best_daddy,
-                 best_score, alphasize);
-
-    if (best_daddy == DEAD_STATE) {
-        return; /* No good daddy */
-    }
-
-    if (best_score + max_list_len < alphasize) {
-        return; /* ??? 
*/ - } - - assert(info.is_normal(currState.daddy)); - - u32 self_loop_width = 0; - const dstate &curr_raw = info.states[curr_id]; - for (unsigned i = 0; i < N_CHARS; i++) { - if (curr_raw.next[info.alpha_remap[i]] == curr_id) { - self_loop_width++; - } - } - - if (self_loop_width > MAX_SHERMAN_SELF_LOOP) { - DEBUG_PRINTF("%hu is banned wide self loop (%u)\n", curr_id, - self_loop_width); - return; - } - - if (info.is_sheng(curr_id)) { - return; - } - - DEBUG_PRINTF("%hu is sherman\n", curr_id); - info.extra[curr_id].shermanState = true; -} - -static -bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) { - symbol_t alphasize = raw.getImplAlphaSize(); - for (symbol_t s = 0; s < alphasize; s++) { - dstate_id_t succ_id = raw.states[root].next[s]; - if (succ_id == DEAD_STATE) { - continue; - } - - const dstate &succ = raw.states[succ_id]; - for (symbol_t t = 0; t < alphasize; t++) { - if (succ.next[t] == root || succ.next[t] == succ_id) { - return true; - } - } - } - return false; -} - -static -void fill_in_sherman(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) { - char *nfa_base = (char *)nfa; - mcsheng *m = (mcsheng *)getMutableImplNfa(nfa); - char *sherman_table = nfa_base + m->sherman_offset; - - assert(ISALIGNED_16(sherman_table)); - for (size_t i = 0; i < info.size(); i++) { - if (!info.is_sherman(i)) { - continue; - } - u16 fs = verify_u16(info.implId(i)); - DEBUG_PRINTF("building sherman %zu impl %hu\n", i, fs); - - assert(fs >= sherman_limit); - - char *curr_sherman_entry - = sherman_table + (fs - m->sherman_limit) * SHERMAN_FIXED_SIZE; - assert(curr_sherman_entry <= nfa_base + m->length); - - u8 len = verify_u8(info.impl_alpha_size - info.extra[i].daddytaken); - assert(len <= 9); - dstate_id_t d = info.states[i].daddy; - - *(u8 *)(curr_sherman_entry + SHERMAN_TYPE_OFFSET) = SHERMAN_STATE; - *(u8 *)(curr_sherman_entry + SHERMAN_LEN_OFFSET) = len; - *(u16 *)(curr_sherman_entry + SHERMAN_DADDY_OFFSET) = info.implId(d); - u8 *chars = (u8 *)(curr_sherman_entry + SHERMAN_CHARS_OFFSET); - - for (u16 s = 0; s < info.impl_alpha_size; s++) { - if (info.states[i].next[s] != info.states[d].next[s]) { - *(chars++) = (u8)s; - } - } - - u16 *states = (u16 *)(curr_sherman_entry + SHERMAN_STATES_OFFSET(len)); - for (u16 s = 0; s < info.impl_alpha_size; s++) { - if (info.states[i].next[s] != info.states[d].next[s]) { - DEBUG_PRINTF("s overrider %hu dad %hu char next %hu\n", fs, - info.implId(d), - info.implId(info.states[i].next[s])); - u16 entry_val = info.implId(info.states[i].next[s]); - entry_val |= get_edge_flags(nfa, entry_val); - unaligned_store_u16((u8 *)states++, entry_val); - } - } - } -} - -static -bytecode_ptr<NFA> mcshengCompile16(dfa_info &info, dstate_id_t sheng_end, - const map<dstate_id_t, AccelScheme> &accel_escape_info, - const Grey &grey) { - DEBUG_PRINTF("building mcsheng 16\n"); - - vector<u32> reports; /* index in ri for the appropriate report list */ - vector<u32> reports_eod; /* as above */ - ReportID arb; - u8 single; - - assert(info.getAlphaShift() <= 8); - +#define MAX_SHERMAN_LIST_LEN 8 + +static +void addIfEarlier(flat_set<dstate_id_t> &dest, dstate_id_t candidate, + dstate_id_t max) { + if (candidate < max) { + dest.insert(candidate); + } +} + +static +void addSuccessors(flat_set<dstate_id_t> &dest, const dstate &source, + u16 alphasize, dstate_id_t curr_id) { + for (symbol_t s = 0; s < alphasize; s++) { + addIfEarlier(dest, source.next[s], curr_id); + } +} + +/* \brief Returns a set of states to search for a better daddy. 
*/
+static
+flat_set<dstate_id_t> find_daddy_candidates(const dfa_info &info,
+                                            dstate_id_t curr_id) {
+    flat_set<dstate_id_t> hinted;
+
+    addIfEarlier(hinted, 0, curr_id);
+    addIfEarlier(hinted, info.raw.start_anchored, curr_id);
+    addIfEarlier(hinted, info.raw.start_floating, curr_id);
+
+    // Add existing daddy and his successors, then search back one generation.
+    const u16 alphasize = info.impl_alpha_size;
+    dstate_id_t daddy = info.states[curr_id].daddy;
+    for (u32 level = 0; daddy && level < 2; level++) {
+        addIfEarlier(hinted, daddy, curr_id);
+        addSuccessors(hinted, info.states[daddy], alphasize, curr_id);
+        daddy = info.states[daddy].daddy;
+    }
+
+    return hinted;
+}
+
+#define MAX_SHERMAN_SELF_LOOP 20
+
+static
+void find_better_daddy(dfa_info &info, dstate_id_t curr_id,
+                       bool any_cyclic_near_anchored_state, const Grey &grey) {
+    if (!grey.allowShermanStates) {
+        return;
+    }
+
+    const u16 width = sizeof(u16);
+    const u16 alphasize = info.impl_alpha_size;
+
+    if (info.raw.start_anchored != DEAD_STATE
+        && any_cyclic_near_anchored_state
+        && curr_id < alphasize * 3) {
+        /* crude attempt to prevent frequent states from being sherman'ed
+         * depends on the fact that state numbers are currently in BFS
+         * order */
+        DEBUG_PRINTF("%hu is banned\n", curr_id);
+        return;
+    }
+
+    if (info.raw.start_floating != DEAD_STATE
+        && curr_id >= info.raw.start_floating
+        && curr_id < info.raw.start_floating + alphasize * 3) {
+        /* crude attempt to prevent frequent states from being sherman'ed
+         * depends on the fact that state numbers are currently in BFS
+         * order */
+        DEBUG_PRINTF("%hu is banned (%hu)\n", curr_id, info.raw.start_floating);
+        return;
+    }
+
+    const u16 full_state_size = width * alphasize;
+    const u16 max_list_len = MIN(MAX_SHERMAN_LIST_LEN,
+                                 (full_state_size - 2)/(width + 1));
+    u16 best_score = 0;
+    dstate_id_t best_daddy = 0;
+    dstate &currState = info.states[curr_id];
+
+    flat_set<dstate_id_t> hinted = find_daddy_candidates(info, curr_id);
+
+    for (const dstate_id_t &donor : hinted) {
+        assert(donor < curr_id);
+        u32 score = 0;
+
+        if (!info.is_normal(donor)) {
+            continue;
+        }
+
+        const dstate &donorState = info.states[donor];
+        for (symbol_t s = 0; s < alphasize; s++) {
+            if (currState.next[s] == donorState.next[s]) {
+                score++;
+            }
+        }
+
+        /* prefer lower ids to provide some stability amongst potential
+         * siblings */
+        if (score > best_score || (score == best_score && donor < best_daddy)) {
+            best_daddy = donor;
+            best_score = score;
+
+            if (score == alphasize) {
+                break;
+            }
+        }
+    }
+
+    currState.daddy = best_daddy;
+    info.extra[curr_id].daddytaken = best_score;
+    DEBUG_PRINTF("%hu -> daddy %hu: %u/%u BF\n", curr_id, best_daddy,
+                 best_score, alphasize);
+
+    if (best_daddy == DEAD_STATE) {
+        return; /* No good daddy */
+    }
+
+    if (best_score + max_list_len < alphasize) {
+        return; /* ??? 
*/ + } + + assert(info.is_normal(currState.daddy)); + + u32 self_loop_width = 0; + const dstate &curr_raw = info.states[curr_id]; + for (unsigned i = 0; i < N_CHARS; i++) { + if (curr_raw.next[info.alpha_remap[i]] == curr_id) { + self_loop_width++; + } + } + + if (self_loop_width > MAX_SHERMAN_SELF_LOOP) { + DEBUG_PRINTF("%hu is banned wide self loop (%u)\n", curr_id, + self_loop_width); + return; + } + + if (info.is_sheng(curr_id)) { + return; + } + + DEBUG_PRINTF("%hu is sherman\n", curr_id); + info.extra[curr_id].shermanState = true; +} + +static +bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) { + symbol_t alphasize = raw.getImplAlphaSize(); + for (symbol_t s = 0; s < alphasize; s++) { + dstate_id_t succ_id = raw.states[root].next[s]; + if (succ_id == DEAD_STATE) { + continue; + } + + const dstate &succ = raw.states[succ_id]; + for (symbol_t t = 0; t < alphasize; t++) { + if (succ.next[t] == root || succ.next[t] == succ_id) { + return true; + } + } + } + return false; +} + +static +void fill_in_sherman(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) { + char *nfa_base = (char *)nfa; + mcsheng *m = (mcsheng *)getMutableImplNfa(nfa); + char *sherman_table = nfa_base + m->sherman_offset; + + assert(ISALIGNED_16(sherman_table)); + for (size_t i = 0; i < info.size(); i++) { + if (!info.is_sherman(i)) { + continue; + } + u16 fs = verify_u16(info.implId(i)); + DEBUG_PRINTF("building sherman %zu impl %hu\n", i, fs); + + assert(fs >= sherman_limit); + + char *curr_sherman_entry + = sherman_table + (fs - m->sherman_limit) * SHERMAN_FIXED_SIZE; + assert(curr_sherman_entry <= nfa_base + m->length); + + u8 len = verify_u8(info.impl_alpha_size - info.extra[i].daddytaken); + assert(len <= 9); + dstate_id_t d = info.states[i].daddy; + + *(u8 *)(curr_sherman_entry + SHERMAN_TYPE_OFFSET) = SHERMAN_STATE; + *(u8 *)(curr_sherman_entry + SHERMAN_LEN_OFFSET) = len; + *(u16 *)(curr_sherman_entry + SHERMAN_DADDY_OFFSET) = info.implId(d); + u8 *chars = (u8 *)(curr_sherman_entry + SHERMAN_CHARS_OFFSET); + + for (u16 s = 0; s < info.impl_alpha_size; s++) { + if (info.states[i].next[s] != info.states[d].next[s]) { + *(chars++) = (u8)s; + } + } + + u16 *states = (u16 *)(curr_sherman_entry + SHERMAN_STATES_OFFSET(len)); + for (u16 s = 0; s < info.impl_alpha_size; s++) { + if (info.states[i].next[s] != info.states[d].next[s]) { + DEBUG_PRINTF("s overrider %hu dad %hu char next %hu\n", fs, + info.implId(d), + info.implId(info.states[i].next[s])); + u16 entry_val = info.implId(info.states[i].next[s]); + entry_val |= get_edge_flags(nfa, entry_val); + unaligned_store_u16((u8 *)states++, entry_val); + } + } + } +} + +static +bytecode_ptr<NFA> mcshengCompile16(dfa_info &info, dstate_id_t sheng_end, + const map<dstate_id_t, AccelScheme> &accel_escape_info, + const Grey &grey) { + DEBUG_PRINTF("building mcsheng 16\n"); + + vector<u32> reports; /* index in ri for the appropriate report list */ + vector<u32> reports_eod; /* as above */ + ReportID arb; + u8 single; + + assert(info.getAlphaShift() <= 8); + // Sherman optimization if (info.impl_alpha_size > 16) { u16 total_daddy = 0; @@ -1030,86 +1030,86 @@ bytecode_ptr<NFA> mcshengCompile16(dfa_info &info, dstate_id_t sheng_end, DEBUG_PRINTF("daddy %hu/%zu states=%zu alpha=%hu\n", total_daddy, info.size() * info.impl_alpha_size, info.size(), info.impl_alpha_size); - } - - u16 sherman_limit; - if (!allocateImplId16(info, sheng_end, &sherman_limit)) { - DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n", - info.size()); - return nullptr; - } - u16 
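/* Note (layout inferred from fill_in_sherman() above; not stated explicitly
 * in this patch): a sherman record stores only the transitions that differ
 * from the daddy state --
 *
 *     SHERMAN_TYPE_OFFSET        : type tag (u8)
 *     SHERMAN_LEN_OFFSET         : number of overridden symbols (u8)
 *     SHERMAN_DADDY_OFFSET       : daddy impl id (u16)
 *     SHERMAN_CHARS_OFFSET       : len x overridden symbol (u8 each)
 *     SHERMAN_STATES_OFFSET(len) : len x successor id (u16, stored
 *                                  unaligned, with get_edge_flags() bits
 *                                  OR'd in)
 *
 * so a state sharing most of its row with a daddy costs a few bytes instead
 * of a full (1 << alphaShift) * sizeof(u16) row. */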
count_real_states = sherman_limit - sheng_end; - - auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb); - - size_t tran_size = (1 << info.getAlphaShift()) * sizeof(u16) - * count_real_states; - - size_t aux_size = sizeof(mstate_aux) * info.size(); - - size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcsheng) + tran_size); - size_t accel_size = info.strat.accelSize() * accel_escape_info.size(); - size_t accel_offset = ROUNDUP_N(aux_offset + aux_size - + ri->getReportListSize(), 32); - size_t sherman_offset = ROUNDUP_16(accel_offset + accel_size); - size_t sherman_size = calcShermanRegionSize(info); - - size_t total_size = sherman_offset + sherman_size; - - accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */ - assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); - - auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size); - mcsheng *m = (mcsheng *)getMutableImplNfa(nfa.get()); - - populateBasicInfo(sizeof(u16), info, total_size, aux_offset, accel_offset, - accel_escape_info.size(), arb, single, nfa.get()); - createShuffleMasks(m, info, sheng_end, accel_escape_info); - - /* copy in the mc header information */ - m->sherman_offset = sherman_offset; - m->sherman_end = total_size; - m->sherman_limit = sherman_limit; - - DEBUG_PRINTF("%hu sheng, %hu norm, %zu total\n", sheng_end, - count_real_states, info.size()); - - fill_in_aux_info(nfa.get(), info, accel_escape_info, accel_offset, - sherman_offset - sizeof(NFA), reports, reports_eod, - aux_offset + aux_size, *ri); - - fill_in_succ_table_16(nfa.get(), info, sheng_end, sherman_limit); - - fill_in_sherman(nfa.get(), info, sherman_limit); - - return nfa; -} - -static -void fill_in_succ_table_8(NFA *nfa, const dfa_info &info, - dstate_id_t sheng_end) { - u8 *succ_table = (u8 *)nfa + sizeof(NFA) + sizeof(mcsheng); - - u8 alphaShift = info.getAlphaShift(); - assert(alphaShift <= 8); - - for (size_t i = 0; i < info.size(); i++) { - assert(!info.is_sherman(i)); - if (!info.is_normal(i)) { - assert(info.implId(i) < sheng_end); - continue; - } - u8 normal_id = verify_u8(info.implId(i) - sheng_end); - - for (size_t s = 0; s < info.impl_alpha_size; s++) { - dstate_id_t raw_succ = info.states[i].next[s]; - succ_table[((size_t)normal_id << alphaShift) + s] - = info.implId(raw_succ); - } - } -} - -static + } + + u16 sherman_limit; + if (!allocateImplId16(info, sheng_end, &sherman_limit)) { + DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n", + info.size()); + return nullptr; + } + u16 count_real_states = sherman_limit - sheng_end; + + auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb); + + size_t tran_size = (1 << info.getAlphaShift()) * sizeof(u16) + * count_real_states; + + size_t aux_size = sizeof(mstate_aux) * info.size(); + + size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcsheng) + tran_size); + size_t accel_size = info.strat.accelSize() * accel_escape_info.size(); + size_t accel_offset = ROUNDUP_N(aux_offset + aux_size + + ri->getReportListSize(), 32); + size_t sherman_offset = ROUNDUP_16(accel_offset + accel_size); + size_t sherman_size = calcShermanRegionSize(info); + + size_t total_size = sherman_offset + sherman_size; + + accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */ + assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); + + auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size); + mcsheng *m = (mcsheng *)getMutableImplNfa(nfa.get()); + + populateBasicInfo(sizeof(u16), info, total_size, aux_offset, accel_offset, + 
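/* Note (editorial, not part of this patch): the offsets computed above lay
 * the 16-bit bytecode image out as
 *
 *     NFA header | mcsheng header | successor table | mstate_aux[] |
 *     report lists | accel entries (32-byte aligned) | sherman region
 *
 * with accel_offset then adjusted to be relative to the mcsheng header (m),
 * as the code comment notes. */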
accel_escape_info.size(), arb, single, nfa.get()); + createShuffleMasks(m, info, sheng_end, accel_escape_info); + + /* copy in the mc header information */ + m->sherman_offset = sherman_offset; + m->sherman_end = total_size; + m->sherman_limit = sherman_limit; + + DEBUG_PRINTF("%hu sheng, %hu norm, %zu total\n", sheng_end, + count_real_states, info.size()); + + fill_in_aux_info(nfa.get(), info, accel_escape_info, accel_offset, + sherman_offset - sizeof(NFA), reports, reports_eod, + aux_offset + aux_size, *ri); + + fill_in_succ_table_16(nfa.get(), info, sheng_end, sherman_limit); + + fill_in_sherman(nfa.get(), info, sherman_limit); + + return nfa; +} + +static +void fill_in_succ_table_8(NFA *nfa, const dfa_info &info, + dstate_id_t sheng_end) { + u8 *succ_table = (u8 *)nfa + sizeof(NFA) + sizeof(mcsheng); + + u8 alphaShift = info.getAlphaShift(); + assert(alphaShift <= 8); + + for (size_t i = 0; i < info.size(); i++) { + assert(!info.is_sherman(i)); + if (!info.is_normal(i)) { + assert(info.implId(i) < sheng_end); + continue; + } + u8 normal_id = verify_u8(info.implId(i) - sheng_end); + + for (size_t s = 0; s < info.impl_alpha_size; s++) { + dstate_id_t raw_succ = info.states[i].next[s]; + succ_table[((size_t)normal_id << alphaShift) + s] + = info.implId(raw_succ); + } + } +} + +static void fill_in_sherman64(NFA *nfa, dfa_info &info, UNUSED u16 sherman_limit) { char *nfa_base = (char *)nfa; mcsheng64 *m = (mcsheng64 *)getMutableImplNfa(nfa); @@ -1264,102 +1264,102 @@ void fill_in_succ_table_64_8(NFA *nfa, const dfa_info &info, } static -void allocateImplId8(dfa_info &info, dstate_id_t sheng_end, - const map<dstate_id_t, AccelScheme> &accel_escape_info, - u16 *accel_limit, u16 *accept_limit) { - info.states[0].impl_id = 0; /* dead is always 0 */ - - vector<dstate_id_t> norm; - vector<dstate_id_t> accel; - vector<dstate_id_t> accept; - - assert(info.size() <= (1 << 8)); - - for (u32 i = 1; i < info.size(); i++) { - if (info.is_sheng(i)) { - continue; /* already allocated */ - } else if (!info.states[i].reports.empty()) { - accept.push_back(i); - } else if (contains(accel_escape_info, i)) { - accel.push_back(i); - } else { - norm.push_back(i); - } - } - - u32 j = sheng_end; - for (const dstate_id_t &s : norm) { - assert(j <= 256); - DEBUG_PRINTF("mapping state %u to %u\n", s, j); - info.states[s].impl_id = j++; - } - *accel_limit = j; - for (const dstate_id_t &s : accel) { - assert(j <= 256); - DEBUG_PRINTF("mapping state %u to %u\n", s, j); - info.states[s].impl_id = j++; - } - *accept_limit = j; - for (const dstate_id_t &s : accept) { - assert(j <= 256); - DEBUG_PRINTF("mapping state %u to %u\n", s, j); - info.states[s].impl_id = j++; - } -} - -static -bytecode_ptr<NFA> mcshengCompile8(dfa_info &info, dstate_id_t sheng_end, - const map<dstate_id_t, AccelScheme> &accel_escape_info) { - DEBUG_PRINTF("building mcsheng 8\n"); - - vector<u32> reports; - vector<u32> reports_eod; - ReportID arb; - u8 single; - - auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb); - - size_t normal_count = info.size() - sheng_end; - - size_t tran_size = sizeof(u8) * (1 << info.getAlphaShift()) * normal_count; - size_t aux_size = sizeof(mstate_aux) * info.size(); - size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcsheng) + tran_size); - size_t accel_size = info.strat.accelSize() * accel_escape_info.size(); - size_t accel_offset = ROUNDUP_N(aux_offset + aux_size - + ri->getReportListSize(), 32); - size_t total_size = accel_offset + accel_size; - - DEBUG_PRINTF("aux_size %zu\n", aux_size); - 
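/* Note (editorial, not part of this patch): unlike the 16-bit table, the
 * 8-bit successor table built by fill_in_succ_table_8() stores bare impl ids
 * with no flag bits -- a u8 has no spare room -- which is why the 8-bit
 * variant instead classifies states by id range via accel_limit_8 /
 * accept_limit_8 (see allocateImplId8()). */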
DEBUG_PRINTF("aux_offset %zu\n", aux_offset); - DEBUG_PRINTF("rl size %u\n", ri->getReportListSize()); - DEBUG_PRINTF("accel_size %zu\n", accel_size); - DEBUG_PRINTF("accel_offset %zu\n", accel_offset); - DEBUG_PRINTF("total_size %zu\n", total_size); - - accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */ - assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); - - auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size); - mcsheng *m = (mcsheng *)getMutableImplNfa(nfa.get()); - - allocateImplId8(info, sheng_end, accel_escape_info, &m->accel_limit_8, - &m->accept_limit_8); - - populateBasicInfo(sizeof(u8), info, total_size, aux_offset, accel_offset, - accel_escape_info.size(), arb, single, nfa.get()); - createShuffleMasks(m, info, sheng_end, accel_escape_info); - - fill_in_aux_info(nfa.get(), info, accel_escape_info, accel_offset, - total_size - sizeof(NFA), reports, reports_eod, - aux_offset + aux_size, *ri); - - fill_in_succ_table_8(nfa.get(), info, sheng_end); - - DEBUG_PRINTF("rl size %zu\n", ri->size()); - - return nfa; -} - +void allocateImplId8(dfa_info &info, dstate_id_t sheng_end, + const map<dstate_id_t, AccelScheme> &accel_escape_info, + u16 *accel_limit, u16 *accept_limit) { + info.states[0].impl_id = 0; /* dead is always 0 */ + + vector<dstate_id_t> norm; + vector<dstate_id_t> accel; + vector<dstate_id_t> accept; + + assert(info.size() <= (1 << 8)); + + for (u32 i = 1; i < info.size(); i++) { + if (info.is_sheng(i)) { + continue; /* already allocated */ + } else if (!info.states[i].reports.empty()) { + accept.push_back(i); + } else if (contains(accel_escape_info, i)) { + accel.push_back(i); + } else { + norm.push_back(i); + } + } + + u32 j = sheng_end; + for (const dstate_id_t &s : norm) { + assert(j <= 256); + DEBUG_PRINTF("mapping state %u to %u\n", s, j); + info.states[s].impl_id = j++; + } + *accel_limit = j; + for (const dstate_id_t &s : accel) { + assert(j <= 256); + DEBUG_PRINTF("mapping state %u to %u\n", s, j); + info.states[s].impl_id = j++; + } + *accept_limit = j; + for (const dstate_id_t &s : accept) { + assert(j <= 256); + DEBUG_PRINTF("mapping state %u to %u\n", s, j); + info.states[s].impl_id = j++; + } +} + +static +bytecode_ptr<NFA> mcshengCompile8(dfa_info &info, dstate_id_t sheng_end, + const map<dstate_id_t, AccelScheme> &accel_escape_info) { + DEBUG_PRINTF("building mcsheng 8\n"); + + vector<u32> reports; + vector<u32> reports_eod; + ReportID arb; + u8 single; + + auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb); + + size_t normal_count = info.size() - sheng_end; + + size_t tran_size = sizeof(u8) * (1 << info.getAlphaShift()) * normal_count; + size_t aux_size = sizeof(mstate_aux) * info.size(); + size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcsheng) + tran_size); + size_t accel_size = info.strat.accelSize() * accel_escape_info.size(); + size_t accel_offset = ROUNDUP_N(aux_offset + aux_size + + ri->getReportListSize(), 32); + size_t total_size = accel_offset + accel_size; + + DEBUG_PRINTF("aux_size %zu\n", aux_size); + DEBUG_PRINTF("aux_offset %zu\n", aux_offset); + DEBUG_PRINTF("rl size %u\n", ri->getReportListSize()); + DEBUG_PRINTF("accel_size %zu\n", accel_size); + DEBUG_PRINTF("accel_offset %zu\n", accel_offset); + DEBUG_PRINTF("total_size %zu\n", total_size); + + accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */ + assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); + + auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size); + mcsheng *m = (mcsheng 
*)getMutableImplNfa(nfa.get()); + + allocateImplId8(info, sheng_end, accel_escape_info, &m->accel_limit_8, + &m->accept_limit_8); + + populateBasicInfo(sizeof(u8), info, total_size, aux_offset, accel_offset, + accel_escape_info.size(), arb, single, nfa.get()); + createShuffleMasks(m, info, sheng_end, accel_escape_info); + + fill_in_aux_info(nfa.get(), info, accel_escape_info, accel_offset, + total_size - sizeof(NFA), reports, reports_eod, + aux_offset + aux_size, *ri); + + fill_in_succ_table_8(nfa.get(), info, sheng_end); + + DEBUG_PRINTF("rl size %zu\n", ri->size()); + + return nfa; +} + static bytecode_ptr<NFA> mcsheng64Compile8(dfa_info &info, dstate_id_t sheng_end, const map<dstate_id_t, AccelScheme> &accel_escape_info) { @@ -1412,54 +1412,54 @@ bytecode_ptr<NFA> mcsheng64Compile8(dfa_info &info, dstate_id_t sheng_end, return nfa; } -bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc, - const ReportManager &rm) { - if (!cc.grey.allowMcSheng) { - return nullptr; - } - - mcclellan_build_strat mbs(raw, rm, false); - dfa_info info(mbs); - bool using8bit = cc.grey.allowMcClellan8 && info.size() <= 256; - - if (!cc.streaming) { /* TODO: work out if we can do the strip in streaming - * mode with our semantics */ - raw.stripExtraEodReports(); - } - - bool has_eod_reports = raw.hasEodReports(); - - map<dstate_id_t, AccelScheme> accel_escape_info - = info.strat.getAccelInfo(cc.grey); +bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc, + const ReportManager &rm) { + if (!cc.grey.allowMcSheng) { + return nullptr; + } + + mcclellan_build_strat mbs(raw, rm, false); + dfa_info info(mbs); + bool using8bit = cc.grey.allowMcClellan8 && info.size() <= 256; + + if (!cc.streaming) { /* TODO: work out if we can do the strip in streaming + * mode with our semantics */ + raw.stripExtraEodReports(); + } + + bool has_eod_reports = raw.hasEodReports(); + + map<dstate_id_t, AccelScheme> accel_escape_info + = info.strat.getAccelInfo(cc.grey); auto old_states = info.states; dstate_id_t sheng_end = find_sheng_states(info, accel_escape_info, MAX_SHENG_STATES); - - if (sheng_end <= DEAD_STATE + 1) { + + if (sheng_end <= DEAD_STATE + 1) { info.states = old_states; - return nullptr; - } - - bytecode_ptr<NFA> nfa; - - if (!using8bit) { - nfa = mcshengCompile16(info, sheng_end, accel_escape_info, cc.grey); - } else { - nfa = mcshengCompile8(info, sheng_end, accel_escape_info); - } - - if (!nfa) { + return nullptr; + } + + bytecode_ptr<NFA> nfa; + + if (!using8bit) { + nfa = mcshengCompile16(info, sheng_end, accel_escape_info, cc.grey); + } else { + nfa = mcshengCompile8(info, sheng_end, accel_escape_info); + } + + if (!nfa) { info.states = old_states; - return nfa; - } - - if (has_eod_reports) { - nfa->flags |= NFA_ACCEPTS_EOD; - } - - DEBUG_PRINTF("compile done\n"); - return nfa; -} - + return nfa; + } + + if (has_eod_reports) { + nfa->flags |= NFA_ACCEPTS_EOD; + } + + DEBUG_PRINTF("compile done\n"); + return nfa; +} + bytecode_ptr<NFA> mcshengCompile64(raw_dfa &raw, const CompileContext &cc, const ReportManager &rm) { if (!cc.grey.allowMcSheng) { @@ -1520,8 +1520,8 @@ bytecode_ptr<NFA> mcshengCompile64(raw_dfa &raw, const CompileContext &cc, return nfa; } -bool has_accel_mcsheng(const NFA *) { - return true; /* consider the sheng region as accelerated */ -} - -} // namespace ue2 +bool has_accel_mcsheng(const NFA *) { + return true; /* consider the sheng region as accelerated */ +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfa/mcsheng_compile.h 
b/contrib/libs/hyperscan/src/nfa/mcsheng_compile.h index 3a79b46a23..faa289807f 100644 --- a/contrib/libs/hyperscan/src/nfa/mcsheng_compile.h +++ b/contrib/libs/hyperscan/src/nfa/mcsheng_compile.h @@ -1,51 +1,51 @@ -/* - * Copyright (c) 2016-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef MCSHENGCOMPILE_H -#define MCSHENGCOMPILE_H - -#include "ue2common.h" -#include "util/bytecode_ptr.h" - -struct NFA; - -namespace ue2 { - -class ReportManager; -struct CompileContext; -struct raw_dfa; - -bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc, - const ReportManager &rm); +/* + * Copyright (c) 2016-2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef MCSHENGCOMPILE_H +#define MCSHENGCOMPILE_H + +#include "ue2common.h" +#include "util/bytecode_ptr.h" + +struct NFA; + +namespace ue2 { + +class ReportManager; +struct CompileContext; +struct raw_dfa; + +bytecode_ptr<NFA> mcshengCompile(raw_dfa &raw, const CompileContext &cc, + const ReportManager &rm); bytecode_ptr<NFA> mcshengCompile64(raw_dfa &raw, const CompileContext &cc, const ReportManager &rm); -bool has_accel_mcsheng(const NFA *nfa); - -} // namespace ue2 - -#endif +bool has_accel_mcsheng(const NFA *nfa); + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/nfa/mcsheng_data.c b/contrib/libs/hyperscan/src/nfa/mcsheng_data.c index 0701b4b313..afd108c790 100644 --- a/contrib/libs/hyperscan/src/nfa/mcsheng_data.c +++ b/contrib/libs/hyperscan/src/nfa/mcsheng_data.c @@ -1,46 +1,46 @@ -/* +/* * Copyright (c) 2016-2020, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "mcsheng_internal.h" - -/* This table is in a separate translation unit from mcsheng.c as we want to - * prevent the compiler from seeing these constants. We have the load resources - * free at runtime to load the masks with no problems. 
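The comment ending just above records a deliberate build trick rather than an accident: the mask table lives in its own translation unit precisely so the optimizer compiling mcsheng.c cannot see the constant values and fold or specialize on them; the using code only ever sees an extern declaration (it appears in mcsheng_internal.h further down in this diff). A minimal sketch of the pattern, with hypothetical file and symbol names:

/* masks_data.c (hypothetical): definition kept in its own translation unit */
const unsigned long long g_masks[4] = { 0x0, 0xff0f, 0xff000f, 0xff00000f };

/* masks.h (hypothetical): users include only this extern declaration, so a
 * compiler building another .c file must emit a real load of g_masks[i]
 * instead of folding the constant into the surrounding code */
extern const unsigned long long g_masks[4];
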
*/ -const u64a mcsheng_pext_mask[8] = { - 0, /* dummy */ - 0x000000000000ff0f, - 0x0000000000ff000f, - 0x00000000ff00000f, - 0x000000ff0000000f, - 0x0000ff000000000f, - 0x00ff00000000000f, - 0xff0000000000000f, -}; + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "mcsheng_internal.h" + +/* This table is in a separate translation unit from mcsheng.c as we want to + * prevent the compiler from seeing these constants. We have the load resources + * free at runtime to load the masks with no problems. */ +const u64a mcsheng_pext_mask[8] = { + 0, /* dummy */ + 0x000000000000ff0f, + 0x0000000000ff000f, + 0x00000000ff00000f, + 0x000000ff0000000f, + 0x0000ff000000000f, + 0x00ff00000000000f, + 0xff0000000000000f, +}; #if defined(HAVE_AVX512VBMI) const u64a mcsheng64_pext_mask[8] = { 0, /* dummy */ diff --git a/contrib/libs/hyperscan/src/nfa/mcsheng_internal.h b/contrib/libs/hyperscan/src/nfa/mcsheng_internal.h index d985574624..a10bafec55 100644 --- a/contrib/libs/hyperscan/src/nfa/mcsheng_internal.h +++ b/contrib/libs/hyperscan/src/nfa/mcsheng_internal.h @@ -1,97 +1,97 @@ -/* +/* * Copyright (c) 2016-2020, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef MCSHENG_INTERNAL_H -#define MCSHENG_INTERNAL_H - -#include "nfa_internal.h" -#include "ue2common.h" -#include "util/simd_types.h" - -#define ACCEPT_FLAG 0x8000 -#define ACCEL_FLAG 0x4000 -#define STATE_MASK 0x3fff - -#define SHERMAN_STATE 1 - -#define SHERMAN_TYPE_OFFSET 0 -#define SHERMAN_FIXED_SIZE 32 - -#define SHERMAN_LEN_OFFSET 1 -#define SHERMAN_DADDY_OFFSET 2 -#define SHERMAN_CHARS_OFFSET 4 -#define SHERMAN_STATES_OFFSET(sso_len) (4 + (sso_len)) - -struct report_list { - u32 count; - ReportID report[]; -}; - -struct mstate_aux { - u32 accept; - u32 accept_eod; - u16 top; - u32 accel_offset; /* relative to start of struct mcsheng; 0 if no accel */ -}; - -#define MCSHENG_FLAG_SINGLE 1 /**< we raise only single accept id */ - -struct mcsheng { - u16 state_count; /**< total number of states */ - u32 length; /**< length of dfa in bytes */ - u16 start_anchored; /**< anchored start state */ - u16 start_floating; /**< floating start state */ - u32 aux_offset; /**< offset of the aux structures relative to the start of - * the nfa structure */ - u32 sherman_offset; /**< offset of array of sherman state offsets the - * state_info structures relative to the start of the - * nfa structure */ - u32 sherman_end; /**< offset of the end of the state_info structures - * relative to the start of the nfa structure */ - u16 sheng_end; /**< first non-sheng state */ - u16 sheng_accel_limit; /**< first sheng accel state. state given in terms of - * internal sheng ids */ - u16 accel_limit_8; /**< 8 bit, lowest accelerable state */ - u16 accept_limit_8; /**< 8 bit, lowest accept state */ - u16 sherman_limit; /**< lowest sherman state */ - u8 alphaShift; - u8 flags; - u8 has_accel; /**< 1 iff there are any accel plans */ - u8 remap[256]; /**< remaps characters to a smaller alphabet */ - ReportID arb_report; /**< one of the accepts that this dfa may raise */ + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef MCSHENG_INTERNAL_H +#define MCSHENG_INTERNAL_H + +#include "nfa_internal.h" +#include "ue2common.h" +#include "util/simd_types.h" + +#define ACCEPT_FLAG 0x8000 +#define ACCEL_FLAG 0x4000 +#define STATE_MASK 0x3fff + +#define SHERMAN_STATE 1 + +#define SHERMAN_TYPE_OFFSET 0 +#define SHERMAN_FIXED_SIZE 32 + +#define SHERMAN_LEN_OFFSET 1 +#define SHERMAN_DADDY_OFFSET 2 +#define SHERMAN_CHARS_OFFSET 4 +#define SHERMAN_STATES_OFFSET(sso_len) (4 + (sso_len)) + +struct report_list { + u32 count; + ReportID report[]; +}; + +struct mstate_aux { + u32 accept; + u32 accept_eod; + u16 top; + u32 accel_offset; /* relative to start of struct mcsheng; 0 if no accel */ +}; + +#define MCSHENG_FLAG_SINGLE 1 /**< we raise only single accept id */ + +struct mcsheng { + u16 state_count; /**< total number of states */ + u32 length; /**< length of dfa in bytes */ + u16 start_anchored; /**< anchored start state */ + u16 start_floating; /**< floating start state */ + u32 aux_offset; /**< offset of the aux structures relative to the start of + * the nfa structure */ + u32 sherman_offset; /**< offset of array of sherman state offsets the + * state_info structures relative to the start of the + * nfa structure */ + u32 sherman_end; /**< offset of the end of the state_info structures + * relative to the start of the nfa structure */ + u16 sheng_end; /**< first non-sheng state */ + u16 sheng_accel_limit; /**< first sheng accel state. state given in terms of + * internal sheng ids */ + u16 accel_limit_8; /**< 8 bit, lowest accelerable state */ + u16 accept_limit_8; /**< 8 bit, lowest accept state */ + u16 sherman_limit; /**< lowest sherman state */ + u8 alphaShift; + u8 flags; + u8 has_accel; /**< 1 iff there are any accel plans */ + u8 remap[256]; /**< remaps characters to a smaller alphabet */ + ReportID arb_report; /**< one of the accepts that this dfa may raise */ u32 accel_offset; /**< offset of accel structures from start of McClellan */ - m128 sheng_masks[N_CHARS]; -}; - -/* pext masks for the runtime to access appropriately copies of bytes 1..7 - * representing the data from a u64a. */ -extern const u64a mcsheng_pext_mask[8]; - + m128 sheng_masks[N_CHARS]; +}; + +/* pext masks for the runtime to access appropriately copies of bytes 1..7 + * representing the data from a u64a. 
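Since the diff shows only the constants, a quick gloss on how these tables are consumed: on BMI2 hardware the pext instruction gathers the bits of a 64-bit value at the positions where the mask has ones and packs them contiguously from bit 0, so a mask such as mcsheng_pext_mask[4] = 0x000000ff0000000f pulls out the low nibble together with byte 4 in a single operation. A self-contained sketch of that behaviour (illustrative values, not code from this commit; build with -mbmi2 on an x86-64 compiler):

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    uint64_t mask = 0x000000ff0000000fULL; /* selects bits 0..3 plus byte 4 */
    uint64_t word = 0x0000004200000007ULL; /* byte 4 = 0x42, low nibble = 7 */

    /* _pext_u64 packs the selected bits LSB-first:
     *   result bits 0..3  <- word bits 0..3   (0x7)
     *   result bits 4..11 <- word bits 32..39 (0x42) */
    uint64_t packed = _pext_u64(word, mask);
    printf("0x%llx\n", (unsigned long long)packed); /* prints 0x427 */
    return 0;
}
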
*/ +extern const u64a mcsheng_pext_mask[8]; + struct mcsheng64 { u16 state_count; /**< total number of states */ u32 length; /**< length of dfa in bytes */ @@ -121,4 +121,4 @@ struct mcsheng64 { extern const u64a mcsheng64_pext_mask[8]; -#endif +#endif diff --git a/contrib/libs/hyperscan/src/nfa/mpv.c b/contrib/libs/hyperscan/src/nfa/mpv.c index 552754d608..0bc5f3d8cb 100644 --- a/contrib/libs/hyperscan/src/nfa/mpv.c +++ b/contrib/libs/hyperscan/src/nfa/mpv.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -124,15 +124,15 @@ char processReports(const struct mpv *m, u8 *reporters, DEBUG_PRINTF("report %u at %llu\n", curr->report, report_offset); - if (curr->unbounded && !curr->simple_exhaust) { + if (curr->unbounded && !curr->simple_exhaust) { assert(rl_count < m->puffette_count); *rl = curr->report; ++rl; rl_count++; } - if (cb(0, report_offset, curr->report, ctxt) == - MO_HALT_MATCHING) { + if (cb(0, report_offset, curr->report, ctxt) == + MO_HALT_MATCHING) { DEBUG_PRINTF("bailing\n"); return MO_HALT_MATCHING; } @@ -177,11 +177,11 @@ char processReportsForRange(const struct mpv *m, u8 *reporters, return MO_CONTINUE_MATCHING; } - DEBUG_PRINTF("length=%zu, rl_count=%u\n", length, rl_count); - - for (size_t i = 2; i <= length; i++) { + DEBUG_PRINTF("length=%zu, rl_count=%u\n", length, rl_count); + + for (size_t i = 2; i <= length; i++) { for (u32 j = 0; j < rl_count; j++) { - if (cb(0, first_offset + i, rl[j], ctxt) == MO_HALT_MATCHING) { + if (cb(0, first_offset + i, rl[j], ctxt) == MO_HALT_MATCHING) { DEBUG_PRINTF("bailing\n"); return MO_HALT_MATCHING; } @@ -825,21 +825,21 @@ void mpvStoreState(const struct NFA *n, char *state, } } -char nfaExecMpv_queueCompressState(const struct NFA *nfa, const struct mq *q, - UNUSED s64a loc) { +char nfaExecMpv_queueCompressState(const struct NFA *nfa, const struct mq *q, + UNUSED s64a loc) { void *dest = q->streamState; const void *src = q->state; mpvStoreState(nfa, dest, src); return 0; } -char nfaExecMpv_expandState(const struct NFA *nfa, void *dest, const void *src, - UNUSED u64a offset, UNUSED u8 key) { +char nfaExecMpv_expandState(const struct NFA *nfa, void *dest, const void *src, + UNUSED u64a offset, UNUSED u8 key) { mpvLoadState(dest, nfa, src); return 0; } -char nfaExecMpv_reportCurrent(const struct NFA *n, struct mq *q) { +char nfaExecMpv_reportCurrent(const struct NFA *n, struct mq *q) { const struct mpv *m = getImplNfa(n); u64a offset = q_cur_offset(q); struct mpv_decomp_state *s = (struct mpv_decomp_state *)q->state; @@ -855,7 +855,7 @@ char nfaExecMpv_reportCurrent(const struct NFA *n, struct mq *q) { return 0; } -char nfaExecMpv_queueInitState(const struct NFA *n, struct mq *q) { +char nfaExecMpv_queueInitState(const struct NFA *n, struct mq *q) { struct mpv_decomp_state *out = (void *)q->state; const struct mpv *m = getImplNfa(n); assert(sizeof(*out) <= n->scratchStateSize); @@ -880,8 +880,8 @@ char nfaExecMpv_queueInitState(const struct NFA *n, struct mq *q) { return 0; } -char nfaExecMpv_initCompressedState(const struct NFA *n, u64a offset, - void *state, UNUSED u8 key) { +char nfaExecMpv_initCompressedState(const struct NFA *n, u64a offset, + void *state, UNUSED u8 key) { const struct mpv *m = getImplNfa(n); memset(state, 0, m->active_offset); /* active_offset marks end of comp * counters */ @@ -896,7 +896,7 @@ char 
nfaExecMpv_initCompressedState(const struct NFA *n, u64a offset, } static really_inline -char nfaExecMpv_Q_i(const struct NFA *n, struct mq *q, s64a end) { +char nfaExecMpv_Q_i(const struct NFA *n, struct mq *q, s64a end) { u64a offset = q->offset; const u8 *buffer = q->buffer; size_t length = q->length; @@ -1021,18 +1021,18 @@ char nfaExecMpv_Q_i(const struct NFA *n, struct mq *q, s64a end) { return alive; } -char nfaExecMpv_Q(const struct NFA *n, struct mq *q, s64a end) { +char nfaExecMpv_Q(const struct NFA *n, struct mq *q, s64a end) { DEBUG_PRINTF("_Q %lld\n", end); - return nfaExecMpv_Q_i(n, q, end); + return nfaExecMpv_Q_i(n, q, end); } -s64a nfaExecMpv_QueueExecRaw(const struct NFA *nfa, struct mq *q, s64a end) { +s64a nfaExecMpv_QueueExecRaw(const struct NFA *nfa, struct mq *q, s64a end) { DEBUG_PRINTF("nfa=%p end=%lld\n", nfa, end); #ifdef DEBUG debugQueue(q); #endif - assert(nfa->type == MPV_NFA); + assert(nfa->type == MPV_NFA); assert(q && q->context && q->state); assert(end >= 0); assert(q->cur < q->end); @@ -1058,7 +1058,7 @@ s64a nfaExecMpv_QueueExecRaw(const struct NFA *nfa, struct mq *q, s64a end) { /* TODO: restore max offset stuff, if/when _interesting_ max offset stuff * is filled in */ - char rv = nfaExecMpv_Q_i(nfa, q, end); + char rv = nfaExecMpv_Q_i(nfa, q, end); assert(!q->report_current); DEBUG_PRINTF("returned rv=%d, q_trimmed=%d\n", rv, q_trimmed); diff --git a/contrib/libs/hyperscan/src/nfa/mpv.h b/contrib/libs/hyperscan/src/nfa/mpv.h index 3780728d7f..bc514d13e0 100644 --- a/contrib/libs/hyperscan/src/nfa/mpv.h +++ b/contrib/libs/hyperscan/src/nfa/mpv.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,27 +34,27 @@ struct mq; struct NFA; -char nfaExecMpv_Q(const struct NFA *n, struct mq *q, s64a end); -char nfaExecMpv_reportCurrent(const struct NFA *n, struct mq *q); -char nfaExecMpv_queueInitState(const struct NFA *n, struct mq *q); -char nfaExecMpv_initCompressedState(const struct NFA *n, u64a offset, - void *state, u8 key); -char nfaExecMpv_queueCompressState(const struct NFA *nfa, const struct mq *q, - s64a loc); -char nfaExecMpv_expandState(const struct NFA *nfa, void *dest, const void *src, - u64a offset, u8 key); +char nfaExecMpv_Q(const struct NFA *n, struct mq *q, s64a end); +char nfaExecMpv_reportCurrent(const struct NFA *n, struct mq *q); +char nfaExecMpv_queueInitState(const struct NFA *n, struct mq *q); +char nfaExecMpv_initCompressedState(const struct NFA *n, u64a offset, + void *state, u8 key); +char nfaExecMpv_queueCompressState(const struct NFA *nfa, const struct mq *q, + s64a loc); +char nfaExecMpv_expandState(const struct NFA *nfa, void *dest, const void *src, + u64a offset, u8 key); -#define nfaExecMpv_testEOD NFA_API_NO_IMPL -#define nfaExecMpv_inAccept NFA_API_NO_IMPL -#define nfaExecMpv_inAnyAccept NFA_API_NO_IMPL -#define nfaExecMpv_QR NFA_API_NO_IMPL -#define nfaExecMpv_Q2 NFA_API_NO_IMPL /* for non-chained suffixes. */ -#define nfaExecMpv_B_Reverse NFA_API_NO_IMPL -#define nfaExecMpv_zombie_status NFA_API_ZOMBIE_NO_IMPL +#define nfaExecMpv_testEOD NFA_API_NO_IMPL +#define nfaExecMpv_inAccept NFA_API_NO_IMPL +#define nfaExecMpv_inAnyAccept NFA_API_NO_IMPL +#define nfaExecMpv_QR NFA_API_NO_IMPL +#define nfaExecMpv_Q2 NFA_API_NO_IMPL /* for non-chained suffixes. 
*/ +#define nfaExecMpv_B_Reverse NFA_API_NO_IMPL +#define nfaExecMpv_zombie_status NFA_API_ZOMBIE_NO_IMPL /** * return 0 if the mpv dies, otherwise returns the location of the next possible * match (given the currently known events). */ -s64a nfaExecMpv_QueueExecRaw(const struct NFA *nfa, struct mq *q, s64a end); +s64a nfaExecMpv_QueueExecRaw(const struct NFA *nfa, struct mq *q, s64a end); #endif diff --git a/contrib/libs/hyperscan/src/nfa/mpv_internal.h b/contrib/libs/hyperscan/src/nfa/mpv_internal.h index a52853dce2..ae562de11e 100644 --- a/contrib/libs/hyperscan/src/nfa/mpv_internal.h +++ b/contrib/libs/hyperscan/src/nfa/mpv_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -40,15 +40,15 @@ struct mpv_puffette { u32 repeats; char unbounded; - - /** - * \brief Report is simple-exhaustible. - * - * If this is true, we do best-effort suppression of runs of reports, only - * delivering the first one. - */ - char simple_exhaust; - + + /** + * \brief Report is simple-exhaustible. + * + * If this is true, we do best-effort suppression of runs of reports, only + * delivering the first one. + */ + char simple_exhaust; + ReportID report; }; diff --git a/contrib/libs/hyperscan/src/nfa/mpvcompile.cpp b/contrib/libs/hyperscan/src/nfa/mpvcompile.cpp index 8497c64870..abb36dd606 100644 --- a/contrib/libs/hyperscan/src/nfa/mpvcompile.cpp +++ b/contrib/libs/hyperscan/src/nfa/mpvcompile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,9 +34,9 @@ #include "shufticompile.h" #include "trufflecompile.h" #include "util/alloc.h" -#include "util/multibit_build.h" +#include "util/multibit_build.h" #include "util/order_check.h" -#include "util/report_manager.h" +#include "util/report_manager.h" #include "util/verify_types.h" #include <algorithm> @@ -54,8 +54,8 @@ namespace ue2 { namespace { struct pcomp { bool operator()(const raw_puff &a, const raw_puff &b) const { - return tie(a.repeats, a.unbounded, a.simple_exhaust, a.report) < - tie(b.repeats, b.unbounded, b.simple_exhaust, b.report); + return tie(a.repeats, a.unbounded, a.simple_exhaust, a.report) < + tie(b.repeats, b.unbounded, b.simple_exhaust, b.report); } }; @@ -83,24 +83,24 @@ struct ClusterKey { } // namespace static -void writePuffette(mpv_puffette *out, const raw_puff &rp, - const ReportManager &rm) { +void writePuffette(mpv_puffette *out, const raw_puff &rp, + const ReportManager &rm) { DEBUG_PRINTF("outputting %u %d %u to %p\n", rp.repeats, (int)rp.unbounded, rp.report, out); out->repeats = rp.repeats; out->unbounded = rp.unbounded; - out->simple_exhaust = rp.simple_exhaust; - out->report = rm.getProgramOffset(rp.report); -} - -static -void writeSentinel(mpv_puffette *out) { - DEBUG_PRINTF("outputting sentinel to %p\n", out); - memset(out, 0, sizeof(*out)); - out->report = INVALID_REPORT; + out->simple_exhaust = rp.simple_exhaust; + out->report = rm.getProgramOffset(rp.report); } static +void writeSentinel(mpv_puffette *out) { + DEBUG_PRINTF("outputting sentinel to %p\n", out); + memset(out, 0, sizeof(*out)); + out->report = INVALID_REPORT; +} + +static void writeDeadPoint(mpv_kilopuff *out, const 
vector<raw_puff> &puffs) { for (const auto &puff : puffs) { if (puff.unbounded) { /* mpv can never die */ @@ -156,8 +156,8 @@ void populateClusters(const vector<raw_puff> &puffs_in, static void writeKiloPuff(const map<ClusterKey, vector<raw_puff>>::const_iterator &it, - const ReportManager &rm, u32 counter_offset, mpv *m, - mpv_kilopuff *kp, mpv_puffette **pa) { + const ReportManager &rm, u32 counter_offset, mpv *m, + mpv_kilopuff *kp, mpv_puffette **pa) { const CharReach &reach = it->first.reach; const vector<raw_puff> &puffs = it->second; @@ -175,13 +175,13 @@ void writeKiloPuff(const map<ClusterKey, vector<raw_puff>>::const_iterator &it, size_t set = reach.find_first(); assert(set != CharReach::npos); kp->u.verm.c = (char)set; - } else if (shuftiBuildMasks(~reach, (u8 *)&kp->u.shuf.mask_lo, - (u8 *)&kp->u.shuf.mask_hi) != -1) { + } else if (shuftiBuildMasks(~reach, (u8 *)&kp->u.shuf.mask_lo, + (u8 *)&kp->u.shuf.mask_hi) != -1) { kp->type = MPV_SHUFTI; } else { kp->type = MPV_TRUFFLE; - truffleBuildMasks(~reach, (u8 *)&kp->u.truffle.mask1, - (u8 *)&kp->u.truffle.mask2); + truffleBuildMasks(~reach, (u8 *)&kp->u.truffle.mask1, + (u8 *)&kp->u.truffle.mask2); } kp->count = verify_u32(puffs.size()); @@ -191,11 +191,11 @@ void writeKiloPuff(const map<ClusterKey, vector<raw_puff>>::const_iterator &it, kp->puffette_offset = verify_u32((char *)*pa - (char *)m); for (size_t i = 0; i < puffs.size(); i++) { assert(!it->first.auto_restart || puffs[i].unbounded); - writePuffette(*pa + i, puffs[i], rm); + writePuffette(*pa + i, puffs[i], rm); } *pa += puffs.size(); - writeSentinel(*pa); + writeSentinel(*pa); ++*pa; writeDeadPoint(kp, puffs); @@ -208,7 +208,7 @@ void writeCoreNfa(NFA *nfa, u32 len, u32 min_width, u32 max_counter, nfa->length = len; nfa->nPositions = max_counter - 1; - nfa->type = MPV_NFA; + nfa->type = MPV_NFA; nfa->streamStateSize = streamStateSize; assert(16 >= sizeof(mpv_decomp_kilo)); nfa->scratchStateSize = scratchStateSize; @@ -309,9 +309,9 @@ const mpv_counter_info &findCounter(const vector<mpv_counter_info> &counters, return counters.front(); } -bytecode_ptr<NFA> mpvCompile(const vector<raw_puff> &puffs_in, - const vector<raw_puff> &triggered_puffs, - const ReportManager &rm) { +bytecode_ptr<NFA> mpvCompile(const vector<raw_puff> &puffs_in, + const vector<raw_puff> &triggered_puffs, + const ReportManager &rm) { assert(!puffs_in.empty() || !triggered_puffs.empty()); u32 puffette_count = puffs_in.size() + triggered_puffs.size(); @@ -343,7 +343,7 @@ bytecode_ptr<NFA> mpvCompile(const vector<raw_puff> &puffs_in, DEBUG_PRINTF("%u puffs, len = %u\n", puffette_count, len); - auto nfa = make_zeroed_bytecode_ptr<NFA>(len); + auto nfa = make_zeroed_bytecode_ptr<NFA>(len); mpv_puffette *pa_base = (mpv_puffette *) ((char *)nfa.get() + sizeof(NFA) + sizeof(mpv) @@ -351,7 +351,7 @@ bytecode_ptr<NFA> mpvCompile(const vector<raw_puff> &puffs_in, + sizeof(mpv_counter_info) * counters.size()); mpv_puffette *pa = pa_base; - writeSentinel(pa); + writeSentinel(pa); ++pa; /* skip init sentinel */ @@ -377,9 +377,9 @@ bytecode_ptr<NFA> mpvCompile(const vector<raw_puff> &puffs_in, mpv_kilopuff *kp_begin = (mpv_kilopuff *)(m + 1); mpv_kilopuff *kp = kp_begin; for (auto it = puff_clusters.begin(); it != puff_clusters.end(); ++it) { - writeKiloPuff(it, rm, - findCounter(counters, kp - kp_begin).counter_offset, m, - kp, &pa); + writeKiloPuff(it, rm, + findCounter(counters, kp - kp_begin).counter_offset, m, + kp, &pa); ++kp; } assert((char *)pa == (char *)nfa.get() + len); diff --git 
a/contrib/libs/hyperscan/src/nfa/mpvcompile.h b/contrib/libs/hyperscan/src/nfa/mpvcompile.h index 4f820e4365..d9bfdb4a2f 100644 --- a/contrib/libs/hyperscan/src/nfa/mpvcompile.h +++ b/contrib/libs/hyperscan/src/nfa/mpvcompile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,7 +30,7 @@ #define MPV_COMPILE_H #include "ue2common.h" -#include "util/bytecode_ptr.h" +#include "util/bytecode_ptr.h" #include "util/charreach.h" #include <memory> @@ -40,19 +40,19 @@ struct NFA; namespace ue2 { -class ReportManager; - +class ReportManager; + struct raw_puff { raw_puff(u32 repeats_in, bool unbounded_in, ReportID report_in, - const CharReach &reach_in, bool auto_restart_in = false, - bool simple_exhaust_in = false) + const CharReach &reach_in, bool auto_restart_in = false, + bool simple_exhaust_in = false) : repeats(repeats_in), unbounded(unbounded_in), - auto_restart(auto_restart_in), simple_exhaust(simple_exhaust_in), - report(report_in), reach(reach_in) {} + auto_restart(auto_restart_in), simple_exhaust(simple_exhaust_in), + report(report_in), reach(reach_in) {} u32 repeats; /**< report match after this many matching bytes */ bool unbounded; /**< keep producing matches after repeats are reached */ bool auto_restart; /**< for /[^X]{n}/ type patterns */ - bool simple_exhaust; /* first report will exhaust us */ + bool simple_exhaust; /* first report will exhaust us */ ReportID report; CharReach reach; /**< = ~escapes */ }; @@ -61,9 +61,9 @@ struct raw_puff { * puffs in the triggered_puffs vector are enabled when an TOP_N event is * delivered corresponding to their index in the vector */ -bytecode_ptr<NFA> mpvCompile(const std::vector<raw_puff> &puffs, - const std::vector<raw_puff> &triggered_puffs, - const ReportManager &rm); +bytecode_ptr<NFA> mpvCompile(const std::vector<raw_puff> &puffs, + const std::vector<raw_puff> &triggered_puffs, + const ReportManager &rm); } // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfa/nfa_api.h b/contrib/libs/hyperscan/src/nfa/nfa_api.h index e3f7f74311..c97e2cadf2 100644 --- a/contrib/libs/hyperscan/src/nfa/nfa_api.h +++ b/contrib/libs/hyperscan/src/nfa/nfa_api.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -120,16 +120,16 @@ char nfaInitCompressedState(const struct NFA *nfa, u64a offset, void *state, */ char nfaQueueExec(const struct NFA *nfa, struct mq *q, s64a end); -/** - * Main execution function that doesn't perform the checks and optimisations of - * nfaQueueExec() and just dispatches directly to the nfa implementations. It is - * intended to be used by the Tamarama engine. - */ -char nfaQueueExec_raw(const struct NFA *nfa, struct mq *q, s64a end); - -/** Return value indicating that the engine is dead. */ -#define MO_DEAD 0 - +/** + * Main execution function that doesn't perform the checks and optimisations of + * nfaQueueExec() and just dispatches directly to the nfa implementations. It is + * intended to be used by the Tamarama engine. + */ +char nfaQueueExec_raw(const struct NFA *nfa, struct mq *q, s64a end); + +/** Return value indicating that the engine is dead. 
*/ +#define MO_DEAD 0 + /** Return value indicating that the engine is alive. */ #define MO_ALIVE 1 @@ -166,13 +166,13 @@ char nfaQueueExec_raw(const struct NFA *nfa, struct mq *q, s64a end); char nfaQueueExecToMatch(const struct NFA *nfa, struct mq *q, s64a end); /** - * Main execution function that doesn't perform the checks and optimisations of - * nfaQueueExecToMatch() and just dispatches directly to the nfa - * implementations. It is intended to be used by the Tamarama engine. - */ -char nfaQueueExec2_raw(const struct NFA *nfa, struct mq *q, s64a end); - -/** + * Main execution function that doesn't perform the checks and optimisations of + * nfaQueueExecToMatch() and just dispatches directly to the nfa + * implementations. It is intended to be used by the Tamarama engine. + */ +char nfaQueueExec2_raw(const struct NFA *nfa, struct mq *q, s64a end); + +/** * Report matches at the current queue location. * * @param nfa the NFA to execute @@ -193,15 +193,15 @@ char nfaReportCurrentMatches(const struct NFA *nfa, struct mq *q); char nfaInAcceptState(const struct NFA *nfa, ReportID report, struct mq *q); /** - * Returns non-zero if the NFA is in any accept state regardless of report - * ID. - */ -char nfaInAnyAcceptState(const struct NFA *nfa, struct mq *q); - -/** + * Returns non-zero if the NFA is in any accept state regardless of report + * ID. + */ +char nfaInAnyAcceptState(const struct NFA *nfa, struct mq *q); + +/** * Process the queued commands on the given NFA up to end or the first match. * - * Note: This version is meant for rose prefix/infix NFAs: + * Note: This version is meant for rose prefix/infix NFAs: * - never uses a callback * - loading of state at a point in history is not special cased * @@ -210,9 +210,9 @@ char nfaInAnyAcceptState(const struct NFA *nfa, struct mq *q); * end with some variant of end. The location field of the events must * be monotonically increasing. If not all the data was processed during * the call, the queue is updated to reflect the remaining work. - * @param report we are interested in. If the given report will be raised at - * the end location, the function returns @ref MO_MATCHES_PENDING. If no - * match information is desired, MO_INVALID_IDX should be passed in. + * @param report we are interested in. If the given report will be raised at + * the end location, the function returns @ref MO_MATCHES_PENDING. If no + * match information is desired, MO_INVALID_IDX should be passed in. * @return @ref MO_ALIVE if the nfa is still active with no matches pending, * and @ref MO_MATCHES_PENDING if there are matches pending, 0 if not * alive @@ -228,9 +228,9 @@ char nfaQueueExecRose(const struct NFA *nfa, struct mq *q, ReportID report); * Runs an NFA in reverse from (buf + buflen) to buf and then from (hbuf + hlen) * to hbuf (main buffer and history buffer). * - * Note: provides the match location as the "end" offset when the callback is - * called. - * + * Note: provides the match location as the "end" offset when the callback is + * called. 
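For orientation, here is what a match callback on the receiving end of these APIs might look like. This is an assumption-laden sketch, not code from the commit: the four-argument shape is inferred from the cb(0, report_offset, curr->report, ctxt) calls in processReports() earlier in this diff, the stand-in typedefs and MO_* values approximate ue2common.h, and firstMatchCb itself is hypothetical.

/* Stand-ins for ue2common.h types and constants (values assumed). */
typedef unsigned long long u64a;
typedef unsigned int ReportID;
#define MO_CONTINUE_MATCHING 1
#define MO_HALT_MATCHING 0

struct first_match {
    u64a offset;
    ReportID id;
    int seen;
};

/* Hypothetical NfaCallback: record the first match, then halt the scan.
 * As the note above says, the match location arrives as the "end" offset. */
static int firstMatchCb(u64a start, u64a end, ReportID id, void *ctx) {
    struct first_match *m = (struct first_match *)ctx;
    (void)start;
    m->offset = end;
    m->id = id;
    m->seen = 1;
    return MO_HALT_MATCHING; /* engines bail out on this, per processReports */
}
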
+ * * @param nfa engine to run * @param offset base offset of buf * @param buf main buffer @@ -242,7 +242,7 @@ char nfaQueueExecRose(const struct NFA *nfa, struct mq *q, ReportID report); */ char nfaBlockExecReverse(const struct NFA *nfa, u64a offset, const u8 *buf, size_t buflen, const u8 *hbuf, size_t hlen, - NfaCallback callback, void *context); + NfaCallback callback, void *context); /** * Check whether the given NFA's state indicates that it is in one or more @@ -256,13 +256,13 @@ char nfaBlockExecReverse(const struct NFA *nfa, u64a offset, const u8 *buf, * @param offset the offset to return (via the callback) with each match * @param callback the callback to call for each match raised * @param context context pointer passed to each callback - * - * @return @ref MO_HALT_MATCHING if the user instructed us to halt, otherwise - * @ref MO_CONTINUE_MATCHING. + * + * @return @ref MO_HALT_MATCHING if the user instructed us to halt, otherwise + * @ref MO_CONTINUE_MATCHING. */ char nfaCheckFinalState(const struct NFA *nfa, const char *state, const char *streamState, u64a offset, - NfaCallback callback, void *context); + NfaCallback callback, void *context); /** * Indicates if an engine is a zombie. diff --git a/contrib/libs/hyperscan/src/nfa/nfa_api_dispatch.c b/contrib/libs/hyperscan/src/nfa/nfa_api_dispatch.c index 75cac4b481..6a52b7ca7d 100644 --- a/contrib/libs/hyperscan/src/nfa/nfa_api_dispatch.c +++ b/contrib/libs/hyperscan/src/nfa/nfa_api_dispatch.c @@ -41,59 +41,59 @@ #include "lbr.h" #include "limex.h" #include "mcclellan.h" -#include "mcsheng.h" +#include "mcsheng.h" #include "mpv.h" -#include "sheng.h" -#include "tamarama.h" +#include "sheng.h" +#include "tamarama.h" -#define DISPATCH_CASE(dc_ltype, dc_ftype, dc_func_call) \ - case dc_ltype: \ - return nfaExec##dc_ftype##dc_func_call; \ +#define DISPATCH_CASE(dc_ltype, dc_ftype, dc_func_call) \ + case dc_ltype: \ + return nfaExec##dc_ftype##dc_func_call; \ break // general framework calls -#define DISPATCH_BY_NFA_TYPE(dbnt_func) \ - switch (nfa->type) { \ - DISPATCH_CASE(LIMEX_NFA_32, LimEx32, dbnt_func); \ - DISPATCH_CASE(LIMEX_NFA_64, LimEx64, dbnt_func); \ - DISPATCH_CASE(LIMEX_NFA_128, LimEx128, dbnt_func); \ - DISPATCH_CASE(LIMEX_NFA_256, LimEx256, dbnt_func); \ - DISPATCH_CASE(LIMEX_NFA_384, LimEx384, dbnt_func); \ - DISPATCH_CASE(LIMEX_NFA_512, LimEx512, dbnt_func); \ - DISPATCH_CASE(MCCLELLAN_NFA_8, McClellan8, dbnt_func); \ - DISPATCH_CASE(MCCLELLAN_NFA_16, McClellan16, dbnt_func); \ - DISPATCH_CASE(GOUGH_NFA_8, Gough8, dbnt_func); \ - DISPATCH_CASE(GOUGH_NFA_16, Gough16, dbnt_func); \ - DISPATCH_CASE(MPV_NFA, Mpv, dbnt_func); \ - DISPATCH_CASE(LBR_NFA_DOT, LbrDot, dbnt_func); \ - DISPATCH_CASE(LBR_NFA_VERM, LbrVerm, dbnt_func); \ - DISPATCH_CASE(LBR_NFA_NVERM, LbrNVerm, dbnt_func); \ - DISPATCH_CASE(LBR_NFA_SHUF, LbrShuf, dbnt_func); \ - DISPATCH_CASE(LBR_NFA_TRUF, LbrTruf, dbnt_func); \ - DISPATCH_CASE(CASTLE_NFA, Castle, dbnt_func); \ - DISPATCH_CASE(SHENG_NFA, Sheng, dbnt_func); \ - DISPATCH_CASE(TAMARAMA_NFA, Tamarama, dbnt_func); \ - DISPATCH_CASE(MCSHENG_NFA_8, McSheng8, dbnt_func); \ - DISPATCH_CASE(MCSHENG_NFA_16, McSheng16, dbnt_func); \ +#define DISPATCH_BY_NFA_TYPE(dbnt_func) \ + switch (nfa->type) { \ + DISPATCH_CASE(LIMEX_NFA_32, LimEx32, dbnt_func); \ + DISPATCH_CASE(LIMEX_NFA_64, LimEx64, dbnt_func); \ + DISPATCH_CASE(LIMEX_NFA_128, LimEx128, dbnt_func); \ + DISPATCH_CASE(LIMEX_NFA_256, LimEx256, dbnt_func); \ + DISPATCH_CASE(LIMEX_NFA_384, LimEx384, dbnt_func); \ + DISPATCH_CASE(LIMEX_NFA_512, LimEx512, 
dbnt_func); \ + DISPATCH_CASE(MCCLELLAN_NFA_8, McClellan8, dbnt_func); \ + DISPATCH_CASE(MCCLELLAN_NFA_16, McClellan16, dbnt_func); \ + DISPATCH_CASE(GOUGH_NFA_8, Gough8, dbnt_func); \ + DISPATCH_CASE(GOUGH_NFA_16, Gough16, dbnt_func); \ + DISPATCH_CASE(MPV_NFA, Mpv, dbnt_func); \ + DISPATCH_CASE(LBR_NFA_DOT, LbrDot, dbnt_func); \ + DISPATCH_CASE(LBR_NFA_VERM, LbrVerm, dbnt_func); \ + DISPATCH_CASE(LBR_NFA_NVERM, LbrNVerm, dbnt_func); \ + DISPATCH_CASE(LBR_NFA_SHUF, LbrShuf, dbnt_func); \ + DISPATCH_CASE(LBR_NFA_TRUF, LbrTruf, dbnt_func); \ + DISPATCH_CASE(CASTLE_NFA, Castle, dbnt_func); \ + DISPATCH_CASE(SHENG_NFA, Sheng, dbnt_func); \ + DISPATCH_CASE(TAMARAMA_NFA, Tamarama, dbnt_func); \ + DISPATCH_CASE(MCSHENG_NFA_8, McSheng8, dbnt_func); \ + DISPATCH_CASE(MCSHENG_NFA_16, McSheng16, dbnt_func); \ DISPATCH_CASE(SHENG_NFA_32, Sheng32, dbnt_func); \ DISPATCH_CASE(SHENG_NFA_64, Sheng64, dbnt_func); \ DISPATCH_CASE(MCSHENG_64_NFA_8, McSheng64_8, dbnt_func); \ DISPATCH_CASE(MCSHENG_64_NFA_16, McSheng64_16, dbnt_func); \ - default: \ - assert(0); \ + default: \ + assert(0); \ } char nfaCheckFinalState(const struct NFA *nfa, const char *state, const char *streamState, u64a offset, - NfaCallback callback, void *context) { + NfaCallback callback, void *context) { assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa))); // Caller should avoid calling us if we can never produce matches. assert(nfaAcceptsEod(nfa)); DISPATCH_BY_NFA_TYPE(_testEOD(nfa, state, streamState, offset, callback, - context)); + context)); return 0; } @@ -116,14 +116,14 @@ char nfaQueueExec2_i(const struct NFA *nfa, struct mq *q, s64a end) { return 0; } -char nfaQueueExec_raw(const struct NFA *nfa, struct mq *q, s64a end) { - return nfaQueueExec_i(nfa, q, end); -} - -char nfaQueueExec2_raw(const struct NFA *nfa, struct mq *q, s64a end) { - return nfaQueueExec2_i(nfa, q, end); -} - +char nfaQueueExec_raw(const struct NFA *nfa, struct mq *q, s64a end) { + return nfaQueueExec_i(nfa, q, end); +} + +char nfaQueueExec2_raw(const struct NFA *nfa, struct mq *q, s64a end) { + return nfaQueueExec2_i(nfa, q, end); +} + static really_inline char nfaQueueExecRose_i(const struct NFA *nfa, struct mq *q, ReportID report) { DISPATCH_BY_NFA_TYPE(_QR(nfa, q, report)); @@ -303,11 +303,11 @@ char nfaInAcceptState(const struct NFA *nfa, ReportID report, struct mq *q) { return 0; } -char nfaInAnyAcceptState(const struct NFA *nfa, struct mq *q) { - DISPATCH_BY_NFA_TYPE(_inAnyAccept(nfa, q)); - return 0; -} - +char nfaInAnyAcceptState(const struct NFA *nfa, struct mq *q) { + DISPATCH_BY_NFA_TYPE(_inAnyAccept(nfa, q)); + return 0; +} + char nfaQueueExecRose(const struct NFA *nfa, struct mq *q, ReportID r) { DEBUG_PRINTF("nfa=%p\n", nfa); #ifdef DEBUG @@ -325,12 +325,12 @@ char nfaQueueExecRose(const struct NFA *nfa, struct mq *q, ReportID r) { char nfaBlockExecReverse(const struct NFA *nfa, u64a offset, const u8 *buf, size_t buflen, const u8 *hbuf, size_t hlen, - NfaCallback callback, void *context) { + NfaCallback callback, void *context) { assert(nfa); assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa))); DISPATCH_BY_NFA_TYPE(_B_Reverse(nfa, offset, buf, buflen, hbuf, hlen, - callback, context)); + callback, context)); return 0; } diff --git a/contrib/libs/hyperscan/src/nfa/nfa_api_queue.h b/contrib/libs/hyperscan/src/nfa/nfa_api_queue.h index e3579a7ee2..52ce214c5f 100644 --- a/contrib/libs/hyperscan/src/nfa/nfa_api_queue.h +++ b/contrib/libs/hyperscan/src/nfa/nfa_api_queue.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation 
+ * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: diff --git a/contrib/libs/hyperscan/src/nfa/nfa_build_util.cpp b/contrib/libs/hyperscan/src/nfa/nfa_build_util.cpp index 47153163e9..293a84f8ac 100644 --- a/contrib/libs/hyperscan/src/nfa/nfa_build_util.cpp +++ b/contrib/libs/hyperscan/src/nfa/nfa_build_util.cpp @@ -30,8 +30,8 @@ #include "limex_internal.h" #include "mcclellancompile.h" -#include "mcsheng_compile.h" -#include "shengcompile.h" +#include "mcsheng_compile.h" +#include "shengcompile.h" #include "nfa_internal.h" #include "repeat_internal.h" #include "ue2common.h" @@ -80,7 +80,7 @@ struct DISPATCH_BY_NFA_TYPE_INT<sfunc, rv_t, arg_t, INVALID_NFA> { decltype(arg), (NFAEngineType)0>::doOp(i, arg) } -typedef bool (*nfa_dispatch_fn)(const NFA *nfa); +typedef bool (*nfa_dispatch_fn)(const NFA *nfa); template<typename T> static @@ -89,40 +89,40 @@ bool has_accel_limex(const NFA *nfa) { return limex->accelCount; } -template<typename T> -static -bool has_repeats_limex(const NFA *nfa) { - const T *limex = (const T *)getImplNfa(nfa); - return limex->repeatCount; -} - - -template<typename T> +template<typename T> static -bool has_repeats_other_than_firsts_limex(const NFA *nfa) { - const T *limex = (const T *)getImplNfa(nfa); - const char *ptr = (const char *)limex; - - const u32 *repeatOffset = (const u32 *)(ptr + limex->repeatOffset); - - for (u32 i = 0; i < limex->repeatCount; i++) { - u32 offset = repeatOffset[i]; - const NFARepeatInfo *info = (const NFARepeatInfo *)(ptr + offset); - const RepeatInfo *repeat = - (const RepeatInfo *)((const char *)info + sizeof(*info)); - if (repeat->type != REPEAT_FIRST) { - return true; - } - } - - return false; -} - -static -bool dispatch_false(const NFA *) { +bool has_repeats_limex(const NFA *nfa) { + const T *limex = (const T *)getImplNfa(nfa); + return limex->repeatCount; +} + + +template<typename T> +static +bool has_repeats_other_than_firsts_limex(const NFA *nfa) { + const T *limex = (const T *)getImplNfa(nfa); + const char *ptr = (const char *)limex; + + const u32 *repeatOffset = (const u32 *)(ptr + limex->repeatOffset); + + for (u32 i = 0; i < limex->repeatCount; i++) { + u32 offset = repeatOffset[i]; + const NFARepeatInfo *info = (const NFARepeatInfo *)(ptr + offset); + const RepeatInfo *repeat = + (const RepeatInfo *)((const char *)info + sizeof(*info)); + if (repeat->type != REPEAT_FIRST) { + return true; + } + } + return false; } +static +bool dispatch_false(const NFA *) { + return false; +} + #ifdef DUMP_SUPPORT namespace { template<NFAEngineType t> @@ -171,51 +171,51 @@ enum NFACategory {NFA_LIMEX, NFA_OTHER}; #define DO_IF_DUMP_SUPPORT(a) #endif -#define MAKE_LIMEX_TRAITS(mlt_size, mlt_align) \ - template<> struct NFATraits<LIMEX_NFA_##mlt_size> { \ +#define MAKE_LIMEX_TRAITS(mlt_size, mlt_align) \ + template<> struct NFATraits<LIMEX_NFA_##mlt_size> { \ static UNUSED const char *name; \ static const NFACategory category = NFA_LIMEX; \ typedef LimExNFA##mlt_size implNFA_t; \ - static const nfa_dispatch_fn has_accel; \ - static const nfa_dispatch_fn has_repeats; \ - static const nfa_dispatch_fn has_repeats_other_than_firsts; \ + static const nfa_dispatch_fn has_accel; \ + static const nfa_dispatch_fn has_repeats; \ + static const nfa_dispatch_fn has_repeats_other_than_firsts; \ static const u32 stateAlign = \ - MAX(mlt_align, alignof(RepeatControl)); \ + MAX(mlt_align, alignof(RepeatControl)); \ }; 
\ - const nfa_dispatch_fn NFATraits<LIMEX_NFA_##mlt_size>::has_accel \ + const nfa_dispatch_fn NFATraits<LIMEX_NFA_##mlt_size>::has_accel \ = has_accel_limex<LimExNFA##mlt_size>; \ - const nfa_dispatch_fn NFATraits<LIMEX_NFA_##mlt_size>::has_repeats \ - = has_repeats_limex<LimExNFA##mlt_size>; \ - const nfa_dispatch_fn \ - NFATraits<LIMEX_NFA_##mlt_size>::has_repeats_other_than_firsts \ - = has_repeats_other_than_firsts_limex<LimExNFA##mlt_size>; \ + const nfa_dispatch_fn NFATraits<LIMEX_NFA_##mlt_size>::has_repeats \ + = has_repeats_limex<LimExNFA##mlt_size>; \ + const nfa_dispatch_fn \ + NFATraits<LIMEX_NFA_##mlt_size>::has_repeats_other_than_firsts \ + = has_repeats_other_than_firsts_limex<LimExNFA##mlt_size>; \ DO_IF_DUMP_SUPPORT( \ - const char *NFATraits<LIMEX_NFA_##mlt_size>::name \ - = "LimEx "#mlt_size; \ - template<> struct getDescription<LIMEX_NFA_##mlt_size> { \ - static string call(const void *p) { \ - return getDescriptionLimEx<LIMEX_NFA_##mlt_size>((const NFA *)p); \ - } \ + const char *NFATraits<LIMEX_NFA_##mlt_size>::name \ + = "LimEx "#mlt_size; \ + template<> struct getDescription<LIMEX_NFA_##mlt_size> { \ + static string call(const void *p) { \ + return getDescriptionLimEx<LIMEX_NFA_##mlt_size>((const NFA *)p); \ + } \ };) -MAKE_LIMEX_TRAITS(32, alignof(u32)) -MAKE_LIMEX_TRAITS(64, alignof(m128)) /* special, 32bit arch uses m128 */ -MAKE_LIMEX_TRAITS(128, alignof(m128)) -MAKE_LIMEX_TRAITS(256, alignof(m256)) -MAKE_LIMEX_TRAITS(384, alignof(m384)) -MAKE_LIMEX_TRAITS(512, alignof(m512)) +MAKE_LIMEX_TRAITS(32, alignof(u32)) +MAKE_LIMEX_TRAITS(64, alignof(m128)) /* special, 32bit arch uses m128 */ +MAKE_LIMEX_TRAITS(128, alignof(m128)) +MAKE_LIMEX_TRAITS(256, alignof(m256)) +MAKE_LIMEX_TRAITS(384, alignof(m384)) +MAKE_LIMEX_TRAITS(512, alignof(m512)) template<> struct NFATraits<MCCLELLAN_NFA_8> { UNUSED static const char *name; static const NFACategory category = NFA_OTHER; static const u32 stateAlign = 1; - static const nfa_dispatch_fn has_accel; - static const nfa_dispatch_fn has_repeats; - static const nfa_dispatch_fn has_repeats_other_than_firsts; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; }; -const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_8>::has_accel = has_accel_mcclellan; -const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_8>::has_repeats = dispatch_false; -const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_8>::has_repeats_other_than_firsts = dispatch_false; +const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_8>::has_accel = has_accel_mcclellan; +const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_8>::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_8>::has_repeats_other_than_firsts = dispatch_false; #if defined(DUMP_SUPPORT) const char *NFATraits<MCCLELLAN_NFA_8>::name = "McClellan 8"; #endif @@ -224,13 +224,13 @@ template<> struct NFATraits<MCCLELLAN_NFA_16> { UNUSED static const char *name; static const NFACategory category = NFA_OTHER; static const u32 stateAlign = 2; - static const nfa_dispatch_fn has_accel; - static const nfa_dispatch_fn has_repeats; - static const nfa_dispatch_fn has_repeats_other_than_firsts; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; }; -const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_16>::has_accel = has_accel_mcclellan; -const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_16>::has_repeats = dispatch_false; -const nfa_dispatch_fn 
NFATraits<MCCLELLAN_NFA_16>::has_repeats_other_than_firsts = dispatch_false; +const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_16>::has_accel = has_accel_mcclellan; +const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_16>::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_16>::has_repeats_other_than_firsts = dispatch_false; #if defined(DUMP_SUPPORT) const char *NFATraits<MCCLELLAN_NFA_16>::name = "McClellan 16"; #endif @@ -239,13 +239,13 @@ template<> struct NFATraits<GOUGH_NFA_8> { UNUSED static const char *name; static const NFACategory category = NFA_OTHER; static const u32 stateAlign = 8; - static const nfa_dispatch_fn has_accel; - static const nfa_dispatch_fn has_repeats; - static const nfa_dispatch_fn has_repeats_other_than_firsts; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; }; -const nfa_dispatch_fn NFATraits<GOUGH_NFA_8>::has_accel = has_accel_mcclellan; -const nfa_dispatch_fn NFATraits<GOUGH_NFA_8>::has_repeats = dispatch_false; -const nfa_dispatch_fn NFATraits<GOUGH_NFA_8>::has_repeats_other_than_firsts = dispatch_false; +const nfa_dispatch_fn NFATraits<GOUGH_NFA_8>::has_accel = has_accel_mcclellan; +const nfa_dispatch_fn NFATraits<GOUGH_NFA_8>::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits<GOUGH_NFA_8>::has_repeats_other_than_firsts = dispatch_false; #if defined(DUMP_SUPPORT) const char *NFATraits<GOUGH_NFA_8>::name = "Goughfish 8"; #endif @@ -254,182 +254,182 @@ template<> struct NFATraits<GOUGH_NFA_16> { UNUSED static const char *name; static const NFACategory category = NFA_OTHER; static const u32 stateAlign = 8; - static const nfa_dispatch_fn has_accel; - static const nfa_dispatch_fn has_repeats; - static const nfa_dispatch_fn has_repeats_other_than_firsts; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; }; -const nfa_dispatch_fn NFATraits<GOUGH_NFA_16>::has_accel = has_accel_mcclellan; -const nfa_dispatch_fn NFATraits<GOUGH_NFA_16>::has_repeats = dispatch_false; -const nfa_dispatch_fn NFATraits<GOUGH_NFA_16>::has_repeats_other_than_firsts = dispatch_false; +const nfa_dispatch_fn NFATraits<GOUGH_NFA_16>::has_accel = has_accel_mcclellan; +const nfa_dispatch_fn NFATraits<GOUGH_NFA_16>::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits<GOUGH_NFA_16>::has_repeats_other_than_firsts = dispatch_false; #if defined(DUMP_SUPPORT) const char *NFATraits<GOUGH_NFA_16>::name = "Goughfish 16"; #endif -template<> struct NFATraits<MPV_NFA> { +template<> struct NFATraits<MPV_NFA> { UNUSED static const char *name; static const NFACategory category = NFA_OTHER; static const u32 stateAlign = 8; - static const nfa_dispatch_fn has_accel; - static const nfa_dispatch_fn has_repeats; - static const nfa_dispatch_fn has_repeats_other_than_firsts; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; }; -const nfa_dispatch_fn NFATraits<MPV_NFA>::has_accel = dispatch_false; -const nfa_dispatch_fn NFATraits<MPV_NFA>::has_repeats = dispatch_false; -const nfa_dispatch_fn NFATraits<MPV_NFA>::has_repeats_other_than_firsts = dispatch_false; +const nfa_dispatch_fn NFATraits<MPV_NFA>::has_accel = dispatch_false; +const nfa_dispatch_fn NFATraits<MPV_NFA>::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits<MPV_NFA>::has_repeats_other_than_firsts = dispatch_false; 
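The repetitive churn in this file is all one pattern, so it is worth spelling out once: every engine type gets an NFATraits<t> specialization whose function-pointer members (has_accel, has_repeats, has_repeats_other_than_firsts) hold either a real query such as has_accel_mcclellan or the dispatch_false stub, and a switch over the runtime type tag picks the right specialization. A compressed sketch of the mechanism with made-up engine names (the real code generates the switch via the DISPATCH_BY_NFA_TYPE machinery rather than writing it by hand):

#include <cassert>

struct NFA { unsigned type; };
typedef bool (*nfa_dispatch_fn)(const NFA *);

static bool dispatch_false(const NFA *) { return false; }
static bool dispatch_true(const NFA *) { return true; }

enum EngineType { ENGINE_A = 0, ENGINE_B = 1 };

template<EngineType t> struct Traits; /* one specialization per engine */
template<> struct Traits<ENGINE_A> {
    static const unsigned stateAlign = 8;
    static const nfa_dispatch_fn has_accel;
};
const nfa_dispatch_fn Traits<ENGINE_A>::has_accel = dispatch_true;

template<> struct Traits<ENGINE_B> {
    static const unsigned stateAlign = 1;
    static const nfa_dispatch_fn has_accel;
};
const nfa_dispatch_fn Traits<ENGINE_B>::has_accel = dispatch_false;

bool has_accel(const NFA &nfa) {
    switch (nfa.type) { /* runtime tag selects compile-time traits */
    case ENGINE_A: return Traits<ENGINE_A>::has_accel(&nfa);
    case ENGINE_B: return Traits<ENGINE_B>::has_accel(&nfa);
    }
    assert(!"unknown engine type");
    return false;
}

This also explains the double application visible at the bottom of this file's diff: the build-util flavour of DISPATCH_BY_NFA_TYPE returns the function pointer stored in the traits, and the trailing (&nfa) immediately invokes it, which is why has_bounded_repeats() reads as DISPATCH_BY_NFA_TYPE(...)(&nfa).
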
#if defined(DUMP_SUPPORT) -const char *NFATraits<MPV_NFA>::name = "Mega-Puff-Vac"; +const char *NFATraits<MPV_NFA>::name = "Mega-Puff-Vac"; #endif -template<> struct NFATraits<CASTLE_NFA> { +template<> struct NFATraits<CASTLE_NFA> { UNUSED static const char *name; static const NFACategory category = NFA_OTHER; static const u32 stateAlign = 8; - static const nfa_dispatch_fn has_accel; - static const nfa_dispatch_fn has_repeats; - static const nfa_dispatch_fn has_repeats_other_than_firsts; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; }; -const nfa_dispatch_fn NFATraits<CASTLE_NFA>::has_accel = dispatch_false; -const nfa_dispatch_fn NFATraits<CASTLE_NFA>::has_repeats = dispatch_false; -const nfa_dispatch_fn NFATraits<CASTLE_NFA>::has_repeats_other_than_firsts = dispatch_false; +const nfa_dispatch_fn NFATraits<CASTLE_NFA>::has_accel = dispatch_false; +const nfa_dispatch_fn NFATraits<CASTLE_NFA>::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits<CASTLE_NFA>::has_repeats_other_than_firsts = dispatch_false; #if defined(DUMP_SUPPORT) -const char *NFATraits<CASTLE_NFA>::name = "Castle"; +const char *NFATraits<CASTLE_NFA>::name = "Castle"; #endif -template<> struct NFATraits<LBR_NFA_DOT> { +template<> struct NFATraits<LBR_NFA_DOT> { UNUSED static const char *name; static const NFACategory category = NFA_OTHER; static const u32 stateAlign = 8; - static const nfa_dispatch_fn has_accel; - static const nfa_dispatch_fn has_repeats; - static const nfa_dispatch_fn has_repeats_other_than_firsts; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; }; -const nfa_dispatch_fn NFATraits<LBR_NFA_DOT>::has_accel = dispatch_false; -const nfa_dispatch_fn NFATraits<LBR_NFA_DOT>::has_repeats = dispatch_false; -const nfa_dispatch_fn NFATraits<LBR_NFA_DOT>::has_repeats_other_than_firsts = dispatch_false; +const nfa_dispatch_fn NFATraits<LBR_NFA_DOT>::has_accel = dispatch_false; +const nfa_dispatch_fn NFATraits<LBR_NFA_DOT>::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits<LBR_NFA_DOT>::has_repeats_other_than_firsts = dispatch_false; #if defined(DUMP_SUPPORT) -const char *NFATraits<LBR_NFA_DOT>::name = "Lim Bounded Repeat (D)"; +const char *NFATraits<LBR_NFA_DOT>::name = "Lim Bounded Repeat (D)"; #endif -template<> struct NFATraits<LBR_NFA_VERM> { +template<> struct NFATraits<LBR_NFA_VERM> { UNUSED static const char *name; static const NFACategory category = NFA_OTHER; static const u32 stateAlign = 8; - static const nfa_dispatch_fn has_accel; - static const nfa_dispatch_fn has_repeats; - static const nfa_dispatch_fn has_repeats_other_than_firsts; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; }; -const nfa_dispatch_fn NFATraits<LBR_NFA_VERM>::has_accel = dispatch_false; -const nfa_dispatch_fn NFATraits<LBR_NFA_VERM>::has_repeats = dispatch_false; -const nfa_dispatch_fn NFATraits<LBR_NFA_VERM>::has_repeats_other_than_firsts = dispatch_false; +const nfa_dispatch_fn NFATraits<LBR_NFA_VERM>::has_accel = dispatch_false; +const nfa_dispatch_fn NFATraits<LBR_NFA_VERM>::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits<LBR_NFA_VERM>::has_repeats_other_than_firsts = dispatch_false; #if defined(DUMP_SUPPORT) -const char *NFATraits<LBR_NFA_VERM>::name = "Lim Bounded Repeat (V)"; +const char 
*NFATraits<LBR_NFA_VERM>::name = "Lim Bounded Repeat (V)"; #endif -template<> struct NFATraits<LBR_NFA_NVERM> { +template<> struct NFATraits<LBR_NFA_NVERM> { UNUSED static const char *name; static const NFACategory category = NFA_OTHER; static const u32 stateAlign = 8; - static const nfa_dispatch_fn has_accel; - static const nfa_dispatch_fn has_repeats; - static const nfa_dispatch_fn has_repeats_other_than_firsts; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; }; -const nfa_dispatch_fn NFATraits<LBR_NFA_NVERM>::has_accel = dispatch_false; -const nfa_dispatch_fn NFATraits<LBR_NFA_NVERM>::has_repeats = dispatch_false; -const nfa_dispatch_fn NFATraits<LBR_NFA_NVERM>::has_repeats_other_than_firsts = dispatch_false; +const nfa_dispatch_fn NFATraits<LBR_NFA_NVERM>::has_accel = dispatch_false; +const nfa_dispatch_fn NFATraits<LBR_NFA_NVERM>::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits<LBR_NFA_NVERM>::has_repeats_other_than_firsts = dispatch_false; #if defined(DUMP_SUPPORT) -const char *NFATraits<LBR_NFA_NVERM>::name = "Lim Bounded Repeat (NV)"; +const char *NFATraits<LBR_NFA_NVERM>::name = "Lim Bounded Repeat (NV)"; #endif -template<> struct NFATraits<LBR_NFA_SHUF> { +template<> struct NFATraits<LBR_NFA_SHUF> { UNUSED static const char *name; static const NFACategory category = NFA_OTHER; static const u32 stateAlign = 8; - static const nfa_dispatch_fn has_accel; - static const nfa_dispatch_fn has_repeats; - static const nfa_dispatch_fn has_repeats_other_than_firsts; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; }; -const nfa_dispatch_fn NFATraits<LBR_NFA_SHUF>::has_accel = dispatch_false; -const nfa_dispatch_fn NFATraits<LBR_NFA_SHUF>::has_repeats = dispatch_false; -const nfa_dispatch_fn NFATraits<LBR_NFA_SHUF>::has_repeats_other_than_firsts = dispatch_false; +const nfa_dispatch_fn NFATraits<LBR_NFA_SHUF>::has_accel = dispatch_false; +const nfa_dispatch_fn NFATraits<LBR_NFA_SHUF>::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits<LBR_NFA_SHUF>::has_repeats_other_than_firsts = dispatch_false; #if defined(DUMP_SUPPORT) -const char *NFATraits<LBR_NFA_SHUF>::name = "Lim Bounded Repeat (S)"; +const char *NFATraits<LBR_NFA_SHUF>::name = "Lim Bounded Repeat (S)"; #endif -template<> struct NFATraits<LBR_NFA_TRUF> { +template<> struct NFATraits<LBR_NFA_TRUF> { UNUSED static const char *name; static const NFACategory category = NFA_OTHER; static const u32 stateAlign = 8; - static const nfa_dispatch_fn has_accel; - static const nfa_dispatch_fn has_repeats; - static const nfa_dispatch_fn has_repeats_other_than_firsts; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; }; -const nfa_dispatch_fn NFATraits<LBR_NFA_TRUF>::has_accel = dispatch_false; -const nfa_dispatch_fn NFATraits<LBR_NFA_TRUF>::has_repeats = dispatch_false; -const nfa_dispatch_fn NFATraits<LBR_NFA_TRUF>::has_repeats_other_than_firsts = dispatch_false; +const nfa_dispatch_fn NFATraits<LBR_NFA_TRUF>::has_accel = dispatch_false; +const nfa_dispatch_fn NFATraits<LBR_NFA_TRUF>::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits<LBR_NFA_TRUF>::has_repeats_other_than_firsts = dispatch_false; #if defined(DUMP_SUPPORT) -const char *NFATraits<LBR_NFA_TRUF>::name = "Lim Bounded Repeat (M)"; -#endif - -template<> 
struct NFATraits<SHENG_NFA> { - UNUSED static const char *name; - static const NFACategory category = NFA_OTHER; - static const u32 stateAlign = 1; - static const nfa_dispatch_fn has_accel; - static const nfa_dispatch_fn has_repeats; - static const nfa_dispatch_fn has_repeats_other_than_firsts; -}; -const nfa_dispatch_fn NFATraits<SHENG_NFA>::has_accel = has_accel_sheng; -const nfa_dispatch_fn NFATraits<SHENG_NFA>::has_repeats = dispatch_false; -const nfa_dispatch_fn NFATraits<SHENG_NFA>::has_repeats_other_than_firsts = dispatch_false; -#if defined(DUMP_SUPPORT) -const char *NFATraits<SHENG_NFA>::name = "Sheng"; -#endif - -template<> struct NFATraits<TAMARAMA_NFA> { - UNUSED static const char *name; - static const NFACategory category = NFA_OTHER; - static const u32 stateAlign = 64; - static const nfa_dispatch_fn has_accel; - static const nfa_dispatch_fn has_repeats; - static const nfa_dispatch_fn has_repeats_other_than_firsts; -}; -const nfa_dispatch_fn NFATraits<TAMARAMA_NFA>::has_accel = dispatch_false; -const nfa_dispatch_fn NFATraits<TAMARAMA_NFA>::has_repeats = dispatch_false; -const nfa_dispatch_fn NFATraits<TAMARAMA_NFA>::has_repeats_other_than_firsts = dispatch_false; -#if defined(DUMP_SUPPORT) -const char *NFATraits<TAMARAMA_NFA>::name = "Tamarama"; -#endif - -template<> struct NFATraits<MCSHENG_NFA_8> { - UNUSED static const char *name; - static const NFACategory category = NFA_OTHER; - static const u32 stateAlign = 1; - static const nfa_dispatch_fn has_accel; - static const nfa_dispatch_fn has_repeats; - static const nfa_dispatch_fn has_repeats_other_than_firsts; -}; -const nfa_dispatch_fn NFATraits<MCSHENG_NFA_8>::has_accel = has_accel_mcsheng; -const nfa_dispatch_fn NFATraits<MCSHENG_NFA_8>::has_repeats = dispatch_false; -const nfa_dispatch_fn NFATraits<MCSHENG_NFA_8>::has_repeats_other_than_firsts = dispatch_false; -#if defined(DUMP_SUPPORT) -const char *NFATraits<MCSHENG_NFA_8>::name = "Shengy McShengFace 8"; -#endif - -template<> struct NFATraits<MCSHENG_NFA_16> { - UNUSED static const char *name; - static const NFACategory category = NFA_OTHER; - static const u32 stateAlign = 2; - static const nfa_dispatch_fn has_accel; - static const nfa_dispatch_fn has_repeats; - static const nfa_dispatch_fn has_repeats_other_than_firsts; -}; -const nfa_dispatch_fn NFATraits<MCSHENG_NFA_16>::has_accel = has_accel_mcsheng; -const nfa_dispatch_fn NFATraits<MCSHENG_NFA_16>::has_repeats = dispatch_false; -const nfa_dispatch_fn NFATraits<MCSHENG_NFA_16>::has_repeats_other_than_firsts = dispatch_false; -#if defined(DUMP_SUPPORT) -const char *NFATraits<MCSHENG_NFA_16>::name = "Shengy McShengFace 16"; +const char *NFATraits<LBR_NFA_TRUF>::name = "Lim Bounded Repeat (M)"; #endif +template<> struct NFATraits<SHENG_NFA> { + UNUSED static const char *name; + static const NFACategory category = NFA_OTHER; + static const u32 stateAlign = 1; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; +}; +const nfa_dispatch_fn NFATraits<SHENG_NFA>::has_accel = has_accel_sheng; +const nfa_dispatch_fn NFATraits<SHENG_NFA>::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits<SHENG_NFA>::has_repeats_other_than_firsts = dispatch_false; +#if defined(DUMP_SUPPORT) +const char *NFATraits<SHENG_NFA>::name = "Sheng"; +#endif + +template<> struct NFATraits<TAMARAMA_NFA> { + UNUSED static const char *name; + static const NFACategory category = NFA_OTHER; + static const u32 stateAlign = 64; + static const 
nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; +}; +const nfa_dispatch_fn NFATraits<TAMARAMA_NFA>::has_accel = dispatch_false; +const nfa_dispatch_fn NFATraits<TAMARAMA_NFA>::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits<TAMARAMA_NFA>::has_repeats_other_than_firsts = dispatch_false; +#if defined(DUMP_SUPPORT) +const char *NFATraits<TAMARAMA_NFA>::name = "Tamarama"; +#endif + +template<> struct NFATraits<MCSHENG_NFA_8> { + UNUSED static const char *name; + static const NFACategory category = NFA_OTHER; + static const u32 stateAlign = 1; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; +}; +const nfa_dispatch_fn NFATraits<MCSHENG_NFA_8>::has_accel = has_accel_mcsheng; +const nfa_dispatch_fn NFATraits<MCSHENG_NFA_8>::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits<MCSHENG_NFA_8>::has_repeats_other_than_firsts = dispatch_false; +#if defined(DUMP_SUPPORT) +const char *NFATraits<MCSHENG_NFA_8>::name = "Shengy McShengFace 8"; +#endif + +template<> struct NFATraits<MCSHENG_NFA_16> { + UNUSED static const char *name; + static const NFACategory category = NFA_OTHER; + static const u32 stateAlign = 2; + static const nfa_dispatch_fn has_accel; + static const nfa_dispatch_fn has_repeats; + static const nfa_dispatch_fn has_repeats_other_than_firsts; +}; +const nfa_dispatch_fn NFATraits<MCSHENG_NFA_16>::has_accel = has_accel_mcsheng; +const nfa_dispatch_fn NFATraits<MCSHENG_NFA_16>::has_repeats = dispatch_false; +const nfa_dispatch_fn NFATraits<MCSHENG_NFA_16>::has_repeats_other_than_firsts = dispatch_false; +#if defined(DUMP_SUPPORT) +const char *NFATraits<MCSHENG_NFA_16>::name = "Shengy McShengFace 16"; +#endif + template<> struct NFATraits<SHENG_NFA_32> { UNUSED static const char *name; static const NFACategory category = NFA_OTHER; @@ -525,39 +525,39 @@ struct is_limex { }; } -namespace { -template<NFAEngineType t> -struct has_repeats_other_than_firsts_dispatch { - static nfa_dispatch_fn call(const void *) { - return NFATraits<t>::has_repeats_other_than_firsts; +namespace { +template<NFAEngineType t> +struct has_repeats_other_than_firsts_dispatch { + static nfa_dispatch_fn call(const void *) { + return NFATraits<t>::has_repeats_other_than_firsts; } -}; -} - -bool has_bounded_repeats_other_than_firsts(const NFA &nfa) { - return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, - has_repeats_other_than_firsts_dispatch, - &nfa)(&nfa); -} - -namespace { -template<NFAEngineType t> -struct has_repeats_dispatch { - static nfa_dispatch_fn call(const void *) { - return NFATraits<t>::has_repeats; +}; +} + +bool has_bounded_repeats_other_than_firsts(const NFA &nfa) { + return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, + has_repeats_other_than_firsts_dispatch, + &nfa)(&nfa); +} + +namespace { +template<NFAEngineType t> +struct has_repeats_dispatch { + static nfa_dispatch_fn call(const void *) { + return NFATraits<t>::has_repeats; } -}; +}; } bool has_bounded_repeats(const NFA &nfa) { - return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, has_repeats_dispatch, - &nfa)(&nfa); + return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, has_repeats_dispatch, + &nfa)(&nfa); } namespace { template<NFAEngineType t> struct has_accel_dispatch { - static nfa_dispatch_fn call(const void *) { + static nfa_dispatch_fn call(const void *) { return NFATraits<t>::has_accel; } }; @@ -565,7 +565,7 @@ struct 
has_accel_dispatch { bool has_accel(const NFA &nfa) { return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, has_accel_dispatch, - &nfa)(&nfa); + &nfa)(&nfa); } bool requires_decompress_key(const NFA &nfa) { diff --git a/contrib/libs/hyperscan/src/nfa/nfa_internal.h b/contrib/libs/hyperscan/src/nfa/nfa_internal.h index ad27e28b14..58832fd14d 100644 --- a/contrib/libs/hyperscan/src/nfa/nfa_internal.h +++ b/contrib/libs/hyperscan/src/nfa/nfa_internal.h @@ -51,27 +51,27 @@ extern "C" // Common data structures for NFAs enum NFAEngineType { - LIMEX_NFA_32, - LIMEX_NFA_64, - LIMEX_NFA_128, - LIMEX_NFA_256, - LIMEX_NFA_384, - LIMEX_NFA_512, + LIMEX_NFA_32, + LIMEX_NFA_64, + LIMEX_NFA_128, + LIMEX_NFA_256, + LIMEX_NFA_384, + LIMEX_NFA_512, MCCLELLAN_NFA_8, /**< magic pseudo nfa */ MCCLELLAN_NFA_16, /**< magic pseudo nfa */ GOUGH_NFA_8, /**< magic pseudo nfa */ GOUGH_NFA_16, /**< magic pseudo nfa */ - MPV_NFA, /**< magic pseudo nfa */ - LBR_NFA_DOT, /**< magic pseudo nfa */ - LBR_NFA_VERM, /**< magic pseudo nfa */ - LBR_NFA_NVERM, /**< magic pseudo nfa */ - LBR_NFA_SHUF, /**< magic pseudo nfa */ - LBR_NFA_TRUF, /**< magic pseudo nfa */ - CASTLE_NFA, /**< magic pseudo nfa */ - SHENG_NFA, /**< magic pseudo nfa */ - TAMARAMA_NFA, /**< magic nfa container */ - MCSHENG_NFA_8, /**< magic pseudo nfa */ - MCSHENG_NFA_16, /**< magic pseudo nfa */ + MPV_NFA, /**< magic pseudo nfa */ + LBR_NFA_DOT, /**< magic pseudo nfa */ + LBR_NFA_VERM, /**< magic pseudo nfa */ + LBR_NFA_NVERM, /**< magic pseudo nfa */ + LBR_NFA_SHUF, /**< magic pseudo nfa */ + LBR_NFA_TRUF, /**< magic pseudo nfa */ + CASTLE_NFA, /**< magic pseudo nfa */ + SHENG_NFA, /**< magic pseudo nfa */ + TAMARAMA_NFA, /**< magic nfa container */ + MCSHENG_NFA_8, /**< magic pseudo nfa */ + MCSHENG_NFA_16, /**< magic pseudo nfa */ SHENG_NFA_32, /**< magic pseudo nfa */ SHENG_NFA_64, /**< magic pseudo nfa */ MCSHENG_64_NFA_8, /**< magic pseudo nfa */ @@ -149,23 +149,23 @@ static really_inline int isMcClellanType(u8 t) { return t == MCCLELLAN_NFA_8 || t == MCCLELLAN_NFA_16; } -/** \brief True if the given type (from NFA::type) is a Sheng-McClellan hybrid - * DFA. */ -static really_inline int isShengMcClellanType(u8 t) { +/** \brief True if the given type (from NFA::type) is a Sheng-McClellan hybrid + * DFA. */ +static really_inline int isShengMcClellanType(u8 t) { return t == MCSHENG_NFA_8 || t == MCSHENG_NFA_16 || t == MCSHENG_64_NFA_8 || t == MCSHENG_64_NFA_16; -} - +} + /** \brief True if the given type (from NFA::type) is a Gough DFA. */ static really_inline int isGoughType(u8 t) { return t == GOUGH_NFA_8 || t == GOUGH_NFA_16; } -/** \brief True if the given type (from NFA::type) is a Sheng DFA. */ +/** \brief True if the given type (from NFA::type) is a Sheng DFA. */ static really_inline int isSheng16Type(u8 t) { - return t == SHENG_NFA; -} - + return t == SHENG_NFA; +} + /** \brief True if the given type (from NFA::type) is a Sheng32 DFA. */ static really_inline int isSheng32Type(u8 t) { return t == SHENG_NFA_32; @@ -181,32 +181,32 @@ static really_inline int isShengType(u8 t) { return t == SHENG_NFA || t == SHENG_NFA_32 || t == SHENG_NFA_64; } -/** - * \brief True if the given type (from NFA::type) is a McClellan, Gough or - * Sheng DFA. - */ +/** + * \brief True if the given type (from NFA::type) is a McClellan, Gough or + * Sheng DFA. 
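Editorial note: the wrappers above (has_bounded_repeats_other_than_firsts(), has_bounded_repeats(), has_accel()) all follow one pattern: a per-engine NFATraits<t> specialization supplies a dispatch function pointer, and DISPATCH_BY_NFA_TYPE selects the right specialization from the runtime type byte. A minimal, self-contained sketch of that pattern follows; the Toy* names are illustrative stand-ins, not Hyperscan identifiers, and the real macro expands over every NFAEngineType value.

#include <cassert>

// Toy stand-ins for the engine-type byte and the dispatch function type.
enum ToyEngineType { TOY_LIMEX, TOY_MCCLELLAN };
using toy_dispatch_fn = bool (*)(const void *);

static bool toy_false(const void *) { return false; }
static bool toy_true(const void *) { return true; }

// Compile-time trait table: one specialization per engine type.
template <ToyEngineType t> struct ToyTraits;
template <> struct ToyTraits<TOY_LIMEX> {
    static constexpr toy_dispatch_fn has_accel = toy_false;
};
template <> struct ToyTraits<TOY_MCCLELLAN> {
    static constexpr toy_dispatch_fn has_accel = toy_true;
};

// Runtime type byte -> trait lookup, playing the role of DISPATCH_BY_NFA_TYPE.
static toy_dispatch_fn lookup_has_accel(int type) {
    switch (type) {
    case TOY_LIMEX:     return ToyTraits<TOY_LIMEX>::has_accel;
    case TOY_MCCLELLAN: return ToyTraits<TOY_MCCLELLAN>::has_accel;
    }
    assert(!"unknown engine type");
    return toy_false;
}

int main() {
    assert(!lookup_has_accel(TOY_LIMEX)(nullptr));
    assert(lookup_has_accel(TOY_MCCLELLAN)(nullptr));
}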
+ */ static really_inline int isDfaType(u8 t) { - return isMcClellanType(t) || isGoughType(t) || isShengType(t) - || isShengMcClellanType(t); -} - -static really_inline int isBigDfaType(u8 t) { - return t == MCCLELLAN_NFA_16 || t == MCSHENG_NFA_16 || t == GOUGH_NFA_16; -} - -static really_inline int isSmallDfaType(u8 t) { - return isDfaType(t) && !isBigDfaType(t); + return isMcClellanType(t) || isGoughType(t) || isShengType(t) + || isShengMcClellanType(t); } +static really_inline int isBigDfaType(u8 t) { + return t == MCCLELLAN_NFA_16 || t == MCSHENG_NFA_16 || t == GOUGH_NFA_16; +} + +static really_inline int isSmallDfaType(u8 t) { + return isDfaType(t) && !isBigDfaType(t); +} + /** \brief True if the given type (from NFA::type) is an NFA. */ static really_inline int isNfaType(u8 t) { switch (t) { - case LIMEX_NFA_32: - case LIMEX_NFA_64: - case LIMEX_NFA_128: - case LIMEX_NFA_256: - case LIMEX_NFA_384: - case LIMEX_NFA_512: + case LIMEX_NFA_32: + case LIMEX_NFA_64: + case LIMEX_NFA_128: + case LIMEX_NFA_256: + case LIMEX_NFA_384: + case LIMEX_NFA_512: return 1; default: break; @@ -217,17 +217,17 @@ static really_inline int isNfaType(u8 t) { /** \brief True if the given type (from NFA::type) is an LBR. */ static really_inline int isLbrType(u8 t) { - return t == LBR_NFA_DOT || t == LBR_NFA_VERM || t == LBR_NFA_NVERM || - t == LBR_NFA_SHUF || t == LBR_NFA_TRUF; -} - -/** \brief True if the given type (from NFA::type) is a container engine. */ -static really_inline -int isContainerType(u8 t) { - return t == TAMARAMA_NFA; + return t == LBR_NFA_DOT || t == LBR_NFA_VERM || t == LBR_NFA_NVERM || + t == LBR_NFA_SHUF || t == LBR_NFA_TRUF; } +/** \brief True if the given type (from NFA::type) is a container engine. */ static really_inline +int isContainerType(u8 t) { + return t == TAMARAMA_NFA; +} + +static really_inline int isMultiTopType(u8 t) { return !isDfaType(t) && !isLbrType(t); } @@ -239,14 +239,14 @@ int isMultiTopType(u8 t) { /* Use for functions that return an integer. */ #define NFA_API_NO_IMPL(...) \ ({ \ - assert(!"not implemented for this engine!"); \ + assert(!"not implemented for this engine!"); \ 0; /* return value, for places that need it */ \ }) /* Use for _zombie_status functions. */ #define NFA_API_ZOMBIE_NO_IMPL(...) \ ({ \ - assert(!"not implemented for this engine!"); \ + assert(!"not implemented for this engine!"); \ NFA_ZOMBIE_NO; \ }) diff --git a/contrib/libs/hyperscan/src/nfa/nfa_kind.h b/contrib/libs/hyperscan/src/nfa/nfa_kind.h index f2ac6189b1..37e38ccacf 100644 --- a/contrib/libs/hyperscan/src/nfa/nfa_kind.h +++ b/contrib/libs/hyperscan/src/nfa/nfa_kind.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,19 +26,19 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** - * \file - * \brief Data structures and helper functions used to describe the purpose of - * a particular NFA engine at build time. - */ - +/** + * \file + * \brief Data structures and helper functions used to describe the purpose of + * a particular NFA engine at build time. + */ + #ifndef NFA_KIND_H #define NFA_KIND_H #include "ue2common.h" -#include <string> - +#include <string> + namespace ue2 { /** \brief Specify the use-case for an nfa engine. 
*/ @@ -47,102 +47,102 @@ enum nfa_kind { NFA_INFIX, //!< rose infix NFA_SUFFIX, //!< rose suffix NFA_OUTFIX, //!< "outfix" nfa not triggered by external events - NFA_OUTFIX_RAW, //!< "outfix", but with unmanaged reports + NFA_OUTFIX_RAW, //!< "outfix", but with unmanaged reports NFA_REV_PREFIX, //! reverse running prefixes (for som) - NFA_EAGER_PREFIX, //!< rose prefix that is also run up to matches + NFA_EAGER_PREFIX, //!< rose prefix that is also run up to matches }; -/** \brief True if this kind of engine is triggered by a top event. */ -inline +/** \brief True if this kind of engine is triggered by a top event. */ +inline bool is_triggered(enum nfa_kind k) { - switch (k) { - case NFA_INFIX: - case NFA_SUFFIX: - case NFA_REV_PREFIX: - return true; - default: - return false; - } + switch (k) { + case NFA_INFIX: + case NFA_SUFFIX: + case NFA_REV_PREFIX: + return true; + default: + return false; + } } -/** - * \brief True if this kind of engine generates actively checks for accept - * states either to halt matching or to raise a callback. Only these engines - * generated with this property should call nfaQueueExec() or - * nfaQueueExecToMatch(). - */ -inline +/** + * \brief True if this kind of engine generates actively checks for accept + * states either to halt matching or to raise a callback. Only these engines + * generated with this property should call nfaQueueExec() or + * nfaQueueExecToMatch(). + */ +inline bool generates_callbacks(enum nfa_kind k) { - switch (k) { - case NFA_SUFFIX: - case NFA_OUTFIX: - case NFA_OUTFIX_RAW: - case NFA_REV_PREFIX: - case NFA_EAGER_PREFIX: - return true; - default: - return false; - } + switch (k) { + case NFA_SUFFIX: + case NFA_OUTFIX: + case NFA_OUTFIX_RAW: + case NFA_REV_PREFIX: + case NFA_EAGER_PREFIX: + return true; + default: + return false; + } } -/** - * \brief True if this kind of engine has its state inspected to see if it is in - * an accept state. Engines generated with this property will commonly call - * nfaQueueExecRose(), nfaInAcceptState(), and nfaInAnyAcceptState(). - */ -inline -bool inspects_states_for_accepts(enum nfa_kind k) { - switch (k) { - case NFA_PREFIX: - case NFA_INFIX: - case NFA_EAGER_PREFIX: - return true; - default: - return false; - } -} - -/** - * \brief True if this kind of engine has reports that are managed by the \ref - * ReportManager. - */ -inline -bool has_managed_reports(enum nfa_kind k) { - switch (k) { - case NFA_SUFFIX: - case NFA_OUTFIX: - return true; - default: - return false; - } -} - -#if defined(DEBUG) || defined(DUMP_SUPPORT) - -inline -std::string to_string(nfa_kind k) { - switch (k) { - case NFA_PREFIX: - return "PREFIX"; - case NFA_INFIX: - return "INFIX"; - case NFA_SUFFIX: - return "SUFFIX"; - case NFA_OUTFIX: - return "OUTFIX"; - case NFA_REV_PREFIX: - return "REV_PREFIX"; - case NFA_OUTFIX_RAW: - return "OUTFIX_RAW"; - case NFA_EAGER_PREFIX: - return "EAGER_PREFIX"; - } - assert(0); - return "?"; -} - -#endif - +/** + * \brief True if this kind of engine has its state inspected to see if it is in + * an accept state. Engines generated with this property will commonly call + * nfaQueueExecRose(), nfaInAcceptState(), and nfaInAnyAcceptState(). + */ +inline +bool inspects_states_for_accepts(enum nfa_kind k) { + switch (k) { + case NFA_PREFIX: + case NFA_INFIX: + case NFA_EAGER_PREFIX: + return true; + default: + return false; + } +} + +/** + * \brief True if this kind of engine has reports that are managed by the \ref + * ReportManager. 
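Editorial note: the nfa_kind predicates above split engine roles along two axes, whether the engine is fed by top events (is_triggered) and whether it raises its own match callbacks (generates_callbacks) or instead has its state inspected (inspects_states_for_accepts). A self-contained mirror of two of them, for illustration only; the enum is re-declared locally and the real definitions live in nfa_kind.h.

#include <cassert>

// Local re-declaration for illustration; the real enum lives in nfa_kind.h.
enum nfa_kind { NFA_PREFIX, NFA_INFIX, NFA_SUFFIX, NFA_OUTFIX,
                NFA_OUTFIX_RAW, NFA_REV_PREFIX, NFA_EAGER_PREFIX };

// Mirrors is_triggered(): engines driven by top events.
static bool is_triggered(nfa_kind k) {
    switch (k) {
    case NFA_INFIX:
    case NFA_SUFFIX:
    case NFA_REV_PREFIX:
        return true;
    default:
        return false;
    }
}

// Mirrors generates_callbacks(): engines that raise their own callbacks.
static bool generates_callbacks(nfa_kind k) {
    switch (k) {
    case NFA_SUFFIX:
    case NFA_OUTFIX:
    case NFA_OUTFIX_RAW:
    case NFA_REV_PREFIX:
    case NFA_EAGER_PREFIX:
        return true;
    default:
        return false;
    }
}

int main() {
    // An infix is triggered by tops but does not raise callbacks itself
    // (its accept states are inspected instead); a suffix is both
    // triggered and callback-raising; an outfix runs untriggered.
    assert(is_triggered(NFA_INFIX) && !generates_callbacks(NFA_INFIX));
    assert(is_triggered(NFA_SUFFIX) && generates_callbacks(NFA_SUFFIX));
    assert(!is_triggered(NFA_OUTFIX) && generates_callbacks(NFA_OUTFIX));
}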
+ */ +inline +bool has_managed_reports(enum nfa_kind k) { + switch (k) { + case NFA_SUFFIX: + case NFA_OUTFIX: + return true; + default: + return false; + } +} + +#if defined(DEBUG) || defined(DUMP_SUPPORT) + +inline +std::string to_string(nfa_kind k) { + switch (k) { + case NFA_PREFIX: + return "PREFIX"; + case NFA_INFIX: + return "INFIX"; + case NFA_SUFFIX: + return "SUFFIX"; + case NFA_OUTFIX: + return "OUTFIX"; + case NFA_REV_PREFIX: + return "REV_PREFIX"; + case NFA_OUTFIX_RAW: + return "OUTFIX_RAW"; + case NFA_EAGER_PREFIX: + return "EAGER_PREFIX"; + } + assert(0); + return "?"; +} + +#endif + } // namespace ue2 #endif diff --git a/contrib/libs/hyperscan/src/nfa/rdfa.cpp b/contrib/libs/hyperscan/src/nfa/rdfa.cpp index ae857b6af2..f12120e202 100644 --- a/contrib/libs/hyperscan/src/nfa/rdfa.cpp +++ b/contrib/libs/hyperscan/src/nfa/rdfa.cpp @@ -1,55 +1,55 @@ -/* - * Copyright (c) 2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "rdfa.h" - -namespace ue2 { - -// prevent weak vtables -raw_dfa::~raw_dfa() {} - -void raw_dfa::stripExtraEodReports(void) { - /* if a state generates a given report as a normal accept - then it does - * not also need to generate an eod report for it */ - for (dstate &ds : states) { - for (const ReportID &report : ds.reports) { - ds.reports_eod.erase(report); - } - } -} - -bool raw_dfa::hasEodReports(void) const { - for (const dstate &ds : states) { - if (!ds.reports_eod.empty()) { - return true; - } - } - return false; -} - -} // namespace ue2 +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "rdfa.h" + +namespace ue2 { + +// prevent weak vtables +raw_dfa::~raw_dfa() {} + +void raw_dfa::stripExtraEodReports(void) { + /* if a state generates a given report as a normal accept - then it does + * not also need to generate an eod report for it */ + for (dstate &ds : states) { + for (const ReportID &report : ds.reports) { + ds.reports_eod.erase(report); + } + } +} + +bool raw_dfa::hasEodReports(void) const { + for (const dstate &ds : states) { + if (!ds.reports_eod.empty()) { + return true; + } + } + return false; +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfa/rdfa.h b/contrib/libs/hyperscan/src/nfa/rdfa.h index 6b994e4f2f..88597c4480 100644 --- a/contrib/libs/hyperscan/src/nfa/rdfa.h +++ b/contrib/libs/hyperscan/src/nfa/rdfa.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -32,7 +32,7 @@ #include "nfa_kind.h" #include "ue2common.h" -#include "util/flat_containers.h" +#include "util/flat_containers.h" #include <array> #include <vector> @@ -81,7 +81,7 @@ struct raw_dfa { explicit raw_dfa(nfa_kind k) : kind(k) {} virtual ~raw_dfa(); - u16 getImplAlphaSize() const { return alpha_size - N_SPECIAL_SYMBOL; } + u16 getImplAlphaSize() const { return alpha_size - N_SPECIAL_SYMBOL; } virtual void stripExtraEodReports(void); bool hasEodReports(void) const; }; diff --git a/contrib/libs/hyperscan/src/nfa/rdfa_graph.cpp b/contrib/libs/hyperscan/src/nfa/rdfa_graph.cpp index 2467748b98..ee404234cd 100644 --- a/contrib/libs/hyperscan/src/nfa/rdfa_graph.cpp +++ b/contrib/libs/hyperscan/src/nfa/rdfa_graph.cpp @@ -1,68 +1,68 @@ -/* - * Copyright (c) 2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - - -#include "rdfa_graph.h" - -#include "rdfa.h" -#include "util/container.h" - -#include <vector> - -using namespace std; - -namespace ue2 { - -RdfaGraph::RdfaGraph(const raw_dfa &rdfa) { - RdfaGraph &g = *this; - - vector<RdfaGraph::vertex_descriptor> verts; - verts.reserve(rdfa.states.size()); - for (dstate_id_t i = 0; i < rdfa.states.size(); i++) { - verts.push_back(add_vertex(g)); - assert(g[verts.back()].index == i); - } - - symbol_t symbol_end = rdfa.alpha_size - 1; - - flat_set<dstate_id_t> local_succs; - for (dstate_id_t i = 0; i < rdfa.states.size(); i++) { - local_succs.clear(); - for (symbol_t s = 0; s < symbol_end; s++) { - dstate_id_t next = rdfa.states[i].next[s]; - if (contains(local_succs, next)) { - continue; - } - DEBUG_PRINTF("%hu->%hu\n", i, next); - add_edge(verts[i], verts[next], g); - local_succs.insert(next); - } - } -} - -} +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + + +#include "rdfa_graph.h" + +#include "rdfa.h" +#include "util/container.h" + +#include <vector> + +using namespace std; + +namespace ue2 { + +RdfaGraph::RdfaGraph(const raw_dfa &rdfa) { + RdfaGraph &g = *this; + + vector<RdfaGraph::vertex_descriptor> verts; + verts.reserve(rdfa.states.size()); + for (dstate_id_t i = 0; i < rdfa.states.size(); i++) { + verts.push_back(add_vertex(g)); + assert(g[verts.back()].index == i); + } + + symbol_t symbol_end = rdfa.alpha_size - 1; + + flat_set<dstate_id_t> local_succs; + for (dstate_id_t i = 0; i < rdfa.states.size(); i++) { + local_succs.clear(); + for (symbol_t s = 0; s < symbol_end; s++) { + dstate_id_t next = rdfa.states[i].next[s]; + if (contains(local_succs, next)) { + continue; + } + DEBUG_PRINTF("%hu->%hu\n", i, next); + add_edge(verts[i], verts[next], g); + local_succs.insert(next); + } + } +} + +} diff --git a/contrib/libs/hyperscan/src/nfa/rdfa_graph.h b/contrib/libs/hyperscan/src/nfa/rdfa_graph.h index 6d166c2fb7..63e1233a70 100644 --- a/contrib/libs/hyperscan/src/nfa/rdfa_graph.h +++ b/contrib/libs/hyperscan/src/nfa/rdfa_graph.h @@ -1,54 +1,54 @@ -/* - * Copyright (c) 2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
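Editorial note: the RdfaGraph constructor in the hunk above adds one vertex per DFA state and then one edge per distinct successor, using a flat_set scratch (local_succs) to suppress the duplicate edges that arise when many symbols map to the same next state; note it also stops one symbol short of alpha_size (symbol_end = alpha_size - 1). A condensed, self-contained sketch of that dedupe loop, with std::set standing in for ue2's flat_set and plain adjacency lists standing in for the graph:

#include <cstddef>
#include <cstdint>
#include <set>
#include <vector>

using dstate_id_t = std::uint16_t;
struct dstate { std::vector<dstate_id_t> next; }; // next[s] = successor on symbol s

// One adjacency list per state; each distinct successor appears once,
// mirroring the local_succs/add_edge() logic in RdfaGraph's constructor.
std::vector<std::vector<dstate_id_t>>
build_succ_lists(const std::vector<dstate> &states, std::size_t symbol_end) {
    std::vector<std::vector<dstate_id_t>> adj(states.size());
    std::set<dstate_id_t> local_succs; // scratch, cleared per state
    for (std::size_t i = 0; i < states.size(); i++) {
        local_succs.clear();
        for (std::size_t s = 0; s < symbol_end; s++) {
            dstate_id_t next = states[i].next[s];
            if (!local_succs.insert(next).second) {
                continue; // an i -> next edge already exists
            }
            adj[i].push_back(next);
        }
    }
    return adj;
}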
- */ - -#ifndef RDFA_GRAPH_H -#define RDFA_GRAPH_H - -#include "ue2common.h" -#include "util/ue2_graph.h" - -namespace ue2 { - -struct raw_dfa; - -struct RdfaVertexProps { - size_t index = 0; -}; - -struct RdfaEdgeProps { - size_t index = 0; -}; - -struct RdfaGraph : public ue2_graph<RdfaGraph, RdfaVertexProps, RdfaEdgeProps> { - RdfaGraph(const raw_dfa &rdfa); -}; - - -} - -#endif +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef RDFA_GRAPH_H +#define RDFA_GRAPH_H + +#include "ue2common.h" +#include "util/ue2_graph.h" + +namespace ue2 { + +struct raw_dfa; + +struct RdfaVertexProps { + size_t index = 0; +}; + +struct RdfaEdgeProps { + size_t index = 0; +}; + +struct RdfaGraph : public ue2_graph<RdfaGraph, RdfaVertexProps, RdfaEdgeProps> { + RdfaGraph(const raw_dfa &rdfa); +}; + + +} + +#endif diff --git a/contrib/libs/hyperscan/src/nfa/rdfa_merge.cpp b/contrib/libs/hyperscan/src/nfa/rdfa_merge.cpp index 2ad871234f..430551be71 100644 --- a/contrib/libs/hyperscan/src/nfa/rdfa_merge.cpp +++ b/contrib/libs/hyperscan/src/nfa/rdfa_merge.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,12 +36,12 @@ #include "nfagraph/ng_mcclellan_internal.h" #include "util/container.h" #include "util/determinise.h" -#include "util/flat_containers.h" +#include "util/flat_containers.h" #include "util/make_unique.h" #include "util/report_manager.h" -#include "util/unordered.h" +#include "util/unordered.h" -#include <algorithm> +#include <algorithm> #include <queue> using namespace std; @@ -54,8 +54,8 @@ namespace { class Automaton_Merge { public: - using StateSet = vector<u16>; - using StateMap = ue2_unordered_map<StateSet, dstate_id_t>; + using StateSet = vector<u16>; + using StateMap = ue2_unordered_map<StateSet, dstate_id_t>; Automaton_Merge(const raw_dfa *rdfa1, const raw_dfa *rdfa2, const ReportManager *rm_in, const Grey &grey_in) @@ -137,10 +137,10 @@ public: } } - // Sort so that our alphabet mapping isn't dependent on the order of - // rdfas passed in. - sort(esets.begin(), esets.end()); - + // Sort so that our alphabet mapping isn't dependent on the order of + // rdfas passed in. 
+ sort(esets.begin(), esets.end()); + alphasize = buildAlphabetFromEquivSets(esets, alpha, unalpha); } @@ -290,7 +290,7 @@ unique_ptr<raw_dfa> mergeTwoDfas(const raw_dfa *d1, const raw_dfa *d2, auto rdfa = ue2::make_unique<raw_dfa>(d1->kind); Automaton_Merge autom(d1, d2, rm, grey); - if (determinise(autom, rdfa->states, max_states)) { + if (determinise(autom, rdfa->states, max_states)) { rdfa->start_anchored = autom.start_anchored; rdfa->start_floating = autom.start_floating; rdfa->alpha_size = autom.alphasize; @@ -375,7 +375,7 @@ unique_ptr<raw_dfa> mergeAllDfas(const vector<const raw_dfa *> &dfas, DEBUG_PRINTF("merging dfa\n"); - if (!determinise(n, rdfa->states, max_states)) { + if (!determinise(n, rdfa->states, max_states)) { DEBUG_PRINTF("state limit (%zu) exceeded\n", max_states); return nullptr; /* over state limit */ } diff --git a/contrib/libs/hyperscan/src/nfa/repeat.c b/contrib/libs/hyperscan/src/nfa/repeat.c index 5b2e4df4ed..21cddd6f8a 100644 --- a/contrib/libs/hyperscan/src/nfa/repeat.c +++ b/contrib/libs/hyperscan/src/nfa/repeat.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -177,10 +177,10 @@ u64a repeatLastTopRange(const union RepeatControl *ctrl, const void *state) { u64a repeatLastTopBitmap(const union RepeatControl *ctrl) { const struct RepeatBitmapControl *xs = &ctrl->bitmap; - if (!xs->bitmap) { - /* last top was too long ago */ - return 0; - } + if (!xs->bitmap) { + /* last top was too long ago */ + return 0; + } return xs->offset + 63 - clz64(xs->bitmap); } @@ -886,25 +886,25 @@ enum RepeatMatch repeatHasMatchTrailer(const struct RepeatInfo *info, return REPEAT_NOMATCH; } -/** \brief True if the given value can be packed into len bytes. */ -static really_inline -int fits_in_len_bytes(u64a val, u32 len) { - if (len >= 8) { - return 1; - } - return val <= (1ULL << (len * 8)); -} - +/** \brief True if the given value can be packed into len bytes. */ static really_inline +int fits_in_len_bytes(u64a val, u32 len) { + if (len >= 8) { + return 1; + } + return val <= (1ULL << (len * 8)); +} + +static really_inline void storePackedRelative(char *dest, u64a val, u64a offset, u64a max, u32 len) { assert(val <= offset); - assert(fits_in_len_bytes(max, len)); + assert(fits_in_len_bytes(max, len)); u64a delta = offset - val; if (delta >= max) { delta = max; } DEBUG_PRINTF("delta %llu\n", delta); - assert(fits_in_len_bytes(delta, len)); + assert(fits_in_len_bytes(delta, len)); partial_store_u64a(dest, delta, len); } @@ -936,11 +936,11 @@ void repeatPackOffset(char *dest, const struct RepeatInfo *info, const union RepeatControl *ctrl, u64a offset) { const struct RepeatOffsetControl *xs = &ctrl->offset; DEBUG_PRINTF("packing offset %llu [h %u]\n", xs->offset, info->horizon); - if (!info->packedCtrlSize) { - assert(info->type == REPEAT_ALWAYS); - DEBUG_PRINTF("externally guarded .*\n"); - return; - } + if (!info->packedCtrlSize) { + assert(info->type == REPEAT_ALWAYS); + DEBUG_PRINTF("externally guarded .*\n"); + return; + } storePackedRelative(dest, xs->offset, offset, info->horizon, info->packedCtrlSize); } @@ -981,7 +981,7 @@ void repeatPackBitmap(char *dest, const struct RepeatInfo *info, DEBUG_PRINTF("packing %llu into %u bytes\n", bitmap, info->packedCtrlSize); // Write out packed bitmap. 
- assert(fits_in_len_bytes(bitmap, info->packedCtrlSize)); + assert(fits_in_len_bytes(bitmap, info->packedCtrlSize)); partial_store_u64a(dest, bitmap, info->packedCtrlSize); } @@ -1060,9 +1060,9 @@ void repeatPack(char *dest, const struct RepeatInfo *info, case REPEAT_TRAILER: repeatPackTrailer(dest, info, ctrl, offset); break; - case REPEAT_ALWAYS: - /* nothing to do - no state */ - break; + case REPEAT_ALWAYS: + /* nothing to do - no state */ + break; } } @@ -1095,13 +1095,13 @@ static void repeatUnpackOffset(const char *src, const struct RepeatInfo *info, u64a offset, union RepeatControl *ctrl) { struct RepeatOffsetControl *xs = &ctrl->offset; - if (!info->packedCtrlSize) { - assert(info->type == REPEAT_ALWAYS); - DEBUG_PRINTF("externally guarded .*\n"); - xs->offset = 0; - } else { - xs->offset = loadPackedRelative(src, offset, info->packedCtrlSize); - } + if (!info->packedCtrlSize) { + assert(info->type == REPEAT_ALWAYS); + DEBUG_PRINTF("externally guarded .*\n"); + xs->offset = 0; + } else { + xs->offset = loadPackedRelative(src, offset, info->packedCtrlSize); + } DEBUG_PRINTF("unpacking offset %llu [h%u]\n", xs->offset, info->horizon); } @@ -1178,9 +1178,9 @@ void repeatUnpack(const char *src, const struct RepeatInfo *info, u64a offset, case REPEAT_TRAILER: repeatUnpackTrailer(src, info, offset, ctrl); break; - case REPEAT_ALWAYS: - /* nothing to do - no state */ - break; + case REPEAT_ALWAYS: + /* nothing to do - no state */ + break; } } @@ -1455,7 +1455,7 @@ void repeatStoreSparseOptimalP(const struct RepeatInfo *info, DEBUG_PRINTF("xs->first:%u xs->last:%u patch:%u\n", xs->first, xs->last, patch); DEBUG_PRINTF("value:%llu\n", val); - assert(fits_in_len_bytes(val, encoding_size)); + assert(fits_in_len_bytes(val, encoding_size)); partial_store_u64a(ring + encoding_size * idx, val, encoding_size); mmbit_set(active, patch_count, idx); } diff --git a/contrib/libs/hyperscan/src/nfa/repeat.h b/contrib/libs/hyperscan/src/nfa/repeat.h index d4f84ea0a9..ae73540133 100644 --- a/contrib/libs/hyperscan/src/nfa/repeat.h +++ b/contrib/libs/hyperscan/src/nfa/repeat.h @@ -135,8 +135,8 @@ u64a repeatLastTop(const struct RepeatInfo *info, return repeatLastTopSparseOptimalP(info, ctrl, state); case REPEAT_TRAILER: return repeatLastTopTrailer(info, ctrl); - case REPEAT_ALWAYS: - return 0; + case REPEAT_ALWAYS: + return 0; } DEBUG_PRINTF("bad repeat type %u\n", info->type); @@ -202,8 +202,8 @@ u64a repeatNextMatch(const struct RepeatInfo *info, return repeatNextMatchSparseOptimalP(info, ctrl, state, offset); case REPEAT_TRAILER: return repeatNextMatchTrailer(info, ctrl, offset); - case REPEAT_ALWAYS: - return offset + 1; + case REPEAT_ALWAYS: + return offset + 1; } DEBUG_PRINTF("bad repeat type %u\n", info->type); @@ -279,9 +279,9 @@ void repeatStore(const struct RepeatInfo *info, union RepeatControl *ctrl, case REPEAT_TRAILER: repeatStoreTrailer(info, ctrl, offset, is_alive); break; - case REPEAT_ALWAYS: - /* nothing to do - no state */ - break; + case REPEAT_ALWAYS: + /* nothing to do - no state */ + break; } } @@ -355,8 +355,8 @@ enum RepeatMatch repeatHasMatch(const struct RepeatInfo *info, return repeatHasMatchSparseOptimalP(info, ctrl, state, offset); case REPEAT_TRAILER: return repeatHasMatchTrailer(info, ctrl, offset); - case REPEAT_ALWAYS: - return REPEAT_MATCH; + case REPEAT_ALWAYS: + return REPEAT_MATCH; } assert(0); diff --git a/contrib/libs/hyperscan/src/nfa/repeat_internal.h b/contrib/libs/hyperscan/src/nfa/repeat_internal.h index 9e3f455c80..1c99e7919a 100644 --- 
a/contrib/libs/hyperscan/src/nfa/repeat_internal.h +++ b/contrib/libs/hyperscan/src/nfa/repeat_internal.h @@ -47,26 +47,26 @@ enum RepeatType { /** General mechanism for tracking {N,M} repeats. Stores the first top as * an absolute offset, then subsequent tops in the {N,M} range as a ring of * relative top indices stored in a multibit. */ - REPEAT_RING, + REPEAT_RING, /** Used to track {N,} repeats. Uses the \ref RepeatOffsetControl structure, * since only the first top encountered needs to be stored. */ - REPEAT_FIRST, + REPEAT_FIRST, /** Used to track {0,N} repeats. Much like ::REPEAT_FIRST, except that we * store the most recent top encountered. */ - REPEAT_LAST, + REPEAT_LAST, /** Like ::REPEAT_RING, this is also used for {N,M} repeats, but for cases * where there is a large difference between N and M, and developed to * reduce the state requirements of this case (relative to the RING model). * Uses a small ordered array of top indices relative to \ref * RepeatRangeControl::offset. */ - REPEAT_RANGE, + REPEAT_RANGE, /** Used for {N,M} repeats where 0 < M <= 64. Uses the \ref * RepeatBitmapControl structure at runtime. */ - REPEAT_BITMAP, + REPEAT_BITMAP, /** Optimal mechanism for tracking {N,M} repeats when there is a bound on * how frequently they can be retriggered. @@ -78,17 +78,17 @@ enum RepeatType { * referencing a table that stores values from f(0, min) to f(repeat, min) * eg: repeat = 5, min = 2. 10001 => f(4,2) + f(0,2) = 9. * We search the optimal patch size between min and repeat in advance and - * use the scheme above to do encoding and decoding to reduce stream state - * size. */ - REPEAT_SPARSE_OPTIMAL_P, - - /** Used for {N,M} repeats where 0 < N < 64. Uses the - * \ref RepeatTrailerControl structure at runtime. */ - REPEAT_TRAILER, - - /** Degenerate repeat that always returns true. Used by castle for pseudo - * [^X]* repeats. */ - REPEAT_ALWAYS, + * use the scheme above to do encoding and decoding to reduce stream state + * size. */ + REPEAT_SPARSE_OPTIMAL_P, + + /** Used for {N,M} repeats where 0 < N < 64. Uses the + * \ref RepeatTrailerControl structure at runtime. */ + REPEAT_TRAILER, + + /** Degenerate repeat that always returns true. Used by castle for pseudo + * [^X]* repeats. 
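Editorial note: the ::REPEAT_SPARSE_OPTIMAL_P documentation survives only in fragment form in this hunk; the visible part says a table of f(0, min) .. f(repeat, min) is used, with the example "repeat = 5, min = 2. 10001 => f(4,2) + f(0,2) = 9". The sketch below is one scheme consistent with that fragment, under my assumption (a reconstruction, not confirmed by the hunk) that f(n, min) counts bit patterns whose set bits are at least min positions apart, giving f(n, min) = f(n - 1, min) + f(n - min, min) with f(n) = n + 1 for n < min, and that a bitmap encodes as the sum of f(i, min) over its set bits.

#include <cassert>
#include <cstdint>
#include <vector>

// Assumed reconstruction of the comment's f: number of length-n bit
// patterns whose set bits are >= min apart. For n < min at most one bit
// fits, so f(n) = n + 1; otherwise split on whether the top bit is set.
std::vector<std::uint64_t> build_table(unsigned repeat, unsigned min) {
    std::vector<std::uint64_t> f(repeat + 1);
    for (unsigned n = 0; n <= repeat; n++) {
        if (n < min) {
            f[n] = n + 1;
        } else {
            f[n] = f[n - 1] + f[n - min]; // top bit clear + top bit set
        }
    }
    return f;
}

// Encode a valid patch bitmap as the sum of f[i] over its set bits.
std::uint64_t encode(std::uint64_t bitmap, const std::vector<std::uint64_t> &f) {
    std::uint64_t val = 0;
    for (unsigned i = 0; i < f.size(); i++) {
        if (bitmap & (1ULL << i)) {
            val += f[i];
        }
    }
    return val;
}

int main() {
    std::vector<std::uint64_t> f = build_table(5, 2); // repeat = 5, min = 2
    assert(f[4] == 8 && f[0] == 1);
    assert(encode(0x11, f) == 9); // 10001 -> f(4,2) + f(0,2) = 8 + 1 = 9
}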
*/ + REPEAT_ALWAYS, }; /** @@ -208,8 +208,8 @@ const char *repeatTypeName(u8 type) { return "SPARSE_OPTIMAL_P"; case REPEAT_TRAILER: return "TRAILER"; - case REPEAT_ALWAYS: - return "ALWAYS"; + case REPEAT_ALWAYS: + return "ALWAYS"; } assert(0); return "UNKNOWN"; diff --git a/contrib/libs/hyperscan/src/nfa/repeatcompile.cpp b/contrib/libs/hyperscan/src/nfa/repeatcompile.cpp index 934dd29e6b..c33851ff9f 100644 --- a/contrib/libs/hyperscan/src/nfa/repeatcompile.cpp +++ b/contrib/libs/hyperscan/src/nfa/repeatcompile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,7 +34,7 @@ #include "util/charreach.h" #include "util/depth.h" #include "util/dump_charclass.h" -#include "util/multibit_build.h" +#include "util/multibit_build.h" #include "util/verify_types.h" #include <algorithm> @@ -206,13 +206,13 @@ RepeatStateInfo::RepeatStateInfo(enum RepeatType type, const depth &repeatMin, packedFieldSizes[1] = repeatMin; packedCtrlSize = (packedFieldSizes[0] + packedFieldSizes[1] + 7U) / 8U; break; - case REPEAT_ALWAYS: - assert(repeatMin == 0ULL); - assert(repeatMax.is_infinite()); - stateSize = 0; // everything is in the control block. - horizon = 0; - packedCtrlSize = 0; - break; + case REPEAT_ALWAYS: + assert(repeatMin == 0ULL); + assert(repeatMax.is_infinite()); + stateSize = 0; // everything is in the control block. + horizon = 0; + packedCtrlSize = 0; + break; } DEBUG_PRINTF("stateSize=%u, packedCtrlSize=%u, horizon=%u\n", stateSize, packedCtrlSize, horizon); @@ -239,14 +239,14 @@ u32 streamStateSize(enum RepeatType type, const depth &repeatMin, } enum RepeatType chooseRepeatType(const depth &repeatMin, const depth &repeatMax, - u32 minPeriod, bool is_reset, - bool has_external_guard) { + u32 minPeriod, bool is_reset, + bool has_external_guard) { if (repeatMax.is_infinite()) { - if (has_external_guard && !repeatMin) { - return REPEAT_ALWAYS; - } else { - return REPEAT_FIRST; - } + if (has_external_guard && !repeatMin) { + return REPEAT_ALWAYS; + } else { + return REPEAT_FIRST; + } } if (repeatMin == depth(0) || is_reset) { diff --git a/contrib/libs/hyperscan/src/nfa/repeatcompile.h b/contrib/libs/hyperscan/src/nfa/repeatcompile.h index fe9a710623..84c28be568 100644 --- a/contrib/libs/hyperscan/src/nfa/repeatcompile.h +++ b/contrib/libs/hyperscan/src/nfa/repeatcompile.h @@ -68,8 +68,8 @@ struct RepeatStateInfo { * type. */ enum RepeatType chooseRepeatType(const depth &repeatMin, const depth &repeatMax, - u32 minPeriod, bool is_reset, - bool has_external_guard = false); + u32 minPeriod, bool is_reset, + bool has_external_guard = false); u32 calcPackedBytes(u64a val); diff --git a/contrib/libs/hyperscan/src/nfa/sheng.c b/contrib/libs/hyperscan/src/nfa/sheng.c index 3f36e21891..d4fb1250f2 100644 --- a/contrib/libs/hyperscan/src/nfa/sheng.c +++ b/contrib/libs/hyperscan/src/nfa/sheng.c @@ -1,159 +1,159 @@ -/* +/* * Copyright (c) 2016-2020, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
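Editorial note: the chooseRepeatType() change above picks ::REPEAT_ALWAYS only for unbounded repeats with no minimum that something external already guards, and ::REPEAT_FIRST for every other infinite-max case. A self-contained mirror of just that is_infinite() branch; ToyDepth is an illustrative stand-in for ue2::depth, and the finite-max cases (RING/RANGE/BITMAP/...) are omitted.

#include <cassert>
#include <cstdint>

enum ToyRepeatType { TOY_REPEAT_FIRST, TOY_REPEAT_ALWAYS /* , ... */ };

// Minimal stand-in for ue2::depth: a finite value or infinity.
struct ToyDepth {
    std::uint32_t val;
    bool infinite;
};

// Mirrors the repeatMax.is_infinite() branch of chooseRepeatType().
ToyRepeatType choose_infinite_case(ToyDepth repeatMin, ToyDepth repeatMax,
                                   bool has_external_guard) {
    assert(repeatMax.infinite);
    if (has_external_guard && repeatMin.val == 0) {
        return TOY_REPEAT_ALWAYS; // castle's pseudo [^X]* case: no state
    }
    return TOY_REPEAT_FIRST;      // {N,}: only the first top matters
}

int main() {
    ToyDepth zero{0, false}, inf{0, true};
    assert(choose_infinite_case(zero, inf, true) == TOY_REPEAT_ALWAYS);
    assert(choose_infinite_case(zero, inf, false) == TOY_REPEAT_FIRST);
}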
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "sheng.h" - -#include "accel.h" -#include "sheng_internal.h" -#include "nfa_api.h" -#include "nfa_api_queue.h" -#include "nfa_internal.h" -#include "util/bitutils.h" -#include "util/compare.h" -#include "util/join.h" -#include "util/simd_utils.h" - -enum MatchMode { - CALLBACK_OUTPUT, - STOP_AT_MATCH, - NO_MATCHES -}; - -static really_inline -const struct sheng *get_sheng(const struct NFA *n) { - return (const struct sheng *)getImplNfa(n); -} - -static really_inline -const struct sstate_aux *get_aux(const struct sheng *sh, u8 id) { - u32 offset = sh->aux_offset - sizeof(struct NFA) + - (id & SHENG_STATE_MASK) * sizeof(struct sstate_aux); - DEBUG_PRINTF("Getting aux for state %u at offset %llu\n", - id & SHENG_STATE_MASK, (u64a)offset + sizeof(struct NFA)); - return (const struct sstate_aux *)((const char *) sh + offset); -} - -static really_inline -const union AccelAux *get_accel(const struct sheng *sh, u8 id) { - const struct sstate_aux *saux = get_aux(sh, id); - DEBUG_PRINTF("Getting accel aux at offset %u\n", saux->accel); - const union AccelAux *aux = (const union AccelAux *) - ((const char *)sh + saux->accel - sizeof(struct NFA)); - return aux; -} - -static really_inline -const struct report_list *get_rl(const struct sheng *sh, - const struct sstate_aux *aux) { - DEBUG_PRINTF("Getting report list at offset %u\n", aux->accept); - return (const struct report_list *) - ((const char *)sh + aux->accept - sizeof(struct NFA)); -} - -static really_inline -const struct report_list *get_eod_rl(const struct sheng *sh, - const struct sstate_aux *aux) { - DEBUG_PRINTF("Getting EOD report list at offset %u\n", aux->accept); - return (const struct report_list *) - ((const char *)sh + aux->accept_eod - sizeof(struct NFA)); -} - -static really_inline -char shengHasAccept(const struct sheng *sh, const struct sstate_aux *aux, - ReportID report) { - assert(sh && aux); - - const struct report_list *rl = get_rl(sh, aux); - assert(ISALIGNED_N(rl, 4)); - - DEBUG_PRINTF("report list has %u entries\n", rl->count); - - for (u32 i = 0; i < rl->count; i++) { - if (rl->report[i] == report) { - DEBUG_PRINTF("reporting %u\n", rl->report[i]); - return 1; - } - } - - return 0; -} - -static really_inline -char fireSingleReport(NfaCallback cb, void *ctxt, ReportID r, u64a loc) { - DEBUG_PRINTF("reporting 
%u\n", r); - if (cb(0, loc, r, ctxt) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; /* termination requested */ - } - return MO_CONTINUE_MATCHING; /* continue execution */ -} - -static really_inline -char fireReports(const struct sheng *sh, NfaCallback cb, void *ctxt, - const u8 state, u64a loc, u8 *const cached_accept_state, - ReportID *const cached_accept_id, char eod) { - DEBUG_PRINTF("reporting matches @ %llu\n", loc); - - if (!eod && state == *cached_accept_state) { - DEBUG_PRINTF("reporting %u\n", *cached_accept_id); - if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; /* termination requested */ - } - - return MO_CONTINUE_MATCHING; /* continue execution */ - } - const struct sstate_aux *aux = get_aux(sh, state); - const struct report_list *rl = eod ? get_eod_rl(sh, aux) : get_rl(sh, aux); - assert(ISALIGNED(rl)); - - DEBUG_PRINTF("report list has %u entries\n", rl->count); - u32 count = rl->count; - - if (!eod && count == 1) { - *cached_accept_state = state; - *cached_accept_id = rl->report[0]; - - DEBUG_PRINTF("reporting %u\n", rl->report[0]); - if (cb(0, loc, rl->report[0], ctxt) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; /* termination requested */ - } - - return MO_CONTINUE_MATCHING; /* continue execution */ - } - - for (u32 i = 0; i < count; i++) { - DEBUG_PRINTF("reporting %u\n", rl->report[i]); - if (cb(0, loc, rl->report[i], ctxt) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; /* termination requested */ - } - } - return MO_CONTINUE_MATCHING; /* continue execution */ -} - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "sheng.h" + +#include "accel.h" +#include "sheng_internal.h" +#include "nfa_api.h" +#include "nfa_api_queue.h" +#include "nfa_internal.h" +#include "util/bitutils.h" +#include "util/compare.h" +#include "util/join.h" +#include "util/simd_utils.h" + +enum MatchMode { + CALLBACK_OUTPUT, + STOP_AT_MATCH, + NO_MATCHES +}; + +static really_inline +const struct sheng *get_sheng(const struct NFA *n) { + return (const struct sheng *)getImplNfa(n); +} + +static really_inline +const struct sstate_aux *get_aux(const struct sheng *sh, u8 id) { + u32 offset = sh->aux_offset - sizeof(struct NFA) + + (id & SHENG_STATE_MASK) * sizeof(struct sstate_aux); + DEBUG_PRINTF("Getting aux for state %u at offset %llu\n", + id & SHENG_STATE_MASK, (u64a)offset + sizeof(struct NFA)); + return (const struct sstate_aux *)((const char *) sh + offset); +} + +static really_inline +const union AccelAux *get_accel(const struct sheng *sh, u8 id) { + const struct sstate_aux *saux = get_aux(sh, id); + DEBUG_PRINTF("Getting accel aux at offset %u\n", saux->accel); + const union AccelAux *aux = (const union AccelAux *) + ((const char *)sh + saux->accel - sizeof(struct NFA)); + return aux; +} + +static really_inline +const struct report_list *get_rl(const struct sheng *sh, + const struct sstate_aux *aux) { + DEBUG_PRINTF("Getting report list at offset %u\n", aux->accept); + return (const struct report_list *) + ((const char *)sh + aux->accept - sizeof(struct NFA)); +} + +static really_inline +const struct report_list *get_eod_rl(const struct sheng *sh, + const struct sstate_aux *aux) { + DEBUG_PRINTF("Getting EOD report list at offset %u\n", aux->accept); + return (const struct report_list *) + ((const char *)sh + aux->accept_eod - sizeof(struct NFA)); +} + +static really_inline +char shengHasAccept(const struct sheng *sh, const struct sstate_aux *aux, + ReportID report) { + assert(sh && aux); + + const struct report_list *rl = get_rl(sh, aux); + assert(ISALIGNED_N(rl, 4)); + + DEBUG_PRINTF("report list has %u entries\n", rl->count); + + for (u32 i = 0; i < rl->count; i++) { + if (rl->report[i] == report) { + DEBUG_PRINTF("reporting %u\n", rl->report[i]); + return 1; + } + } + + return 0; +} + +static really_inline +char fireSingleReport(NfaCallback cb, void *ctxt, ReportID r, u64a loc) { + DEBUG_PRINTF("reporting %u\n", r); + if (cb(0, loc, r, ctxt) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; /* termination requested */ + } + return MO_CONTINUE_MATCHING; /* continue execution */ +} + +static really_inline +char fireReports(const struct sheng *sh, NfaCallback cb, void *ctxt, + const u8 state, u64a loc, u8 *const cached_accept_state, + ReportID *const cached_accept_id, char eod) { + DEBUG_PRINTF("reporting matches @ %llu\n", loc); + + if (!eod && state == *cached_accept_state) { + DEBUG_PRINTF("reporting %u\n", *cached_accept_id); + if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; /* termination requested */ + } + + return MO_CONTINUE_MATCHING; /* continue execution */ + } + const struct sstate_aux *aux = get_aux(sh, state); + const struct report_list *rl = eod ? 
get_eod_rl(sh, aux) : get_rl(sh, aux); + assert(ISALIGNED(rl)); + + DEBUG_PRINTF("report list has %u entries\n", rl->count); + u32 count = rl->count; + + if (!eod && count == 1) { + *cached_accept_state = state; + *cached_accept_id = rl->report[0]; + + DEBUG_PRINTF("reporting %u\n", rl->report[0]); + if (cb(0, loc, rl->report[0], ctxt) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; /* termination requested */ + } + + return MO_CONTINUE_MATCHING; /* continue execution */ + } + + for (u32 i = 0; i < count; i++) { + DEBUG_PRINTF("reporting %u\n", rl->report[i]); + if (cb(0, loc, rl->report[i], ctxt) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; /* termination requested */ + } + } + return MO_CONTINUE_MATCHING; /* continue execution */ +} + #if defined(HAVE_AVX512VBMI) // Sheng32 static really_inline @@ -353,523 +353,523 @@ char fireReports64(const struct sheng64 *sh, NfaCallback cb, void *ctxt, } #endif // end of HAVE_AVX512VBMI -/* include Sheng function definitions */ -#include "sheng_defs.h" - -static really_inline -char runShengCb(const struct sheng *sh, NfaCallback cb, void *ctxt, u64a offset, - u8 *const cached_accept_state, ReportID *const cached_accept_id, - const u8 *cur_buf, const u8 *start, const u8 *end, u8 can_die, - u8 has_accel, u8 single, const u8 **scanned, u8 *state) { - DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in callback mode\n", - (u64a)(end - start), offset); - DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf), - (s64a)(end - cur_buf)); - DEBUG_PRINTF("can die: %u has accel: %u single: %u\n", !!can_die, - !!has_accel, !!single); - int rv; - /* scan and report all matches */ - if (can_die) { - if (has_accel) { - rv = sheng4_coda(state, cb, ctxt, sh, cached_accept_state, - cached_accept_id, single, offset, cur_buf, start, - end, scanned); - } else { - rv = sheng4_cod(state, cb, ctxt, sh, cached_accept_state, - cached_accept_id, single, offset, cur_buf, start, - end, scanned); - } - if (rv == MO_HALT_MATCHING) { - return MO_DEAD; - } - rv = sheng_cod(state, cb, ctxt, sh, cached_accept_state, - cached_accept_id, single, offset, cur_buf, *scanned, end, - scanned); - } else { - if (has_accel) { - rv = sheng4_coa(state, cb, ctxt, sh, cached_accept_state, - cached_accept_id, single, offset, cur_buf, start, - end, scanned); - } else { - rv = sheng4_co(state, cb, ctxt, sh, cached_accept_state, - cached_accept_id, single, offset, cur_buf, start, - end, scanned); - } - if (rv == MO_HALT_MATCHING) { - return MO_DEAD; - } - rv = sheng_co(state, cb, ctxt, sh, cached_accept_state, - cached_accept_id, single, offset, cur_buf, *scanned, end, - scanned); - } - if (rv == MO_HALT_MATCHING) { - return MO_DEAD; - } - return MO_ALIVE; -} - -static really_inline -void runShengNm(const struct sheng *sh, NfaCallback cb, void *ctxt, u64a offset, - u8 *const cached_accept_state, ReportID *const cached_accept_id, - const u8 *cur_buf, const u8 *start, const u8 *end, u8 can_die, - u8 has_accel, u8 single, const u8 **scanned, u8 *state) { - DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in nomatch mode\n", - (u64a)(end - start), offset); - DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf), - (s64a)(end - cur_buf)); - DEBUG_PRINTF("can die: %u has accel: %u single: %u\n", !!can_die, - !!has_accel, !!single); - /* just scan the buffer */ - if (can_die) { - if (has_accel) { - sheng4_nmda(state, cb, ctxt, sh, cached_accept_state, - cached_accept_id, single, offset, cur_buf, start, end, - scanned); - } else { - sheng4_nmd(state, cb, ctxt, sh, cached_accept_state, - 
cached_accept_id, single, offset, cur_buf, start, end, - scanned); - } - sheng_nmd(state, cb, ctxt, sh, cached_accept_state, cached_accept_id, - single, offset, cur_buf, *scanned, end, scanned); - } else { - sheng4_nm(state, cb, ctxt, sh, cached_accept_state, cached_accept_id, - single, offset, cur_buf, start, end, scanned); - sheng_nm(state, cb, ctxt, sh, cached_accept_state, cached_accept_id, - single, offset, cur_buf, *scanned, end, scanned); - } -} - -static really_inline -char runShengSam(const struct sheng *sh, NfaCallback cb, void *ctxt, - u64a offset, u8 *const cached_accept_state, - ReportID *const cached_accept_id, const u8 *cur_buf, - const u8 *start, const u8 *end, u8 can_die, u8 has_accel, - u8 single, const u8 **scanned, u8 *state) { - DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in stop at match mode\n", - (u64a)(end - start), offset); - DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf), - (s64a)(end - cur_buf)); - DEBUG_PRINTF("can die: %u has accel: %u single: %u\n", !!can_die, - !!has_accel, !!single); - int rv; - /* scan until first match */ - if (can_die) { - if (has_accel) { - rv = sheng4_samda(state, cb, ctxt, sh, cached_accept_state, - cached_accept_id, single, offset, cur_buf, start, - end, scanned); - } else { - rv = sheng4_samd(state, cb, ctxt, sh, cached_accept_state, - cached_accept_id, single, offset, cur_buf, start, - end, scanned); - } - if (rv == MO_HALT_MATCHING) { - return MO_DEAD; - } - /* if we stopped before we expected, we found a match */ - if (rv == MO_MATCHES_PENDING) { - return MO_MATCHES_PENDING; - } - - rv = sheng_samd(state, cb, ctxt, sh, cached_accept_state, - cached_accept_id, single, offset, cur_buf, *scanned, - end, scanned); - } else { - if (has_accel) { - rv = sheng4_sama(state, cb, ctxt, sh, cached_accept_state, - cached_accept_id, single, offset, cur_buf, start, - end, scanned); - } else { - rv = sheng4_sam(state, cb, ctxt, sh, cached_accept_state, - cached_accept_id, single, offset, cur_buf, start, - end, scanned); - } - if (rv == MO_HALT_MATCHING) { - return MO_DEAD; - } - /* if we stopped before we expected, we found a match */ - if (rv == MO_MATCHES_PENDING) { - return MO_MATCHES_PENDING; - } - - rv = sheng_sam(state, cb, ctxt, sh, cached_accept_state, - cached_accept_id, single, offset, cur_buf, *scanned, end, - scanned); - } - if (rv == MO_HALT_MATCHING) { - return MO_DEAD; - } - /* if we stopped before we expected, we found a match */ - if (rv == MO_MATCHES_PENDING) { - return MO_MATCHES_PENDING; - } - return MO_ALIVE; -} - -static never_inline -char runSheng(const struct sheng *sh, struct mq *q, s64a b_end, - enum MatchMode mode) { - u8 state = *(u8 *)q->state; - u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE; - u8 has_accel = sh->flags & SHENG_FLAG_HAS_ACCEL; - u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT; - - u8 cached_accept_state = 0; - ReportID cached_accept_id = 0; - - DEBUG_PRINTF("starting Sheng execution in state %u\n", - state & SHENG_STATE_MASK); - - if (q->report_current) { - DEBUG_PRINTF("reporting current pending matches\n"); - assert(sh); - - q->report_current = 0; - - int rv; - if (single) { - rv = fireSingleReport(q->cb, q->context, sh->report, - q_cur_offset(q)); - } else { - rv = fireReports(sh, q->cb, q->context, state, q_cur_offset(q), - &cached_accept_state, &cached_accept_id, 0); - } - if (rv == MO_HALT_MATCHING) { - DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK); - return MO_DEAD; - } - - DEBUG_PRINTF("proceeding with matching\n"); - } - - assert(q_cur_type(q) == 
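/* Sketch of the queue model driving runSheng() (an illustration only,
 * using just the struct mq fields visible in this file): each queued
 * event carries a type and a location; the first pending event must be
 * MQE_START, and locations are relative to q->buffer, so a negative
 * location selects the history buffer instead:
 *
 *     assert(q_cur_type(q) == MQE_START);
 *     s64a loc = q_cur_loc(q);                 // may be negative
 *     const u8 *base = loc < 0 ? q->history + q->hlength : q->buffer;
 */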
MQE_START); - s64a start = q_cur_loc(q); - - DEBUG_PRINTF("offset: %lli, location: %lli, mode: %s\n", q->offset, start, - mode == CALLBACK_OUTPUT ? "CALLBACK OUTPUT" : - mode == NO_MATCHES ? "NO MATCHES" : - mode == STOP_AT_MATCH ? "STOP AT MATCH" : "???"); - - DEBUG_PRINTF("processing event @ %lli: %s\n", q->offset + q_cur_loc(q), - q_cur_type(q) == MQE_START ? "START" : - q_cur_type(q) == MQE_TOP ? "TOP" : - q_cur_type(q) == MQE_END ? "END" : "???"); - - const u8* cur_buf; - if (start < 0) { - DEBUG_PRINTF("negative location, scanning history\n"); - DEBUG_PRINTF("min location: %zd\n", -q->hlength); - cur_buf = q->history + q->hlength; - } else { - DEBUG_PRINTF("positive location, scanning buffer\n"); - DEBUG_PRINTF("max location: %lli\n", b_end); - cur_buf = q->buffer; - } - - /* if we our queue event is past our end */ - if (mode != NO_MATCHES && q_cur_loc(q) > b_end) { - DEBUG_PRINTF("current location past buffer end\n"); - DEBUG_PRINTF("setting q location to %llu\n", b_end); - DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK); - q->items[q->cur].location = b_end; - return MO_ALIVE; - } - - q->cur++; - - s64a cur_start = start; - - while (1) { - DEBUG_PRINTF("processing event @ %lli: %s\n", q->offset + q_cur_loc(q), - q_cur_type(q) == MQE_START ? "START" : - q_cur_type(q) == MQE_TOP ? "TOP" : - q_cur_type(q) == MQE_END ? "END" : "???"); - s64a end = q_cur_loc(q); - if (mode != NO_MATCHES) { - end = MIN(end, b_end); - } - assert(end <= (s64a) q->length); - s64a cur_end = end; - - /* we may cross the border between history and current buffer */ - if (cur_start < 0) { - cur_end = MIN(0, cur_end); - } - - DEBUG_PRINTF("start: %lli end: %lli\n", start, end); - - /* don't scan zero length buffer */ - if (cur_start != cur_end) { - const u8 * scanned = cur_buf; - char rv; - - if (mode == NO_MATCHES) { - runShengNm(sh, q->cb, q->context, q->offset, - &cached_accept_state, &cached_accept_id, cur_buf, - cur_buf + cur_start, cur_buf + cur_end, can_die, - has_accel, single, &scanned, &state); - } else if (mode == CALLBACK_OUTPUT) { - rv = runShengCb(sh, q->cb, q->context, q->offset, - &cached_accept_state, &cached_accept_id, - cur_buf, cur_buf + cur_start, cur_buf + cur_end, - can_die, has_accel, single, &scanned, &state); - if (rv == MO_DEAD) { - DEBUG_PRINTF("exiting in state %u\n", - state & SHENG_STATE_MASK); - return MO_DEAD; - } - } else if (mode == STOP_AT_MATCH) { - rv = runShengSam(sh, q->cb, q->context, q->offset, - &cached_accept_state, &cached_accept_id, - cur_buf, cur_buf + cur_start, - cur_buf + cur_end, can_die, has_accel, single, - &scanned, &state); - if (rv == MO_DEAD) { - DEBUG_PRINTF("exiting in state %u\n", - state & SHENG_STATE_MASK); - return rv; - } else if (rv == MO_MATCHES_PENDING) { - assert(q->cur); - DEBUG_PRINTF("found a match, setting q location to %zd\n", - scanned - cur_buf + 1); - q->cur--; - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = - scanned - cur_buf + 1; /* due to exiting early */ - *(u8 *)q->state = state; - DEBUG_PRINTF("exiting in state %u\n", - state & SHENG_STATE_MASK); - return rv; - } - } else { - assert(!"invalid scanning mode!"); - } - assert(scanned == cur_buf + cur_end); - - cur_start = cur_end; - } - - /* if we our queue event is past our end */ - if (mode != NO_MATCHES && q_cur_loc(q) > b_end) { - DEBUG_PRINTF("current location past buffer end\n"); - DEBUG_PRINTF("setting q location to %llu\n", b_end); - DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK); - q->cur--; - q->items[q->cur].type = 
MQE_START; - q->items[q->cur].location = b_end; - *(u8 *)q->state = state; - return MO_ALIVE; - } - - /* crossing over into actual buffer */ - if (cur_start == 0) { - DEBUG_PRINTF("positive location, scanning buffer\n"); - DEBUG_PRINTF("max offset: %lli\n", b_end); - cur_buf = q->buffer; - } - - /* continue scanning the same buffer */ - if (end != cur_end) { - continue; - } - - switch (q_cur_type(q)) { - case MQE_END: - *(u8 *)q->state = state; - q->cur++; - DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK); - if (can_die) { - return (state & SHENG_STATE_DEAD) ? MO_DEAD : MO_ALIVE; - } - return MO_ALIVE; - case MQE_TOP: - if (q->offset + cur_start == 0) { - DEBUG_PRINTF("Anchored start, going to state %u\n", - sh->anchored); - state = sh->anchored; - } else { - u8 new_state = get_aux(sh, state)->top; - DEBUG_PRINTF("Top event %u->%u\n", state & SHENG_STATE_MASK, - new_state & SHENG_STATE_MASK); - state = new_state; - } - break; - default: - assert(!"invalid queue event"); - break; - } - q->cur++; - } -} - -char nfaExecSheng_B(const struct NFA *n, u64a offset, const u8 *buffer, - size_t length, NfaCallback cb, void *context) { - DEBUG_PRINTF("smallwrite Sheng\n"); - assert(n->type == SHENG_NFA); - const struct sheng *sh = getImplNfa(n); - u8 state = sh->anchored; - u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE; - u8 has_accel = sh->flags & SHENG_FLAG_HAS_ACCEL; - u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT; - u8 cached_accept_state = 0; - ReportID cached_accept_id = 0; - - /* scan and report all matches */ - int rv; - s64a end = length; - const u8 *scanned; - - rv = runShengCb(sh, cb, context, offset, &cached_accept_state, - &cached_accept_id, buffer, buffer, buffer + end, can_die, - has_accel, single, &scanned, &state); - if (rv == MO_DEAD) { - DEBUG_PRINTF("exiting in state %u\n", - state & SHENG_STATE_MASK); - return MO_DEAD; - } - - DEBUG_PRINTF("%u\n", state & SHENG_STATE_MASK); - - const struct sstate_aux *aux = get_aux(sh, state); - - if (aux->accept_eod) { - DEBUG_PRINTF("Reporting EOD matches\n"); - fireReports(sh, cb, context, state, end + offset, &cached_accept_state, - &cached_accept_id, 1); - } - - return state & SHENG_STATE_DEAD ? 
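/* Illustration of the packed state byte tested throughout this file
 * (only the SHENG_STATE_* masks already used here are assumed): the low
 * nibble holds the DFA state id -- sheng proper has at most 16 states,
 * one PSHUFB lane per state -- and the high bits are status flags:
 *
 *     u8 id     = state & SHENG_STATE_MASK;    // current state id
 *     u8 dead   = state & SHENG_STATE_DEAD;    // scan can be abandoned
 *     u8 accept = state & SHENG_STATE_ACCEPT;  // fire reports here
 *     u8 accel  = state & SHENG_STATE_ACCEL;   // acceleration available
 */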
MO_DEAD : MO_ALIVE; -} - -char nfaExecSheng_Q(const struct NFA *n, struct mq *q, s64a end) { - const struct sheng *sh = get_sheng(n); - char rv = runSheng(sh, q, end, CALLBACK_OUTPUT); - return rv; -} - -char nfaExecSheng_Q2(const struct NFA *n, struct mq *q, s64a end) { - const struct sheng *sh = get_sheng(n); - char rv = runSheng(sh, q, end, STOP_AT_MATCH); - return rv; -} - -char nfaExecSheng_QR(const struct NFA *n, struct mq *q, ReportID report) { - assert(q_cur_type(q) == MQE_START); - - const struct sheng *sh = get_sheng(n); - char rv = runSheng(sh, q, 0 /* end */, NO_MATCHES); - - if (rv && nfaExecSheng_inAccept(n, report, q)) { - return MO_MATCHES_PENDING; - } - return rv; -} - -char nfaExecSheng_inAccept(const struct NFA *n, ReportID report, struct mq *q) { - assert(n && q); - - const struct sheng *sh = get_sheng(n); - u8 s = *(const u8 *)q->state; - DEBUG_PRINTF("checking accepts for %u\n", (u8)(s & SHENG_STATE_MASK)); - - const struct sstate_aux *aux = get_aux(sh, s); - - if (!aux->accept) { - return 0; - } - - return shengHasAccept(sh, aux, report); -} - -char nfaExecSheng_inAnyAccept(const struct NFA *n, struct mq *q) { - assert(n && q); - - const struct sheng *sh = get_sheng(n); - u8 s = *(const u8 *)q->state; - DEBUG_PRINTF("checking accepts for %u\n", (u8)(s & SHENG_STATE_MASK)); - - const struct sstate_aux *aux = get_aux(sh, s); - return !!aux->accept; -} - -char nfaExecSheng_testEOD(const struct NFA *nfa, const char *state, - UNUSED const char *streamState, u64a offset, - NfaCallback cb, void *ctxt) { - assert(nfa); - - const struct sheng *sh = get_sheng(nfa); - u8 s = *(const u8 *)state; - DEBUG_PRINTF("checking EOD accepts for %u\n", (u8)(s & SHENG_STATE_MASK)); - - const struct sstate_aux *aux = get_aux(sh, s); - - if (!aux->accept_eod) { - return MO_CONTINUE_MATCHING; - } - - return fireReports(sh, cb, ctxt, s, offset, NULL, NULL, 1); -} - -char nfaExecSheng_reportCurrent(const struct NFA *n, struct mq *q) { - const struct sheng *sh = (const struct sheng *)getImplNfa(n); - NfaCallback cb = q->cb; - void *ctxt = q->context; - u8 s = *(u8 *)q->state; - const struct sstate_aux *aux = get_aux(sh, s); - u64a offset = q_cur_offset(q); - u8 cached_state_id = 0; - ReportID cached_report_id = 0; - assert(q_cur_type(q) == MQE_START); - - if (aux->accept) { - if (sh->flags & SHENG_FLAG_SINGLE_REPORT) { - fireSingleReport(cb, ctxt, sh->report, offset); - } else { - fireReports(sh, cb, ctxt, s, offset, &cached_state_id, +/* include Sheng function definitions */ +#include "sheng_defs.h" + +static really_inline +char runShengCb(const struct sheng *sh, NfaCallback cb, void *ctxt, u64a offset, + u8 *const cached_accept_state, ReportID *const cached_accept_id, + const u8 *cur_buf, const u8 *start, const u8 *end, u8 can_die, + u8 has_accel, u8 single, const u8 **scanned, u8 *state) { + DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in callback mode\n", + (u64a)(end - start), offset); + DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf), + (s64a)(end - cur_buf)); + DEBUG_PRINTF("can die: %u has accel: %u single: %u\n", !!can_die, + !!has_accel, !!single); + int rv; + /* scan and report all matches */ + if (can_die) { + if (has_accel) { + rv = sheng4_coda(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, start, + end, scanned); + } else { + rv = sheng4_cod(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, start, + end, scanned); + } + if (rv == MO_HALT_MATCHING) { + return MO_DEAD; + } + rv = 
sheng_cod(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, *scanned, end, + scanned); + } else { + if (has_accel) { + rv = sheng4_coa(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, start, + end, scanned); + } else { + rv = sheng4_co(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, start, + end, scanned); + } + if (rv == MO_HALT_MATCHING) { + return MO_DEAD; + } + rv = sheng_co(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, *scanned, end, + scanned); + } + if (rv == MO_HALT_MATCHING) { + return MO_DEAD; + } + return MO_ALIVE; +} + +static really_inline +void runShengNm(const struct sheng *sh, NfaCallback cb, void *ctxt, u64a offset, + u8 *const cached_accept_state, ReportID *const cached_accept_id, + const u8 *cur_buf, const u8 *start, const u8 *end, u8 can_die, + u8 has_accel, u8 single, const u8 **scanned, u8 *state) { + DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in nomatch mode\n", + (u64a)(end - start), offset); + DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf), + (s64a)(end - cur_buf)); + DEBUG_PRINTF("can die: %u has accel: %u single: %u\n", !!can_die, + !!has_accel, !!single); + /* just scan the buffer */ + if (can_die) { + if (has_accel) { + sheng4_nmda(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, start, end, + scanned); + } else { + sheng4_nmd(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, start, end, + scanned); + } + sheng_nmd(state, cb, ctxt, sh, cached_accept_state, cached_accept_id, + single, offset, cur_buf, *scanned, end, scanned); + } else { + sheng4_nm(state, cb, ctxt, sh, cached_accept_state, cached_accept_id, + single, offset, cur_buf, start, end, scanned); + sheng_nm(state, cb, ctxt, sh, cached_accept_state, cached_accept_id, + single, offset, cur_buf, *scanned, end, scanned); + } +} + +static really_inline +char runShengSam(const struct sheng *sh, NfaCallback cb, void *ctxt, + u64a offset, u8 *const cached_accept_state, + ReportID *const cached_accept_id, const u8 *cur_buf, + const u8 *start, const u8 *end, u8 can_die, u8 has_accel, + u8 single, const u8 **scanned, u8 *state) { + DEBUG_PRINTF("Scanning %llu bytes (offset %llu) in stop at match mode\n", + (u64a)(end - start), offset); + DEBUG_PRINTF("start: %lli end: %lli\n", (s64a)(start - cur_buf), + (s64a)(end - cur_buf)); + DEBUG_PRINTF("can die: %u has accel: %u single: %u\n", !!can_die, + !!has_accel, !!single); + int rv; + /* scan until first match */ + if (can_die) { + if (has_accel) { + rv = sheng4_samda(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, start, + end, scanned); + } else { + rv = sheng4_samd(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, start, + end, scanned); + } + if (rv == MO_HALT_MATCHING) { + return MO_DEAD; + } + /* if we stopped before we expected, we found a match */ + if (rv == MO_MATCHES_PENDING) { + return MO_MATCHES_PENDING; + } + + rv = sheng_samd(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, *scanned, + end, scanned); + } else { + if (has_accel) { + rv = sheng4_sama(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, start, + end, scanned); + } else { + rv = sheng4_sam(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, start, + end, scanned); 
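/* The control flow above is the same in all three scan modes and is
 * worth spelling out (a sketch using only functions generated via
 * sheng_defs.h): each mode first runs the unrolled 4-bytes-per-iteration
 * variant (sheng4_*) over the bulk of the input, then hands the
 * remaining tail, starting at *scanned, to the byte-at-a-time variant
 * (sheng_*), since the unrolled loop only consumes whole 4-byte blocks:
 *
 *     rv = sheng4_sam(state, cb, ctxt, sh, cached_accept_state,
 *                     cached_accept_id, single, offset, cur_buf,
 *                     start, end, scanned);
 *     if (rv == MO_HALT_MATCHING) return MO_DEAD;
 *     if (rv == MO_MATCHES_PENDING) return rv;  // stopped on a match
 *     rv = sheng_sam(state, cb, ctxt, sh, cached_accept_state,
 *                    cached_accept_id, single, offset, cur_buf,
 *                    *scanned, end, scanned);
 */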
+ } + if (rv == MO_HALT_MATCHING) { + return MO_DEAD; + } + /* if we stopped before we expected, we found a match */ + if (rv == MO_MATCHES_PENDING) { + return MO_MATCHES_PENDING; + } + + rv = sheng_sam(state, cb, ctxt, sh, cached_accept_state, + cached_accept_id, single, offset, cur_buf, *scanned, end, + scanned); + } + if (rv == MO_HALT_MATCHING) { + return MO_DEAD; + } + /* if we stopped before we expected, we found a match */ + if (rv == MO_MATCHES_PENDING) { + return MO_MATCHES_PENDING; + } + return MO_ALIVE; +} + +static never_inline +char runSheng(const struct sheng *sh, struct mq *q, s64a b_end, + enum MatchMode mode) { + u8 state = *(u8 *)q->state; + u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE; + u8 has_accel = sh->flags & SHENG_FLAG_HAS_ACCEL; + u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT; + + u8 cached_accept_state = 0; + ReportID cached_accept_id = 0; + + DEBUG_PRINTF("starting Sheng execution in state %u\n", + state & SHENG_STATE_MASK); + + if (q->report_current) { + DEBUG_PRINTF("reporting current pending matches\n"); + assert(sh); + + q->report_current = 0; + + int rv; + if (single) { + rv = fireSingleReport(q->cb, q->context, sh->report, + q_cur_offset(q)); + } else { + rv = fireReports(sh, q->cb, q->context, state, q_cur_offset(q), + &cached_accept_state, &cached_accept_id, 0); + } + if (rv == MO_HALT_MATCHING) { + DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK); + return MO_DEAD; + } + + DEBUG_PRINTF("proceeding with matching\n"); + } + + assert(q_cur_type(q) == MQE_START); + s64a start = q_cur_loc(q); + + DEBUG_PRINTF("offset: %lli, location: %lli, mode: %s\n", q->offset, start, + mode == CALLBACK_OUTPUT ? "CALLBACK OUTPUT" : + mode == NO_MATCHES ? "NO MATCHES" : + mode == STOP_AT_MATCH ? "STOP AT MATCH" : "???"); + + DEBUG_PRINTF("processing event @ %lli: %s\n", q->offset + q_cur_loc(q), + q_cur_type(q) == MQE_START ? "START" : + q_cur_type(q) == MQE_TOP ? "TOP" : + q_cur_type(q) == MQE_END ? "END" : "???"); + + const u8* cur_buf; + if (start < 0) { + DEBUG_PRINTF("negative location, scanning history\n"); + DEBUG_PRINTF("min location: %zd\n", -q->hlength); + cur_buf = q->history + q->hlength; + } else { + DEBUG_PRINTF("positive location, scanning buffer\n"); + DEBUG_PRINTF("max location: %lli\n", b_end); + cur_buf = q->buffer; + } + + /* if we our queue event is past our end */ + if (mode != NO_MATCHES && q_cur_loc(q) > b_end) { + DEBUG_PRINTF("current location past buffer end\n"); + DEBUG_PRINTF("setting q location to %llu\n", b_end); + DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK); + q->items[q->cur].location = b_end; + return MO_ALIVE; + } + + q->cur++; + + s64a cur_start = start; + + while (1) { + DEBUG_PRINTF("processing event @ %lli: %s\n", q->offset + q_cur_loc(q), + q_cur_type(q) == MQE_START ? "START" : + q_cur_type(q) == MQE_TOP ? "TOP" : + q_cur_type(q) == MQE_END ? 
"END" : "???"); + s64a end = q_cur_loc(q); + if (mode != NO_MATCHES) { + end = MIN(end, b_end); + } + assert(end <= (s64a) q->length); + s64a cur_end = end; + + /* we may cross the border between history and current buffer */ + if (cur_start < 0) { + cur_end = MIN(0, cur_end); + } + + DEBUG_PRINTF("start: %lli end: %lli\n", start, end); + + /* don't scan zero length buffer */ + if (cur_start != cur_end) { + const u8 * scanned = cur_buf; + char rv; + + if (mode == NO_MATCHES) { + runShengNm(sh, q->cb, q->context, q->offset, + &cached_accept_state, &cached_accept_id, cur_buf, + cur_buf + cur_start, cur_buf + cur_end, can_die, + has_accel, single, &scanned, &state); + } else if (mode == CALLBACK_OUTPUT) { + rv = runShengCb(sh, q->cb, q->context, q->offset, + &cached_accept_state, &cached_accept_id, + cur_buf, cur_buf + cur_start, cur_buf + cur_end, + can_die, has_accel, single, &scanned, &state); + if (rv == MO_DEAD) { + DEBUG_PRINTF("exiting in state %u\n", + state & SHENG_STATE_MASK); + return MO_DEAD; + } + } else if (mode == STOP_AT_MATCH) { + rv = runShengSam(sh, q->cb, q->context, q->offset, + &cached_accept_state, &cached_accept_id, + cur_buf, cur_buf + cur_start, + cur_buf + cur_end, can_die, has_accel, single, + &scanned, &state); + if (rv == MO_DEAD) { + DEBUG_PRINTF("exiting in state %u\n", + state & SHENG_STATE_MASK); + return rv; + } else if (rv == MO_MATCHES_PENDING) { + assert(q->cur); + DEBUG_PRINTF("found a match, setting q location to %zd\n", + scanned - cur_buf + 1); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = + scanned - cur_buf + 1; /* due to exiting early */ + *(u8 *)q->state = state; + DEBUG_PRINTF("exiting in state %u\n", + state & SHENG_STATE_MASK); + return rv; + } + } else { + assert(!"invalid scanning mode!"); + } + assert(scanned == cur_buf + cur_end); + + cur_start = cur_end; + } + + /* if we our queue event is past our end */ + if (mode != NO_MATCHES && q_cur_loc(q) > b_end) { + DEBUG_PRINTF("current location past buffer end\n"); + DEBUG_PRINTF("setting q location to %llu\n", b_end); + DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = b_end; + *(u8 *)q->state = state; + return MO_ALIVE; + } + + /* crossing over into actual buffer */ + if (cur_start == 0) { + DEBUG_PRINTF("positive location, scanning buffer\n"); + DEBUG_PRINTF("max offset: %lli\n", b_end); + cur_buf = q->buffer; + } + + /* continue scanning the same buffer */ + if (end != cur_end) { + continue; + } + + switch (q_cur_type(q)) { + case MQE_END: + *(u8 *)q->state = state; + q->cur++; + DEBUG_PRINTF("exiting in state %u\n", state & SHENG_STATE_MASK); + if (can_die) { + return (state & SHENG_STATE_DEAD) ? 
MO_DEAD : MO_ALIVE; + } + return MO_ALIVE; + case MQE_TOP: + if (q->offset + cur_start == 0) { + DEBUG_PRINTF("Anchored start, going to state %u\n", + sh->anchored); + state = sh->anchored; + } else { + u8 new_state = get_aux(sh, state)->top; + DEBUG_PRINTF("Top event %u->%u\n", state & SHENG_STATE_MASK, + new_state & SHENG_STATE_MASK); + state = new_state; + } + break; + default: + assert(!"invalid queue event"); + break; + } + q->cur++; + } +} + +char nfaExecSheng_B(const struct NFA *n, u64a offset, const u8 *buffer, + size_t length, NfaCallback cb, void *context) { + DEBUG_PRINTF("smallwrite Sheng\n"); + assert(n->type == SHENG_NFA); + const struct sheng *sh = getImplNfa(n); + u8 state = sh->anchored; + u8 can_die = sh->flags & SHENG_FLAG_CAN_DIE; + u8 has_accel = sh->flags & SHENG_FLAG_HAS_ACCEL; + u8 single = sh->flags & SHENG_FLAG_SINGLE_REPORT; + u8 cached_accept_state = 0; + ReportID cached_accept_id = 0; + + /* scan and report all matches */ + int rv; + s64a end = length; + const u8 *scanned; + + rv = runShengCb(sh, cb, context, offset, &cached_accept_state, + &cached_accept_id, buffer, buffer, buffer + end, can_die, + has_accel, single, &scanned, &state); + if (rv == MO_DEAD) { + DEBUG_PRINTF("exiting in state %u\n", + state & SHENG_STATE_MASK); + return MO_DEAD; + } + + DEBUG_PRINTF("%u\n", state & SHENG_STATE_MASK); + + const struct sstate_aux *aux = get_aux(sh, state); + + if (aux->accept_eod) { + DEBUG_PRINTF("Reporting EOD matches\n"); + fireReports(sh, cb, context, state, end + offset, &cached_accept_state, + &cached_accept_id, 1); + } + + return state & SHENG_STATE_DEAD ? MO_DEAD : MO_ALIVE; +} + +char nfaExecSheng_Q(const struct NFA *n, struct mq *q, s64a end) { + const struct sheng *sh = get_sheng(n); + char rv = runSheng(sh, q, end, CALLBACK_OUTPUT); + return rv; +} + +char nfaExecSheng_Q2(const struct NFA *n, struct mq *q, s64a end) { + const struct sheng *sh = get_sheng(n); + char rv = runSheng(sh, q, end, STOP_AT_MATCH); + return rv; +} + +char nfaExecSheng_QR(const struct NFA *n, struct mq *q, ReportID report) { + assert(q_cur_type(q) == MQE_START); + + const struct sheng *sh = get_sheng(n); + char rv = runSheng(sh, q, 0 /* end */, NO_MATCHES); + + if (rv && nfaExecSheng_inAccept(n, report, q)) { + return MO_MATCHES_PENDING; + } + return rv; +} + +char nfaExecSheng_inAccept(const struct NFA *n, ReportID report, struct mq *q) { + assert(n && q); + + const struct sheng *sh = get_sheng(n); + u8 s = *(const u8 *)q->state; + DEBUG_PRINTF("checking accepts for %u\n", (u8)(s & SHENG_STATE_MASK)); + + const struct sstate_aux *aux = get_aux(sh, s); + + if (!aux->accept) { + return 0; + } + + return shengHasAccept(sh, aux, report); +} + +char nfaExecSheng_inAnyAccept(const struct NFA *n, struct mq *q) { + assert(n && q); + + const struct sheng *sh = get_sheng(n); + u8 s = *(const u8 *)q->state; + DEBUG_PRINTF("checking accepts for %u\n", (u8)(s & SHENG_STATE_MASK)); + + const struct sstate_aux *aux = get_aux(sh, s); + return !!aux->accept; +} + +char nfaExecSheng_testEOD(const struct NFA *nfa, const char *state, + UNUSED const char *streamState, u64a offset, + NfaCallback cb, void *ctxt) { + assert(nfa); + + const struct sheng *sh = get_sheng(nfa); + u8 s = *(const u8 *)state; + DEBUG_PRINTF("checking EOD accepts for %u\n", (u8)(s & SHENG_STATE_MASK)); + + const struct sstate_aux *aux = get_aux(sh, s); + + if (!aux->accept_eod) { + return MO_CONTINUE_MATCHING; + } + + return fireReports(sh, cb, ctxt, s, offset, NULL, NULL, 1); +} + +char nfaExecSheng_reportCurrent(const 
struct NFA *n, struct mq *q) { + const struct sheng *sh = (const struct sheng *)getImplNfa(n); + NfaCallback cb = q->cb; + void *ctxt = q->context; + u8 s = *(u8 *)q->state; + const struct sstate_aux *aux = get_aux(sh, s); + u64a offset = q_cur_offset(q); + u8 cached_state_id = 0; + ReportID cached_report_id = 0; + assert(q_cur_type(q) == MQE_START); + + if (aux->accept) { + if (sh->flags & SHENG_FLAG_SINGLE_REPORT) { + fireSingleReport(cb, ctxt, sh->report, offset); + } else { + fireReports(sh, cb, ctxt, s, offset, &cached_state_id, &cached_report_id, 0); - } - } - - return 0; -} - -char nfaExecSheng_initCompressedState(const struct NFA *nfa, u64a offset, - void *state, UNUSED u8 key) { - const struct sheng *sh = get_sheng(nfa); - u8 *s = (u8 *)state; - *s = offset ? sh->floating: sh->anchored; - return !(*s & SHENG_STATE_DEAD); -} - -char nfaExecSheng_queueInitState(const struct NFA *nfa, struct mq *q) { - assert(nfa->scratchStateSize == 1); - - /* starting in floating state */ - const struct sheng *sh = get_sheng(nfa); - *(u8 *)q->state = sh->floating; - DEBUG_PRINTF("starting in floating state\n"); - return 0; -} - -char nfaExecSheng_queueCompressState(UNUSED const struct NFA *nfa, - const struct mq *q, UNUSED s64a loc) { - void *dest = q->streamState; - const void *src = q->state; - assert(nfa->scratchStateSize == 1); - assert(nfa->streamStateSize == 1); - *(u8 *)dest = *(const u8 *)src; - return 0; -} - -char nfaExecSheng_expandState(UNUSED const struct NFA *nfa, void *dest, - const void *src, UNUSED u64a offset, - UNUSED u8 key) { - assert(nfa->scratchStateSize == 1); - assert(nfa->streamStateSize == 1); - *(u8 *)dest = *(const u8 *)src; - return 0; -} + } + } + + return 0; +} + +char nfaExecSheng_initCompressedState(const struct NFA *nfa, u64a offset, + void *state, UNUSED u8 key) { + const struct sheng *sh = get_sheng(nfa); + u8 *s = (u8 *)state; + *s = offset ? sh->floating: sh->anchored; + return !(*s & SHENG_STATE_DEAD); +} + +char nfaExecSheng_queueInitState(const struct NFA *nfa, struct mq *q) { + assert(nfa->scratchStateSize == 1); + + /* starting in floating state */ + const struct sheng *sh = get_sheng(nfa); + *(u8 *)q->state = sh->floating; + DEBUG_PRINTF("starting in floating state\n"); + return 0; +} + +char nfaExecSheng_queueCompressState(UNUSED const struct NFA *nfa, + const struct mq *q, UNUSED s64a loc) { + void *dest = q->streamState; + const void *src = q->state; + assert(nfa->scratchStateSize == 1); + assert(nfa->streamStateSize == 1); + *(u8 *)dest = *(const u8 *)src; + return 0; +} + +char nfaExecSheng_expandState(UNUSED const struct NFA *nfa, void *dest, + const void *src, UNUSED u64a offset, + UNUSED u8 key) { + assert(nfa->scratchStateSize == 1); + assert(nfa->streamStateSize == 1); + *(u8 *)dest = *(const u8 *)src; + return 0; +} #if defined(HAVE_AVX512VBMI) // Sheng32 diff --git a/contrib/libs/hyperscan/src/nfa/sheng.h b/contrib/libs/hyperscan/src/nfa/sheng.h index 7b90e3034f..b9785008e8 100644 --- a/contrib/libs/hyperscan/src/nfa/sheng.h +++ b/contrib/libs/hyperscan/src/nfa/sheng.h @@ -1,63 +1,63 @@ -/* +/* * Copyright (c) 2016-2020, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef SHENG_H_ -#define SHENG_H_ - -#include "callback.h" -#include "ue2common.h" - -struct mq; -struct NFA; - -#define nfaExecSheng_B_Reverse NFA_API_NO_IMPL -#define nfaExecSheng_zombie_status NFA_API_ZOMBIE_NO_IMPL - -char nfaExecSheng_Q(const struct NFA *n, struct mq *q, s64a end); -char nfaExecSheng_Q2(const struct NFA *n, struct mq *q, s64a end); -char nfaExecSheng_QR(const struct NFA *n, struct mq *q, ReportID report); -char nfaExecSheng_inAccept(const struct NFA *n, ReportID report, struct mq *q); -char nfaExecSheng_inAnyAccept(const struct NFA *n, struct mq *q); -char nfaExecSheng_queueInitState(const struct NFA *nfa, struct mq *q); -char nfaExecSheng_queueCompressState(const struct NFA *nfa, const struct mq *q, - s64a loc); -char nfaExecSheng_expandState(const struct NFA *nfa, void *dest, - const void *src, u64a offset, u8 key); -char nfaExecSheng_initCompressedState(const struct NFA *nfa, u64a offset, - void *state, u8 key); -char nfaExecSheng_testEOD(const struct NFA *nfa, const char *state, - const char *streamState, u64a offset, - NfaCallback callback, void *context); -char nfaExecSheng_reportCurrent(const struct NFA *n, struct mq *q); - -char nfaExecSheng_B(const struct NFA *n, u64a offset, const u8 *buffer, - size_t length, NfaCallback cb, void *context); - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SHENG_H_ +#define SHENG_H_ + +#include "callback.h" +#include "ue2common.h" + +struct mq; +struct NFA; + +#define nfaExecSheng_B_Reverse NFA_API_NO_IMPL +#define nfaExecSheng_zombie_status NFA_API_ZOMBIE_NO_IMPL + +char nfaExecSheng_Q(const struct NFA *n, struct mq *q, s64a end); +char nfaExecSheng_Q2(const struct NFA *n, struct mq *q, s64a end); +char nfaExecSheng_QR(const struct NFA *n, struct mq *q, ReportID report); +char nfaExecSheng_inAccept(const struct NFA *n, ReportID report, struct mq *q); +char nfaExecSheng_inAnyAccept(const struct NFA *n, struct mq *q); +char nfaExecSheng_queueInitState(const struct NFA *nfa, struct mq *q); +char nfaExecSheng_queueCompressState(const struct NFA *nfa, const struct mq *q, + s64a loc); +char nfaExecSheng_expandState(const struct NFA *nfa, void *dest, + const void *src, u64a offset, u8 key); +char nfaExecSheng_initCompressedState(const struct NFA *nfa, u64a offset, + void *state, u8 key); +char nfaExecSheng_testEOD(const struct NFA *nfa, const char *state, + const char *streamState, u64a offset, + NfaCallback callback, void *context); +char nfaExecSheng_reportCurrent(const struct NFA *n, struct mq *q); + +char nfaExecSheng_B(const struct NFA *n, u64a offset, const u8 *buffer, + size_t length, NfaCallback cb, void *context); + #if defined(HAVE_AVX512VBMI) #define nfaExecSheng32_B_Reverse NFA_API_NO_IMPL #define nfaExecSheng32_zombie_status NFA_API_ZOMBIE_NO_IMPL @@ -140,4 +140,4 @@ char nfaExecSheng64_B(const struct NFA *n, u64a offset, const u8 *buffer, #define nfaExecSheng64_B NFA_API_NO_IMPL #endif // end of HAVE_AVX512VBMI -#endif /* SHENG_H_ */ +#endif /* SHENG_H_ */ diff --git a/contrib/libs/hyperscan/src/nfa/sheng_defs.h b/contrib/libs/hyperscan/src/nfa/sheng_defs.h index 390af75221..c2ec1d09fa 100644 --- a/contrib/libs/hyperscan/src/nfa/sheng_defs.h +++ b/contrib/libs/hyperscan/src/nfa/sheng_defs.h @@ -1,57 +1,57 @@ -/* +/* * Copyright (c) 2016-2020, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef SHENG_DEFS_H -#define SHENG_DEFS_H - -/* - * Utility functions used by various versions of Sheng engine - */ -static really_inline -u8 isDeadState(const u8 a) { - return a & SHENG_STATE_DEAD; -} - -static really_inline -u8 isAcceptState(const u8 a) { - return a & SHENG_STATE_ACCEPT; -} - -static really_inline -u8 isAccelState(const u8 a) { - return a & SHENG_STATE_ACCEL; -} - -static really_inline -u8 hasInterestingStates(const u8 a, const u8 b, const u8 c, const u8 d) { - return (a | b | c | d) & (SHENG_STATE_FLAG_MASK); -} - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef SHENG_DEFS_H +#define SHENG_DEFS_H + +/* + * Utility functions used by various versions of Sheng engine + */ +static really_inline +u8 isDeadState(const u8 a) { + return a & SHENG_STATE_DEAD; +} + +static really_inline +u8 isAcceptState(const u8 a) { + return a & SHENG_STATE_ACCEPT; +} + +static really_inline +u8 isAccelState(const u8 a) { + return a & SHENG_STATE_ACCEL; +} + +static really_inline +u8 hasInterestingStates(const u8 a, const u8 b, const u8 c, const u8 d) { + return (a | b | c | d) & (SHENG_STATE_FLAG_MASK); +} + #if defined(HAVE_AVX512VBMI) static really_inline u8 isDeadState32(const u8 a) { @@ -89,25 +89,25 @@ u8 hasInterestingStates64(const u8 a, const u8 b, const u8 c, const u8 d) { } #endif -/* these functions should be optimized out, used by NO_MATCHES mode */ -static really_inline -u8 dummyFunc4(UNUSED const u8 a, UNUSED const u8 b, UNUSED const u8 c, - UNUSED const u8 d) { - return 0; -} - -static really_inline -u8 dummyFunc(UNUSED const u8 a) { - return 0; -} - -/* - * Sheng function definitions for single byte loops - */ -/* callback output, can die */ -#define SHENG_IMPL sheng_cod -#define DEAD_FUNC isDeadState -#define ACCEPT_FUNC isAcceptState +/* these functions should be optimized out, used by NO_MATCHES mode */ +static really_inline +u8 dummyFunc4(UNUSED const u8 a, UNUSED const u8 b, UNUSED const u8 c, + UNUSED const u8 d) { + return 0; +} + +static really_inline +u8 dummyFunc(UNUSED const u8 a) { + return 0; +} + +/* + * Sheng function definitions for single byte loops + */ +/* callback output, can die */ +#define SHENG_IMPL sheng_cod +#define DEAD_FUNC isDeadState +#define ACCEPT_FUNC isAcceptState #if defined(HAVE_AVX512VBMI) #define SHENG32_IMPL sheng32_cod #define DEAD_FUNC32 isDeadState32 @@ -116,11 +116,11 @@ u8 dummyFunc(UNUSED const u8 a) { #define DEAD_FUNC64 isDeadState64 #define ACCEPT_FUNC64 isAcceptState64 #endif -#define STOP_AT_MATCH 0 -#include "sheng_impl.h" -#undef SHENG_IMPL -#undef DEAD_FUNC -#undef ACCEPT_FUNC +#define STOP_AT_MATCH 0 +#include "sheng_impl.h" +#undef SHENG_IMPL +#undef DEAD_FUNC +#undef ACCEPT_FUNC #if defined(HAVE_AVX512VBMI) #undef SHENG32_IMPL #undef DEAD_FUNC32 @@ -129,12 +129,12 @@ u8 dummyFunc(UNUSED const u8 a) { #undef DEAD_FUNC64 #undef ACCEPT_FUNC64 #endif -#undef STOP_AT_MATCH - -/* callback output, can't die */ -#define SHENG_IMPL sheng_co -#define DEAD_FUNC dummyFunc -#define ACCEPT_FUNC isAcceptState +#undef STOP_AT_MATCH + +/* callback output, can't die */ +#define SHENG_IMPL sheng_co +#define DEAD_FUNC dummyFunc +#define ACCEPT_FUNC isAcceptState #if defined(HAVE_AVX512VBMI) #define SHENG32_IMPL sheng32_co #define DEAD_FUNC32 dummyFunc @@ -143,11 +143,11 @@ u8 dummyFunc(UNUSED const u8 a) { #define DEAD_FUNC64 dummyFunc #define ACCEPT_FUNC64 isAcceptState64 #endif -#define STOP_AT_MATCH 0 -#include "sheng_impl.h" -#undef SHENG_IMPL -#undef DEAD_FUNC -#undef ACCEPT_FUNC +#define STOP_AT_MATCH 0 +#include "sheng_impl.h" +#undef SHENG_IMPL +#undef DEAD_FUNC +#undef ACCEPT_FUNC #if defined(HAVE_AVX512VBMI) #undef SHENG32_IMPL #undef DEAD_FUNC32 @@ -156,12 +156,12 @@ u8 dummyFunc(UNUSED const u8 a) { #undef DEAD_FUNC64 #undef ACCEPT_FUNC64 #endif -#undef STOP_AT_MATCH - -/* stop at match, can die */ -#define SHENG_IMPL sheng_samd -#define DEAD_FUNC isDeadState -#define ACCEPT_FUNC isAcceptState +#undef STOP_AT_MATCH + +/* stop at match, can die */ +#define SHENG_IMPL sheng_samd +#define DEAD_FUNC isDeadState +#define ACCEPT_FUNC isAcceptState #if defined(HAVE_AVX512VBMI) #define SHENG32_IMPL 
sheng32_samd #define DEAD_FUNC32 isDeadState32 @@ -170,11 +170,11 @@ u8 dummyFunc(UNUSED const u8 a) { #define DEAD_FUNC64 isDeadState64 #define ACCEPT_FUNC64 isAcceptState64 #endif -#define STOP_AT_MATCH 1 -#include "sheng_impl.h" -#undef SHENG_IMPL -#undef DEAD_FUNC -#undef ACCEPT_FUNC +#define STOP_AT_MATCH 1 +#include "sheng_impl.h" +#undef SHENG_IMPL +#undef DEAD_FUNC +#undef ACCEPT_FUNC #if defined(HAVE_AVX512VBMI) #undef SHENG32_IMPL #undef DEAD_FUNC32 @@ -183,12 +183,12 @@ u8 dummyFunc(UNUSED const u8 a) { #undef DEAD_FUNC64 #undef ACCEPT_FUNC64 #endif -#undef STOP_AT_MATCH - -/* stop at match, can't die */ -#define SHENG_IMPL sheng_sam -#define DEAD_FUNC dummyFunc -#define ACCEPT_FUNC isAcceptState +#undef STOP_AT_MATCH + +/* stop at match, can't die */ +#define SHENG_IMPL sheng_sam +#define DEAD_FUNC dummyFunc +#define ACCEPT_FUNC isAcceptState #if defined(HAVE_AVX512VBMI) #define SHENG32_IMPL sheng32_sam #define DEAD_FUNC32 dummyFunc @@ -197,11 +197,11 @@ u8 dummyFunc(UNUSED const u8 a) { #define DEAD_FUNC64 dummyFunc #define ACCEPT_FUNC64 isAcceptState64 #endif -#define STOP_AT_MATCH 1 -#include "sheng_impl.h" -#undef SHENG_IMPL -#undef DEAD_FUNC -#undef ACCEPT_FUNC +#define STOP_AT_MATCH 1 +#include "sheng_impl.h" +#undef SHENG_IMPL +#undef DEAD_FUNC +#undef ACCEPT_FUNC #if defined(HAVE_AVX512VBMI) #undef SHENG32_IMPL #undef DEAD_FUNC32 @@ -210,12 +210,12 @@ u8 dummyFunc(UNUSED const u8 a) { #undef DEAD_FUNC64 #undef ACCEPT_FUNC64 #endif -#undef STOP_AT_MATCH - -/* no match, can die */ -#define SHENG_IMPL sheng_nmd -#define DEAD_FUNC isDeadState -#define ACCEPT_FUNC dummyFunc +#undef STOP_AT_MATCH + +/* no match, can die */ +#define SHENG_IMPL sheng_nmd +#define DEAD_FUNC isDeadState +#define ACCEPT_FUNC dummyFunc #if defined(HAVE_AVX512VBMI) #define SHENG32_IMPL sheng32_nmd #define DEAD_FUNC32 isDeadState32 @@ -224,11 +224,11 @@ u8 dummyFunc(UNUSED const u8 a) { #define DEAD_FUNC64 isDeadState64 #define ACCEPT_FUNC64 dummyFunc #endif -#define STOP_AT_MATCH 0 -#include "sheng_impl.h" -#undef SHENG_IMPL -#undef DEAD_FUNC -#undef ACCEPT_FUNC +#define STOP_AT_MATCH 0 +#include "sheng_impl.h" +#undef SHENG_IMPL +#undef DEAD_FUNC +#undef ACCEPT_FUNC #if defined(HAVE_AVX512VBMI) #undef SHENG32_IMPL #undef DEAD_FUNC32 @@ -237,12 +237,12 @@ u8 dummyFunc(UNUSED const u8 a) { #undef DEAD_FUNC64 #undef ACCEPT_FUNC64 #endif -#undef STOP_AT_MATCH - -/* no match, can't die */ -#define SHENG_IMPL sheng_nm -#define DEAD_FUNC dummyFunc -#define ACCEPT_FUNC dummyFunc +#undef STOP_AT_MATCH + +/* no match, can't die */ +#define SHENG_IMPL sheng_nm +#define DEAD_FUNC dummyFunc +#define ACCEPT_FUNC dummyFunc #if defined(HAVE_AVX512VBMI) #define SHENG32_IMPL sheng32_nm #define DEAD_FUNC32 dummyFunc @@ -251,11 +251,11 @@ u8 dummyFunc(UNUSED const u8 a) { #define DEAD_FUNC64 dummyFunc #define ACCEPT_FUNC64 dummyFunc #endif -#define STOP_AT_MATCH 0 -#include "sheng_impl.h" -#undef SHENG_IMPL -#undef DEAD_FUNC -#undef ACCEPT_FUNC +#define STOP_AT_MATCH 0 +#include "sheng_impl.h" +#undef SHENG_IMPL +#undef DEAD_FUNC +#undef ACCEPT_FUNC #if defined(HAVE_AVX512VBMI) #undef SHENG32_IMPL #undef DEAD_FUNC32 @@ -264,19 +264,19 @@ u8 dummyFunc(UNUSED const u8 a) { #undef DEAD_FUNC64 #undef ACCEPT_FUNC64 #endif -#undef STOP_AT_MATCH - -/* - * Sheng function definitions for 4-byte loops - */ -/* callback output, can die, accelerated */ -#define SHENG_IMPL sheng4_coda -#define INTERESTING_FUNC hasInterestingStates -#define INNER_DEAD_FUNC isDeadState -#define OUTER_DEAD_FUNC dummyFunc -#define INNER_ACCEL_FUNC 
isAccelState -#define OUTER_ACCEL_FUNC dummyFunc -#define ACCEPT_FUNC isAcceptState +#undef STOP_AT_MATCH + +/* + * Sheng function definitions for 4-byte loops + */ +/* callback output, can die, accelerated */ +#define SHENG_IMPL sheng4_coda +#define INTERESTING_FUNC hasInterestingStates +#define INNER_DEAD_FUNC isDeadState +#define OUTER_DEAD_FUNC dummyFunc +#define INNER_ACCEL_FUNC isAccelState +#define OUTER_ACCEL_FUNC dummyFunc +#define ACCEPT_FUNC isAcceptState #if defined(HAVE_AVX512VBMI) #define SHENG32_IMPL sheng32_4_coda #define INTERESTING_FUNC32 hasInterestingStates32 @@ -287,15 +287,15 @@ u8 dummyFunc(UNUSED const u8 a) { #define ACCEPT_FUNC32 isAcceptState32 #define NO_SHENG64_IMPL #endif -#define STOP_AT_MATCH 0 -#include "sheng_impl4.h" -#undef SHENG_IMPL -#undef INTERESTING_FUNC -#undef INNER_DEAD_FUNC -#undef OUTER_DEAD_FUNC -#undef INNER_ACCEL_FUNC -#undef OUTER_ACCEL_FUNC -#undef ACCEPT_FUNC +#define STOP_AT_MATCH 0 +#include "sheng_impl4.h" +#undef SHENG_IMPL +#undef INTERESTING_FUNC +#undef INNER_DEAD_FUNC +#undef OUTER_DEAD_FUNC +#undef INNER_ACCEL_FUNC +#undef OUTER_ACCEL_FUNC +#undef ACCEPT_FUNC #if defined(HAVE_AVX512VBMI) #undef SHENG32_IMPL #undef INTERESTING_FUNC32 @@ -306,16 +306,16 @@ u8 dummyFunc(UNUSED const u8 a) { #undef ACCEPT_FUNC32 #undef NO_SHENG64_IMPL #endif -#undef STOP_AT_MATCH - -/* callback output, can die, not accelerated */ -#define SHENG_IMPL sheng4_cod -#define INTERESTING_FUNC hasInterestingStates -#define INNER_DEAD_FUNC isDeadState -#define OUTER_DEAD_FUNC dummyFunc -#define INNER_ACCEL_FUNC dummyFunc -#define OUTER_ACCEL_FUNC dummyFunc -#define ACCEPT_FUNC isAcceptState +#undef STOP_AT_MATCH + +/* callback output, can die, not accelerated */ +#define SHENG_IMPL sheng4_cod +#define INTERESTING_FUNC hasInterestingStates +#define INNER_DEAD_FUNC isDeadState +#define OUTER_DEAD_FUNC dummyFunc +#define INNER_ACCEL_FUNC dummyFunc +#define OUTER_ACCEL_FUNC dummyFunc +#define ACCEPT_FUNC isAcceptState #if defined(HAVE_AVX512VBMI) #define SHENG32_IMPL sheng32_4_cod #define INTERESTING_FUNC32 hasInterestingStates32 @@ -330,15 +330,15 @@ u8 dummyFunc(UNUSED const u8 a) { #define OUTER_DEAD_FUNC64 dummyFunc #define ACCEPT_FUNC64 isAcceptState64 #endif -#define STOP_AT_MATCH 0 -#include "sheng_impl4.h" -#undef SHENG_IMPL -#undef INTERESTING_FUNC -#undef INNER_DEAD_FUNC -#undef OUTER_DEAD_FUNC -#undef INNER_ACCEL_FUNC -#undef OUTER_ACCEL_FUNC -#undef ACCEPT_FUNC +#define STOP_AT_MATCH 0 +#include "sheng_impl4.h" +#undef SHENG_IMPL +#undef INTERESTING_FUNC +#undef INNER_DEAD_FUNC +#undef OUTER_DEAD_FUNC +#undef INNER_ACCEL_FUNC +#undef OUTER_ACCEL_FUNC +#undef ACCEPT_FUNC #if defined(HAVE_AVX512VBMI) #undef SHENG32_IMPL #undef INTERESTING_FUNC32 @@ -353,16 +353,16 @@ u8 dummyFunc(UNUSED const u8 a) { #undef OUTER_DEAD_FUNC64 #undef ACCEPT_FUNC64 #endif -#undef STOP_AT_MATCH - -/* callback output, can't die, accelerated */ -#define SHENG_IMPL sheng4_coa -#define INTERESTING_FUNC hasInterestingStates -#define INNER_DEAD_FUNC dummyFunc -#define OUTER_DEAD_FUNC dummyFunc -#define INNER_ACCEL_FUNC isAccelState -#define OUTER_ACCEL_FUNC dummyFunc -#define ACCEPT_FUNC isAcceptState +#undef STOP_AT_MATCH + +/* callback output, can't die, accelerated */ +#define SHENG_IMPL sheng4_coa +#define INTERESTING_FUNC hasInterestingStates +#define INNER_DEAD_FUNC dummyFunc +#define OUTER_DEAD_FUNC dummyFunc +#define INNER_ACCEL_FUNC isAccelState +#define OUTER_ACCEL_FUNC dummyFunc +#define ACCEPT_FUNC isAcceptState #if defined(HAVE_AVX512VBMI) #define SHENG32_IMPL 
sheng32_4_coa #define INTERESTING_FUNC32 hasInterestingStates32 @@ -373,15 +373,15 @@ u8 dummyFunc(UNUSED const u8 a) { #define ACCEPT_FUNC32 isAcceptState32 #define NO_SHENG64_IMPL #endif -#define STOP_AT_MATCH 0 -#include "sheng_impl4.h" -#undef SHENG_IMPL -#undef INTERESTING_FUNC -#undef INNER_DEAD_FUNC -#undef OUTER_DEAD_FUNC -#undef INNER_ACCEL_FUNC -#undef OUTER_ACCEL_FUNC -#undef ACCEPT_FUNC +#define STOP_AT_MATCH 0 +#include "sheng_impl4.h" +#undef SHENG_IMPL +#undef INTERESTING_FUNC +#undef INNER_DEAD_FUNC +#undef OUTER_DEAD_FUNC +#undef INNER_ACCEL_FUNC +#undef OUTER_ACCEL_FUNC +#undef ACCEPT_FUNC #if defined(HAVE_AVX512VBMI) #undef SHENG32_IMPL #undef INTERESTING_FUNC32 @@ -392,16 +392,16 @@ u8 dummyFunc(UNUSED const u8 a) { #undef ACCEPT_FUNC32 #undef NO_SHENG64_IMPL #endif -#undef STOP_AT_MATCH - -/* callback output, can't die, not accelerated */ -#define SHENG_IMPL sheng4_co -#define INTERESTING_FUNC hasInterestingStates -#define INNER_DEAD_FUNC dummyFunc -#define OUTER_DEAD_FUNC dummyFunc -#define INNER_ACCEL_FUNC dummyFunc -#define OUTER_ACCEL_FUNC dummyFunc -#define ACCEPT_FUNC isAcceptState +#undef STOP_AT_MATCH + +/* callback output, can't die, not accelerated */ +#define SHENG_IMPL sheng4_co +#define INTERESTING_FUNC hasInterestingStates +#define INNER_DEAD_FUNC dummyFunc +#define OUTER_DEAD_FUNC dummyFunc +#define INNER_ACCEL_FUNC dummyFunc +#define OUTER_ACCEL_FUNC dummyFunc +#define ACCEPT_FUNC isAcceptState #if defined(HAVE_AVX512VBMI) #define SHENG32_IMPL sheng32_4_co #define INTERESTING_FUNC32 hasInterestingStates32 @@ -416,15 +416,15 @@ u8 dummyFunc(UNUSED const u8 a) { #define OUTER_DEAD_FUNC64 dummyFunc #define ACCEPT_FUNC64 isAcceptState64 #endif -#define STOP_AT_MATCH 0 -#include "sheng_impl4.h" -#undef SHENG_IMPL -#undef INTERESTING_FUNC -#undef INNER_DEAD_FUNC -#undef OUTER_DEAD_FUNC -#undef INNER_ACCEL_FUNC -#undef OUTER_ACCEL_FUNC -#undef ACCEPT_FUNC +#define STOP_AT_MATCH 0 +#include "sheng_impl4.h" +#undef SHENG_IMPL +#undef INTERESTING_FUNC +#undef INNER_DEAD_FUNC +#undef OUTER_DEAD_FUNC +#undef INNER_ACCEL_FUNC +#undef OUTER_ACCEL_FUNC +#undef ACCEPT_FUNC #if defined(HAVE_AVX512VBMI) #undef SHENG32_IMPL #undef INTERESTING_FUNC32 @@ -439,16 +439,16 @@ u8 dummyFunc(UNUSED const u8 a) { #undef OUTER_DEAD_FUNC64 #undef ACCEPT_FUNC64 #endif -#undef STOP_AT_MATCH - -/* stop at match, can die, accelerated */ -#define SHENG_IMPL sheng4_samda -#define INTERESTING_FUNC hasInterestingStates -#define INNER_DEAD_FUNC isDeadState -#define OUTER_DEAD_FUNC dummyFunc -#define INNER_ACCEL_FUNC isAccelState -#define OUTER_ACCEL_FUNC dummyFunc -#define ACCEPT_FUNC isAcceptState +#undef STOP_AT_MATCH + +/* stop at match, can die, accelerated */ +#define SHENG_IMPL sheng4_samda +#define INTERESTING_FUNC hasInterestingStates +#define INNER_DEAD_FUNC isDeadState +#define OUTER_DEAD_FUNC dummyFunc +#define INNER_ACCEL_FUNC isAccelState +#define OUTER_ACCEL_FUNC dummyFunc +#define ACCEPT_FUNC isAcceptState #if defined(HAVE_AVX512VBMI) #define SHENG32_IMPL sheng32_4_samda #define INTERESTING_FUNC32 hasInterestingStates32 @@ -459,15 +459,15 @@ u8 dummyFunc(UNUSED const u8 a) { #define ACCEPT_FUNC32 isAcceptState32 #define NO_SHENG64_IMPL #endif -#define STOP_AT_MATCH 1 -#include "sheng_impl4.h" -#undef SHENG_IMPL -#undef INTERESTING_FUNC -#undef INNER_DEAD_FUNC -#undef OUTER_DEAD_FUNC -#undef INNER_ACCEL_FUNC -#undef OUTER_ACCEL_FUNC -#undef ACCEPT_FUNC +#define STOP_AT_MATCH 1 +#include "sheng_impl4.h" +#undef SHENG_IMPL +#undef INTERESTING_FUNC +#undef INNER_DEAD_FUNC +#undef 
OUTER_DEAD_FUNC +#undef INNER_ACCEL_FUNC +#undef OUTER_ACCEL_FUNC +#undef ACCEPT_FUNC #if defined(HAVE_AVX512VBMI) #undef SHENG32_IMPL #undef INTERESTING_FUNC32 @@ -478,16 +478,16 @@ u8 dummyFunc(UNUSED const u8 a) { #undef ACCEPT_FUNC32 #undef NO_SHENG64_IMPL #endif -#undef STOP_AT_MATCH - -/* stop at match, can die, not accelerated */ -#define SHENG_IMPL sheng4_samd -#define INTERESTING_FUNC hasInterestingStates -#define INNER_DEAD_FUNC isDeadState -#define OUTER_DEAD_FUNC dummyFunc -#define INNER_ACCEL_FUNC dummyFunc -#define OUTER_ACCEL_FUNC dummyFunc -#define ACCEPT_FUNC isAcceptState +#undef STOP_AT_MATCH + +/* stop at match, can die, not accelerated */ +#define SHENG_IMPL sheng4_samd +#define INTERESTING_FUNC hasInterestingStates +#define INNER_DEAD_FUNC isDeadState +#define OUTER_DEAD_FUNC dummyFunc +#define INNER_ACCEL_FUNC dummyFunc +#define OUTER_ACCEL_FUNC dummyFunc +#define ACCEPT_FUNC isAcceptState #if defined(HAVE_AVX512VBMI) #define SHENG32_IMPL sheng32_4_samd #define INTERESTING_FUNC32 hasInterestingStates32 @@ -502,15 +502,15 @@ u8 dummyFunc(UNUSED const u8 a) { #define OUTER_DEAD_FUNC64 dummyFunc #define ACCEPT_FUNC64 isAcceptState64 #endif -#define STOP_AT_MATCH 1 -#include "sheng_impl4.h" -#undef SHENG_IMPL -#undef INTERESTING_FUNC -#undef INNER_DEAD_FUNC -#undef OUTER_DEAD_FUNC -#undef INNER_ACCEL_FUNC -#undef OUTER_ACCEL_FUNC -#undef ACCEPT_FUNC +#define STOP_AT_MATCH 1 +#include "sheng_impl4.h" +#undef SHENG_IMPL +#undef INTERESTING_FUNC +#undef INNER_DEAD_FUNC +#undef OUTER_DEAD_FUNC +#undef INNER_ACCEL_FUNC +#undef OUTER_ACCEL_FUNC +#undef ACCEPT_FUNC #if defined(HAVE_AVX512VBMI) #undef SHENG32_IMPL #undef INTERESTING_FUNC32 @@ -525,16 +525,16 @@ u8 dummyFunc(UNUSED const u8 a) { #undef OUTER_DEAD_FUNC64 #undef ACCEPT_FUNC64 #endif -#undef STOP_AT_MATCH - -/* stop at match, can't die, accelerated */ -#define SHENG_IMPL sheng4_sama -#define INTERESTING_FUNC hasInterestingStates -#define INNER_DEAD_FUNC dummyFunc -#define OUTER_DEAD_FUNC dummyFunc -#define INNER_ACCEL_FUNC isAccelState -#define OUTER_ACCEL_FUNC dummyFunc -#define ACCEPT_FUNC isAcceptState +#undef STOP_AT_MATCH + +/* stop at match, can't die, accelerated */ +#define SHENG_IMPL sheng4_sama +#define INTERESTING_FUNC hasInterestingStates +#define INNER_DEAD_FUNC dummyFunc +#define OUTER_DEAD_FUNC dummyFunc +#define INNER_ACCEL_FUNC isAccelState +#define OUTER_ACCEL_FUNC dummyFunc +#define ACCEPT_FUNC isAcceptState #if defined(HAVE_AVX512VBMI) #define SHENG32_IMPL sheng32_4_sama #define INTERESTING_FUNC32 hasInterestingStates32 @@ -545,15 +545,15 @@ u8 dummyFunc(UNUSED const u8 a) { #define ACCEPT_FUNC32 isAcceptState32 #define NO_SHENG64_IMPL #endif -#define STOP_AT_MATCH 1 -#include "sheng_impl4.h" -#undef SHENG_IMPL -#undef INTERESTING_FUNC -#undef INNER_DEAD_FUNC -#undef OUTER_DEAD_FUNC -#undef INNER_ACCEL_FUNC -#undef OUTER_ACCEL_FUNC -#undef ACCEPT_FUNC +#define STOP_AT_MATCH 1 +#include "sheng_impl4.h" +#undef SHENG_IMPL +#undef INTERESTING_FUNC +#undef INNER_DEAD_FUNC +#undef OUTER_DEAD_FUNC +#undef INNER_ACCEL_FUNC +#undef OUTER_ACCEL_FUNC +#undef ACCEPT_FUNC #if defined(HAVE_AVX512VBMI) #undef SHENG32_IMPL #undef INTERESTING_FUNC32 @@ -564,16 +564,16 @@ u8 dummyFunc(UNUSED const u8 a) { #undef ACCEPT_FUNC32 #undef NO_SHENG64_IMPL #endif -#undef STOP_AT_MATCH - -/* stop at match, can't die, not accelerated */ -#define SHENG_IMPL sheng4_sam -#define INTERESTING_FUNC hasInterestingStates -#define INNER_DEAD_FUNC dummyFunc -#define OUTER_DEAD_FUNC dummyFunc -#define INNER_ACCEL_FUNC dummyFunc 
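/* How this header stamps out the variants above (sketch; the names are
 * the ones defined in this file): configure the template with a handful
 * of macros, include sheng_impl.h / sheng_impl4.h to emit one scan
 * function, then undef everything and repeat for the next variant:
 *
 *     #define SHENG_IMPL    sheng_cod       // name of emitted function
 *     #define DEAD_FUNC     isDeadState     // real check: variant can die
 *     #define ACCEPT_FUNC   isAcceptState   // callback-output variant
 *     #define STOP_AT_MATCH 0
 *     #include "sheng_impl.h"               // expands the scan loop
 *     #undef SHENG_IMPL
 *     #undef DEAD_FUNC
 *     #undef ACCEPT_FUNC
 *     #undef STOP_AT_MATCH
 */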
-#define OUTER_ACCEL_FUNC dummyFunc -#define ACCEPT_FUNC isAcceptState +#undef STOP_AT_MATCH + +/* stop at match, can't die, not accelerated */ +#define SHENG_IMPL sheng4_sam +#define INTERESTING_FUNC hasInterestingStates +#define INNER_DEAD_FUNC dummyFunc +#define OUTER_DEAD_FUNC dummyFunc +#define INNER_ACCEL_FUNC dummyFunc +#define OUTER_ACCEL_FUNC dummyFunc +#define ACCEPT_FUNC isAcceptState #if defined(HAVE_AVX512VBMI) #define SHENG32_IMPL sheng32_4_sam #define INTERESTING_FUNC32 hasInterestingStates32 @@ -588,15 +588,15 @@ u8 dummyFunc(UNUSED const u8 a) { #define OUTER_DEAD_FUNC64 dummyFunc #define ACCEPT_FUNC64 isAcceptState64 #endif -#define STOP_AT_MATCH 1 -#include "sheng_impl4.h" -#undef SHENG_IMPL -#undef INTERESTING_FUNC -#undef INNER_DEAD_FUNC -#undef OUTER_DEAD_FUNC -#undef INNER_ACCEL_FUNC -#undef OUTER_ACCEL_FUNC -#undef ACCEPT_FUNC +#define STOP_AT_MATCH 1 +#include "sheng_impl4.h" +#undef SHENG_IMPL +#undef INTERESTING_FUNC +#undef INNER_DEAD_FUNC +#undef OUTER_DEAD_FUNC +#undef INNER_ACCEL_FUNC +#undef OUTER_ACCEL_FUNC +#undef ACCEPT_FUNC #if defined(HAVE_AVX512VBMI) #undef SHENG32_IMPL #undef INTERESTING_FUNC32 @@ -611,18 +611,18 @@ u8 dummyFunc(UNUSED const u8 a) { #undef OUTER_DEAD_FUNC64 #undef ACCEPT_FUNC64 #endif -#undef STOP_AT_MATCH - -/* no-match have interesting func as dummy, and die/accel checks are outer */ - -/* no match, can die, accelerated */ -#define SHENG_IMPL sheng4_nmda -#define INTERESTING_FUNC dummyFunc4 -#define INNER_DEAD_FUNC dummyFunc -#define OUTER_DEAD_FUNC isDeadState -#define INNER_ACCEL_FUNC dummyFunc -#define OUTER_ACCEL_FUNC isAccelState -#define ACCEPT_FUNC dummyFunc +#undef STOP_AT_MATCH + +/* no-match have interesting func as dummy, and die/accel checks are outer */ + +/* no match, can die, accelerated */ +#define SHENG_IMPL sheng4_nmda +#define INTERESTING_FUNC dummyFunc4 +#define INNER_DEAD_FUNC dummyFunc +#define OUTER_DEAD_FUNC isDeadState +#define INNER_ACCEL_FUNC dummyFunc +#define OUTER_ACCEL_FUNC isAccelState +#define ACCEPT_FUNC dummyFunc #if defined(HAVE_AVX512VBMI) #define SHENG32_IMPL sheng32_4_nmda #define INTERESTING_FUNC32 dummyFunc4 @@ -633,15 +633,15 @@ u8 dummyFunc(UNUSED const u8 a) { #define ACCEPT_FUNC32 dummyFunc #define NO_SHENG64_IMPL #endif -#define STOP_AT_MATCH 0 -#include "sheng_impl4.h" -#undef SHENG_IMPL -#undef INTERESTING_FUNC -#undef INNER_DEAD_FUNC -#undef OUTER_DEAD_FUNC -#undef INNER_ACCEL_FUNC -#undef OUTER_ACCEL_FUNC -#undef ACCEPT_FUNC +#define STOP_AT_MATCH 0 +#include "sheng_impl4.h" +#undef SHENG_IMPL +#undef INTERESTING_FUNC +#undef INNER_DEAD_FUNC +#undef OUTER_DEAD_FUNC +#undef INNER_ACCEL_FUNC +#undef OUTER_ACCEL_FUNC +#undef ACCEPT_FUNC #if defined(HAVE_AVX512VBMI) #undef SHENG32_IMPL #undef INTERESTING_FUNC32 @@ -652,16 +652,16 @@ u8 dummyFunc(UNUSED const u8 a) { #undef ACCEPT_FUNC32 #undef NO_SHENG64_IMPL #endif -#undef STOP_AT_MATCH - -/* no match, can die, not accelerated */ -#define SHENG_IMPL sheng4_nmd -#define INTERESTING_FUNC dummyFunc4 -#define INNER_DEAD_FUNC dummyFunc -#define OUTER_DEAD_FUNC isDeadState -#define INNER_ACCEL_FUNC dummyFunc -#define OUTER_ACCEL_FUNC dummyFunc -#define ACCEPT_FUNC dummyFunc +#undef STOP_AT_MATCH + +/* no match, can die, not accelerated */ +#define SHENG_IMPL sheng4_nmd +#define INTERESTING_FUNC dummyFunc4 +#define INNER_DEAD_FUNC dummyFunc +#define OUTER_DEAD_FUNC isDeadState +#define INNER_ACCEL_FUNC dummyFunc +#define OUTER_ACCEL_FUNC dummyFunc +#define ACCEPT_FUNC dummyFunc #if defined(HAVE_AVX512VBMI) #define SHENG32_IMPL sheng32_4_nmd 
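/* Why the no-match variants wire dummyFunc / dummyFunc4 into their check
 * slots (an explanatory sketch; nothing beyond this file is assumed):
 * both helpers are really_inline and always return 0, so every guarded
 * block becomes dead code and the compiler drops the branch entirely --
 * a NO_MATCHES scan therefore pays nothing for accept handling:
 *
 *     if (ACCEPT_FUNC(tmp)) { ... }   // ACCEPT_FUNC == dummyFunc
 *     // inlines to: if (0) { ... }   -- eliminated at compile time
 */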
#define INTERESTING_FUNC32 dummyFunc4 @@ -676,15 +676,15 @@ u8 dummyFunc(UNUSED const u8 a) { #define OUTER_DEAD_FUNC64 isDeadState64 #define ACCEPT_FUNC64 dummyFunc #endif -#define STOP_AT_MATCH 0 -#include "sheng_impl4.h" -#undef SHENG_IMPL -#undef INTERESTING_FUNC -#undef INNER_DEAD_FUNC -#undef OUTER_DEAD_FUNC -#undef INNER_ACCEL_FUNC -#undef OUTER_ACCEL_FUNC -#undef ACCEPT_FUNC +#define STOP_AT_MATCH 0 +#include "sheng_impl4.h" +#undef SHENG_IMPL +#undef INTERESTING_FUNC +#undef INNER_DEAD_FUNC +#undef OUTER_DEAD_FUNC +#undef INNER_ACCEL_FUNC +#undef OUTER_ACCEL_FUNC +#undef ACCEPT_FUNC #if defined(HAVE_AVX512VBMI) #undef SHENG32_IMPL #undef INTERESTING_FUNC32 @@ -699,19 +699,19 @@ u8 dummyFunc(UNUSED const u8 a) { #undef OUTER_DEAD_FUNC64 #undef ACCEPT_FUNC64 #endif -#undef STOP_AT_MATCH - -/* there is no performance benefit in accelerating a no-match case that can't - * die */ - -/* no match, can't die */ -#define SHENG_IMPL sheng4_nm -#define INTERESTING_FUNC dummyFunc4 -#define INNER_DEAD_FUNC dummyFunc -#define OUTER_DEAD_FUNC dummyFunc -#define INNER_ACCEL_FUNC dummyFunc -#define OUTER_ACCEL_FUNC dummyFunc -#define ACCEPT_FUNC dummyFunc +#undef STOP_AT_MATCH + +/* there is no performance benefit in accelerating a no-match case that can't + * die */ + +/* no match, can't die */ +#define SHENG_IMPL sheng4_nm +#define INTERESTING_FUNC dummyFunc4 +#define INNER_DEAD_FUNC dummyFunc +#define OUTER_DEAD_FUNC dummyFunc +#define INNER_ACCEL_FUNC dummyFunc +#define OUTER_ACCEL_FUNC dummyFunc +#define ACCEPT_FUNC dummyFunc #if defined(HAVE_AVX512VBMI) #define SHENG32_IMPL sheng32_4_nm #define INTERESTING_FUNC32 dummyFunc4 @@ -726,15 +726,15 @@ u8 dummyFunc(UNUSED const u8 a) { #define OUTER_DEAD_FUNC64 dummyFunc #define ACCEPT_FUNC64 dummyFunc #endif -#define STOP_AT_MATCH 0 -#include "sheng_impl4.h" -#undef SHENG_IMPL -#undef INTERESTING_FUNC -#undef INNER_DEAD_FUNC -#undef OUTER_DEAD_FUNC -#undef INNER_ACCEL_FUNC -#undef OUTER_ACCEL_FUNC -#undef ACCEPT_FUNC +#define STOP_AT_MATCH 0 +#include "sheng_impl4.h" +#undef SHENG_IMPL +#undef INTERESTING_FUNC +#undef INNER_DEAD_FUNC +#undef OUTER_DEAD_FUNC +#undef INNER_ACCEL_FUNC +#undef OUTER_ACCEL_FUNC +#undef ACCEPT_FUNC #if defined(HAVE_AVX512VBMI) #undef SHENG32_IMPL #undef INTERESTING_FUNC32 @@ -749,6 +749,6 @@ u8 dummyFunc(UNUSED const u8 a) { #undef OUTER_DEAD_FUNC64 #undef ACCEPT_FUNC64 #endif -#undef STOP_AT_MATCH - -#endif // SHENG_DEFS_H +#undef STOP_AT_MATCH + +#endif // SHENG_DEFS_H diff --git a/contrib/libs/hyperscan/src/nfa/sheng_impl.h b/contrib/libs/hyperscan/src/nfa/sheng_impl.h index fb8ee16834..8c42754083 100644 --- a/contrib/libs/hyperscan/src/nfa/sheng_impl.h +++ b/contrib/libs/hyperscan/src/nfa/sheng_impl.h @@ -1,100 +1,100 @@ -/* +/* * Copyright (c) 2016-2020, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/* - * In order to use this macro, the following things need to be defined: - * - * - SHENG_IMPL (name of the Sheng implementation function) - * - DEAD_FUNC (name of the function checking for dead states) - * - ACCEPT_FUNC (name of the function checking for accept state) - * - STOP_AT_MATCH (can be 1 or 0, enable or disable stop at match) - */ - -/* byte-by-byte version. we don't do byte-by-byte death checking as it's - * pretty pointless to do it over a buffer that's at most 3 bytes long */ -static really_inline -char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s, - u8 *const cached_accept_state, ReportID *const cached_accept_id, - u8 single, u64a base_offset, const u8 *buf, const u8 *start, - const u8 *end, const u8 **scan_end) { - DEBUG_PRINTF("Starting DFA execution in state %u\n", - *state & SHENG_STATE_MASK); - const u8 *cur_buf = start; - if (DEAD_FUNC(*state)) { - DEBUG_PRINTF("Dead on arrival\n"); - *scan_end = end; - return MO_CONTINUE_MATCHING; - } - DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start)); - - m128 cur_state = set16x8(*state); - const m128 *masks = s->shuffle_masks; - - while (likely(cur_buf != end)) { - const u8 c = *cur_buf; - const m128 shuffle_mask = masks[c]; - cur_state = pshufb_m128(shuffle_mask, cur_state); - const u8 tmp = movd(cur_state); - - DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?'); - DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", tmp, (tmp & 0xF0) >> 4, - tmp & 0xF); - - if (unlikely(ACCEPT_FUNC(tmp))) { - DEBUG_PRINTF("Accept state %u reached\n", tmp & SHENG_STATE_MASK); - u64a match_offset = base_offset + (cur_buf - buf) + 1; - DEBUG_PRINTF("Match @ %llu\n", match_offset); - if (STOP_AT_MATCH) { - DEBUG_PRINTF("Stopping at match @ %lli\n", - (u64a)(cur_buf - start)); - *state = tmp; - *scan_end = cur_buf; - return MO_MATCHES_PENDING; - } - if (single) { - if (fireSingleReport(cb, ctxt, s->report, match_offset) == - MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } else { - if (fireReports(s, cb, ctxt, tmp, match_offset, - cached_accept_state, cached_accept_id, - 0) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - } - cur_buf++; - } - *state = movd(cur_state); - *scan_end = cur_buf; - return MO_CONTINUE_MATCHING; -} + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * In order to use this macro, the following things need to be defined: + * + * - SHENG_IMPL (name of the Sheng implementation function) + * - DEAD_FUNC (name of the function checking for dead states) + * - ACCEPT_FUNC (name of the function checking for accept state) + * - STOP_AT_MATCH (can be 1 or 0, enable or disable stop at match) + */ + +/* byte-by-byte version. we don't do byte-by-byte death checking as it's + * pretty pointless to do it over a buffer that's at most 3 bytes long */ +static really_inline +char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s, + u8 *const cached_accept_state, ReportID *const cached_accept_id, + u8 single, u64a base_offset, const u8 *buf, const u8 *start, + const u8 *end, const u8 **scan_end) { + DEBUG_PRINTF("Starting DFA execution in state %u\n", + *state & SHENG_STATE_MASK); + const u8 *cur_buf = start; + if (DEAD_FUNC(*state)) { + DEBUG_PRINTF("Dead on arrival\n"); + *scan_end = end; + return MO_CONTINUE_MATCHING; + } + DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start)); + + m128 cur_state = set16x8(*state); + const m128 *masks = s->shuffle_masks; + + while (likely(cur_buf != end)) { + const u8 c = *cur_buf; + const m128 shuffle_mask = masks[c]; + cur_state = pshufb_m128(shuffle_mask, cur_state); + const u8 tmp = movd(cur_state); + + DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? 
c : '?'); + DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", tmp, (tmp & 0xF0) >> 4, + tmp & 0xF); + + if (unlikely(ACCEPT_FUNC(tmp))) { + DEBUG_PRINTF("Accept state %u reached\n", tmp & SHENG_STATE_MASK); + u64a match_offset = base_offset + (cur_buf - buf) + 1; + DEBUG_PRINTF("Match @ %llu\n", match_offset); + if (STOP_AT_MATCH) { + DEBUG_PRINTF("Stopping at match @ %lli\n", + (u64a)(cur_buf - start)); + *state = tmp; + *scan_end = cur_buf; + return MO_MATCHES_PENDING; + } + if (single) { + if (fireSingleReport(cb, ctxt, s->report, match_offset) == + MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } else { + if (fireReports(s, cb, ctxt, tmp, match_offset, + cached_accept_state, cached_accept_id, + 0) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } + } + cur_buf++; + } + *state = movd(cur_state); + *scan_end = cur_buf; + return MO_CONTINUE_MATCHING; +} #if defined(HAVE_AVX512VBMI) static really_inline diff --git a/contrib/libs/hyperscan/src/nfa/sheng_impl4.h b/contrib/libs/hyperscan/src/nfa/sheng_impl4.h index 440e7396e2..fffb88a437 100644 --- a/contrib/libs/hyperscan/src/nfa/sheng_impl4.h +++ b/contrib/libs/hyperscan/src/nfa/sheng_impl4.h @@ -1,287 +1,287 @@ -/* +/* * Copyright (c) 2016-2020, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/* - * In order to use this macro, the following things need to be defined: - * - * - SHENG_IMPL (name of the Sheng implementation function) - * - INTERESTING_FUNC (name of the function checking for accept, accel or dead - * states) - * - INNER_DEAD_FUNC (name of the inner function checking for dead states) - * - OUTER_DEAD_FUNC (name of the outer function checking for dead states) - * - INNER_ACCEL_FUNC (name of the inner function checking for accel states) - * - OUTER_ACCEL_FUNC (name of the outer function checking for accel states) - * - ACCEPT_FUNC (name of the function checking for accept state) - * - STOP_AT_MATCH (can be 1 or 0, enable or disable stop at match) - */ - -/* unrolled 4-byte-at-a-time version. 
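 * [editor's note, not part of the commit] The macro protocol listed above is
 * easiest to see as a concrete instantiation; a minimal sketch using the real
 * hook names from sheng_defs.h and a hypothetical variant name sheng4_example:
 *
 *   #define SHENG_IMPL       sheng4_example  // hypothetical
 *   #define INTERESTING_FUNC hasInterestingStates
 *   #define INNER_DEAD_FUNC  isDeadState
 *   #define OUTER_DEAD_FUNC  dummyFunc
 *   #define INNER_ACCEL_FUNC dummyFunc
 *   #define OUTER_ACCEL_FUNC dummyFunc
 *   #define ACCEPT_FUNC      isAcceptState
 *   #define STOP_AT_MATCH    1
 *   #include "sheng_impl4.h"
 *   // ...then #undef every hook so the next variant can redefine them.
 *
 * Each include emits one specialised scanner; dummyFunc hooks compile to
 * constant-false branches that the optimiser removes.
 *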
- * - * we put innerDeadFunc inside interestingFunc() block so that we don't pay for - * dead states checking. however, if interestingFunc is dummy, innerDeadFunc - * gets lost with it, so we need an additional check outside the - * interestingFunc() branch - it's normally dummy so we don't pay for it, but - * when interestingFunc is dummy, outerDeadFunc should be set if we want to - * check for dead states. - * - * also, deadFunc only checks the last known state, but since we can't ever get - * out of the dead state and we don't really care where we died, it's not a - * problem. - */ -static really_inline -char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s, - u8 *const cached_accept_state, ReportID *const cached_accept_id, - u8 single, u64a base_offset, const u8 *buf, const u8 *start, - const u8 *end, const u8 **scan_end) { - DEBUG_PRINTF("Starting DFAx4 execution in state %u\n", - *state & SHENG_STATE_MASK); - const u8 *cur_buf = start; - const u8 *min_accel_dist = start; - base_offset++; - DEBUG_PRINTF("Scanning %llu bytes\n", (u64a)(end - start)); - - if (INNER_ACCEL_FUNC(*state) || OUTER_ACCEL_FUNC(*state)) { - DEBUG_PRINTF("Accel state reached @ 0\n"); - const union AccelAux *aaux = get_accel(s, *state & SHENG_STATE_MASK); - const u8 *new_offset = run_accel(aaux, cur_buf, end); - if (new_offset < cur_buf + BAD_ACCEL_DIST) { - min_accel_dist = new_offset + BIG_ACCEL_PENALTY; - } else { - min_accel_dist = new_offset + SMALL_ACCEL_PENALTY; - } - DEBUG_PRINTF("Next accel chance: %llu\n", - (u64a)(min_accel_dist - start)); - DEBUG_PRINTF("Accel scanned %zu bytes\n", new_offset - cur_buf); - cur_buf = new_offset; - DEBUG_PRINTF("New offset: %lli\n", (s64a)(cur_buf - start)); - } - if (INNER_DEAD_FUNC(*state) || OUTER_DEAD_FUNC(*state)) { - DEBUG_PRINTF("Dead on arrival\n"); - *scan_end = end; - return MO_CONTINUE_MATCHING; - } - - m128 cur_state = set16x8(*state); - const m128 *masks = s->shuffle_masks; - - while (likely(end - cur_buf >= 4)) { - const u8 *b1 = cur_buf; - const u8 *b2 = cur_buf + 1; - const u8 *b3 = cur_buf + 2; - const u8 *b4 = cur_buf + 3; - const u8 c1 = *b1; - const u8 c2 = *b2; - const u8 c3 = *b3; - const u8 c4 = *b4; - - const m128 shuffle_mask1 = masks[c1]; - cur_state = pshufb_m128(shuffle_mask1, cur_state); - const u8 a1 = movd(cur_state); - - const m128 shuffle_mask2 = masks[c2]; - cur_state = pshufb_m128(shuffle_mask2, cur_state); - const u8 a2 = movd(cur_state); - - const m128 shuffle_mask3 = masks[c3]; - cur_state = pshufb_m128(shuffle_mask3, cur_state); - const u8 a3 = movd(cur_state); - - const m128 shuffle_mask4 = masks[c4]; - cur_state = pshufb_m128(shuffle_mask4, cur_state); - const u8 a4 = movd(cur_state); - - DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?'); - DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", a1, (a1 & 0xF0) >> 4, a1 & 0xF); - - DEBUG_PRINTF("c: %02hhx '%c'\n", c2, ourisprint(c2) ? c2 : '?'); - DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", a2, (a2 & 0xF0) >> 4, a2 & 0xF); - - DEBUG_PRINTF("c: %02hhx '%c'\n", c3, ourisprint(c3) ? c3 : '?'); - DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", a3, (a3 & 0xF0) >> 4, a3 & 0xF); - - DEBUG_PRINTF("c: %02hhx '%c'\n", c4, ourisprint(c4) ? 
c4 : '?'); - DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", a4, (a4 & 0xF0) >> 4, a4 & 0xF); - - if (unlikely(INTERESTING_FUNC(a1, a2, a3, a4))) { - if (ACCEPT_FUNC(a1)) { - u64a match_offset = base_offset + b1 - buf; - DEBUG_PRINTF("Accept state %u reached\n", - a1 & SHENG_STATE_MASK); - DEBUG_PRINTF("Match @ %llu\n", match_offset); - if (STOP_AT_MATCH) { - DEBUG_PRINTF("Stopping at match @ %lli\n", - (s64a)(b1 - start)); - *scan_end = b1; - *state = a1; - return MO_MATCHES_PENDING; - } - if (single) { - if (fireSingleReport(cb, ctxt, s->report, match_offset) == - MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } else { - if (fireReports(s, cb, ctxt, a1, match_offset, - cached_accept_state, cached_accept_id, - 0) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - } - if (ACCEPT_FUNC(a2)) { - u64a match_offset = base_offset + b2 - buf; - DEBUG_PRINTF("Accept state %u reached\n", - a2 & SHENG_STATE_MASK); - DEBUG_PRINTF("Match @ %llu\n", match_offset); - if (STOP_AT_MATCH) { - DEBUG_PRINTF("Stopping at match @ %lli\n", - (s64a)(b2 - start)); - *scan_end = b2; - *state = a2; - return MO_MATCHES_PENDING; - } - if (single) { - if (fireSingleReport(cb, ctxt, s->report, match_offset) == - MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } else { - if (fireReports(s, cb, ctxt, a2, match_offset, - cached_accept_state, cached_accept_id, - 0) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - } - if (ACCEPT_FUNC(a3)) { - u64a match_offset = base_offset + b3 - buf; - DEBUG_PRINTF("Accept state %u reached\n", - a3 & SHENG_STATE_MASK); - DEBUG_PRINTF("Match @ %llu\n", match_offset); - if (STOP_AT_MATCH) { - DEBUG_PRINTF("Stopping at match @ %lli\n", - (s64a)(b3 - start)); - *scan_end = b3; - *state = a3; - return MO_MATCHES_PENDING; - } - if (single) { - if (fireSingleReport(cb, ctxt, s->report, match_offset) == - MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } else { - if (fireReports(s, cb, ctxt, a3, match_offset, - cached_accept_state, cached_accept_id, - 0) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - } - if (ACCEPT_FUNC(a4)) { - u64a match_offset = base_offset + b4 - buf; - DEBUG_PRINTF("Accept state %u reached\n", - a4 & SHENG_STATE_MASK); - DEBUG_PRINTF("Match @ %llu\n", match_offset); - if (STOP_AT_MATCH) { - DEBUG_PRINTF("Stopping at match @ %lli\n", - (s64a)(b4 - start)); - *scan_end = b4; - *state = a4; - return MO_MATCHES_PENDING; - } - if (single) { - if (fireSingleReport(cb, ctxt, s->report, match_offset) == - MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } else { - if (fireReports(s, cb, ctxt, a4, match_offset, - cached_accept_state, cached_accept_id, - 0) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - } - if (INNER_DEAD_FUNC(a4)) { - DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(b4 - buf)); - *scan_end = end; - *state = a4; - return MO_CONTINUE_MATCHING; - } - if (cur_buf > min_accel_dist && INNER_ACCEL_FUNC(a4)) { - DEBUG_PRINTF("Accel state reached @ %lli\n", (s64a)(b4 - buf)); - const union AccelAux *aaux = - get_accel(s, a4 & SHENG_STATE_MASK); - const u8 *new_offset = run_accel(aaux, cur_buf + 4, end); - if (new_offset < cur_buf + 4 + BAD_ACCEL_DIST) { - min_accel_dist = new_offset + BIG_ACCEL_PENALTY; - } else { - min_accel_dist = new_offset + SMALL_ACCEL_PENALTY; - } - DEBUG_PRINTF("Next accel chance: %llu\n", - (u64a)(min_accel_dist - start)); - DEBUG_PRINTF("Accel scanned %llu bytes\n", - (u64a)(new_offset - cur_buf - 4)); - cur_buf = new_offset; - DEBUG_PRINTF("New offset: %llu\n", (u64a)(cur_buf - buf)); - 
continue; - } - } - if (OUTER_DEAD_FUNC(a4)) { - DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(cur_buf - buf)); - *scan_end = end; - *state = a4; - return MO_CONTINUE_MATCHING; - }; - if (cur_buf > min_accel_dist && OUTER_ACCEL_FUNC(a4)) { - DEBUG_PRINTF("Accel state reached @ %lli\n", (s64a)(b4 - buf)); - const union AccelAux *aaux = get_accel(s, a4 & SHENG_STATE_MASK); - const u8 *new_offset = run_accel(aaux, cur_buf + 4, end); - if (new_offset < cur_buf + 4 + BAD_ACCEL_DIST) { - min_accel_dist = new_offset + BIG_ACCEL_PENALTY; - } else { - min_accel_dist = new_offset + SMALL_ACCEL_PENALTY; - } - DEBUG_PRINTF("Next accel chance: %llu\n", - (u64a)(min_accel_dist - start)); - DEBUG_PRINTF("Accel scanned %llu bytes\n", - (u64a)(new_offset - cur_buf - 4)); - cur_buf = new_offset; - DEBUG_PRINTF("New offset: %llu\n", (u64a)(cur_buf - buf)); - continue; - }; - cur_buf += 4; - } - *state = movd(cur_state); - *scan_end = cur_buf; - return MO_CONTINUE_MATCHING; -} + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * In order to use this macro, the following things need to be defined: + * + * - SHENG_IMPL (name of the Sheng implementation function) + * - INTERESTING_FUNC (name of the function checking for accept, accel or dead + * states) + * - INNER_DEAD_FUNC (name of the inner function checking for dead states) + * - OUTER_DEAD_FUNC (name of the outer function checking for dead states) + * - INNER_ACCEL_FUNC (name of the inner function checking for accel states) + * - OUTER_ACCEL_FUNC (name of the outer function checking for accel states) + * - ACCEPT_FUNC (name of the function checking for accept state) + * - STOP_AT_MATCH (can be 1 or 0, enable or disable stop at match) + */ + +/* unrolled 4-byte-at-a-time version. + * + * we put innerDeadFunc inside interestingFunc() block so that we don't pay for + * dead states checking. 
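 * [editor's note] in sketch form, the generated block below looks like:
 *
 *   if (unlikely(INTERESTING_FUNC(a1, a2, a3, a4))) {
 *       ...ACCEPT_FUNC handling for a1..a4...
 *       if (INNER_DEAD_FUNC(a4)) { ...bail out... }  // paid only here
 *   }
 *   if (OUTER_DEAD_FUNC(a4)) { ...bail out... }      // normally a dummy
 *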
however, if interestingFunc is dummy, innerDeadFunc + * gets lost with it, so we need an additional check outside the + * interestingFunc() branch - it's normally dummy so we don't pay for it, but + * when interestingFunc is dummy, outerDeadFunc should be set if we want to + * check for dead states. + * + * also, deadFunc only checks the last known state, but since we can't ever get + * out of the dead state and we don't really care where we died, it's not a + * problem. + */ +static really_inline +char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s, + u8 *const cached_accept_state, ReportID *const cached_accept_id, + u8 single, u64a base_offset, const u8 *buf, const u8 *start, + const u8 *end, const u8 **scan_end) { + DEBUG_PRINTF("Starting DFAx4 execution in state %u\n", + *state & SHENG_STATE_MASK); + const u8 *cur_buf = start; + const u8 *min_accel_dist = start; + base_offset++; + DEBUG_PRINTF("Scanning %llu bytes\n", (u64a)(end - start)); + + if (INNER_ACCEL_FUNC(*state) || OUTER_ACCEL_FUNC(*state)) { + DEBUG_PRINTF("Accel state reached @ 0\n"); + const union AccelAux *aaux = get_accel(s, *state & SHENG_STATE_MASK); + const u8 *new_offset = run_accel(aaux, cur_buf, end); + if (new_offset < cur_buf + BAD_ACCEL_DIST) { + min_accel_dist = new_offset + BIG_ACCEL_PENALTY; + } else { + min_accel_dist = new_offset + SMALL_ACCEL_PENALTY; + } + DEBUG_PRINTF("Next accel chance: %llu\n", + (u64a)(min_accel_dist - start)); + DEBUG_PRINTF("Accel scanned %zu bytes\n", new_offset - cur_buf); + cur_buf = new_offset; + DEBUG_PRINTF("New offset: %lli\n", (s64a)(cur_buf - start)); + } + if (INNER_DEAD_FUNC(*state) || OUTER_DEAD_FUNC(*state)) { + DEBUG_PRINTF("Dead on arrival\n"); + *scan_end = end; + return MO_CONTINUE_MATCHING; + } + + m128 cur_state = set16x8(*state); + const m128 *masks = s->shuffle_masks; + + while (likely(end - cur_buf >= 4)) { + const u8 *b1 = cur_buf; + const u8 *b2 = cur_buf + 1; + const u8 *b3 = cur_buf + 2; + const u8 *b4 = cur_buf + 3; + const u8 c1 = *b1; + const u8 c2 = *b2; + const u8 c3 = *b3; + const u8 c4 = *b4; + + const m128 shuffle_mask1 = masks[c1]; + cur_state = pshufb_m128(shuffle_mask1, cur_state); + const u8 a1 = movd(cur_state); + + const m128 shuffle_mask2 = masks[c2]; + cur_state = pshufb_m128(shuffle_mask2, cur_state); + const u8 a2 = movd(cur_state); + + const m128 shuffle_mask3 = masks[c3]; + cur_state = pshufb_m128(shuffle_mask3, cur_state); + const u8 a3 = movd(cur_state); + + const m128 shuffle_mask4 = masks[c4]; + cur_state = pshufb_m128(shuffle_mask4, cur_state); + const u8 a4 = movd(cur_state); + + DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?'); + DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", a1, (a1 & 0xF0) >> 4, a1 & 0xF); + + DEBUG_PRINTF("c: %02hhx '%c'\n", c2, ourisprint(c2) ? c2 : '?'); + DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", a2, (a2 & 0xF0) >> 4, a2 & 0xF); + + DEBUG_PRINTF("c: %02hhx '%c'\n", c3, ourisprint(c3) ? c3 : '?'); + DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", a3, (a3 & 0xF0) >> 4, a3 & 0xF); + + DEBUG_PRINTF("c: %02hhx '%c'\n", c4, ourisprint(c4) ? 
c4 : '?'); + DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", a4, (a4 & 0xF0) >> 4, a4 & 0xF); + + if (unlikely(INTERESTING_FUNC(a1, a2, a3, a4))) { + if (ACCEPT_FUNC(a1)) { + u64a match_offset = base_offset + b1 - buf; + DEBUG_PRINTF("Accept state %u reached\n", + a1 & SHENG_STATE_MASK); + DEBUG_PRINTF("Match @ %llu\n", match_offset); + if (STOP_AT_MATCH) { + DEBUG_PRINTF("Stopping at match @ %lli\n", + (s64a)(b1 - start)); + *scan_end = b1; + *state = a1; + return MO_MATCHES_PENDING; + } + if (single) { + if (fireSingleReport(cb, ctxt, s->report, match_offset) == + MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } else { + if (fireReports(s, cb, ctxt, a1, match_offset, + cached_accept_state, cached_accept_id, + 0) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } + } + if (ACCEPT_FUNC(a2)) { + u64a match_offset = base_offset + b2 - buf; + DEBUG_PRINTF("Accept state %u reached\n", + a2 & SHENG_STATE_MASK); + DEBUG_PRINTF("Match @ %llu\n", match_offset); + if (STOP_AT_MATCH) { + DEBUG_PRINTF("Stopping at match @ %lli\n", + (s64a)(b2 - start)); + *scan_end = b2; + *state = a2; + return MO_MATCHES_PENDING; + } + if (single) { + if (fireSingleReport(cb, ctxt, s->report, match_offset) == + MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } else { + if (fireReports(s, cb, ctxt, a2, match_offset, + cached_accept_state, cached_accept_id, + 0) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } + } + if (ACCEPT_FUNC(a3)) { + u64a match_offset = base_offset + b3 - buf; + DEBUG_PRINTF("Accept state %u reached\n", + a3 & SHENG_STATE_MASK); + DEBUG_PRINTF("Match @ %llu\n", match_offset); + if (STOP_AT_MATCH) { + DEBUG_PRINTF("Stopping at match @ %lli\n", + (s64a)(b3 - start)); + *scan_end = b3; + *state = a3; + return MO_MATCHES_PENDING; + } + if (single) { + if (fireSingleReport(cb, ctxt, s->report, match_offset) == + MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } else { + if (fireReports(s, cb, ctxt, a3, match_offset, + cached_accept_state, cached_accept_id, + 0) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } + } + if (ACCEPT_FUNC(a4)) { + u64a match_offset = base_offset + b4 - buf; + DEBUG_PRINTF("Accept state %u reached\n", + a4 & SHENG_STATE_MASK); + DEBUG_PRINTF("Match @ %llu\n", match_offset); + if (STOP_AT_MATCH) { + DEBUG_PRINTF("Stopping at match @ %lli\n", + (s64a)(b4 - start)); + *scan_end = b4; + *state = a4; + return MO_MATCHES_PENDING; + } + if (single) { + if (fireSingleReport(cb, ctxt, s->report, match_offset) == + MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } else { + if (fireReports(s, cb, ctxt, a4, match_offset, + cached_accept_state, cached_accept_id, + 0) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } + } + if (INNER_DEAD_FUNC(a4)) { + DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(b4 - buf)); + *scan_end = end; + *state = a4; + return MO_CONTINUE_MATCHING; + } + if (cur_buf > min_accel_dist && INNER_ACCEL_FUNC(a4)) { + DEBUG_PRINTF("Accel state reached @ %lli\n", (s64a)(b4 - buf)); + const union AccelAux *aaux = + get_accel(s, a4 & SHENG_STATE_MASK); + const u8 *new_offset = run_accel(aaux, cur_buf + 4, end); + if (new_offset < cur_buf + 4 + BAD_ACCEL_DIST) { + min_accel_dist = new_offset + BIG_ACCEL_PENALTY; + } else { + min_accel_dist = new_offset + SMALL_ACCEL_PENALTY; + } + DEBUG_PRINTF("Next accel chance: %llu\n", + (u64a)(min_accel_dist - start)); + DEBUG_PRINTF("Accel scanned %llu bytes\n", + (u64a)(new_offset - cur_buf - 4)); + cur_buf = new_offset; + DEBUG_PRINTF("New offset: %llu\n", (u64a)(cur_buf - buf)); + 
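/* [editor's note] the branch above is a simple rate limiter for acceleration:
 * a skip that advanced less than BAD_ACCEL_DIST bytes pushes the next accel
 * attempt out by BIG_ACCEL_PENALTY, while a productive skip costs only
 * SMALL_ACCEL_PENALTY; min_accel_dist is then the earliest offset at which an
 * accel state may trigger another skip. */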
continue; + } + } + if (OUTER_DEAD_FUNC(a4)) { + DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(cur_buf - buf)); + *scan_end = end; + *state = a4; + return MO_CONTINUE_MATCHING; + }; + if (cur_buf > min_accel_dist && OUTER_ACCEL_FUNC(a4)) { + DEBUG_PRINTF("Accel state reached @ %lli\n", (s64a)(b4 - buf)); + const union AccelAux *aaux = get_accel(s, a4 & SHENG_STATE_MASK); + const u8 *new_offset = run_accel(aaux, cur_buf + 4, end); + if (new_offset < cur_buf + 4 + BAD_ACCEL_DIST) { + min_accel_dist = new_offset + BIG_ACCEL_PENALTY; + } else { + min_accel_dist = new_offset + SMALL_ACCEL_PENALTY; + } + DEBUG_PRINTF("Next accel chance: %llu\n", + (u64a)(min_accel_dist - start)); + DEBUG_PRINTF("Accel scanned %llu bytes\n", + (u64a)(new_offset - cur_buf - 4)); + cur_buf = new_offset; + DEBUG_PRINTF("New offset: %llu\n", (u64a)(cur_buf - buf)); + continue; + }; + cur_buf += 4; + } + *state = movd(cur_state); + *scan_end = cur_buf; + return MO_CONTINUE_MATCHING; +} #if defined(HAVE_AVX512VBMI) static really_inline diff --git a/contrib/libs/hyperscan/src/nfa/sheng_internal.h b/contrib/libs/hyperscan/src/nfa/sheng_internal.h index 98536886c5..70fc327a77 100644 --- a/contrib/libs/hyperscan/src/nfa/sheng_internal.h +++ b/contrib/libs/hyperscan/src/nfa/sheng_internal.h @@ -1,43 +1,43 @@ -/* +/* * Copyright (c) 2016-2020, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef SHENG_INTERNAL_H_ -#define SHENG_INTERNAL_H_ - -#include "ue2common.h" -#include "util/simd_types.h" - -#define SHENG_STATE_ACCEPT 0x10 -#define SHENG_STATE_DEAD 0x20 -#define SHENG_STATE_ACCEL 0x40 -#define SHENG_STATE_MASK 0xF -#define SHENG_STATE_FLAG_MASK 0x70 - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SHENG_INTERNAL_H_ +#define SHENG_INTERNAL_H_ + +#include "ue2common.h" +#include "util/simd_types.h" + +#define SHENG_STATE_ACCEPT 0x10 +#define SHENG_STATE_DEAD 0x20 +#define SHENG_STATE_ACCEL 0x40 +#define SHENG_STATE_MASK 0xF +#define SHENG_STATE_FLAG_MASK 0x70 + #define SHENG32_STATE_ACCEPT 0x20 #define SHENG32_STATE_DEAD 0x40 #define SHENG32_STATE_ACCEL 0x80 @@ -49,35 +49,35 @@ #define SHENG64_STATE_MASK 0x3F #define SHENG64_STATE_FLAG_MASK 0xC0 -#define SHENG_FLAG_SINGLE_REPORT 0x1 -#define SHENG_FLAG_CAN_DIE 0x2 -#define SHENG_FLAG_HAS_ACCEL 0x4 - -struct report_list { - u32 count; - ReportID report[]; -}; - -struct sstate_aux { - u32 accept; - u32 accept_eod; - u32 accel; - u32 top; -}; - -struct sheng { - m128 shuffle_masks[256]; - u32 length; - u32 aux_offset; - u32 report_offset; - u32 accel_offset; - u8 n_states; - u8 anchored; - u8 floating; - u8 flags; - ReportID report; -}; - +#define SHENG_FLAG_SINGLE_REPORT 0x1 +#define SHENG_FLAG_CAN_DIE 0x2 +#define SHENG_FLAG_HAS_ACCEL 0x4 + +struct report_list { + u32 count; + ReportID report[]; +}; + +struct sstate_aux { + u32 accept; + u32 accept_eod; + u32 accel; + u32 top; +}; + +struct sheng { + m128 shuffle_masks[256]; + u32 length; + u32 aux_offset; + u32 report_offset; + u32 accel_offset; + u8 n_states; + u8 anchored; + u8 floating; + u8 flags; + ReportID report; +}; + struct sheng32 { m512 succ_masks[256]; u32 length; @@ -104,4 +104,4 @@ struct sheng64 { ReportID report; }; -#endif /* SHENG_INTERNAL_H_ */ +#endif /* SHENG_INTERNAL_H_ */ diff --git a/contrib/libs/hyperscan/src/nfa/shengcompile.cpp b/contrib/libs/hyperscan/src/nfa/shengcompile.cpp index aa3faeb09d..8cc98eea8a 100644 --- a/contrib/libs/hyperscan/src/nfa/shengcompile.cpp +++ b/contrib/libs/hyperscan/src/nfa/shengcompile.cpp @@ -1,306 +1,306 @@ -/* +/* * Copyright (c) 2016-2020, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "shengcompile.h" - -#include "accel.h" -#include "accelcompile.h" -#include "shufticompile.h" -#include "trufflecompile.h" -#include "util/alloc.h" -#include "util/bitutils.h" -#include "util/charreach.h" -#include "util/compare.h" -#include "util/container.h" -#include "util/order_check.h" -#include "util/report_manager.h" -#include "util/unaligned.h" - -#include "grey.h" -#include "nfa_internal.h" -#include "sheng_internal.h" -#include "ue2common.h" -#include "util/compile_context.h" -#include "util/make_unique.h" -#include "util/verify_types.h" -#include "util/simd_types.h" - -#include <map> -#include <vector> -#include <sstream> - -#include <boost/range/adaptor/map.hpp> - -using namespace std; -using boost::adaptors::map_keys; - -namespace ue2 { - -#define ACCEL_DFA_MAX_OFFSET_DEPTH 4 - -/** Maximum tolerated number of escape character from an accel state. - * This is larger than nfa, as we don't have a budget and the nfa cheats on stop - * characters for sets of states */ -#define ACCEL_DFA_MAX_STOP_CHAR 160 - -/** Maximum tolerated number of escape character from a sds accel state. Larger - * than normal states as accelerating sds is important. Matches NFA value */ -#define ACCEL_DFA_MAX_FLOATING_STOP_CHAR 192 - -struct dfa_info { - accel_dfa_build_strat &strat; - raw_dfa &raw; - vector<dstate> &states; - dstate &floating; - dstate &anchored; - bool can_die; - - explicit dfa_info(accel_dfa_build_strat &s) - : strat(s), raw(strat.get_raw()), states(raw.states), - floating(states[raw.start_floating]), - anchored(states[raw.start_anchored]), can_die(dfaCanDie(raw)) {} - - // returns adjusted size - size_t size() const { - return can_die ? states.size() : states.size() - 1; - } - // expects adjusted index - dstate &operator[](dstate_id_t idx) { - return states[raw_id(idx)]; - } - dstate &top(dstate_id_t idx) { - if (isDead(idx)) { - return floating; - } - return next(idx, TOP); - } - dstate &next(dstate_id_t idx, u16 chr) { - auto &src = (*this)[idx]; - auto next_id = src.next[raw.alpha_remap[chr]]; - return states[next_id]; - } - // get original idx from adjusted idx - dstate_id_t raw_id(dstate_id_t idx) { - assert(idx < size()); - // if DFA can't die, shift all indices left by 1 - return can_die ? 
idx : idx + 1; - } - bool isDead(dstate &state) { - return raw_id(state.impl_id) == DEAD_STATE; - } - bool isDead(dstate_id_t idx) { - return raw_id(idx) == DEAD_STATE; - } - -private: - static bool dfaCanDie(raw_dfa &rdfa) { - for (unsigned chr = 0; chr < 256; chr++) { - for (dstate_id_t state = 0; state < rdfa.states.size(); state++) { - auto succ = rdfa.states[state].next[rdfa.alpha_remap[chr]]; - if (succ == DEAD_STATE) { - return true; - } - } - } - return false; - } -}; - -namespace { - -struct raw_report_list { - flat_set<ReportID> reports; - - raw_report_list(const flat_set<ReportID> &reports_in, - const ReportManager &rm, bool do_remap) { - if (do_remap) { - for (auto &id : reports_in) { - reports.insert(rm.getProgramOffset(id)); - } - } else { - reports = reports_in; - } - } - - bool operator<(const raw_report_list &b) const { - return reports < b.reports; - } -}; - -struct raw_report_info_impl : public raw_report_info { - vector<raw_report_list> rl; - u32 getReportListSize() const override; - size_t size() const override; - void fillReportLists(NFA *n, size_t base_offset, - std::vector<u32> &ro /* out */) const override; -}; -} - -u32 raw_report_info_impl::getReportListSize() const { - u32 rv = 0; - - for (const auto &reps : rl) { - rv += sizeof(report_list); - rv += sizeof(ReportID) * reps.reports.size(); - } - - return rv; -} - -size_t raw_report_info_impl::size() const { - return rl.size(); -} - -void raw_report_info_impl::fillReportLists(NFA *n, size_t base_offset, - vector<u32> &ro) const { - for (const auto &reps : rl) { - ro.push_back(base_offset); - - report_list *p = (report_list *)((char *)n + base_offset); - - u32 i = 0; - for (const ReportID report : reps.reports) { - p->report[i++] = report; - } - p->count = verify_u32(reps.reports.size()); - - base_offset += sizeof(report_list); - base_offset += sizeof(ReportID) * reps.reports.size(); - } -} - -unique_ptr<raw_report_info> sheng_build_strat::gatherReports( - vector<u32> &reports, - vector<u32> &reports_eod, - u8 *isSingleReport, - ReportID *arbReport) const { - DEBUG_PRINTF("gathering reports\n"); - - const bool remap_reports = has_managed_reports(rdfa.kind); - - auto ri = ue2::make_unique<raw_report_info_impl>(); - map<raw_report_list, u32> rev; - - for (const dstate &s : rdfa.states) { - if (s.reports.empty()) { - reports.push_back(MO_INVALID_IDX); - continue; - } - - raw_report_list rrl(s.reports, rm, remap_reports); - DEBUG_PRINTF("non empty r\n"); - if (rev.find(rrl) != rev.end()) { - reports.push_back(rev[rrl]); - } else { - DEBUG_PRINTF("adding to rl %zu\n", ri->size()); - rev[rrl] = ri->size(); - reports.push_back(ri->size()); - ri->rl.push_back(rrl); - } - } - - for (const dstate &s : rdfa.states) { - if (s.reports_eod.empty()) { - reports_eod.push_back(MO_INVALID_IDX); - continue; - } - - DEBUG_PRINTF("non empty r eod\n"); - raw_report_list rrl(s.reports_eod, rm, remap_reports); - if (rev.find(rrl) != rev.end()) { - reports_eod.push_back(rev[rrl]); - continue; - } - - DEBUG_PRINTF("adding to rl eod %zu\n", s.reports_eod.size()); - rev[rrl] = ri->size(); - reports_eod.push_back(ri->size()); - ri->rl.push_back(rrl); - } - - assert(!ri->rl.empty()); /* all components should be able to generate - reports */ - if (!ri->rl.empty()) { - *arbReport = *ri->rl.begin()->reports.begin(); - } else { - *arbReport = 0; - } - - /* if we have only a single report id generated from all accepts (not eod) - * we can take some short cuts */ - set<ReportID> reps; - - for (u32 rl_index : reports) { - if (rl_index == 
MO_INVALID_IDX) { - continue; - } - assert(rl_index < ri->size()); - insert(&reps, ri->rl[rl_index].reports); - } - - if (reps.size() == 1) { - *isSingleReport = 1; - *arbReport = *reps.begin(); - DEBUG_PRINTF("single -- %u\n", *arbReport); - } else { - *isSingleReport = 0; - } - - return move(ri); -} - -u32 sheng_build_strat::max_allowed_offset_accel() const { - return ACCEL_DFA_MAX_OFFSET_DEPTH; -} - -u32 sheng_build_strat::max_stop_char() const { - return ACCEL_DFA_MAX_STOP_CHAR; -} - -u32 sheng_build_strat::max_floating_stop_char() const { - return ACCEL_DFA_MAX_FLOATING_STOP_CHAR; -} - -size_t sheng_build_strat::accelSize() const { - return sizeof(AccelAux); -} - -#ifdef DEBUG -static really_inline -void dumpShuffleMask(const u8 chr, const u8 *buf, unsigned sz) { - stringstream o; - - for (unsigned i = 0; i < sz; i++) { - o.width(2); - o << (buf[i] & SHENG_STATE_MASK) << " "; - } - DEBUG_PRINTF("chr %3u: %s\n", chr, o.str().c_str()); -} + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "shengcompile.h" + +#include "accel.h" +#include "accelcompile.h" +#include "shufticompile.h" +#include "trufflecompile.h" +#include "util/alloc.h" +#include "util/bitutils.h" +#include "util/charreach.h" +#include "util/compare.h" +#include "util/container.h" +#include "util/order_check.h" +#include "util/report_manager.h" +#include "util/unaligned.h" + +#include "grey.h" +#include "nfa_internal.h" +#include "sheng_internal.h" +#include "ue2common.h" +#include "util/compile_context.h" +#include "util/make_unique.h" +#include "util/verify_types.h" +#include "util/simd_types.h" + +#include <map> +#include <vector> +#include <sstream> + +#include <boost/range/adaptor/map.hpp> + +using namespace std; +using boost::adaptors::map_keys; + +namespace ue2 { + +#define ACCEL_DFA_MAX_OFFSET_DEPTH 4 + +/** Maximum tolerated number of escape character from an accel state. 
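 * [editor's note] the "escape characters" are the stop set that terminates an
 * accelerated skip; conceptually a state stays worth accelerating only while
 *
 *   stop_chars.count() <= (is_floating ? ACCEL_DFA_MAX_FLOATING_STOP_CHAR
 *                                      : ACCEL_DFA_MAX_STOP_CHAR)
 *
 * where stop_chars is a CharReach of characters leaving the state. This is an
 * illustrative condition, not the literal check in accel_dfa_build_strat.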
+ * This is larger than nfa, as we don't have a budget and the nfa cheats on stop + * characters for sets of states */ +#define ACCEL_DFA_MAX_STOP_CHAR 160 + +/** Maximum tolerated number of escape character from a sds accel state. Larger + * than normal states as accelerating sds is important. Matches NFA value */ +#define ACCEL_DFA_MAX_FLOATING_STOP_CHAR 192 + +struct dfa_info { + accel_dfa_build_strat &strat; + raw_dfa &raw; + vector<dstate> &states; + dstate &floating; + dstate &anchored; + bool can_die; + + explicit dfa_info(accel_dfa_build_strat &s) + : strat(s), raw(strat.get_raw()), states(raw.states), + floating(states[raw.start_floating]), + anchored(states[raw.start_anchored]), can_die(dfaCanDie(raw)) {} + + // returns adjusted size + size_t size() const { + return can_die ? states.size() : states.size() - 1; + } + // expects adjusted index + dstate &operator[](dstate_id_t idx) { + return states[raw_id(idx)]; + } + dstate &top(dstate_id_t idx) { + if (isDead(idx)) { + return floating; + } + return next(idx, TOP); + } + dstate &next(dstate_id_t idx, u16 chr) { + auto &src = (*this)[idx]; + auto next_id = src.next[raw.alpha_remap[chr]]; + return states[next_id]; + } + // get original idx from adjusted idx + dstate_id_t raw_id(dstate_id_t idx) { + assert(idx < size()); + // if DFA can't die, shift all indices left by 1 + return can_die ? idx : idx + 1; + } + bool isDead(dstate &state) { + return raw_id(state.impl_id) == DEAD_STATE; + } + bool isDead(dstate_id_t idx) { + return raw_id(idx) == DEAD_STATE; + } + +private: + static bool dfaCanDie(raw_dfa &rdfa) { + for (unsigned chr = 0; chr < 256; chr++) { + for (dstate_id_t state = 0; state < rdfa.states.size(); state++) { + auto succ = rdfa.states[state].next[rdfa.alpha_remap[chr]]; + if (succ == DEAD_STATE) { + return true; + } + } + } + return false; + } +}; + +namespace { + +struct raw_report_list { + flat_set<ReportID> reports; + + raw_report_list(const flat_set<ReportID> &reports_in, + const ReportManager &rm, bool do_remap) { + if (do_remap) { + for (auto &id : reports_in) { + reports.insert(rm.getProgramOffset(id)); + } + } else { + reports = reports_in; + } + } + + bool operator<(const raw_report_list &b) const { + return reports < b.reports; + } +}; + +struct raw_report_info_impl : public raw_report_info { + vector<raw_report_list> rl; + u32 getReportListSize() const override; + size_t size() const override; + void fillReportLists(NFA *n, size_t base_offset, + std::vector<u32> &ro /* out */) const override; +}; +} + +u32 raw_report_info_impl::getReportListSize() const { + u32 rv = 0; + + for (const auto &reps : rl) { + rv += sizeof(report_list); + rv += sizeof(ReportID) * reps.reports.size(); + } + + return rv; +} + +size_t raw_report_info_impl::size() const { + return rl.size(); +} + +void raw_report_info_impl::fillReportLists(NFA *n, size_t base_offset, + vector<u32> &ro) const { + for (const auto &reps : rl) { + ro.push_back(base_offset); + + report_list *p = (report_list *)((char *)n + base_offset); + + u32 i = 0; + for (const ReportID report : reps.reports) { + p->report[i++] = report; + } + p->count = verify_u32(reps.reports.size()); + + base_offset += sizeof(report_list); + base_offset += sizeof(ReportID) * reps.reports.size(); + } +} + +unique_ptr<raw_report_info> sheng_build_strat::gatherReports( + vector<u32> &reports, + vector<u32> &reports_eod, + u8 *isSingleReport, + ReportID *arbReport) const { + DEBUG_PRINTF("gathering reports\n"); + + const bool remap_reports = has_managed_reports(rdfa.kind); + + auto ri = 
ue2::make_unique<raw_report_info_impl>(); + map<raw_report_list, u32> rev; + + for (const dstate &s : rdfa.states) { + if (s.reports.empty()) { + reports.push_back(MO_INVALID_IDX); + continue; + } + + raw_report_list rrl(s.reports, rm, remap_reports); + DEBUG_PRINTF("non empty r\n"); + if (rev.find(rrl) != rev.end()) { + reports.push_back(rev[rrl]); + } else { + DEBUG_PRINTF("adding to rl %zu\n", ri->size()); + rev[rrl] = ri->size(); + reports.push_back(ri->size()); + ri->rl.push_back(rrl); + } + } + + for (const dstate &s : rdfa.states) { + if (s.reports_eod.empty()) { + reports_eod.push_back(MO_INVALID_IDX); + continue; + } + + DEBUG_PRINTF("non empty r eod\n"); + raw_report_list rrl(s.reports_eod, rm, remap_reports); + if (rev.find(rrl) != rev.end()) { + reports_eod.push_back(rev[rrl]); + continue; + } + + DEBUG_PRINTF("adding to rl eod %zu\n", s.reports_eod.size()); + rev[rrl] = ri->size(); + reports_eod.push_back(ri->size()); + ri->rl.push_back(rrl); + } + + assert(!ri->rl.empty()); /* all components should be able to generate + reports */ + if (!ri->rl.empty()) { + *arbReport = *ri->rl.begin()->reports.begin(); + } else { + *arbReport = 0; + } + + /* if we have only a single report id generated from all accepts (not eod) + * we can take some short cuts */ + set<ReportID> reps; + + for (u32 rl_index : reports) { + if (rl_index == MO_INVALID_IDX) { + continue; + } + assert(rl_index < ri->size()); + insert(&reps, ri->rl[rl_index].reports); + } + + if (reps.size() == 1) { + *isSingleReport = 1; + *arbReport = *reps.begin(); + DEBUG_PRINTF("single -- %u\n", *arbReport); + } else { + *isSingleReport = 0; + } + + return move(ri); +} + +u32 sheng_build_strat::max_allowed_offset_accel() const { + return ACCEL_DFA_MAX_OFFSET_DEPTH; +} + +u32 sheng_build_strat::max_stop_char() const { + return ACCEL_DFA_MAX_STOP_CHAR; +} + +u32 sheng_build_strat::max_floating_stop_char() const { + return ACCEL_DFA_MAX_FLOATING_STOP_CHAR; +} + +size_t sheng_build_strat::accelSize() const { + return sizeof(AccelAux); +} + +#ifdef DEBUG +static really_inline +void dumpShuffleMask(const u8 chr, const u8 *buf, unsigned sz) { + stringstream o; + + for (unsigned i = 0; i < sz; i++) { + o.width(2); + o << (buf[i] & SHENG_STATE_MASK) << " "; + } + DEBUG_PRINTF("chr %3u: %s\n", chr, o.str().c_str()); +} static really_inline void dumpShuffleMask32(const u8 chr, const u8 *buf, unsigned sz) { @@ -323,18 +323,18 @@ void dumpShuffleMask64(const u8 chr, const u8 *buf, unsigned sz) { } DEBUG_PRINTF("chr %3u: %s\n", chr, o.str().c_str()); } -#endif - -static -void fillAccelOut(const map<dstate_id_t, AccelScheme> &accel_escape_info, - set<dstate_id_t> *accel_states) { - for (dstate_id_t i : accel_escape_info | map_keys) { - accel_states->insert(i); - } -} - +#endif + +static +void fillAccelOut(const map<dstate_id_t, AccelScheme> &accel_escape_info, + set<dstate_id_t> *accel_states) { + for (dstate_id_t i : accel_escape_info | map_keys) { + accel_states->insert(i); + } +} + template <typename T> -static +static u8 getShengState(UNUSED dstate &state, UNUSED dfa_info &info, UNUSED map<dstate_id_t, AccelScheme> &accelInfo) { return 0; @@ -343,19 +343,19 @@ u8 getShengState(UNUSED dstate &state, UNUSED dfa_info &info, template <> u8 getShengState<sheng>(dstate &state, dfa_info &info, map<dstate_id_t, AccelScheme> &accelInfo) { - u8 s = state.impl_id; - if (!state.reports.empty()) { - s |= SHENG_STATE_ACCEPT; - } - if (info.isDead(state)) { - s |= SHENG_STATE_DEAD; - } - if (accelInfo.find(info.raw_id(state.impl_id)) != 
accelInfo.end()) { - s |= SHENG_STATE_ACCEL; - } - return s; -} - + u8 s = state.impl_id; + if (!state.reports.empty()) { + s |= SHENG_STATE_ACCEPT; + } + if (info.isDead(state)) { + s |= SHENG_STATE_DEAD; + } + if (accelInfo.find(info.raw_id(state.impl_id)) != accelInfo.end()) { + s |= SHENG_STATE_ACCEL; + } + return s; +} + template <> u8 getShengState<sheng32>(dstate &state, dfa_info &info, map<dstate_id_t, AccelScheme> &accelInfo) { @@ -386,30 +386,30 @@ u8 getShengState<sheng64>(dstate &state, dfa_info &info, } template <typename T> -static -void fillAccelAux(struct NFA *n, dfa_info &info, - map<dstate_id_t, AccelScheme> &accelInfo) { - DEBUG_PRINTF("Filling accel aux structures\n"); +static +void fillAccelAux(struct NFA *n, dfa_info &info, + map<dstate_id_t, AccelScheme> &accelInfo) { + DEBUG_PRINTF("Filling accel aux structures\n"); T *s = (T *)getMutableImplNfa(n); - u32 offset = s->accel_offset; - - for (dstate_id_t i = 0; i < info.size(); i++) { - dstate_id_t state_id = info.raw_id(i); - if (accelInfo.find(state_id) != accelInfo.end()) { - s->flags |= SHENG_FLAG_HAS_ACCEL; - AccelAux *aux = (AccelAux *)((char *)n + offset); - info.strat.buildAccel(state_id, accelInfo[state_id], aux); - sstate_aux *saux = - (sstate_aux *)((char *)n + s->aux_offset) + state_id; - saux->accel = offset; - DEBUG_PRINTF("Accel offset: %u\n", offset); - offset += ROUNDUP_N(sizeof(AccelAux), alignof(AccelAux)); - } - } -} - + u32 offset = s->accel_offset; + + for (dstate_id_t i = 0; i < info.size(); i++) { + dstate_id_t state_id = info.raw_id(i); + if (accelInfo.find(state_id) != accelInfo.end()) { + s->flags |= SHENG_FLAG_HAS_ACCEL; + AccelAux *aux = (AccelAux *)((char *)n + offset); + info.strat.buildAccel(state_id, accelInfo[state_id], aux); + sstate_aux *saux = + (sstate_aux *)((char *)n + s->aux_offset) + state_id; + saux->accel = offset; + DEBUG_PRINTF("Accel offset: %u\n", offset); + offset += ROUNDUP_N(sizeof(AccelAux), alignof(AccelAux)); + } + } +} + template <typename T> -static +static void populateBasicInfo(UNUSED struct NFA *n, UNUSED dfa_info &info, UNUSED map<dstate_id_t, AccelScheme> &accelInfo, UNUSED u32 aux_offset, UNUSED u32 report_offset, @@ -423,25 +423,25 @@ void populateBasicInfo<sheng>(struct NFA *n, dfa_info &info, u32 aux_offset, u32 report_offset, u32 accel_offset, u32 total_size, u32 dfa_size) { - n->length = total_size; - n->scratchStateSize = 1; - n->streamStateSize = 1; - n->nPositions = info.size(); - n->type = SHENG_NFA; - n->flags |= info.raw.hasEodReports() ? NFA_ACCEPTS_EOD : 0; - - sheng *s = (sheng *)getMutableImplNfa(n); - s->aux_offset = aux_offset; - s->report_offset = report_offset; - s->accel_offset = accel_offset; - s->n_states = info.size(); - s->length = dfa_size; - s->flags |= info.can_die ? SHENG_FLAG_CAN_DIE : 0; - + n->length = total_size; + n->scratchStateSize = 1; + n->streamStateSize = 1; + n->nPositions = info.size(); + n->type = SHENG_NFA; + n->flags |= info.raw.hasEodReports() ? NFA_ACCEPTS_EOD : 0; + + sheng *s = (sheng *)getMutableImplNfa(n); + s->aux_offset = aux_offset; + s->report_offset = report_offset; + s->accel_offset = accel_offset; + s->n_states = info.size(); + s->length = dfa_size; + s->flags |= info.can_die ? 
SHENG_FLAG_CAN_DIE : 0; + s->anchored = getShengState<sheng>(info.anchored, info, accelInfo); s->floating = getShengState<sheng>(info.floating, info, accelInfo); -} - +} + template <> void populateBasicInfo<sheng32>(struct NFA *n, dfa_info &info, map<dstate_id_t, AccelScheme> &accelInfo, @@ -493,65 +493,65 @@ void populateBasicInfo<sheng64>(struct NFA *n, dfa_info &info, } template <typename T> -static -void fillTops(NFA *n, dfa_info &info, dstate_id_t id, - map<dstate_id_t, AccelScheme> &accelInfo) { +static +void fillTops(NFA *n, dfa_info &info, dstate_id_t id, + map<dstate_id_t, AccelScheme> &accelInfo) { T *s = (T *)getMutableImplNfa(n); - u32 aux_base = s->aux_offset; - - DEBUG_PRINTF("Filling tops for state %u\n", id); - - sstate_aux *aux = (sstate_aux *)((char *)n + aux_base) + id; - - DEBUG_PRINTF("Aux structure for state %u, offset %zd\n", id, - (char *)aux - (char *)n); - - /* we could conceivably end up in an accept/dead state on a top event, - * so mark top as accept/dead state if it indeed is. - */ - auto &top_state = info.top(id); - - DEBUG_PRINTF("Top transition for state %u: %u\n", id, top_state.impl_id); - + u32 aux_base = s->aux_offset; + + DEBUG_PRINTF("Filling tops for state %u\n", id); + + sstate_aux *aux = (sstate_aux *)((char *)n + aux_base) + id; + + DEBUG_PRINTF("Aux structure for state %u, offset %zd\n", id, + (char *)aux - (char *)n); + + /* we could conceivably end up in an accept/dead state on a top event, + * so mark top as accept/dead state if it indeed is. + */ + auto &top_state = info.top(id); + + DEBUG_PRINTF("Top transition for state %u: %u\n", id, top_state.impl_id); + aux->top = getShengState<T>(top_state, info, accelInfo); -} - +} + template <typename T> -static -void fillAux(NFA *n, dfa_info &info, dstate_id_t id, vector<u32> &reports, - vector<u32> &reports_eod, vector<u32> &report_offsets) { +static +void fillAux(NFA *n, dfa_info &info, dstate_id_t id, vector<u32> &reports, + vector<u32> &reports_eod, vector<u32> &report_offsets) { T *s = (T *)getMutableImplNfa(n); - u32 aux_base = s->aux_offset; - auto raw_id = info.raw_id(id); - - auto &state = info[id]; - - sstate_aux *aux = (sstate_aux *)((char *)n + aux_base) + id; - - DEBUG_PRINTF("Filling aux and report structures for state %u\n", id); - DEBUG_PRINTF("Aux structure for state %u, offset %zd\n", id, - (char *)aux - (char *)n); - - aux->accept = state.reports.empty() ? 0 : report_offsets[reports[raw_id]]; - aux->accept_eod = - state.reports_eod.empty() ? 0 : report_offsets[reports_eod[raw_id]]; - - DEBUG_PRINTF("Report list offset: %u\n", aux->accept); - DEBUG_PRINTF("EOD report list offset: %u\n", aux->accept_eod); -} - + u32 aux_base = s->aux_offset; + auto raw_id = info.raw_id(id); + + auto &state = info[id]; + + sstate_aux *aux = (sstate_aux *)((char *)n + aux_base) + id; + + DEBUG_PRINTF("Filling aux and report structures for state %u\n", id); + DEBUG_PRINTF("Aux structure for state %u, offset %zd\n", id, + (char *)aux - (char *)n); + + aux->accept = state.reports.empty() ? 0 : report_offsets[reports[raw_id]]; + aux->accept_eod = + state.reports_eod.empty() ? 
0 : report_offsets[reports_eod[raw_id]]; + + DEBUG_PRINTF("Report list offset: %u\n", aux->accept); + DEBUG_PRINTF("EOD report list offset: %u\n", aux->accept_eod); +} + template <typename T> -static -void fillSingleReport(NFA *n, ReportID r_id) { +static +void fillSingleReport(NFA *n, ReportID r_id) { T *s = (T *)getMutableImplNfa(n); - - DEBUG_PRINTF("Single report ID: %u\n", r_id); - s->report = r_id; - s->flags |= SHENG_FLAG_SINGLE_REPORT; -} - + + DEBUG_PRINTF("Single report ID: %u\n", r_id); + s->report = r_id; + s->flags |= SHENG_FLAG_SINGLE_REPORT; +} + template <typename T> -static +static bool createShuffleMasks(UNUSED T *s, UNUSED dfa_info &info, UNUSED map<dstate_id_t, AccelScheme> &accelInfo) { return true; @@ -560,28 +560,28 @@ bool createShuffleMasks(UNUSED T *s, UNUSED dfa_info &info, template <> bool createShuffleMasks<sheng>(sheng *s, dfa_info &info, map<dstate_id_t, AccelScheme> &accelInfo) { - for (u16 chr = 0; chr < 256; chr++) { - u8 buf[16] = {0}; - - for (dstate_id_t idx = 0; idx < info.size(); idx++) { - auto &succ_state = info.next(idx, chr); - + for (u16 chr = 0; chr < 256; chr++) { + u8 buf[16] = {0}; + + for (dstate_id_t idx = 0; idx < info.size(); idx++) { + auto &succ_state = info.next(idx, chr); + buf[idx] = getShengState<sheng>(succ_state, info, accelInfo); - } -#ifdef DEBUG - dumpShuffleMask(chr, buf, sizeof(buf)); -#endif - memcpy(&s->shuffle_masks[chr], buf, sizeof(m128)); - } + } +#ifdef DEBUG + dumpShuffleMask(chr, buf, sizeof(buf)); +#endif + memcpy(&s->shuffle_masks[chr], buf, sizeof(m128)); + } return true; -} - +} + template <> bool createShuffleMasks<sheng32>(sheng32 *s, dfa_info &info, map<dstate_id_t, AccelScheme> &accelInfo) { for (u16 chr = 0; chr < 256; chr++) { u8 buf[64] = {0}; - + assert(info.size() <= 32); for (dstate_id_t idx = 0; idx < info.size(); idx++) { auto &succ_state = info.next(idx, chr); @@ -593,20 +593,20 @@ bool createShuffleMasks<sheng32>(sheng32 *s, dfa_info &info, dumpShuffleMask32(chr, buf, sizeof(buf)); #endif memcpy(&s->succ_masks[chr], buf, sizeof(m512)); - } + } return true; } - + template <> bool createShuffleMasks<sheng64>(sheng64 *s, dfa_info &info, map<dstate_id_t, AccelScheme> &accelInfo) { for (u16 chr = 0; chr < 256; chr++) { u8 buf[64] = {0}; - + assert(info.size() <= 64); for (dstate_id_t idx = 0; idx < info.size(); idx++) { auto &succ_state = info.next(idx, chr); - + if (accelInfo.find(info.raw_id(succ_state.impl_id)) != accelInfo.end()) { return false; @@ -617,10 +617,10 @@ bool createShuffleMasks<sheng64>(sheng64 *s, dfa_info &info, dumpShuffleMask64(chr, buf, sizeof(buf)); #endif memcpy(&s->succ_masks[chr], buf, sizeof(m512)); - } + } return true; } - + bool has_accel_sheng(const NFA *) { return true; /* consider the sheng region as accelerated */ } @@ -631,72 +631,72 @@ bytecode_ptr<NFA> shengCompile_int(raw_dfa &raw, const CompileContext &cc, set<dstate_id_t> *accel_states, sheng_build_strat &strat, dfa_info &info) { - if (!cc.streaming) { /* TODO: work out if we can do the strip in streaming - * mode with our semantics */ - raw.stripExtraEodReports(); - } - auto accelInfo = strat.getAccelInfo(cc.grey); - - // set impl_id of each dfa state - for (dstate_id_t i = 0; i < info.size(); i++) { - info[i].impl_id = i; - } - - DEBUG_PRINTF("Anchored start state: %u, floating start state: %u\n", - info.anchored.impl_id, info.floating.impl_id); - + if (!cc.streaming) { /* TODO: work out if we can do the strip in streaming + * mode with our semantics */ + raw.stripExtraEodReports(); + } + auto accelInfo = 
strat.getAccelInfo(cc.grey); + + // set impl_id of each dfa state + for (dstate_id_t i = 0; i < info.size(); i++) { + info[i].impl_id = i; + } + + DEBUG_PRINTF("Anchored start state: %u, floating start state: %u\n", + info.anchored.impl_id, info.floating.impl_id); + u32 nfa_size = ROUNDUP_16(sizeof(NFA) + sizeof(T)); - vector<u32> reports, eod_reports, report_offsets; - u8 isSingle = 0; - ReportID single_report = 0; - - auto ri = - strat.gatherReports(reports, eod_reports, &isSingle, &single_report); - - u32 total_aux = sizeof(sstate_aux) * info.size(); - u32 total_accel = strat.accelSize() * accelInfo.size(); - u32 total_reports = ri->getReportListSize(); - - u32 reports_offset = nfa_size + total_aux; - u32 accel_offset = - ROUNDUP_N(reports_offset + total_reports, alignof(AccelAux)); - u32 total_size = ROUNDUP_N(accel_offset + total_accel, 64); - - DEBUG_PRINTF("NFA: %u, aux: %u, reports: %u, accel: %u, total: %u\n", - nfa_size, total_aux, total_reports, total_accel, total_size); - - auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size); - + vector<u32> reports, eod_reports, report_offsets; + u8 isSingle = 0; + ReportID single_report = 0; + + auto ri = + strat.gatherReports(reports, eod_reports, &isSingle, &single_report); + + u32 total_aux = sizeof(sstate_aux) * info.size(); + u32 total_accel = strat.accelSize() * accelInfo.size(); + u32 total_reports = ri->getReportListSize(); + + u32 reports_offset = nfa_size + total_aux; + u32 accel_offset = + ROUNDUP_N(reports_offset + total_reports, alignof(AccelAux)); + u32 total_size = ROUNDUP_N(accel_offset + total_accel, 64); + + DEBUG_PRINTF("NFA: %u, aux: %u, reports: %u, accel: %u, total: %u\n", + nfa_size, total_aux, total_reports, total_accel, total_size); + + auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size); + populateBasicInfo<T>(nfa.get(), info, accelInfo, nfa_size, reports_offset, accel_offset, total_size, total_size - sizeof(NFA)); - - DEBUG_PRINTF("Setting up aux and report structures\n"); - - ri->fillReportLists(nfa.get(), reports_offset, report_offsets); - - for (dstate_id_t idx = 0; idx < info.size(); idx++) { + + DEBUG_PRINTF("Setting up aux and report structures\n"); + + ri->fillReportLists(nfa.get(), reports_offset, report_offsets); + + for (dstate_id_t idx = 0; idx < info.size(); idx++) { fillTops<T>(nfa.get(), info, idx, accelInfo); fillAux<T>(nfa.get(), info, idx, reports, eod_reports, report_offsets); - } - if (isSingle) { + } + if (isSingle) { fillSingleReport<T>(nfa.get(), single_report); - } - + } + fillAccelAux<T>(nfa.get(), info, accelInfo); - - if (accel_states) { - fillAccelOut(accelInfo, accel_states); - } - + + if (accel_states) { + fillAccelOut(accelInfo, accel_states); + } + if (!createShuffleMasks<T>((T *)getMutableImplNfa(nfa.get()), info, accelInfo)) { return nullptr; } - - return nfa; -} - + + return nfa; +} + bytecode_ptr<NFA> shengCompile(raw_dfa &raw, const CompileContext &cc, const ReportManager &rm, bool only_accel_init, set<dstate_id_t> *accel_states) { @@ -792,4 +792,4 @@ bytecode_ptr<NFA> sheng64Compile(raw_dfa &raw, const CompileContext &cc, return nfa; } -} // namespace ue2 +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfa/shengcompile.h b/contrib/libs/hyperscan/src/nfa/shengcompile.h index 256f4a4e50..30ed0d68a6 100644 --- a/contrib/libs/hyperscan/src/nfa/shengcompile.h +++ b/contrib/libs/hyperscan/src/nfa/shengcompile.h @@ -1,76 +1,76 @@ -/* +/* * Copyright (c) 2016-2020, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, 
are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef SHENGCOMPILE_H -#define SHENGCOMPILE_H - -#include "accel_dfa_build_strat.h" -#include "rdfa.h" -#include "util/bytecode_ptr.h" -#include "util/charreach.h" -#include "util/flat_containers.h" - -#include <memory> -#include <set> - -struct NFA; - -namespace ue2 { - -class ReportManager; -struct CompileContext; -struct raw_dfa; - -class sheng_build_strat : public accel_dfa_build_strat { -public: - sheng_build_strat(raw_dfa &rdfa_in, const ReportManager &rm_in, - bool only_accel_init_in) - : accel_dfa_build_strat(rm_in, only_accel_init_in), rdfa(rdfa_in) {} - raw_dfa &get_raw() const override { return rdfa; } - std::unique_ptr<raw_report_info> gatherReports( - std::vector<u32> &reports /* out */, - std::vector<u32> &reports_eod /* out */, - u8 *isSingleReport /* out */, - ReportID *arbReport /* out */) const override; - size_t accelSize(void) const override; - u32 max_allowed_offset_accel() const override; - u32 max_stop_char() const override; - u32 max_floating_stop_char() const override; + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef SHENGCOMPILE_H +#define SHENGCOMPILE_H + +#include "accel_dfa_build_strat.h" +#include "rdfa.h" +#include "util/bytecode_ptr.h" +#include "util/charreach.h" +#include "util/flat_containers.h" + +#include <memory> +#include <set> + +struct NFA; + +namespace ue2 { + +class ReportManager; +struct CompileContext; +struct raw_dfa; + +class sheng_build_strat : public accel_dfa_build_strat { +public: + sheng_build_strat(raw_dfa &rdfa_in, const ReportManager &rm_in, + bool only_accel_init_in) + : accel_dfa_build_strat(rm_in, only_accel_init_in), rdfa(rdfa_in) {} + raw_dfa &get_raw() const override { return rdfa; } + std::unique_ptr<raw_report_info> gatherReports( + std::vector<u32> &reports /* out */, + std::vector<u32> &reports_eod /* out */, + u8 *isSingleReport /* out */, + ReportID *arbReport /* out */) const override; + size_t accelSize(void) const override; + u32 max_allowed_offset_accel() const override; + u32 max_stop_char() const override; + u32 max_floating_stop_char() const override; DfaType getType() const override { return Sheng; } - -private: - raw_dfa &rdfa; -}; - -bytecode_ptr<NFA> shengCompile(raw_dfa &raw, const CompileContext &cc, - const ReportManager &rm, bool only_accel_init, - std::set<dstate_id_t> *accel_states = nullptr); - + +private: + raw_dfa &rdfa; +}; + +bytecode_ptr<NFA> shengCompile(raw_dfa &raw, const CompileContext &cc, + const ReportManager &rm, bool only_accel_init, + std::set<dstate_id_t> *accel_states = nullptr); + bytecode_ptr<NFA> sheng32Compile(raw_dfa &raw, const CompileContext &cc, const ReportManager &rm, bool only_accel_init, std::set<dstate_id_t> *accel_states = nullptr); @@ -79,15 +79,15 @@ bytecode_ptr<NFA> sheng64Compile(raw_dfa &raw, const CompileContext &cc, const ReportManager &rm, bool only_accel_init, std::set<dstate_id_t> *accel_states = nullptr); -struct sheng_escape_info { - CharReach outs; - CharReach outs2_single; - flat_set<std::pair<u8, u8>> outs2; - bool outs2_broken = false; -}; - -bool has_accel_sheng(const NFA *nfa); - -} // namespace ue2 - -#endif /* SHENGCOMPILE_H */ +struct sheng_escape_info { + CharReach outs; + CharReach outs2_single; + flat_set<std::pair<u8, u8>> outs2; + bool outs2_broken = false; +}; + +bool has_accel_sheng(const NFA *nfa); + +} // namespace ue2 + +#endif /* SHENGCOMPILE_H */ diff --git a/contrib/libs/hyperscan/src/nfa/shufti.c b/contrib/libs/hyperscan/src/nfa/shufti.c index 09ffc0cf9a..59138a4798 100644 --- a/contrib/libs/hyperscan/src/nfa/shufti.c +++ b/contrib/libs/hyperscan/src/nfa/shufti.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,7 +34,7 @@ #include "shufti.h" #include "ue2common.h" -#include "util/arch.h" +#include "util/arch.h" #include "util/bitutils.h" #include "util/simd_utils.h" #include "util/unaligned.h" @@ -71,65 +71,65 @@ 
void dumpMsk##_t##AsChars(m##_t msk) { \ #endif -/** \brief Naive byte-by-byte implementation. */ -static really_inline -const u8 *shuftiFwdSlow(const u8 *lo, const u8 *hi, const u8 *buf, - const u8 *buf_end) { - assert(buf < buf_end); - - for (; buf < buf_end; ++buf) { - u8 c = *buf; - if (lo[c & 0xf] & hi[c >> 4]) { - break; - } - } - return buf; -} - -/** \brief Naive byte-by-byte implementation. */ -static really_inline -const u8 *shuftiRevSlow(const u8 *lo, const u8 *hi, const u8 *buf, - const u8 *buf_end) { - assert(buf < buf_end); - - for (buf_end--; buf_end >= buf; buf_end--) { - u8 c = *buf_end; - if (lo[c & 0xf] & hi[c >> 4]) { - break; - } - } - return buf_end; -} - -#if !defined(HAVE_AVX2) +/** \brief Naive byte-by-byte implementation. */ +static really_inline +const u8 *shuftiFwdSlow(const u8 *lo, const u8 *hi, const u8 *buf, + const u8 *buf_end) { + assert(buf < buf_end); + + for (; buf < buf_end; ++buf) { + u8 c = *buf; + if (lo[c & 0xf] & hi[c >> 4]) { + break; + } + } + return buf; +} + +/** \brief Naive byte-by-byte implementation. */ +static really_inline +const u8 *shuftiRevSlow(const u8 *lo, const u8 *hi, const u8 *buf, + const u8 *buf_end) { + assert(buf < buf_end); + + for (buf_end--; buf_end >= buf; buf_end--) { + u8 c = *buf_end; + if (lo[c & 0xf] & hi[c >> 4]) { + break; + } + } + return buf_end; +} + +#if !defined(HAVE_AVX2) /* Normal SSSE3 shufti */ -#ifdef DEBUG -DUMP_MSK(128) -#endif - +#ifdef DEBUG +DUMP_MSK(128) +#endif + #define GET_LO_4(chars) and128(chars, low4bits) -#define GET_HI_4(chars) rshift64_m128(andnot128(low4bits, chars), 4) +#define GET_HI_4(chars) rshift64_m128(andnot128(low4bits, chars), 4) static really_inline -u32 block(m128 mask_lo, m128 mask_hi, m128 chars, const m128 low4bits, - const m128 compare) { - m128 c_lo = pshufb_m128(mask_lo, GET_LO_4(chars)); - m128 c_hi = pshufb_m128(mask_hi, GET_HI_4(chars)); - m128 t = and128(c_lo, c_hi); - +u32 block(m128 mask_lo, m128 mask_hi, m128 chars, const m128 low4bits, + const m128 compare) { + m128 c_lo = pshufb_m128(mask_lo, GET_LO_4(chars)); + m128 c_hi = pshufb_m128(mask_hi, GET_HI_4(chars)); + m128 t = and128(c_lo, c_hi); + #ifdef DEBUG - DEBUG_PRINTF(" chars: "); dumpMsk128AsChars(chars); printf("\n"); - DEBUG_PRINTF(" char: "); dumpMsk128(chars); printf("\n"); - DEBUG_PRINTF(" c_lo: "); dumpMsk128(c_lo); printf("\n"); - DEBUG_PRINTF(" c_hi: "); dumpMsk128(c_hi); printf("\n"); - DEBUG_PRINTF(" t: "); dumpMsk128(t); printf("\n"); + DEBUG_PRINTF(" chars: "); dumpMsk128AsChars(chars); printf("\n"); + DEBUG_PRINTF(" char: "); dumpMsk128(chars); printf("\n"); + DEBUG_PRINTF(" c_lo: "); dumpMsk128(c_lo); printf("\n"); + DEBUG_PRINTF(" c_hi: "); dumpMsk128(c_hi); printf("\n"); + DEBUG_PRINTF(" t: "); dumpMsk128(t); printf("\n"); #endif - return movemask128(eq128(t, compare)); -} + return movemask128(eq128(t, compare)); +} -static really_inline -const u8 *firstMatch(const u8 *buf, u32 z) { +static really_inline +const u8 *firstMatch(const u8 *buf, u32 z) { if (unlikely(z != 0xffff)) { u32 pos = ctz32(~z & 0xffff); assert(pos < 16); @@ -142,9 +142,9 @@ const u8 *firstMatch(const u8 *buf, u32 z) { static really_inline const u8 *fwdBlock(m128 mask_lo, m128 mask_hi, m128 chars, const u8 *buf, const m128 low4bits, const m128 zeroes) { - u32 z = block(mask_lo, mask_hi, chars, low4bits, zeroes); + u32 z = block(mask_lo, mask_hi, chars, low4bits, zeroes); - return firstMatch(buf, z); + return firstMatch(buf, z); } const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, @@ -219,8 +219,8 @@ const u8 
*lastMatch(const u8 *buf, m128 t, m128 compare) { static really_inline const u8 *revBlock(m128 mask_lo, m128 mask_hi, m128 chars, const u8 *buf, const m128 low4bits, const m128 zeroes) { - m128 c_lo = pshufb_m128(mask_lo, GET_LO_4(chars)); - m128 c_hi = pshufb_m128(mask_hi, GET_HI_4(chars)); + m128 c_lo = pshufb_m128(mask_lo, GET_LO_4(chars)); + m128 c_hi = pshufb_m128(mask_hi, GET_HI_4(chars)); m128 t = and128(c_lo, c_hi); #ifdef DEBUG @@ -289,8 +289,8 @@ const u8 *fwdBlock2(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo, m128 mask2_hi, const m128 ones) { m128 chars_lo = GET_LO_4(chars); m128 chars_hi = GET_HI_4(chars); - m128 c_lo = pshufb_m128(mask1_lo, chars_lo); - m128 c_hi = pshufb_m128(mask1_hi, chars_hi); + m128 c_lo = pshufb_m128(mask1_lo, chars_lo); + m128 c_hi = pshufb_m128(mask1_hi, chars_hi); m128 t = or128(c_lo, c_hi); #ifdef DEBUG @@ -301,9 +301,9 @@ const u8 *fwdBlock2(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo, m128 mask2_hi, DEBUG_PRINTF(" t: "); dumpMsk128(t); printf("\n"); #endif - m128 c2_lo = pshufb_m128(mask2_lo, chars_lo); - m128 c2_hi = pshufb_m128(mask2_hi, chars_hi); - m128 t2 = or128(t, rshiftbyte_m128(or128(c2_lo, c2_hi), 1)); + m128 c2_lo = pshufb_m128(mask2_lo, chars_lo); + m128 c2_hi = pshufb_m128(mask2_hi, chars_hi); + m128 t2 = or128(t, rshiftbyte_m128(or128(c2_lo, c2_hi), 1)); #ifdef DEBUG DEBUG_PRINTF(" c2_lo: "); dumpMsk128(c2_lo); printf("\n"); @@ -311,9 +311,9 @@ const u8 *fwdBlock2(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo, m128 mask2_hi, DEBUG_PRINTF(" t2: "); dumpMsk128(t2); printf("\n"); #endif - u32 z = movemask128(eq128(t2, ones)); - DEBUG_PRINTF(" z: 0x%08x\n", z); - return firstMatch(buf, z); + u32 z = movemask128(eq128(t2, ones)); + DEBUG_PRINTF(" z: 0x%08x\n", z); + return firstMatch(buf, z); } const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi, @@ -360,41 +360,41 @@ const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi, return buf_end; } -#elif !defined(HAVE_AVX512) -// AVX2 - 256 wide shuftis +#elif !defined(HAVE_AVX512) +// AVX2 - 256 wide shuftis #ifdef DEBUG DUMP_MSK(256) #endif #define GET_LO_4(chars) and256(chars, low4bits) -#define GET_HI_4(chars) rshift64_m256(andnot256(low4bits, chars), 4) +#define GET_HI_4(chars) rshift64_m256(andnot256(low4bits, chars), 4) static really_inline -u32 block(m256 mask_lo, m256 mask_hi, m256 chars, const m256 low4bits, - const m256 compare) { - m256 c_lo = pshufb_m256(mask_lo, GET_LO_4(chars)); - m256 c_hi = pshufb_m256(mask_hi, GET_HI_4(chars)); - m256 t = and256(c_lo, c_hi); - +u32 block(m256 mask_lo, m256 mask_hi, m256 chars, const m256 low4bits, + const m256 compare) { + m256 c_lo = pshufb_m256(mask_lo, GET_LO_4(chars)); + m256 c_hi = pshufb_m256(mask_hi, GET_HI_4(chars)); + m256 t = and256(c_lo, c_hi); + #ifdef DEBUG - DEBUG_PRINTF(" chars: "); dumpMsk256AsChars(chars); printf("\n"); - DEBUG_PRINTF(" char: "); dumpMsk256(chars); printf("\n"); - DEBUG_PRINTF(" c_lo: "); dumpMsk256(c_lo); printf("\n"); - DEBUG_PRINTF(" c_hi: "); dumpMsk256(c_hi); printf("\n"); - DEBUG_PRINTF(" t: "); dumpMsk256(t); printf("\n"); + DEBUG_PRINTF(" chars: "); dumpMsk256AsChars(chars); printf("\n"); + DEBUG_PRINTF(" char: "); dumpMsk256(chars); printf("\n"); + DEBUG_PRINTF(" c_lo: "); dumpMsk256(c_lo); printf("\n"); + DEBUG_PRINTF(" c_hi: "); dumpMsk256(c_hi); printf("\n"); + DEBUG_PRINTF(" t: "); dumpMsk256(t); printf("\n"); #endif - return movemask256(eq256(t, compare)); -} - -static really_inline -const u8 *firstMatch(const u8 *buf, u32 z) { - DEBUG_PRINTF("z 0x%08x\n", z); + return movemask256(eq256(t, 
compare)); +} + +static really_inline +const u8 *firstMatch(const u8 *buf, u32 z) { + DEBUG_PRINTF("z 0x%08x\n", z); if (unlikely(z != 0xffffffff)) { u32 pos = ctz32(~z); assert(pos < 32); - DEBUG_PRINTF("match @ pos %u\n", pos); + DEBUG_PRINTF("match @ pos %u\n", pos); return buf + pos; } else { return NULL; // no match @@ -402,44 +402,44 @@ const u8 *firstMatch(const u8 *buf, u32 z) { } static really_inline -const u8 *fwdBlockShort(m256 mask, m128 chars, const u8 *buf, - const m256 low4bits) { - // do the hi and lo shuffles in the one avx register - m256 c = combine2x128(rshift64_m128(chars, 4), chars); - c = and256(c, low4bits); - m256 c_shuf = pshufb_m256(mask, c); - m128 t = and128(movdq_hi(c_shuf), cast256to128(c_shuf)); - // the upper 32-bits can't match - u32 z = 0xffff0000U | movemask128(eq128(t, zeroes128())); - - return firstMatch(buf, z); -} - -static really_inline -const u8 *shuftiFwdShort(m128 mask_lo, m128 mask_hi, const u8 *buf, - const u8 *buf_end, const m256 low4bits) { - // run shufti over two overlapping 16-byte unaligned reads - const m256 mask = combine2x128(mask_hi, mask_lo); - m128 chars = loadu128(buf); - const u8 *rv = fwdBlockShort(mask, chars, buf, low4bits); - if (rv) { - return rv; - } - - chars = loadu128(buf_end - 16); - rv = fwdBlockShort(mask, chars, buf_end - 16, low4bits); - if (rv) { - return rv; - } - return buf_end; -} - -static really_inline +const u8 *fwdBlockShort(m256 mask, m128 chars, const u8 *buf, + const m256 low4bits) { + // do the hi and lo shuffles in the one avx register + m256 c = combine2x128(rshift64_m128(chars, 4), chars); + c = and256(c, low4bits); + m256 c_shuf = pshufb_m256(mask, c); + m128 t = and128(movdq_hi(c_shuf), cast256to128(c_shuf)); + // the upper 32-bits can't match + u32 z = 0xffff0000U | movemask128(eq128(t, zeroes128())); + + return firstMatch(buf, z); +} + +static really_inline +const u8 *shuftiFwdShort(m128 mask_lo, m128 mask_hi, const u8 *buf, + const u8 *buf_end, const m256 low4bits) { + // run shufti over two overlapping 16-byte unaligned reads + const m256 mask = combine2x128(mask_hi, mask_lo); + m128 chars = loadu128(buf); + const u8 *rv = fwdBlockShort(mask, chars, buf, low4bits); + if (rv) { + return rv; + } + + chars = loadu128(buf_end - 16); + rv = fwdBlockShort(mask, chars, buf_end - 16, low4bits); + if (rv) { + return rv; + } + return buf_end; +} + +static really_inline const u8 *fwdBlock(m256 mask_lo, m256 mask_hi, m256 chars, const u8 *buf, const m256 low4bits, const m256 zeroes) { - u32 z = block(mask_lo, mask_hi, chars, low4bits, zeroes); + u32 z = block(mask_lo, mask_hi, chars, low4bits, zeroes); - return firstMatch(buf, z); + return firstMatch(buf, z); } /* takes 128 bit masks, but operates on 256 bits of data */ @@ -447,20 +447,20 @@ const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, const u8 *buf_end) { assert(buf && buf_end); assert(buf < buf_end); - DEBUG_PRINTF("shufti %p len %zu\n", buf, buf_end - buf); + DEBUG_PRINTF("shufti %p len %zu\n", buf, buf_end - buf); // Slow path for small cases. 
- if (buf_end - buf < 16) { + if (buf_end - buf < 16) { return shuftiFwdSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi, buf, buf_end); } - const m256 low4bits = set32x8(0xf); - - if (buf_end - buf <= 32) { - return shuftiFwdShort(mask_lo, mask_hi, buf, buf_end, low4bits); - } - + const m256 low4bits = set32x8(0xf); + + if (buf_end - buf <= 32) { + return shuftiFwdShort(mask_lo, mask_hi, buf, buf_end, low4bits); + } + const m256 zeroes = zeroes256(); const m256 wide_mask_lo = set2x128(mask_lo); const m256 wide_mask_hi = set2x128(mask_hi); @@ -503,7 +503,7 @@ const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, } static really_inline -const u8 *lastMatch(const u8 *buf, u32 z) { +const u8 *lastMatch(const u8 *buf, u32 z) { if (unlikely(z != 0xffffffff)) { u32 pos = clz32(~z); DEBUG_PRINTF("buf=%p, pos=%u\n", buf, pos); @@ -516,8 +516,8 @@ const u8 *lastMatch(const u8 *buf, u32 z) { static really_inline const u8 *revBlock(m256 mask_lo, m256 mask_hi, m256 chars, const u8 *buf, const m256 low4bits, const m256 zeroes) { - m256 c_lo = pshufb_m256(mask_lo, GET_LO_4(chars)); - m256 c_hi = pshufb_m256(mask_hi, GET_HI_4(chars)); + m256 c_lo = pshufb_m256(mask_lo, GET_LO_4(chars)); + m256 c_hi = pshufb_m256(mask_hi, GET_HI_4(chars)); m256 t = and256(c_lo, c_hi); #ifdef DEBUG @@ -528,45 +528,45 @@ const u8 *revBlock(m256 mask_lo, m256 mask_hi, m256 chars, const u8 *buf, DEBUG_PRINTF(" t: "); dumpMsk256(t); printf("\n"); #endif - u32 z = movemask256(eq256(t, zeroes)); - return lastMatch(buf, z); -} - -static really_inline -const u8 *revBlockShort(m256 mask, m128 chars, const u8 *buf, - const m256 low4bits) { - // do the hi and lo shuffles in the one avx register - m256 c = combine2x128(rshift64_m128(chars, 4), chars); - c = and256(c, low4bits); - m256 c_shuf = pshufb_m256(mask, c); - m128 t = and128(movdq_hi(c_shuf), cast256to128(c_shuf)); - // the upper 32-bits can't match - u32 z = 0xffff0000U | movemask128(eq128(t, zeroes128())); - - return lastMatch(buf, z); -} - -static really_inline -const u8 *shuftiRevShort(m128 mask_lo, m128 mask_hi, const u8 *buf, - const u8 *buf_end, const m256 low4bits) { - // run shufti over two overlapping 16-byte unaligned reads - const m256 mask = combine2x128(mask_hi, mask_lo); - - m128 chars = loadu128(buf_end - 16); - const u8 *rv = revBlockShort(mask, chars, buf_end - 16, low4bits); - if (rv) { - return rv; - } - - chars = loadu128(buf); - rv = revBlockShort(mask, chars, buf, low4bits); - if (rv) { - return rv; - } - return buf - 1; + u32 z = movemask256(eq256(t, zeroes)); + return lastMatch(buf, z); } - +static really_inline +const u8 *revBlockShort(m256 mask, m128 chars, const u8 *buf, + const m256 low4bits) { + // do the hi and lo shuffles in the one avx register + m256 c = combine2x128(rshift64_m128(chars, 4), chars); + c = and256(c, low4bits); + m256 c_shuf = pshufb_m256(mask, c); + m128 t = and128(movdq_hi(c_shuf), cast256to128(c_shuf)); + // the upper 32-bits can't match + u32 z = 0xffff0000U | movemask128(eq128(t, zeroes128())); + + return lastMatch(buf, z); +} + +static really_inline +const u8 *shuftiRevShort(m128 mask_lo, m128 mask_hi, const u8 *buf, + const u8 *buf_end, const m256 low4bits) { + // run shufti over two overlapping 16-byte unaligned reads + const m256 mask = combine2x128(mask_hi, mask_lo); + + m128 chars = loadu128(buf_end - 16); + const u8 *rv = revBlockShort(mask, chars, buf_end - 16, low4bits); + if (rv) { + return rv; + } + + chars = loadu128(buf); + rv = revBlockShort(mask, chars, buf, low4bits); + if (rv) { + return rv; + } + 
return buf - 1; +} + + /* takes 128 bit masks, but operates on 256 bits of data */ const u8 *rshuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, const u8 *buf_end) { @@ -574,17 +574,17 @@ const u8 *rshuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, assert(buf < buf_end); // Slow path for small cases. - if (buf_end - buf < 16) { + if (buf_end - buf < 16) { return shuftiRevSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi, buf, buf_end); } - const m256 low4bits = set32x8(0xf); - - if (buf_end - buf <= 32) { - return shuftiRevShort(mask_lo, mask_hi, buf, buf_end, low4bits); - } - + const m256 low4bits = set32x8(0xf); + + if (buf_end - buf <= 32) { + return shuftiRevShort(mask_lo, mask_hi, buf, buf_end, low4bits); + } + const m256 zeroes = zeroes256(); const m256 wide_mask_lo = set2x128(mask_lo); const m256 wide_mask_hi = set2x128(mask_hi); @@ -630,8 +630,8 @@ const u8 *fwdBlock2(m256 mask1_lo, m256 mask1_hi, m256 mask2_lo, m256 mask2_hi, DEBUG_PRINTF("buf %p\n", buf); m256 chars_lo = GET_LO_4(chars); m256 chars_hi = GET_HI_4(chars); - m256 c_lo = pshufb_m256(mask1_lo, chars_lo); - m256 c_hi = pshufb_m256(mask1_hi, chars_hi); + m256 c_lo = pshufb_m256(mask1_lo, chars_lo); + m256 c_hi = pshufb_m256(mask1_hi, chars_hi); m256 t = or256(c_lo, c_hi); #ifdef DEBUG @@ -642,71 +642,71 @@ const u8 *fwdBlock2(m256 mask1_lo, m256 mask1_hi, m256 mask2_lo, m256 mask2_hi, DEBUG_PRINTF(" t: "); dumpMsk256(t); printf("\n"); #endif - m256 c2_lo = pshufb_m256(mask2_lo, chars_lo); - m256 c2_hi = pshufb_m256(mask2_hi, chars_hi); - m256 t2 = or256(t, rshift128_m256(or256(c2_lo, c2_hi), 1)); + m256 c2_lo = pshufb_m256(mask2_lo, chars_lo); + m256 c2_hi = pshufb_m256(mask2_hi, chars_hi); + m256 t2 = or256(t, rshift128_m256(or256(c2_lo, c2_hi), 1)); #ifdef DEBUG DEBUG_PRINTF(" c2_lo: "); dumpMsk256(c2_lo); printf("\n"); DEBUG_PRINTF(" c2_hi: "); dumpMsk256(c2_hi); printf("\n"); DEBUG_PRINTF(" t2: "); dumpMsk256(t2); printf("\n"); #endif - u32 z = movemask256(eq256(t2, ones)); - - return firstMatch(buf, z); -} - -static really_inline -const u8 *fwdBlockShort2(m256 mask1, m256 mask2, m128 chars, const u8 *buf, - const m256 low4bits) { - // do the hi and lo shuffles in the one avx register - m256 c = combine2x128(rshift64_m128(chars, 4), chars); - c = and256(c, low4bits); - m256 c_shuf1 = pshufb_m256(mask1, c); - m256 c_shuf2 = rshift128_m256(pshufb_m256(mask2, c), 1); - m256 t0 = or256(c_shuf1, c_shuf2); - m128 t = or128(movdq_hi(t0), cast256to128(t0)); - // the upper 32-bits can't match - u32 z = 0xffff0000U | movemask128(eq128(t, ones128())); - - return firstMatch(buf, z); -} - -static really_inline -const u8 *shuftiDoubleShort(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo, - m128 mask2_hi, const u8 *buf, const u8 *buf_end) { - DEBUG_PRINTF("buf %p len %zu\n", buf, buf_end - buf); - const m256 low4bits = set32x8(0xf); - // run shufti over two overlapping 16-byte unaligned reads - const m256 mask1 = combine2x128(mask1_hi, mask1_lo); - const m256 mask2 = combine2x128(mask2_hi, mask2_lo); - m128 chars = loadu128(buf); - const u8 *rv = fwdBlockShort2(mask1, mask2, chars, buf, low4bits); - if (rv) { - return rv; - } + u32 z = movemask256(eq256(t2, ones)); - chars = loadu128(buf_end - 16); - rv = fwdBlockShort2(mask1, mask2, chars, buf_end - 16, low4bits); - if (rv) { - return rv; - } - return buf_end; + return firstMatch(buf, z); } +static really_inline +const u8 *fwdBlockShort2(m256 mask1, m256 mask2, m128 chars, const u8 *buf, + const m256 low4bits) { + // do the hi and lo shuffles in the one avx register + m256 c = 
combine2x128(rshift64_m128(chars, 4), chars); + c = and256(c, low4bits); + m256 c_shuf1 = pshufb_m256(mask1, c); + m256 c_shuf2 = rshift128_m256(pshufb_m256(mask2, c), 1); + m256 t0 = or256(c_shuf1, c_shuf2); + m128 t = or128(movdq_hi(t0), cast256to128(t0)); + // the upper 32-bits can't match + u32 z = 0xffff0000U | movemask128(eq128(t, ones128())); + + return firstMatch(buf, z); +} + +static really_inline +const u8 *shuftiDoubleShort(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo, + m128 mask2_hi, const u8 *buf, const u8 *buf_end) { + DEBUG_PRINTF("buf %p len %zu\n", buf, buf_end - buf); + const m256 low4bits = set32x8(0xf); + // run shufti over two overlapping 16-byte unaligned reads + const m256 mask1 = combine2x128(mask1_hi, mask1_lo); + const m256 mask2 = combine2x128(mask2_hi, mask2_lo); + m128 chars = loadu128(buf); + const u8 *rv = fwdBlockShort2(mask1, mask2, chars, buf, low4bits); + if (rv) { + return rv; + } + + chars = loadu128(buf_end - 16); + rv = fwdBlockShort2(mask1, mask2, chars, buf_end - 16, low4bits); + if (rv) { + return rv; + } + return buf_end; +} + /* takes 128 bit masks, but operates on 256 bits of data */ const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo, m128 mask2_hi, const u8 *buf, const u8 *buf_end) { - /* we should always have at least 16 bytes */ - assert(buf_end - buf >= 16); - DEBUG_PRINTF("buf %p len %zu\n", buf, buf_end - buf); - + /* we should always have at least 16 bytes */ + assert(buf_end - buf >= 16); + DEBUG_PRINTF("buf %p len %zu\n", buf, buf_end - buf); + if (buf_end - buf < 32) { - return shuftiDoubleShort(mask1_lo, mask1_hi, mask2_lo, mask2_hi, buf, - buf_end); + return shuftiDoubleShort(mask1_lo, mask1_hi, mask2_lo, mask2_hi, buf, + buf_end); } - + const m256 ones = ones256(); const m256 low4bits = set32x8(0xf); const m256 wide_mask1_lo = set2x128(mask1_lo); @@ -751,347 +751,347 @@ const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi, return buf_end; } -#else // defined(HAVE_AVX512) - -#ifdef DEBUG -DUMP_MSK(512) -#endif - -static really_inline -u64a block(m512 mask_lo, m512 mask_hi, m512 chars, const m512 low4bits, - const m512 compare) { - m512 c_lo = pshufb_m512(mask_lo, and512(chars, low4bits)); - m512 c_hi = pshufb_m512(mask_hi, - rshift64_m512(andnot512(low4bits, chars), 4)); - m512 t = and512(c_lo, c_hi); - -#ifdef DEBUG - DEBUG_PRINTF(" chars: "); dumpMsk512AsChars(chars); printf("\n"); - DEBUG_PRINTF(" char: "); dumpMsk512(chars); printf("\n"); - DEBUG_PRINTF(" c_lo: "); dumpMsk512(c_lo); printf("\n"); - DEBUG_PRINTF(" c_hi: "); dumpMsk512(c_hi); printf("\n"); - DEBUG_PRINTF(" t: "); dumpMsk512(t); printf("\n"); -#endif - - return eq512mask(t, compare); -} -static really_inline -const u8 *firstMatch64(const u8 *buf, u64a z) { - DEBUG_PRINTF("z 0x%016llx\n", z); - if (unlikely(z != ~0ULL)) { - u32 pos = ctz64(~z); - DEBUG_PRINTF("match @ pos %u\n", pos); - assert(pos < 64); - return buf + pos; - } else { - return NULL; // no match - } -} - -static really_inline -const u8 *fwdBlock512(m512 mask_lo, m512 mask_hi, m512 chars, const u8 *buf, - const m512 low4bits, const m512 zeroes) { - u64a z = block(mask_lo, mask_hi, chars, low4bits, zeroes); - - return firstMatch64(buf, z); -} - -static really_inline -const u8 *shortShufti512(m512 mask_lo, m512 mask_hi, const u8 *buf, - const u8 *buf_end, const m512 low4bits, - const m512 zeroes) { - DEBUG_PRINTF("short shufti %p len %zu\n", buf, buf_end - buf); - uintptr_t len = buf_end - buf; - assert(len <= 64); - - // load mask - u64a k = (~0ULL) >> (64 - len); - 
DEBUG_PRINTF("load mask 0x%016llx\n", k); - - m512 chars = loadu_maskz_m512(k, buf); - - u64a z = block(mask_lo, mask_hi, chars, low4bits, zeroes); - - // reuse the load mask to indicate valid bytes - return firstMatch64(buf, z | ~k); -} - -/* takes 128 bit masks, but operates on 512 bits of data */ -const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, - const u8 *buf_end) { - assert(buf && buf_end); - assert(buf < buf_end); - DEBUG_PRINTF("shufti %p len %zu\n", buf, buf_end - buf); - DEBUG_PRINTF("b %s\n", buf); - - const m512 low4bits = set64x8(0xf); - const m512 zeroes = zeroes512(); - const m512 wide_mask_lo = set4x128(mask_lo); - const m512 wide_mask_hi = set4x128(mask_hi); - const u8 *rv; - - // small cases. - if (buf_end - buf <= 64) { - rv = shortShufti512(wide_mask_lo, wide_mask_hi, buf, buf_end, low4bits, - zeroes); - return rv ? rv : buf_end; - } - - assert(buf_end - buf >= 64); - - // Preconditioning: most of the time our buffer won't be aligned. - if ((uintptr_t)buf % 64) { - rv = shortShufti512(wide_mask_lo, wide_mask_hi, buf, - ROUNDUP_PTR(buf, 64), low4bits, zeroes); - if (rv) { - return rv; - } - buf = ROUNDUP_PTR(buf, 64); - } - - const u8 *last_block = ROUNDDOWN_PTR(buf_end, 64); - while (buf < last_block) { - m512 lchars = load512(buf); - rv = fwdBlock512(wide_mask_lo, wide_mask_hi, lchars, buf, low4bits, - zeroes); - if (rv) { - return rv; - } - buf += 64; - } - - if (buf == buf_end) { - goto done; - } - - // Use an unaligned load to mop up the last 64 bytes and get an accurate - // picture to buf_end. - assert(buf <= buf_end && buf >= buf_end - 64); - m512 chars = loadu512(buf_end - 64); - rv = fwdBlock512(wide_mask_lo, wide_mask_hi, chars, buf_end - 64, low4bits, - zeroes); - if (rv) { - return rv; - } -done: - return buf_end; -} - -static really_inline -const u8 *lastMatch64(const u8 *buf, u64a z) { - DEBUG_PRINTF("z 0x%016llx\n", z); - if (unlikely(z != ~0ULL)) { - u32 pos = clz64(~z); - DEBUG_PRINTF("buf=%p, pos=%u\n", buf, pos); - return buf + (63 - pos); - } else { - return NULL; // no match - } -} - -static really_inline -const u8 *rshortShufti512(m512 mask_lo, m512 mask_hi, const u8 *buf, - const u8 *buf_end, const m512 low4bits, - const m512 zeroes) { - DEBUG_PRINTF("short %p len %zu\n", buf, buf_end - buf); - uintptr_t len = buf_end - buf; - assert(len <= 64); - - // load mask - u64a k = (~0ULL) >> (64 - len); - DEBUG_PRINTF("load mask 0x%016llx\n", k); - - m512 chars = loadu_maskz_m512(k, buf); - - u64a z = block(mask_lo, mask_hi, chars, low4bits, zeroes); - - // reuse the load mask to indicate valid bytes - return lastMatch64(buf, z | ~k); -} - -static really_inline -const u8 *revBlock512(m512 mask_lo, m512 mask_hi, m512 chars, const u8 *buf, - const m512 low4bits, const m512 zeroes) { - m512 c_lo = pshufb_m512(mask_lo, and512(chars, low4bits)); - m512 c_hi = pshufb_m512(mask_hi, - rshift64_m512(andnot512(low4bits, chars), 4)); - m512 t = and512(c_lo, c_hi); - -#ifdef DEBUG - DEBUG_PRINTF(" chars: "); dumpMsk512AsChars(chars); printf("\n"); - DEBUG_PRINTF(" char: "); dumpMsk512(chars); printf("\n"); - DEBUG_PRINTF(" c_lo: "); dumpMsk512(c_lo); printf("\n"); - DEBUG_PRINTF(" c_hi: "); dumpMsk512(c_hi); printf("\n"); - DEBUG_PRINTF(" t: "); dumpMsk512(t); printf("\n"); -#endif - - u64a z = eq512mask(t, zeroes); - return lastMatch64(buf, z); -} - -/* takes 128 bit masks, but operates on 512 bits of data */ -const u8 *rshuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, - const u8 *buf_end) { - DEBUG_PRINTF("buf %p buf_end %p\n", buf, buf_end); - 
assert(buf && buf_end); - assert(buf < buf_end); - - const m512 low4bits = set64x8(0xf); - const m512 zeroes = zeroes512(); - const m512 wide_mask_lo = set4x128(mask_lo); - const m512 wide_mask_hi = set4x128(mask_hi); - const u8 *rv; - - if (buf_end - buf < 64) { - rv = rshortShufti512(wide_mask_lo, wide_mask_hi, buf, buf_end, low4bits, - zeroes); - return rv ? rv : buf - 1; - } - - if (ROUNDDOWN_PTR(buf_end, 64) != buf_end) { - // peel off unaligned portion - assert(buf_end - buf >= 64); - DEBUG_PRINTF("start\n"); - rv = rshortShufti512(wide_mask_lo, wide_mask_hi, - ROUNDDOWN_PTR(buf_end, 64), buf_end, low4bits, - zeroes); - if (rv) { - return rv; - } - buf_end = ROUNDDOWN_PTR(buf_end, 64); - } - - const u8 *last_block = ROUNDUP_PTR(buf, 64); - while (buf_end > last_block) { - buf_end -= 64; - m512 lchars = load512(buf_end); - rv = revBlock512(wide_mask_lo, wide_mask_hi, lchars, buf_end, low4bits, - zeroes); - if (rv) { - return rv; - } - } - if (buf_end == buf) { - goto done; - } - // Use an unaligned load to mop up the last 64 bytes and get an accurate - // picture to buf. - m512 chars = loadu512(buf); - rv = revBlock512(wide_mask_lo, wide_mask_hi, chars, buf, low4bits, zeroes); - if (rv) { - return rv; - } -done: - return buf - 1; -} - -static really_inline -const u8 *fwdBlock2(m512 mask1_lo, m512 mask1_hi, m512 mask2_lo, m512 mask2_hi, - m512 chars, const u8 *buf, const m512 low4bits, - const m512 ones, __mmask64 k) { - DEBUG_PRINTF("buf %p %.64s\n", buf, buf); - m512 chars_lo = and512(chars, low4bits); - m512 chars_hi = rshift64_m512(andnot512(low4bits, chars), 4); - m512 c_lo = maskz_pshufb_m512(k, mask1_lo, chars_lo); - m512 c_hi = maskz_pshufb_m512(k, mask1_hi, chars_hi); - m512 t = or512(c_lo, c_hi); - -#ifdef DEBUG - DEBUG_PRINTF(" chars: "); dumpMsk512AsChars(chars); printf("\n"); - DEBUG_PRINTF(" char: "); dumpMsk512(chars); printf("\n"); - DEBUG_PRINTF(" c_lo: "); dumpMsk512(c_lo); printf("\n"); - DEBUG_PRINTF(" c_hi: "); dumpMsk512(c_hi); printf("\n"); - DEBUG_PRINTF(" t: "); dumpMsk512(t); printf("\n"); -#endif - - m512 c2_lo = maskz_pshufb_m512(k, mask2_lo, chars_lo); - m512 c2_hi = maskz_pshufb_m512(k, mask2_hi, chars_hi); - m512 t2 = or512(t, rshift128_m512(or512(c2_lo, c2_hi), 1)); - -#ifdef DEBUG - DEBUG_PRINTF(" c2_lo: "); dumpMsk512(c2_lo); printf("\n"); - DEBUG_PRINTF(" c2_hi: "); dumpMsk512(c2_hi); printf("\n"); - DEBUG_PRINTF(" t2: "); dumpMsk512(t2); printf("\n"); -#endif - u64a z = eq512mask(t2, ones); - - return firstMatch64(buf, z | ~k); -} - -static really_inline -const u8 *shortDoubleShufti512(m512 mask1_lo, m512 mask1_hi, m512 mask2_lo, - m512 mask2_hi, const u8 *buf, const u8 *buf_end, - const m512 low4bits, const m512 ones) { - DEBUG_PRINTF("short %p len %zu\n", buf, buf_end - buf); - uintptr_t len = buf_end - buf; - assert(len <= 64); - - u64a k = (~0ULL) >> (64 - len); - DEBUG_PRINTF("load mask 0x%016llx\n", k); - - m512 chars = loadu_mask_m512(ones, k, buf); - - const u8 *rv = fwdBlock2(mask1_lo, mask1_hi, mask2_lo, mask2_hi, chars, buf, - low4bits, ones, k); - - return rv; -} - -/* takes 128 bit masks, but operates on 512 bits of data */ -const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi, - m128 mask2_lo, m128 mask2_hi, - const u8 *buf, const u8 *buf_end) { - /* we should always have at least 16 bytes */ - assert(buf_end - buf >= 16); - DEBUG_PRINTF("buf %p len %zu\n", buf, buf_end - buf); - - const m512 ones = ones512(); - const m512 low4bits = set64x8(0xf); - const m512 wide_mask1_lo = set4x128(mask1_lo); - const m512 wide_mask1_hi = 
set4x128(mask1_hi); - const m512 wide_mask2_lo = set4x128(mask2_lo); - const m512 wide_mask2_hi = set4x128(mask2_hi); - const u8 *rv; - - if (buf_end - buf <= 64) { - rv = shortDoubleShufti512(wide_mask1_lo, wide_mask1_hi, wide_mask2_lo, - wide_mask2_hi, buf, buf_end, low4bits, ones); - DEBUG_PRINTF("rv %p\n", rv); - return rv ? rv : buf_end; - } - - // Preconditioning: most of the time our buffer won't be aligned. - if ((uintptr_t)buf % 64) { - rv = shortDoubleShufti512(wide_mask1_lo, wide_mask1_hi, wide_mask2_lo, - wide_mask2_hi, buf, ROUNDUP_PTR(buf, 64), - low4bits, ones); - if (rv) { - return rv; - } - - buf = ROUNDUP_PTR(buf, 64); - } - - const u8 *last_block = buf_end - 64; - while (buf < last_block) { - m512 lchars = load512(buf); - rv = fwdBlock2(wide_mask1_lo, wide_mask1_hi, wide_mask2_lo, - wide_mask2_hi, lchars, buf, low4bits, ones, ~0); - if (rv) { - return rv; - } - buf += 64; - } - - // Use an unaligned load to mop up the last 64 bytes and get an accurate - // picture to buf_end. - m512 chars = loadu512(buf_end - 64); - rv = fwdBlock2(wide_mask1_lo, wide_mask1_hi, wide_mask2_lo, wide_mask2_hi, - chars, buf_end - 64, low4bits, ones, ~0); - if (rv) { - return rv; - } - - return buf_end; -} -#endif +#else // defined(HAVE_AVX512) + +#ifdef DEBUG +DUMP_MSK(512) +#endif + +static really_inline +u64a block(m512 mask_lo, m512 mask_hi, m512 chars, const m512 low4bits, + const m512 compare) { + m512 c_lo = pshufb_m512(mask_lo, and512(chars, low4bits)); + m512 c_hi = pshufb_m512(mask_hi, + rshift64_m512(andnot512(low4bits, chars), 4)); + m512 t = and512(c_lo, c_hi); + +#ifdef DEBUG + DEBUG_PRINTF(" chars: "); dumpMsk512AsChars(chars); printf("\n"); + DEBUG_PRINTF(" char: "); dumpMsk512(chars); printf("\n"); + DEBUG_PRINTF(" c_lo: "); dumpMsk512(c_lo); printf("\n"); + DEBUG_PRINTF(" c_hi: "); dumpMsk512(c_hi); printf("\n"); + DEBUG_PRINTF(" t: "); dumpMsk512(t); printf("\n"); +#endif + + return eq512mask(t, compare); +} +static really_inline +const u8 *firstMatch64(const u8 *buf, u64a z) { + DEBUG_PRINTF("z 0x%016llx\n", z); + if (unlikely(z != ~0ULL)) { + u32 pos = ctz64(~z); + DEBUG_PRINTF("match @ pos %u\n", pos); + assert(pos < 64); + return buf + pos; + } else { + return NULL; // no match + } +} + +static really_inline +const u8 *fwdBlock512(m512 mask_lo, m512 mask_hi, m512 chars, const u8 *buf, + const m512 low4bits, const m512 zeroes) { + u64a z = block(mask_lo, mask_hi, chars, low4bits, zeroes); + + return firstMatch64(buf, z); +} + +static really_inline +const u8 *shortShufti512(m512 mask_lo, m512 mask_hi, const u8 *buf, + const u8 *buf_end, const m512 low4bits, + const m512 zeroes) { + DEBUG_PRINTF("short shufti %p len %zu\n", buf, buf_end - buf); + uintptr_t len = buf_end - buf; + assert(len <= 64); + + // load mask + u64a k = (~0ULL) >> (64 - len); + DEBUG_PRINTF("load mask 0x%016llx\n", k); + + m512 chars = loadu_maskz_m512(k, buf); + + u64a z = block(mask_lo, mask_hi, chars, low4bits, zeroes); + + // reuse the load mask to indicate valid bytes + return firstMatch64(buf, z | ~k); +} + +/* takes 128 bit masks, but operates on 512 bits of data */ +const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, + const u8 *buf_end) { + assert(buf && buf_end); + assert(buf < buf_end); + DEBUG_PRINTF("shufti %p len %zu\n", buf, buf_end - buf); + DEBUG_PRINTF("b %s\n", buf); + + const m512 low4bits = set64x8(0xf); + const m512 zeroes = zeroes512(); + const m512 wide_mask_lo = set4x128(mask_lo); + const m512 wide_mask_hi = set4x128(mask_hi); + const u8 *rv; + + // small cases. 
+ if (buf_end - buf <= 64) { + rv = shortShufti512(wide_mask_lo, wide_mask_hi, buf, buf_end, low4bits, + zeroes); + return rv ? rv : buf_end; + } + + assert(buf_end - buf >= 64); + + // Preconditioning: most of the time our buffer won't be aligned. + if ((uintptr_t)buf % 64) { + rv = shortShufti512(wide_mask_lo, wide_mask_hi, buf, + ROUNDUP_PTR(buf, 64), low4bits, zeroes); + if (rv) { + return rv; + } + buf = ROUNDUP_PTR(buf, 64); + } + + const u8 *last_block = ROUNDDOWN_PTR(buf_end, 64); + while (buf < last_block) { + m512 lchars = load512(buf); + rv = fwdBlock512(wide_mask_lo, wide_mask_hi, lchars, buf, low4bits, + zeroes); + if (rv) { + return rv; + } + buf += 64; + } + + if (buf == buf_end) { + goto done; + } + + // Use an unaligned load to mop up the last 64 bytes and get an accurate + // picture to buf_end. + assert(buf <= buf_end && buf >= buf_end - 64); + m512 chars = loadu512(buf_end - 64); + rv = fwdBlock512(wide_mask_lo, wide_mask_hi, chars, buf_end - 64, low4bits, + zeroes); + if (rv) { + return rv; + } +done: + return buf_end; +} + +static really_inline +const u8 *lastMatch64(const u8 *buf, u64a z) { + DEBUG_PRINTF("z 0x%016llx\n", z); + if (unlikely(z != ~0ULL)) { + u32 pos = clz64(~z); + DEBUG_PRINTF("buf=%p, pos=%u\n", buf, pos); + return buf + (63 - pos); + } else { + return NULL; // no match + } +} + +static really_inline +const u8 *rshortShufti512(m512 mask_lo, m512 mask_hi, const u8 *buf, + const u8 *buf_end, const m512 low4bits, + const m512 zeroes) { + DEBUG_PRINTF("short %p len %zu\n", buf, buf_end - buf); + uintptr_t len = buf_end - buf; + assert(len <= 64); + + // load mask + u64a k = (~0ULL) >> (64 - len); + DEBUG_PRINTF("load mask 0x%016llx\n", k); + + m512 chars = loadu_maskz_m512(k, buf); + + u64a z = block(mask_lo, mask_hi, chars, low4bits, zeroes); + + // reuse the load mask to indicate valid bytes + return lastMatch64(buf, z | ~k); +} + +static really_inline +const u8 *revBlock512(m512 mask_lo, m512 mask_hi, m512 chars, const u8 *buf, + const m512 low4bits, const m512 zeroes) { + m512 c_lo = pshufb_m512(mask_lo, and512(chars, low4bits)); + m512 c_hi = pshufb_m512(mask_hi, + rshift64_m512(andnot512(low4bits, chars), 4)); + m512 t = and512(c_lo, c_hi); + +#ifdef DEBUG + DEBUG_PRINTF(" chars: "); dumpMsk512AsChars(chars); printf("\n"); + DEBUG_PRINTF(" char: "); dumpMsk512(chars); printf("\n"); + DEBUG_PRINTF(" c_lo: "); dumpMsk512(c_lo); printf("\n"); + DEBUG_PRINTF(" c_hi: "); dumpMsk512(c_hi); printf("\n"); + DEBUG_PRINTF(" t: "); dumpMsk512(t); printf("\n"); +#endif + + u64a z = eq512mask(t, zeroes); + return lastMatch64(buf, z); +} + +/* takes 128 bit masks, but operates on 512 bits of data */ +const u8 *rshuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, + const u8 *buf_end) { + DEBUG_PRINTF("buf %p buf_end %p\n", buf, buf_end); + assert(buf && buf_end); + assert(buf < buf_end); + + const m512 low4bits = set64x8(0xf); + const m512 zeroes = zeroes512(); + const m512 wide_mask_lo = set4x128(mask_lo); + const m512 wide_mask_hi = set4x128(mask_hi); + const u8 *rv; + + if (buf_end - buf < 64) { + rv = rshortShufti512(wide_mask_lo, wide_mask_hi, buf, buf_end, low4bits, + zeroes); + return rv ? 
rv : buf - 1; + } + + if (ROUNDDOWN_PTR(buf_end, 64) != buf_end) { + // peel off unaligned portion + assert(buf_end - buf >= 64); + DEBUG_PRINTF("start\n"); + rv = rshortShufti512(wide_mask_lo, wide_mask_hi, + ROUNDDOWN_PTR(buf_end, 64), buf_end, low4bits, + zeroes); + if (rv) { + return rv; + } + buf_end = ROUNDDOWN_PTR(buf_end, 64); + } + + const u8 *last_block = ROUNDUP_PTR(buf, 64); + while (buf_end > last_block) { + buf_end -= 64; + m512 lchars = load512(buf_end); + rv = revBlock512(wide_mask_lo, wide_mask_hi, lchars, buf_end, low4bits, + zeroes); + if (rv) { + return rv; + } + } + if (buf_end == buf) { + goto done; + } + // Use an unaligned load to mop up the last 64 bytes and get an accurate + // picture to buf. + m512 chars = loadu512(buf); + rv = revBlock512(wide_mask_lo, wide_mask_hi, chars, buf, low4bits, zeroes); + if (rv) { + return rv; + } +done: + return buf - 1; +} + +static really_inline +const u8 *fwdBlock2(m512 mask1_lo, m512 mask1_hi, m512 mask2_lo, m512 mask2_hi, + m512 chars, const u8 *buf, const m512 low4bits, + const m512 ones, __mmask64 k) { + DEBUG_PRINTF("buf %p %.64s\n", buf, buf); + m512 chars_lo = and512(chars, low4bits); + m512 chars_hi = rshift64_m512(andnot512(low4bits, chars), 4); + m512 c_lo = maskz_pshufb_m512(k, mask1_lo, chars_lo); + m512 c_hi = maskz_pshufb_m512(k, mask1_hi, chars_hi); + m512 t = or512(c_lo, c_hi); + +#ifdef DEBUG + DEBUG_PRINTF(" chars: "); dumpMsk512AsChars(chars); printf("\n"); + DEBUG_PRINTF(" char: "); dumpMsk512(chars); printf("\n"); + DEBUG_PRINTF(" c_lo: "); dumpMsk512(c_lo); printf("\n"); + DEBUG_PRINTF(" c_hi: "); dumpMsk512(c_hi); printf("\n"); + DEBUG_PRINTF(" t: "); dumpMsk512(t); printf("\n"); +#endif + + m512 c2_lo = maskz_pshufb_m512(k, mask2_lo, chars_lo); + m512 c2_hi = maskz_pshufb_m512(k, mask2_hi, chars_hi); + m512 t2 = or512(t, rshift128_m512(or512(c2_lo, c2_hi), 1)); + +#ifdef DEBUG + DEBUG_PRINTF(" c2_lo: "); dumpMsk512(c2_lo); printf("\n"); + DEBUG_PRINTF(" c2_hi: "); dumpMsk512(c2_hi); printf("\n"); + DEBUG_PRINTF(" t2: "); dumpMsk512(t2); printf("\n"); +#endif + u64a z = eq512mask(t2, ones); + + return firstMatch64(buf, z | ~k); +} + +static really_inline +const u8 *shortDoubleShufti512(m512 mask1_lo, m512 mask1_hi, m512 mask2_lo, + m512 mask2_hi, const u8 *buf, const u8 *buf_end, + const m512 low4bits, const m512 ones) { + DEBUG_PRINTF("short %p len %zu\n", buf, buf_end - buf); + uintptr_t len = buf_end - buf; + assert(len <= 64); + + u64a k = (~0ULL) >> (64 - len); + DEBUG_PRINTF("load mask 0x%016llx\n", k); + + m512 chars = loadu_mask_m512(ones, k, buf); + + const u8 *rv = fwdBlock2(mask1_lo, mask1_hi, mask2_lo, mask2_hi, chars, buf, + low4bits, ones, k); + + return rv; +} + +/* takes 128 bit masks, but operates on 512 bits of data */ +const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi, + m128 mask2_lo, m128 mask2_hi, + const u8 *buf, const u8 *buf_end) { + /* we should always have at least 16 bytes */ + assert(buf_end - buf >= 16); + DEBUG_PRINTF("buf %p len %zu\n", buf, buf_end - buf); + + const m512 ones = ones512(); + const m512 low4bits = set64x8(0xf); + const m512 wide_mask1_lo = set4x128(mask1_lo); + const m512 wide_mask1_hi = set4x128(mask1_hi); + const m512 wide_mask2_lo = set4x128(mask2_lo); + const m512 wide_mask2_hi = set4x128(mask2_hi); + const u8 *rv; + + if (buf_end - buf <= 64) { + rv = shortDoubleShufti512(wide_mask1_lo, wide_mask1_hi, wide_mask2_lo, + wide_mask2_hi, buf, buf_end, low4bits, ones); + DEBUG_PRINTF("rv %p\n", rv); + return rv ? 
rv : buf_end; + } + + // Preconditioning: most of the time our buffer won't be aligned. + if ((uintptr_t)buf % 64) { + rv = shortDoubleShufti512(wide_mask1_lo, wide_mask1_hi, wide_mask2_lo, + wide_mask2_hi, buf, ROUNDUP_PTR(buf, 64), + low4bits, ones); + if (rv) { + return rv; + } + + buf = ROUNDUP_PTR(buf, 64); + } + + const u8 *last_block = buf_end - 64; + while (buf < last_block) { + m512 lchars = load512(buf); + rv = fwdBlock2(wide_mask1_lo, wide_mask1_hi, wide_mask2_lo, + wide_mask2_hi, lchars, buf, low4bits, ones, ~0); + if (rv) { + return rv; + } + buf += 64; + } + + // Use an unaligned load to mop up the last 64 bytes and get an accurate + // picture to buf_end. + m512 chars = loadu512(buf_end - 64); + rv = fwdBlock2(wide_mask1_lo, wide_mask1_hi, wide_mask2_lo, wide_mask2_hi, + chars, buf_end - 64, low4bits, ones, ~0); + if (rv) { + return rv; + } + + return buf_end; +} +#endif diff --git a/contrib/libs/hyperscan/src/nfa/shufticompile.cpp b/contrib/libs/hyperscan/src/nfa/shufticompile.cpp index f712ef94a4..48d2aa4ea9 100644 --- a/contrib/libs/hyperscan/src/nfa/shufticompile.cpp +++ b/contrib/libs/hyperscan/src/nfa/shufticompile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -32,8 +32,8 @@ #include "shufticompile.h" #include "ue2common.h" #include "util/charreach.h" -#include "util/container.h" -#include "util/flat_containers.h" +#include "util/container.h" +#include "util/flat_containers.h" #include <array> #include <cassert> @@ -51,7 +51,7 @@ namespace ue2 { * * Note: always able to construct masks for 8 or fewer characters. */ -int shuftiBuildMasks(const CharReach &c, u8 *lo, u8 *hi) { +int shuftiBuildMasks(const CharReach &c, u8 *lo, u8 *hi) { /* Things could be packed much more optimally, but this should be able to * handle any set of characters entirely in the lower half. 
*/ @@ -108,33 +108,33 @@ int shuftiBuildMasks(const CharReach &c, u8 *lo, u8 *hi) { return bit_index; } -static -array<u16, 4> or_array(array<u16, 4> a, const array<u16, 4> &b) { - a[0] |= b[0]; - a[1] |= b[1]; - a[2] |= b[2]; - a[3] |= b[3]; - - return a; -} - - -#define MAX_BUCKETS 8 -static -void set_buckets_from_mask(u16 nibble_mask, u32 bucket, - array<u8, 16> &byte_mask) { - assert(bucket < MAX_BUCKETS); - - u32 mask = nibble_mask; - while (mask) { - u32 n = findAndClearLSB_32(&mask); - byte_mask[n] &= ~(1 << bucket); - } -} - -bool shuftiBuildDoubleMasks(const CharReach &onechar, +static +array<u16, 4> or_array(array<u16, 4> a, const array<u16, 4> &b) { + a[0] |= b[0]; + a[1] |= b[1]; + a[2] |= b[2]; + a[3] |= b[3]; + + return a; +} + + +#define MAX_BUCKETS 8 +static +void set_buckets_from_mask(u16 nibble_mask, u32 bucket, + array<u8, 16> &byte_mask) { + assert(bucket < MAX_BUCKETS); + + u32 mask = nibble_mask; + while (mask) { + u32 n = findAndClearLSB_32(&mask); + byte_mask[n] &= ~(1 << bucket); + } +} + +bool shuftiBuildDoubleMasks(const CharReach &onechar, const flat_set<pair<u8, u8>> &twochar, - u8 *lo1, u8 *hi1, u8 *lo2, u8 *hi2) { + u8 *lo1, u8 *hi1, u8 *lo2, u8 *hi2) { DEBUG_PRINTF("unibytes %zu dibytes %zu\n", onechar.size(), twochar.size()); array<u8, 16> lo1_a; @@ -148,69 +148,69 @@ bool shuftiBuildDoubleMasks(const CharReach &onechar, hi2_a.fill(0xff); // two-byte literals - vector<array<u16, 4>> nibble_masks; - for (const auto &p : twochar) { - DEBUG_PRINTF("%02hhx %02hhx\n", p.first, p.second); - u16 a_lo = 1U << (p.first & 0xf); - u16 a_hi = 1U << (p.first >> 4); - u16 b_lo = 1U << (p.second & 0xf); - u16 b_hi = 1U << (p.second >> 4); - nibble_masks.push_back({{a_lo, a_hi, b_lo, b_hi}}); + vector<array<u16, 4>> nibble_masks; + for (const auto &p : twochar) { + DEBUG_PRINTF("%02hhx %02hhx\n", p.first, p.second); + u16 a_lo = 1U << (p.first & 0xf); + u16 a_hi = 1U << (p.first >> 4); + u16 b_lo = 1U << (p.second & 0xf); + u16 b_hi = 1U << (p.second >> 4); + nibble_masks.push_back({{a_lo, a_hi, b_lo, b_hi}}); } // one-byte literals (second byte is a wildcard) for (size_t it = onechar.find_first(); it != CharReach::npos; - it = onechar.find_next(it)) { - DEBUG_PRINTF("%02hhx\n", (u8)it); - u16 a_lo = 1U << (it & 0xf); - u16 a_hi = 1U << (it >> 4); - u16 wildcard = 0xffff; - nibble_masks.push_back({{a_lo, a_hi, wildcard, wildcard}}); - } - - // try to merge strings into shared buckets - for (u32 i = 0; i < 4; i++) { - map<array<u16, 4>, array<u16, 4>> new_masks; - for (const auto &a : nibble_masks) { - auto key = a; - key[i] = 0; - if (!contains(new_masks, key)) { - new_masks[key] = a; - } else { - new_masks[key] = or_array(new_masks[key], a); - } + it = onechar.find_next(it)) { + DEBUG_PRINTF("%02hhx\n", (u8)it); + u16 a_lo = 1U << (it & 0xf); + u16 a_hi = 1U << (it >> 4); + u16 wildcard = 0xffff; + nibble_masks.push_back({{a_lo, a_hi, wildcard, wildcard}}); + } + + // try to merge strings into shared buckets + for (u32 i = 0; i < 4; i++) { + map<array<u16, 4>, array<u16, 4>> new_masks; + for (const auto &a : nibble_masks) { + auto key = a; + key[i] = 0; + if (!contains(new_masks, key)) { + new_masks[key] = a; + } else { + new_masks[key] = or_array(new_masks[key], a); + } } - nibble_masks.clear(); - for (const auto &e : new_masks) { - nibble_masks.push_back(e.second); - } - } - - if (nibble_masks.size() > MAX_BUCKETS) { - DEBUG_PRINTF("too many buckets needed (%zu)\n", nibble_masks.size()); - return false; - } - - u32 i = 0; - for (const auto &a : nibble_masks) { - 
set_buckets_from_mask(a[0], i, lo1_a); - set_buckets_from_mask(a[1], i, hi1_a); - set_buckets_from_mask(a[2], i, lo2_a); - set_buckets_from_mask(a[3], i, hi2_a); - i++; + nibble_masks.clear(); + for (const auto &e : new_masks) { + nibble_masks.push_back(e.second); + } } + if (nibble_masks.size() > MAX_BUCKETS) { + DEBUG_PRINTF("too many buckets needed (%zu)\n", nibble_masks.size()); + return false; + } + + u32 i = 0; + for (const auto &a : nibble_masks) { + set_buckets_from_mask(a[0], i, lo1_a); + set_buckets_from_mask(a[1], i, hi1_a); + set_buckets_from_mask(a[2], i, lo2_a); + set_buckets_from_mask(a[3], i, hi2_a); + i++; + } + memcpy(lo1, lo1_a.data(), sizeof(m128)); memcpy(lo2, lo2_a.data(), sizeof(m128)); memcpy(hi1, hi1_a.data(), sizeof(m128)); memcpy(hi2, hi2_a.data(), sizeof(m128)); - return true; + return true; } #ifdef DUMP_SUPPORT -CharReach shufti2cr(const u8 *lo, const u8 *hi) { +CharReach shufti2cr(const u8 *lo, const u8 *hi) { CharReach cr; for (u32 i = 0; i < 256; i++) { if (lo[(u8)i & 0xf] & hi[(u8)i >> 4]) { diff --git a/contrib/libs/hyperscan/src/nfa/shufticompile.h b/contrib/libs/hyperscan/src/nfa/shufticompile.h index 59b9c38dff..771d298939 100644 --- a/contrib/libs/hyperscan/src/nfa/shufticompile.h +++ b/contrib/libs/hyperscan/src/nfa/shufticompile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -35,7 +35,7 @@ #include "ue2common.h" #include "util/charreach.h" -#include "util/flat_containers.h" +#include "util/flat_containers.h" #include <utility> @@ -48,15 +48,15 @@ namespace ue2 { * * Note: always able to construct masks for 8 or fewer characters. */ -int shuftiBuildMasks(const CharReach &chars, u8 *lo, u8 *hi); +int shuftiBuildMasks(const CharReach &chars, u8 *lo, u8 *hi); -/** \brief Double-byte variant - * - * Returns false if we are unable to build the masks (too many buckets required) - */ -bool shuftiBuildDoubleMasks(const CharReach &onechar, +/** \brief Double-byte variant + * + * Returns false if we are unable to build the masks (too many buckets required) + */ +bool shuftiBuildDoubleMasks(const CharReach &onechar, const flat_set<std::pair<u8, u8>> &twochar, - u8 *lo1, u8 *hi1, u8 *lo2, u8 *hi2); + u8 *lo1, u8 *hi1, u8 *lo2, u8 *hi2); #ifdef DUMP_SUPPORT @@ -64,7 +64,7 @@ bool shuftiBuildDoubleMasks(const CharReach &onechar, * \brief Dump code: returns a CharReach with the reach that would match this * shufti. */ -CharReach shufti2cr(const u8 *lo, const u8 *hi); +CharReach shufti2cr(const u8 *lo, const u8 *hi); #endif // DUMP_SUPPORT diff --git a/contrib/libs/hyperscan/src/nfa/tamarama.c b/contrib/libs/hyperscan/src/nfa/tamarama.c index 43480f0650..8a2f633e09 100644 --- a/contrib/libs/hyperscan/src/nfa/tamarama.c +++ b/contrib/libs/hyperscan/src/nfa/tamarama.c @@ -1,441 +1,441 @@ -/* - * Copyright (c) 2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
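For orientation: the lo/hi tables built by shuftiBuildMasks() encode per-nibble bucket membership, and shufti2cr() above reconstructs the reach by testing every byte value against both tables. A minimal scalar sketch of that membership test (helper name illustrative, not part of the library):

static int shuftiByteInSomeBucket(const u8 *lo, const u8 *hi, u8 c) {
    /* a byte is accepted when at least one bucket bit is set in both its
     * low-nibble and high-nibble table entries, as in shufti2cr() */
    return (lo[c & 0xf] & hi[c >> 4]) != 0;
}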
- * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - \brief Tamarama: container engine for exclusive engines, runtime code. -*/ -#include "config.h" - -#include "tamarama.h" - -#include "tamarama_internal.h" -#include "nfa_api.h" -#include "nfa_api_queue.h" -#include "nfa_api_util.h" -#include "nfa_internal.h" -#include "scratch.h" -#include "util/partial_store.h" - -static really_inline -u32 getSubOffset(const struct Tamarama *t, u32 num) { - DEBUG_PRINTF("subengine:%u\n", num); - assert(num < t->numSubEngines); - const u32 *sub = - (const u32 *)((const char *)t + sizeof(struct Tamarama) + - t->numSubEngines * sizeof(u32)); - assert(ISALIGNED(sub)); - return sub[num]; -} - -static -const struct NFA *getSubEngine(const struct Tamarama *t, - const u32 activeIdx) { - const u32 offset = getSubOffset(t, activeIdx); - DEBUG_PRINTF("activeIdx:%u offsets:%u\n", activeIdx, offset); - const char *base = (const char *)t; - return (const struct NFA *)(base + offset); -} - -static -void storeActiveIdx(const struct Tamarama *t, char *state, - const u32 idx) { - assert(idx <= t->numSubEngines); - partial_store_u32(state, idx, t->activeIdxSize); -} - -static -u32 loadActiveIdx(const char *state, - const u32 activeIdxSize) { - return partial_load_u32(state, activeIdxSize); -} - -static really_inline -void copyQueueProperties(const struct mq *q1, struct mq *q2, - const u32 activeIdxSize) { - q2->state = q1->state; - q2->streamState = q1->streamState + activeIdxSize; - q2->offset = q1->offset; - q2->buffer = q1->buffer; - q2->length = q1->length; - q2->history = q1->history; - q2->hlength = q1->hlength; - q2->cb = q1->cb; - q2->context = q1->context; - q2->scratch = q1->scratch; - q2->report_current = q1->report_current; -} - -static -void copyQueueItems(const struct Tamarama *t, const struct NFA *sub, - struct mq *q1, struct mq *q2, const u32 activeIdx) { - const u32 *baseTop = (const u32 *)((const char *)t + - sizeof(struct Tamarama)); - - u32 lower = baseTop[activeIdx]; - u32 upper = activeIdx == t->numSubEngines - 1 ? - ~0U : baseTop[activeIdx + 1]; - u32 event_base = isMultiTopType(sub->type) ? 
MQE_TOP_FIRST : MQE_TOP; - while (q1->cur < q1->end) { - u32 type = q1->items[q1->cur].type; - s64a loc = q1->items[q1->cur].location; - DEBUG_PRINTF("type:%u lower:%u upper:%u\n", type, lower, upper); - if (type >= lower && type < upper) { - u32 event = event_base; - if (event == MQE_TOP_FIRST) { - event += type - lower; - } - pushQueue(q2, event, loc); - } else { - pushQueueNoMerge(q2, MQE_END, loc); - break; - } - q1->cur++; - } -} - -static -void copyQueue(const struct Tamarama *t, const struct NFA *sub, - struct mq *q1, struct mq *q2, const u32 activeIdx) { - copyQueueProperties(q1, q2, t->activeIdxSize); - - // copy MQE_START item - u32 cur = q1->cur++; - q2->cur = cur; - q2->items[cur] = q1->items[cur]; - q2->end = cur + 1; - - copyQueueItems(t, sub, q1, q2, activeIdx); - // restore cur index of the main queue - q1->cur = cur; -} - -static -u32 findEngineForTop(const u32 *baseTop, const u32 cur, - const u32 numSubEngines) { - u32 i; - for (i = 0; i < numSubEngines; ++i) { - DEBUG_PRINTF("cur:%u base:%u\n", cur, baseTop[i]); - if (cur >= baseTop[i] && - (i == numSubEngines - 1 || cur < baseTop[i + 1])) { - break; - } - } - return i; -} - -static -void initSubQueue(const struct Tamarama *t, struct mq *q1, - struct mq *q2, const u32 lastActiveIdx, - const u32 activeIdx) { - // Push events to the new queue - const struct NFA *sub = getSubEngine(t, activeIdx); - assert(!isContainerType(sub->type)); - q2->nfa = sub; - - // Reinitialize state if the last active subengine is different - // from current one - if (lastActiveIdx == t->numSubEngines || - lastActiveIdx != activeIdx) { - nfaQueueInitState(q2->nfa, q2); - } - - copyQueueItems(t, sub, q1, q2, activeIdx); - if (q1->items[q1->cur].type == MQE_END) { - q1->cur++; - } - DEBUG_PRINTF("update lastIdx:%u\n", activeIdx); - storeActiveIdx(t, q1->streamState, activeIdx); -} - -static -void updateQueues(const struct Tamarama *t, struct mq *q1, struct mq *q2) { - q2->cur = q2->end = 0; - copyQueueProperties(q1, q2, t->activeIdxSize); - - const u32 numSubEngines = t->numSubEngines; - u32 lastActiveIdx = loadActiveIdx(q1->streamState, - t->activeIdxSize); -#ifdef DEBUG - DEBUG_PRINTF("external queue\n"); - debugQueue(q1); -#endif - - // Push MQE_START event to the subqueue - s64a loc = q1->items[q1->cur].location; - pushQueueAt(q2, 0, MQE_START, loc); - char hasStart = 0; - if (q1->items[q1->cur].type == MQE_START) { - hasStart = 1; - q1->cur++; - } - - u32 activeIdx = lastActiveIdx; - // If we have top events in the main queue, update current active id - if (q1->cur < q1->end - 1) { - const u32 *baseTop = (const u32 *)((const char *)t + - sizeof(struct Tamarama)); - u32 curTop = q1->items[q1->cur].type; - activeIdx = findEngineForTop(baseTop, curTop, numSubEngines); - } - - assert(activeIdx < numSubEngines); - DEBUG_PRINTF("last id:%u, current id:%u, num of subengines:%u\n", - lastActiveIdx, activeIdx, numSubEngines); - // Handle unfinished last alive subengine - if (lastActiveIdx != activeIdx && - lastActiveIdx != numSubEngines && hasStart) { - loc = q1->items[q1->cur].location; - pushQueueNoMerge(q2, MQE_END, loc); - q2->nfa = getSubEngine(t, lastActiveIdx); - return; - } - - initSubQueue(t, q1, q2, lastActiveIdx, activeIdx); - DEBUG_PRINTF("finish queues\n"); -} - -// After processing subqueue items for subengines, we need to copy back -// remaining items in subqueue if there are any to Tamarama main queue -static -void copyBack(const struct Tamarama *t, struct mq *q, struct mq *q1) { - DEBUG_PRINTF("copy back %u, %u\n", q1->cur, q1->end); - 
q->report_current = q1->report_current; - if (q->cur >= q->end && q1->cur >= q1->end) { - return; - } - - const u32 *baseTop = (const u32 *)((const char *)t + - sizeof(struct Tamarama)); - const u32 lastIdx = loadActiveIdx(q->streamState, - t->activeIdxSize); - u32 base = 0, event_base = 0; - if (lastIdx != t->numSubEngines) { - base = baseTop[lastIdx]; - const struct NFA *sub = getSubEngine(t, lastIdx); - event_base = isMultiTopType(sub->type) ? MQE_TOP_FIRST : MQE_TOP; - } - - u32 numItems = q1->end > q1->cur + 1 ? q1->end - q1->cur - 1 : 1; - // Also need to copy MQE_END if the main queue is empty - if (q->cur == q->end) { - assert(q->cur > 1 && q1->items[q1->end - 1].type == MQE_END); - q->items[--q->cur] = q1->items[q1->end - 1]; - } - u32 cur = q->cur - numItems; - q->items[cur] = q1->items[q1->cur++]; - q->items[cur].type = MQE_START; - q->cur = cur++; - for (u32 i = 0; i < numItems - 1; ++i) { - assert(q1->cur < q1->end); - u32 type = q1->items[q1->cur].type; - if (type > MQE_END) { - q1->items[q1->cur].type = type - event_base + base; - } - q->items[cur++] = q1->items[q1->cur++]; - } - -#ifdef DEBUG - DEBUG_PRINTF("external queue\n"); - debugQueue(q); -#endif -} - -char nfaExecTamarama_testEOD(const struct NFA *n, const char *state, - const char *streamState, u64a offset, - NfaCallback callback, void *context) { - const struct Tamarama *t = getImplNfa(n); - u32 activeIdx = loadActiveIdx(streamState, t->activeIdxSize); - if (activeIdx == t->numSubEngines) { - return MO_CONTINUE_MATCHING; - } - - const struct NFA *sub = getSubEngine(t, activeIdx); - if (nfaAcceptsEod(sub)) { - assert(!isContainerType(sub->type)); - const char *subStreamState = streamState + t->activeIdxSize; - return nfaCheckFinalState(sub, state, subStreamState, offset, callback, - context); - } - - return MO_CONTINUE_MATCHING; -} - -char nfaExecTamarama_QR(const struct NFA *n, struct mq *q, ReportID report) { - DEBUG_PRINTF("exec rose\n"); - struct mq q1; - q1.cur = q1.end = 0; - char rv = 0; - const struct Tamarama *t = getImplNfa(n); - while (q->cur < q->end) { - updateQueues(t, q, &q1); - } - - if (q1.cur < q1.end) { - rv = nfaQueueExecRose(q1.nfa, &q1, report); - } - - DEBUG_PRINTF("exec rose rv:%u\n", rv); - return rv; -} - -char nfaExecTamarama_reportCurrent(const struct NFA *n, struct mq *q) { - const struct Tamarama *t = getImplNfa(n); - u32 activeIdx = loadActiveIdx(q->streamState, t->activeIdxSize); - if (activeIdx == t->numSubEngines) { - return 1; - } - - const struct NFA *sub = getSubEngine(t, activeIdx); - struct mq q1; - copyQueue(t, sub, q, &q1, activeIdx); - return nfaReportCurrentMatches(sub, &q1); -} - -char nfaExecTamarama_inAccept(const struct NFA *n, ReportID report, - struct mq *q) { - const struct Tamarama *t = getImplNfa(n); - u32 activeIdx = loadActiveIdx(q->streamState, t->activeIdxSize); - if (activeIdx == t->numSubEngines) { - return 0; - } - const struct NFA *sub = getSubEngine(t, activeIdx); - - struct mq q1; - copyQueue(t, sub, q, &q1, activeIdx); - return nfaInAcceptState(sub, report, &q1); -} - -char nfaExecTamarama_inAnyAccept(const struct NFA *n, struct mq *q) { - const struct Tamarama *t = getImplNfa(n); - u32 activeIdx = loadActiveIdx(q->streamState, t->activeIdxSize); - if (activeIdx == t->numSubEngines) { - return 0; - } - const struct NFA *sub = getSubEngine(t, activeIdx); - - struct mq q1; - copyQueue(t, sub, q, &q1, activeIdx); - return nfaInAnyAcceptState(sub, &q1); -} - -char nfaExecTamarama_queueInitState(const struct NFA *n, struct mq *q) { - DEBUG_PRINTF("init 
state\n"); - const struct Tamarama *t = getImplNfa(n); - char *ptr = q->streamState; - // Use activeIdxSize as a sentinel value and initialize the state to - // an invalid engine as nothing has been triggered yet - storeActiveIdx(t, ptr, t->numSubEngines); - return 0; -} - -char nfaExecTamarama_queueCompressState(const struct NFA *n, const struct mq *q, - s64a loc) { - const struct Tamarama *t = getImplNfa(n); - u32 activeIdx = loadActiveIdx(q->streamState, t->activeIdxSize); - if (activeIdx == t->numSubEngines) { - return 0; - } - - const struct NFA *sub = getSubEngine(t, activeIdx); - - struct mq q1; - copyQueueProperties(q, &q1, t->activeIdxSize); - return nfaQueueCompressState(sub, &q1, loc); -} - -char nfaExecTamarama_expandState(const struct NFA *n, void *dest, - const void *src, u64a offset, u8 key) { - const struct Tamarama *t = getImplNfa(n); - u32 activeIdx = loadActiveIdx(src, t->activeIdxSize); - if (activeIdx == t->numSubEngines) { - return 0; - } - - const struct NFA *sub = getSubEngine(t, activeIdx); - - const char *subStreamState = (const char *)src + t->activeIdxSize; - return nfaExpandState(sub, dest, subStreamState, offset, key); -} - -enum nfa_zombie_status nfaExecTamarama_zombie_status(const struct NFA *n, - struct mq *q, s64a loc) { - const struct Tamarama *t = getImplNfa(n); - u32 activeIdx = loadActiveIdx(q->streamState, t->activeIdxSize); - if (activeIdx == t->numSubEngines) { - return NFA_ZOMBIE_NO; - } - const struct NFA *sub = getSubEngine(t, activeIdx); - - struct mq q1; - copyQueue(t, sub, q, &q1, activeIdx); - return nfaGetZombieStatus(sub, &q1, loc); -} - -char nfaExecTamarama_Q(const struct NFA *n, struct mq *q, s64a end) { - DEBUG_PRINTF("exec\n"); - struct mq q1; - char rv = MO_ALIVE; - char copy = 0; - const struct Tamarama *t = getImplNfa(n); - while (q->cur < q->end && q_cur_loc(q) <= end) { - updateQueues(t, q, &q1); - rv = nfaQueueExec_raw(q1.nfa, &q1, end); - q->report_current = q1.report_current; - copy = 1; - if (can_stop_matching(q->scratch)) { - break; - } - } - if (copy) { - copyBack(t, q, &q1); - } - return rv; -} - -char nfaExecTamarama_Q2(const struct NFA *n, struct mq *q, s64a end) { - DEBUG_PRINTF("exec to match\n"); - struct mq q1; - char rv = 0; - char copy = 0; - const struct Tamarama *t = getImplNfa(n); - while (q->cur < q->end && q_cur_loc(q) <= end && - rv != MO_MATCHES_PENDING) { - updateQueues(t, q, &q1); - rv = nfaQueueExec2_raw(q1.nfa, &q1, end); - q->report_current = q1.report_current; - copy = 1; - if (can_stop_matching(q->scratch)) { - break; - } - } - if (copy) { - copyBack(t, q, &q1); - } - return rv; -} - +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + \brief Tamarama: container engine for exclusive engines, runtime code. +*/ +#include "config.h" + +#include "tamarama.h" + +#include "tamarama_internal.h" +#include "nfa_api.h" +#include "nfa_api_queue.h" +#include "nfa_api_util.h" +#include "nfa_internal.h" +#include "scratch.h" +#include "util/partial_store.h" + +static really_inline +u32 getSubOffset(const struct Tamarama *t, u32 num) { + DEBUG_PRINTF("subengine:%u\n", num); + assert(num < t->numSubEngines); + const u32 *sub = + (const u32 *)((const char *)t + sizeof(struct Tamarama) + + t->numSubEngines * sizeof(u32)); + assert(ISALIGNED(sub)); + return sub[num]; +} + +static +const struct NFA *getSubEngine(const struct Tamarama *t, + const u32 activeIdx) { + const u32 offset = getSubOffset(t, activeIdx); + DEBUG_PRINTF("activeIdx:%u offsets:%u\n", activeIdx, offset); + const char *base = (const char *)t; + return (const struct NFA *)(base + offset); +} + +static +void storeActiveIdx(const struct Tamarama *t, char *state, + const u32 idx) { + assert(idx <= t->numSubEngines); + partial_store_u32(state, idx, t->activeIdxSize); +} + +static +u32 loadActiveIdx(const char *state, + const u32 activeIdxSize) { + return partial_load_u32(state, activeIdxSize); +} + +static really_inline +void copyQueueProperties(const struct mq *q1, struct mq *q2, + const u32 activeIdxSize) { + q2->state = q1->state; + q2->streamState = q1->streamState + activeIdxSize; + q2->offset = q1->offset; + q2->buffer = q1->buffer; + q2->length = q1->length; + q2->history = q1->history; + q2->hlength = q1->hlength; + q2->cb = q1->cb; + q2->context = q1->context; + q2->scratch = q1->scratch; + q2->report_current = q1->report_current; +} + +static +void copyQueueItems(const struct Tamarama *t, const struct NFA *sub, + struct mq *q1, struct mq *q2, const u32 activeIdx) { + const u32 *baseTop = (const u32 *)((const char *)t + + sizeof(struct Tamarama)); + + u32 lower = baseTop[activeIdx]; + u32 upper = activeIdx == t->numSubEngines - 1 ? + ~0U : baseTop[activeIdx + 1]; + u32 event_base = isMultiTopType(sub->type) ? 
MQE_TOP_FIRST : MQE_TOP; + while (q1->cur < q1->end) { + u32 type = q1->items[q1->cur].type; + s64a loc = q1->items[q1->cur].location; + DEBUG_PRINTF("type:%u lower:%u upper:%u\n", type, lower, upper); + if (type >= lower && type < upper) { + u32 event = event_base; + if (event == MQE_TOP_FIRST) { + event += type - lower; + } + pushQueue(q2, event, loc); + } else { + pushQueueNoMerge(q2, MQE_END, loc); + break; + } + q1->cur++; + } +} + +static +void copyQueue(const struct Tamarama *t, const struct NFA *sub, + struct mq *q1, struct mq *q2, const u32 activeIdx) { + copyQueueProperties(q1, q2, t->activeIdxSize); + + // copy MQE_START item + u32 cur = q1->cur++; + q2->cur = cur; + q2->items[cur] = q1->items[cur]; + q2->end = cur + 1; + + copyQueueItems(t, sub, q1, q2, activeIdx); + // restore cur index of the main queue + q1->cur = cur; +} + +static +u32 findEngineForTop(const u32 *baseTop, const u32 cur, + const u32 numSubEngines) { + u32 i; + for (i = 0; i < numSubEngines; ++i) { + DEBUG_PRINTF("cur:%u base:%u\n", cur, baseTop[i]); + if (cur >= baseTop[i] && + (i == numSubEngines - 1 || cur < baseTop[i + 1])) { + break; + } + } + return i; +} + +static +void initSubQueue(const struct Tamarama *t, struct mq *q1, + struct mq *q2, const u32 lastActiveIdx, + const u32 activeIdx) { + // Push events to the new queue + const struct NFA *sub = getSubEngine(t, activeIdx); + assert(!isContainerType(sub->type)); + q2->nfa = sub; + + // Reinitialize state if the last active subengine is different + // from current one + if (lastActiveIdx == t->numSubEngines || + lastActiveIdx != activeIdx) { + nfaQueueInitState(q2->nfa, q2); + } + + copyQueueItems(t, sub, q1, q2, activeIdx); + if (q1->items[q1->cur].type == MQE_END) { + q1->cur++; + } + DEBUG_PRINTF("update lastIdx:%u\n", activeIdx); + storeActiveIdx(t, q1->streamState, activeIdx); +} + +static +void updateQueues(const struct Tamarama *t, struct mq *q1, struct mq *q2) { + q2->cur = q2->end = 0; + copyQueueProperties(q1, q2, t->activeIdxSize); + + const u32 numSubEngines = t->numSubEngines; + u32 lastActiveIdx = loadActiveIdx(q1->streamState, + t->activeIdxSize); +#ifdef DEBUG + DEBUG_PRINTF("external queue\n"); + debugQueue(q1); +#endif + + // Push MQE_START event to the subqueue + s64a loc = q1->items[q1->cur].location; + pushQueueAt(q2, 0, MQE_START, loc); + char hasStart = 0; + if (q1->items[q1->cur].type == MQE_START) { + hasStart = 1; + q1->cur++; + } + + u32 activeIdx = lastActiveIdx; + // If we have top events in the main queue, update current active id + if (q1->cur < q1->end - 1) { + const u32 *baseTop = (const u32 *)((const char *)t + + sizeof(struct Tamarama)); + u32 curTop = q1->items[q1->cur].type; + activeIdx = findEngineForTop(baseTop, curTop, numSubEngines); + } + + assert(activeIdx < numSubEngines); + DEBUG_PRINTF("last id:%u, current id:%u, num of subengines:%u\n", + lastActiveIdx, activeIdx, numSubEngines); + // Handle unfinished last alive subengine + if (lastActiveIdx != activeIdx && + lastActiveIdx != numSubEngines && hasStart) { + loc = q1->items[q1->cur].location; + pushQueueNoMerge(q2, MQE_END, loc); + q2->nfa = getSubEngine(t, lastActiveIdx); + return; + } + + initSubQueue(t, q1, q2, lastActiveIdx, activeIdx); + DEBUG_PRINTF("finish queues\n"); +} + +// After processing subqueue items for subengines, we need to copy back +// remaining items in subqueue if there are any to Tamarama main queue +static +void copyBack(const struct Tamarama *t, struct mq *q, struct mq *q1) { + DEBUG_PRINTF("copy back %u, %u\n", q1->cur, q1->end); + 
q->report_current = q1->report_current; + if (q->cur >= q->end && q1->cur >= q1->end) { + return; + } + + const u32 *baseTop = (const u32 *)((const char *)t + + sizeof(struct Tamarama)); + const u32 lastIdx = loadActiveIdx(q->streamState, + t->activeIdxSize); + u32 base = 0, event_base = 0; + if (lastIdx != t->numSubEngines) { + base = baseTop[lastIdx]; + const struct NFA *sub = getSubEngine(t, lastIdx); + event_base = isMultiTopType(sub->type) ? MQE_TOP_FIRST : MQE_TOP; + } + + u32 numItems = q1->end > q1->cur + 1 ? q1->end - q1->cur - 1 : 1; + // Also need to copy MQE_END if the main queue is empty + if (q->cur == q->end) { + assert(q->cur > 1 && q1->items[q1->end - 1].type == MQE_END); + q->items[--q->cur] = q1->items[q1->end - 1]; + } + u32 cur = q->cur - numItems; + q->items[cur] = q1->items[q1->cur++]; + q->items[cur].type = MQE_START; + q->cur = cur++; + for (u32 i = 0; i < numItems - 1; ++i) { + assert(q1->cur < q1->end); + u32 type = q1->items[q1->cur].type; + if (type > MQE_END) { + q1->items[q1->cur].type = type - event_base + base; + } + q->items[cur++] = q1->items[q1->cur++]; + } + +#ifdef DEBUG + DEBUG_PRINTF("external queue\n"); + debugQueue(q); +#endif +} + +char nfaExecTamarama_testEOD(const struct NFA *n, const char *state, + const char *streamState, u64a offset, + NfaCallback callback, void *context) { + const struct Tamarama *t = getImplNfa(n); + u32 activeIdx = loadActiveIdx(streamState, t->activeIdxSize); + if (activeIdx == t->numSubEngines) { + return MO_CONTINUE_MATCHING; + } + + const struct NFA *sub = getSubEngine(t, activeIdx); + if (nfaAcceptsEod(sub)) { + assert(!isContainerType(sub->type)); + const char *subStreamState = streamState + t->activeIdxSize; + return nfaCheckFinalState(sub, state, subStreamState, offset, callback, + context); + } + + return MO_CONTINUE_MATCHING; +} + +char nfaExecTamarama_QR(const struct NFA *n, struct mq *q, ReportID report) { + DEBUG_PRINTF("exec rose\n"); + struct mq q1; + q1.cur = q1.end = 0; + char rv = 0; + const struct Tamarama *t = getImplNfa(n); + while (q->cur < q->end) { + updateQueues(t, q, &q1); + } + + if (q1.cur < q1.end) { + rv = nfaQueueExecRose(q1.nfa, &q1, report); + } + + DEBUG_PRINTF("exec rose rv:%u\n", rv); + return rv; +} + +char nfaExecTamarama_reportCurrent(const struct NFA *n, struct mq *q) { + const struct Tamarama *t = getImplNfa(n); + u32 activeIdx = loadActiveIdx(q->streamState, t->activeIdxSize); + if (activeIdx == t->numSubEngines) { + return 1; + } + + const struct NFA *sub = getSubEngine(t, activeIdx); + struct mq q1; + copyQueue(t, sub, q, &q1, activeIdx); + return nfaReportCurrentMatches(sub, &q1); +} + +char nfaExecTamarama_inAccept(const struct NFA *n, ReportID report, + struct mq *q) { + const struct Tamarama *t = getImplNfa(n); + u32 activeIdx = loadActiveIdx(q->streamState, t->activeIdxSize); + if (activeIdx == t->numSubEngines) { + return 0; + } + const struct NFA *sub = getSubEngine(t, activeIdx); + + struct mq q1; + copyQueue(t, sub, q, &q1, activeIdx); + return nfaInAcceptState(sub, report, &q1); +} + +char nfaExecTamarama_inAnyAccept(const struct NFA *n, struct mq *q) { + const struct Tamarama *t = getImplNfa(n); + u32 activeIdx = loadActiveIdx(q->streamState, t->activeIdxSize); + if (activeIdx == t->numSubEngines) { + return 0; + } + const struct NFA *sub = getSubEngine(t, activeIdx); + + struct mq q1; + copyQueue(t, sub, q, &q1, activeIdx); + return nfaInAnyAcceptState(sub, &q1); +} + +char nfaExecTamarama_queueInitState(const struct NFA *n, struct mq *q) { + DEBUG_PRINTF("init 
state\n"); + const struct Tamarama *t = getImplNfa(n); + char *ptr = q->streamState; + // Use activeIdxSize as a sentinel value and initialize the state to + // an invalid engine as nothing has been triggered yet + storeActiveIdx(t, ptr, t->numSubEngines); + return 0; +} + +char nfaExecTamarama_queueCompressState(const struct NFA *n, const struct mq *q, + s64a loc) { + const struct Tamarama *t = getImplNfa(n); + u32 activeIdx = loadActiveIdx(q->streamState, t->activeIdxSize); + if (activeIdx == t->numSubEngines) { + return 0; + } + + const struct NFA *sub = getSubEngine(t, activeIdx); + + struct mq q1; + copyQueueProperties(q, &q1, t->activeIdxSize); + return nfaQueueCompressState(sub, &q1, loc); +} + +char nfaExecTamarama_expandState(const struct NFA *n, void *dest, + const void *src, u64a offset, u8 key) { + const struct Tamarama *t = getImplNfa(n); + u32 activeIdx = loadActiveIdx(src, t->activeIdxSize); + if (activeIdx == t->numSubEngines) { + return 0; + } + + const struct NFA *sub = getSubEngine(t, activeIdx); + + const char *subStreamState = (const char *)src + t->activeIdxSize; + return nfaExpandState(sub, dest, subStreamState, offset, key); +} + +enum nfa_zombie_status nfaExecTamarama_zombie_status(const struct NFA *n, + struct mq *q, s64a loc) { + const struct Tamarama *t = getImplNfa(n); + u32 activeIdx = loadActiveIdx(q->streamState, t->activeIdxSize); + if (activeIdx == t->numSubEngines) { + return NFA_ZOMBIE_NO; + } + const struct NFA *sub = getSubEngine(t, activeIdx); + + struct mq q1; + copyQueue(t, sub, q, &q1, activeIdx); + return nfaGetZombieStatus(sub, &q1, loc); +} + +char nfaExecTamarama_Q(const struct NFA *n, struct mq *q, s64a end) { + DEBUG_PRINTF("exec\n"); + struct mq q1; + char rv = MO_ALIVE; + char copy = 0; + const struct Tamarama *t = getImplNfa(n); + while (q->cur < q->end && q_cur_loc(q) <= end) { + updateQueues(t, q, &q1); + rv = nfaQueueExec_raw(q1.nfa, &q1, end); + q->report_current = q1.report_current; + copy = 1; + if (can_stop_matching(q->scratch)) { + break; + } + } + if (copy) { + copyBack(t, q, &q1); + } + return rv; +} + +char nfaExecTamarama_Q2(const struct NFA *n, struct mq *q, s64a end) { + DEBUG_PRINTF("exec to match\n"); + struct mq q1; + char rv = 0; + char copy = 0; + const struct Tamarama *t = getImplNfa(n); + while (q->cur < q->end && q_cur_loc(q) <= end && + rv != MO_MATCHES_PENDING) { + updateQueues(t, q, &q1); + rv = nfaQueueExec2_raw(q1.nfa, &q1, end); + q->report_current = q1.report_current; + copy = 1; + if (can_stop_matching(q->scratch)) { + break; + } + } + if (copy) { + copyBack(t, q, &q1); + } + return rv; +} + diff --git a/contrib/libs/hyperscan/src/nfa/tamarama.h b/contrib/libs/hyperscan/src/nfa/tamarama.h index 3b52d8de73..b4fba549fd 100644 --- a/contrib/libs/hyperscan/src/nfa/tamarama.h +++ b/contrib/libs/hyperscan/src/nfa/tamarama.h @@ -1,70 +1,70 @@ -/* - * Copyright (c) 2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef TAMARAMA_H -#define TAMARAMA_H - -#ifdef __cplusplus -extern "C" -{ -#endif - -#include "callback.h" -#include "ue2common.h" - -struct mq; -struct NFA; -struct hs_scratch; - -char nfaExecTamarama_testEOD(const struct NFA *n, const char *state, - const char *streamState, u64a offset, - NfaCallback callback, void *context); -char nfaExecTamarama_QR(const struct NFA *n, struct mq *q, ReportID report); -char nfaExecTamarama_reportCurrent(const struct NFA *n, struct mq *q); -char nfaExecTamarama_inAccept(const struct NFA *n, ReportID report, - struct mq *q); -char nfaExecTamarama_inAnyAccept(const struct NFA *n, struct mq *q); -char nfaExecTamarama_queueInitState(const struct NFA *n, struct mq *q); -char nfaExecTamarama_queueCompressState(const struct NFA *n, const struct mq *q, - s64a loc); -char nfaExecTamarama_expandState(const struct NFA *n, void *dest, - const void *src, u64a offset, u8 key); -enum nfa_zombie_status nfaExecTamarama_zombie_status(const struct NFA *n, - struct mq *q, s64a loc); -char nfaExecTamarama_Q(const struct NFA *nfa, struct mq *q, s64a end); -char nfaExecTamarama_Q2(const struct NFA *nfa, struct mq *q, s64a end); - -// only used by outfix and miracles, no implementation for tamarama -#define nfaExecTamarama_initCompressedState NFA_API_NO_IMPL -#define nfaExecTamarama_B_Reverse NFA_API_NO_IMPL - -#ifdef __cplusplus -} -#endif - -#endif +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef TAMARAMA_H +#define TAMARAMA_H + +#ifdef __cplusplus +extern "C" +{ +#endif + +#include "callback.h" +#include "ue2common.h" + +struct mq; +struct NFA; +struct hs_scratch; + +char nfaExecTamarama_testEOD(const struct NFA *n, const char *state, + const char *streamState, u64a offset, + NfaCallback callback, void *context); +char nfaExecTamarama_QR(const struct NFA *n, struct mq *q, ReportID report); +char nfaExecTamarama_reportCurrent(const struct NFA *n, struct mq *q); +char nfaExecTamarama_inAccept(const struct NFA *n, ReportID report, + struct mq *q); +char nfaExecTamarama_inAnyAccept(const struct NFA *n, struct mq *q); +char nfaExecTamarama_queueInitState(const struct NFA *n, struct mq *q); +char nfaExecTamarama_queueCompressState(const struct NFA *n, const struct mq *q, + s64a loc); +char nfaExecTamarama_expandState(const struct NFA *n, void *dest, + const void *src, u64a offset, u8 key); +enum nfa_zombie_status nfaExecTamarama_zombie_status(const struct NFA *n, + struct mq *q, s64a loc); +char nfaExecTamarama_Q(const struct NFA *nfa, struct mq *q, s64a end); +char nfaExecTamarama_Q2(const struct NFA *nfa, struct mq *q, s64a end); + +// only used by outfix and miracles, no implementation for tamarama +#define nfaExecTamarama_initCompressedState NFA_API_NO_IMPL +#define nfaExecTamarama_B_Reverse NFA_API_NO_IMPL + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/contrib/libs/hyperscan/src/nfa/tamarama_internal.h b/contrib/libs/hyperscan/src/nfa/tamarama_internal.h index 5cdc70d400..1e4bd48dbb 100644 --- a/contrib/libs/hyperscan/src/nfa/tamarama_internal.h +++ b/contrib/libs/hyperscan/src/nfa/tamarama_internal.h @@ -1,105 +1,105 @@ -/* - * Copyright (c) 2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - *\brief Tamarama: container engine for exclusive engines, - * data structures. - */ - -/* Tamarama bytecode layout: - * * |-----| - * * | | struct NFA - * * |-----| - * * | | struct Tamarama - * * | | - * * |-----| - * * | | top remapping table: - * * | | stores top base for each subengine. - * * | | old_top = remapped_top - top_base; - * * | | The size of table is equal to the number of subengines. - * * ... - * * | | - * * |-----| - * * | | offsets from the start of struct Tamarama to subengines --\ - * * ... | - * * | | -----------\ | - * * |-----| | | - * * ||--| | subengine 1 (struct NFA + rest of subengine) <--/ | - * * || | | | - * * ||--| | | - * * || | | | - * * || | | | - * * ||--| | | - * * | | | - * * ||--| | subengine 2 (struct NFA + rest of subengine) <-------/ - * * || | | - * * ||--| | - * * || | | - * * || | | - * * ||--| | - * * | | - * * ... - * * | | - * * |-----| total size of tamarama - * * - * * Tamarama stream state: - * * - * * |---| - * * | | active subengine id - * * |---| - * * | | common pool of stream state for each engine - * * | | - * * | | - * * ... - * * | | - * * | | - * * |---| - * * - * * Tamarama scratch space: - * * - * * |---| - * * | | common pool of scratch for each engine - * * | | - * * | | - * * ... - * * | | - * * | | - * * |---| - * */ - -#ifndef NFA_TAMARAMA_INTERNAL_H -#define NFA_TAMARAMA_INTERNAL_H - -#include "ue2common.h" - -struct ALIGN_AVX_DIRECTIVE Tamarama { - u32 numSubEngines; - u8 activeIdxSize; -}; - -#endif // NFA_TAMARAMA_INTERNAL_H +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
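Per the stream-state layout documented in this header, the shared subengine state pool begins immediately after the active-id field; this is the same offset copyQueueProperties() and nfaExecTamarama_testEOD() apply in tamarama.c. In sketch form (helper name illustrative):

static const char *subStreamState(const struct Tamarama *t,
                                  const char *streamState) {
    /* skip the packed active-subengine id at the front of stream state */
    return streamState + t->activeIdxSize;
}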
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + *\brief Tamarama: container engine for exclusive engines, + * data structures. + */ + +/* Tamarama bytecode layout: + * * |-----| + * * | | struct NFA + * * |-----| + * * | | struct Tamarama + * * | | + * * |-----| + * * | | top remapping table: + * * | | stores top base for each subengine. + * * | | old_top = remapped_top - top_base; + * * | | The size of table is equal to the number of subengines. + * * ... + * * | | + * * |-----| + * * | | offsets from the start of struct Tamarama to subengines --\ + * * ... | + * * | | -----------\ | + * * |-----| | | + * * ||--| | subengine 1 (struct NFA + rest of subengine) <--/ | + * * || | | | + * * ||--| | | + * * || | | | + * * || | | | + * * ||--| | | + * * | | | + * * ||--| | subengine 2 (struct NFA + rest of subengine) <-------/ + * * || | | + * * ||--| | + * * || | | + * * || | | + * * ||--| | + * * | | + * * ... + * * | | + * * |-----| total size of tamarama + * * + * * Tamarama stream state: + * * + * * |---| + * * | | active subengine id + * * |---| + * * | | common pool of stream state for each engine + * * | | + * * | | + * * ... + * * | | + * * | | + * * |---| + * * + * * Tamarama scratch space: + * * + * * |---| + * * | | common pool of scratch for each engine + * * | | + * * | | + * * ... + * * | | + * * | | + * * |---| + * */ + +#ifndef NFA_TAMARAMA_INTERNAL_H +#define NFA_TAMARAMA_INTERNAL_H + +#include "ue2common.h" + +struct ALIGN_AVX_DIRECTIVE Tamarama { + u32 numSubEngines; + u8 activeIdxSize; +}; + +#endif // NFA_TAMARAMA_INTERNAL_H diff --git a/contrib/libs/hyperscan/src/nfa/tamaramacompile.cpp b/contrib/libs/hyperscan/src/nfa/tamaramacompile.cpp index 1a6e8beff9..5892f52782 100644 --- a/contrib/libs/hyperscan/src/nfa/tamaramacompile.cpp +++ b/contrib/libs/hyperscan/src/nfa/tamaramacompile.cpp @@ -1,176 +1,176 @@ -/* - * Copyright (c) 2016-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
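Following the bytecode layout above: struct Tamarama is followed by numSubEngines u32 top-base entries and then numSubEngines u32 subengine offsets. getSubOffset() in tamarama.c reaches the offset table with exactly this arithmetic; a sketch (helper name illustrative):

static const u32 *subOffsetTable(const struct Tamarama *t) {
    /* skip the Tamarama header plus the u32 top-base table */
    return (const u32 *)((const char *)t + sizeof(struct Tamarama) +
                         t->numSubEngines * sizeof(u32));
}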
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** - * \file - * \brief Tamarama: container engine for exclusive engines, compiler code. - */ - -#include "config.h" - -#include "tamaramacompile.h" - -#include "tamarama_internal.h" -#include "nfa_internal.h" -#include "nfa_api_queue.h" -#include "repeatcompile.h" -#include "util/container.h" -#include "util/verify_types.h" - -using namespace std; - -namespace ue2 { - -static -void remapTops(const TamaInfo &tamaInfo, - vector<u32> &top_base, - map<pair<const NFA *, u32>, u32> &out_top_remap) { - u32 i = 0; - u32 cur = 0; - for (const auto &sub : tamaInfo.subengines) { - u32 base = cur; - top_base.push_back(base + MQE_TOP_FIRST); - DEBUG_PRINTF("subengine:%u\n", i); - for (const auto &t : tamaInfo.tops[i++]) { - cur = base + t; - DEBUG_PRINTF("top remapping %u:%u\n", t ,cur); - out_top_remap.emplace(make_pair(sub, t), cur++); - } - } -} - -/** - * update stream state and scratch state sizes and copy in - * subengines in Tamarama. - */ -static -void copyInSubnfas(const char *base_offset, NFA &nfa, - const TamaInfo &tamaInfo, u32 *offsets, - char *sub_nfa_offset, const u32 activeIdxSize) { - u32 maxStreamStateSize = 0; - u32 maxScratchStateSize = 0; - sub_nfa_offset = ROUNDUP_PTR(sub_nfa_offset, 64); - bool infinite_max_width = false; - for (auto &sub : tamaInfo.subengines) { - u32 streamStateSize = verify_u32(sub->streamStateSize); - u32 scratchStateSize = verify_u32(sub->scratchStateSize); - maxStreamStateSize = max(maxStreamStateSize, streamStateSize); - maxScratchStateSize = max(maxScratchStateSize, scratchStateSize); - sub->queueIndex = nfa.queueIndex; - - memcpy(sub_nfa_offset, sub, sub->length); - *offsets = verify_u32(sub_nfa_offset - base_offset); - DEBUG_PRINTF("type:%u offsets:%u\n", sub->type, *offsets); - ++offsets; - sub_nfa_offset += ROUNDUP_CL(sub->length); - - // update nfa properties - nfa.flags |= sub->flags; - if (!sub->maxWidth) { - infinite_max_width = true; - } else if (!infinite_max_width) { - nfa.maxWidth = max(nfa.maxWidth, sub->maxWidth); - } - } - - if (infinite_max_width) { - nfa.maxWidth = 0; - } - nfa.maxBiAnchoredWidth = 0; - nfa.streamStateSize = activeIdxSize + maxStreamStateSize; - nfa.scratchStateSize = maxScratchStateSize; -} - -/** - * Take in a collection of exclusive sub engines and produces a tamarama, also - * returns via out_top_remap, a mapping indicating how tops in the subengines in - * relate to the tamarama's tops. 
- */ -bytecode_ptr<NFA> -buildTamarama(const TamaInfo &tamaInfo, const u32 queue, - map<pair<const NFA *, u32>, u32> &out_top_remap) { - vector<u32> top_base; - remapTops(tamaInfo, top_base, out_top_remap); - - size_t subSize = tamaInfo.subengines.size(); - DEBUG_PRINTF("subSize:%zu\n", subSize); - size_t total_size = - sizeof(NFA) + // initial NFA structure - sizeof(Tamarama) + // Tamarama structure - sizeof(u32) * subSize + // base top event value for subengines, - // used for top remapping at runtime - sizeof(u32) * subSize + 64; // offsets to subengines in bytecode and - // padding for subengines - - for (const auto &sub : tamaInfo.subengines) { - total_size += ROUNDUP_CL(sub->length); - } - - // use subSize as a sentinel value for no active subengines, - // so add one to subSize here - u32 activeIdxSize = calcPackedBytes(subSize + 1); - auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size); - nfa->type = verify_u8(TAMARAMA_NFA); - nfa->length = verify_u32(total_size); - nfa->queueIndex = queue; - - char *ptr = (char *)nfa.get() + sizeof(NFA); - char *base_offset = ptr; - Tamarama *t = (Tamarama *)ptr; - t->numSubEngines = verify_u32(subSize); - t->activeIdxSize = verify_u8(activeIdxSize); - - ptr += sizeof(Tamarama); - copy_bytes(ptr, top_base); - ptr += byte_length(top_base); - - u32 *offsets = (u32 *)ptr; - char *sub_nfa_offset = ptr + sizeof(u32) * subSize; - copyInSubnfas(base_offset, *nfa, tamaInfo, offsets, sub_nfa_offset, - activeIdxSize); - assert((size_t)(sub_nfa_offset - (char *)nfa.get()) <= total_size); - return nfa; -} - -set<ReportID> all_reports(const TamaProto &proto) { - return proto.reports; -} - -void TamaInfo::add(NFA *sub, const set<u32> &top) { - assert(subengines.size() < max_occupancy); - subengines.push_back(sub); - tops.push_back(top); -} - -void TamaProto::add(const NFA *n, const u32 id, const u32 top, - const map<pair<const NFA *, u32>, u32> &out_top_remap) { - top_remap.emplace(make_pair(id, top), out_top_remap.at(make_pair(n, top))); -} - -} // namespace ue2 - +/* + * Copyright (c) 2016-2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
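Because the subengines are mutually exclusive, copyInSubnfas() sizes a single shared pool per state kind using the maximum, not the sum, of the subengine sizes, and prepends the packed active-index field to the stream state. A sketch of the stream-state half (helper name illustrative; assumes <algorithm> and <vector>):

static u32 sharedStreamStateSize(const std::vector<NFA *> &subs,
                                 u32 activeIdxSize) {
    u32 maxSub = 0;
    for (const NFA *sub : subs) {
        maxSub = std::max(maxSub, sub->streamStateSize); // pool fits any one
    }
    return activeIdxSize + maxSub;
}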
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * \brief Tamarama: container engine for exclusive engines, compiler code. + */ + +#include "config.h" + +#include "tamaramacompile.h" + +#include "tamarama_internal.h" +#include "nfa_internal.h" +#include "nfa_api_queue.h" +#include "repeatcompile.h" +#include "util/container.h" +#include "util/verify_types.h" + +using namespace std; + +namespace ue2 { + +static +void remapTops(const TamaInfo &tamaInfo, + vector<u32> &top_base, + map<pair<const NFA *, u32>, u32> &out_top_remap) { + u32 i = 0; + u32 cur = 0; + for (const auto &sub : tamaInfo.subengines) { + u32 base = cur; + top_base.push_back(base + MQE_TOP_FIRST); + DEBUG_PRINTF("subengine:%u\n", i); + for (const auto &t : tamaInfo.tops[i++]) { + cur = base + t; + DEBUG_PRINTF("top remapping %u:%u\n", t ,cur); + out_top_remap.emplace(make_pair(sub, t), cur++); + } + } +} + +/** + * update stream state and scratch state sizes and copy in + * subengines in Tamarama. + */ +static +void copyInSubnfas(const char *base_offset, NFA &nfa, + const TamaInfo &tamaInfo, u32 *offsets, + char *sub_nfa_offset, const u32 activeIdxSize) { + u32 maxStreamStateSize = 0; + u32 maxScratchStateSize = 0; + sub_nfa_offset = ROUNDUP_PTR(sub_nfa_offset, 64); + bool infinite_max_width = false; + for (auto &sub : tamaInfo.subengines) { + u32 streamStateSize = verify_u32(sub->streamStateSize); + u32 scratchStateSize = verify_u32(sub->scratchStateSize); + maxStreamStateSize = max(maxStreamStateSize, streamStateSize); + maxScratchStateSize = max(maxScratchStateSize, scratchStateSize); + sub->queueIndex = nfa.queueIndex; + + memcpy(sub_nfa_offset, sub, sub->length); + *offsets = verify_u32(sub_nfa_offset - base_offset); + DEBUG_PRINTF("type:%u offsets:%u\n", sub->type, *offsets); + ++offsets; + sub_nfa_offset += ROUNDUP_CL(sub->length); + + // update nfa properties + nfa.flags |= sub->flags; + if (!sub->maxWidth) { + infinite_max_width = true; + } else if (!infinite_max_width) { + nfa.maxWidth = max(nfa.maxWidth, sub->maxWidth); + } + } + + if (infinite_max_width) { + nfa.maxWidth = 0; + } + nfa.maxBiAnchoredWidth = 0; + nfa.streamStateSize = activeIdxSize + maxStreamStateSize; + nfa.scratchStateSize = maxScratchStateSize; +} + +/** + * Take in a collection of exclusive sub engines and produces a tamarama, also + * returns via out_top_remap, a mapping indicating how tops in the subengines in + * relate to the tamarama's tops. 
+ */ +bytecode_ptr<NFA> +buildTamarama(const TamaInfo &tamaInfo, const u32 queue, + map<pair<const NFA *, u32>, u32> &out_top_remap) { + vector<u32> top_base; + remapTops(tamaInfo, top_base, out_top_remap); + + size_t subSize = tamaInfo.subengines.size(); + DEBUG_PRINTF("subSize:%zu\n", subSize); + size_t total_size = + sizeof(NFA) + // initial NFA structure + sizeof(Tamarama) + // Tamarama structure + sizeof(u32) * subSize + // base top event value for subengines, + // used for top remapping at runtime + sizeof(u32) * subSize + 64; // offsets to subengines in bytecode and + // padding for subengines + + for (const auto &sub : tamaInfo.subengines) { + total_size += ROUNDUP_CL(sub->length); + } + + // use subSize as a sentinel value for no active subengines, + // so add one to subSize here + u32 activeIdxSize = calcPackedBytes(subSize + 1); + auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size); + nfa->type = verify_u8(TAMARAMA_NFA); + nfa->length = verify_u32(total_size); + nfa->queueIndex = queue; + + char *ptr = (char *)nfa.get() + sizeof(NFA); + char *base_offset = ptr; + Tamarama *t = (Tamarama *)ptr; + t->numSubEngines = verify_u32(subSize); + t->activeIdxSize = verify_u8(activeIdxSize); + + ptr += sizeof(Tamarama); + copy_bytes(ptr, top_base); + ptr += byte_length(top_base); + + u32 *offsets = (u32 *)ptr; + char *sub_nfa_offset = ptr + sizeof(u32) * subSize; + copyInSubnfas(base_offset, *nfa, tamaInfo, offsets, sub_nfa_offset, + activeIdxSize); + assert((size_t)(sub_nfa_offset - (char *)nfa.get()) <= total_size); + return nfa; +} + +set<ReportID> all_reports(const TamaProto &proto) { + return proto.reports; +} + +void TamaInfo::add(NFA *sub, const set<u32> &top) { + assert(subengines.size() < max_occupancy); + subengines.push_back(sub); + tops.push_back(top); +} + +void TamaProto::add(const NFA *n, const u32 id, const u32 top, + const map<pair<const NFA *, u32>, u32> &out_top_remap) { + top_remap.emplace(make_pair(id, top), out_top_remap.at(make_pair(n, top))); +} + +} // namespace ue2 + diff --git a/contrib/libs/hyperscan/src/nfa/tamaramacompile.h b/contrib/libs/hyperscan/src/nfa/tamaramacompile.h index 7fcea3ec85..ce97d0adcd 100644 --- a/contrib/libs/hyperscan/src/nfa/tamaramacompile.h +++ b/contrib/libs/hyperscan/src/nfa/tamaramacompile.h @@ -1,96 +1,96 @@ -/* - * Copyright (c) 2016-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
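A hypothetical compile-side use of the builder above, assuming sub1 and sub2 are previously constructed subengines already proven mutually exclusive (all names in the snippet are illustrative):

static ue2::bytecode_ptr<NFA>
buildExampleTamarama(NFA *sub1, NFA *sub2, u32 queue,
                     std::map<std::pair<const NFA *, u32>, u32> &top_remap) {
    ue2::TamaInfo info;
    info.add(sub1, {0});    // subengine driven by a single top
    info.add(sub2, {0, 1}); // multi-top subengine
    return ue2::buildTamarama(info, queue, top_remap);
}

On return, top_remap records how each (subengine, top) pair was renumbered onto the tamarama's global top space.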
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** - * \file - * \brief Tamarama: container engine for exclusive engines, compiler code. - */ - -#ifndef NFA_TAMARAMACOMPILE_H -#define NFA_TAMARAMACOMPILE_H - -#include "ue2common.h" -#include "util/bytecode_ptr.h" - -#include <map> -#include <set> -#include <vector> - -struct NFA; - -namespace ue2 { - -/** - * \brief A TamaProto that contains top remapping and reports info. - */ -struct TamaProto { - void add(const NFA *n, const u32 id, const u32 top, - const std::map<std::pair<const NFA *, u32>, u32> &out_top_remap); - /** Top remapping between <vertex id, top value> and - ** remapped top value. */ - std::map<std::pair<u32, u32>, u32> top_remap; - - /** All the reports in subengines */ - std::set<ReportID> reports; -}; - -/** - * \brief Construction info for a Tamarama engine: - * contains at least two subengines. - * - * A TamaInfo is converted into a single NFA, with each top triggering a - * subengine. A TamaInfo can contain at most TamaInfo::max_occupancy - * subengines. - */ -struct TamaInfo { - static constexpr size_t max_occupancy = 65536; // arbitrary limit - - /** \brief Add a new subengine. */ - void add(NFA *sub, const std::set<u32> &top); - - /** \brief All the subengines */ - std::vector<NFA *> subengines; - - /** \brief Tops of subengines */ - std::vector<std::set<u32>> tops; -}; - -std::set<ReportID> all_reports(const TamaProto &proto); - -/** - * Take in a collection of exclusive subengines and produces a tamarama, also - * returns via out_top_remap, a mapping indicating how tops in the subengines in - * relate to the tamarama's tops. - */ -bytecode_ptr<NFA> -buildTamarama(const TamaInfo &tamaInfo, const u32 queue, - std::map<std::pair<const NFA *, u32>, u32> &out_top_remap); - -} // namespace ue2 - -#endif // NFA_TAMARAMACOMPILE_H +/* + * Copyright (c) 2016-2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file
+ * \brief Tamarama: container engine for exclusive engines, compiler code.
+ */
+
+#ifndef NFA_TAMARAMACOMPILE_H
+#define NFA_TAMARAMACOMPILE_H
+
+#include "ue2common.h"
+#include "util/bytecode_ptr.h"
+
+#include <map>
+#include <set>
+#include <vector>
+
+struct NFA;
+
+namespace ue2 {
+
+/**
+ * \brief A TamaProto that contains top remapping and reports info.
+ */
+struct TamaProto {
+    void add(const NFA *n, const u32 id, const u32 top,
+             const std::map<std::pair<const NFA *, u32>, u32> &out_top_remap);
+    /** Top remapping between <vertex id, top value> and
+     ** remapped top value. */
+    std::map<std::pair<u32, u32>, u32> top_remap;
+
+    /** All the reports in subengines */
+    std::set<ReportID> reports;
+};
+
+/**
+ * \brief Construction info for a Tamarama engine:
+ * contains at least two subengines.
+ *
+ * A TamaInfo is converted into a single NFA, with each top triggering a
+ * subengine. A TamaInfo can contain at most TamaInfo::max_occupancy
+ * subengines.
+ */
+struct TamaInfo {
+    static constexpr size_t max_occupancy = 65536; // arbitrary limit
+
+    /** \brief Add a new subengine. */
+    void add(NFA *sub, const std::set<u32> &top);
+
+    /** \brief All the subengines */
+    std::vector<NFA *> subengines;
+
+    /** \brief Tops of subengines */
+    std::vector<std::set<u32>> tops;
+};
+
+std::set<ReportID> all_reports(const TamaProto &proto);
+
+/**
+ * Takes in a collection of exclusive subengines and produces a tamarama; it
+ * also returns, via out_top_remap, a mapping indicating how tops in the
+ * subengines relate to the tamarama's tops.
+ */ +bytecode_ptr<NFA> +buildTamarama(const TamaInfo &tamaInfo, const u32 queue, + std::map<std::pair<const NFA *, u32>, u32> &out_top_remap); + +} // namespace ue2 + +#endif // NFA_TAMARAMACOMPILE_H diff --git a/contrib/libs/hyperscan/src/nfa/truffle.c b/contrib/libs/hyperscan/src/nfa/truffle.c index be6b312cf2..86a5bff446 100644 --- a/contrib/libs/hyperscan/src/nfa/truffle.c +++ b/contrib/libs/hyperscan/src/nfa/truffle.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,29 +33,29 @@ #include "ue2common.h" #include "truffle.h" -#include "util/arch.h" +#include "util/arch.h" #include "util/bitutils.h" #include "util/simd_utils.h" -#if !defined(HAVE_AVX2) +#if !defined(HAVE_AVX2) static really_inline -const u8 *lastMatch(const u8 *buf, u32 z) { +const u8 *lastMatch(const u8 *buf, u32 z) { if (unlikely(z != 0xffff)) { - u32 pos = clz32(~z & 0xffff); - assert(pos >= 16 && pos < 32); - return buf + (31 - pos); + u32 pos = clz32(~z & 0xffff); + assert(pos >= 16 && pos < 32); + return buf + (31 - pos); } return NULL; // no match } static really_inline -const u8 *firstMatch(const u8 *buf, u32 z) { +const u8 *firstMatch(const u8 *buf, u32 z) { if (unlikely(z != 0xffff)) { - u32 pos = ctz32(~z & 0xffff); - assert(pos < 16); - return buf + pos; + u32 pos = ctz32(~z & 0xffff); + assert(pos < 16); + return buf + pos; } return NULL; // no match @@ -68,11 +68,11 @@ u32 block(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, m128 v) { m128 shuf_mask_hi = _mm_set1_epi64x(0x8040201008040201); // and now do the real work - m128 shuf1 = pshufb_m128(shuf_mask_lo_highclear, v); + m128 shuf1 = pshufb_m128(shuf_mask_lo_highclear, v); m128 t1 = xor128(v, highconst); - m128 shuf2 = pshufb_m128(shuf_mask_lo_highset, t1); - m128 t2 = andnot128(highconst, rshift64_m128(v, 4)); - m128 shuf3 = pshufb_m128(shuf_mask_hi, t2); + m128 shuf2 = pshufb_m128(shuf_mask_lo_highset, t1); + m128 t2 = andnot128(highconst, rshift64_m128(v, 4)); + m128 shuf3 = pshufb_m128(shuf_mask_hi, t2); m128 tmp = and128(or128(shuf1, shuf2), shuf3); m128 tmp2 = eq128(tmp, zeroes128()); u32 z = movemask128(tmp2); @@ -91,8 +91,8 @@ const u8 *truffleMini(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars); // can't be these bytes in z - u32 mask = (0xffff >> (16 - len)) ^ 0xffff; - const u8 *rv = firstMatch(buf, z | mask); + u32 mask = (0xffff >> (16 - len)) ^ 0xffff; + const u8 *rv = firstMatch(buf, z | mask); if (rv) { return rv; @@ -101,23 +101,23 @@ const u8 *truffleMini(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, } } -static really_inline -const u8 *fwdBlock(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, - m128 v, const u8 *buf) { - u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, v); - return firstMatch(buf, z); -} - -static really_inline -const u8 *revBlock(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, - m128 v, const u8 *buf) { - u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, v); - return lastMatch(buf, z); -} - +static really_inline +const u8 *fwdBlock(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, + m128 v, const u8 *buf) { + u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, v); + return firstMatch(buf, z); +} + +static really_inline +const u8 *revBlock(m128 
shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, + m128 v, const u8 *buf) { + u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, v); + return lastMatch(buf, z); +} + const u8 *truffleExec(m128 shuf_mask_lo_highclear, - m128 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end) { + m128 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end) { DEBUG_PRINTF("len %zu\n", buf_end - buf); assert(buf && buf_end); @@ -166,15 +166,15 @@ const u8 *truffleExec(m128 shuf_mask_lo_highclear, static const u8 *truffleRevMini(m128 shuf_mask_lo_highclear, - m128 shuf_mask_lo_highset, const u8 *buf, - const u8 *buf_end) { + m128 shuf_mask_lo_highset, const u8 *buf, + const u8 *buf_end) { uintptr_t len = buf_end - buf; assert(len < 16); m128 chars = zeroes128(); memcpy(&chars, buf, len); - u32 mask = (0xffff >> (16 - len)) ^ 0xffff; + u32 mask = (0xffff >> (16 - len)) ^ 0xffff; u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars); const u8 *rv = lastMatch(buf, z | mask); @@ -231,378 +231,378 @@ const u8 *rtruffleExec(m128 shuf_mask_lo_highclear, return buf - 1; } -#elif !defined(HAVE_AVX512) - -// AVX2 - -static really_inline -const u8 *lastMatch(const u8 *buf, u32 z) { - if (unlikely(z != 0xffffffff)) { - u32 pos = clz32(~z); - assert(pos < 32); - return buf + (31 - pos); - } - - return NULL; // no match -} - -static really_inline -const u8 *firstMatch(const u8 *buf, u32 z) { - if (unlikely(z != 0xffffffff)) { - u32 pos = ctz32(~z); - assert(pos < 32); - return buf + pos; - } - - return NULL; // no match -} - -static really_inline -u32 block(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, m256 v) { - - m256 highconst = _mm256_set1_epi8(0x80); - m256 shuf_mask_hi = _mm256_set1_epi64x(0x8040201008040201); - - // and now do the real work - m256 shuf1 = pshufb_m256(shuf_mask_lo_highclear, v); - m256 t1 = xor256(v, highconst); - m256 shuf2 = pshufb_m256(shuf_mask_lo_highset, t1); - m256 t2 = andnot256(highconst, rshift64_m256(v, 4)); - m256 shuf3 = pshufb_m256(shuf_mask_hi, t2); - m256 tmp = and256(or256(shuf1, shuf2), shuf3); - m256 tmp2 = eq256(tmp, zeroes256()); - u32 z = movemask256(tmp2); - - return z; -} - -static -const u8 *truffleMini(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end) { - uintptr_t len = buf_end - buf; - assert(len < 32); - - m256 chars = zeroes256(); - memcpy(&chars, buf, len); - - u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars); - // can't be these bytes in z - u32 mask = (0xffffffff >> (32 - len)) ^ 0xffffffff; - const u8 *rv = firstMatch(buf, z | mask); - - if (rv) { - return rv; - } else { - return buf_end; - } -} - -static really_inline -const u8 *fwdBlock(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, - m256 v, const u8 *buf) { - u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, v); - return firstMatch(buf, z); -} - -static really_inline -const u8 *revBlock(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, - m256 v, const u8 *buf) { - u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, v); - return lastMatch(buf, z); -} - -const u8 *truffleExec(m128 shuf_mask_lo_highclear, - m128 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end) { - DEBUG_PRINTF("len %zu\n", buf_end - buf); - const m256 wide_clear = set2x128(shuf_mask_lo_highclear); - const m256 wide_set = set2x128(shuf_mask_lo_highset); - - assert(buf && buf_end); - assert(buf < buf_end); - const u8 *rv; - - if (buf_end - buf < 32) { - return truffleMini(wide_clear, wide_set, buf, buf_end); - 
} - - size_t min = (size_t)buf % 32; - assert(buf_end - buf >= 32); - - // Preconditioning: most of the time our buffer won't be aligned. - m256 chars = loadu256(buf); - rv = fwdBlock(wide_clear, wide_set, chars, buf); - if (rv) { - return rv; - } - buf += (32 - min); - - const u8 *last_block = buf_end - 32; - while (buf < last_block) { - m256 lchars = load256(buf); - rv = fwdBlock(wide_clear, wide_set, lchars, buf); - if (rv) { - return rv; - } - buf += 32; - } - - // Use an unaligned load to mop up the last 32 bytes and get an accurate - // picture to buf_end. - assert(buf <= buf_end && buf >= buf_end - 32); - chars = loadu256(buf_end - 32); - rv = fwdBlock(wide_clear, wide_set, chars, buf_end - 32); - if (rv) { - return rv; - } - return buf_end; -} - -static -const u8 *truffleRevMini(m256 shuf_mask_lo_highclear, - m256 shuf_mask_lo_highset, const u8 *buf, - const u8 *buf_end) { - uintptr_t len = buf_end - buf; - assert(len < 32); - - m256 chars = zeroes256(); - memcpy(&chars, buf, len); - - u32 mask = (0xffffffff >> (32 - len)) ^ 0xffffffff; - u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars); - const u8 *rv = lastMatch(buf, z | mask); - - if (rv) { - return rv; - } - return buf - 1; -} - - -const u8 *rtruffleExec(m128 shuf_mask_lo_highclear, - m128 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end) { - const m256 wide_clear = set2x128(shuf_mask_lo_highclear); - const m256 wide_set = set2x128(shuf_mask_lo_highset); - assert(buf && buf_end); - assert(buf < buf_end); - const u8 *rv; - - DEBUG_PRINTF("len %zu\n", buf_end - buf); - - if (buf_end - buf < 32) { - return truffleRevMini(wide_clear, wide_set, buf, buf_end); - } - - assert(buf_end - buf >= 32); - - // Preconditioning: most of the time our buffer won't be aligned. - m256 chars = loadu256(buf_end - 32); - rv = revBlock(wide_clear, wide_set, chars, - buf_end - 32); - if (rv) { - return rv; - } - buf_end = (const u8 *)((size_t)buf_end & ~((size_t)0x1f)); - - const u8 *last_block = buf + 32; - while (buf_end > last_block) { - buf_end -= 32; - m256 lchars = load256(buf_end); - rv = revBlock(wide_clear, wide_set, lchars, buf_end); - if (rv) { - return rv; - } - } - - // Use an unaligned load to mop up the last 32 bytes and get an accurate - // picture to buf_end. 
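The preconditioning idiom used above deserves a note: one unaligned load covers the ragged head of the buffer, the pointer is then bumped to the next alignment boundary, and a final unaligned load covers the tail, with the small overlaps scanned twice. A scalar sketch of the same control flow, using a hypothetical scan_block() in place of fwdBlock():

    #include <assert.h>
    #include <stddef.h>
    #include <stdio.h>
    #include <string.h>

    /* Hypothetical stand-in for fwdBlock(): scan one 32-byte block for a
     * target byte, returning the first hit or NULL. */
    static const unsigned char *scan_block(const unsigned char *p,
                                           unsigned char target) {
        for (size_t i = 0; i < 32; i++) {
            if (p[i] == target) {
                return p + i;
            }
        }
        return NULL;
    }

    /* Unaligned head block, aligned main loop, unaligned tail block;
     * rescanning the overlapped bytes is harmless since a hit is
     * reported at its first position either way. */
    static const unsigned char *scan(const unsigned char *buf,
                                     const unsigned char *buf_end,
                                     unsigned char target) {
        assert(buf_end - buf >= 32);
        const unsigned char *rv = scan_block(buf, target); /* unaligned head */
        if (rv) {
            return rv;
        }
        buf += 32 - (size_t)buf % 32; /* step to the next aligned address */

        while (buf < buf_end - 32) {  /* aligned main loop */
            if ((rv = scan_block(buf, target))) {
                return rv;
            }
            buf += 32;
        }
        return scan_block(buf_end - 32, target); /* unaligned tail */
    }

    int main(void) {
        unsigned char data[128];
        memset(data, 'a', sizeof(data));
        data[100] = 'x';
        const unsigned char *hit = scan(data, data + sizeof(data), 'x');
        printf("match at offset %td\n", hit ? hit - data : (ptrdiff_t)-1);
        return 0;
    }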
- chars = loadu256(buf); - rv = revBlock(wide_clear, wide_set, chars, buf); - if (rv) { - return rv; - } - return buf - 1; -} - -#else // AVX512 - -static really_inline -const u8 *lastMatch(const u8 *buf, u64a z) { - if (unlikely(z != ~0ULL)) { - u64a pos = clz64(~z); - assert(pos < 64); - return buf + (63 - pos); - } - - return NULL; // no match -} - -static really_inline -const u8 *firstMatch(const u8 *buf, u64a z) { - if (unlikely(z != ~0ULL)) { - u64a pos = ctz64(~z); - assert(pos < 64); - DEBUG_PRINTF("pos %llu\n", pos); - return buf + pos; - } - - return NULL; // no match -} - -static really_inline -u64a block(m512 shuf_mask_lo_highclear, m512 shuf_mask_lo_highset, m512 v) { - m512 highconst = set64x8(0x80); - m512 shuf_mask_hi = set8x64(0x8040201008040201); - - // and now do the real work - m512 shuf1 = pshufb_m512(shuf_mask_lo_highclear, v); - m512 t1 = xor512(v, highconst); - m512 shuf2 = pshufb_m512(shuf_mask_lo_highset, t1); - m512 t2 = andnot512(highconst, rshift64_m512(v, 4)); - m512 shuf3 = pshufb_m512(shuf_mask_hi, t2); - m512 tmp = and512(or512(shuf1, shuf2), shuf3); - u64a z = eq512mask(tmp, zeroes512()); - - return z; -} - -static really_inline -const u8 *truffleMini(m512 shuf_mask_lo_highclear, m512 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end) { - uintptr_t len = buf_end - buf; - assert(len <= 64); - - __mmask64 mask = (~0ULL) >> (64 - len); - - m512 chars = loadu_maskz_m512(mask, buf); - - u64a z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars); - - const u8 *rv = firstMatch(buf, z | ~mask); - - return rv; -} - -static really_inline -const u8 *fwdBlock(m512 shuf_mask_lo_highclear, m512 shuf_mask_lo_highset, - m512 v, const u8 *buf) { - u64a z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, v); - return firstMatch(buf, z); -} - -static really_inline -const u8 *revBlock(m512 shuf_mask_lo_highclear, m512 shuf_mask_lo_highset, - m512 v, const u8 *buf) { - u64a z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, v); - return lastMatch(buf, z); -} - -const u8 *truffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end) { - DEBUG_PRINTF("len %zu\n", buf_end - buf); - const m512 wide_clear = set4x128(shuf_mask_lo_highclear); - const m512 wide_set = set4x128(shuf_mask_lo_highset); - - assert(buf && buf_end); - assert(buf < buf_end); - const u8 *rv; - - if (buf_end - buf <= 64) { - rv = truffleMini(wide_clear, wide_set, buf, buf_end); - return rv ? rv : buf_end; - } - - assert(buf_end - buf >= 64); - if ((uintptr_t)buf % 64) { - // Preconditioning: most of the time our buffer won't be aligned. - rv = truffleMini(wide_clear, wide_set, buf, ROUNDUP_PTR(buf, 64)); - if (rv) { - return rv; - } - buf = ROUNDUP_PTR(buf, 64); - } - const u8 *last_block = buf_end - 64; - while (buf < last_block) { - m512 lchars = load512(buf); - rv = fwdBlock(wide_clear, wide_set, lchars, buf); - if (rv) { - return rv; - } - buf += 64; - } - - // Use an unaligned load to mop up the last 64 bytes and get an accurate - // picture to buf_end. 
- assert(buf <= buf_end && buf >= buf_end - 64); - m512 chars = loadu512(buf_end - 64); - rv = fwdBlock(wide_clear, wide_set, chars, buf_end - 64); - if (rv) { - return rv; - } - return buf_end; -} - -static really_inline -const u8 *truffleRevMini(m512 shuf_mask_lo_highclear, m512 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end) { - uintptr_t len = buf_end - buf; - assert(len < 64); - - __mmask64 mask = (~0ULL) >> (64 - len); - m512 chars = loadu_maskz_m512(mask, buf); - u64a z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars); - DEBUG_PRINTF("mask 0x%016llx z 0x%016llx\n", mask, z); - const u8 *rv = lastMatch(buf, z | ~mask); - - if (rv) { - return rv; - } - return buf - 1; -} - -const u8 *rtruffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end) { - const m512 wide_clear = set4x128(shuf_mask_lo_highclear); - const m512 wide_set = set4x128(shuf_mask_lo_highset); - assert(buf && buf_end); - assert(buf < buf_end); - const u8 *rv; - - DEBUG_PRINTF("len %zu\n", buf_end - buf); - - if (buf_end - buf < 64) { - return truffleRevMini(wide_clear, wide_set, buf, buf_end); - } - - assert(buf_end - buf >= 64); - - // Preconditioning: most of the time our buffer won't be aligned. - m512 chars = loadu512(buf_end - 64); - rv = revBlock(wide_clear, wide_set, chars, buf_end - 64); - if (rv) { - return rv; - } - buf_end = (const u8 *)ROUNDDOWN_N((uintptr_t)buf_end, 64); - - const u8 *last_block = buf + 64; - while (buf_end > last_block) { - buf_end -= 64; - m512 lchars = load512(buf_end); - rv = revBlock(wide_clear, wide_set, lchars, buf_end); - if (rv) { - return rv; - } - } - - // Use an unaligned load to mop up the last 64 bytes and get an accurate - // picture to buf_end. - chars = loadu512(buf); - rv = revBlock(wide_clear, wide_set, chars, buf); - if (rv) { - return rv; - } - return buf - 1; -} - -#endif +#elif !defined(HAVE_AVX512) + +// AVX2 + +static really_inline +const u8 *lastMatch(const u8 *buf, u32 z) { + if (unlikely(z != 0xffffffff)) { + u32 pos = clz32(~z); + assert(pos < 32); + return buf + (31 - pos); + } + + return NULL; // no match +} + +static really_inline +const u8 *firstMatch(const u8 *buf, u32 z) { + if (unlikely(z != 0xffffffff)) { + u32 pos = ctz32(~z); + assert(pos < 32); + return buf + pos; + } + + return NULL; // no match +} + +static really_inline +u32 block(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, m256 v) { + + m256 highconst = _mm256_set1_epi8(0x80); + m256 shuf_mask_hi = _mm256_set1_epi64x(0x8040201008040201); + + // and now do the real work + m256 shuf1 = pshufb_m256(shuf_mask_lo_highclear, v); + m256 t1 = xor256(v, highconst); + m256 shuf2 = pshufb_m256(shuf_mask_lo_highset, t1); + m256 t2 = andnot256(highconst, rshift64_m256(v, 4)); + m256 shuf3 = pshufb_m256(shuf_mask_hi, t2); + m256 tmp = and256(or256(shuf1, shuf2), shuf3); + m256 tmp2 = eq256(tmp, zeroes256()); + u32 z = movemask256(tmp2); + + return z; +} + +static +const u8 *truffleMini(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end) { + uintptr_t len = buf_end - buf; + assert(len < 32); + + m256 chars = zeroes256(); + memcpy(&chars, buf, len); + + u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars); + // can't be these bytes in z + u32 mask = (0xffffffff >> (32 - len)) ^ 0xffffffff; + const u8 *rv = firstMatch(buf, z | mask); + + if (rv) { + return rv; + } else { + return buf_end; + } +} + +static really_inline +const u8 *fwdBlock(m256 shuf_mask_lo_highclear, m256 
shuf_mask_lo_highset, + m256 v, const u8 *buf) { + u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, v); + return firstMatch(buf, z); +} + +static really_inline +const u8 *revBlock(m256 shuf_mask_lo_highclear, m256 shuf_mask_lo_highset, + m256 v, const u8 *buf) { + u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, v); + return lastMatch(buf, z); +} + +const u8 *truffleExec(m128 shuf_mask_lo_highclear, + m128 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end) { + DEBUG_PRINTF("len %zu\n", buf_end - buf); + const m256 wide_clear = set2x128(shuf_mask_lo_highclear); + const m256 wide_set = set2x128(shuf_mask_lo_highset); + + assert(buf && buf_end); + assert(buf < buf_end); + const u8 *rv; + + if (buf_end - buf < 32) { + return truffleMini(wide_clear, wide_set, buf, buf_end); + } + + size_t min = (size_t)buf % 32; + assert(buf_end - buf >= 32); + + // Preconditioning: most of the time our buffer won't be aligned. + m256 chars = loadu256(buf); + rv = fwdBlock(wide_clear, wide_set, chars, buf); + if (rv) { + return rv; + } + buf += (32 - min); + + const u8 *last_block = buf_end - 32; + while (buf < last_block) { + m256 lchars = load256(buf); + rv = fwdBlock(wide_clear, wide_set, lchars, buf); + if (rv) { + return rv; + } + buf += 32; + } + + // Use an unaligned load to mop up the last 32 bytes and get an accurate + // picture to buf_end. + assert(buf <= buf_end && buf >= buf_end - 32); + chars = loadu256(buf_end - 32); + rv = fwdBlock(wide_clear, wide_set, chars, buf_end - 32); + if (rv) { + return rv; + } + return buf_end; +} + +static +const u8 *truffleRevMini(m256 shuf_mask_lo_highclear, + m256 shuf_mask_lo_highset, const u8 *buf, + const u8 *buf_end) { + uintptr_t len = buf_end - buf; + assert(len < 32); + + m256 chars = zeroes256(); + memcpy(&chars, buf, len); + + u32 mask = (0xffffffff >> (32 - len)) ^ 0xffffffff; + u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars); + const u8 *rv = lastMatch(buf, z | mask); + + if (rv) { + return rv; + } + return buf - 1; +} + + +const u8 *rtruffleExec(m128 shuf_mask_lo_highclear, + m128 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end) { + const m256 wide_clear = set2x128(shuf_mask_lo_highclear); + const m256 wide_set = set2x128(shuf_mask_lo_highset); + assert(buf && buf_end); + assert(buf < buf_end); + const u8 *rv; + + DEBUG_PRINTF("len %zu\n", buf_end - buf); + + if (buf_end - buf < 32) { + return truffleRevMini(wide_clear, wide_set, buf, buf_end); + } + + assert(buf_end - buf >= 32); + + // Preconditioning: most of the time our buffer won't be aligned. + m256 chars = loadu256(buf_end - 32); + rv = revBlock(wide_clear, wide_set, chars, + buf_end - 32); + if (rv) { + return rv; + } + buf_end = (const u8 *)((size_t)buf_end & ~((size_t)0x1f)); + + const u8 *last_block = buf + 32; + while (buf_end > last_block) { + buf_end -= 32; + m256 lchars = load256(buf_end); + rv = revBlock(wide_clear, wide_set, lchars, buf_end); + if (rv) { + return rv; + } + } + + // Use an unaligned load to mop up the last 32 bytes and get an accurate + // picture to buf_end. 
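Read per lane, block() above computes a class-membership test. The following scalar model (an editorial sketch, not library code) shows what one byte's lane evaluates to: in the vector version, pshufb performs the two mask[v & 0xf] lookups, the high bit of v decides which lookup survives, and shuf_mask_hi supplies the 1 << ((v >> 4) & 0x7) bit, so a byte is in the class exactly when its bit in z ends up clear.

    #include <stdio.h>

    typedef unsigned char u8;

    /* Scalar model of one lane of block(): nonzero iff v is in the class
     * described by the two 16-byte truffle masks. */
    static int truffle_member(const u8 *highclear, const u8 *highset, u8 v) {
        u8 lookup = (v & 0x80) ? highset[v & 0xf] : highclear[v & 0xf];
        u8 bit = (u8)(1 << ((v >> 4) & 0x7));
        return (lookup & bit) != 0;
    }

    int main(void) {
        u8 highclear[16] = {0}, highset[16] = {0};
        highclear[0x61 & 0xf] |= 1 << ((0x61 >> 4) & 0x7); /* add 'a' */
        printf("'a': %d, 'b': %d\n",
               truffle_member(highclear, highset, 'a'),
               truffle_member(highclear, highset, 'b'));
        return 0;
    }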
+ chars = loadu256(buf); + rv = revBlock(wide_clear, wide_set, chars, buf); + if (rv) { + return rv; + } + return buf - 1; +} + +#else // AVX512 + +static really_inline +const u8 *lastMatch(const u8 *buf, u64a z) { + if (unlikely(z != ~0ULL)) { + u64a pos = clz64(~z); + assert(pos < 64); + return buf + (63 - pos); + } + + return NULL; // no match +} + +static really_inline +const u8 *firstMatch(const u8 *buf, u64a z) { + if (unlikely(z != ~0ULL)) { + u64a pos = ctz64(~z); + assert(pos < 64); + DEBUG_PRINTF("pos %llu\n", pos); + return buf + pos; + } + + return NULL; // no match +} + +static really_inline +u64a block(m512 shuf_mask_lo_highclear, m512 shuf_mask_lo_highset, m512 v) { + m512 highconst = set64x8(0x80); + m512 shuf_mask_hi = set8x64(0x8040201008040201); + + // and now do the real work + m512 shuf1 = pshufb_m512(shuf_mask_lo_highclear, v); + m512 t1 = xor512(v, highconst); + m512 shuf2 = pshufb_m512(shuf_mask_lo_highset, t1); + m512 t2 = andnot512(highconst, rshift64_m512(v, 4)); + m512 shuf3 = pshufb_m512(shuf_mask_hi, t2); + m512 tmp = and512(or512(shuf1, shuf2), shuf3); + u64a z = eq512mask(tmp, zeroes512()); + + return z; +} + +static really_inline +const u8 *truffleMini(m512 shuf_mask_lo_highclear, m512 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end) { + uintptr_t len = buf_end - buf; + assert(len <= 64); + + __mmask64 mask = (~0ULL) >> (64 - len); + + m512 chars = loadu_maskz_m512(mask, buf); + + u64a z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars); + + const u8 *rv = firstMatch(buf, z | ~mask); + + return rv; +} + +static really_inline +const u8 *fwdBlock(m512 shuf_mask_lo_highclear, m512 shuf_mask_lo_highset, + m512 v, const u8 *buf) { + u64a z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, v); + return firstMatch(buf, z); +} + +static really_inline +const u8 *revBlock(m512 shuf_mask_lo_highclear, m512 shuf_mask_lo_highset, + m512 v, const u8 *buf) { + u64a z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, v); + return lastMatch(buf, z); +} + +const u8 *truffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end) { + DEBUG_PRINTF("len %zu\n", buf_end - buf); + const m512 wide_clear = set4x128(shuf_mask_lo_highclear); + const m512 wide_set = set4x128(shuf_mask_lo_highset); + + assert(buf && buf_end); + assert(buf < buf_end); + const u8 *rv; + + if (buf_end - buf <= 64) { + rv = truffleMini(wide_clear, wide_set, buf, buf_end); + return rv ? rv : buf_end; + } + + assert(buf_end - buf >= 64); + if ((uintptr_t)buf % 64) { + // Preconditioning: most of the time our buffer won't be aligned. + rv = truffleMini(wide_clear, wide_set, buf, ROUNDUP_PTR(buf, 64)); + if (rv) { + return rv; + } + buf = ROUNDUP_PTR(buf, 64); + } + const u8 *last_block = buf_end - 64; + while (buf < last_block) { + m512 lchars = load512(buf); + rv = fwdBlock(wide_clear, wide_set, lchars, buf); + if (rv) { + return rv; + } + buf += 64; + } + + // Use an unaligned load to mop up the last 64 bytes and get an accurate + // picture to buf_end. 
+ assert(buf <= buf_end && buf >= buf_end - 64); + m512 chars = loadu512(buf_end - 64); + rv = fwdBlock(wide_clear, wide_set, chars, buf_end - 64); + if (rv) { + return rv; + } + return buf_end; +} + +static really_inline +const u8 *truffleRevMini(m512 shuf_mask_lo_highclear, m512 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end) { + uintptr_t len = buf_end - buf; + assert(len < 64); + + __mmask64 mask = (~0ULL) >> (64 - len); + m512 chars = loadu_maskz_m512(mask, buf); + u64a z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars); + DEBUG_PRINTF("mask 0x%016llx z 0x%016llx\n", mask, z); + const u8 *rv = lastMatch(buf, z | ~mask); + + if (rv) { + return rv; + } + return buf - 1; +} + +const u8 *rtruffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end) { + const m512 wide_clear = set4x128(shuf_mask_lo_highclear); + const m512 wide_set = set4x128(shuf_mask_lo_highset); + assert(buf && buf_end); + assert(buf < buf_end); + const u8 *rv; + + DEBUG_PRINTF("len %zu\n", buf_end - buf); + + if (buf_end - buf < 64) { + return truffleRevMini(wide_clear, wide_set, buf, buf_end); + } + + assert(buf_end - buf >= 64); + + // Preconditioning: most of the time our buffer won't be aligned. + m512 chars = loadu512(buf_end - 64); + rv = revBlock(wide_clear, wide_set, chars, buf_end - 64); + if (rv) { + return rv; + } + buf_end = (const u8 *)ROUNDDOWN_N((uintptr_t)buf_end, 64); + + const u8 *last_block = buf + 64; + while (buf_end > last_block) { + buf_end -= 64; + m512 lchars = load512(buf_end); + rv = revBlock(wide_clear, wide_set, lchars, buf_end); + if (rv) { + return rv; + } + } + + // Use an unaligned load to mop up the last 64 bytes and get an accurate + // picture to buf_end. + chars = loadu512(buf); + rv = revBlock(wide_clear, wide_set, chars, buf); + if (rv) { + return rv; + } + return buf - 1; +} + +#endif diff --git a/contrib/libs/hyperscan/src/nfa/truffle.h b/contrib/libs/hyperscan/src/nfa/truffle.h index f67227ad1e..24ece2a9f5 100644 --- a/contrib/libs/hyperscan/src/nfa/truffle.h +++ b/contrib/libs/hyperscan/src/nfa/truffle.h @@ -26,17 +26,17 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file - * \brief Truffle: fully general character class acceleration. - * - * Utilises the SSSE3 pshufb or AVX2 vpshufb shuffle instructions - */ - +/** \file + * \brief Truffle: fully general character class acceleration. + * + * Utilises the SSSE3 pshufb or AVX2 vpshufb shuffle instructions + */ + #ifndef TRUFFLE_H #define TRUFFLE_H - + #include "util/simd_types.h" - + #ifdef __cplusplus extern "C" { diff --git a/contrib/libs/hyperscan/src/nfa/trufflecompile.cpp b/contrib/libs/hyperscan/src/nfa/trufflecompile.cpp index f19de0ee04..29fc503ce5 100644 --- a/contrib/libs/hyperscan/src/nfa/trufflecompile.cpp +++ b/contrib/libs/hyperscan/src/nfa/trufflecompile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -32,16 +32,16 @@ * truffle is always able to represent an entire character class, providing a * backstop to other acceleration engines. 
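The mask layout that truffleBuildMasks() below constructs can be shown in miniature; the names here are illustrative, but the bit positions follow the comment in the source (the low nibble selects the byte, bits 4-6 select the bit within it, and bit 7 selects which of the two masks is used):

    #include <stdio.h>

    typedef unsigned char u8;

    /* Sketch of the truffle mask encoding: one 16-byte mask for characters
     * with the high bit clear, one for those with it set. */
    static void add_char(u8 *highclear, u8 *highset, u8 v) {
        u8 *mask = (v & 0x80) ? highset : highclear;
        mask[v & 0xf] |= (u8)(1 << ((v & 0x70) >> 4));
    }

    int main(void) {
        u8 highclear[16] = {0}, highset[16] = {0};
        add_char(highclear, highset, 0x61); /* 'a': highclear[1] |= 0x40 */
        add_char(highclear, highset, 0xe9); /* highset[9] |= 0x40 */
        printf("highclear[1]=0x%02x highset[9]=0x%02x\n",
               highclear[1], highset[9]);
        return 0;
    }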
*/ - + #include "trufflecompile.h" - + #include "ue2common.h" #include "util/charreach.h" -#include "util/dump_mask.h" +#include "util/dump_mask.h" #include "util/simd_types.h" -#include <cstring> - +#include <cstring> + using namespace std; namespace ue2 { @@ -56,15 +56,15 @@ namespace ue2 { * bits 456 is the bit that is set at that offset. */ -void truffleBuildMasks(const CharReach &cr, u8 *shuf_mask_lo_highclear, - u8 *shuf_mask_lo_highset) { - memset(shuf_mask_lo_highset, 0, sizeof(m128)); - memset(shuf_mask_lo_highclear, 0, sizeof(m128)); +void truffleBuildMasks(const CharReach &cr, u8 *shuf_mask_lo_highclear, + u8 *shuf_mask_lo_highset) { + memset(shuf_mask_lo_highset, 0, sizeof(m128)); + memset(shuf_mask_lo_highclear, 0, sizeof(m128)); for (size_t v = cr.find_first(); v != CharReach::npos; v = cr.find_next(v)) { DEBUG_PRINTF("adding 0x%02x to %s\n", (u8)v, (v & 0x80) ? "highset" : "highclear"); - u8 *change_mask = (v & 0x80) ? shuf_mask_lo_highset : shuf_mask_lo_highclear; + u8 *change_mask = (v & 0x80) ? shuf_mask_lo_highset : shuf_mask_lo_highclear; u8 low_nibble = v & 0xf; u8 bits_456 = (v & 0x70) >> 4; change_mask[low_nibble] |= 1 << bits_456; @@ -74,16 +74,16 @@ void truffleBuildMasks(const CharReach &cr, u8 *shuf_mask_lo_highclear, /* * Reconstruct the charclass that the truffle masks represent */ -CharReach truffle2cr(const u8 *highclear, const u8 *highset) { +CharReach truffle2cr(const u8 *highclear, const u8 *highset) { CharReach cr; for (u8 i = 0; i < 16; i++) { - u32 bits_456 = highclear[i]; + u32 bits_456 = highclear[i]; while (bits_456) { u32 pos = findAndClearLSB_32(&bits_456); assert(pos < 8); cr.set(pos << 4 | i); } - bits_456 = highset[i]; + bits_456 = highset[i]; while (bits_456) { u32 pos = findAndClearLSB_32(&bits_456); assert(pos < 8); diff --git a/contrib/libs/hyperscan/src/nfa/trufflecompile.h b/contrib/libs/hyperscan/src/nfa/trufflecompile.h index 14b314f391..fa983508ec 100644 --- a/contrib/libs/hyperscan/src/nfa/trufflecompile.h +++ b/contrib/libs/hyperscan/src/nfa/trufflecompile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -34,8 +34,8 @@ namespace ue2 { -void truffleBuildMasks(const CharReach &cr, u8 *mask1, u8 *mask2); -CharReach truffle2cr(const u8 *lo_in, const u8 *hi_in); +void truffleBuildMasks(const CharReach &cr, u8 *mask1, u8 *mask2); +CharReach truffle2cr(const u8 *lo_in, const u8 *hi_in); } diff --git a/contrib/libs/hyperscan/src/nfa/vermicelli.h b/contrib/libs/hyperscan/src/nfa/vermicelli.h index ed797d83f9..82a241dcf3 100644 --- a/contrib/libs/hyperscan/src/nfa/vermicelli.h +++ b/contrib/libs/hyperscan/src/nfa/vermicelli.h @@ -87,7 +87,7 @@ const u8 *vermicelliExec(char c, char nocase, const u8 *buf, } buf += VERM_BOUNDARY - min; - assert(buf < buf_end); + assert(buf < buf_end); } // Aligned loops from here on in @@ -153,7 +153,7 @@ const u8 *nvermicelliExec(char c, char nocase, const u8 *buf, } buf += VERM_BOUNDARY - min; - assert(buf < buf_end); + assert(buf < buf_end); } // Aligned loops from here on in @@ -214,49 +214,49 @@ const u8 *vermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf, } buf += VERM_BOUNDARY - min; - assert(buf < buf_end); - } - - // Aligned loops from here on in - const u8 *ptr = nocase ? 
dvermSearchAlignedNocase(chars1, chars2, c1, c2, - buf, buf_end) - : dvermSearchAligned(chars1, chars2, c1, c2, buf, - buf_end); - if (ptr) { - return ptr; - } - - // Tidy up the mess at the end - ptr = nocase ? dvermPreconditionNocase(chars1, chars2, - buf_end - VERM_BOUNDARY) - : dvermPrecondition(chars1, chars2, buf_end - VERM_BOUNDARY); - - if (ptr) { - return ptr; - } - - /* check for partial match at end */ - u8 mask = nocase ? CASE_CLEAR : 0xff; - if ((buf_end[-1] & mask) == (u8)c1) { - DEBUG_PRINTF("partial!!!\n"); - return buf_end - 1; - } - - return buf_end; -} - -static really_inline -const u8 *vermicelliDoubleMaskedExec(char c1, char c2, char m1, char m2, - const u8 *buf, const u8 *buf_end) { - DEBUG_PRINTF("double verm scan (\\x%02hhx&\\x%02hhx)(\\x%02hhx&\\x%02hhx) " - "over %zu bytes\n", c1, m1, c2, m2, (size_t)(buf_end - buf)); - assert(buf < buf_end); - - VERM_TYPE chars1 = VERM_SET_FN(c1); - VERM_TYPE chars2 = VERM_SET_FN(c2); - VERM_TYPE mask1 = VERM_SET_FN(m1); - VERM_TYPE mask2 = VERM_SET_FN(m2); - + assert(buf < buf_end); + } + + // Aligned loops from here on in + const u8 *ptr = nocase ? dvermSearchAlignedNocase(chars1, chars2, c1, c2, + buf, buf_end) + : dvermSearchAligned(chars1, chars2, c1, c2, buf, + buf_end); + if (ptr) { + return ptr; + } + + // Tidy up the mess at the end + ptr = nocase ? dvermPreconditionNocase(chars1, chars2, + buf_end - VERM_BOUNDARY) + : dvermPrecondition(chars1, chars2, buf_end - VERM_BOUNDARY); + + if (ptr) { + return ptr; + } + + /* check for partial match at end */ + u8 mask = nocase ? CASE_CLEAR : 0xff; + if ((buf_end[-1] & mask) == (u8)c1) { + DEBUG_PRINTF("partial!!!\n"); + return buf_end - 1; + } + + return buf_end; +} + +static really_inline +const u8 *vermicelliDoubleMaskedExec(char c1, char c2, char m1, char m2, + const u8 *buf, const u8 *buf_end) { + DEBUG_PRINTF("double verm scan (\\x%02hhx&\\x%02hhx)(\\x%02hhx&\\x%02hhx) " + "over %zu bytes\n", c1, m1, c2, m2, (size_t)(buf_end - buf)); + assert(buf < buf_end); + + VERM_TYPE chars1 = VERM_SET_FN(c1); + VERM_TYPE chars2 = VERM_SET_FN(c2); + VERM_TYPE mask1 = VERM_SET_FN(m1); + VERM_TYPE mask2 = VERM_SET_FN(m2); + #ifdef HAVE_AVX512 if (buf_end - buf <= VERM_BOUNDARY) { const u8 *ptr = dvermMiniMasked(chars1, chars2, mask1, mask2, buf, @@ -277,42 +277,42 @@ const u8 *vermicelliDoubleMaskedExec(char c1, char c2, char m1, char m2, assert((buf_end - buf) >= VERM_BOUNDARY); uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY; - if (min) { - // Input isn't aligned, so we need to run one iteration with an - // unaligned load, then skip buf forward to the next aligned address. - // There's some small overlap here, but we don't mind scanning it twice - // if we can do it quickly, do we? - const u8 *p = dvermPreconditionMasked(chars1, chars2, mask1, mask2, buf); - if (p) { - return p; + if (min) { + // Input isn't aligned, so we need to run one iteration with an + // unaligned load, then skip buf forward to the next aligned address. + // There's some small overlap here, but we don't mind scanning it twice + // if we can do it quickly, do we? 
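What the masked double-vermicelli search below computes per 16-byte step can be written out in scalar form. This is a sketch with illustrative values; as in vermicelliDoubleMaskedExec(), c1 and c2 are the already-masked target bytes:

    #include <stdio.h>

    typedef unsigned char u8;
    typedef unsigned int u32;

    /* Scalar model of one 16-byte step: bit i of z is set when
     * (buf[i] & m1) == c1 and (buf[i+1] & m2) == c2. The SIMD version
     * builds the same z from two compares, a one-byte shift and an AND,
     * with bit 15 patched up by peeking at buf[16]. */
    static u32 dverm_step(const u8 *buf, u8 c1, u8 c2, u8 m1, u8 m2) {
        u32 z = 0;
        for (int i = 0; i < 16; i++) {
            if ((buf[i] & m1) == c1 && (buf[i + 1] & m2) == c2) {
                z |= 1u << i;
            }
        }
        return z;
    }

    int main(void) {
        const u8 buf[32] = "xxxxxxxxxxxxxxAbxxxxxxxxxxxxxxxx";
        /* caseless "ab": clear the ASCII case bit with mask 0xdf */
        u32 z = dverm_step(buf, 'A' & 0xdf, 'B' & 0xdf, 0xdf, 0xdf);
        printf("z = 0x%04x\n", z); /* bit 14 set: 'A' at 14, 'b' at 15 */
        return 0;
    }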
+ const u8 *p = dvermPreconditionMasked(chars1, chars2, mask1, mask2, buf); + if (p) { + return p; } - - buf += VERM_BOUNDARY - min; - assert(buf < buf_end); + + buf += VERM_BOUNDARY - min; + assert(buf < buf_end); } // Aligned loops from here on in - const u8 *ptr = dvermSearchAlignedMasked(chars1, chars2, mask1, mask2, c1, - c2, m1, m2, buf, buf_end); - if (ptr) { - return ptr; - } - - // Tidy up the mess at the end - ptr = dvermPreconditionMasked(chars1, chars2, mask1, mask2, - buf_end - VERM_BOUNDARY); - - if (ptr) { - return ptr; + const u8 *ptr = dvermSearchAlignedMasked(chars1, chars2, mask1, mask2, c1, + c2, m1, m2, buf, buf_end); + if (ptr) { + return ptr; } - - /* check for partial match at end */ - if ((buf_end[-1] & m1) == (u8)c1) { + + // Tidy up the mess at the end + ptr = dvermPreconditionMasked(chars1, chars2, mask1, mask2, + buf_end - VERM_BOUNDARY); + + if (ptr) { + return ptr; + } + + /* check for partial match at end */ + if ((buf_end[-1] & m1) == (u8)c1) { DEBUG_PRINTF("partial!!!\n"); - return buf_end - 1; - } - - return buf_end; + return buf_end - 1; + } + + return buf_end; } // Reverse vermicelli scan. Provides exact semantics and returns (buf - 1) if diff --git a/contrib/libs/hyperscan/src/nfa/vermicelli_sse.h b/contrib/libs/hyperscan/src/nfa/vermicelli_sse.h index 3307486cff..70dc1f4d00 100644 --- a/contrib/libs/hyperscan/src/nfa/vermicelli_sse.h +++ b/contrib/libs/hyperscan/src/nfa/vermicelli_sse.h @@ -140,7 +140,7 @@ const u8 *dvermSearchAligned(m128 chars1, m128 chars2, u8 c1, u8 c2, for (; buf + 16 < buf_end; buf += 16) { m128 data = load128(buf); u32 z = movemask128(and128(eq128(chars1, data), - rshiftbyte_m128(eq128(chars2, data), 1))); + rshiftbyte_m128(eq128(chars2, data), 1))); if (buf[15] == c1 && buf[16] == c2) { z |= (1 << 15); } @@ -149,8 +149,8 @@ const u8 *dvermSearchAligned(m128 chars1, m128 chars2, u8 c1, u8 c2, return buf + pos; } } - - return NULL; + + return NULL; } static really_inline @@ -163,7 +163,7 @@ const u8 *dvermSearchAlignedNocase(m128 chars1, m128 chars2, u8 c1, u8 c2, m128 data = load128(buf); m128 v = and128(casemask, data); u32 z = movemask128(and128(eq128(chars1, v), - rshiftbyte_m128(eq128(chars2, v), 1))); + rshiftbyte_m128(eq128(chars2, v), 1))); if ((buf[15] & CASE_CLEAR) == c1 && (buf[16] & CASE_CLEAR) == c2) { z |= (1 << 15); } @@ -172,40 +172,40 @@ const u8 *dvermSearchAlignedNocase(m128 chars1, m128 chars2, u8 c1, u8 c2, return buf + pos; } } - - return NULL; -} - -static really_inline -const u8 *dvermSearchAlignedMasked(m128 chars1, m128 chars2, - m128 mask1, m128 mask2, u8 c1, u8 c2, u8 m1, - u8 m2, const u8 *buf, const u8 *buf_end) { - assert((size_t)buf % 16 == 0); - - for (; buf + 16 < buf_end; buf += 16) { - m128 data = load128(buf); - m128 v1 = eq128(chars1, and128(data, mask1)); - m128 v2 = eq128(chars2, and128(data, mask2)); - u32 z = movemask128(and128(v1, rshiftbyte_m128(v2, 1))); - - if ((buf[15] & m1) == c1 && (buf[16] & m2) == c2) { - z |= (1 << 15); - } - if (unlikely(z)) { - u32 pos = ctz32(z); - return buf + pos; - } - } - - return NULL; -} - + + return NULL; +} + +static really_inline +const u8 *dvermSearchAlignedMasked(m128 chars1, m128 chars2, + m128 mask1, m128 mask2, u8 c1, u8 c2, u8 m1, + u8 m2, const u8 *buf, const u8 *buf_end) { + assert((size_t)buf % 16 == 0); + + for (; buf + 16 < buf_end; buf += 16) { + m128 data = load128(buf); + m128 v1 = eq128(chars1, and128(data, mask1)); + m128 v2 = eq128(chars2, and128(data, mask2)); + u32 z = movemask128(and128(v1, rshiftbyte_m128(v2, 1))); + + if 
((buf[15] & m1) == c1 && (buf[16] & m2) == c2) { + z |= (1 << 15); + } + if (unlikely(z)) { + u32 pos = ctz32(z); + return buf + pos; + } + } + + return NULL; +} + // returns NULL if not found static really_inline const u8 *dvermPrecondition(m128 chars1, m128 chars2, const u8 *buf) { m128 data = loadu128(buf); // unaligned u32 z = movemask128(and128(eq128(chars1, data), - rshiftbyte_m128(eq128(chars2, data), 1))); + rshiftbyte_m128(eq128(chars2, data), 1))); /* no fixup of the boundary required - the aligned run will pick it up */ if (unlikely(z)) { @@ -223,24 +223,7 @@ const u8 *dvermPreconditionNocase(m128 chars1, m128 chars2, const u8 *buf) { m128 data = loadu128(buf); // unaligned m128 v = and128(casemask, data); u32 z = movemask128(and128(eq128(chars1, v), - rshiftbyte_m128(eq128(chars2, v), 1))); - - /* no fixup of the boundary required - the aligned run will pick it up */ - if (unlikely(z)) { - u32 pos = ctz32(z); - return buf + pos; - } - return NULL; -} - -// returns NULL if not found -static really_inline -const u8 *dvermPreconditionMasked(m128 chars1, m128 chars2, - m128 mask1, m128 mask2, const u8 *buf) { - m128 data = loadu128(buf); // unaligned - m128 v1 = eq128(chars1, and128(data, mask1)); - m128 v2 = eq128(chars2, and128(data, mask2)); - u32 z = movemask128(and128(v1, rshiftbyte_m128(v2, 1))); + rshiftbyte_m128(eq128(chars2, v), 1))); /* no fixup of the boundary required - the aligned run will pick it up */ if (unlikely(z)) { @@ -250,7 +233,24 @@ const u8 *dvermPreconditionMasked(m128 chars1, m128 chars2, return NULL; } +// returns NULL if not found static really_inline +const u8 *dvermPreconditionMasked(m128 chars1, m128 chars2, + m128 mask1, m128 mask2, const u8 *buf) { + m128 data = loadu128(buf); // unaligned + m128 v1 = eq128(chars1, and128(data, mask1)); + m128 v2 = eq128(chars2, and128(data, mask2)); + u32 z = movemask128(and128(v1, rshiftbyte_m128(v2, 1))); + + /* no fixup of the boundary required - the aligned run will pick it up */ + if (unlikely(z)) { + u32 pos = ctz32(z); + return buf + pos; + } + return NULL; +} + +static really_inline const u8 *lastMatchOffset(const u8 *buf_end, u32 z) { assert(z); return buf_end - 16 + 31 - clz32(z); @@ -329,7 +329,7 @@ const u8 *rdvermSearchAligned(m128 chars1, m128 chars2, u8 c1, u8 c2, for (; buf + 16 < buf_end; buf_end -= 16) { m128 data = load128(buf_end - 16); u32 z = movemask128(and128(eq128(chars2, data), - lshiftbyte_m128(eq128(chars1, data), 1))); + lshiftbyte_m128(eq128(chars1, data), 1))); if (buf_end[-17] == c1 && buf_end[-16] == c2) { z |= 1; } @@ -350,7 +350,7 @@ const u8 *rdvermSearchAlignedNocase(m128 chars1, m128 chars2, u8 c1, u8 c2, m128 data = load128(buf_end - 16); m128 v = and128(casemask, data); u32 z = movemask128(and128(eq128(chars2, v), - lshiftbyte_m128(eq128(chars1, v), 1))); + lshiftbyte_m128(eq128(chars1, v), 1))); if ((buf_end[-17] & CASE_CLEAR) == c1 && (buf_end[-16] & CASE_CLEAR) == c2) { z |= 1; @@ -367,7 +367,7 @@ static really_inline const u8 *rdvermPrecondition(m128 chars1, m128 chars2, const u8 *buf) { m128 data = loadu128(buf); u32 z = movemask128(and128(eq128(chars2, data), - lshiftbyte_m128(eq128(chars1, data), 1))); + lshiftbyte_m128(eq128(chars1, data), 1))); /* no fixup of the boundary required - the aligned run will pick it up */ if (unlikely(z)) { @@ -385,7 +385,7 @@ const u8 *rdvermPreconditionNocase(m128 chars1, m128 chars2, const u8 *buf) { m128 data = loadu128(buf); m128 v = and128(casemask, data); u32 z = movemask128(and128(eq128(chars2, v), - lshiftbyte_m128(eq128(chars1, 
v), 1)));
+                               lshiftbyte_m128(eq128(chars1, v), 1)));
 
     /* no fixup of the boundary required - the aligned run will pick it up */
     if (unlikely(z)) {
         return lastMatchOffset(buf + 16, z);
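One detail of the reverse scans above worth spelling out is lastMatchOffset(): for a 16-bit movemask z whose highest set bit is k, clz32(z) equals 31 - k, so buf_end - 16 + 31 - clz32(z) is simply the block start plus k, i.e. the position of the last match in the block. A quick check, using the GCC/Clang builtin in place of the library's clz32():

    #include <stdio.h>

    int main(void) {
        unsigned z = 0x0081;  /* matches at block offsets 0 and 7 */
        unsigned k = 31 - (unsigned)__builtin_clz(z);
        printf("last match at block offset %u\n", k); /* prints 7 */
        return 0;
    }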