author    | bnagaev <bnagaev@yandex-team.ru>             | 2022-02-10 16:47:04 +0300
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:47:04 +0300
commit    | c74559fb88da8adac0d9186cfa55a6b13c47695f (patch)
tree      | b83306b6e37edeea782e9eed673d89286c4fef35 /contrib/libs/hyperscan/src/nfa
parent    | d6449ba66291ff0c0d352c82e6eb3efb4c8a7e8d (diff)
download  | ydb-c74559fb88da8adac0d9186cfa55a6b13c47695f.tar.gz
Restoring authorship annotation for <bnagaev@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/hyperscan/src/nfa')
84 files changed, 22615 insertions, 22615 deletions
diff --git a/contrib/libs/hyperscan/src/nfa/accel.c b/contrib/libs/hyperscan/src/nfa/accel.c index 82e94d40a7..2bc60945f9 100644 --- a/contrib/libs/hyperscan/src/nfa/accel.c +++ b/contrib/libs/hyperscan/src/nfa/accel.c @@ -1,86 +1,86 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "accel.h" -#include "shufti.h" -#include "truffle.h" -#include "vermicelli.h" -#include "ue2common.h" - -const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) { - assert(ISALIGNED_N(accel, alignof(union AccelAux))); - const u8 *rv; - - switch (accel->accel_type) { - case ACCEL_NONE: - DEBUG_PRINTF("accel none %p %p\n", c, c_end); - return c; - - case ACCEL_VERM: - DEBUG_PRINTF("accel verm %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = vermicelliExec(accel->verm.c, 0, c, c_end); - break; - - case ACCEL_VERM_NOCASE: - DEBUG_PRINTF("accel verm nc %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = vermicelliExec(accel->verm.c, 1, c, c_end); - break; - - case ACCEL_DVERM: - DEBUG_PRINTF("accel dverm %p %p\n", c, c_end); - if (c + 16 + 1 >= c_end) { - return c; - } - - /* need to stop one early to get an accurate end state */ - rv = vermicelliDoubleExec(accel->dverm.c1, accel->dverm.c2, 0, c, - c_end - 1); - break; - - case ACCEL_DVERM_NOCASE: - DEBUG_PRINTF("accel dverm nc %p %p\n", c, c_end); - if (c + 16 + 1 >= c_end) { - return c; - } - - /* need to stop one early to get an accurate end state */ - rv = vermicelliDoubleExec(accel->dverm.c1, accel->dverm.c2, 1, c, - c_end - 1); - break; - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "accel.h" +#include "shufti.h" +#include "truffle.h" +#include "vermicelli.h" +#include "ue2common.h" + +const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) { + assert(ISALIGNED_N(accel, alignof(union AccelAux))); + const u8 *rv; + + switch (accel->accel_type) { + case ACCEL_NONE: + DEBUG_PRINTF("accel none %p %p\n", c, c_end); + return c; + + case ACCEL_VERM: + DEBUG_PRINTF("accel verm %p %p\n", c, c_end); + if (c + 15 >= c_end) { + return c; + } + + rv = vermicelliExec(accel->verm.c, 0, c, c_end); + break; + + case ACCEL_VERM_NOCASE: + DEBUG_PRINTF("accel verm nc %p %p\n", c, c_end); + if (c + 15 >= c_end) { + return c; + } + + rv = vermicelliExec(accel->verm.c, 1, c, c_end); + break; + + case ACCEL_DVERM: + DEBUG_PRINTF("accel dverm %p %p\n", c, c_end); + if (c + 16 + 1 >= c_end) { + return c; + } + + /* need to stop one early to get an accurate end state */ + rv = vermicelliDoubleExec(accel->dverm.c1, accel->dverm.c2, 0, c, + c_end - 1); + break; + + case ACCEL_DVERM_NOCASE: + DEBUG_PRINTF("accel dverm nc %p %p\n", c, c_end); + if (c + 16 + 1 >= c_end) { + return c; + } + + /* need to stop one early to get an accurate end state */ + rv = vermicelliDoubleExec(accel->dverm.c1, accel->dverm.c2, 1, c, + c_end - 1); + break; + case ACCEL_DVERM_MASKED: DEBUG_PRINTF("accel dverm masked %p %p\n", c, c_end); if (c + 16 + 1 >= c_end) { @@ -93,54 +93,54 @@ const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) { c, c_end - 1); break; - case ACCEL_SHUFTI: - DEBUG_PRINTF("accel shufti %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = shuftiExec(accel->shufti.lo, accel->shufti.hi, c, c_end); - break; - - case ACCEL_TRUFFLE: - DEBUG_PRINTF("accel Truffle %p %p\n", c, c_end); - if (c + 15 >= c_end) { - return c; - } - - rv = truffleExec(accel->truffle.mask1, accel->truffle.mask2, c, c_end); - break; - - case ACCEL_DSHUFTI: - DEBUG_PRINTF("accel dshufti %p %p\n", c, c_end); - if (c + 15 + 1 >= c_end) { - return c; - } - - /* need to stop one early to get an accurate end state */ - rv = shuftiDoubleExec(accel->dshufti.lo1, - accel->dshufti.hi1, - accel->dshufti.lo2, - accel->dshufti.hi2, c, c_end - 1); - break; - - case ACCEL_RED_TAPE: - DEBUG_PRINTF("accel red tape %p %p\n", c, c_end); - rv = c_end; - break; - - - 
default: - assert(!"not here"); - return c; - } - - DEBUG_PRINTF("adjusting for offset %u\n", accel->generic.offset); - /* adjust offset to take into account the offset */ - rv = MAX(c + accel->generic.offset, rv); - rv -= accel->generic.offset; - + case ACCEL_SHUFTI: + DEBUG_PRINTF("accel shufti %p %p\n", c, c_end); + if (c + 15 >= c_end) { + return c; + } + + rv = shuftiExec(accel->shufti.lo, accel->shufti.hi, c, c_end); + break; + + case ACCEL_TRUFFLE: + DEBUG_PRINTF("accel Truffle %p %p\n", c, c_end); + if (c + 15 >= c_end) { + return c; + } + + rv = truffleExec(accel->truffle.mask1, accel->truffle.mask2, c, c_end); + break; + + case ACCEL_DSHUFTI: + DEBUG_PRINTF("accel dshufti %p %p\n", c, c_end); + if (c + 15 + 1 >= c_end) { + return c; + } + + /* need to stop one early to get an accurate end state */ + rv = shuftiDoubleExec(accel->dshufti.lo1, + accel->dshufti.hi1, + accel->dshufti.lo2, + accel->dshufti.hi2, c, c_end - 1); + break; + + case ACCEL_RED_TAPE: + DEBUG_PRINTF("accel red tape %p %p\n", c, c_end); + rv = c_end; + break; + + + default: + assert(!"not here"); + return c; + } + + DEBUG_PRINTF("adjusting for offset %u\n", accel->generic.offset); + /* adjust offset to take into account the offset */ + rv = MAX(c + accel->generic.offset, rv); + rv -= accel->generic.offset; + DEBUG_PRINTF("advanced %zd\n", rv - c); - return rv; -} + return rv; +} diff --git a/contrib/libs/hyperscan/src/nfa/accel.h b/contrib/libs/hyperscan/src/nfa/accel.h index 71b93f126c..3a03d05967 100644 --- a/contrib/libs/hyperscan/src/nfa/accel.h +++ b/contrib/libs/hyperscan/src/nfa/accel.h @@ -1,92 +1,92 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Acceleration: data structures and common definitions. - */ - -#ifndef ACCEL_H -#define ACCEL_H - -#include "ue2common.h" - -/* run time defs */ -#define BAD_ACCEL_DIST 4 -#define SMALL_ACCEL_PENALTY 8 -#define BIG_ACCEL_PENALTY 32 - -/// Minimum length of the scan buffer for us to attempt acceleration. 
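Editor's note: the accel.c hunks above show the full `run_accel()` dispatcher, including its offset correction (`rv = MAX(c + accel->generic.offset, rv); rv -= accel->generic.offset;`). A minimal sketch of how a caller might drive it follows; the stop byte `'x'` and the helper name `skip_to_stop` are illustrative assumptions, not code from this commit.

```c
/* Hedged sketch: set up an AccelAux for single-byte vermicelli and skip
 * ahead to the first possible stop character. Assumes accel.h as shown
 * in the diff above. */
#include <string.h>
#include "accel.h"

static const u8 *skip_to_stop(const u8 *c, const u8 *c_end) {
    union AccelAux aux;
    memset(&aux, 0, sizeof(aux));
    aux.verm.accel_type = ACCEL_VERM;
    aux.verm.offset = 0;   /* no offset correction needed here */
    aux.verm.c = 'x';      /* the single stop character (illustrative) */

    /* Returns a pointer before which ACCEL_VERM cannot match; for short
     * buffers (under ~16 bytes) run_accel simply returns c unchanged. */
    return run_accel(&aux, c, c_end);
}
```

The `offset` field exists so a scheme can scan from a fixed distance into the pattern and still report a position in the caller's frame, which is what the MAX/subtract pair at the end of `run_accel()` implements.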
-#define ACCEL_MIN_LEN 16 - -enum AccelType { - ACCEL_NONE, - ACCEL_VERM, - ACCEL_VERM_NOCASE, - ACCEL_DVERM, - ACCEL_DVERM_NOCASE, - ACCEL_RVERM, - ACCEL_RVERM_NOCASE, - ACCEL_RDVERM, - ACCEL_RDVERM_NOCASE, - ACCEL_REOD, - ACCEL_REOD_NOCASE, - ACCEL_RDEOD, - ACCEL_RDEOD_NOCASE, - ACCEL_SHUFTI, - ACCEL_DSHUFTI, - ACCEL_TRUFFLE, + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Acceleration: data structures and common definitions. + */ + +#ifndef ACCEL_H +#define ACCEL_H + +#include "ue2common.h" + +/* run time defs */ +#define BAD_ACCEL_DIST 4 +#define SMALL_ACCEL_PENALTY 8 +#define BIG_ACCEL_PENALTY 32 + +/// Minimum length of the scan buffer for us to attempt acceleration. +#define ACCEL_MIN_LEN 16 + +enum AccelType { + ACCEL_NONE, + ACCEL_VERM, + ACCEL_VERM_NOCASE, + ACCEL_DVERM, + ACCEL_DVERM_NOCASE, + ACCEL_RVERM, + ACCEL_RVERM_NOCASE, + ACCEL_RDVERM, + ACCEL_RDVERM_NOCASE, + ACCEL_REOD, + ACCEL_REOD_NOCASE, + ACCEL_RDEOD, + ACCEL_RDEOD_NOCASE, + ACCEL_SHUFTI, + ACCEL_DSHUFTI, + ACCEL_TRUFFLE, ACCEL_RED_TAPE, ACCEL_DVERM_MASKED, -}; - -/** \brief Structure for accel framework. */ -union AccelAux { - u8 accel_type; - struct { - u8 accel_type; - u8 offset; - } generic; - struct { - u8 accel_type; - u8 offset; - u8 c; // uppercase if nocase - } verm; - struct { - u8 accel_type; - u8 offset; - u8 c1; // uppercase if nocase - u8 c2; // uppercase if nocase +}; + +/** \brief Structure for accel framework. 
*/ +union AccelAux { + u8 accel_type; + struct { + u8 accel_type; + u8 offset; + } generic; + struct { + u8 accel_type; + u8 offset; + u8 c; // uppercase if nocase + } verm; + struct { + u8 accel_type; + u8 offset; + u8 c1; // uppercase if nocase + u8 c2; // uppercase if nocase u8 m1; // masked variant u8 m2; // masked variant - } dverm; - struct { - u8 accel_type; - u8 offset; + } dverm; + struct { + u8 accel_type; + u8 offset; u8 c; // uppercase if nocase u8 len; } mverm; @@ -100,29 +100,29 @@ union AccelAux { struct { u8 accel_type; u8 offset; - m128 lo; - m128 hi; - } shufti; - struct { - u8 accel_type; - u8 offset; - m128 lo1; - m128 hi1; - m128 lo2; - m128 hi2; - } dshufti; - struct { - u8 accel_type; - u8 offset; - m128 mask1; - m128 mask2; - } truffle; -}; - -/** - * Runs the specified acceleration scheme between c and c_end, returns a point - * such that the acceleration scheme does not match before. - */ -const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end); - -#endif + m128 lo; + m128 hi; + } shufti; + struct { + u8 accel_type; + u8 offset; + m128 lo1; + m128 hi1; + m128 lo2; + m128 hi2; + } dshufti; + struct { + u8 accel_type; + u8 offset; + m128 mask1; + m128 mask2; + } truffle; +}; + +/** + * Runs the specified acceleration scheme between c and c_end, returns a point + * such that the acceleration scheme does not match before. + */ +const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end); + +#endif diff --git a/contrib/libs/hyperscan/src/nfa/accelcompile.cpp b/contrib/libs/hyperscan/src/nfa/accelcompile.cpp index b5e21fc700..a224410dc9 100644 --- a/contrib/libs/hyperscan/src/nfa/accelcompile.cpp +++ b/contrib/libs/hyperscan/src/nfa/accelcompile.cpp @@ -1,99 +1,99 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
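Editor's note: the `union AccelAux` just defined gives every variant the same leading `accel_type`/`offset` pair, so the `generic` view can be read no matter which variant was written; this is the C common-initial-sequence guarantee, and it is exactly what `run_accel()` relies on when it applies `accel->generic.offset`. A small sketch, with a hypothetical helper name:

```c
/* Illustrative only: dispatch on the common leading tag byte of the
 * tagged union, reading the shared offset through the generic view. */
#include <stdio.h>
#include "accel.h"

static void describe_accel(const union AccelAux *aux) {
    switch (aux->accel_type) {          /* common first byte of all variants */
    case ACCEL_VERM:
        printf("verm: stop=0x%02x offset=%u\n",
               (unsigned)aux->verm.c, (unsigned)aux->generic.offset);
        break;
    case ACCEL_SHUFTI:
        printf("shufti: offset=%u\n", (unsigned)aux->generic.offset);
        break;
    default:
        printf("type=%u offset=%u\n",
               (unsigned)aux->accel_type, (unsigned)aux->generic.offset);
        break;
    }
}
```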
- */ - -#include "accel.h" -#include "accelcompile.h" -#include "shufticompile.h" -#include "trufflecompile.h" -#include "nfagraph/ng_limex_accel.h" /* for constants */ -#include "util/bitutils.h" -#include "util/verify_types.h" - -#include <map> -#include <set> -#include <vector> - -using namespace std; - -namespace ue2 { - -static -void buildAccelSingle(const AccelInfo &info, AccelAux *aux) { - assert(aux->accel_type == ACCEL_NONE); - if (info.single_stops.all()) { - return; - } - - size_t outs = info.single_stops.count(); - DEBUG_PRINTF("%zu outs\n", outs); - assert(outs && outs < 256); - u32 offset = info.single_offset; - - if (outs == 1) { - aux->accel_type = ACCEL_VERM; - aux->verm.offset = offset; - aux->verm.c = info.single_stops.find_first(); - DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c); - return; - } - - if (outs == 2 && info.single_stops.isCaselessChar()) { - aux->accel_type = ACCEL_VERM_NOCASE; - aux->verm.offset = offset; - aux->verm.c = info.single_stops.find_first() & CASE_CLEAR; - DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n", - aux->verm.c); - return; - } - - DEBUG_PRINTF("attempting shufti for %zu chars\n", outs); + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "accel.h" +#include "accelcompile.h" +#include "shufticompile.h" +#include "trufflecompile.h" +#include "nfagraph/ng_limex_accel.h" /* for constants */ +#include "util/bitutils.h" +#include "util/verify_types.h" + +#include <map> +#include <set> +#include <vector> + +using namespace std; + +namespace ue2 { + +static +void buildAccelSingle(const AccelInfo &info, AccelAux *aux) { + assert(aux->accel_type == ACCEL_NONE); + if (info.single_stops.all()) { + return; + } + + size_t outs = info.single_stops.count(); + DEBUG_PRINTF("%zu outs\n", outs); + assert(outs && outs < 256); + u32 offset = info.single_offset; + + if (outs == 1) { + aux->accel_type = ACCEL_VERM; + aux->verm.offset = offset; + aux->verm.c = info.single_stops.find_first(); + DEBUG_PRINTF("building vermicelli caseful for 0x%02hhx\n", aux->verm.c); + return; + } + + if (outs == 2 && info.single_stops.isCaselessChar()) { + aux->accel_type = ACCEL_VERM_NOCASE; + aux->verm.offset = offset; + aux->verm.c = info.single_stops.find_first() & CASE_CLEAR; + DEBUG_PRINTF("building vermicelli caseless for 0x%02hhx\n", + aux->verm.c); + return; + } + + DEBUG_PRINTF("attempting shufti for %zu chars\n", outs); if (-1 != shuftiBuildMasks(info.single_stops, (u8 *)&aux->shufti.lo, (u8 *)&aux->shufti.hi)) { - aux->accel_type = ACCEL_SHUFTI; - aux->shufti.offset = offset; - DEBUG_PRINTF("shufti built OK\n"); - return; - } else { - DEBUG_PRINTF("shufti build failed, falling through\n"); - } - - if (outs <= ACCEL_MAX_STOP_CHAR) { - DEBUG_PRINTF("building Truffle for %zu chars\n", outs); - aux->accel_type = ACCEL_TRUFFLE; - aux->truffle.offset = offset; + aux->accel_type = ACCEL_SHUFTI; + aux->shufti.offset = offset; + DEBUG_PRINTF("shufti built OK\n"); + return; + } else { + DEBUG_PRINTF("shufti build failed, falling through\n"); + } + + if (outs <= ACCEL_MAX_STOP_CHAR) { + DEBUG_PRINTF("building Truffle for %zu chars\n", outs); + aux->accel_type = ACCEL_TRUFFLE; + aux->truffle.offset = offset; truffleBuildMasks(info.single_stops, (u8 *)&aux->truffle.mask1, (u8 *)&aux->truffle.mask2); - return; - } - - DEBUG_PRINTF("unable to accelerate case with %zu outs\n", outs); -} - + return; + } + + DEBUG_PRINTF("unable to accelerate case with %zu outs\n", outs); +} + bool buildDvermMask(const flat_set<pair<u8, u8>> &escape_set, u8 *m1_out, u8 *m2_out) { u8 a1 = 0xff; @@ -135,61 +135,61 @@ bool buildDvermMask(const flat_set<pair<u8, u8>> &escape_set, u8 *m1_out, return true; } -static -bool isCaselessDouble(const flat_set<pair<u8, u8>> &stop) { - // test for vector containing <A,Z> <A,z> <a,Z> <a,z> - if (stop.size() != 4) { - return false; - } - const u8 a = stop.begin()->first & CASE_CLEAR; - const u8 b = stop.begin()->second & CASE_CLEAR; - - flat_set<pair<u8, u8>>::const_iterator it, ite; - for (it = stop.begin(), ite = stop.end(); it != ite; ++it) { - if ((it->first & CASE_CLEAR) != a || (it->second & CASE_CLEAR) != b) { - return false; - } - } - - return true; -} - -static -void buildAccelDouble(const AccelInfo &info, AccelAux *aux) { - size_t outs1 = info.double_stop1.count(); - size_t outs2 = info.double_stop2.size(); - - u8 offset = verify_u8(info.double_offset); - DEBUG_PRINTF("outs1=%zu, outs2=%zu\n", outs1, outs2); - - assert(aux->accel_type == ACCEL_NONE); - - if (!outs2) { - /* no double byte accel available */ - return; - } - - // double-byte accel - if (outs1 == 0 && outs2 == 1) { - aux->accel_type = ACCEL_DVERM; - aux->dverm.offset = offset; - aux->dverm.c1 = info.double_stop2.begin()->first; - aux->dverm.c2 = 
info.double_stop2.begin()->second; - DEBUG_PRINTF("building double-vermicelli caseful for 0x%02hhx%02hhx\n", - aux->dverm.c1, aux->dverm.c2); - return; - } - - if (outs1 == 0 && isCaselessDouble(info.double_stop2)) { - aux->accel_type = ACCEL_DVERM_NOCASE; - aux->dverm.offset = offset; - aux->dverm.c1 = info.double_stop2.begin()->first & CASE_CLEAR; - aux->dverm.c2 = info.double_stop2.begin()->second & CASE_CLEAR; - DEBUG_PRINTF("building double-vermicelli caseless for 0x%02hhx%02hhx\n", - aux->dverm.c1, aux->dverm.c2); - return; - } - +static +bool isCaselessDouble(const flat_set<pair<u8, u8>> &stop) { + // test for vector containing <A,Z> <A,z> <a,Z> <a,z> + if (stop.size() != 4) { + return false; + } + const u8 a = stop.begin()->first & CASE_CLEAR; + const u8 b = stop.begin()->second & CASE_CLEAR; + + flat_set<pair<u8, u8>>::const_iterator it, ite; + for (it = stop.begin(), ite = stop.end(); it != ite; ++it) { + if ((it->first & CASE_CLEAR) != a || (it->second & CASE_CLEAR) != b) { + return false; + } + } + + return true; +} + +static +void buildAccelDouble(const AccelInfo &info, AccelAux *aux) { + size_t outs1 = info.double_stop1.count(); + size_t outs2 = info.double_stop2.size(); + + u8 offset = verify_u8(info.double_offset); + DEBUG_PRINTF("outs1=%zu, outs2=%zu\n", outs1, outs2); + + assert(aux->accel_type == ACCEL_NONE); + + if (!outs2) { + /* no double byte accel available */ + return; + } + + // double-byte accel + if (outs1 == 0 && outs2 == 1) { + aux->accel_type = ACCEL_DVERM; + aux->dverm.offset = offset; + aux->dverm.c1 = info.double_stop2.begin()->first; + aux->dverm.c2 = info.double_stop2.begin()->second; + DEBUG_PRINTF("building double-vermicelli caseful for 0x%02hhx%02hhx\n", + aux->dverm.c1, aux->dverm.c2); + return; + } + + if (outs1 == 0 && isCaselessDouble(info.double_stop2)) { + aux->accel_type = ACCEL_DVERM_NOCASE; + aux->dverm.offset = offset; + aux->dverm.c1 = info.double_stop2.begin()->first & CASE_CLEAR; + aux->dverm.c2 = info.double_stop2.begin()->second & CASE_CLEAR; + DEBUG_PRINTF("building double-vermicelli caseless for 0x%02hhx%02hhx\n", + aux->dverm.c1, aux->dverm.c2); + return; + } + if (outs1 == 0) { u8 m1; u8 m2; @@ -203,10 +203,10 @@ void buildAccelDouble(const AccelInfo &info, AccelAux *aux) { aux->dverm.m2 = m2; DEBUG_PRINTF("building maskeddouble-vermicelli for 0x%02hhx%02hhx\n", aux->dverm.c1, aux->dverm.c2); - return; - } - } - + return; + } + } + if (outs1 < outs2 && outs1 <= 2) { // Heuristic from UE-438. 
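Editor's note: both the caseless single-byte path in `buildAccelSingle()` and `isCaselessDouble()` above reduce to masking with `CASE_CLEAR`, which folds the ASCII case bit so that an upper/lower pair compares equal. A self-contained sketch; the mask value is an assumption mirroring hyperscan's `ue2common.h` definition, and the helper name is hypothetical:

```c
/* Illustrative: two bytes are the "same letter" iff they agree after the
 * case bit is cleared. MY_CASE_CLEAR is assumed to match hyperscan's
 * CASE_CLEAR constant. */
#define MY_CASE_CLEAR 0xdf

static int same_char_nocase(unsigned char a, unsigned char b) {
    return (a & MY_CASE_CLEAR) == (b & MY_CASE_CLEAR);
}

/* e.g. same_char_nocase('A', 'a') == 1; the four-pair set
 * {<A,Z>, <A,z>, <a,Z>, <a,z>} tested by isCaselessDouble() collapses
 * to a single masked pair, which is why it demands exactly size 4. */
```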
DEBUG_PRINTF("building double-shufti for %zu one-byte and %zu" " two-byte literals\n", outs1, outs2); @@ -220,29 +220,29 @@ void buildAccelDouble(const AccelInfo &info, AccelAux *aux) { } } - // drop back to attempt single-byte accel - DEBUG_PRINTF("dropping back to single-byte acceleration\n"); - aux->accel_type = ACCEL_NONE; -} - -bool buildAccelAux(const AccelInfo &info, AccelAux *aux) { - assert(aux->accel_type == ACCEL_NONE); - if (info.single_stops.none()) { - DEBUG_PRINTF("picked red tape\n"); - aux->accel_type = ACCEL_RED_TAPE; - aux->generic.offset = info.single_offset; + // drop back to attempt single-byte accel + DEBUG_PRINTF("dropping back to single-byte acceleration\n"); + aux->accel_type = ACCEL_NONE; +} + +bool buildAccelAux(const AccelInfo &info, AccelAux *aux) { + assert(aux->accel_type == ACCEL_NONE); + if (info.single_stops.none()) { + DEBUG_PRINTF("picked red tape\n"); + aux->accel_type = ACCEL_RED_TAPE; + aux->generic.offset = info.single_offset; } if (aux->accel_type == ACCEL_NONE) { - buildAccelDouble(info, aux); - } - if (aux->accel_type == ACCEL_NONE) { - buildAccelSingle(info, aux); - } - - assert(aux->accel_type == ACCEL_NONE - || aux->generic.offset == info.single_offset - || aux->generic.offset == info.double_offset); - return aux->accel_type != ACCEL_NONE; -} - -} // namespace ue2 + buildAccelDouble(info, aux); + } + if (aux->accel_type == ACCEL_NONE) { + buildAccelSingle(info, aux); + } + + assert(aux->accel_type == ACCEL_NONE + || aux->generic.offset == info.single_offset + || aux->generic.offset == info.double_offset); + return aux->accel_type != ACCEL_NONE; +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfa/accelcompile.h b/contrib/libs/hyperscan/src/nfa/accelcompile.h index 926d25bb92..d0b3cdc74f 100644 --- a/contrib/libs/hyperscan/src/nfa/accelcompile.h +++ b/contrib/libs/hyperscan/src/nfa/accelcompile.h @@ -1,60 +1,60 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef ACCEL_COMPILE_H -#define ACCEL_COMPILE_H - -#include "ue2common.h" -#include "util/charreach.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ACCEL_COMPILE_H +#define ACCEL_COMPILE_H + +#include "ue2common.h" +#include "util/charreach.h" #include "util/flat_containers.h" - -union AccelAux; - -namespace ue2 { - -struct AccelInfo { - AccelInfo() : single_offset(0U), double_offset(0U), - single_stops(CharReach::dot()) {} - u32 single_offset; /**< offset correction to apply to single schemes */ - u32 double_offset; /**< offset correction to apply to double schemes */ - CharReach double_stop1; /**< single-byte accel stop literals for double - * schemes */ - flat_set<std::pair<u8, u8>> double_stop2; /**< double-byte accel stop - * literals */ - CharReach single_stops; /**< escapes for single byte acceleration */ -}; - -bool buildAccelAux(const AccelInfo &info, AccelAux *aux); - + +union AccelAux; + +namespace ue2 { + +struct AccelInfo { + AccelInfo() : single_offset(0U), double_offset(0U), + single_stops(CharReach::dot()) {} + u32 single_offset; /**< offset correction to apply to single schemes */ + u32 double_offset; /**< offset correction to apply to double schemes */ + CharReach double_stop1; /**< single-byte accel stop literals for double + * schemes */ + flat_set<std::pair<u8, u8>> double_stop2; /**< double-byte accel stop + * literals */ + CharReach single_stops; /**< escapes for single byte acceleration */ +}; + +bool buildAccelAux(const AccelInfo &info, AccelAux *aux); + /* returns true is the escape set can be handled with a masked double_verm */ bool buildDvermMask(const flat_set<std::pair<u8, u8>> &escape_set, u8 *m1_out = nullptr, u8 *m2_out = nullptr); -} // namespace ue2 - -#endif +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/nfa/callback.h b/contrib/libs/hyperscan/src/nfa/callback.h index aa025a9b78..9bdaa8d141 100644 --- a/contrib/libs/hyperscan/src/nfa/callback.h +++ b/contrib/libs/hyperscan/src/nfa/callback.h @@ -1,49 +1,49 @@ -/* +/* * Copyright (c) 2015-2016, Intel Corporation - * - 
* Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief NFA Callback definitions, used at runtime. - */ - -#ifndef NFA_CALLBACK_H -#define NFA_CALLBACK_H - -#include "ue2common.h" - -/** \brief The type for an NFA callback. - * + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief NFA Callback definitions, used at runtime. + */ + +#ifndef NFA_CALLBACK_H +#define NFA_CALLBACK_H + +#include "ue2common.h" + +/** \brief The type for an NFA callback. 
+ * * This is a function that takes as arguments the current start and end offsets * where the match occurs, the id of the match and the context pointer that was * passed into the NFA API function that executed the NFA. - * + * * The start offset is the "start of match" (SOM) offset for the match. It is * only provided by engines that natively support SOM tracking (e.g. Gough). - * + * * The end offset will be the offset after the character that caused the match. * Thus, if we have a buffer containing 'abc', then a pattern that matches an * empty string will have an offset of 0, a pattern that matches 'a' will have @@ -52,21 +52,21 @@ * we have n characters in the buffer, there are n+1 different potential * offsets for matches. * - * This function should return an int - currently the possible return values - * are 0, which means 'stop running the engine' or non-zero, which means - * 'continue matching'. - */ + * This function should return an int - currently the possible return values + * are 0, which means 'stop running the engine' or non-zero, which means + * 'continue matching'. + */ typedef int (*NfaCallback)(u64a start, u64a end, ReportID id, void *context); - -/** - * standard \ref NfaCallback return value indicating that engine execution - * should continue. (any non-zero value will serve this purpose) - */ -#define MO_CONTINUE_MATCHING 1 - -/** - * \ref NfaCallback return value indicating that engine execution should halt. - */ -#define MO_HALT_MATCHING 0 - -#endif // NFA_CALLBACK_H + +/** + * standard \ref NfaCallback return value indicating that engine execution + * should continue. (any non-zero value will serve this purpose) + */ +#define MO_CONTINUE_MATCHING 1 + +/** + * \ref NfaCallback return value indicating that engine execution should halt. + */ +#define MO_HALT_MATCHING 0 + +#endif // NFA_CALLBACK_H diff --git a/contrib/libs/hyperscan/src/nfa/castle.c b/contrib/libs/hyperscan/src/nfa/castle.c index 8cdef7264b..7c158b31c0 100644 --- a/contrib/libs/hyperscan/src/nfa/castle.c +++ b/contrib/libs/hyperscan/src/nfa/castle.c @@ -1,117 +1,117 @@ -/* +/* * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
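Editor's note: the callback.h hunk above fully documents the `NfaCallback` contract: the end offset is one past the character that caused the match, and the return value of 0 (`MO_HALT_MATCHING`) stops the engine while any non-zero value (`MO_CONTINUE_MATCHING`) keeps it running. A hedged example implementation; the counting logic and helper name are illustrative, not from the library:

```c
/* Sketch of an NfaCallback that logs matches and halts after ten. */
#include <stdio.h>
#include "callback.h"

static int count_ten_cb(u64a start, u64a end, ReportID id, void *context) {
    unsigned *seen = context;
    (void)start;  /* SOM is only meaningful for engines that track it */
    printf("match id %u ends at offset %llu\n", id, (unsigned long long)end);
    if (++*seen >= 10) {
        return MO_HALT_MATCHING;       /* tell the engine to stop */
    }
    return MO_CONTINUE_MATCHING;
}
```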
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Castle: multi-tenant repeat engine, runtime code. - */ - -#include "castle.h" - -#include "castle_internal.h" -#include "nfa_api.h" -#include "nfa_api_queue.h" -#include "nfa_internal.h" -#include "repeat.h" -#include "shufti.h" -#include "truffle.h" -#include "vermicelli.h" -#include "util/bitutils.h" -#include "util/multibit.h" -#include "util/partial_store.h" -#include "ue2common.h" - -static really_inline -const struct SubCastle *getSubCastle(const struct Castle *c, u32 num) { - assert(num < c->numRepeats); - const struct SubCastle *sub = - (const struct SubCastle *)((const char *)c + sizeof(struct Castle)); - assert(ISALIGNED(sub)); - return &sub[num]; -} - -static really_inline -const struct RepeatInfo *getRepeatInfo(const struct SubCastle *sub) { - const struct RepeatInfo *repeatInfo = - (const struct RepeatInfo *)((const char *)sub + sub->repeatInfoOffset); - return repeatInfo; -} - -static really_inline -union RepeatControl *getControl(char *full_state, const struct SubCastle *sub) { - union RepeatControl *rctrl = - (union RepeatControl *)(full_state + sub->fullStateOffset); - assert(ISALIGNED(rctrl)); - return rctrl; -} - -static really_inline -const union RepeatControl *getControlConst(const char *full_state, - const struct SubCastle *sub) { - const union RepeatControl *rctrl = - (const union RepeatControl *)(full_state + sub->fullStateOffset); - assert(ISALIGNED(rctrl)); - return rctrl; -} - -enum MatchMode { - CALLBACK_OUTPUT, - STOP_AT_MATCH, -}; - -static really_inline -char subCastleReportCurrent(const struct Castle *c, struct mq *q, - const u64a offset, const u32 subIdx) { - const struct SubCastle *sub = getSubCastle(c, subIdx); - const struct RepeatInfo *info = getRepeatInfo(sub); - - union RepeatControl *rctrl = getControl(q->state, sub); - char *rstate = (char *)q->streamState + sub->streamStateOffset + - info->packedCtrlSize; - enum RepeatMatch match = - repeatHasMatch(info, rctrl, rstate, offset); - DEBUG_PRINTF("repeatHasMatch returned %d\n", match); - if (match == REPEAT_MATCH) { + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Castle: multi-tenant repeat engine, runtime code. + */ + +#include "castle.h" + +#include "castle_internal.h" +#include "nfa_api.h" +#include "nfa_api_queue.h" +#include "nfa_internal.h" +#include "repeat.h" +#include "shufti.h" +#include "truffle.h" +#include "vermicelli.h" +#include "util/bitutils.h" +#include "util/multibit.h" +#include "util/partial_store.h" +#include "ue2common.h" + +static really_inline +const struct SubCastle *getSubCastle(const struct Castle *c, u32 num) { + assert(num < c->numRepeats); + const struct SubCastle *sub = + (const struct SubCastle *)((const char *)c + sizeof(struct Castle)); + assert(ISALIGNED(sub)); + return &sub[num]; +} + +static really_inline +const struct RepeatInfo *getRepeatInfo(const struct SubCastle *sub) { + const struct RepeatInfo *repeatInfo = + (const struct RepeatInfo *)((const char *)sub + sub->repeatInfoOffset); + return repeatInfo; +} + +static really_inline +union RepeatControl *getControl(char *full_state, const struct SubCastle *sub) { + union RepeatControl *rctrl = + (union RepeatControl *)(full_state + sub->fullStateOffset); + assert(ISALIGNED(rctrl)); + return rctrl; +} + +static really_inline +const union RepeatControl *getControlConst(const char *full_state, + const struct SubCastle *sub) { + const union RepeatControl *rctrl = + (const union RepeatControl *)(full_state + sub->fullStateOffset); + assert(ISALIGNED(rctrl)); + return rctrl; +} + +enum MatchMode { + CALLBACK_OUTPUT, + STOP_AT_MATCH, +}; + +static really_inline +char subCastleReportCurrent(const struct Castle *c, struct mq *q, + const u64a offset, const u32 subIdx) { + const struct SubCastle *sub = getSubCastle(c, subIdx); + const struct RepeatInfo *info = getRepeatInfo(sub); + + union RepeatControl *rctrl = getControl(q->state, sub); + char *rstate = (char *)q->streamState + sub->streamStateOffset + + info->packedCtrlSize; + enum RepeatMatch match = + repeatHasMatch(info, rctrl, rstate, offset); + DEBUG_PRINTF("repeatHasMatch returned %d\n", match); + if (match == REPEAT_MATCH) { DEBUG_PRINTF("firing match at %llu for sub %u, report %u\n", offset, subIdx, sub->report); if (q->cb(0, offset, sub->report, q->context) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - - return MO_CONTINUE_MATCHING; -} - -static really_inline -int castleReportCurrent(const struct Castle *c, struct mq *q) { - const u64a offset = q_cur_offset(q); - DEBUG_PRINTF("offset=%llu\n", offset); - - if (c->exclusive) { + return MO_HALT_MATCHING; + } + } + + return MO_CONTINUE_MATCHING; +} + +static really_inline +int castleReportCurrent(const struct Castle *c, struct mq *q) { + const u64a offset = q_cur_offset(q); + DEBUG_PRINTF("offset=%llu\n", offset); + + if 
(c->exclusive) { u8 *active = (u8 *)q->streamState; u8 *groups = active + c->groupIterOffset; for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); @@ -123,131 +123,131 @@ int castleReportCurrent(const struct Castle *c, struct mq *q) { offset, activeIdx) == MO_HALT_MATCHING) { return MO_HALT_MATCHING; } - } - } - + } + } + if (c->exclusive != PURE_EXCLUSIVE) { const u8 *active = (const u8 *)q->streamState + c->activeOffset; - for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID); - i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) { - DEBUG_PRINTF("subcastle %u\n", i); - if (subCastleReportCurrent(c, q, offset, i) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - } - - return MO_CONTINUE_MATCHING; -} - -static really_inline -char subCastleInAccept(const struct Castle *c, struct mq *q, - const ReportID report, const u64a offset, - const u32 subIdx) { - const struct SubCastle *sub = getSubCastle(c, subIdx); - - if (sub->report != report) { - return 0; - } - const struct RepeatInfo *info = getRepeatInfo(sub); - - union RepeatControl *rctrl = getControl(q->state, sub); - char *rstate = (char *)q->streamState + sub->streamStateOffset + - info->packedCtrlSize; - enum RepeatMatch match = - repeatHasMatch(info, rctrl, rstate, offset); - if (match == REPEAT_MATCH) { - DEBUG_PRINTF("in an accept\n"); - return 1; - } - - return 0; -} - -static really_inline -char castleInAccept(const struct Castle *c, struct mq *q, - const ReportID report, const u64a offset) { - DEBUG_PRINTF("offset=%llu\n", offset); + for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID); + i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) { + DEBUG_PRINTF("subcastle %u\n", i); + if (subCastleReportCurrent(c, q, offset, i) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } + } + + return MO_CONTINUE_MATCHING; +} + +static really_inline +char subCastleInAccept(const struct Castle *c, struct mq *q, + const ReportID report, const u64a offset, + const u32 subIdx) { + const struct SubCastle *sub = getSubCastle(c, subIdx); + + if (sub->report != report) { + return 0; + } + const struct RepeatInfo *info = getRepeatInfo(sub); + + union RepeatControl *rctrl = getControl(q->state, sub); + char *rstate = (char *)q->streamState + sub->streamStateOffset + + info->packedCtrlSize; + enum RepeatMatch match = + repeatHasMatch(info, rctrl, rstate, offset); + if (match == REPEAT_MATCH) { + DEBUG_PRINTF("in an accept\n"); + return 1; + } + + return 0; +} + +static really_inline +char castleInAccept(const struct Castle *c, struct mq *q, + const ReportID report, const u64a offset) { + DEBUG_PRINTF("offset=%llu\n", offset); /* ignore when just catching up due to full queue */ if (report == MO_INVALID_IDX) { return 0; } - - if (c->exclusive) { + + if (c->exclusive) { u8 *active = (u8 *)q->streamState; u8 *groups = active + c->groupIterOffset; for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { u8 *cur = active + i * c->activeIdxSize; const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); - DEBUG_PRINTF("subcastle %u\n", activeIdx); - if (subCastleInAccept(c, q, report, offset, activeIdx)) { - return 1; - } - } - } - + DEBUG_PRINTF("subcastle %u\n", activeIdx); + if (subCastleInAccept(c, q, report, offset, activeIdx)) { + return 1; + } + } + } + if (c->exclusive != PURE_EXCLUSIVE) { const u8 *active = (const u8 *)q->streamState + c->activeOffset; - for (u32 i = mmbit_iterate(active, c->numRepeats, 
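Editor's note: the exclusive branches of `castleReportCurrent()` and `castleInAccept()` above share a bookkeeping scheme: each exclusive group stores the index of its single live subcastle as a packed field of `c->activeIdxSize` bytes at the front of the stream state, decoded with `partial_load_u32`. A small sketch; the wrapper function is hypothetical, the field names follow the diff:

```c
/* Illustrative: recover the live subcastle index of one exclusive group,
 * mirroring the partial_load_u32 idiom used throughout castle.c. */
#include "castle_internal.h"
#include "util/partial_store.h"

static u32 live_sub_of_group(const struct Castle *c, const u8 *stream_state,
                             u32 group) {
    const u8 *cur = stream_state + group * c->activeIdxSize;
    return partial_load_u32(cur, c->activeIdxSize);
}
```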
MMB_INVALID); + for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID); i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) { - DEBUG_PRINTF("subcastle %u\n", i); - if (subCastleInAccept(c, q, report, offset, i)) { - return 1; - } - } - } - - return 0; -} - -static really_inline -void subCastleDeactivateStaleSubs(const struct Castle *c, const u64a offset, - void *full_state, void *stream_state, - const u32 subIdx) { - const struct SubCastle *sub = getSubCastle(c, subIdx); - const struct RepeatInfo *info = getRepeatInfo(sub); - - union RepeatControl *rctrl = getControl(full_state, sub); - char *rstate = (char *)stream_state + sub->streamStateOffset + - info->packedCtrlSize; - - if (repeatHasMatch(info, rctrl, rstate, offset) == REPEAT_STALE) { - DEBUG_PRINTF("sub %u is stale at offset %llu\n", subIdx, offset); + DEBUG_PRINTF("subcastle %u\n", i); + if (subCastleInAccept(c, q, report, offset, i)) { + return 1; + } + } + } + + return 0; +} + +static really_inline +void subCastleDeactivateStaleSubs(const struct Castle *c, const u64a offset, + void *full_state, void *stream_state, + const u32 subIdx) { + const struct SubCastle *sub = getSubCastle(c, subIdx); + const struct RepeatInfo *info = getRepeatInfo(sub); + + union RepeatControl *rctrl = getControl(full_state, sub); + char *rstate = (char *)stream_state + sub->streamStateOffset + + info->packedCtrlSize; + + if (repeatHasMatch(info, rctrl, rstate, offset) == REPEAT_STALE) { + DEBUG_PRINTF("sub %u is stale at offset %llu\n", subIdx, offset); if (sub->exclusiveId < c->numRepeats) { u8 *active = (u8 *)stream_state; u8 *groups = active + c->groupIterOffset; mmbit_unset(groups, c->numGroups, sub->exclusiveId); - } else { + } else { u8 *active = (u8 *)stream_state + c->activeOffset; mmbit_unset(active, c->numRepeats, subIdx); - } - } -} - -static really_inline -void castleDeactivateStaleSubs(const struct Castle *c, const u64a offset, - void *full_state, void *stream_state) { - DEBUG_PRINTF("offset=%llu\n", offset); - + } + } +} + +static really_inline +void castleDeactivateStaleSubs(const struct Castle *c, const u64a offset, + void *full_state, void *stream_state) { + DEBUG_PRINTF("offset=%llu\n", offset); + if (!c->staleIterOffset) { DEBUG_PRINTF("{no repeats can go stale}\n"); return; /* no subcastle can ever go stale */ } - if (c->exclusive) { + if (c->exclusive) { u8 *active = (u8 *)stream_state; u8 *groups = active + c->groupIterOffset; for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { u8 *cur = active + i * c->activeIdxSize; const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); - DEBUG_PRINTF("subcastle %u\n", activeIdx); - subCastleDeactivateStaleSubs(c, offset, full_state, - stream_state, activeIdx); - } - } - + DEBUG_PRINTF("subcastle %u\n", activeIdx); + subCastleDeactivateStaleSubs(c, offset, full_state, + stream_state, activeIdx); + } + } + if (c->exclusive != PURE_EXCLUSIVE) { const u8 *active = (const u8 *)stream_state + c->activeOffset; const struct mmbit_sparse_iter *it @@ -259,27 +259,27 @@ void castleDeactivateStaleSubs(const struct Castle *c, const u64a offset, u32 i = mmbit_sparse_iter_begin(active, numRepeats, &idx, it, si_state); while(i != MMB_INVALID) { - DEBUG_PRINTF("subcastle %u\n", i); + DEBUG_PRINTF("subcastle %u\n", i); subCastleDeactivateStaleSubs(c, offset, full_state, stream_state, i); i = mmbit_sparse_iter_next(active, numRepeats, i, &idx, it, si_state); - } - } -} - -static really_inline -void 
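Editor's note: the non-exclusive counterpart, seen in `castleDeactivateStaleSubs()` and elsewhere, is a multibit over all `numRepeats` subcastles at `stream_state + c->activeOffset`, walked with `mmbit_iterate` until it yields `MMB_INVALID`. A sketch of the idiom in isolation; the surrounding function is hypothetical:

```c
/* Illustrative: iterate every active shared (non-exclusive) repeat. */
#include "castle_internal.h"
#include "util/multibit.h"

static void for_each_active(const struct Castle *c, const u8 *stream_state) {
    const u8 *active = stream_state + c->activeOffset;
    for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
         i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
        DEBUG_PRINTF("subcastle %u is active\n", i);
    }
}
```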
castleProcessTop(const struct Castle *c, const u32 top, const u64a offset, + } + } +} + +static really_inline +void castleProcessTop(const struct Castle *c, const u32 top, const u64a offset, void *full_state, void *stream_state, UNUSED char stale_checked) { - assert(top < c->numRepeats); - - const struct SubCastle *sub = getSubCastle(c, top); - const struct RepeatInfo *info = getRepeatInfo(sub); - union RepeatControl *rctrl = getControl(full_state, sub); - char *rstate = (char *)stream_state + sub->streamStateOffset + - info->packedCtrlSize; - - char is_alive = 0; + assert(top < c->numRepeats); + + const struct SubCastle *sub = getSubCastle(c, top); + const struct RepeatInfo *info = getRepeatInfo(sub); + union RepeatControl *rctrl = getControl(full_state, sub); + char *rstate = (char *)stream_state + sub->streamStateOffset + + info->packedCtrlSize; + + char is_alive = 0; u8 *active = (u8 *)stream_state; if (sub->exclusiveId < c->numRepeats) { u8 *groups = active + c->groupIterOffset; @@ -292,125 +292,125 @@ void castleProcessTop(const struct Castle *c, const u32 top, const u64a offset, if (!is_alive) { partial_store_u32(active, top, c->activeIdxSize); } - } else { + } else { active += c->activeOffset; - is_alive = mmbit_set(active, c->numRepeats, top); - } - - if (!is_alive) { - DEBUG_PRINTF("first top for inactive repeat %u\n", top); - } else { - DEBUG_PRINTF("repeat %u is already alive\n", top); - // Caller should ensure we're not stale. + is_alive = mmbit_set(active, c->numRepeats, top); + } + + if (!is_alive) { + DEBUG_PRINTF("first top for inactive repeat %u\n", top); + } else { + DEBUG_PRINTF("repeat %u is already alive\n", top); + // Caller should ensure we're not stale. assert(!stale_checked || repeatHasMatch(info, rctrl, rstate, offset) != REPEAT_STALE); - - // Ignore duplicate top events. - u64a last = repeatLastTop(info, rctrl, rstate); - - assert(last <= offset); - if (last == offset) { - DEBUG_PRINTF("dupe top at %llu\n", offset); - return; - } - } - - repeatStore(info, rctrl, rstate, offset, is_alive); -} - -static really_inline -void subCastleFindMatch(const struct Castle *c, const u64a begin, - const u64a end, void *full_state, void *stream_state, - size_t *mloc, char *found, const u32 subIdx) { - const struct SubCastle *sub = getSubCastle(c, subIdx); - const struct RepeatInfo *info = getRepeatInfo(sub); - union RepeatControl *rctrl = getControl(full_state, sub); - char *rstate = (char *)stream_state + sub->streamStateOffset + - info->packedCtrlSize; - - u64a match = repeatNextMatch(info, rctrl, rstate, begin); - if (match == 0) { - DEBUG_PRINTF("no more matches for sub %u\n", subIdx); + + // Ignore duplicate top events. 
+ u64a last = repeatLastTop(info, rctrl, rstate); + + assert(last <= offset); + if (last == offset) { + DEBUG_PRINTF("dupe top at %llu\n", offset); + return; + } + } + + repeatStore(info, rctrl, rstate, offset, is_alive); +} + +static really_inline +void subCastleFindMatch(const struct Castle *c, const u64a begin, + const u64a end, void *full_state, void *stream_state, + size_t *mloc, char *found, const u32 subIdx) { + const struct SubCastle *sub = getSubCastle(c, subIdx); + const struct RepeatInfo *info = getRepeatInfo(sub); + union RepeatControl *rctrl = getControl(full_state, sub); + char *rstate = (char *)stream_state + sub->streamStateOffset + + info->packedCtrlSize; + + u64a match = repeatNextMatch(info, rctrl, rstate, begin); + if (match == 0) { + DEBUG_PRINTF("no more matches for sub %u\n", subIdx); if (sub->exclusiveId < c->numRepeats) { u8 *groups = (u8 *)stream_state + c->groupIterOffset; mmbit_unset(groups, c->numGroups, sub->exclusiveId); - } else { + } else { u8 *active = (u8 *)stream_state + c->activeOffset; - mmbit_unset(active, c->numRepeats, subIdx); - } - return; - } else if (match > end) { - DEBUG_PRINTF("next match for sub %u at %llu is > horizon\n", subIdx, - match); - return; - } - DEBUG_PRINTF("sub %u earliest match at %llu\n", subIdx, match); - size_t diff = match - begin; - if (!(*found) || diff < *mloc) { - *mloc = diff; - DEBUG_PRINTF("mloc=%zu\n", *mloc); - } - *found = 1; -} - -static really_inline -char castleFindMatch(const struct Castle *c, const u64a begin, const u64a end, - void *full_state, void *stream_state, size_t *mloc) { - DEBUG_PRINTF("begin=%llu, end=%llu\n", begin, end); - assert(begin <= end); - - if (begin == end) { - DEBUG_PRINTF("no work to do\n"); - return 0; - } - - char found = 0; - *mloc = 0; - - if (c->exclusive) { + mmbit_unset(active, c->numRepeats, subIdx); + } + return; + } else if (match > end) { + DEBUG_PRINTF("next match for sub %u at %llu is > horizon\n", subIdx, + match); + return; + } + DEBUG_PRINTF("sub %u earliest match at %llu\n", subIdx, match); + size_t diff = match - begin; + if (!(*found) || diff < *mloc) { + *mloc = diff; + DEBUG_PRINTF("mloc=%zu\n", *mloc); + } + *found = 1; +} + +static really_inline +char castleFindMatch(const struct Castle *c, const u64a begin, const u64a end, + void *full_state, void *stream_state, size_t *mloc) { + DEBUG_PRINTF("begin=%llu, end=%llu\n", begin, end); + assert(begin <= end); + + if (begin == end) { + DEBUG_PRINTF("no work to do\n"); + return 0; + } + + char found = 0; + *mloc = 0; + + if (c->exclusive) { u8 *active = (u8 *)stream_state; u8 *groups = active + c->groupIterOffset; for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { u8 *cur = active + i * c->activeIdxSize; const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); - DEBUG_PRINTF("subcastle %u\n", activeIdx); - subCastleFindMatch(c, begin, end, full_state, stream_state, mloc, - &found, activeIdx); - } - } - + DEBUG_PRINTF("subcastle %u\n", activeIdx); + subCastleFindMatch(c, begin, end, full_state, stream_state, mloc, + &found, activeIdx); + } + } + if (c->exclusive != PURE_EXCLUSIVE) { u8 *active = (u8 *)stream_state + c->activeOffset; - for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID); - i != MMB_INVALID; - i = mmbit_iterate(active, c->numRepeats, i)) { - DEBUG_PRINTF("subcastle %u\n", i); - subCastleFindMatch(c, begin, end, full_state, stream_state, mloc, - &found, i); - } - } - - return found; -} - -static really_inline -u64a 
subCastleNextMatch(const struct Castle *c, void *full_state, - void *stream_state, const u64a loc, - const u32 subIdx) { - DEBUG_PRINTF("subcastle %u\n", subIdx); - const struct SubCastle *sub = getSubCastle(c, subIdx); - const struct RepeatInfo *info = getRepeatInfo(sub); - const union RepeatControl *rctrl = - getControlConst(full_state, sub); - const char *rstate = (const char *)stream_state + - sub->streamStateOffset + - info->packedCtrlSize; - - return repeatNextMatch(info, rctrl, rstate, loc); -} - -static really_inline + for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID); + i != MMB_INVALID; + i = mmbit_iterate(active, c->numRepeats, i)) { + DEBUG_PRINTF("subcastle %u\n", i); + subCastleFindMatch(c, begin, end, full_state, stream_state, mloc, + &found, i); + } + } + + return found; +} + +static really_inline +u64a subCastleNextMatch(const struct Castle *c, void *full_state, + void *stream_state, const u64a loc, + const u32 subIdx) { + DEBUG_PRINTF("subcastle %u\n", subIdx); + const struct SubCastle *sub = getSubCastle(c, subIdx); + const struct RepeatInfo *info = getRepeatInfo(sub); + const union RepeatControl *rctrl = + getControlConst(full_state, sub); + const char *rstate = (const char *)stream_state + + sub->streamStateOffset + + info->packedCtrlSize; + + return repeatNextMatch(info, rctrl, rstate, loc); +} + +static really_inline void set_matching(const struct Castle *c, const u64a match, u8 *active, u8 *matching, const u32 active_size, const u32 active_id, const u32 matching_id, u64a *offset, const u64a end) { @@ -432,190 +432,190 @@ void set_matching(const struct Castle *c, const u64a match, u8 *active, } static really_inline -void subCastleMatchLoop(const struct Castle *c, void *full_state, - void *stream_state, const u64a end, - const u64a loc, u64a *offset) { +void subCastleMatchLoop(const struct Castle *c, void *full_state, + void *stream_state, const u64a end, + const u64a loc, u64a *offset) { u8 *active = (u8 *)stream_state + c->activeOffset; - u8 *matching = full_state; - for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID); - i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) { - u64a match = subCastleNextMatch(c, full_state, stream_state, loc, i); + u8 *matching = full_state; + for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID); + i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) { + u64a match = subCastleNextMatch(c, full_state, stream_state, loc, i); set_matching(c, match, active, matching, c->numRepeats, i, i, offset, end); - } -} - -static really_inline -char subCastleFireMatch(const struct Castle *c, const void *full_state, - UNUSED const void *stream_state, NfaCallback cb, - void *ctx, const u64a offset) { - const u8 *matching = full_state; - - // Fire all matching sub-castles at this offset. - for (u32 i = mmbit_iterate(matching, c->numRepeats, MMB_INVALID); - i != MMB_INVALID; - i = mmbit_iterate(matching, c->numRepeats, i)) { - const struct SubCastle *sub = getSubCastle(c, i); - DEBUG_PRINTF("firing match at %llu for sub %u\n", offset, i); + } +} + +static really_inline +char subCastleFireMatch(const struct Castle *c, const void *full_state, + UNUSED const void *stream_state, NfaCallback cb, + void *ctx, const u64a offset) { + const u8 *matching = full_state; + + // Fire all matching sub-castles at this offset. 
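/*
 * Descriptive note: each set bit in the 'matching' multibit names a
 * sub-castle whose repeat matches at 'offset'. The callback's return value
 * controls the loop below: MO_HALT_MATCHING aborts the scan, anything else
 * lets it continue until every matching sub-castle has fired.
 */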
+ for (u32 i = mmbit_iterate(matching, c->numRepeats, MMB_INVALID); + i != MMB_INVALID; + i = mmbit_iterate(matching, c->numRepeats, i)) { + const struct SubCastle *sub = getSubCastle(c, i); + DEBUG_PRINTF("firing match at %llu for sub %u\n", offset, i); if (cb(0, offset, sub->report, ctx) == MO_HALT_MATCHING) { - DEBUG_PRINTF("caller told us to halt\n"); - return MO_HALT_MATCHING; - } - } - - return MO_CONTINUE_MATCHING; -} - -static really_inline -char castleMatchLoop(const struct Castle *c, const u64a begin, const u64a end, - void *full_state, void *stream_state, NfaCallback cb, - void *ctx) { - DEBUG_PRINTF("begin=%llu, end=%llu\n", begin, end); - assert(begin <= end); - - u8 *matching = full_state; // temp multibit - - u64a loc = begin; - while (loc < end) { - - // Find minimum next offset for the next match(es) from amongst our - // active sub-castles, and store the indices of the sub-castles that - // match at that offset in the 'matching' mmbit, which is in the - // full_state (scratch). - - u64a offset = end; // min offset of next match - u32 activeIdx = 0; + DEBUG_PRINTF("caller told us to halt\n"); + return MO_HALT_MATCHING; + } + } + + return MO_CONTINUE_MATCHING; +} + +static really_inline +char castleMatchLoop(const struct Castle *c, const u64a begin, const u64a end, + void *full_state, void *stream_state, NfaCallback cb, + void *ctx) { + DEBUG_PRINTF("begin=%llu, end=%llu\n", begin, end); + assert(begin <= end); + + u8 *matching = full_state; // temp multibit + + u64a loc = begin; + while (loc < end) { + + // Find minimum next offset for the next match(es) from amongst our + // active sub-castles, and store the indices of the sub-castles that + // match at that offset in the 'matching' mmbit, which is in the + // full_state (scratch). 
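/*
 * Sketch of the reduction below (inferred from the call sites, not spelled
 * out in this hunk): 'offset' starts at 'end' and set_matching() lowers it
 * towards the earliest pending match, rebuilding the 'matching' multibit for
 * the new minimum and adding bits for ties. Exclusive groups feed in via
 * their packed active index (partial_load_u32); the remaining repeats come
 * from the 'active' multibit via subCastleMatchLoop().
 */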
+ + u64a offset = end; // min offset of next match + u32 activeIdx = 0; mmbit_clear(matching, c->numRepeats); - if (c->exclusive) { + if (c->exclusive) { u8 *active = (u8 *)stream_state; u8 *groups = active + c->groupIterOffset; for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { u8 *cur = active + i * c->activeIdxSize; activeIdx = partial_load_u32(cur, c->activeIdxSize); - u64a match = subCastleNextMatch(c, full_state, stream_state, + u64a match = subCastleNextMatch(c, full_state, stream_state, loc, activeIdx); set_matching(c, match, groups, matching, c->numGroups, i, activeIdx, &offset, end); - } - } - + } + } + if (c->exclusive != PURE_EXCLUSIVE) { - subCastleMatchLoop(c, full_state, stream_state, + subCastleMatchLoop(c, full_state, stream_state, end, loc, &offset); - } + } DEBUG_PRINTF("offset=%llu\n", offset); if (!mmbit_any(matching, c->numRepeats)) { DEBUG_PRINTF("no more matches\n"); - break; - } + break; + } if (subCastleFireMatch(c, full_state, stream_state, cb, ctx, offset) == MO_HALT_MATCHING) { return MO_HALT_MATCHING; } loc = offset; - } - - return MO_CONTINUE_MATCHING; -} - -static really_inline -char castleScanVerm(const struct Castle *c, const u8 *buf, const size_t begin, - const size_t end, size_t *loc) { - const u8 *ptr = vermicelliExec(c->u.verm.c, 0, buf + begin, buf + end); - if (ptr == buf + end) { - DEBUG_PRINTF("no escape found\n"); - return 0; - } - - assert(loc); - assert(ptr >= buf && ptr < buf + end); - *loc = (size_t)(ptr - buf); - DEBUG_PRINTF("escape found at offset %zu\n", *loc); - return 1; -} - -static really_inline -char castleScanNVerm(const struct Castle *c, const u8 *buf, const size_t begin, - const size_t end, size_t *loc) { - const u8 *ptr = nvermicelliExec(c->u.verm.c, 0, buf + begin, buf + end); - if (ptr == buf + end) { - DEBUG_PRINTF("no escape found\n"); - return 0; - } - - assert(loc); - assert(ptr >= buf && ptr < buf + end); - *loc = (size_t)(ptr - buf); - DEBUG_PRINTF("escape found at offset %zu\n", *loc); - return 1; -} - -static really_inline -char castleScanShufti(const struct Castle *c, const u8 *buf, const size_t begin, - const size_t end, size_t *loc) { - const m128 mask_lo = c->u.shuf.mask_lo; - const m128 mask_hi = c->u.shuf.mask_hi; - const u8 *ptr = shuftiExec(mask_lo, mask_hi, buf + begin, buf + end); - if (ptr == buf + end) { - DEBUG_PRINTF("no escape found\n"); - return 0; - } - - assert(loc); - assert(ptr >= buf && ptr < buf + end); - *loc = (size_t)(ptr - buf); - DEBUG_PRINTF("escape found at offset %zu\n", *loc); - return 1; -} - -static really_inline -char castleScanTruffle(const struct Castle *c, const u8 *buf, const size_t begin, - const size_t end, size_t *loc) { + } + + return MO_CONTINUE_MATCHING; +} + +static really_inline +char castleScanVerm(const struct Castle *c, const u8 *buf, const size_t begin, + const size_t end, size_t *loc) { + const u8 *ptr = vermicelliExec(c->u.verm.c, 0, buf + begin, buf + end); + if (ptr == buf + end) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + assert(ptr >= buf && ptr < buf + end); + *loc = (size_t)(ptr - buf); + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + return 1; +} + +static really_inline +char castleScanNVerm(const struct Castle *c, const u8 *buf, const size_t begin, + const size_t end, size_t *loc) { + const u8 *ptr = nvermicelliExec(c->u.verm.c, 0, buf + begin, buf + end); + if (ptr == buf + end) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); 
+ assert(ptr >= buf && ptr < buf + end); + *loc = (size_t)(ptr - buf); + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + return 1; +} + +static really_inline +char castleScanShufti(const struct Castle *c, const u8 *buf, const size_t begin, + const size_t end, size_t *loc) { + const m128 mask_lo = c->u.shuf.mask_lo; + const m128 mask_hi = c->u.shuf.mask_hi; + const u8 *ptr = shuftiExec(mask_lo, mask_hi, buf + begin, buf + end); + if (ptr == buf + end) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + assert(ptr >= buf && ptr < buf + end); + *loc = (size_t)(ptr - buf); + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + return 1; +} + +static really_inline +char castleScanTruffle(const struct Castle *c, const u8 *buf, const size_t begin, + const size_t end, size_t *loc) { const u8 *ptr = truffleExec(c->u.truffle.mask1, c->u.truffle.mask2, buf + begin, buf + end); - if (ptr == buf + end) { - DEBUG_PRINTF("no escape found\n"); - return 0; - } - - assert(loc); - assert(ptr >= buf && ptr < buf + end); - *loc = (size_t)(ptr - buf); - DEBUG_PRINTF("escape found at offset %zu\n", *loc); - return 1; -} - -static really_inline -char castleScan(const struct Castle *c, const u8 *buf, const size_t begin, - const size_t end, size_t *loc) { - assert(begin <= end); - - if (begin == end) { - return 0; - } - - switch (c->type) { - case CASTLE_DOT: - // Nothing can stop a dot scan! - return 0; - case CASTLE_VERM: - return castleScanVerm(c, buf, begin, end, loc); - case CASTLE_NVERM: - return castleScanNVerm(c, buf, begin, end, loc); - case CASTLE_SHUFTI: - return castleScanShufti(c, buf, begin, end, loc); - case CASTLE_TRUFFLE: - return castleScanTruffle(c, buf, begin, end, loc); - default: - DEBUG_PRINTF("unknown scan type!\n"); - assert(0); - return 0; - } -} - -static really_inline + if (ptr == buf + end) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + assert(ptr >= buf && ptr < buf + end); + *loc = (size_t)(ptr - buf); + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + return 1; +} + +static really_inline +char castleScan(const struct Castle *c, const u8 *buf, const size_t begin, + const size_t end, size_t *loc) { + assert(begin <= end); + + if (begin == end) { + return 0; + } + + switch (c->type) { + case CASTLE_DOT: + // Nothing can stop a dot scan! 
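/*
 * A CASTLE_DOT castle is chosen when the repeat's reachability covers every
 * byte (cr.all() in writeCastleScanEngine), so no escape character exists
 * and the scan can never stop early.
 */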
+ return 0; + case CASTLE_VERM: + return castleScanVerm(c, buf, begin, end, loc); + case CASTLE_NVERM: + return castleScanNVerm(c, buf, begin, end, loc); + case CASTLE_SHUFTI: + return castleScanShufti(c, buf, begin, end, loc); + case CASTLE_TRUFFLE: + return castleScanTruffle(c, buf, begin, end, loc); + default: + DEBUG_PRINTF("unknown scan type!\n"); + assert(0); + return 0; + } +} + +static really_inline char castleRevScanVerm(const struct Castle *c, const u8 *buf, const size_t begin, const size_t end, size_t *loc) { const u8 *ptr = rvermicelliExec(c->u.verm.c, 0, buf + begin, buf + end); @@ -713,25 +713,25 @@ char castleRevScan(const struct Castle *c, const u8 *buf, const size_t begin, static really_inline void castleHandleEvent(const struct Castle *c, struct mq *q, const u64a sp, char stale_checked) { - const u32 event = q->items[q->cur].type; - switch (event) { - case MQE_TOP: - assert(0); // should be a numbered top - break; - case MQE_START: - case MQE_END: - break; - default: - assert(event >= MQE_TOP_FIRST); - assert(event < MQE_INVALID); - u32 top = event - MQE_TOP_FIRST; - DEBUG_PRINTF("top %u at offset %llu\n", top, sp); + const u32 event = q->items[q->cur].type; + switch (event) { + case MQE_TOP: + assert(0); // should be a numbered top + break; + case MQE_START: + case MQE_END: + break; + default: + assert(event >= MQE_TOP_FIRST); + assert(event < MQE_INVALID); + u32 top = event - MQE_TOP_FIRST; + DEBUG_PRINTF("top %u at offset %llu\n", top, sp); castleProcessTop(c, top, sp, q->state, q->streamState, stale_checked); - break; - } -} - -static really_inline + break; + } +} + +static really_inline void clear_repeats(const struct Castle *c, const struct mq *q, u8 *active) { DEBUG_PRINTF("clearing active repeats due to escape\n"); if (c->exclusive) { @@ -747,244 +747,244 @@ void clear_repeats(const struct Castle *c, const struct mq *q, u8 *active) { static really_inline char nfaExecCastle_Q_i(const struct NFA *n, struct mq *q, s64a end, enum MatchMode mode) { - assert(n && q); + assert(n && q); assert(n->type == CASTLE_NFA); - - DEBUG_PRINTF("state=%p, streamState=%p\n", q->state, q->streamState); - - const struct Castle *c = getImplNfa(n); - - if (q->report_current) { - int rv = castleReportCurrent(c, q); - q->report_current = 0; - if (rv == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - - if (q->cur == q->end) { - return 1; - } - + + DEBUG_PRINTF("state=%p, streamState=%p\n", q->state, q->streamState); + + const struct Castle *c = getImplNfa(n); + + if (q->report_current) { + int rv = castleReportCurrent(c, q); + q->report_current = 0; + if (rv == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } + + if (q->cur == q->end) { + return 1; + } + u8 *active = (u8 *)q->streamState + c->activeOffset;// active multibit - - assert(q->cur + 1 < q->end); // require at least two items - assert(q_cur_type(q) == MQE_START); - u64a sp = q_cur_offset(q); - q->cur++; - DEBUG_PRINTF("sp=%llu, abs_end=%llu\n", sp, end + q->offset); - - while (q->cur < q->end) { - DEBUG_PRINTF("q item type=%d offset=%llu\n", q_cur_type(q), - q_cur_offset(q)); - - char found = 0; - if (c->exclusive) { + + assert(q->cur + 1 < q->end); // require at least two items + assert(q_cur_type(q) == MQE_START); + u64a sp = q_cur_offset(q); + q->cur++; + DEBUG_PRINTF("sp=%llu, abs_end=%llu\n", sp, end + q->offset); + + while (q->cur < q->end) { + DEBUG_PRINTF("q item type=%d offset=%llu\n", q_cur_type(q), + q_cur_offset(q)); + + char found = 0; + if (c->exclusive) { u8 *groups = (u8 *)q->streamState + 
c->groupIterOffset; found = mmbit_any(groups, c->numGroups); - } - - if (!found && !mmbit_any(active, c->numRepeats)) { - DEBUG_PRINTF("no repeats active, skipping scan\n"); - goto scan_done; - } - - u64a ep = q_cur_offset(q); - ep = MIN(ep, q->offset + end); - if (sp < ep) { - size_t eloc = 0; - char escape_found = 0; - DEBUG_PRINTF("scanning from sp=%llu to ep=%llu\n", sp, ep); - assert(sp >= q->offset && ep >= q->offset); - if (castleScan(c, q->buffer, sp - q->offset, ep - q->offset, - &eloc)) { - escape_found = 1; - ep = q->offset + eloc; - DEBUG_PRINTF("escape found at %llu\n", ep); - assert(ep >= sp); - } - - assert(sp <= ep); - - if (mode == STOP_AT_MATCH) { - size_t mloc; - if (castleFindMatch(c, sp, ep, q->state, q->streamState, - &mloc)) { - DEBUG_PRINTF("storing match at %llu\n", sp + mloc); - q->cur--; - assert(q->cur < MAX_MQE_LEN); - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = (s64a)(sp - q->offset) + mloc; - return MO_MATCHES_PENDING; - } - } else { - assert(mode == CALLBACK_OUTPUT); - char rv = castleMatchLoop(c, sp, ep, q->state, q->streamState, - q->cb, q->context); - if (rv == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - assert(rv == MO_CONTINUE_MATCHING); - } - - if (escape_found) { + } + + if (!found && !mmbit_any(active, c->numRepeats)) { + DEBUG_PRINTF("no repeats active, skipping scan\n"); + goto scan_done; + } + + u64a ep = q_cur_offset(q); + ep = MIN(ep, q->offset + end); + if (sp < ep) { + size_t eloc = 0; + char escape_found = 0; + DEBUG_PRINTF("scanning from sp=%llu to ep=%llu\n", sp, ep); + assert(sp >= q->offset && ep >= q->offset); + if (castleScan(c, q->buffer, sp - q->offset, ep - q->offset, + &eloc)) { + escape_found = 1; + ep = q->offset + eloc; + DEBUG_PRINTF("escape found at %llu\n", ep); + assert(ep >= sp); + } + + assert(sp <= ep); + + if (mode == STOP_AT_MATCH) { + size_t mloc; + if (castleFindMatch(c, sp, ep, q->state, q->streamState, + &mloc)) { + DEBUG_PRINTF("storing match at %llu\n", sp + mloc); + q->cur--; + assert(q->cur < MAX_MQE_LEN); + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = (s64a)(sp - q->offset) + mloc; + return MO_MATCHES_PENDING; + } + } else { + assert(mode == CALLBACK_OUTPUT); + char rv = castleMatchLoop(c, sp, ep, q->state, q->streamState, + q->cb, q->context); + if (rv == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + assert(rv == MO_CONTINUE_MATCHING); + } + + if (escape_found) { clear_repeats(c, q, active); - } - } - - scan_done: - if (q_cur_loc(q) > end) { - q->cur--; - assert(q->cur < MAX_MQE_LEN); - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = end; - return MO_ALIVE; - } - - sp = q_cur_offset(q); + } + } + + scan_done: + if (q_cur_loc(q) > end) { + q->cur--; + assert(q->cur < MAX_MQE_LEN); + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = end; + return MO_ALIVE; + } + + sp = q_cur_offset(q); castleHandleEvent(c, q, sp, 1); - q->cur++; - } - - if (c->exclusive) { + q->cur++; + } + + if (c->exclusive) { u8 *groups = (u8 *)q->streamState + c->groupIterOffset; if (mmbit_any_precise(groups, c->numGroups)) { return 1; - } - } - - return mmbit_any_precise(active, c->numRepeats); -} - + } + } + + return mmbit_any_precise(active, c->numRepeats); +} + char nfaExecCastle_Q(const struct NFA *n, struct mq *q, s64a end) { - DEBUG_PRINTF("entry\n"); + DEBUG_PRINTF("entry\n"); return nfaExecCastle_Q_i(n, q, end, CALLBACK_OUTPUT); -} - +} + char nfaExecCastle_Q2(const struct NFA *n, struct mq *q, s64a end) { - DEBUG_PRINTF("entry\n"); + 
DEBUG_PRINTF("entry\n"); return nfaExecCastle_Q_i(n, q, end, STOP_AT_MATCH); -} - +} + static s64a castleLastKillLoc(const struct Castle *c, struct mq *q) { assert(q_cur_type(q) == MQE_START); assert(q_last_type(q) == MQE_END); s64a sp = q_cur_loc(q); s64a ep = q_last_loc(q); - + DEBUG_PRINTF("finding final squash in (%lld, %lld]\n", sp, ep); - + size_t loc; if (ep > 0) { if (castleRevScan(c, q->buffer, sp > 0 ? sp : 0, ep, &loc)) { return (s64a)loc; - } + } ep = 0; } - + if (sp < 0) { s64a hlen = q->hlength; if (castleRevScan(c, q->history, sp + hlen, ep + hlen, &loc)) { return (s64a)loc - hlen; - } + } ep = 0; - } + } return sp - 1; /* the repeats are never killed */ -} - +} + char nfaExecCastle_QR(const struct NFA *n, struct mq *q, ReportID report) { - assert(n && q); + assert(n && q); assert(n->type == CASTLE_NFA); - DEBUG_PRINTF("entry\n"); - - if (q->cur == q->end) { - return 1; - } - - assert(q->cur + 1 < q->end); /* require at least two items */ - assert(q_cur_type(q) == MQE_START); - - const struct Castle *c = getImplNfa(n); + DEBUG_PRINTF("entry\n"); + + if (q->cur == q->end) { + return 1; + } + + assert(q->cur + 1 < q->end); /* require at least two items */ + assert(q_cur_type(q) == MQE_START); + + const struct Castle *c = getImplNfa(n); u8 *active = (u8 *)q->streamState + c->activeOffset; - + u64a end_offset = q_last_loc(q) + q->offset; s64a last_kill_loc = castleLastKillLoc(c, q); DEBUG_PRINTF("all repeats killed at %lld (exec range %lld, %lld)\n", last_kill_loc, q_cur_loc(q), q_last_loc(q)); assert(last_kill_loc < q_last_loc(q)); - + if (last_kill_loc != q_cur_loc(q) - 1) { clear_repeats(c, q, active); } - + q->cur++; /* skip start event */ - + /* skip events prior to the repeats being squashed */ while (q_cur_loc(q) <= last_kill_loc) { DEBUG_PRINTF("skipping moot event at %lld\n", q_cur_loc(q)); q->cur++; assert(q->cur < q->end); } - + while (q->cur < q->end) { DEBUG_PRINTF("q item type=%d offset=%llu\n", q_cur_type(q), q_cur_offset(q)); u64a sp = q_cur_offset(q); castleHandleEvent(c, q, sp, 0); - q->cur++; - } - + q->cur++; + } + castleDeactivateStaleSubs(c, end_offset, q->state, q->streamState); char found = 0; - if (c->exclusive) { + if (c->exclusive) { u8 *groups = (u8 *)q->streamState + c->groupIterOffset; found = mmbit_any_precise(groups, c->numGroups); - } - - if (!found && !mmbit_any_precise(active, c->numRepeats)) { - DEBUG_PRINTF("castle is dead\n"); - return 0; - } - + } + + if (!found && !mmbit_any_precise(active, c->numRepeats)) { + DEBUG_PRINTF("castle is dead\n"); + return 0; + } + if (castleInAccept(c, q, report, end_offset)) { - return MO_MATCHES_PENDING; - } - - return 1; -} - + return MO_MATCHES_PENDING; + } + + return 1; +} + char nfaExecCastle_reportCurrent(const struct NFA *n, struct mq *q) { - assert(n && q); + assert(n && q); assert(n->type == CASTLE_NFA); - DEBUG_PRINTF("entry\n"); - - const struct Castle *c = getImplNfa(n); - castleReportCurrent(c, q); - return 0; -} - + DEBUG_PRINTF("entry\n"); + + const struct Castle *c = getImplNfa(n); + castleReportCurrent(c, q); + return 0; +} + char nfaExecCastle_inAccept(const struct NFA *n, ReportID report, struct mq *q) { - assert(n && q); + assert(n && q); assert(n->type == CASTLE_NFA); - DEBUG_PRINTF("entry\n"); - - const struct Castle *c = getImplNfa(n); - return castleInAccept(c, q, report, q_cur_offset(q)); -} - + DEBUG_PRINTF("entry\n"); + + const struct Castle *c = getImplNfa(n); + return castleInAccept(c, q, report, q_cur_offset(q)); +} + char nfaExecCastle_inAnyAccept(const struct NFA *n, struct mq 
*q) { - assert(n && q); + assert(n && q); assert(n->type == CASTLE_NFA); - DEBUG_PRINTF("entry\n"); - - const struct Castle *c = getImplNfa(n); + DEBUG_PRINTF("entry\n"); + + const struct Castle *c = getImplNfa(n); const u64a offset = q_cur_offset(q); DEBUG_PRINTF("offset=%llu\n", offset); @@ -1025,125 +1025,125 @@ char nfaExecCastle_queueInitState(UNUSED const struct NFA *n, struct mq *q) { DEBUG_PRINTF("entry\n"); const struct Castle *c = getImplNfa(n); - assert(q->streamState); - if (c->exclusive) { + assert(q->streamState); + if (c->exclusive) { u8 *groups = (u8 *)q->streamState + c->groupIterOffset; mmbit_clear(groups, c->numGroups); - } - + } + if (c->exclusive != PURE_EXCLUSIVE) { u8 *active = (u8 *)q->streamState + c->activeOffset; - mmbit_clear(active, c->numRepeats); - } - return 0; -} - + mmbit_clear(active, c->numRepeats); + } + return 0; +} + char nfaExecCastle_initCompressedState(const struct NFA *n, UNUSED u64a offset, void *state, UNUSED u8 key) { - assert(n && state); + assert(n && state); assert(n->type == CASTLE_NFA); - DEBUG_PRINTF("entry\n"); - - const struct Castle *c = getImplNfa(n); - if (c->exclusive) { + DEBUG_PRINTF("entry\n"); + + const struct Castle *c = getImplNfa(n); + if (c->exclusive) { u8 *groups = (u8 *)state + c->groupIterOffset; mmbit_clear(groups, c->numGroups); - } - + } + if (c->exclusive != PURE_EXCLUSIVE) { u8 *active = (u8 *)state + c->activeOffset; - mmbit_clear(active, c->numRepeats); - } - return 0; -} - -static really_inline -void subCastleQueueCompressState(const struct Castle *c, const u32 subIdx, - const struct mq *q, const u64a offset) { - const struct SubCastle *sub = getSubCastle(c, subIdx); - const struct RepeatInfo *info = getRepeatInfo(sub); - union RepeatControl *rctrl = getControl(q->state, sub); - char *packed = (char *)q->streamState + sub->streamStateOffset; - DEBUG_PRINTF("sub %u next match %llu\n", subIdx, - repeatNextMatch(info, rctrl, - packed + info->packedCtrlSize, offset)); - repeatPack(packed, info, rctrl, offset); -} - + mmbit_clear(active, c->numRepeats); + } + return 0; +} + +static really_inline +void subCastleQueueCompressState(const struct Castle *c, const u32 subIdx, + const struct mq *q, const u64a offset) { + const struct SubCastle *sub = getSubCastle(c, subIdx); + const struct RepeatInfo *info = getRepeatInfo(sub); + union RepeatControl *rctrl = getControl(q->state, sub); + char *packed = (char *)q->streamState + sub->streamStateOffset; + DEBUG_PRINTF("sub %u next match %llu\n", subIdx, + repeatNextMatch(info, rctrl, + packed + info->packedCtrlSize, offset)); + repeatPack(packed, info, rctrl, offset); +} + char nfaExecCastle_queueCompressState(const struct NFA *n, const struct mq *q, s64a loc) { - assert(n && q); + assert(n && q); assert(n->type == CASTLE_NFA); - DEBUG_PRINTF("entry, loc=%lld\n", loc); - - const struct Castle *c = getImplNfa(n); - - // Pack state for all active repeats. - const u64a offset = q->offset + loc; - DEBUG_PRINTF("offset=%llu\n", offset); - if (c->exclusive) { + DEBUG_PRINTF("entry, loc=%lld\n", loc); + + const struct Castle *c = getImplNfa(n); + + // Pack state for all active repeats. 
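/*
 * Compression walks both state layouts: exclusive groups identify their live
 * sub-castle through a packed index (partial_load_u32 over activeIdxSize
 * bytes), while non-exclusive repeats sit in the 'active' multibit. Either
 * way, subCastleQueueCompressState() ends in repeatPack(), which serialises
 * the RepeatControl block into stream state.
 */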
+ const u64a offset = q->offset + loc; + DEBUG_PRINTF("offset=%llu\n", offset); + if (c->exclusive) { u8 *active = (u8 *)q->streamState; u8 *groups = active + c->groupIterOffset; for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { u8 *cur = active + i * c->activeIdxSize; const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); - DEBUG_PRINTF("packing state for sub %u\n", activeIdx); - subCastleQueueCompressState(c, activeIdx, q, offset); - } - } - + DEBUG_PRINTF("packing state for sub %u\n", activeIdx); + subCastleQueueCompressState(c, activeIdx, q, offset); + } + } + if (c->exclusive != PURE_EXCLUSIVE) { const u8 *active = (const u8 *)q->streamState + c->activeOffset; - for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID); - i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) { - DEBUG_PRINTF("packing state for sub %u\n", i); - subCastleQueueCompressState(c, i, q, offset); - } - } - return 0; -} - -static really_inline -void subCastleExpandState(const struct Castle *c, const u32 subIdx, - void *dest, const void *src, const u64a offset) { - const struct SubCastle *sub = getSubCastle(c, subIdx); - const struct RepeatInfo *info = getRepeatInfo(sub); - DEBUG_PRINTF("unpacking state for sub %u\n", subIdx); - union RepeatControl *rctrl = getControl(dest, sub); - const char *packed = (const char *)src + sub->streamStateOffset; - repeatUnpack(packed, info, offset, rctrl); - DEBUG_PRINTF("sub %u next match %llu\n", subIdx, - repeatNextMatch(info, rctrl, - packed + info->packedCtrlSize, offset)); -} - + for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID); + i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) { + DEBUG_PRINTF("packing state for sub %u\n", i); + subCastleQueueCompressState(c, i, q, offset); + } + } + return 0; +} + +static really_inline +void subCastleExpandState(const struct Castle *c, const u32 subIdx, + void *dest, const void *src, const u64a offset) { + const struct SubCastle *sub = getSubCastle(c, subIdx); + const struct RepeatInfo *info = getRepeatInfo(sub); + DEBUG_PRINTF("unpacking state for sub %u\n", subIdx); + union RepeatControl *rctrl = getControl(dest, sub); + const char *packed = (const char *)src + sub->streamStateOffset; + repeatUnpack(packed, info, offset, rctrl); + DEBUG_PRINTF("sub %u next match %llu\n", subIdx, + repeatNextMatch(info, rctrl, + packed + info->packedCtrlSize, offset)); +} + char nfaExecCastle_expandState(const struct NFA *n, void *dest, const void *src, u64a offset, UNUSED u8 key) { - assert(n && dest && src); + assert(n && dest && src); assert(n->type == CASTLE_NFA); - DEBUG_PRINTF("entry, src=%p, dest=%p, offset=%llu\n", src, dest, offset); - - const struct Castle *c = getImplNfa(n); - - if (c->exclusive) { + DEBUG_PRINTF("entry, src=%p, dest=%p, offset=%llu\n", src, dest, offset); + + const struct Castle *c = getImplNfa(n); + + if (c->exclusive) { const u8 *active = (const u8 *)src; const u8 *groups = active + c->groupIterOffset; for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID); i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) { const u8 *cur = active + i * c->activeIdxSize; const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize); - subCastleExpandState(c, activeIdx, dest, src, offset); - } - } - + subCastleExpandState(c, activeIdx, dest, src, offset); + } + } + if (c->exclusive != PURE_EXCLUSIVE) { - // Unpack state for all active repeats. + // Unpack state for all active repeats. 
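/*
 * expandState is the inverse of queueCompressState: repeatUnpack() in
 * subCastleExpandState() rebuilds each active repeat's RepeatControl block
 * in scratch ('dest') from the packed bytes held in stream state ('src').
 */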
const u8 *active = (const u8 *)src + c->activeOffset; - for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID); - i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) { - subCastleExpandState(c, i, dest, src, offset); - } - } - return 0; -} + for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID); + i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) { + subCastleExpandState(c, i, dest, src, offset); + } + } + return 0; +} diff --git a/contrib/libs/hyperscan/src/nfa/castle.h b/contrib/libs/hyperscan/src/nfa/castle.h index fdbd7d8592..cc7496ca71 100644 --- a/contrib/libs/hyperscan/src/nfa/castle.h +++ b/contrib/libs/hyperscan/src/nfa/castle.h @@ -1,43 +1,43 @@ -/* +/* * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef NFA_CASTLE_H -#define NFA_CASTLE_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include "ue2common.h" - -struct mq; -struct NFA; - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef NFA_CASTLE_H +#define NFA_CASTLE_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "ue2common.h" + +struct mq; +struct NFA; + char nfaExecCastle_Q(const struct NFA *n, struct mq *q, s64a end); char nfaExecCastle_Q2(const struct NFA *n, struct mq *q, s64a end); char nfaExecCastle_QR(const struct NFA *n, struct mq *q, ReportID report); @@ -52,14 +52,14 @@ char nfaExecCastle_queueCompressState(const struct NFA *nfa, const struct mq *q, s64a loc); char nfaExecCastle_expandState(const struct NFA *nfa, void *dest, const void *src, u64a offset, u8 key); - + #define nfaExecCastle_testEOD NFA_API_NO_IMPL #define nfaExecCastle_B_Reverse NFA_API_NO_IMPL #define nfaExecCastle_zombie_status NFA_API_ZOMBIE_NO_IMPL - -#ifdef __cplusplus -} - -#endif // __cplusplus - -#endif + +#ifdef __cplusplus +} + +#endif // __cplusplus + +#endif diff --git a/contrib/libs/hyperscan/src/nfa/castle_internal.h b/contrib/libs/hyperscan/src/nfa/castle_internal.h index 2b7b84b69a..429c232ff8 100644 --- a/contrib/libs/hyperscan/src/nfa/castle_internal.h +++ b/contrib/libs/hyperscan/src/nfa/castle_internal.h @@ -1,79 +1,79 @@ -/* +/* * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Castle: multi-tenant repeat engine, data structures. 
- */ - -#ifndef NFA_CASTLE_INTERNAL_H -#define NFA_CASTLE_INTERNAL_H - -#include "ue2common.h" -#include "repeat_internal.h" - -struct SubCastle { - ReportID report; //!< report to raise on match - u32 fullStateOffset; //!< offset within full state (scratch) - u32 streamStateOffset; //!< offset within stream state - u32 repeatInfoOffset; //!< offset of RepeatInfo structure - // relative to the start of SubCastle + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Castle: multi-tenant repeat engine, data structures. + */ + +#ifndef NFA_CASTLE_INTERNAL_H +#define NFA_CASTLE_INTERNAL_H + +#include "ue2common.h" +#include "repeat_internal.h" + +struct SubCastle { + ReportID report; //!< report to raise on match + u32 fullStateOffset; //!< offset within full state (scratch) + u32 streamStateOffset; //!< offset within stream state + u32 repeatInfoOffset; //!< offset of RepeatInfo structure + // relative to the start of SubCastle u32 exclusiveId; //!< exclusive group id of this SubCastle, // set to the number of SubCastles in Castle // if it is not exclusive -}; - -#define CASTLE_DOT 0 -#define CASTLE_VERM 1 -#define CASTLE_NVERM 2 -#define CASTLE_SHUFTI 3 -#define CASTLE_TRUFFLE 4 - +}; + +#define CASTLE_DOT 0 +#define CASTLE_VERM 1 +#define CASTLE_NVERM 2 +#define CASTLE_SHUFTI 3 +#define CASTLE_TRUFFLE 4 + enum ExclusiveType { NOT_EXCLUSIVE, //!< no subcastles are exclusive EXCLUSIVE, //!< a subset of subcastles are exclusive PURE_EXCLUSIVE //!< all subcastles are exclusive }; -/** - * \brief Castle engine structure. - * - * A Castle is a collection of repeats that all share the same character - * reachability. - * - * The whole engine is laid out in memory as: - * - * - struct NFA - * - struct Castle - * - struct SubCastle[numRepeats] - * - tables for sparse model repeats +/** + * \brief Castle engine structure. + * + * A Castle is a collection of repeats that all share the same character + * reachability. 
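 * (Illustrative example, not taken from the sources: the repeats from
 * /x{2,5}/ and /x{10,}/ both have reachability {'x'}, so they can be
 * housed in a single Castle.)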
+ * + * The whole engine is laid out in memory as: + * + * - struct NFA + * - struct Castle + * - struct SubCastle[numRepeats] + * - tables for sparse model repeats * - sparse iterator for subcastles that may be stale - * - * Castle stores an "active repeats" multibit in stream state, followed by the + * + * Castle stores an "active repeats" multibit in stream state, followed by the * packed repeat state for each SubCastle. If there are both exclusive and * non-exclusive SubCastle groups, we use an active id for each exclusive group * and a multibit for the non-exclusive group. We also store an "active @@ -106,12 +106,12 @@ enum ExclusiveType { * * ... * * | | * * |---| - * - * In full state (stored in scratch space) it stores a temporary multibit over - * the repeats (used by \ref castleMatchLoop), followed by the repeat control + * + * In full state (stored in scratch space) it stores a temporary multibit over + * the repeats (used by \ref castleMatchLoop), followed by the repeat control * blocks for each SubCastle. - */ -struct ALIGN_AVX_DIRECTIVE Castle { + */ +struct ALIGN_AVX_DIRECTIVE Castle { u32 numRepeats; //!< number of repeats in Castle u32 numGroups; //!< number of exclusive groups u8 type; //!< tells us which scanning mechanism (below) to use @@ -125,19 +125,19 @@ struct ALIGN_AVX_DIRECTIVE Castle { u32 groupIterOffset; //!< offset to a iterator to check the aliveness of // exclusive groups - union { - struct { - char c; - } verm; - struct { - m128 mask_lo; - m128 mask_hi; - } shuf; - struct { - m128 mask1; - m128 mask2; - } truffle; - } u; -}; - -#endif // NFA_CASTLE_INTERNAL_H + union { + struct { + char c; + } verm; + struct { + m128 mask_lo; + m128 mask_hi; + } shuf; + struct { + m128 mask1; + m128 mask2; + } truffle; + } u; +}; + +#endif // NFA_CASTLE_INTERNAL_H diff --git a/contrib/libs/hyperscan/src/nfa/castlecompile.cpp b/contrib/libs/hyperscan/src/nfa/castlecompile.cpp index fd0dd4a152..d4c361337a 100644 --- a/contrib/libs/hyperscan/src/nfa/castlecompile.cpp +++ b/contrib/libs/hyperscan/src/nfa/castlecompile.cpp @@ -1,273 +1,273 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + /** * \file - * \brief Castle: multi-tenant repeat engine, compiler code. - */ + * \brief Castle: multi-tenant repeat engine, compiler code. 
+ */ + +#include "castlecompile.h" -#include "castlecompile.h" - -#include "castle_internal.h" +#include "castle_internal.h" #include "limex_limits.h" -#include "nfa_internal.h" -#include "repeatcompile.h" -#include "shufticompile.h" -#include "trufflecompile.h" -#include "nfagraph/ng_dump.h" -#include "nfagraph/ng_equivalence.h" -#include "nfagraph/ng_repeat.h" -#include "nfagraph/ng_redundancy.h" -#include "nfagraph/ng_util.h" -#include "util/alloc.h" -#include "util/compile_context.h" -#include "util/container.h" -#include "util/dump_charclass.h" +#include "nfa_internal.h" +#include "repeatcompile.h" +#include "shufticompile.h" +#include "trufflecompile.h" +#include "nfagraph/ng_dump.h" +#include "nfagraph/ng_equivalence.h" +#include "nfagraph/ng_repeat.h" +#include "nfagraph/ng_redundancy.h" +#include "nfagraph/ng_util.h" +#include "util/alloc.h" +#include "util/compile_context.h" +#include "util/container.h" +#include "util/dump_charclass.h" #include "util/flat_containers.h" -#include "util/graph.h" -#include "util/make_unique.h" +#include "util/graph.h" +#include "util/make_unique.h" #include "util/multibit_build.h" #include "util/report_manager.h" -#include "util/verify_types.h" -#include "grey.h" - -#include <stack> -#include <cassert> - +#include "util/verify_types.h" +#include "grey.h" + +#include <stack> +#include <cassert> + #include <boost/graph/adjacency_list.hpp> -#include <boost/range/adaptor/map.hpp> - -using namespace std; -using boost::adaptors::map_keys; -using boost::adaptors::map_values; - -namespace ue2 { - -#define CLIQUE_GRAPH_MAX_SIZE 1000 - -static -u32 depth_to_u32(const depth &d) { - assert(d.is_reachable()); - if (d.is_infinite()) { - return REPEAT_INF; - } - - u32 d_val = d; - assert(d_val < REPEAT_INF); - return d_val; -} - -static -void writeCastleScanEngine(const CharReach &cr, Castle *c) { - if (cr.all()) { - c->type = CASTLE_DOT; - return; - } - - if (cr.count() == 1) { - c->type = CASTLE_NVERM; - c->u.verm.c = cr.find_first(); - return; - } - - const CharReach negated(~cr); - if (negated.count() == 1) { - c->type = CASTLE_VERM; - c->u.verm.c = negated.find_first(); - return; - } - +#include <boost/range/adaptor/map.hpp> + +using namespace std; +using boost::adaptors::map_keys; +using boost::adaptors::map_values; + +namespace ue2 { + +#define CLIQUE_GRAPH_MAX_SIZE 1000 + +static +u32 depth_to_u32(const depth &d) { + assert(d.is_reachable()); + if (d.is_infinite()) { + return REPEAT_INF; + } + + u32 d_val = d; + assert(d_val < REPEAT_INF); + return d_val; +} + +static +void writeCastleScanEngine(const CharReach &cr, Castle *c) { + if (cr.all()) { + c->type = CASTLE_DOT; + return; + } + + if (cr.count() == 1) { + c->type = CASTLE_NVERM; + c->u.verm.c = cr.find_first(); + return; + } + + const CharReach negated(~cr); + if (negated.count() == 1) { + c->type = CASTLE_VERM; + c->u.verm.c = negated.find_first(); + return; + } + if (shuftiBuildMasks(negated, (u8 *)&c->u.shuf.mask_lo, (u8 *)&c->u.shuf.mask_hi) != -1) { - c->type = CASTLE_SHUFTI; - return; - } - - c->type = CASTLE_TRUFFLE; + c->type = CASTLE_SHUFTI; + return; + } + + c->type = CASTLE_TRUFFLE; truffleBuildMasks(negated, (u8 *)(u8 *)&c->u.truffle.mask1, (u8 *)&c->u.truffle.mask2); -} - -static -bool literalOverlap(const vector<CharReach> &a, const vector<CharReach> &b, - const size_t dist) { - for (size_t i = 0; i < b.size(); i++) { - if (i > dist) { - return true; - } - size_t overlap_len = b.size() - i; - if (overlap_len <= a.size()) { - if (matches(a.end() - overlap_len, a.end(), b.begin(), - 
b.end() - i)) { - return false; - } - } else { - assert(overlap_len > a.size()); - if (matches(a.begin(), a.end(), b.end() - i - a.size(), - b.end() - i)) { - return false; - } - } - } - - return b.size() > dist; -} - -struct CliqueVertexProps { - CliqueVertexProps() {} - explicit CliqueVertexProps(u32 state_in) : stateId(state_in) {} - - u32 stateId = ~0U; -}; - -typedef boost::adjacency_list<boost::listS, boost::listS, boost::undirectedS, - CliqueVertexProps> CliqueGraph; -typedef CliqueGraph::vertex_descriptor CliqueVertex; - -static -void getNeighborInfo(const CliqueGraph &g, vector<u32> &neighbor, - const CliqueVertex &cv, const set<u32> &group) { - u32 id = g[cv].stateId; - - // find neighbors for cv - for (const auto &v : adjacent_vertices_range(cv, g)) { +} + +static +bool literalOverlap(const vector<CharReach> &a, const vector<CharReach> &b, + const size_t dist) { + for (size_t i = 0; i < b.size(); i++) { + if (i > dist) { + return true; + } + size_t overlap_len = b.size() - i; + if (overlap_len <= a.size()) { + if (matches(a.end() - overlap_len, a.end(), b.begin(), + b.end() - i)) { + return false; + } + } else { + assert(overlap_len > a.size()); + if (matches(a.begin(), a.end(), b.end() - i - a.size(), + b.end() - i)) { + return false; + } + } + } + + return b.size() > dist; +} + +struct CliqueVertexProps { + CliqueVertexProps() {} + explicit CliqueVertexProps(u32 state_in) : stateId(state_in) {} + + u32 stateId = ~0U; +}; + +typedef boost::adjacency_list<boost::listS, boost::listS, boost::undirectedS, + CliqueVertexProps> CliqueGraph; +typedef CliqueGraph::vertex_descriptor CliqueVertex; + +static +void getNeighborInfo(const CliqueGraph &g, vector<u32> &neighbor, + const CliqueVertex &cv, const set<u32> &group) { + u32 id = g[cv].stateId; + + // find neighbors for cv + for (const auto &v : adjacent_vertices_range(cv, g)) { if (g[v].stateId != id && contains(group, g[v].stateId)) { - neighbor.push_back(g[v].stateId); - DEBUG_PRINTF("Neighbor:%u\n", g[v].stateId); - } - } -} - -static -void findCliqueGroup(CliqueGraph &cg, vector<u32> &clique) { - stack<vector<u32>> gStack; - - // Create mapping between vertex and id - map<u32, CliqueVertex> vertexMap; - vector<u32> init; - for (const auto &v : vertices_range(cg)) { - vertexMap[cg[v].stateId] = v; - init.push_back(cg[v].stateId); - } - gStack.push(init); - - // Get the vertex to start from - CliqueGraph::vertex_iterator vi, ve; - tie(vi, ve) = vertices(cg); - while (!gStack.empty()) { - vector<u32> g = gStack.top(); - gStack.pop(); - - // Choose a vertex from the graph - u32 id = g[0]; - const CliqueVertex &n = vertexMap.at(id); - clique.push_back(id); - // Corresponding vertex in the original graph - vector<u32> neighbor; - set<u32> subgraphId(g.begin(), g.end()); - getNeighborInfo(cg, neighbor, n, subgraphId); - // Get graph consisting of neighbors for left branch - if (!neighbor.empty()) { - gStack.push(neighbor); - } - } -} - -template<typename Graph> -bool graph_empty(const Graph &g) { - typename Graph::vertex_iterator vi, ve; - tie(vi, ve) = vertices(g); - return vi == ve; -} - -static -vector<u32> removeClique(CliqueGraph &cg) { - vector<vector<u32>> cliquesVec(1); + neighbor.push_back(g[v].stateId); + DEBUG_PRINTF("Neighbor:%u\n", g[v].stateId); + } + } +} + +static +void findCliqueGroup(CliqueGraph &cg, vector<u32> &clique) { + stack<vector<u32>> gStack; + + // Create mapping between vertex and id + map<u32, CliqueVertex> vertexMap; + vector<u32> init; + for (const auto &v : vertices_range(cg)) { + 
vertexMap[cg[v].stateId] = v; + init.push_back(cg[v].stateId); + } + gStack.push(init); + + // Get the vertex to start from + CliqueGraph::vertex_iterator vi, ve; + tie(vi, ve) = vertices(cg); + while (!gStack.empty()) { + vector<u32> g = gStack.top(); + gStack.pop(); + + // Choose a vertex from the graph + u32 id = g[0]; + const CliqueVertex &n = vertexMap.at(id); + clique.push_back(id); + // Corresponding vertex in the original graph + vector<u32> neighbor; + set<u32> subgraphId(g.begin(), g.end()); + getNeighborInfo(cg, neighbor, n, subgraphId); + // Get graph consisting of neighbors for left branch + if (!neighbor.empty()) { + gStack.push(neighbor); + } + } +} + +template<typename Graph> +bool graph_empty(const Graph &g) { + typename Graph::vertex_iterator vi, ve; + tie(vi, ve) = vertices(g); + return vi == ve; +} + +static +vector<u32> removeClique(CliqueGraph &cg) { + vector<vector<u32>> cliquesVec(1); DEBUG_PRINTF("graph size:%zu\n", num_vertices(cg)); - findCliqueGroup(cg, cliquesVec[0]); - while (!graph_empty(cg)) { - const vector<u32> &c = cliquesVec.back(); - vector<CliqueVertex> dead; - for (const auto &v : vertices_range(cg)) { - if (find(c.begin(), c.end(), cg[v].stateId) != c.end()) { - dead.push_back(v); - } - } - for (const auto &v : dead) { - clear_vertex(v, cg); - remove_vertex(v, cg); - } - if (graph_empty(cg)) { - break; - } - vector<u32> clique; - findCliqueGroup(cg, clique); - cliquesVec.push_back(clique); - } - - // get the independent set with max size - size_t max = 0; - size_t id = 0; - for (size_t j = 0; j < cliquesVec.size(); ++j) { - if (cliquesVec[j].size() > max) { - max = cliquesVec[j].size(); - id = j; - } - } - + findCliqueGroup(cg, cliquesVec[0]); + while (!graph_empty(cg)) { + const vector<u32> &c = cliquesVec.back(); + vector<CliqueVertex> dead; + for (const auto &v : vertices_range(cg)) { + if (find(c.begin(), c.end(), cg[v].stateId) != c.end()) { + dead.push_back(v); + } + } + for (const auto &v : dead) { + clear_vertex(v, cg); + remove_vertex(v, cg); + } + if (graph_empty(cg)) { + break; + } + vector<u32> clique; + findCliqueGroup(cg, clique); + cliquesVec.push_back(clique); + } + + // get the independent set with max size + size_t max = 0; + size_t id = 0; + for (size_t j = 0; j < cliquesVec.size(); ++j) { + if (cliquesVec[j].size() > max) { + max = cliquesVec[j].size(); + id = j; + } + } + DEBUG_PRINTF("clique size:%zu\n", cliquesVec[id].size()); - return cliquesVec[id]; -} - -// if the location of any reset character in one literal are after -// the end locations where it overlaps with other literals, -// then the literals are mutual exclusive -static + return cliquesVec[id]; +} + +// if the location of any reset character in one literal are after +// the end locations where it overlaps with other literals, +// then the literals are mutual exclusive +static bool findExclusivePair(const size_t id1, const size_t id2, const size_t lower, - const vector<vector<size_t>> &min_reset_dist, - const vector<vector<vector<CharReach>>> &triggers) { - const auto &triggers1 = triggers[id1]; - const auto &triggers2 = triggers[id2]; + const vector<vector<size_t>> &min_reset_dist, + const vector<vector<vector<CharReach>>> &triggers) { + const auto &triggers1 = triggers[id1]; + const auto &triggers2 = triggers[id2]; for (size_t i = 0; i < triggers1.size(); ++i) { for (size_t j = 0; j < triggers2.size(); ++j) { - if (!literalOverlap(triggers1[i], triggers2[j], + if (!literalOverlap(triggers1[i], triggers2[j], min_reset_dist[id2 - lower][j]) || - 
!literalOverlap(triggers2[j], triggers1[i], + !literalOverlap(triggers2[j], triggers1[i], min_reset_dist[id1 - lower][i])) { - return false; - } - } - } - return true; -} - -static + return false; + } + } + } + return true; +} + +static vector<vector<u32>> checkExclusion(u32 &streamStateSize, const CharReach &cr, const vector<vector<vector<CharReach>>> &triggers, @@ -276,20 +276,20 @@ vector<vector<u32>> checkExclusion(u32 &streamStateSize, vector<vector<u32>> groups; size_t trigSize = triggers.size(); DEBUG_PRINTF("trigSize %zu\n", trigSize); - + size_t lower = 0; size_t total = 0; while (lower < trigSize) { vector<CliqueVertex> vertices; unique_ptr<CliqueGraph> cg = std::make_unique<CliqueGraph>(); - + vector<vector<size_t>> min_reset_dist; size_t upper = min(lower + CLIQUE_GRAPH_MAX_SIZE, trigSize); // get min reset distance for each repeat for (size_t i = lower; i < upper; i++) { CliqueVertex v = add_vertex(CliqueVertexProps(i), *cg); vertices.push_back(v); - + const vector<size_t> &tmp_dist = minResetDistToEnd(triggers[i], cr); min_reset_dist.push_back(tmp_dist); @@ -304,8 +304,8 @@ vector<vector<u32>> checkExclusion(u32 &streamStateSize, CliqueVertex d = vertices[j - lower]; add_edge(s, d, *cg); } - } - } + } + } // find the largest exclusive group auto clique = removeClique(*cg); @@ -317,17 +317,17 @@ vector<vector<u32>> checkExclusion(u32 &streamStateSize, } lower += CLIQUE_GRAPH_MAX_SIZE; - } + } DEBUG_PRINTF("clique size %zu, num of repeats %zu\n", total, numRepeats); if (total == numRepeats) { exclusive = PURE_EXCLUSIVE; streamStateSize = 0; }; - + return groups; -} - +} + namespace { struct ExclusiveInfo { @@ -339,37 +339,37 @@ struct ExclusiveInfo { }; } -static -void buildSubcastles(const CastleProto &proto, vector<SubCastle> &subs, - vector<RepeatInfo> &infos, vector<u64a> &patchSize, - const vector<pair<depth, bool>> &repeatInfoPair, - u32 &scratchStateSize, u32 &streamStateSize, - u32 &tableSize, vector<u64a> &tables, u32 &sparseRepeats, +static +void buildSubcastles(const CastleProto &proto, vector<SubCastle> &subs, + vector<RepeatInfo> &infos, vector<u64a> &patchSize, + const vector<pair<depth, bool>> &repeatInfoPair, + u32 &scratchStateSize, u32 &streamStateSize, + u32 &tableSize, vector<u64a> &tables, u32 &sparseRepeats, const ExclusiveInfo &exclusiveInfo, vector<u32> &may_stale, const ReportManager &rm) { const bool remap_reports = has_managed_reports(proto.kind); - u32 i = 0; + u32 i = 0; const auto &groupId = exclusiveInfo.groupId; const auto &numGroups = exclusiveInfo.numGroups; vector<u32> maxStreamSize(numGroups, 0); - for (auto it = proto.repeats.begin(), ite = proto.repeats.end(); - it != ite; ++it, ++i) { - const PureRepeat &pr = it->second; - depth min_period = repeatInfoPair[i].first; - bool is_reset = repeatInfoPair[i].second; - - enum RepeatType rtype = chooseRepeatType(pr.bounds.min, pr.bounds.max, + for (auto it = proto.repeats.begin(), ite = proto.repeats.end(); + it != ite; ++it, ++i) { + const PureRepeat &pr = it->second; + depth min_period = repeatInfoPair[i].first; + bool is_reset = repeatInfoPair[i].second; + + enum RepeatType rtype = chooseRepeatType(pr.bounds.min, pr.bounds.max, min_period, is_reset, true); - RepeatStateInfo rsi(rtype, pr.bounds.min, pr.bounds.max, min_period); - - DEBUG_PRINTF("sub %u: selected %s model for %s repeat\n", i, - repeatTypeName(rtype), pr.bounds.str().c_str()); - - SubCastle &sub = subs[i]; - RepeatInfo &info = infos[i]; - + RepeatStateInfo rsi(rtype, pr.bounds.min, pr.bounds.max, min_period); + + DEBUG_PRINTF("sub 
%u: selected %s model for %s repeat\n", i, + repeatTypeName(rtype), pr.bounds.str().c_str()); + + SubCastle &sub = subs[i]; + RepeatInfo &info = infos[i]; + info.packedCtrlSize = rsi.packedCtrlSize; u32 subStreamStateSize = verify_u32(rsi.packedCtrlSize + rsi.stateSize); @@ -379,46 +379,46 @@ void buildSubcastles(const CastleProto &proto, vector<SubCastle> &subs, maxStreamSize[id] = max(maxStreamSize[id], subStreamStateSize); // SubCastle full/stream state offsets are written in for the group // below. - } else { - sub.fullStateOffset = scratchStateSize; - sub.streamStateOffset = streamStateSize; + } else { + sub.fullStateOffset = scratchStateSize; + sub.streamStateOffset = streamStateSize; scratchStateSize += verify_u32(sizeof(RepeatControl)); - streamStateSize += subStreamStateSize; - } - + streamStateSize += subStreamStateSize; + } + if (pr.bounds.max.is_finite()) { may_stale.push_back(i); } - info.type = verify_u8(rtype); - info.repeatMin = depth_to_u32(pr.bounds.min); - info.repeatMax = depth_to_u32(pr.bounds.max); - info.stateSize = rsi.stateSize; - info.horizon = rsi.horizon; - info.minPeriod = min_period.is_finite() ? (u32)min_period : ~0U; - assert(rsi.packedFieldSizes.size() - <= ARRAY_LENGTH(info.packedFieldSizes)); - copy(rsi.packedFieldSizes.begin(), rsi.packedFieldSizes.end(), - info.packedFieldSizes); - info.patchCount = rsi.patchCount; - info.patchSize = rsi.patchSize; - info.encodingSize = rsi.encodingSize; - info.patchesOffset = rsi.patchesOffset; - + info.type = verify_u8(rtype); + info.repeatMin = depth_to_u32(pr.bounds.min); + info.repeatMax = depth_to_u32(pr.bounds.max); + info.stateSize = rsi.stateSize; + info.horizon = rsi.horizon; + info.minPeriod = min_period.is_finite() ? (u32)min_period : ~0U; + assert(rsi.packedFieldSizes.size() + <= ARRAY_LENGTH(info.packedFieldSizes)); + copy(rsi.packedFieldSizes.begin(), rsi.packedFieldSizes.end(), + info.packedFieldSizes); + info.patchCount = rsi.patchCount; + info.patchSize = rsi.patchSize; + info.encodingSize = rsi.encodingSize; + info.patchesOffset = rsi.patchesOffset; + assert(pr.reports.size() == 1); ReportID id = *pr.reports.begin(); sub.report = remap_reports ? 
rm.getProgramOffset(id) : id; - - if (rtype == REPEAT_SPARSE_OPTIMAL_P) { + + if (rtype == REPEAT_SPARSE_OPTIMAL_P) { for (u32 j = 0; j < rsi.patchSize; j++) { tables.push_back(rsi.table[j]); } sparseRepeats++; patchSize[i] = rsi.patchSize; tableSize += rsi.patchSize; - } - } - + } + } + vector<u32> scratchOffset(numGroups, 0); vector<u32> streamOffset(numGroups, 0); for (const auto &j : groupId) { @@ -426,8 +426,8 @@ void buildSubcastles(const CastleProto &proto, vector<SubCastle> &subs, u32 id = j.second; SubCastle &sub = subs[top]; if (!scratchOffset[id]) { - sub.fullStateOffset = scratchStateSize; - sub.streamStateOffset = streamStateSize; + sub.fullStateOffset = scratchStateSize; + sub.streamStateOffset = streamStateSize; scratchOffset[id] = scratchStateSize; streamOffset[id] = streamStateSize; scratchStateSize += verify_u32(sizeof(RepeatControl)); @@ -435,107 +435,107 @@ void buildSubcastles(const CastleProto &proto, vector<SubCastle> &subs, } else { sub.fullStateOffset = scratchOffset[id]; sub.streamStateOffset = streamOffset[id]; - } - } -} - + } + } +} + bytecode_ptr<NFA> -buildCastle(const CastleProto &proto, - const map<u32, vector<vector<CharReach>>> &triggers, +buildCastle(const CastleProto &proto, + const map<u32, vector<vector<CharReach>>> &triggers, const CompileContext &cc, const ReportManager &rm) { - assert(cc.grey.allowCastle); - - const size_t numRepeats = proto.repeats.size(); - assert(numRepeats > 0 && numRepeats <= proto.max_occupancy); - - const CharReach &cr = proto.reach(); - - DEBUG_PRINTF("reach %s, %zu repeats\n", describeClass(cr).c_str(), - numRepeats); - - vector<SubCastle> subs(numRepeats); - memset(&subs[0], 0, sizeof(SubCastle) * numRepeats); - - vector<RepeatInfo> infos(numRepeats); - memset(&infos[0], 0, sizeof(RepeatInfo) * numRepeats); - - vector<u64a> patchSize(numRepeats); - memset(&patchSize[0], 0, sizeof(u64a) * numRepeats); - - vector<u64a> tables; - - // We start with enough stream state to store the active bitfield. - u32 streamStateSize = mmbit_size(numRepeats); - - // We have a copy of the stream state in scratch for castleMatchLoop. - u32 scratchStateSize = ROUNDUP_N(streamStateSize, alignof(RepeatControl)); - - depth minWidth(depth::infinity()); - depth maxWidth(0); - - u32 i = 0; + assert(cc.grey.allowCastle); + + const size_t numRepeats = proto.repeats.size(); + assert(numRepeats > 0 && numRepeats <= proto.max_occupancy); + + const CharReach &cr = proto.reach(); + + DEBUG_PRINTF("reach %s, %zu repeats\n", describeClass(cr).c_str(), + numRepeats); + + vector<SubCastle> subs(numRepeats); + memset(&subs[0], 0, sizeof(SubCastle) * numRepeats); + + vector<RepeatInfo> infos(numRepeats); + memset(&infos[0], 0, sizeof(RepeatInfo) * numRepeats); + + vector<u64a> patchSize(numRepeats); + memset(&patchSize[0], 0, sizeof(u64a) * numRepeats); + + vector<u64a> tables; + + // We start with enough stream state to store the active bitfield. + u32 streamStateSize = mmbit_size(numRepeats); + + // We have a copy of the stream state in scratch for castleMatchLoop. 
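// (Each live repeat is unpacked into a full RepeatControl in scratch, so the scratch copy starts at the packed stream size rounded up to RepeatControl alignment, and buildSubcastles appends one control block per repeat.)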
+ u32 scratchStateSize = ROUNDUP_N(streamStateSize, alignof(RepeatControl)); + + depth minWidth(depth::infinity()); + depth maxWidth(0); + + u32 i = 0; ExclusiveInfo exclusiveInfo; vector<vector<vector<CharReach>>> candidateTriggers; - vector<u32> candidateRepeats; - vector<pair<depth, bool>> repeatInfoPair; - for (auto it = proto.repeats.begin(), ite = proto.repeats.end(); - it != ite; ++it, ++i) { - const u32 top = it->first; - const PureRepeat &pr = it->second; - assert(pr.reach == cr); - assert(pr.reports.size() == 1); - - if (top != i) { - // Tops have not been remapped? - assert(0); - throw std::logic_error("Tops not remapped"); - } - - minWidth = min(minWidth, pr.bounds.min); - maxWidth = max(maxWidth, pr.bounds.max); - - bool is_reset = false; - depth min_period = depth::infinity(); - - // If we've got a top in the castle without any trigger information, it - // possibly means that we've got a repeat that we can't trigger. We do - // need to cope with it though. - if (contains(triggers, top)) { + vector<u32> candidateRepeats; + vector<pair<depth, bool>> repeatInfoPair; + for (auto it = proto.repeats.begin(), ite = proto.repeats.end(); + it != ite; ++it, ++i) { + const u32 top = it->first; + const PureRepeat &pr = it->second; + assert(pr.reach == cr); + assert(pr.reports.size() == 1); + + if (top != i) { + // Tops have not been remapped? + assert(0); + throw std::logic_error("Tops not remapped"); + } + + minWidth = min(minWidth, pr.bounds.min); + maxWidth = max(maxWidth, pr.bounds.max); + + bool is_reset = false; + depth min_period = depth::infinity(); + + // If we've got a top in the castle without any trigger information, it + // possibly means that we've got a repeat that we can't trigger. We do + // need to cope with it though. + if (contains(triggers, top)) { min_period = depth(minPeriod(triggers.at(top), cr, &is_reset)); - } - - if (min_period > pr.bounds.max) { - DEBUG_PRINTF("trigger is longer than repeat; only need one offset\n"); - is_reset = true; - } - - repeatInfoPair.push_back(make_pair(min_period, is_reset)); - + } + + if (min_period > pr.bounds.max) { + DEBUG_PRINTF("trigger is longer than repeat; only need one offset\n"); + is_reset = true; + } + + repeatInfoPair.push_back(make_pair(min_period, is_reset)); + candidateTriggers.push_back(triggers.at(top)); candidateRepeats.push_back(i); - } - - // Case 1: exclusive repeats + } + + // Case 1: exclusive repeats enum ExclusiveType exclusive = NOT_EXCLUSIVE; - u32 activeIdxSize = 0; + u32 activeIdxSize = 0; u32 groupIterOffset = 0; - if (cc.grey.castleExclusive) { + if (cc.grey.castleExclusive) { auto cliqueGroups = checkExclusion(streamStateSize, cr, candidateTriggers, exclusive, numRepeats); for (const auto &group : cliqueGroups) { // mutual exclusive repeats group found, // update state sizes - activeIdxSize = calcPackedBytes(numRepeats + 1); - streamStateSize += activeIdxSize; - - // replace with top values + activeIdxSize = calcPackedBytes(numRepeats + 1); + streamStateSize += activeIdxSize; + + // replace with top values for (const auto &val : group) { const u32 top = candidateRepeats[val]; exclusiveInfo.groupId[top] = exclusiveInfo.numGroups; - } + } exclusiveInfo.numGroups++; - } + } if (exclusive) { groupIterOffset = streamStateSize; @@ -543,20 +543,20 @@ buildCastle(const CastleProto &proto, } DEBUG_PRINTF("num of groups:%u\n", exclusiveInfo.numGroups); - } + } candidateRepeats.clear(); - - DEBUG_PRINTF("reach %s exclusive %u\n", describeClass(cr).c_str(), - exclusive); - - u32 tableSize = 0; - u32 sparseRepeats 
= 0; + + DEBUG_PRINTF("reach %s exclusive %u\n", describeClass(cr).c_str(), + exclusive); + + u32 tableSize = 0; + u32 sparseRepeats = 0; vector<u32> may_stale; /* sub castles that may go stale */ - buildSubcastles(proto, subs, infos, patchSize, repeatInfoPair, - scratchStateSize, streamStateSize, tableSize, + buildSubcastles(proto, subs, infos, patchSize, repeatInfoPair, + scratchStateSize, streamStateSize, tableSize, tables, sparseRepeats, exclusiveInfo, may_stale, rm); - + DEBUG_PRINTF("%zu subcastles may go stale\n", may_stale.size()); vector<mmbit_sparse_iter> stale_iter; if (!may_stale.empty()) { @@ -565,75 +565,75 @@ buildCastle(const CastleProto &proto, size_t total_size = - sizeof(NFA) + // initial NFA structure - sizeof(Castle) + // Castle structure - sizeof(SubCastle) * subs.size() + // SubCastles themselves - sizeof(RepeatInfo) * subs.size() + // RepeatInfo structure - sizeof(u64a) * tableSize + // table size for - // REPEAT_SPARSE_OPTIMAL_P - sizeof(u64a) * sparseRepeats; // paddings for - // REPEAT_SPARSE_OPTIMAL_P tables - + sizeof(NFA) + // initial NFA structure + sizeof(Castle) + // Castle structure + sizeof(SubCastle) * subs.size() + // SubCastles themselves + sizeof(RepeatInfo) * subs.size() + // RepeatInfo structure + sizeof(u64a) * tableSize + // table size for + // REPEAT_SPARSE_OPTIMAL_P + sizeof(u64a) * sparseRepeats; // paddings for + // REPEAT_SPARSE_OPTIMAL_P tables + total_size = ROUNDUP_N(total_size, alignof(mmbit_sparse_iter)); total_size += byte_length(stale_iter); // stale sparse iter auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size); nfa->type = verify_u8(CASTLE_NFA); - nfa->length = verify_u32(total_size); - nfa->nPositions = verify_u32(subs.size()); - nfa->streamStateSize = streamStateSize; - nfa->scratchStateSize = scratchStateSize; - nfa->minWidth = verify_u32(minWidth); - nfa->maxWidth = maxWidth.is_finite() ? verify_u32(maxWidth) : 0; - + nfa->length = verify_u32(total_size); + nfa->nPositions = verify_u32(subs.size()); + nfa->streamStateSize = streamStateSize; + nfa->scratchStateSize = scratchStateSize; + nfa->minWidth = verify_u32(minWidth); + nfa->maxWidth = maxWidth.is_finite() ? 
verify_u32(maxWidth) : 0; + char * const base_ptr = (char *)nfa.get() + sizeof(NFA); char *ptr = base_ptr; - Castle *c = (Castle *)ptr; - c->numRepeats = verify_u32(subs.size()); + Castle *c = (Castle *)ptr; + c->numRepeats = verify_u32(subs.size()); c->numGroups = exclusiveInfo.numGroups; c->exclusive = verify_s8(exclusive); - c->activeIdxSize = verify_u8(activeIdxSize); + c->activeIdxSize = verify_u8(activeIdxSize); c->activeOffset = verify_u32(c->numGroups * activeIdxSize); c->groupIterOffset = groupIterOffset; - - writeCastleScanEngine(cr, c); - - ptr += sizeof(Castle); - SubCastle *subCastles = ((SubCastle *)(ROUNDUP_PTR(ptr, alignof(u32)))); - copy(subs.begin(), subs.end(), subCastles); - - u32 length = 0; - u32 tableIdx = 0; - for (i = 0; i < numRepeats; i++) { - u32 offset = sizeof(SubCastle) * (numRepeats - i) + length; - SubCastle *sub = &subCastles[i]; - sub->repeatInfoOffset = offset; - - ptr = (char *)sub + offset; - memcpy(ptr, &infos[i], sizeof(RepeatInfo)); - - if (patchSize[i]) { - RepeatInfo *info = (RepeatInfo *)ptr; - u64a *table = ((u64a *)(ROUNDUP_PTR(((char *)(info) + - sizeof(*info)), alignof(u64a)))); - copy(tables.begin() + tableIdx, - tables.begin() + tableIdx + patchSize[i], table); - u32 diff = (char *)table - (char *)info + - sizeof(u64a) * patchSize[i]; - info->length = diff; - length += diff; - tableIdx += patchSize[i]; - } else { - length += sizeof(RepeatInfo); - } - - // set exclusive group info + + writeCastleScanEngine(cr, c); + + ptr += sizeof(Castle); + SubCastle *subCastles = ((SubCastle *)(ROUNDUP_PTR(ptr, alignof(u32)))); + copy(subs.begin(), subs.end(), subCastles); + + u32 length = 0; + u32 tableIdx = 0; + for (i = 0; i < numRepeats; i++) { + u32 offset = sizeof(SubCastle) * (numRepeats - i) + length; + SubCastle *sub = &subCastles[i]; + sub->repeatInfoOffset = offset; + + ptr = (char *)sub + offset; + memcpy(ptr, &infos[i], sizeof(RepeatInfo)); + + if (patchSize[i]) { + RepeatInfo *info = (RepeatInfo *)ptr; + u64a *table = ((u64a *)(ROUNDUP_PTR(((char *)(info) + + sizeof(*info)), alignof(u64a)))); + copy(tables.begin() + tableIdx, + tables.begin() + tableIdx + patchSize[i], table); + u32 diff = (char *)table - (char *)info + + sizeof(u64a) * patchSize[i]; + info->length = diff; + length += diff; + tableIdx += patchSize[i]; + } else { + length += sizeof(RepeatInfo); + } + + // set exclusive group info if (contains(exclusiveInfo.groupId, i)) { sub->exclusiveId = exclusiveInfo.groupId[i]; - } else { + } else { sub->exclusiveId = numRepeats; - } - } + } + } ptr = base_ptr + total_size - sizeof(NFA) - byte_length(stale_iter); @@ -644,356 +644,356 @@ buildCastle(const CastleProto &proto, ptr += byte_length(stale_iter); } - return nfa; -} - -set<ReportID> all_reports(const CastleProto &proto) { - set<ReportID> reports; - for (const ReportID &report : proto.report_map | map_keys) { - reports.insert(report); - } - return reports; -} - -depth findMinWidth(const CastleProto &proto) { - depth min_width(depth::infinity()); - for (const PureRepeat &pr : proto.repeats | map_values) { - min_width = min(min_width, pr.bounds.min); - } - return min_width; -} - -depth findMaxWidth(const CastleProto &proto) { - depth max_width(0); - for (const PureRepeat &pr : proto.repeats | map_values) { - max_width = max(max_width, pr.bounds.max); - } - return max_width; -} - -depth findMinWidth(const CastleProto &proto, u32 top) { - if (!contains(proto.repeats, top)) { - assert(0); // should not happen - return depth::infinity(); - } - return proto.repeats.at(top).bounds.min; -} 
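The code above completes the castle build: buildSubcastles has chosen a repeat model per top, the Castle header, SubCastle array, RepeatInfo records and any REPEAT_SPARSE_OPTIMAL_P tables are laid out in a single bytecode allocation, and the width helpers below summarise repeat bounds for callers. As a rough illustration of how these entry points fit together, here is a minimal sketch that compiles a castle for one bounded repeat (roughly /x{2,5}/); the nfa_kind value NFA_OUTFIX, the include paths and the empty trigger map are illustrative assumptions, not taken from this change.

#include "nfa/castlecompile.h"    // buildCastle, CastleProto, remapCastleTops
#include "nfagraph/ng_repeat.h"   // PureRepeat
#include "util/compile_context.h" // CompileContext (assumed path)
#include "util/report_manager.h"  // ReportManager (assumed path)

#include <map>
#include <vector>

using namespace ue2;

/* Sketch: compile a single-repeat castle. buildCastle asserts
 * cc.grey.allowCastle, so the caller must have castles enabled. */
static bytecode_ptr<NFA> compileExampleCastle(const CompileContext &cc,
                                              const ReportManager &rm) {
    PureRepeat pr;
    pr.reach = CharReach('x'); // all repeats in a castle share one reach
    pr.bounds.min = depth(2);
    pr.bounds.max = depth(5);
    pr.reports.insert(0);      // exactly one report per repeat

    CastleProto proto(NFA_OUTFIX, pr); // NFA_OUTFIX: assumed engine kind

    // Tops must be contiguous before building; a fresh proto already is,
    // but remapCastleTops also fills in the old->new top mapping.
    std::map<u32, u32> top_map;
    remapCastleTops(proto, top_map);

    // No trigger literals for this top: buildCastle then treats the
    // repeat's minimum period as infinite and marks the repeat is_reset.
    std::map<u32, std::vector<std::vector<CharReach>>> triggers;

    return buildCastle(proto, triggers, cc, rm);
}

In the real compiler the proto and trigger map come from the castle discovery and literal analysis passes; the sketch only exercises the interface declared in castlecompile.h below.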
- -depth findMaxWidth(const CastleProto &proto, u32 top) { - if (!contains(proto.repeats, top)) { - assert(0); // should not happen - return depth(0); - } - return proto.repeats.at(top).bounds.max; -} - + return nfa; +} + +set<ReportID> all_reports(const CastleProto &proto) { + set<ReportID> reports; + for (const ReportID &report : proto.report_map | map_keys) { + reports.insert(report); + } + return reports; +} + +depth findMinWidth(const CastleProto &proto) { + depth min_width(depth::infinity()); + for (const PureRepeat &pr : proto.repeats | map_values) { + min_width = min(min_width, pr.bounds.min); + } + return min_width; +} + +depth findMaxWidth(const CastleProto &proto) { + depth max_width(0); + for (const PureRepeat &pr : proto.repeats | map_values) { + max_width = max(max_width, pr.bounds.max); + } + return max_width; +} + +depth findMinWidth(const CastleProto &proto, u32 top) { + if (!contains(proto.repeats, top)) { + assert(0); // should not happen + return depth::infinity(); + } + return proto.repeats.at(top).bounds.min; +} + +depth findMaxWidth(const CastleProto &proto, u32 top) { + if (!contains(proto.repeats, top)) { + assert(0); // should not happen + return depth(0); + } + return proto.repeats.at(top).bounds.max; +} + CastleProto::CastleProto(nfa_kind k, const PureRepeat &pr) : kind(k) { - assert(pr.reach.any()); - assert(pr.reports.size() == 1); - u32 top = 0; - repeats.emplace(top, pr); - for (const auto &report : pr.reports) { - report_map[report].insert(top); - } -} - -const CharReach &CastleProto::reach() const { - assert(!repeats.empty()); - return repeats.begin()->second.reach; -} - -u32 CastleProto::add(const PureRepeat &pr) { - assert(repeats.size() < max_occupancy); - assert(pr.reach == reach()); - assert(pr.reports.size() == 1); - u32 top = next_top++; - DEBUG_PRINTF("selected unused top %u\n", top); - assert(!contains(repeats, top)); - repeats.emplace(top, pr); - for (const auto &report : pr.reports) { - report_map[report].insert(top); - } - return top; -} - -void CastleProto::erase(u32 top) { - DEBUG_PRINTF("erase top %u\n", top); - assert(contains(repeats, top)); - repeats.erase(top); - for (auto &m : report_map) { - m.second.erase(top); - } -} - -u32 CastleProto::merge(const PureRepeat &pr) { - assert(repeats.size() <= max_occupancy); - assert(pr.reach == reach()); - assert(pr.reports.size() == 1); - - // First, see if this repeat is already in this castle. 
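// (A repeat with identical reach, bounds and report can reuse its existing top; only genuinely new repeats consume castle occupancy.)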
- for (const auto &m : repeats) { - if (m.second == pr) { - DEBUG_PRINTF("repeat already present, with top %u\n", m.first); - return m.first; - } - } - - if (repeats.size() == max_occupancy) { - DEBUG_PRINTF("this castle is full\n"); - return max_occupancy; - } - - return add(pr); -} - -bool mergeCastle(CastleProto &c1, const CastleProto &c2, - map<u32, u32> &top_map) { - assert(&c1 != &c2); + assert(pr.reach.any()); + assert(pr.reports.size() == 1); + u32 top = 0; + repeats.emplace(top, pr); + for (const auto &report : pr.reports) { + report_map[report].insert(top); + } +} + +const CharReach &CastleProto::reach() const { + assert(!repeats.empty()); + return repeats.begin()->second.reach; +} + +u32 CastleProto::add(const PureRepeat &pr) { + assert(repeats.size() < max_occupancy); + assert(pr.reach == reach()); + assert(pr.reports.size() == 1); + u32 top = next_top++; + DEBUG_PRINTF("selected unused top %u\n", top); + assert(!contains(repeats, top)); + repeats.emplace(top, pr); + for (const auto &report : pr.reports) { + report_map[report].insert(top); + } + return top; +} + +void CastleProto::erase(u32 top) { + DEBUG_PRINTF("erase top %u\n", top); + assert(contains(repeats, top)); + repeats.erase(top); + for (auto &m : report_map) { + m.second.erase(top); + } +} + +u32 CastleProto::merge(const PureRepeat &pr) { + assert(repeats.size() <= max_occupancy); + assert(pr.reach == reach()); + assert(pr.reports.size() == 1); + + // First, see if this repeat is already in this castle. + for (const auto &m : repeats) { + if (m.second == pr) { + DEBUG_PRINTF("repeat already present, with top %u\n", m.first); + return m.first; + } + } + + if (repeats.size() == max_occupancy) { + DEBUG_PRINTF("this castle is full\n"); + return max_occupancy; + } + + return add(pr); +} + +bool mergeCastle(CastleProto &c1, const CastleProto &c2, + map<u32, u32> &top_map) { + assert(&c1 != &c2); assert(c1.kind == c2.kind); - - DEBUG_PRINTF("c1 has %zu repeats, c2 has %zu repeats\n", c1.repeats.size(), - c2.repeats.size()); - - if (c1.reach() != c2.reach()) { - DEBUG_PRINTF("different reach!\n"); - return false; - } - - if (c1.repeats.size() + c2.repeats.size() > c1.max_occupancy) { - DEBUG_PRINTF("too many repeats to merge\n"); - return false; - } - - top_map.clear(); - - for (const auto &m : c2.repeats) { - const u32 top = m.first; - const PureRepeat &pr = m.second; - DEBUG_PRINTF("top %u\n", top); + + DEBUG_PRINTF("c1 has %zu repeats, c2 has %zu repeats\n", c1.repeats.size(), + c2.repeats.size()); + + if (c1.reach() != c2.reach()) { + DEBUG_PRINTF("different reach!\n"); + return false; + } + + if (c1.repeats.size() + c2.repeats.size() > c1.max_occupancy) { + DEBUG_PRINTF("too many repeats to merge\n"); + return false; + } + + top_map.clear(); + + for (const auto &m : c2.repeats) { + const u32 top = m.first; + const PureRepeat &pr = m.second; + DEBUG_PRINTF("top %u\n", top); u32 new_top = c1.merge(pr); - top_map[top] = new_top; - DEBUG_PRINTF("adding repeat: map %u->%u\n", top, new_top); - } - - assert(c1.repeats.size() <= c1.max_occupancy); - return true; -} - -void remapCastleTops(CastleProto &proto, map<u32, u32> &top_map) { - map<u32, PureRepeat> out; - top_map.clear(); - - for (const auto &m : proto.repeats) { - const u32 top = m.first; - const PureRepeat &pr = m.second; - u32 new_top = out.size(); - out.emplace(new_top, pr); - top_map[top] = new_top; - } - - proto.repeats.swap(out); - - // Remap report map. 
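// (The tops were renumbered above, so the report -> tops index must be rebuilt from the remapped repeats.)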
- proto.report_map.clear(); - for (const auto &m : proto.repeats) { - const u32 top = m.first; - const PureRepeat &pr = m.second; - for (const auto &report : pr.reports) { - proto.report_map[report].insert(top); - } - } - - assert(proto.repeats.size() <= proto.max_occupancy); -} - -namespace { -struct HasReport { - explicit HasReport(ReportID r) : report(r) {} - - bool operator()(const pair<u32, PureRepeat> &a) const { - return contains(a.second.reports, report); - } - -private: - ReportID report; -}; -} - -bool is_equal(const CastleProto &c1, ReportID report1, const CastleProto &c2, - ReportID report2) { - assert(!c1.repeats.empty()); - assert(!c2.repeats.empty()); + top_map[top] = new_top; + DEBUG_PRINTF("adding repeat: map %u->%u\n", top, new_top); + } + + assert(c1.repeats.size() <= c1.max_occupancy); + return true; +} + +void remapCastleTops(CastleProto &proto, map<u32, u32> &top_map) { + map<u32, PureRepeat> out; + top_map.clear(); + + for (const auto &m : proto.repeats) { + const u32 top = m.first; + const PureRepeat &pr = m.second; + u32 new_top = out.size(); + out.emplace(new_top, pr); + top_map[top] = new_top; + } + + proto.repeats.swap(out); + + // Remap report map. + proto.report_map.clear(); + for (const auto &m : proto.repeats) { + const u32 top = m.first; + const PureRepeat &pr = m.second; + for (const auto &report : pr.reports) { + proto.report_map[report].insert(top); + } + } + + assert(proto.repeats.size() <= proto.max_occupancy); +} + +namespace { +struct HasReport { + explicit HasReport(ReportID r) : report(r) {} + + bool operator()(const pair<u32, PureRepeat> &a) const { + return contains(a.second.reports, report); + } + +private: + ReportID report; +}; +} + +bool is_equal(const CastleProto &c1, ReportID report1, const CastleProto &c2, + ReportID report2) { + assert(!c1.repeats.empty()); + assert(!c2.repeats.empty()); assert(c1.kind == c2.kind); - - if (c1.reach() != c2.reach()) { - DEBUG_PRINTF("different reach\n"); - return false; - } - - map<u32, PureRepeat>::const_iterator it = c1.repeats.begin(), - ite = c1.repeats.end(), - jt = c2.repeats.begin(), - jte = c2.repeats.end(); - - for (;; ++it, ++jt) { - it = find_if(it, ite, HasReport(report1)); - jt = find_if(jt, jte, HasReport(report2)); - - if (it == ite && jt == jte) { - DEBUG_PRINTF("success, cases are equivalent!\n"); - return true; - } - - if (it == ite || jt == jte) { - DEBUG_PRINTF("no match for one repeat\n"); - break; - } - - if (it->first != jt->first) { - DEBUG_PRINTF("different tops\n"); - break; - } - - const PureRepeat &r1 = it->second; - const PureRepeat &r2 = jt->second; - assert(r1.reach == c1.reach()); - assert(r2.reach == c1.reach()); - if (r1.bounds != r2.bounds) { - DEBUG_PRINTF("different bounds\n"); - break; - } - } - - return false; -} - -bool is_equal(const CastleProto &c1, const CastleProto &c2) { - assert(!c1.repeats.empty()); - assert(!c2.repeats.empty()); + + if (c1.reach() != c2.reach()) { + DEBUG_PRINTF("different reach\n"); + return false; + } + + map<u32, PureRepeat>::const_iterator it = c1.repeats.begin(), + ite = c1.repeats.end(), + jt = c2.repeats.begin(), + jte = c2.repeats.end(); + + for (;; ++it, ++jt) { + it = find_if(it, ite, HasReport(report1)); + jt = find_if(jt, jte, HasReport(report2)); + + if (it == ite && jt == jte) { + DEBUG_PRINTF("success, cases are equivalent!\n"); + return true; + } + + if (it == ite || jt == jte) { + DEBUG_PRINTF("no match for one repeat\n"); + break; + } + + if (it->first != jt->first) { + DEBUG_PRINTF("different tops\n"); + break; + } + + 
const PureRepeat &r1 = it->second; + const PureRepeat &r2 = jt->second; + assert(r1.reach == c1.reach()); + assert(r2.reach == c1.reach()); + if (r1.bounds != r2.bounds) { + DEBUG_PRINTF("different bounds\n"); + break; + } + } + + return false; +} + +bool is_equal(const CastleProto &c1, const CastleProto &c2) { + assert(!c1.repeats.empty()); + assert(!c2.repeats.empty()); assert(c1.kind == c2.kind); - - if (c1.reach() != c2.reach()) { - DEBUG_PRINTF("different reach\n"); - return false; - } - - return c1.repeats == c2.repeats; -} - -bool requiresDedupe(const CastleProto &proto, + + if (c1.reach() != c2.reach()) { + DEBUG_PRINTF("different reach\n"); + return false; + } + + return c1.repeats == c2.repeats; +} + +bool requiresDedupe(const CastleProto &proto, const flat_set<ReportID> &reports) { - for (const auto &report : reports) { - auto it = proto.report_map.find(report); - if (it == end(proto.report_map)) { - continue; - } - if (it->second.size() > 1) { - DEBUG_PRINTF("castle proto %p has dupe report %u\n", &proto, - report); - return true; - } - } - return false; -} - -static -void addToHolder(NGHolder &g, u32 top, const PureRepeat &pr) { - DEBUG_PRINTF("top %u -> repeat %s\n", top, pr.bounds.str().c_str()); - NFAVertex u = g.start; - - // Mandatory repeats to min bound. - u32 min_bound = pr.bounds.min; // always finite - if (min_bound == 0) { // Vacuous case, we can only do this once. - assert(!edge(g.start, g.accept, g).second); + for (const auto &report : reports) { + auto it = proto.report_map.find(report); + if (it == end(proto.report_map)) { + continue; + } + if (it->second.size() > 1) { + DEBUG_PRINTF("castle proto %p has dupe report %u\n", &proto, + report); + return true; + } + } + return false; +} + +static +void addToHolder(NGHolder &g, u32 top, const PureRepeat &pr) { + DEBUG_PRINTF("top %u -> repeat %s\n", top, pr.bounds.str().c_str()); + NFAVertex u = g.start; + + // Mandatory repeats to min bound. + u32 min_bound = pr.bounds.min; // always finite + if (min_bound == 0) { // Vacuous case, we can only do this once. + assert(!edge(g.start, g.accept, g).second); NFAEdge e = add_edge(g.start, g.accept, g); g[e].tops.insert(top); - g[u].reports.insert(pr.reports.begin(), pr.reports.end()); - min_bound = 1; - } - - for (u32 i = 0; i < min_bound; i++) { - NFAVertex v = add_vertex(g); - g[v].char_reach = pr.reach; + g[u].reports.insert(pr.reports.begin(), pr.reports.end()); + min_bound = 1; + } + + for (u32 i = 0; i < min_bound; i++) { + NFAVertex v = add_vertex(g); + g[v].char_reach = pr.reach; NFAEdge e = add_edge(u, v, g); - if (u == g.start) { + if (u == g.start) { g[e].tops.insert(top); - } - u = v; - } - - NFAVertex head = u; - - // Optional repeats to max bound. - if (pr.bounds.max.is_finite()) { - assert(pr.bounds.max > depth(0)); - const u32 max_bound = pr.bounds.max; - for (u32 i = 0; i < max_bound - min_bound; i++) { - NFAVertex v = add_vertex(g); - g[v].char_reach = pr.reach; - if (head != u) { - add_edge(head, v, g); - } + } + u = v; + } + + NFAVertex head = u; + + // Optional repeats to max bound. 
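// (Rather than wiring every optional vertex to accept, 'head' (the state reached after the mandatory prefix) fans out to each optional position; the entry point chosen determines how many of the optional characters are consumed before the single accept-connected tail.)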
+ if (pr.bounds.max.is_finite()) { + assert(pr.bounds.max > depth(0)); + const u32 max_bound = pr.bounds.max; + for (u32 i = 0; i < max_bound - min_bound; i++) { + NFAVertex v = add_vertex(g); + g[v].char_reach = pr.reach; + if (head != u) { + add_edge(head, v, g); + } NFAEdge e = add_edge(u, v, g); - if (u == g.start) { + if (u == g.start) { g[e].tops.insert(top); - } - u = v; - } - } else { - assert(pr.bounds.max.is_infinite()); - add_edge(u, u, g); - } - - // Connect to accept. - add_edge(u, g.accept, g); - g[u].reports.insert(pr.reports.begin(), pr.reports.end()); - if (u != head) { - add_edge(head, g.accept, g); - g[head].reports.insert(pr.reports.begin(), pr.reports.end()); - } -} - -static -bool hasZeroMinBound(const CastleProto &proto) { - const depth zero(0); - for (const PureRepeat &pr : proto.repeats | map_values) { - if (pr.bounds.min == zero) { - return true; - } - } - return false; -} - + } + u = v; + } + } else { + assert(pr.bounds.max.is_infinite()); + add_edge(u, u, g); + } + + // Connect to accept. + add_edge(u, g.accept, g); + g[u].reports.insert(pr.reports.begin(), pr.reports.end()); + if (u != head) { + add_edge(head, g.accept, g); + g[head].reports.insert(pr.reports.begin(), pr.reports.end()); + } +} + +static +bool hasZeroMinBound(const CastleProto &proto) { + const depth zero(0); + for (const PureRepeat &pr : proto.repeats | map_values) { + if (pr.bounds.min == zero) { + return true; + } + } + return false; +} + unique_ptr<NGHolder> makeHolder(const CastleProto &proto, - const CompileContext &cc) { - assert(!proto.repeats.empty()); - - // Vacuous edges are only doable in the NGHolder if we are a single-top - // Castle. - if (hasZeroMinBound(proto)) { - if (proto.repeats.size() != 1 || proto.repeats.begin()->first != 0) { - DEBUG_PRINTF("can't build multi-top vacuous holder\n"); - return nullptr; - } - } - + const CompileContext &cc) { + assert(!proto.repeats.empty()); + + // Vacuous edges are only doable in the NGHolder if we are a single-top + // Castle. + if (hasZeroMinBound(proto)) { + if (proto.repeats.size() != 1 || proto.repeats.begin()->first != 0) { + DEBUG_PRINTF("can't build multi-top vacuous holder\n"); + return nullptr; + } + } + auto g = ue2::make_unique<NGHolder>(proto.kind); - - for (const auto &m : proto.repeats) { - addToHolder(*g, m.first, m.second); - } - + + for (const auto &m : proto.repeats) { + addToHolder(*g, m.first, m.second); + } + //dumpGraph("castle_holder.dot", *g); - - // Sanity checks. - assert(allMatchStatesHaveReports(*g)); - assert(!has_parallel_edge(*g)); - - reduceGraphEquivalences(*g, cc); - - removeRedundancy(*g, SOM_NONE); - - return g; -} - -} // namespace ue2 + + // Sanity checks. + assert(allMatchStatesHaveReports(*g)); + assert(!has_parallel_edge(*g)); + + reduceGraphEquivalences(*g, cc); + + removeRedundancy(*g, SOM_NONE); + + return g; +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfa/castlecompile.h b/contrib/libs/hyperscan/src/nfa/castlecompile.h index 1a0ef2421c..ea5f06dabc 100644 --- a/contrib/libs/hyperscan/src/nfa/castlecompile.h +++ b/contrib/libs/hyperscan/src/nfa/castlecompile.h @@ -1,171 +1,171 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + /** * \file - * \brief Castle: multi-tenant repeat engine, compiler code. - */ - -#ifndef NFA_CASTLECOMPILE_H -#define NFA_CASTLECOMPILE_H - -#include "nfa_kind.h" -#include "ue2common.h" -#include "nfagraph/ng_repeat.h" + * \brief Castle: multi-tenant repeat engine, compiler code. 
+ */ + +#ifndef NFA_CASTLECOMPILE_H +#define NFA_CASTLECOMPILE_H + +#include "nfa_kind.h" +#include "ue2common.h" +#include "nfagraph/ng_repeat.h" #include "util/bytecode_ptr.h" -#include "util/depth.h" +#include "util/depth.h" #include "util/flat_containers.h" - -#include <map> -#include <memory> -#include <set> + +#include <map> +#include <memory> +#include <set> #include <unordered_map> -#include <vector> - -struct NFA; - -namespace ue2 { - -class CharReach; -class NGHolder; +#include <vector> + +struct NFA; + +namespace ue2 { + +class CharReach; +class NGHolder; class ReportManager; -struct CompileContext; - -/** - * \brief Prototype for a Castle engine: contains at least one CastleRepeat. - * - * Currently, all repeats in a Castle must have the same character - * reachability. - * - * A CastleProto is converted into a single NFA, with each top triggering a - * unique repeat. A CastleProto can contain at most CastleProto::max_occupancy - * elements. - */ -struct CastleProto { - static constexpr size_t max_occupancy = 65536; // arbitrary limit +struct CompileContext; + +/** + * \brief Prototype for a Castle engine: contains at least one CastleRepeat. + * + * Currently, all repeats in a Castle must have the same character + * reachability. + * + * A CastleProto is converted into a single NFA, with each top triggering a + * unique repeat. A CastleProto can contain at most CastleProto::max_occupancy + * elements. + */ +struct CastleProto { + static constexpr size_t max_occupancy = 65536; // arbitrary limit CastleProto(nfa_kind k, const PureRepeat &pr); - const CharReach &reach() const; - - /** \brief Add a new repeat. */ - u32 add(const PureRepeat &pr); - - /** \brief Remove a repeat. */ - void erase(u32 top); - - /** - * \brief Merge in the given repeat, returning the top used. - * - * If the repeat already exists in this castle, we will re-use (and return) - * the old top. If it doesn't, it will be added and assigned a new top. - * Returns \ref max_occupancy if capacity would be exceeded. - */ - u32 merge(const PureRepeat &pr); - - /** \brief Mapping from unique top id to repeat. */ - std::map<u32, PureRepeat> repeats; - - /** \brief Mapping from report to associated tops. */ + const CharReach &reach() const; + + /** \brief Add a new repeat. */ + u32 add(const PureRepeat &pr); + + /** \brief Remove a repeat. */ + void erase(u32 top); + + /** + * \brief Merge in the given repeat, returning the top used. + * + * If the repeat already exists in this castle, we will re-use (and return) + * the old top. If it doesn't, it will be added and assigned a new top. + * Returns \ref max_occupancy if capacity would be exceeded. + */ + u32 merge(const PureRepeat &pr); + + /** \brief Mapping from unique top id to repeat. */ + std::map<u32, PureRepeat> repeats; + + /** \brief Mapping from report to associated tops. */ std::unordered_map<ReportID, flat_set<u32>> report_map; - - /** - * \brief Next top id to use. Repeats may be removed without top remapping, - * so we track this explicitly instead of using repeats.size(). - */ - u32 next_top = 1; + + /** + * \brief Next top id to use. Repeats may be removed without top remapping, + * so we track this explicitly instead of using repeats.size(). + */ + u32 next_top = 1; /** \brief Kind for this engine. 
*/ nfa_kind kind; -}; - -std::set<ReportID> all_reports(const CastleProto &proto); -depth findMinWidth(const CastleProto &proto); -depth findMaxWidth(const CastleProto &proto); -depth findMinWidth(const CastleProto &proto, u32 top); -depth findMaxWidth(const CastleProto &proto, u32 top); - -/** - * \brief Remap tops to be contiguous. - * - * Remap the tops in the given CastleProto so that they're contiguous in the - * range [0 .. N-1]. - */ -void remapCastleTops(CastleProto &proto, std::map<u32, u32> &top_map); - -/** - * \brief Construct an NFA from a CastleProto. - * - * NOTE: Tops must be contiguous, i.e. \ref remapCastleTops must have been run - * first. - */ +}; + +std::set<ReportID> all_reports(const CastleProto &proto); +depth findMinWidth(const CastleProto &proto); +depth findMaxWidth(const CastleProto &proto); +depth findMinWidth(const CastleProto &proto, u32 top); +depth findMaxWidth(const CastleProto &proto, u32 top); + +/** + * \brief Remap tops to be contiguous. + * + * Remap the tops in the given CastleProto so that they're contiguous in the + * range [0 .. N-1]. + */ +void remapCastleTops(CastleProto &proto, std::map<u32, u32> &top_map); + +/** + * \brief Construct an NFA from a CastleProto. + * + * NOTE: Tops must be contiguous, i.e. \ref remapCastleTops must have been run + * first. + */ bytecode_ptr<NFA> -buildCastle(const CastleProto &proto, - const std::map<u32, std::vector<std::vector<CharReach>>> &triggers, +buildCastle(const CastleProto &proto, + const std::map<u32, std::vector<std::vector<CharReach>>> &triggers, const CompileContext &cc, const ReportManager &rm); - -/** + +/** * \brief Merge two CastleProto prototypes together, if possible. If a * particular repeat from c2 is already in c1, then it will be reused rather * than adding a duplicate repeat. - * - * Returns true if merge of all repeats in c2 into c1 succeeds, and fills - * mapping with the repeat indices. - */ -bool mergeCastle(CastleProto &c1, const CastleProto &c2, - std::map<u32, u32> &top_map); - -/** - * \brief True if the two castles are identical with respect to the reports - * given; i.e. the same tops lead to the same repeats, just with report1 in c1 - * and report2 in c2. - * - * Repeats leading to other reports are ignored. - */ -bool is_equal(const CastleProto &c1, ReportID report1, const CastleProto &c2, - ReportID report2); - -/** - * \brief True if the two castles given are identical. - */ -bool is_equal(const CastleProto &c1, const CastleProto &c2); - -/** - * \brief True if the given castle contains more than a single instance of any - * of the reports in the given set. - */ -bool requiresDedupe(const CastleProto &proto, + * + * Returns true if merge of all repeats in c2 into c1 succeeds, and fills + * mapping with the repeat indices. + */ +bool mergeCastle(CastleProto &c1, const CastleProto &c2, + std::map<u32, u32> &top_map); + +/** + * \brief True if the two castles are identical with respect to the reports + * given; i.e. the same tops lead to the same repeats, just with report1 in c1 + * and report2 in c2. + * + * Repeats leading to other reports are ignored. + */ +bool is_equal(const CastleProto &c1, ReportID report1, const CastleProto &c2, + ReportID report2); + +/** + * \brief True if the two castles given are identical. + */ +bool is_equal(const CastleProto &c1, const CastleProto &c2); + +/** + * \brief True if the given castle contains more than a single instance of any + * of the reports in the given set. 
+ */ +bool requiresDedupe(const CastleProto &proto, const flat_set<ReportID> &reports); - -/** - * \brief Build an NGHolder from a CastleProto. - */ + +/** + * \brief Build an NGHolder from a CastleProto. + */ std::unique_ptr<NGHolder> makeHolder(const CastleProto &castle, - const CompileContext &cc); - -} // namespace ue2 - -#endif // NFA_CASTLECOMPILE_H + const CompileContext &cc); + +} // namespace ue2 + +#endif // NFA_CASTLECOMPILE_H diff --git a/contrib/libs/hyperscan/src/nfa/dfa_min.cpp b/contrib/libs/hyperscan/src/nfa/dfa_min.cpp index 8c0fc09ff5..1a07e8a7d3 100644 --- a/contrib/libs/hyperscan/src/nfa/dfa_min.cpp +++ b/contrib/libs/hyperscan/src/nfa/dfa_min.cpp @@ -1,111 +1,111 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + /** * \file * \brief Build code for DFA minimization. */ - -/** + * /Summary of the Hopcroft minimisation algorithm/ * - * partition := {F, Q \ F}; - * work_queue := {F}; - * while (work_queue is not empty) do - * choose and remove a set A from work_queue - * for each c in Σ do - * let X be the set of states for which a transition on c - * leads to a state in A - * for each set Y in partition for which X ∩ Y is nonempty and - * Y \ X is nonempty do - * replace Y in partition by the two sets X ∩ Y and Y \ X - * if Y is in work_queue - * replace Y in work_queue by the same two sets - * else - * if |X ∩ Y| <= |Y \ X| - * add X ∩ Y to work_queue - * else - * add Y \ X to work_queue - * end; - * end; - * end; - */ - -#include "dfa_min.h" - -#include "grey.h" + * partition := {F, Q \ F}; + * work_queue := {F}; + * while (work_queue is not empty) do + * choose and remove a set A from work_queue + * for each c in Σ do + * let X be the set of states for which a transition on c + * leads to a state in A + * for each set Y in partition for which X ∩ Y is nonempty and + * Y \ X is nonempty do + * replace Y in partition by the two sets X ∩ Y and Y \ X + * if Y is in work_queue + * replace Y in work_queue by the same two sets + * else + * if |X ∩ Y| <= |Y \ X| + * add X ∩ Y to work_queue + * else + * add Y \ X to work_queue + * end; + * end; + * end; + */ + +#include "dfa_min.h" + +#include "grey.h" #include "mcclellancompile_util.h" #include "rdfa.h" -#include "ue2common.h" +#include "ue2common.h" #include "util/container.h" #include "util/flat_containers.h" #include "util/noncopyable.h" -#include "util/partitioned_set.h" - -#include <algorithm> -#include <functional> +#include "util/partitioned_set.h" + +#include <algorithm> +#include <functional> #include <iterator> -#include <map> +#include <map> #include <queue> -#include <set> -#include <vector> - -using namespace std; - -namespace ue2 { - -namespace { - -struct hopcroft_state_info { +#include <set> +#include <vector> + +using namespace std; + +namespace ue2 { + +namespace { + +struct hopcroft_state_info { explicit hopcroft_state_info(size_t alpha_size) : prev(alpha_size) {} /** \brief Mapping from symbol to a list of predecessors that transition to * this state on that symbol. */ vector<vector<dstate_id_t>> prev; -}; - +}; + struct HopcroftInfo : noncopyable { size_t alpha_size; //!< Size of DFA alphabet. queue<size_t> work_queue; //!< Hopcroft work queue of partition indices. partitioned_set<dstate_id_t> partition; //!< Partition set of DFA states. vector<hopcroft_state_info> states; //!< Pre-calculated state info (preds) - + explicit HopcroftInfo(const raw_dfa &rdfa); -}; - +}; + } // namespace - -/** + +/** * \brief Create an initial partitioning and work_queue. 
- * + * * Initial partition contains {accepting states..., Non-accepting states} * Initial work_queue contains accepting state subsets - * + * * The initial partitioning needs to distinguish between the different * reporting behaviours (unlike standard Hopcroft) --> more than one subset * possible for the accepting states. @@ -115,36 +115,36 @@ struct HopcroftInfo : noncopyable { * Reports of each state are searched against the map and * added to the corresponding id -> partition[id] and work_queue[id]. * Non Accept states are added to partition[id+1]. - */ -static + */ +static vector<size_t> create_map(const raw_dfa &rdfa, queue<size_t> &work_queue) { - using ReportKey = pair<flat_set<ReportID>, flat_set<ReportID>>; - map<ReportKey, size_t> subset_map; - vector<size_t> state_to_subset(rdfa.states.size(), INVALID_SUBSET); - - for (size_t i = 0; i < rdfa.states.size(); i++) { + using ReportKey = pair<flat_set<ReportID>, flat_set<ReportID>>; + map<ReportKey, size_t> subset_map; + vector<size_t> state_to_subset(rdfa.states.size(), INVALID_SUBSET); + + for (size_t i = 0; i < rdfa.states.size(); i++) { const auto &ds = rdfa.states[i]; if (!ds.reports.empty() || !ds.reports_eod.empty()) { ReportKey key(ds.reports, ds.reports_eod); - if (contains(subset_map, key)) { - state_to_subset[i] = subset_map[key]; - } else { - size_t sub = subset_map.size(); + if (contains(subset_map, key)) { + state_to_subset[i] = subset_map[key]; + } else { + size_t sub = subset_map.size(); subset_map.emplace(std::move(key), sub); - state_to_subset[i] = sub; + state_to_subset[i] = sub; work_queue.push(sub); - } - } - } - + } + } + } + /* Give non-accept states their own subset. */ - size_t non_accept_sub = subset_map.size(); + size_t non_accept_sub = subset_map.size(); replace(state_to_subset.begin(), state_to_subset.end(), INVALID_SUBSET, non_accept_sub); - - return state_to_subset; -} - + + return state_to_subset; +} + HopcroftInfo::HopcroftInfo(const raw_dfa &rdfa) : alpha_size(rdfa.alpha_size), partition(create_map(rdfa, work_queue)), states(rdfa.states.size(), hopcroft_state_info(alpha_size)) { @@ -153,51 +153,51 @@ HopcroftInfo::HopcroftInfo(const raw_dfa &rdfa) for (size_t sym = 0; sym < alpha_size; sym++) { dstate_id_t present_state = rdfa.states[i].next[sym]; states[present_state].prev[sym].push_back(i); - } - } -} - -/** - * For a split set X, each subset S (given by part_index) in the partition, two - * sets are created: v_inter (X intersection S) and v_sub (S - X). - * - * For each subset S in the partition that could be split (v_inter is nonempty - * and v_sub is nonempty): - * - replace S in partition by the two sets v_inter and v_sub. - * - if S is in work_queue: - * - replace S in work_queue by the two subsets. - * - else: - * - replace S in work_queue by the smaller of the two sets. - */ -static + } + } +} + +/** + * For a split set X, each subset S (given by part_index) in the partition, two + * sets are created: v_inter (X intersection S) and v_sub (S - X). + * + * For each subset S in the partition that could be split (v_inter is nonempty + * and v_sub is nonempty): + * - replace S in partition by the two sets v_inter and v_sub. + * - if S is in work_queue: + * - replace S in work_queue by the two subsets. + * - else: + * - replace S in work_queue by the smaller of the two sets. 
+ */ +static void split_and_replace_set(const size_t part_index, HopcroftInfo &info, const flat_set<dstate_id_t> &splitter) { - /* singleton sets cannot be split */ + /* singleton sets cannot be split */ if (info.partition[part_index].size() == 1) { - return; - } - + return; + } + size_t small_index = info.partition.split(part_index, splitter); - - if (small_index == INVALID_SUBSET) { - /* the set could not be split */ - return; - } - - /* larger subset remains at the input subset index, if the input subset was - * already in the work queue then the larger subset will remain there. */ - + + if (small_index == INVALID_SUBSET) { + /* the set could not be split */ + return; + } + + /* larger subset remains at the input subset index, if the input subset was + * already in the work queue then the larger subset will remain there. */ + info.work_queue.push(small_index); -} - -/** +} + +/** * \brief Core of the Hopcroft minimisation algorithm. - */ -static + */ +static void dfa_min(HopcroftInfo &info) { flat_set<dstate_id_t> curr, sym_preds; - vector<size_t> cand_subsets; - + vector<size_t> cand_subsets; + while (!info.work_queue.empty()) { /* Choose and remove a set of states (curr, or A in the description * above) from the work queue. Note that we copy the set because the @@ -205,7 +205,7 @@ void dfa_min(HopcroftInfo &info) { curr.clear(); insert(&curr, info.partition[info.work_queue.front()]); info.work_queue.pop(); - + for (size_t sym = 0; sym < info.alpha_size; sym++) { /* Find the set of states sym_preds for which a transition on the * given symbol leads to a state in curr. */ @@ -215,104 +215,104 @@ void dfa_min(HopcroftInfo &info) { } if (sym_preds.empty()) { - continue; - } - + continue; + } + /* we only need to consider subsets with at least one member in * sym_preds for splitting */ - cand_subsets.clear(); + cand_subsets.clear(); info.partition.find_overlapping(sym_preds, &cand_subsets); - - for (size_t sub : cand_subsets) { + + for (size_t sub : cand_subsets) { split_and_replace_set(sub, info, sym_preds); - } - } - } -} - -/** + } + } + } +} + +/** * \brief Build the new DFA state table. - */ -static + */ +static void mapping_new_states(const HopcroftInfo &info, vector<dstate_id_t> &old_to_new, raw_dfa &rdfa) { const size_t num_partitions = info.partition.size(); - - // Mapping from equiv class's first state to equiv class index. - map<dstate_id_t, size_t> ordering; - - // New state id for each equiv class. - vector<dstate_id_t> eq_state(num_partitions); - - for (size_t i = 0; i < num_partitions; i++) { + + // Mapping from equiv class's first state to equiv class index. + map<dstate_id_t, size_t> ordering; + + // New state id for each equiv class. 
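// (New ids are handed out in order of each class's first original state id, so the minimised numbering follows the original state order.)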
+ vector<dstate_id_t> eq_state(num_partitions); + + for (size_t i = 0; i < num_partitions; i++) { ordering[*info.partition[i].begin()] = i; - } - - dstate_id_t new_id = 0; - for (const auto &m : ordering) { - eq_state[m.second] = new_id++; - } - + } + + dstate_id_t new_id = 0; + for (const auto &m : ordering) { + eq_state[m.second] = new_id++; + } + for (size_t t = 0; t < info.partition.size(); t++) { for (dstate_id_t id : info.partition[t]) { - old_to_new[id] = eq_state[t]; - } - } - - vector<dstate> new_states; - new_states.reserve(num_partitions); + old_to_new[id] = eq_state[t]; + } + } + + vector<dstate> new_states; + new_states.reserve(num_partitions); for (const auto &m : ordering) { new_states.push_back(rdfa.states[m.first]); - } + } rdfa.states = std::move(new_states); -} - -static +} + +static void renumber_new_states(const HopcroftInfo &info, const vector<dstate_id_t> &old_to_new, raw_dfa &rdfa) { for (size_t i = 0; i < info.partition.size(); i++) { for (size_t sym = 0; sym < info.alpha_size; sym++) { dstate_id_t output = rdfa.states[i].next[sym]; rdfa.states[i].next[sym] = old_to_new[output]; - } - dstate_id_t dad = rdfa.states[i].daddy; - rdfa.states[i].daddy = old_to_new[dad]; - } - - rdfa.start_floating = old_to_new[rdfa.start_floating]; - rdfa.start_anchored = old_to_new[rdfa.start_anchored]; -} - -static + } + dstate_id_t dad = rdfa.states[i].daddy; + rdfa.states[i].daddy = old_to_new[dad]; + } + + rdfa.start_floating = old_to_new[rdfa.start_floating]; + rdfa.start_anchored = old_to_new[rdfa.start_anchored]; +} + +static void new_dfa(raw_dfa &rdfa, const HopcroftInfo &info) { if (info.partition.size() == info.states.size()) { return; - } + } vector<dstate_id_t> old_to_new(info.states.size()); mapping_new_states(info, old_to_new, rdfa); renumber_new_states(info, old_to_new, rdfa); -} - -void minimize_hopcroft(raw_dfa &rdfa, const Grey &grey) { - if (!grey.minimizeDFA) { - return; - } - +} + +void minimize_hopcroft(raw_dfa &rdfa, const Grey &grey) { + if (!grey.minimizeDFA) { + return; + } + if (is_dead(rdfa)) { DEBUG_PRINTF("dfa is empty\n"); } - UNUSED const size_t states_before = rdfa.states.size(); - + UNUSED const size_t states_before = rdfa.states.size(); + HopcroftInfo info(rdfa); - + dfa_min(info); new_dfa(rdfa, info); - - DEBUG_PRINTF("reduced from %zu to %zu states\n", states_before, - rdfa.states.size()); -} - -} // namespace ue2 + + DEBUG_PRINTF("reduced from %zu to %zu states\n", states_before, + rdfa.states.size()); +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfa/dfa_min.h b/contrib/libs/hyperscan/src/nfa/dfa_min.h index 7ccd59e590..61ca6c21a4 100644 --- a/contrib/libs/hyperscan/src/nfa/dfa_min.h +++ b/contrib/libs/hyperscan/src/nfa/dfa_min.h @@ -1,46 +1,46 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + /** * \file * \brief Build code for DFA minimization. - */ - -#ifndef DFA_MIN_H -#define DFA_MIN_H - -namespace ue2 { - -struct raw_dfa; -struct Grey; - -void minimize_hopcroft(raw_dfa &rdfa, const Grey &grey); - -} // namespace ue2 - -#endif + */ + +#ifndef DFA_MIN_H +#define DFA_MIN_H + +namespace ue2 { + +struct raw_dfa; +struct Grey; + +void minimize_hopcroft(raw_dfa &rdfa, const Grey &grey); + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/nfa/gough.c b/contrib/libs/hyperscan/src/nfa/gough.c index 176fd22e82..44acd4c286 100644 --- a/contrib/libs/hyperscan/src/nfa/gough.c +++ b/contrib/libs/hyperscan/src/nfa/gough.c @@ -1,1036 +1,1036 @@ -/* +/* * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "gough.h" - -#include "accel.h" -#include "gough_internal.h" -#include "mcclellan.h" -#include "nfa_api.h" -#include "nfa_api_queue.h" -#include "nfa_internal.h" -#include "util/bitutils.h" -#include "util/compare.h" -#include "util/simd_utils.h" -#include "util/unaligned.h" -#include "ue2common.h" -#include <string.h> - -#include "mcclellan_common_impl.h" - -#define GOUGH_SOM_EARLY (~0ULL) - -static really_inline -void compressSomValue(u32 comp_slot_width, u64a curr_offset, - void *dest_som_base, u32 i, u64a val) { - void *dest_som = (u8 *)dest_som_base + i * comp_slot_width; - /* gough does not initialise all slots, so may contain garbage */ - u64a delta = curr_offset - val; - switch (comp_slot_width) { - case 2: - if (delta >= (u16)~0U) { - delta = GOUGH_SOM_EARLY; - } - unaligned_store_u16(dest_som, delta); - break; - case 4: - if (delta >= (u32)~0U) { - delta = GOUGH_SOM_EARLY; - } - unaligned_store_u32(dest_som, delta); - break; - case 8: - if (delta >= ~0ULL) { - delta = GOUGH_SOM_EARLY; - } - unaligned_store_u64a(dest_som, delta); - break; - default: - assert(0); - } -} - -static really_inline -u64a expandSomValue(u32 comp_slot_width, u64a curr_offset, - const void *src_som_base, u32 i) { - /* Note: gough does not initialise all slots, so we may end up decompressing - * garbage */ - - const void *src_som = (const u8 *)src_som_base + i * comp_slot_width; - u64a val = 0; - switch (comp_slot_width) { - case 2: - val = unaligned_load_u16(src_som); - if (val == (u16)~0U) { - return GOUGH_SOM_EARLY; - } - break; - case 4: - val = unaligned_load_u32(src_som); - if (val == (u32)~0U) { - return GOUGH_SOM_EARLY; - } - break; - case 8: - val = unaligned_load_u64a(src_som); - if (val == ~0ULL) { - return GOUGH_SOM_EARLY; - } - break; - - default: - assert(0); - } - return curr_offset - val; -} - -static really_inline + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "gough.h" + +#include "accel.h" +#include "gough_internal.h" +#include "mcclellan.h" +#include "nfa_api.h" +#include "nfa_api_queue.h" +#include "nfa_internal.h" +#include "util/bitutils.h" +#include "util/compare.h" +#include "util/simd_utils.h" +#include "util/unaligned.h" +#include "ue2common.h" +#include <string.h> + +#include "mcclellan_common_impl.h" + +#define GOUGH_SOM_EARLY (~0ULL) + +static really_inline +void compressSomValue(u32 comp_slot_width, u64a curr_offset, + void *dest_som_base, u32 i, u64a val) { + void *dest_som = (u8 *)dest_som_base + i * comp_slot_width; + /* gough does not initialise all slots, so may contain garbage */ + u64a delta = curr_offset - val; + switch (comp_slot_width) { + case 2: + if (delta >= (u16)~0U) { + delta = GOUGH_SOM_EARLY; + } + unaligned_store_u16(dest_som, delta); + break; + case 4: + if (delta >= (u32)~0U) { + delta = GOUGH_SOM_EARLY; + } + unaligned_store_u32(dest_som, delta); + break; + case 8: + if (delta >= ~0ULL) { + delta = GOUGH_SOM_EARLY; + } + unaligned_store_u64a(dest_som, delta); + break; + default: + assert(0); + } +} + +static really_inline +u64a expandSomValue(u32 comp_slot_width, u64a curr_offset, + const void *src_som_base, u32 i) { + /* Note: gough does not initialise all slots, so we may end up decompressing + * garbage */ + + const void *src_som = (const u8 *)src_som_base + i * comp_slot_width; + u64a val = 0; + switch (comp_slot_width) { + case 2: + val = unaligned_load_u16(src_som); + if (val == (u16)~0U) { + return GOUGH_SOM_EARLY; + } + break; + case 4: + val = unaligned_load_u32(src_som); + if (val == (u32)~0U) { + return GOUGH_SOM_EARLY; + } + break; + case 8: + val = unaligned_load_u64a(src_som); + if (val == ~0ULL) { + return GOUGH_SOM_EARLY; + } + break; + + default: + assert(0); + } + return curr_offset - val; +} + +static really_inline char doReports(NfaCallback cb, void *ctxt, const struct mcclellan *m, - const struct gough_som_info *som, u16 s, u64a loc, - char eod, u16 * const cached_accept_state, - u32 * const cached_accept_id, u32 * const cached_accept_som) { - DEBUG_PRINTF("reporting state = %hu, loc=%llu, eod %hhu\n", - (u16)(s & STATE_MASK), loc, eod); - - if (!eod && s == *cached_accept_state) { - u64a from = *cached_accept_som == INVALID_SLOT ? 
loc - : som->slots[*cached_accept_som]; - if (cb(from, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; /* termination requested */ - } - - return MO_CONTINUE_MATCHING; /* continue execution */ - } - - const struct mstate_aux *aux = get_aux(m, s); - size_t offset = eod ? aux->accept_eod : aux->accept; - - assert(offset); - const struct gough_report_list *rl - = (const void *)((const char *)m + offset - sizeof(struct NFA)); - assert(ISALIGNED(rl)); - - DEBUG_PRINTF("report list size %u\n", rl->count); - u32 count = rl->count; - - if (!eod && count == 1) { - *cached_accept_state = s; - *cached_accept_id = rl->report[0].r; - *cached_accept_som = rl->report[0].som; - - u64a from = *cached_accept_som == INVALID_SLOT ? loc - : som->slots[*cached_accept_som]; - DEBUG_PRINTF("reporting %u, using som[%u]=%llu\n", rl->report[0].r, - *cached_accept_som, from); - if (cb(from, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; /* termination requested */ - } - - return MO_CONTINUE_MATCHING; /* continue execution */ - } - - for (u32 i = 0; i < count; i++) { - u32 slot = rl->report[i].som; - u64a from = slot == INVALID_SLOT ? loc : som->slots[slot]; - DEBUG_PRINTF("reporting %u, using som[%u] = %llu\n", - rl->report[i].r, slot, from); - if (cb(from, loc, rl->report[i].r, ctxt) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; /* termination requested */ - } - } - - return MO_CONTINUE_MATCHING; /* continue execution */ -} - -#ifdef DUMP_SUPPORT -static UNUSED -const char *dump_op(u8 op) { - switch (op) { - case GOUGH_INS_END: - return "END"; - case GOUGH_INS_MOV: - return "MOV"; - case GOUGH_INS_NEW: - return "NEW"; - case GOUGH_INS_MIN: - return "MIN"; - default: - return "???"; - } -} -#endif - -static really_inline -void run_prog_i(UNUSED const struct NFA *nfa, - const struct gough_ins *pc, u64a som_offset, - struct gough_som_info *som) { - DEBUG_PRINTF("run prog at som_offset of %llu\n", som_offset); - while (1) { - assert((const u8 *)pc >= (const u8 *)nfa); - assert((const u8 *)pc < (const u8 *)nfa + nfa->length); - u32 dest = pc->dest; - u32 src = pc->src; - assert(pc->op == GOUGH_INS_END - || dest < (nfa->scratchStateSize - 16) / 8); - DEBUG_PRINTF("%s %u %u\n", dump_op(pc->op), dest, src); - switch (pc->op) { - case GOUGH_INS_END: - return; - case GOUGH_INS_MOV: - som->slots[dest] = som->slots[src]; - break; - case GOUGH_INS_NEW: - /* note: c has already been advanced */ - DEBUG_PRINTF("current offset %llu; adjust %u\n", som_offset, - pc->src); - assert(som_offset >= pc->src); - som->slots[dest] = som_offset - pc->src; - break; - case GOUGH_INS_MIN: - /* TODO: shift all values along by one so that a normal min works - */ - if (som->slots[src] == GOUGH_SOM_EARLY) { - som->slots[dest] = som->slots[src]; - } else if (som->slots[dest] != GOUGH_SOM_EARLY) { - LIMIT_TO_AT_MOST(&som->slots[dest], som->slots[src]); - } - break; - default: - assert(0); - return; - } - DEBUG_PRINTF("dest slot[%u] = %llu\n", dest, som->slots[dest]); - ++pc; - } -} - -static really_inline -void run_prog(const struct NFA *nfa, const u32 *edge_prog_table, - const u8 *buf, u64a offAdj, const u8 *c, u32 edge_num, - struct gough_som_info *som) { - DEBUG_PRINTF("taking edge %u\n", edge_num); - u32 prog_offset = edge_prog_table[edge_num]; - if (!prog_offset) { - DEBUG_PRINTF("no prog on edge\n"); - return; - } - - const struct gough_ins *pc = (const void *)((const u8 *)nfa + prog_offset); - u64a curr_offset = (u64a)(c - buf) + offAdj - 1; - run_prog_i(nfa, pc, curr_offset, 
som); -} - -static never_inline -void run_accel_prog(const struct NFA *nfa, const struct gough_accel *gacc, - const u8 *buf, u64a offAdj, const u8 *c, const u8 *c2, - struct gough_som_info *som) { - assert(gacc->prog_offset); - assert(c2 > c); - - const struct gough_ins *pc - = (const void *)((const u8 *)nfa + gacc->prog_offset); - s64a margin_dist = gacc->margin_dist; - - DEBUG_PRINTF("run accel after skip %lld margin; advanced %zd\n", - margin_dist, c2 - c); - - if (c2 - c <= 2 * margin_dist) { - while (c < c2) { - u64a curr_offset = (u64a)(c - buf) + offAdj; - run_prog_i(nfa, pc, curr_offset, som); - c++; - } - } else { - u64a curr_offset = (u64a)(c - buf) + offAdj; - for (s64a i = 0; i < margin_dist; i++) { - run_prog_i(nfa, pc, curr_offset + i, som); - } - - curr_offset = (u64a)(c2 - buf) + offAdj - margin_dist; - for (s64a i = 0; i < margin_dist; i++) { - run_prog_i(nfa, pc, curr_offset + i, som); - } - } -} - -static never_inline -u16 goughEnableStarts(const struct mcclellan *m, u16 s, u64a som_offset, - struct gough_som_info *som) { - DEBUG_PRINTF("top triggered while at %hu\n", s); - const struct mstate_aux *aux = get_aux(m, s); - DEBUG_PRINTF("now going to state %hu\n", aux->top); - - const u32 *top_offsets = get_gough_top_offsets(m); - if (!top_offsets) { - return aux->top; - } - - u32 prog_offset = top_offsets[s]; - if (!prog_offset) { - return aux->top; - } - - DEBUG_PRINTF("doing som for top\n"); - const struct NFA *nfa - = (const struct NFA *)((const char *)m - sizeof(struct NFA)); - const struct gough_ins *pc = (const void *)((const u8 *)nfa - + prog_offset); - run_prog_i(nfa, pc, som_offset, som); - return aux->top; -} - -static really_inline -char goughExec16_i(const struct mcclellan *m, struct gough_som_info *som, - u16 *state, const u8 *buf, size_t len, u64a offAdj, + const struct gough_som_info *som, u16 s, u64a loc, + char eod, u16 * const cached_accept_state, + u32 * const cached_accept_id, u32 * const cached_accept_som) { + DEBUG_PRINTF("reporting state = %hu, loc=%llu, eod %hhu\n", + (u16)(s & STATE_MASK), loc, eod); + + if (!eod && s == *cached_accept_state) { + u64a from = *cached_accept_som == INVALID_SLOT ? loc + : som->slots[*cached_accept_som]; + if (cb(from, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; /* termination requested */ + } + + return MO_CONTINUE_MATCHING; /* continue execution */ + } + + const struct mstate_aux *aux = get_aux(m, s); + size_t offset = eod ? aux->accept_eod : aux->accept; + + assert(offset); + const struct gough_report_list *rl + = (const void *)((const char *)m + offset - sizeof(struct NFA)); + assert(ISALIGNED(rl)); + + DEBUG_PRINTF("report list size %u\n", rl->count); + u32 count = rl->count; + + if (!eod && count == 1) { + *cached_accept_state = s; + *cached_accept_id = rl->report[0].r; + *cached_accept_som = rl->report[0].som; + + u64a from = *cached_accept_som == INVALID_SLOT ? loc + : som->slots[*cached_accept_som]; + DEBUG_PRINTF("reporting %u, using som[%u]=%llu\n", rl->report[0].r, + *cached_accept_som, from); + if (cb(from, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; /* termination requested */ + } + + return MO_CONTINUE_MATCHING; /* continue execution */ + } + + for (u32 i = 0; i < count; i++) { + u32 slot = rl->report[i].som; + u64a from = slot == INVALID_SLOT ? 
loc : som->slots[slot]; + DEBUG_PRINTF("reporting %u, using som[%u] = %llu\n", + rl->report[i].r, slot, from); + if (cb(from, loc, rl->report[i].r, ctxt) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; /* termination requested */ + } + } + + return MO_CONTINUE_MATCHING; /* continue execution */ +} + +#ifdef DUMP_SUPPORT +static UNUSED +const char *dump_op(u8 op) { + switch (op) { + case GOUGH_INS_END: + return "END"; + case GOUGH_INS_MOV: + return "MOV"; + case GOUGH_INS_NEW: + return "NEW"; + case GOUGH_INS_MIN: + return "MIN"; + default: + return "???"; + } +} +#endif + +static really_inline +void run_prog_i(UNUSED const struct NFA *nfa, + const struct gough_ins *pc, u64a som_offset, + struct gough_som_info *som) { + DEBUG_PRINTF("run prog at som_offset of %llu\n", som_offset); + while (1) { + assert((const u8 *)pc >= (const u8 *)nfa); + assert((const u8 *)pc < (const u8 *)nfa + nfa->length); + u32 dest = pc->dest; + u32 src = pc->src; + assert(pc->op == GOUGH_INS_END + || dest < (nfa->scratchStateSize - 16) / 8); + DEBUG_PRINTF("%s %u %u\n", dump_op(pc->op), dest, src); + switch (pc->op) { + case GOUGH_INS_END: + return; + case GOUGH_INS_MOV: + som->slots[dest] = som->slots[src]; + break; + case GOUGH_INS_NEW: + /* note: c has already been advanced */ + DEBUG_PRINTF("current offset %llu; adjust %u\n", som_offset, + pc->src); + assert(som_offset >= pc->src); + som->slots[dest] = som_offset - pc->src; + break; + case GOUGH_INS_MIN: + /* TODO: shift all values along by one so that a normal min works + */ + if (som->slots[src] == GOUGH_SOM_EARLY) { + som->slots[dest] = som->slots[src]; + } else if (som->slots[dest] != GOUGH_SOM_EARLY) { + LIMIT_TO_AT_MOST(&som->slots[dest], som->slots[src]); + } + break; + default: + assert(0); + return; + } + DEBUG_PRINTF("dest slot[%u] = %llu\n", dest, som->slots[dest]); + ++pc; + } +} + +static really_inline +void run_prog(const struct NFA *nfa, const u32 *edge_prog_table, + const u8 *buf, u64a offAdj, const u8 *c, u32 edge_num, + struct gough_som_info *som) { + DEBUG_PRINTF("taking edge %u\n", edge_num); + u32 prog_offset = edge_prog_table[edge_num]; + if (!prog_offset) { + DEBUG_PRINTF("no prog on edge\n"); + return; + } + + const struct gough_ins *pc = (const void *)((const u8 *)nfa + prog_offset); + u64a curr_offset = (u64a)(c - buf) + offAdj - 1; + run_prog_i(nfa, pc, curr_offset, som); +} + +static never_inline +void run_accel_prog(const struct NFA *nfa, const struct gough_accel *gacc, + const u8 *buf, u64a offAdj, const u8 *c, const u8 *c2, + struct gough_som_info *som) { + assert(gacc->prog_offset); + assert(c2 > c); + + const struct gough_ins *pc + = (const void *)((const u8 *)nfa + gacc->prog_offset); + s64a margin_dist = gacc->margin_dist; + + DEBUG_PRINTF("run accel after skip %lld margin; advanced %zd\n", + margin_dist, c2 - c); + + if (c2 - c <= 2 * margin_dist) { + while (c < c2) { + u64a curr_offset = (u64a)(c - buf) + offAdj; + run_prog_i(nfa, pc, curr_offset, som); + c++; + } + } else { + u64a curr_offset = (u64a)(c - buf) + offAdj; + for (s64a i = 0; i < margin_dist; i++) { + run_prog_i(nfa, pc, curr_offset + i, som); + } + + curr_offset = (u64a)(c2 - buf) + offAdj - margin_dist; + for (s64a i = 0; i < margin_dist; i++) { + run_prog_i(nfa, pc, curr_offset + i, som); + } + } +} + +static never_inline +u16 goughEnableStarts(const struct mcclellan *m, u16 s, u64a som_offset, + struct gough_som_info *som) { + DEBUG_PRINTF("top triggered while at %hu\n", s); + const struct mstate_aux *aux = get_aux(m, s); + DEBUG_PRINTF("now going to 
state %hu\n", aux->top); + + const u32 *top_offsets = get_gough_top_offsets(m); + if (!top_offsets) { + return aux->top; + } + + u32 prog_offset = top_offsets[s]; + if (!prog_offset) { + return aux->top; + } + + DEBUG_PRINTF("doing som for top\n"); + const struct NFA *nfa + = (const struct NFA *)((const char *)m - sizeof(struct NFA)); + const struct gough_ins *pc = (const void *)((const u8 *)nfa + + prog_offset); + run_prog_i(nfa, pc, som_offset, som); + return aux->top; +} + +static really_inline +char goughExec16_i(const struct mcclellan *m, struct gough_som_info *som, + u16 *state, const u8 *buf, size_t len, u64a offAdj, NfaCallback cb, void *ctxt, const u8 **c_final, - enum MatchMode mode) { - assert(ISALIGNED_N(state, 2)); - - u16 s = *state; - const struct NFA *nfa - = (const struct NFA *)((const char *)m - sizeof(struct NFA)); - const u8 *c = buf, *c_end = buf + len; - const u16 *succ_table = (const u16 *)((const char *)m - + sizeof(struct mcclellan)); - assert(ISALIGNED_N(succ_table, 2)); - const u16 sherman_base = m->sherman_limit; - const char *sherman_base_offset - = (const char *)nfa + m->sherman_offset; - const u32 as = m->alphaShift; - - s &= STATE_MASK; - - u32 cached_accept_id = 0; - u16 cached_accept_state = 0; - u32 cached_accept_som = 0; - - const u32 *edge_prog_table = (const u32 *)(get_gough(m) + 1); - - DEBUG_PRINTF("s: %hu, len %zu\n", s, len); - - const u8 *min_accel_offset = c; - if (!m->has_accel || len < ACCEL_MIN_LEN) { - min_accel_offset = c_end; - goto without_accel; - } - - goto with_accel; - -without_accel: - while (c < min_accel_offset && s) { - u8 cprime = m->remap[*(c++)]; - DEBUG_PRINTF("c: %02hhx cp:%02hhx (s=%hu)\n", *(c-1), cprime, s); - - u32 edge_num = ((u32)s << as) + cprime; - run_prog(nfa, edge_prog_table, buf, offAdj, c, edge_num, som); - if (s < sherman_base) { - DEBUG_PRINTF("doing normal\n"); - assert(s < m->state_count); - s = succ_table[edge_num]; - } else { - const char *sherman_state - = findShermanState(m, sherman_base_offset, sherman_base, s); - DEBUG_PRINTF("doing sherman\n"); - s = doSherman16(sherman_state, cprime, succ_table, as); - } - DEBUG_PRINTF("s: %hu (%hu)\n", s, (u16)(s & STATE_MASK)); - - if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) { - if (mode == STOP_AT_MATCH) { - *state = s & STATE_MASK; - *c_final = c - 1; - return MO_CONTINUE_MATCHING; - } - - u64a loc = (c - 1) - buf + offAdj + 1; - if (doReports(cb, ctxt, m, som, s & STATE_MASK, loc, 0, - &cached_accept_state, &cached_accept_id, - &cached_accept_som) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - - s &= STATE_MASK; - } - -with_accel: - while (c < c_end && s) { - u8 cprime = m->remap[*(c++)]; - DEBUG_PRINTF("c: %02hhx cp:%02hhx (s=%hu)\n", *(c-1), cprime, s); - - u32 edge_num = ((u32)s << as) + cprime; - run_prog(nfa, edge_prog_table, buf, offAdj, c, edge_num, som); - if (s < sherman_base) { - DEBUG_PRINTF("doing normal\n"); - assert(s < m->state_count); - s = succ_table[edge_num]; - } else { - const char *sherman_state - = findShermanState(m, sherman_base_offset, sherman_base, s); - DEBUG_PRINTF("doing sherman\n"); - s = doSherman16(sherman_state, cprime, succ_table, as); - } - DEBUG_PRINTF("s: %hu (%hu)\n", s, (u16)(s & STATE_MASK)); - - if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) { - if (mode == STOP_AT_MATCH) { - *state = s & STATE_MASK; - *c_final = c - 1; - return MO_CONTINUE_MATCHING; - } - - u64a loc = (c - 1) - buf + offAdj + 1; - - if (doReports(cb, ctxt, m, som, s & STATE_MASK, loc, 0, - &cached_accept_state, &cached_accept_id, - 
&cached_accept_som) - == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } else if (s & ACCEL_FLAG) { - DEBUG_PRINTF("skipping\n"); - const struct mstate_aux *this_aux = get_aux(m, s & STATE_MASK); - u32 accel_offset = this_aux->accel_offset; - - assert(accel_offset >= m->aux_offset); - assert(accel_offset < m->sherman_offset); - - const struct gough_accel *gacc - = (const void *)((const char *)m + accel_offset); - assert(!gacc->prog_offset == !gacc->margin_dist); - const u8 *c2 = run_accel(&gacc->accel, c, c_end); - - if (c2 != c && gacc->prog_offset) { - run_accel_prog(nfa, gacc, buf, offAdj, c, c2, som); - } - - if (c2 < min_accel_offset + BAD_ACCEL_DIST) { - min_accel_offset = c2 + BIG_ACCEL_PENALTY; - } else { - min_accel_offset = c2 + SMALL_ACCEL_PENALTY; - } - - if (min_accel_offset >= c_end - ACCEL_MIN_LEN) { - min_accel_offset = c_end; - } - - DEBUG_PRINTF("advanced %zd, next accel chance in %zd/%zd\n", - c2 - c, min_accel_offset - c2, c_end - c2); - - c = c2; - s &= STATE_MASK; - goto without_accel; - } - - s &= STATE_MASK; - } - - if (mode == STOP_AT_MATCH) { - *c_final = c_end; - } - *state = s; - - return MO_CONTINUE_MATCHING; -} - -static really_inline -char goughExec8_i(const struct mcclellan *m, struct gough_som_info *som, - u8 *state, const u8 *buf, size_t len, u64a offAdj, + enum MatchMode mode) { + assert(ISALIGNED_N(state, 2)); + + u16 s = *state; + const struct NFA *nfa + = (const struct NFA *)((const char *)m - sizeof(struct NFA)); + const u8 *c = buf, *c_end = buf + len; + const u16 *succ_table = (const u16 *)((const char *)m + + sizeof(struct mcclellan)); + assert(ISALIGNED_N(succ_table, 2)); + const u16 sherman_base = m->sherman_limit; + const char *sherman_base_offset + = (const char *)nfa + m->sherman_offset; + const u32 as = m->alphaShift; + + s &= STATE_MASK; + + u32 cached_accept_id = 0; + u16 cached_accept_state = 0; + u32 cached_accept_som = 0; + + const u32 *edge_prog_table = (const u32 *)(get_gough(m) + 1); + + DEBUG_PRINTF("s: %hu, len %zu\n", s, len); + + const u8 *min_accel_offset = c; + if (!m->has_accel || len < ACCEL_MIN_LEN) { + min_accel_offset = c_end; + goto without_accel; + } + + goto with_accel; + +without_accel: + while (c < min_accel_offset && s) { + u8 cprime = m->remap[*(c++)]; + DEBUG_PRINTF("c: %02hhx cp:%02hhx (s=%hu)\n", *(c-1), cprime, s); + + u32 edge_num = ((u32)s << as) + cprime; + run_prog(nfa, edge_prog_table, buf, offAdj, c, edge_num, som); + if (s < sherman_base) { + DEBUG_PRINTF("doing normal\n"); + assert(s < m->state_count); + s = succ_table[edge_num]; + } else { + const char *sherman_state + = findShermanState(m, sherman_base_offset, sherman_base, s); + DEBUG_PRINTF("doing sherman\n"); + s = doSherman16(sherman_state, cprime, succ_table, as); + } + DEBUG_PRINTF("s: %hu (%hu)\n", s, (u16)(s & STATE_MASK)); + + if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) { + if (mode == STOP_AT_MATCH) { + *state = s & STATE_MASK; + *c_final = c - 1; + return MO_CONTINUE_MATCHING; + } + + u64a loc = (c - 1) - buf + offAdj + 1; + if (doReports(cb, ctxt, m, som, s & STATE_MASK, loc, 0, + &cached_accept_state, &cached_accept_id, + &cached_accept_som) == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } + + s &= STATE_MASK; + } + +with_accel: + while (c < c_end && s) { + u8 cprime = m->remap[*(c++)]; + DEBUG_PRINTF("c: %02hhx cp:%02hhx (s=%hu)\n", *(c-1), cprime, s); + + u32 edge_num = ((u32)s << as) + cprime; + run_prog(nfa, edge_prog_table, buf, offAdj, c, edge_num, som); + if (s < sherman_base) { + DEBUG_PRINTF("doing normal\n"); + 
assert(s < m->state_count); + s = succ_table[edge_num]; + } else { + const char *sherman_state + = findShermanState(m, sherman_base_offset, sherman_base, s); + DEBUG_PRINTF("doing sherman\n"); + s = doSherman16(sherman_state, cprime, succ_table, as); + } + DEBUG_PRINTF("s: %hu (%hu)\n", s, (u16)(s & STATE_MASK)); + + if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) { + if (mode == STOP_AT_MATCH) { + *state = s & STATE_MASK; + *c_final = c - 1; + return MO_CONTINUE_MATCHING; + } + + u64a loc = (c - 1) - buf + offAdj + 1; + + if (doReports(cb, ctxt, m, som, s & STATE_MASK, loc, 0, + &cached_accept_state, &cached_accept_id, + &cached_accept_som) + == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } else if (s & ACCEL_FLAG) { + DEBUG_PRINTF("skipping\n"); + const struct mstate_aux *this_aux = get_aux(m, s & STATE_MASK); + u32 accel_offset = this_aux->accel_offset; + + assert(accel_offset >= m->aux_offset); + assert(accel_offset < m->sherman_offset); + + const struct gough_accel *gacc + = (const void *)((const char *)m + accel_offset); + assert(!gacc->prog_offset == !gacc->margin_dist); + const u8 *c2 = run_accel(&gacc->accel, c, c_end); + + if (c2 != c && gacc->prog_offset) { + run_accel_prog(nfa, gacc, buf, offAdj, c, c2, som); + } + + if (c2 < min_accel_offset + BAD_ACCEL_DIST) { + min_accel_offset = c2 + BIG_ACCEL_PENALTY; + } else { + min_accel_offset = c2 + SMALL_ACCEL_PENALTY; + } + + if (min_accel_offset >= c_end - ACCEL_MIN_LEN) { + min_accel_offset = c_end; + } + + DEBUG_PRINTF("advanced %zd, next accel chance in %zd/%zd\n", + c2 - c, min_accel_offset - c2, c_end - c2); + + c = c2; + s &= STATE_MASK; + goto without_accel; + } + + s &= STATE_MASK; + } + + if (mode == STOP_AT_MATCH) { + *c_final = c_end; + } + *state = s; + + return MO_CONTINUE_MATCHING; +} + +static really_inline +char goughExec8_i(const struct mcclellan *m, struct gough_som_info *som, + u8 *state, const u8 *buf, size_t len, u64a offAdj, NfaCallback cb, void *ctxt, const u8 **c_final, - enum MatchMode mode) { - u8 s = *state; - const u8 *c = buf, *c_end = buf + len; - const u8 *succ_table = (const u8 *)((const char *)m - + sizeof(struct mcclellan)); - const u32 as = m->alphaShift; - const struct mstate_aux *aux; - - const struct NFA *nfa - = (const struct NFA *)((const char *)m - sizeof(struct NFA)); - aux = (const struct mstate_aux *)((const char *)nfa + m->aux_offset); - - const u32 *edge_prog_table = (const u32 *)(get_gough(m) + 1); - - u16 accel_limit = m->accel_limit_8; - u16 accept_limit = m->accept_limit_8; - - u32 cached_accept_id = 0; - u16 cached_accept_state = 0; - u32 cached_accept_som = 0; - - DEBUG_PRINTF("accel %hu, accept %hu\n", accel_limit, accept_limit); - - DEBUG_PRINTF("s: %hhu, len %zu\n", s, len); - - const u8 *min_accel_offset = c; - if (!m->has_accel || len < ACCEL_MIN_LEN) { - min_accel_offset = c_end; - goto without_accel; - } - - goto with_accel; - -without_accel: - while (c < min_accel_offset && s) { - u8 cprime = m->remap[*(c++)]; - DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx\n", *(c-1), - ourisprint(*(c-1)) ? 
*(c-1) : '?', cprime); - - u32 edge_num = ((u32)s << as) + cprime; - - run_prog(nfa, edge_prog_table, buf, offAdj, c, edge_num, som); - - s = succ_table[edge_num]; - DEBUG_PRINTF("s: %hhu\n", s); - - if (mode != NO_MATCHES && s >= accept_limit) { - if (mode == STOP_AT_MATCH) { - DEBUG_PRINTF("match - pausing\n"); - *state = s; - *c_final = c - 1; - return MO_CONTINUE_MATCHING; - } - - u64a loc = (c - 1) - buf + offAdj + 1; - if (doReports(cb, ctxt, m, som, s, loc, 0, - &cached_accept_state, &cached_accept_id, - &cached_accept_som) - == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - } - -with_accel: - while (c < c_end && s) { - u8 cprime = m->remap[*(c++)]; - DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx\n", *(c-1), - ourisprint(*(c-1)) ? *(c-1) : '?', cprime); - - u32 edge_num = ((u32)s << as) + cprime; - - run_prog(nfa, edge_prog_table, buf, offAdj, c, edge_num, som); - - s = succ_table[edge_num]; - DEBUG_PRINTF("s: %hhu\n", s); - - if (s >= accel_limit) { /* accept_limit >= accel_limit */ - if (mode != NO_MATCHES && s >= accept_limit) { - if (mode == STOP_AT_MATCH) { - DEBUG_PRINTF("match - pausing\n"); - *state = s; - *c_final = c - 1; - return MO_CONTINUE_MATCHING; - } - - u64a loc = (c - 1) - buf + offAdj + 1; - if (doReports(cb, ctxt, m, som, s, loc, 0, - &cached_accept_state, &cached_accept_id, - &cached_accept_som) - == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } else if (aux[s].accel_offset) { - DEBUG_PRINTF("skipping\n"); - - const struct gough_accel *gacc - = (const void *)((const char *)m + aux[s].accel_offset); - const u8 *c2 = run_accel(&gacc->accel, c, c_end); - - if (c2 != c && gacc->prog_offset) { - run_accel_prog(nfa, gacc, buf, offAdj, c, c2, som); - } - - if (c2 < min_accel_offset + BAD_ACCEL_DIST) { - min_accel_offset = c2 + BIG_ACCEL_PENALTY; - } else { - min_accel_offset = c2 + SMALL_ACCEL_PENALTY; - } - - if (min_accel_offset >= c_end - ACCEL_MIN_LEN) { - min_accel_offset = c_end; - } - - DEBUG_PRINTF("advanced %zd, next accel chance in %zd/%zd\n", - c2 - c, min_accel_offset - c2, c_end - c2); - - c = c2; - goto without_accel; - } - } - } - - *state = s; - if (mode == STOP_AT_MATCH) { - *c_final = c_end; - } - return MO_CONTINUE_MATCHING; -} - -static never_inline -char goughExec8_i_ni(const struct mcclellan *m, struct gough_som_info *som, - u8 *state, const u8 *buf, size_t len, u64a offAdj, + enum MatchMode mode) { + u8 s = *state; + const u8 *c = buf, *c_end = buf + len; + const u8 *succ_table = (const u8 *)((const char *)m + + sizeof(struct mcclellan)); + const u32 as = m->alphaShift; + const struct mstate_aux *aux; + + const struct NFA *nfa + = (const struct NFA *)((const char *)m - sizeof(struct NFA)); + aux = (const struct mstate_aux *)((const char *)nfa + m->aux_offset); + + const u32 *edge_prog_table = (const u32 *)(get_gough(m) + 1); + + u16 accel_limit = m->accel_limit_8; + u16 accept_limit = m->accept_limit_8; + + u32 cached_accept_id = 0; + u16 cached_accept_state = 0; + u32 cached_accept_som = 0; + + DEBUG_PRINTF("accel %hu, accept %hu\n", accel_limit, accept_limit); + + DEBUG_PRINTF("s: %hhu, len %zu\n", s, len); + + const u8 *min_accel_offset = c; + if (!m->has_accel || len < ACCEL_MIN_LEN) { + min_accel_offset = c_end; + goto without_accel; + } + + goto with_accel; + +without_accel: + while (c < min_accel_offset && s) { + u8 cprime = m->remap[*(c++)]; + DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx\n", *(c-1), + ourisprint(*(c-1)) ? 
*(c-1) : '?', cprime); + + u32 edge_num = ((u32)s << as) + cprime; + + run_prog(nfa, edge_prog_table, buf, offAdj, c, edge_num, som); + + s = succ_table[edge_num]; + DEBUG_PRINTF("s: %hhu\n", s); + + if (mode != NO_MATCHES && s >= accept_limit) { + if (mode == STOP_AT_MATCH) { + DEBUG_PRINTF("match - pausing\n"); + *state = s; + *c_final = c - 1; + return MO_CONTINUE_MATCHING; + } + + u64a loc = (c - 1) - buf + offAdj + 1; + if (doReports(cb, ctxt, m, som, s, loc, 0, + &cached_accept_state, &cached_accept_id, + &cached_accept_som) + == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } + } + +with_accel: + while (c < c_end && s) { + u8 cprime = m->remap[*(c++)]; + DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx\n", *(c-1), + ourisprint(*(c-1)) ? *(c-1) : '?', cprime); + + u32 edge_num = ((u32)s << as) + cprime; + + run_prog(nfa, edge_prog_table, buf, offAdj, c, edge_num, som); + + s = succ_table[edge_num]; + DEBUG_PRINTF("s: %hhu\n", s); + + if (s >= accel_limit) { /* accept_limit >= accel_limit */ + if (mode != NO_MATCHES && s >= accept_limit) { + if (mode == STOP_AT_MATCH) { + DEBUG_PRINTF("match - pausing\n"); + *state = s; + *c_final = c - 1; + return MO_CONTINUE_MATCHING; + } + + u64a loc = (c - 1) - buf + offAdj + 1; + if (doReports(cb, ctxt, m, som, s, loc, 0, + &cached_accept_state, &cached_accept_id, + &cached_accept_som) + == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } else if (aux[s].accel_offset) { + DEBUG_PRINTF("skipping\n"); + + const struct gough_accel *gacc + = (const void *)((const char *)m + aux[s].accel_offset); + const u8 *c2 = run_accel(&gacc->accel, c, c_end); + + if (c2 != c && gacc->prog_offset) { + run_accel_prog(nfa, gacc, buf, offAdj, c, c2, som); + } + + if (c2 < min_accel_offset + BAD_ACCEL_DIST) { + min_accel_offset = c2 + BIG_ACCEL_PENALTY; + } else { + min_accel_offset = c2 + SMALL_ACCEL_PENALTY; + } + + if (min_accel_offset >= c_end - ACCEL_MIN_LEN) { + min_accel_offset = c_end; + } + + DEBUG_PRINTF("advanced %zd, next accel chance in %zd/%zd\n", + c2 - c, min_accel_offset - c2, c_end - c2); + + c = c2; + goto without_accel; + } + } + } + + *state = s; + if (mode == STOP_AT_MATCH) { + *c_final = c_end; + } + return MO_CONTINUE_MATCHING; +} + +static never_inline +char goughExec8_i_ni(const struct mcclellan *m, struct gough_som_info *som, + u8 *state, const u8 *buf, size_t len, u64a offAdj, NfaCallback cb, void *ctxt, const u8 **final_point, - enum MatchMode mode) { - return goughExec8_i(m, som, state, buf, len, offAdj, cb, ctxt, final_point, - mode); -} - -static never_inline -char goughExec16_i_ni(const struct mcclellan *m, struct gough_som_info *som, - u16 *state, const u8 *buf, size_t len, u64a offAdj, + enum MatchMode mode) { + return goughExec8_i(m, som, state, buf, len, offAdj, cb, ctxt, final_point, + mode); +} + +static never_inline +char goughExec16_i_ni(const struct mcclellan *m, struct gough_som_info *som, + u16 *state, const u8 *buf, size_t len, u64a offAdj, NfaCallback cb, void *ctxt, const u8 **final_point, - enum MatchMode mode) { - return goughExec16_i(m, som, state, buf, len, offAdj, cb, ctxt, final_point, - mode); -} - -static really_inline -struct gough_som_info *getSomInfo(char *state_base) { - return (struct gough_som_info *)(state_base + 16); -} - -static really_inline -const struct gough_som_info *getSomInfoConst(const char *state_base) { - return (const struct gough_som_info *)(state_base + 16); -} - -static really_inline -char nfaExecGough8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, + enum MatchMode mode) { + 
return goughExec16_i(m, som, state, buf, len, offAdj, cb, ctxt, final_point, + mode); +} + +static really_inline +struct gough_som_info *getSomInfo(char *state_base) { + return (struct gough_som_info *)(state_base + 16); +} + +static really_inline +const struct gough_som_info *getSomInfoConst(const char *state_base) { + return (const struct gough_som_info *)(state_base + 16); +} + +static really_inline +char nfaExecGough8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, const u8 *hend, NfaCallback cb, void *context, - struct mq *q, s64a end, enum MatchMode mode) { - DEBUG_PRINTF("enter\n"); - struct gough_som_info *som = getSomInfo(q->state); - assert(n->type == GOUGH_NFA_8); - const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n); - s64a sp; - u8 s = *(u8 *)q->state; - - if (q->report_current) { - assert(s); - assert(s >= m->accept_limit_8); - - u32 cached_accept_id = 0; - u16 cached_accept_state = 0; - u32 cached_accept_som = 0; - - int rv = doReports(cb, context, m, som, s, q_cur_offset(q), 0, - &cached_accept_state, &cached_accept_id, - &cached_accept_som); - - q->report_current = 0; - - if (rv == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - - sp = q_cur_loc(q); - q->cur++; - - const u8 *cur_buf = sp < 0 ? hend : buffer; - - if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) { - /* this is as far as we go */ - q->cur--; - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = end; - *(u8 *)q->state = s; - return MO_ALIVE; - } - - while (1) { - DEBUG_PRINTF("%s @ %llu [som %llu]\n", - q->items[q->cur].type == MQE_TOP ? "TOP" : - q->items[q->cur].type == MQE_END ? "END" : "???", - q->items[q->cur].location + offset, q->items[q->cur].som); - assert(q->cur < q->end); - s64a ep = q->items[q->cur].location; - if (mode != NO_MATCHES) { - ep = MIN(ep, end); - } - - assert(ep >= sp); - DEBUG_PRINTF("run to %lld from %lld\n", ep, sp); - - s64a local_ep = ep; - if (sp < 0) { - local_ep = MIN(0, ep); - } - - const u8 *final_look; - if (goughExec8_i_ni(m, som, &s, cur_buf + sp, local_ep - sp, + struct mq *q, s64a end, enum MatchMode mode) { + DEBUG_PRINTF("enter\n"); + struct gough_som_info *som = getSomInfo(q->state); + assert(n->type == GOUGH_NFA_8); + const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n); + s64a sp; + u8 s = *(u8 *)q->state; + + if (q->report_current) { + assert(s); + assert(s >= m->accept_limit_8); + + u32 cached_accept_id = 0; + u16 cached_accept_state = 0; + u32 cached_accept_som = 0; + + int rv = doReports(cb, context, m, som, s, q_cur_offset(q), 0, + &cached_accept_state, &cached_accept_id, + &cached_accept_som); + + q->report_current = 0; + + if (rv == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } + + sp = q_cur_loc(q); + q->cur++; + + const u8 *cur_buf = sp < 0 ? hend : buffer; + + if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) { + /* this is as far as we go */ + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = end; + *(u8 *)q->state = s; + return MO_ALIVE; + } + + while (1) { + DEBUG_PRINTF("%s @ %llu [som %llu]\n", + q->items[q->cur].type == MQE_TOP ? "TOP" : + q->items[q->cur].type == MQE_END ? 
"END" : "???", + q->items[q->cur].location + offset, q->items[q->cur].som); + assert(q->cur < q->end); + s64a ep = q->items[q->cur].location; + if (mode != NO_MATCHES) { + ep = MIN(ep, end); + } + + assert(ep >= sp); + DEBUG_PRINTF("run to %lld from %lld\n", ep, sp); + + s64a local_ep = ep; + if (sp < 0) { + local_ep = MIN(0, ep); + } + + const u8 *final_look; + if (goughExec8_i_ni(m, som, &s, cur_buf + sp, local_ep - sp, offset + sp, cb, context, &final_look, mode) - == MO_HALT_MATCHING) { - *(u8 *)q->state = 0; - return 0; - } - if (mode == STOP_AT_MATCH && final_look != cur_buf + local_ep) { - /* found a match */ - DEBUG_PRINTF("found a match\n"); - assert(q->cur); - q->cur--; - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = final_look - cur_buf + 1; /* due to - * early -1 */ - *(u8 *)q->state = s; - return MO_MATCHES_PENDING; - } - - assert(q->cur); - if (mode != NO_MATCHES && q->items[q->cur].location > end) { - /* this is as far as we go */ - assert(q->cur); - q->cur--; - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = end; - *(u8 *)q->state = s; - return MO_ALIVE; - } - - sp = local_ep; - - if (sp == 0) { - cur_buf = buffer; - } - - if (sp != ep) { - continue; - } - - switch (q->items[q->cur].type) { - case MQE_TOP: - assert(!s || sp + offset > 0); - if (sp + offset == 0) { - s = (u8)m->start_anchored; - break; - } - s = goughEnableStarts(m, s, q->items[q->cur].som, som); - break; - case MQE_END: - *(u8 *)q->state = s; - q->cur++; - return s ? MO_ALIVE : 0; - default: - assert(!"invalid queue event"); - } - - q->cur++; - } -} - - -static really_inline -char nfaExecGough16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, + == MO_HALT_MATCHING) { + *(u8 *)q->state = 0; + return 0; + } + if (mode == STOP_AT_MATCH && final_look != cur_buf + local_ep) { + /* found a match */ + DEBUG_PRINTF("found a match\n"); + assert(q->cur); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = final_look - cur_buf + 1; /* due to + * early -1 */ + *(u8 *)q->state = s; + return MO_MATCHES_PENDING; + } + + assert(q->cur); + if (mode != NO_MATCHES && q->items[q->cur].location > end) { + /* this is as far as we go */ + assert(q->cur); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = end; + *(u8 *)q->state = s; + return MO_ALIVE; + } + + sp = local_ep; + + if (sp == 0) { + cur_buf = buffer; + } + + if (sp != ep) { + continue; + } + + switch (q->items[q->cur].type) { + case MQE_TOP: + assert(!s || sp + offset > 0); + if (sp + offset == 0) { + s = (u8)m->start_anchored; + break; + } + s = goughEnableStarts(m, s, q->items[q->cur].som, som); + break; + case MQE_END: + *(u8 *)q->state = s; + q->cur++; + return s ? 
MO_ALIVE : 0; + default: + assert(!"invalid queue event"); + } + + q->cur++; + } +} + + +static really_inline +char nfaExecGough16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, const u8 *hend, NfaCallback cb, void *context, - struct mq *q, s64a end, enum MatchMode mode) { - struct gough_som_info *som = getSomInfo(q->state); - assert(n->type == GOUGH_NFA_16); - const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n); - s64a sp; - - assert(ISALIGNED_N(q->state, 2)); - u16 s = *(u16 *)q->state; - - if (q->report_current) { - assert(s); - assert(get_aux(m, s)->accept); - - u32 cached_accept_id = 0; - u16 cached_accept_state = 0; - u32 cached_accept_som = 0; - - int rv = doReports(cb, context, m, som, s, q_cur_offset(q), 0, - &cached_accept_state, &cached_accept_id, - &cached_accept_som); - - q->report_current = 0; - - if (rv == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - - sp = q_cur_loc(q); - q->cur++; - - const u8 *cur_buf = sp < 0 ? hend : buffer; - - assert(q->cur); - if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) { - /* this is as far as we go */ - q->cur--; - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = end; - *(u16 *)q->state = s; - return MO_ALIVE; - } - - while (1) { - assert(q->cur < q->end); - s64a ep = q->items[q->cur].location; - if (mode != NO_MATCHES) { - ep = MIN(ep, end); - } - - assert(ep >= sp); - - s64a local_ep = ep; - if (sp < 0) { - local_ep = MIN(0, ep); - } - - /* do main buffer region */ - const u8 *final_look; - if (goughExec16_i_ni(m, som, &s, cur_buf + sp, local_ep - sp, + struct mq *q, s64a end, enum MatchMode mode) { + struct gough_som_info *som = getSomInfo(q->state); + assert(n->type == GOUGH_NFA_16); + const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n); + s64a sp; + + assert(ISALIGNED_N(q->state, 2)); + u16 s = *(u16 *)q->state; + + if (q->report_current) { + assert(s); + assert(get_aux(m, s)->accept); + + u32 cached_accept_id = 0; + u16 cached_accept_state = 0; + u32 cached_accept_som = 0; + + int rv = doReports(cb, context, m, som, s, q_cur_offset(q), 0, + &cached_accept_state, &cached_accept_id, + &cached_accept_som); + + q->report_current = 0; + + if (rv == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } + + sp = q_cur_loc(q); + q->cur++; + + const u8 *cur_buf = sp < 0 ? 
hend : buffer; + + assert(q->cur); + if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) { + /* this is as far as we go */ + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = end; + *(u16 *)q->state = s; + return MO_ALIVE; + } + + while (1) { + assert(q->cur < q->end); + s64a ep = q->items[q->cur].location; + if (mode != NO_MATCHES) { + ep = MIN(ep, end); + } + + assert(ep >= sp); + + s64a local_ep = ep; + if (sp < 0) { + local_ep = MIN(0, ep); + } + + /* do main buffer region */ + const u8 *final_look; + if (goughExec16_i_ni(m, som, &s, cur_buf + sp, local_ep - sp, offset + sp, cb, context, &final_look, mode) - == MO_HALT_MATCHING) { - *(u16 *)q->state = 0; - return 0; - } - if (mode == STOP_AT_MATCH && final_look != cur_buf + local_ep) { - /* this is as far as we go */ - assert(q->cur); - DEBUG_PRINTF("state %hu final_look %zd\n", s, - final_look - cur_buf); - q->cur--; - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = final_look - cur_buf + 1; /* due to - * early -1 */ - *(u16 *)q->state = s; - return MO_MATCHES_PENDING; - } - - assert(q->cur); - if (mode != NO_MATCHES && q->items[q->cur].location > end) { - /* this is as far as we go */ - q->cur--; - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = end; - *(u16 *)q->state = s; - return MO_ALIVE; - } - - sp = local_ep; - - if (sp == 0) { - cur_buf = buffer; - } - - if (sp != ep) { - continue; - } - - switch (q->items[q->cur].type) { - case MQE_TOP: - assert(!s || sp + offset > 0); - if (sp + offset == 0) { - s = m->start_anchored; - break; - } - s = goughEnableStarts(m, s, q->items[q->cur].som, som); - break; - case MQE_END: - *(u16 *)q->state = s; - q->cur++; - return s ? MO_ALIVE : 0; - default: - assert(!"invalid queue event"); - } - - q->cur++; - } -} - -char nfaExecGough8_Q(const struct NFA *n, struct mq *q, s64a end) { - u64a offset = q->offset; - const u8 *buffer = q->buffer; + == MO_HALT_MATCHING) { + *(u16 *)q->state = 0; + return 0; + } + if (mode == STOP_AT_MATCH && final_look != cur_buf + local_ep) { + /* this is as far as we go */ + assert(q->cur); + DEBUG_PRINTF("state %hu final_look %zd\n", s, + final_look - cur_buf); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = final_look - cur_buf + 1; /* due to + * early -1 */ + *(u16 *)q->state = s; + return MO_MATCHES_PENDING; + } + + assert(q->cur); + if (mode != NO_MATCHES && q->items[q->cur].location > end) { + /* this is as far as we go */ + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = end; + *(u16 *)q->state = s; + return MO_ALIVE; + } + + sp = local_ep; + + if (sp == 0) { + cur_buf = buffer; + } + + if (sp != ep) { + continue; + } + + switch (q->items[q->cur].type) { + case MQE_TOP: + assert(!s || sp + offset > 0); + if (sp + offset == 0) { + s = m->start_anchored; + break; + } + s = goughEnableStarts(m, s, q->items[q->cur].som, som); + break; + case MQE_END: + *(u16 *)q->state = s; + q->cur++; + return s ? 
MO_ALIVE : 0; + default: + assert(!"invalid queue event"); + } + + q->cur++; + } +} + +char nfaExecGough8_Q(const struct NFA *n, struct mq *q, s64a end) { + u64a offset = q->offset; + const u8 *buffer = q->buffer; NfaCallback cb = q->cb; - void *context = q->context; - assert(n->type == GOUGH_NFA_8); - const u8 *hend = q->history + q->hlength; - - return nfaExecGough8_Q2i(n, offset, buffer, hend, cb, context, q, end, - CALLBACK_OUTPUT); -} - -char nfaExecGough16_Q(const struct NFA *n, struct mq *q, s64a end) { - u64a offset = q->offset; - const u8 *buffer = q->buffer; + void *context = q->context; + assert(n->type == GOUGH_NFA_8); + const u8 *hend = q->history + q->hlength; + + return nfaExecGough8_Q2i(n, offset, buffer, hend, cb, context, q, end, + CALLBACK_OUTPUT); +} + +char nfaExecGough16_Q(const struct NFA *n, struct mq *q, s64a end) { + u64a offset = q->offset; + const u8 *buffer = q->buffer; NfaCallback cb = q->cb; - void *context = q->context; - assert(n->type == GOUGH_NFA_16); - const u8 *hend = q->history + q->hlength; - - return nfaExecGough16_Q2i(n, offset, buffer, hend, cb, context, q, end, - CALLBACK_OUTPUT); -} - -char nfaExecGough8_Q2(const struct NFA *n, struct mq *q, s64a end) { - u64a offset = q->offset; - const u8 *buffer = q->buffer; + void *context = q->context; + assert(n->type == GOUGH_NFA_16); + const u8 *hend = q->history + q->hlength; + + return nfaExecGough16_Q2i(n, offset, buffer, hend, cb, context, q, end, + CALLBACK_OUTPUT); +} + +char nfaExecGough8_Q2(const struct NFA *n, struct mq *q, s64a end) { + u64a offset = q->offset; + const u8 *buffer = q->buffer; NfaCallback cb = q->cb; - void *context = q->context; - assert(n->type == GOUGH_NFA_8); - const u8 *hend = q->history + q->hlength; - - return nfaExecGough8_Q2i(n, offset, buffer, hend, cb, context, q, end, - STOP_AT_MATCH); -} - -char nfaExecGough16_Q2(const struct NFA *n, struct mq *q, s64a end) { - u64a offset = q->offset; - const u8 *buffer = q->buffer; + void *context = q->context; + assert(n->type == GOUGH_NFA_8); + const u8 *hend = q->history + q->hlength; + + return nfaExecGough8_Q2i(n, offset, buffer, hend, cb, context, q, end, + STOP_AT_MATCH); +} + +char nfaExecGough16_Q2(const struct NFA *n, struct mq *q, s64a end) { + u64a offset = q->offset; + const u8 *buffer = q->buffer; NfaCallback cb = q->cb; - void *context = q->context; - assert(n->type == GOUGH_NFA_16); - const u8 *hend = q->history + q->hlength; - - return nfaExecGough16_Q2i(n, offset, buffer, hend, cb, context, q, end, - STOP_AT_MATCH); -} - -char nfaExecGough8_QR(const struct NFA *n, struct mq *q, ReportID report) { - u64a offset = q->offset; - const u8 *buffer = q->buffer; + void *context = q->context; + assert(n->type == GOUGH_NFA_16); + const u8 *hend = q->history + q->hlength; + + return nfaExecGough16_Q2i(n, offset, buffer, hend, cb, context, q, end, + STOP_AT_MATCH); +} + +char nfaExecGough8_QR(const struct NFA *n, struct mq *q, ReportID report) { + u64a offset = q->offset; + const u8 *buffer = q->buffer; NfaCallback cb = q->cb; - void *context = q->context; - assert(n->type == GOUGH_NFA_8); - const u8 *hend = q->history + q->hlength; - - char rv = nfaExecGough8_Q2i(n, offset, buffer, hend, cb, context, q, - 0 /* end */, NO_MATCHES); - if (rv && nfaExecMcClellan8_inAccept(n, report, q)) { - return MO_MATCHES_PENDING; - } else { - return rv; - } -} - -char nfaExecGough16_QR(const struct NFA *n, struct mq *q, ReportID report) { - u64a offset = q->offset; - const u8 *buffer = q->buffer; + void *context = q->context; + 
assert(n->type == GOUGH_NFA_8); + const u8 *hend = q->history + q->hlength; + + char rv = nfaExecGough8_Q2i(n, offset, buffer, hend, cb, context, q, + 0 /* end */, NO_MATCHES); + if (rv && nfaExecMcClellan8_inAccept(n, report, q)) { + return MO_MATCHES_PENDING; + } else { + return rv; + } +} + +char nfaExecGough16_QR(const struct NFA *n, struct mq *q, ReportID report) { + u64a offset = q->offset; + const u8 *buffer = q->buffer; NfaCallback cb = q->cb; - void *context = q->context; - assert(n->type == GOUGH_NFA_16); - const u8 *hend = q->history + q->hlength; - - char rv = nfaExecGough16_Q2i(n, offset, buffer, hend, cb, context, q, - 0 /* end */, NO_MATCHES); - - if (rv && nfaExecMcClellan16_inAccept(n, report, q)) { - return MO_MATCHES_PENDING; - } else { - return rv; - } -} - -char nfaExecGough8_initCompressedState(const struct NFA *nfa, u64a offset, - void *state, UNUSED u8 key) { - const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa); - memset(state, 0, nfa->streamStateSize); - u8 s = offset ? m->start_floating : m->start_anchored; - if (s) { - *(u8 *)state = s; - return 1; - } - return 0; -} - -char nfaExecGough16_initCompressedState(const struct NFA *nfa, u64a offset, - void *state, UNUSED u8 key) { - const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa); - memset(state, 0, nfa->streamStateSize); - u16 s = offset ? m->start_floating : m->start_anchored; - if (s) { - unaligned_store_u16(state, s); - return 1; - } - return 0; -} - - -char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q) { - const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n); + void *context = q->context; + assert(n->type == GOUGH_NFA_16); + const u8 *hend = q->history + q->hlength; + + char rv = nfaExecGough16_Q2i(n, offset, buffer, hend, cb, context, q, + 0 /* end */, NO_MATCHES); + + if (rv && nfaExecMcClellan16_inAccept(n, report, q)) { + return MO_MATCHES_PENDING; + } else { + return rv; + } +} + +char nfaExecGough8_initCompressedState(const struct NFA *nfa, u64a offset, + void *state, UNUSED u8 key) { + const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa); + memset(state, 0, nfa->streamStateSize); + u8 s = offset ? m->start_floating : m->start_anchored; + if (s) { + *(u8 *)state = s; + return 1; + } + return 0; +} + +char nfaExecGough16_initCompressedState(const struct NFA *nfa, u64a offset, + void *state, UNUSED u8 key) { + const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa); + memset(state, 0, nfa->streamStateSize); + u16 s = offset ? 
m->start_floating : m->start_anchored; + if (s) { + unaligned_store_u16(state, s); + return 1; + } + return 0; +} + + +char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q) { + const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n); NfaCallback cb = q->cb; - void *ctxt = q->context; - u8 s = *(u8 *)q->state; - u64a offset = q_cur_offset(q); - struct gough_som_info *som = getSomInfo(q->state); - assert(q_cur_type(q) == MQE_START); - assert(s); - - if (s >= m->accept_limit_8) { - u32 cached_accept_id = 0; - u16 cached_accept_state = 0; - u32 cached_accept_som = 0; - - doReports(cb, ctxt, m, som, s, offset, 0, &cached_accept_state, - &cached_accept_id, &cached_accept_som); - } - - return 0; -} - -char nfaExecGough16_reportCurrent(const struct NFA *n, struct mq *q) { - const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n); + void *ctxt = q->context; + u8 s = *(u8 *)q->state; + u64a offset = q_cur_offset(q); + struct gough_som_info *som = getSomInfo(q->state); + assert(q_cur_type(q) == MQE_START); + assert(s); + + if (s >= m->accept_limit_8) { + u32 cached_accept_id = 0; + u16 cached_accept_state = 0; + u32 cached_accept_som = 0; + + doReports(cb, ctxt, m, som, s, offset, 0, &cached_accept_state, + &cached_accept_id, &cached_accept_som); + } + + return 0; +} + +char nfaExecGough16_reportCurrent(const struct NFA *n, struct mq *q) { + const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n); NfaCallback cb = q->cb; - void *ctxt = q->context; - u16 s = *(u16 *)q->state; - const struct mstate_aux *aux = get_aux(m, s); - u64a offset = q_cur_offset(q); - struct gough_som_info *som = getSomInfo(q->state); - assert(q_cur_type(q) == MQE_START); - DEBUG_PRINTF("state %hu\n", s); - assert(s); - - if (aux->accept) { - u32 cached_accept_id = 0; - u16 cached_accept_state = 0; - u32 cached_accept_som = 0; - - doReports(cb, ctxt, m, som, s, offset, 0, &cached_accept_state, - &cached_accept_id, &cached_accept_som); - } - - return 0; -} - -char nfaExecGough8_inAccept(const struct NFA *n, ReportID report, - struct mq *q) { - return nfaExecMcClellan8_inAccept(n, report, q); -} - -char nfaExecGough16_inAccept(const struct NFA *n, ReportID report, - struct mq *q) { - return nfaExecMcClellan16_inAccept(n, report, q); -} - + void *ctxt = q->context; + u16 s = *(u16 *)q->state; + const struct mstate_aux *aux = get_aux(m, s); + u64a offset = q_cur_offset(q); + struct gough_som_info *som = getSomInfo(q->state); + assert(q_cur_type(q) == MQE_START); + DEBUG_PRINTF("state %hu\n", s); + assert(s); + + if (aux->accept) { + u32 cached_accept_id = 0; + u16 cached_accept_state = 0; + u32 cached_accept_som = 0; + + doReports(cb, ctxt, m, som, s, offset, 0, &cached_accept_state, + &cached_accept_id, &cached_accept_som); + } + + return 0; +} + +char nfaExecGough8_inAccept(const struct NFA *n, ReportID report, + struct mq *q) { + return nfaExecMcClellan8_inAccept(n, report, q); +} + +char nfaExecGough16_inAccept(const struct NFA *n, ReportID report, + struct mq *q) { + return nfaExecMcClellan16_inAccept(n, report, q); +} + char nfaExecGough8_inAnyAccept(const struct NFA *n, struct mq *q) { return nfaExecMcClellan8_inAnyAccept(n, q); } @@ -1039,105 +1039,105 @@ char nfaExecGough16_inAnyAccept(const struct NFA *n, struct mq *q) { return nfaExecMcClellan16_inAnyAccept(n, q); } -static +static char goughCheckEOD(const struct NFA *nfa, u16 s, - const struct gough_som_info *som, + const struct gough_som_info *som, u64a offset, NfaCallback cb, void *ctxt) { - const struct mcclellan *m = 
(const struct mcclellan *)getImplNfa(nfa); - const struct mstate_aux *aux = get_aux(m, s); - + const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa); + const struct mstate_aux *aux = get_aux(m, s); + if (!aux->accept_eod) { return MO_CONTINUE_MATCHING; - } + } return doReports(cb, ctxt, m, som, s, offset, 1, NULL, NULL, NULL); -} - -char nfaExecGough8_testEOD(const struct NFA *nfa, const char *state, +} + +char nfaExecGough8_testEOD(const struct NFA *nfa, const char *state, UNUSED const char *streamState, u64a offset, NfaCallback callback, void *context) { - const struct gough_som_info *som = getSomInfoConst(state); + const struct gough_som_info *som = getSomInfoConst(state); return goughCheckEOD(nfa, *(const u8 *)state, som, offset, callback, context); -} - -char nfaExecGough16_testEOD(const struct NFA *nfa, const char *state, +} + +char nfaExecGough16_testEOD(const struct NFA *nfa, const char *state, UNUSED const char *streamState, u64a offset, NfaCallback callback, void *context) { - assert(ISALIGNED_N(state, 8)); - const struct gough_som_info *som = getSomInfoConst(state); + assert(ISALIGNED_N(state, 8)); + const struct gough_som_info *som = getSomInfoConst(state); return goughCheckEOD(nfa, *(const u16 *)state, som, offset, callback, context); -} - -char nfaExecGough8_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) { - memset(q->state, 0, nfa->scratchStateSize); - return 0; -} - -char nfaExecGough16_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) { - memset(q->state, 0, nfa->scratchStateSize); - assert(ISALIGNED_N(q->state, 2)); - return 0; -} - -static really_inline -void compSomSpace(const struct NFA *nfa, u8 *dest_som_base, - const struct gough_som_info *src, u64a curr_offset) { - const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa); - const struct gough_info *gi = get_gough(m); - u32 count = gi->stream_som_loc_count; - u32 width = gi->stream_som_loc_width; - - for (u32 i = 0; i < count; i++) { - compressSomValue(width, curr_offset, dest_som_base, i, src->slots[i]); - } -} - -static really_inline -void expandSomSpace(const struct NFA *nfa, struct gough_som_info *som, - const u8 *src_som_base, u64a curr_offset) { - const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa); - const struct gough_info *gi = get_gough(m); - u32 count = gi->stream_som_loc_count; - u32 width = gi->stream_som_loc_width; - - for (u32 i = 0; i < count; i++) { - som->slots[i] = expandSomValue(width, curr_offset, src_som_base, i); - } -} - -char nfaExecGough8_queueCompressState(const struct NFA *nfa, const struct mq *q, - s64a loc) { - void *dest = q->streamState; - const void *src = q->state; - - *(u8 *)dest = *(const u8 *)src; - compSomSpace(nfa, (u8 *)dest + 1, getSomInfoConst(src), q->offset + loc); - return 0; -} - -char nfaExecGough8_expandState(const struct NFA *nfa, void *dest, - const void *src, u64a offset, UNUSED u8 key) { - *(u8 *)dest = *(const u8 *)src; - expandSomSpace(nfa, getSomInfo(dest), (const u8 *)src + 1, offset); - return 0; -} - -char nfaExecGough16_queueCompressState(const struct NFA *nfa, - const struct mq *q, s64a loc) { - void *dest = q->streamState; - const void *src = q->state; - - assert(ISALIGNED_N(src, 2)); - unaligned_store_u16(dest, *(const u16 *)(src)); - compSomSpace(nfa, (u8 *)dest + 2, getSomInfoConst(src), q->offset + loc); - return 0; -} - -char nfaExecGough16_expandState(const struct NFA *nfa, void *dest, - const void *src, u64a offset, UNUSED u8 key) { - assert(ISALIGNED_N(dest, 2)); - *(u16 *)dest = 
unaligned_load_u16(src); - expandSomSpace(nfa, getSomInfo(dest), (const u8 *)src + 2, offset); - return 0; -} +} + +char nfaExecGough8_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) { + memset(q->state, 0, nfa->scratchStateSize); + return 0; +} + +char nfaExecGough16_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) { + memset(q->state, 0, nfa->scratchStateSize); + assert(ISALIGNED_N(q->state, 2)); + return 0; +} + +static really_inline +void compSomSpace(const struct NFA *nfa, u8 *dest_som_base, + const struct gough_som_info *src, u64a curr_offset) { + const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa); + const struct gough_info *gi = get_gough(m); + u32 count = gi->stream_som_loc_count; + u32 width = gi->stream_som_loc_width; + + for (u32 i = 0; i < count; i++) { + compressSomValue(width, curr_offset, dest_som_base, i, src->slots[i]); + } +} + +static really_inline +void expandSomSpace(const struct NFA *nfa, struct gough_som_info *som, + const u8 *src_som_base, u64a curr_offset) { + const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa); + const struct gough_info *gi = get_gough(m); + u32 count = gi->stream_som_loc_count; + u32 width = gi->stream_som_loc_width; + + for (u32 i = 0; i < count; i++) { + som->slots[i] = expandSomValue(width, curr_offset, src_som_base, i); + } +} + +char nfaExecGough8_queueCompressState(const struct NFA *nfa, const struct mq *q, + s64a loc) { + void *dest = q->streamState; + const void *src = q->state; + + *(u8 *)dest = *(const u8 *)src; + compSomSpace(nfa, (u8 *)dest + 1, getSomInfoConst(src), q->offset + loc); + return 0; +} + +char nfaExecGough8_expandState(const struct NFA *nfa, void *dest, + const void *src, u64a offset, UNUSED u8 key) { + *(u8 *)dest = *(const u8 *)src; + expandSomSpace(nfa, getSomInfo(dest), (const u8 *)src + 1, offset); + return 0; +} + +char nfaExecGough16_queueCompressState(const struct NFA *nfa, + const struct mq *q, s64a loc) { + void *dest = q->streamState; + const void *src = q->state; + + assert(ISALIGNED_N(src, 2)); + unaligned_store_u16(dest, *(const u16 *)(src)); + compSomSpace(nfa, (u8 *)dest + 2, getSomInfoConst(src), q->offset + loc); + return 0; +} + +char nfaExecGough16_expandState(const struct NFA *nfa, void *dest, + const void *src, u64a offset, UNUSED u8 key) { + assert(ISALIGNED_N(dest, 2)); + *(u16 *)dest = unaligned_load_u16(src); + expandSomSpace(nfa, getSomInfo(dest), (const u8 *)src + 2, offset); + return 0; +} diff --git a/contrib/libs/hyperscan/src/nfa/gough.h b/contrib/libs/hyperscan/src/nfa/gough.h index 9f32818ef8..a7f4889232 100644 --- a/contrib/libs/hyperscan/src/nfa/gough.h +++ b/contrib/libs/hyperscan/src/nfa/gough.h @@ -1,82 +1,82 @@ -/* +/* * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef GOUGH_H -#define GOUGH_H - -#include "callback.h" -#include "ue2common.h" - -struct NFA; -struct mq; - -// 8-bit Gough - -char nfaExecGough8_testEOD(const struct NFA *nfa, const char *state, - const char *streamState, u64a offset, + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef GOUGH_H +#define GOUGH_H + +#include "callback.h" +#include "ue2common.h" + +struct NFA; +struct mq; + +// 8-bit Gough + +char nfaExecGough8_testEOD(const struct NFA *nfa, const char *state, + const char *streamState, u64a offset, NfaCallback callback, void *context); -char nfaExecGough8_Q(const struct NFA *n, struct mq *q, s64a end); -char nfaExecGough8_Q2(const struct NFA *n, struct mq *q, s64a end); -char nfaExecGough8_QR(const struct NFA *n, struct mq *q, ReportID report); -char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q); -char nfaExecGough8_inAccept(const struct NFA *n, ReportID report, struct mq *q); +char nfaExecGough8_Q(const struct NFA *n, struct mq *q, s64a end); +char nfaExecGough8_Q2(const struct NFA *n, struct mq *q, s64a end); +char nfaExecGough8_QR(const struct NFA *n, struct mq *q, ReportID report); +char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q); +char nfaExecGough8_inAccept(const struct NFA *n, ReportID report, struct mq *q); char nfaExecGough8_inAnyAccept(const struct NFA *n, struct mq *q); -char nfaExecGough8_queueInitState(const struct NFA *n, struct mq *q); -char nfaExecGough8_initCompressedState(const struct NFA *n, u64a offset, - void *state, u8 key); -char nfaExecGough8_queueCompressState(const struct NFA *nfa, const struct mq *q, - s64a loc); -char nfaExecGough8_expandState(const struct NFA *nfa, void *dest, - const void *src, u64a offset, u8 key); - -#define nfaExecGough8_B_Reverse NFA_API_NO_IMPL -#define nfaExecGough8_zombie_status NFA_API_ZOMBIE_NO_IMPL - -// 16-bit Gough - -char nfaExecGough16_testEOD(const struct NFA *nfa, const char *state, - const char *streamState, u64a offset, +char nfaExecGough8_queueInitState(const struct NFA *n, struct mq *q); +char nfaExecGough8_initCompressedState(const struct NFA *n, u64a offset, + void *state, u8 key); +char nfaExecGough8_queueCompressState(const struct NFA *nfa, const struct mq *q, + s64a loc); +char nfaExecGough8_expandState(const struct NFA *nfa, void *dest, + const void *src, u64a offset, u8 key); + +#define nfaExecGough8_B_Reverse NFA_API_NO_IMPL +#define nfaExecGough8_zombie_status NFA_API_ZOMBIE_NO_IMPL + +// 16-bit Gough + +char nfaExecGough16_testEOD(const struct NFA *nfa, const char *state, + const char *streamState, u64a offset, NfaCallback callback, void *context); -char nfaExecGough16_Q(const struct NFA *n, struct mq *q, s64a end); -char nfaExecGough16_Q2(const struct NFA *n, struct mq *q, s64a end); -char nfaExecGough16_QR(const struct NFA *n, struct mq *q, ReportID report); -char nfaExecGough16_reportCurrent(const struct NFA *n, struct mq *q); -char nfaExecGough16_inAccept(const struct NFA *n, ReportID report, struct mq *q); +char nfaExecGough16_Q(const struct NFA *n, struct mq *q, s64a end); +char nfaExecGough16_Q2(const struct NFA *n, struct mq *q, s64a end); +char nfaExecGough16_QR(const struct NFA *n, struct mq *q, ReportID report); +char nfaExecGough16_reportCurrent(const struct NFA *n, struct mq *q); +char nfaExecGough16_inAccept(const struct NFA *n, ReportID report, struct mq *q); char nfaExecGough16_inAnyAccept(const struct NFA *n, struct mq *q); -char nfaExecGough16_queueInitState(const struct NFA *n, struct mq *q); -char nfaExecGough16_initCompressedState(const struct NFA *n, u64a offset, - void *state, u8 key); -char nfaExecGough16_queueCompressState(const struct NFA *nfa, - const struct mq *q, s64a loc); -char nfaExecGough16_expandState(const struct NFA *nfa, void *dest, - const void *src, u64a offset, u8 key); - -#define 
nfaExecGough16_B_Reverse NFA_API_NO_IMPL -#define nfaExecGough16_zombie_status NFA_API_ZOMBIE_NO_IMPL - -#endif +char nfaExecGough16_queueInitState(const struct NFA *n, struct mq *q); +char nfaExecGough16_initCompressedState(const struct NFA *n, u64a offset, + void *state, u8 key); +char nfaExecGough16_queueCompressState(const struct NFA *nfa, + const struct mq *q, s64a loc); +char nfaExecGough16_expandState(const struct NFA *nfa, void *dest, + const void *src, u64a offset, u8 key); + +#define nfaExecGough16_B_Reverse NFA_API_NO_IMPL +#define nfaExecGough16_zombie_status NFA_API_ZOMBIE_NO_IMPL + +#endif diff --git a/contrib/libs/hyperscan/src/nfa/gough_internal.h b/contrib/libs/hyperscan/src/nfa/gough_internal.h index 42d73970cf..8bf06e0f7f 100644 --- a/contrib/libs/hyperscan/src/nfa/gough_internal.h +++ b/contrib/libs/hyperscan/src/nfa/gough_internal.h @@ -1,134 +1,134 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef GOUGH_INTERNAL_H -#define GOUGH_INTERNAL_H - -#include "accel.h" -#include "mcclellan_internal.h" -#include "ue2common.h" - -#define INVALID_SLOT (~0U) - -#define GOUGH_INS_END 0 -#define GOUGH_INS_MOV 1 -#define GOUGH_INS_NEW 2 -#define GOUGH_INS_MIN 3 -/* todo: add instructions targeting acc reg? */ - -struct gough_ins { - u32 op; /* u32 to avoid padding */ - u32 dest; - u32 src; /* for GOUGH_INS_NEW, this specifies the adjustment to apply to the - * current offset */ -}; - -/* - * HAPPY FUN ASCII ART TIME - * - * ---- - * | | struct NFA - * ---- - * ~~~~ normal(ish) mcclellan engine - * ~~~~ - * ~~~~ - * ~~~~ - * ~~~~ - * ~~~~ - * ~~~~ - * ~~~~ - * ---- = m->haig_offset - * | | } struct gough_info - * ---- - * | | } - * | | } edge prog table -> provides the offset of the start of the program - * | | } to run when the edge is taken. 0 indicates no - * | | } work to do - * ---- = h->top_prog_offset - * | | } - * | | } top prog table -> provides the offset of the start of the program - * | | } to run when a top is taken from this state. 
0 - * | | } indicates nothing to do - * ---- = h->prog_base_offset - * | | } - * | | } programs to run - * | | } - * | | } - * ---- - */ - -struct gough_info { - u32 top_prog_offset; /**< offset to the base of the top prog table */ - u32 prog_base_offset; /**< not used at runtime */ - u32 stream_som_loc_count; /**< number of som locs in the stream state */ - u8 stream_som_loc_width; /**< number of bytes per som loc */ -}; - -static really_inline -const struct gough_info *get_gough(const struct mcclellan *m) { - assert(m->haig_offset); - const char *n = (const char *)m - sizeof(struct NFA); - return (const struct gough_info *)(n + m->haig_offset); -} - -static really_inline -const u32 *get_gough_top_offsets(const struct mcclellan *m) { - const struct gough_info *g = get_gough(m); - if (!g->top_prog_offset) { - return NULL; - } - const char *n = (const char *)m - sizeof(struct NFA); - return (const u32 *)(n + g->top_prog_offset); -} - -/* Gough state representation in scratch. - * - * During execution, gough tracks a number of variables containing potential - * starts of match. These are all stored in a large array of u64a slots. - */ -struct gough_som_info { - u64a slots[1]; /* 'flexible' member array */ -}; - -struct gough_report { - ReportID r; - u32 som; /* som slot to report */ -}; - -struct gough_report_list { - u32 count; - struct gough_report report[]; -}; - -struct gough_accel { - union AccelAux accel; - u8 margin_dist; - u32 prog_offset; -}; - -#endif +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef GOUGH_INTERNAL_H +#define GOUGH_INTERNAL_H + +#include "accel.h" +#include "mcclellan_internal.h" +#include "ue2common.h" + +#define INVALID_SLOT (~0U) + +#define GOUGH_INS_END 0 +#define GOUGH_INS_MOV 1 +#define GOUGH_INS_NEW 2 +#define GOUGH_INS_MIN 3 +/* todo: add instructions targeting acc reg? 
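 *
 * For illustration, a hypothetical block in the {op, dest, src} encoding of
 * struct gough_ins below, which starts a fresh som value in slot 0 and then
 * folds it into the running minimum kept in slot 1:
 *
 *   { GOUGH_INS_NEW, 0, 0 }                        slot 0 = offset - 0
 *   { GOUGH_INS_MIN, 1, 0 }                        slot 1 = min(slot 1, slot 0)
 *   { GOUGH_INS_END, INVALID_SLOT, INVALID_SLOT }  stop
 *
 * (Example only; the real programs are emitted by goughcompile.cpp.)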
*/ + +struct gough_ins { + u32 op; /* u32 to avoid padding */ + u32 dest; + u32 src; /* for GOUGH_INS_NEW, this specifies the adjustment to apply to the + * current offset */ +}; + +/* + * HAPPY FUN ASCII ART TIME + * + * ---- + * | | struct NFA + * ---- + * ~~~~ normal(ish) mcclellan engine + * ~~~~ + * ~~~~ + * ~~~~ + * ~~~~ + * ~~~~ + * ~~~~ + * ~~~~ + * ---- = m->haig_offset + * | | } struct gough_info + * ---- + * | | } + * | | } edge prog table -> provides the offset of the start of the program + * | | } to run when the edge is taken. 0 indicates no + * | | } work to do + * ---- = h->top_prog_offset + * | | } + * | | } top prog table -> provides the offset of the start of the program + * | | } to run when a top is taken from this state. 0 + * | | } indicates nothing to do + * ---- = h->prog_base_offset + * | | } + * | | } programs to run + * | | } + * | | } + * ---- + */ + +struct gough_info { + u32 top_prog_offset; /**< offset to the base of the top prog table */ + u32 prog_base_offset; /**< not used at runtime */ + u32 stream_som_loc_count; /**< number of som locs in the stream state */ + u8 stream_som_loc_width; /**< number of bytes per som loc */ +}; + +static really_inline +const struct gough_info *get_gough(const struct mcclellan *m) { + assert(m->haig_offset); + const char *n = (const char *)m - sizeof(struct NFA); + return (const struct gough_info *)(n + m->haig_offset); +} + +static really_inline +const u32 *get_gough_top_offsets(const struct mcclellan *m) { + const struct gough_info *g = get_gough(m); + if (!g->top_prog_offset) { + return NULL; + } + const char *n = (const char *)m - sizeof(struct NFA); + return (const u32 *)(n + g->top_prog_offset); +} + +/* Gough state representation in scratch. + * + * During execution, gough tracks a number of variables containing potential + * starts of match. These are all stored in a large array of u64a slots. + */ +struct gough_som_info { + u64a slots[1]; /* 'flexible' member array */ +}; + +struct gough_report { + ReportID r; + u32 som; /* som slot to report */ +}; + +struct gough_report_list { + u32 count; + struct gough_report report[]; +}; + +struct gough_accel { + union AccelAux accel; + u8 margin_dist; + u32 prog_offset; +}; + +#endif diff --git a/contrib/libs/hyperscan/src/nfa/goughcompile.cpp b/contrib/libs/hyperscan/src/nfa/goughcompile.cpp index 0fd64bf126..d41c6f4235 100644 --- a/contrib/libs/hyperscan/src/nfa/goughcompile.cpp +++ b/contrib/libs/hyperscan/src/nfa/goughcompile.cpp @@ -1,1170 +1,1170 @@ -/* +/* * Copyright (c) 2015-2018, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "goughcompile.h" - -#include "accel.h" -#include "goughcompile_dump.h" -#include "goughcompile_internal.h" -#include "gough_internal.h" -#include "grey.h" -#include "mcclellancompile.h" -#include "nfa_internal.h" -#include "util/compile_context.h" -#include "util/container.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "goughcompile.h" + +#include "accel.h" +#include "goughcompile_dump.h" +#include "goughcompile_internal.h" +#include "gough_internal.h" +#include "grey.h" +#include "mcclellancompile.h" +#include "nfa_internal.h" +#include "util/compile_context.h" +#include "util/container.h" #include "util/flat_containers.h" +#include "util/graph_range.h" +#include "util/make_unique.h" +#include "util/order_check.h" #include "util/report_manager.h" +#include "util/verify_types.h" + +#include "ue2common.h" + +#include <algorithm> +#include <boost/dynamic_bitset.hpp> +#include <boost/range/adaptor/map.hpp> + +using namespace std; +using boost::adaptors::map_keys; +using boost::adaptors::map_values; +using boost::vertex_index; + +namespace ue2 { + +void raw_som_dfa::stripExtraEodReports(void) { + /* if a state generates a given report as a normal accept - then it does + * not also need to generate an eod report for it */ + for (vector<dstate_som>::iterator it = state_som.begin(); + it != state_som.end(); ++it) { + for (const som_report &sr : it->reports) { + it->reports_eod.erase(sr); + } + dstate &norm = states[it - state_som.begin()]; + norm.reports_eod.clear(); + for (const som_report &sr : it->reports_eod) { + norm.reports_eod.insert(sr.report); + } + } +} + +namespace { + +class gough_build_strat : public mcclellan_build_strat { +public: gough_build_strat( raw_som_dfa &r, const GoughGraph &g, const ReportManager &rm_in, const map<dstate_id_t, gough_accel_state_info> &accel_info) : mcclellan_build_strat(r, rm_in, false), rdfa(r), gg(g), - accel_gough_info(accel_info) {} - unique_ptr<raw_report_info> gatherReports(vector<u32> &reports /* out */, - vector<u32> &reports_eod /* out */, - u8 *isSingleReport /* out */, - ReportID *arbReport /* out */) const override; AccelScheme find_escape_strings(dstate_id_t this_idx) const override; - size_t accelSize(void) const override { return sizeof(gough_accel); } void buildAccel(dstate_id_t this_idx, const AccelScheme &info, void *accel_out) override; u32 max_allowed_offset_accel() const override { return 0; } DfaType getType() const override { return Gough; } - - raw_som_dfa &rdfa; - const GoughGraph &gg; - map<dstate_id_t, gough_accel_state_info> accel_gough_info; - 
map<gough_accel *, dstate_id_t> built_accel; -}; - -} - -GoughSSAVar::~GoughSSAVar() { -} - -void GoughSSAVar::clear_outputs() { - for (GoughSSAVarWithInputs *var : outputs) { - var->remove_input_raw(this); - } - outputs.clear(); -} - -void GoughSSAVarWithInputs::clear_all() { - clear_inputs(); - clear_outputs(); -} - -void GoughSSAVarMin::clear_inputs() { - for (GoughSSAVar *var : inputs) { - assert(contains(var->outputs, this)); - var->outputs.erase(this); - } - inputs.clear(); -} - -void GoughSSAVarMin::replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) { - assert(contains(inputs, old_v)); - inputs.erase(old_v); - old_v->outputs.erase(this); - inputs.insert(new_v); - new_v->outputs.insert(this); -} - -static -void translateRawReports(UNUSED GoughGraph &cfg, UNUSED const raw_som_dfa &raw, - const flat_map<u32, GoughSSAVarJoin *> &joins_at_s, - UNUSED GoughVertex s, - const set<som_report> &reports_in, - vector<pair<ReportID, GoughSSAVar *> > *reports_out) { - for (const som_report &sr : reports_in) { - DEBUG_PRINTF("state %u: report %u slot %d\n", cfg[s].state_id, - sr.report, sr.slot); - GoughSSAVar *var = nullptr; - if (sr.slot == CREATE_NEW_SOM) { - assert(!generates_callbacks(raw.kind)); - } else { - var = joins_at_s.at(sr.slot); - } - reports_out->push_back(make_pair(sr.report, var)); - } -} - -static -void makeCFG_reports(GoughGraph &cfg, const raw_som_dfa &raw, - const vector<flat_map<u32, GoughSSAVarJoin *> > &joins, - const vector<GoughVertex> &vertices) { - for (u32 i = 1; i < raw.states.size(); ++i) { - GoughVertex s = vertices[i]; - const flat_map<u32, GoughSSAVarJoin *> &joins_at_s - = joins[get(vertex_index, cfg, s)]; - translateRawReports(cfg, raw, joins_at_s, s, - raw.state_som[i].reports, &cfg[s].reports); - translateRawReports(cfg, raw, joins_at_s, s, - raw.state_som[i].reports_eod, &cfg[s].reports_eod); - } -} - -static never_inline -void makeCFG_top_edge(GoughGraph &cfg, const vector<GoughVertex> &vertices, - const vector<flat_map<u32, GoughSSAVarJoin *> > &joins, - u32 trigger_slot, const som_tran_info &src_slots, - const som_tran_info &dest_slot_pred, - dstate_id_t i, dstate_id_t n, const GoughEdge &e) { - GoughVertex s = vertices[i]; - GoughVertex t = vertices[n]; - const flat_map<u32, GoughSSAVarJoin *> &joins_at_s - = joins[get(vertex_index, cfg, s)]; - const flat_map<u32, GoughSSAVarJoin *> &joins_at_t - = joins[get(vertex_index, cfg, t)]; - - DEBUG_PRINTF("top for %u -> %u\n", i, n); - - for (som_tran_info::const_iterator it = dest_slot_pred.begin(); - it != dest_slot_pred.end(); ++it) { - /* for ordering, need to ensure that new values feeding directly - * into mins come first */ - u32 slot_id = it->first; - - shared_ptr<GoughSSAVarNew> vnew; - if (slot_id == trigger_slot) { - vnew = make_shared<GoughSSAVarNew>(0U); - cfg[e].vars.push_back(vnew); - } else { - assert(contains(src_slots, slot_id)); - } - - GoughSSAVar *final_var; - if (vnew && !contains(src_slots, slot_id)) { - final_var = vnew.get(); - DEBUG_PRINTF("bypassing min on join %u\n", slot_id); - } else if (!vnew) { - final_var = joins_at_s.at(slot_id); - DEBUG_PRINTF("bypassing min on join %u\n", slot_id); - } else { - assert(vnew); - assert(contains(src_slots, slot_id)); - - shared_ptr<GoughSSAVarMin> vmin = make_shared<GoughSSAVarMin>(); - cfg[e].vars.push_back(vmin); - final_var = vmin.get(); - - DEBUG_PRINTF("slot %u gets a new value\n", slot_id); - vmin->add_input(vnew.get()); - - DEBUG_PRINTF("slot %u is constant\n", slot_id); - vmin->add_input(joins_at_s.at(slot_id)); - } - - /* wire to 
destination target */ - GoughSSAVarJoin *vk = joins_at_t.at(slot_id); - vk->add_input(final_var, e); - } -} - -static never_inline -void makeCFG_edge(GoughGraph &cfg, const map<u32, u32> &som_creators, - const vector<GoughVertex> &vertices, - const vector<flat_map<u32, GoughSSAVarJoin *> > &joins, - const som_tran_info &src_slots, - const som_tran_info &dest_slot_pred, dstate_id_t i, - dstate_id_t n, const GoughEdge &e) { - GoughVertex s = vertices[i]; - GoughVertex t = vertices[n]; - const flat_map<u32, GoughSSAVarJoin *> &joins_at_s - = joins[get(vertex_index, cfg, s)]; - const flat_map<u32, GoughSSAVarJoin *> &joins_at_t - = joins[get(vertex_index, cfg, t)]; - - map<u32, shared_ptr<GoughSSAVarNew> > vnew_by_adj; - for (som_tran_info::const_iterator it = dest_slot_pred.begin(); - it != dest_slot_pred.end(); ++it) { - /* for ordering, need to ensure that new values feeding directly - * into mins come first */ - u32 slot_id = it->first; - - if (contains(som_creators, slot_id) && !som_creators.at(slot_id)) { - continue; - } - - shared_ptr<GoughSSAVarNew> vnew; - const vector<u32> &inputs = it->second; - u32 useful_input_count = 0; - u32 first_useful_input = ~0U; - - for (const u32 &input_slot : inputs) { - if (!contains(src_slots, input_slot)) { - continue; - } - DEBUG_PRINTF("%u is useful\n", input_slot); - - if (!vnew || !contains(som_creators, input_slot)) { - useful_input_count++; - if (useful_input_count == 1) { - first_useful_input = input_slot; - } - } - - if (contains(som_creators, input_slot)) { - u32 adjust = som_creators.at(input_slot); - - if (vnew && vnew->adjust >= adjust) { - DEBUG_PRINTF("skipping %u as domininated by adj%u\n", - adjust, vnew->adjust); - continue; /* deeper starts can be seen to statically - dominate */ - } - - if (contains(vnew_by_adj, adjust)) { - vnew = vnew_by_adj[adjust]; - } else { - vnew = make_shared<GoughSSAVarNew>(adjust); - cfg[e].vars.push_back(vnew); - vnew_by_adj[adjust] = vnew; - } - assert(vnew); - } - } - - /* If we have a new start of match (with no offset or 1 byte offset) and - * other variables coming in, the new will always be dominated by the - * existing variables (as they must be at least one byte into the match) - * -- and so can be dropped. 
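 * (Worked example: on an edge taken at offset 100, the fresh value is
 * 100 - adjust >= 99 when adjust < 2, while every slot carried in from a
 * predecessor was set at offset <= 99; the minimum is therefore always
 * achieved by an existing input, and the fresh value is dead on arrival.)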
*/ - if (vnew && vnew->adjust < 2 && useful_input_count > 1) { - useful_input_count--; - vnew.reset(); - - /* need to reestablish the first useful input */ - for (const u32 &input_slot : inputs) { - if (!contains(src_slots, input_slot)) { - continue; - } - if (!contains(som_creators, input_slot)) { - first_useful_input = input_slot; - } - } - - } - - GoughSSAVar *final_var; - if (useful_input_count == 1) { - if (vnew) { - final_var = vnew.get(); - } else { - assert(first_useful_input != ~0U); - final_var = joins_at_s.at(first_useful_input); - } - DEBUG_PRINTF("bypassing min on join %u\n", slot_id); - } else { - shared_ptr<GoughSSAVarMin> vmin = make_shared<GoughSSAVarMin>(); - cfg[e].vars.push_back(vmin); - final_var = vmin.get(); - - if (vnew) { - vmin->add_input(vnew.get()); - } - - /* wire the normal inputs to the min */ - for (const u32 &input_slot : inputs) { - if (!contains(src_slots, input_slot)) { - continue; - } - if (!contains(som_creators, input_slot)) { - vmin->add_input(joins_at_s.at(input_slot)); - } - } - assert(vmin->get_inputs().size() > 1); - DEBUG_PRINTF("wire min to join %u\n", slot_id); - } - - GoughSSAVarJoin *vk = joins_at_t.at(slot_id); - assert(final_var); - vk->add_input(final_var, e); - } -} - -static never_inline -unique_ptr<GoughGraph> makeCFG(const raw_som_dfa &raw) { - vector<GoughVertex> vertices; - vertices.reserve(raw.states.size()); - unique_ptr<GoughGraph> cfg = ue2::make_unique<GoughGraph>(); - u32 min_state = !is_triggered(raw.kind); - - if (min_state) { - vertices.push_back(GoughGraph::null_vertex()); /* skip dead state */ - } - - vector<flat_map<u32, GoughSSAVarJoin *> > joins(raw.states.size()); - for (u32 i = min_state; i < raw.states.size(); ++i) { - GoughVertex v = add_vertex(GoughVertexProps(i), *cfg); - vertices.push_back(v); - - /* create JOIN variables */ - for (som_tran_info::const_iterator it = raw.state_som[i].preds.begin(); - it != raw.state_som[i].preds.end(); ++it) { - u32 slot_id = it->first; - if (!contains(raw.new_som_nfa_states, slot_id) - || raw.new_som_nfa_states.at(slot_id)) { - (*cfg)[v].vars.push_back(make_shared<GoughSSAVarJoin>()); - joins[get(vertex_index, *cfg, v)][slot_id] - = (*cfg)[v].vars.back().get(); - DEBUG_PRINTF("dfa %u:: slot %u\n", i, slot_id); - } - } - } - - u16 top_sym = raw.alpha_remap[TOP]; + + raw_som_dfa &rdfa; + const GoughGraph &gg; + map<dstate_id_t, gough_accel_state_info> accel_gough_info; + map<gough_accel *, dstate_id_t> built_accel; +}; + +} + +GoughSSAVar::~GoughSSAVar() { +} + +void GoughSSAVar::clear_outputs() { + for (GoughSSAVarWithInputs *var : outputs) { + var->remove_input_raw(this); + } + outputs.clear(); +} + +void GoughSSAVarWithInputs::clear_all() { + clear_inputs(); + clear_outputs(); +} + +void GoughSSAVarMin::clear_inputs() { + for (GoughSSAVar *var : inputs) { + assert(contains(var->outputs, this)); + var->outputs.erase(this); + } + inputs.clear(); +} + +void GoughSSAVarMin::replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) { + assert(contains(inputs, old_v)); + inputs.erase(old_v); + old_v->outputs.erase(this); + inputs.insert(new_v); + new_v->outputs.insert(this); +} + +static +void translateRawReports(UNUSED GoughGraph &cfg, UNUSED const raw_som_dfa &raw, + const flat_map<u32, GoughSSAVarJoin *> &joins_at_s, + UNUSED GoughVertex s, + const set<som_report> &reports_in, + vector<pair<ReportID, GoughSSAVar *> > *reports_out) { + for (const som_report &sr : reports_in) { + DEBUG_PRINTF("state %u: report %u slot %d\n", cfg[s].state_id, + sr.report, sr.slot); + GoughSSAVar *var = 
nullptr; + if (sr.slot == CREATE_NEW_SOM) { + assert(!generates_callbacks(raw.kind)); + } else { + var = joins_at_s.at(sr.slot); + } + reports_out->push_back(make_pair(sr.report, var)); + } +} + +static +void makeCFG_reports(GoughGraph &cfg, const raw_som_dfa &raw, + const vector<flat_map<u32, GoughSSAVarJoin *> > &joins, + const vector<GoughVertex> &vertices) { + for (u32 i = 1; i < raw.states.size(); ++i) { + GoughVertex s = vertices[i]; + const flat_map<u32, GoughSSAVarJoin *> &joins_at_s + = joins[get(vertex_index, cfg, s)]; + translateRawReports(cfg, raw, joins_at_s, s, + raw.state_som[i].reports, &cfg[s].reports); + translateRawReports(cfg, raw, joins_at_s, s, + raw.state_som[i].reports_eod, &cfg[s].reports_eod); + } +} + +static never_inline +void makeCFG_top_edge(GoughGraph &cfg, const vector<GoughVertex> &vertices, + const vector<flat_map<u32, GoughSSAVarJoin *> > &joins, + u32 trigger_slot, const som_tran_info &src_slots, + const som_tran_info &dest_slot_pred, + dstate_id_t i, dstate_id_t n, const GoughEdge &e) { + GoughVertex s = vertices[i]; + GoughVertex t = vertices[n]; + const flat_map<u32, GoughSSAVarJoin *> &joins_at_s + = joins[get(vertex_index, cfg, s)]; + const flat_map<u32, GoughSSAVarJoin *> &joins_at_t + = joins[get(vertex_index, cfg, t)]; + + DEBUG_PRINTF("top for %u -> %u\n", i, n); + + for (som_tran_info::const_iterator it = dest_slot_pred.begin(); + it != dest_slot_pred.end(); ++it) { + /* for ordering, need to ensure that new values feeding directly + * into mins come first */ + u32 slot_id = it->first; + + shared_ptr<GoughSSAVarNew> vnew; + if (slot_id == trigger_slot) { + vnew = make_shared<GoughSSAVarNew>(0U); + cfg[e].vars.push_back(vnew); + } else { + assert(contains(src_slots, slot_id)); + } + + GoughSSAVar *final_var; + if (vnew && !contains(src_slots, slot_id)) { + final_var = vnew.get(); + DEBUG_PRINTF("bypassing min on join %u\n", slot_id); + } else if (!vnew) { + final_var = joins_at_s.at(slot_id); + DEBUG_PRINTF("bypassing min on join %u\n", slot_id); + } else { + assert(vnew); + assert(contains(src_slots, slot_id)); + + shared_ptr<GoughSSAVarMin> vmin = make_shared<GoughSSAVarMin>(); + cfg[e].vars.push_back(vmin); + final_var = vmin.get(); + + DEBUG_PRINTF("slot %u gets a new value\n", slot_id); + vmin->add_input(vnew.get()); + + DEBUG_PRINTF("slot %u is constant\n", slot_id); + vmin->add_input(joins_at_s.at(slot_id)); + } + + /* wire to destination target */ + GoughSSAVarJoin *vk = joins_at_t.at(slot_id); + vk->add_input(final_var, e); + } +} + +static never_inline +void makeCFG_edge(GoughGraph &cfg, const map<u32, u32> &som_creators, + const vector<GoughVertex> &vertices, + const vector<flat_map<u32, GoughSSAVarJoin *> > &joins, + const som_tran_info &src_slots, + const som_tran_info &dest_slot_pred, dstate_id_t i, + dstate_id_t n, const GoughEdge &e) { + GoughVertex s = vertices[i]; + GoughVertex t = vertices[n]; + const flat_map<u32, GoughSSAVarJoin *> &joins_at_s + = joins[get(vertex_index, cfg, s)]; + const flat_map<u32, GoughSSAVarJoin *> &joins_at_t + = joins[get(vertex_index, cfg, t)]; + + map<u32, shared_ptr<GoughSSAVarNew> > vnew_by_adj; + for (som_tran_info::const_iterator it = dest_slot_pred.begin(); + it != dest_slot_pred.end(); ++it) { + /* for ordering, need to ensure that new values feeding directly + * into mins come first */ + u32 slot_id = it->first; + + if (contains(som_creators, slot_id) && !som_creators.at(slot_id)) { + continue; + } + + shared_ptr<GoughSSAVarNew> vnew; + const vector<u32> &inputs = it->second; + u32 
useful_input_count = 0; + u32 first_useful_input = ~0U; + + for (const u32 &input_slot : inputs) { + if (!contains(src_slots, input_slot)) { + continue; + } + DEBUG_PRINTF("%u is useful\n", input_slot); + + if (!vnew || !contains(som_creators, input_slot)) { + useful_input_count++; + if (useful_input_count == 1) { + first_useful_input = input_slot; + } + } + + if (contains(som_creators, input_slot)) { + u32 adjust = som_creators.at(input_slot); + + if (vnew && vnew->adjust >= adjust) { + DEBUG_PRINTF("skipping %u as domininated by adj%u\n", + adjust, vnew->adjust); + continue; /* deeper starts can be seen to statically + dominate */ + } + + if (contains(vnew_by_adj, adjust)) { + vnew = vnew_by_adj[adjust]; + } else { + vnew = make_shared<GoughSSAVarNew>(adjust); + cfg[e].vars.push_back(vnew); + vnew_by_adj[adjust] = vnew; + } + assert(vnew); + } + } + + /* If we have a new start of match (with no offset or 1 byte offset) and + * other variables coming in, the new will always be dominated by the + * existing variables (as they must be at least one byte into the match) + * -- and so can be dropped. */ + if (vnew && vnew->adjust < 2 && useful_input_count > 1) { + useful_input_count--; + vnew.reset(); + + /* need to reestablish the first useful input */ + for (const u32 &input_slot : inputs) { + if (!contains(src_slots, input_slot)) { + continue; + } + if (!contains(som_creators, input_slot)) { + first_useful_input = input_slot; + } + } + + } + + GoughSSAVar *final_var; + if (useful_input_count == 1) { + if (vnew) { + final_var = vnew.get(); + } else { + assert(first_useful_input != ~0U); + final_var = joins_at_s.at(first_useful_input); + } + DEBUG_PRINTF("bypassing min on join %u\n", slot_id); + } else { + shared_ptr<GoughSSAVarMin> vmin = make_shared<GoughSSAVarMin>(); + cfg[e].vars.push_back(vmin); + final_var = vmin.get(); + + if (vnew) { + vmin->add_input(vnew.get()); + } + + /* wire the normal inputs to the min */ + for (const u32 &input_slot : inputs) { + if (!contains(src_slots, input_slot)) { + continue; + } + if (!contains(som_creators, input_slot)) { + vmin->add_input(joins_at_s.at(input_slot)); + } + } + assert(vmin->get_inputs().size() > 1); + DEBUG_PRINTF("wire min to join %u\n", slot_id); + } + + GoughSSAVarJoin *vk = joins_at_t.at(slot_id); + assert(final_var); + vk->add_input(final_var, e); + } +} + +static never_inline +unique_ptr<GoughGraph> makeCFG(const raw_som_dfa &raw) { + vector<GoughVertex> vertices; + vertices.reserve(raw.states.size()); + unique_ptr<GoughGraph> cfg = ue2::make_unique<GoughGraph>(); + u32 min_state = !is_triggered(raw.kind); + + if (min_state) { + vertices.push_back(GoughGraph::null_vertex()); /* skip dead state */ + } + + vector<flat_map<u32, GoughSSAVarJoin *> > joins(raw.states.size()); + for (u32 i = min_state; i < raw.states.size(); ++i) { + GoughVertex v = add_vertex(GoughVertexProps(i), *cfg); + vertices.push_back(v); + + /* create JOIN variables */ + for (som_tran_info::const_iterator it = raw.state_som[i].preds.begin(); + it != raw.state_som[i].preds.end(); ++it) { + u32 slot_id = it->first; + if (!contains(raw.new_som_nfa_states, slot_id) + || raw.new_som_nfa_states.at(slot_id)) { + (*cfg)[v].vars.push_back(make_shared<GoughSSAVarJoin>()); + joins[get(vertex_index, *cfg, v)][slot_id] + = (*cfg)[v].vars.back().get(); + DEBUG_PRINTF("dfa %u:: slot %u\n", i, slot_id); + } + } + } + + u16 top_sym = raw.alpha_remap[TOP]; DEBUG_PRINTF("top: %hu, kind %s\n", top_sym, to_string(raw.kind).c_str()); - - /* create edges, JOIN variables (on edge 
targets) */ - map<dstate_id_t, GoughEdge> seen; - for (u32 i = min_state; i < raw.states.size(); ++i) { - seen.clear(); /* seen is really local to each state */ - - DEBUG_PRINTF("creating edges out of %u/%zu\n", i, raw.states.size()); - GoughVertex s = vertices[i]; - const vector<dstate_id_t> &next = raw.states[i].next; - for (u32 j = 0; j < next.size(); ++j) { - if (!is_triggered(raw.kind) && j == top_sym) { - continue; - } - - dstate_id_t n = next[j]; - DEBUG_PRINTF(" edge to %hu out on %u\n", n, j); - assert(n < raw.states.size()); - GoughVertex t = vertices[n]; - - if (j == top_sym) { - GoughEdge e = add_edge(s, t, *cfg).first; - (*cfg)[e].top = true; - makeCFG_top_edge(*cfg, vertices, joins, raw.trigger_nfa_state, - raw.state_som[i].preds, raw.state_som[n].preds, - i, n, e); - } else { - if (contains(seen, n)) { - const GoughEdge &e = seen[n]; - (*cfg)[e].reach.set(j); - continue; - } - - GoughEdge e = add_edge(s, t, *cfg).first; - (*cfg)[e].reach.set(j); - - seen[n] = e; - - makeCFG_edge(*cfg, raw.new_som_nfa_states, vertices, joins, - raw.state_som[i].preds, raw.state_som[n].preds, - i, n, e); - } - } - } - - /* populate reports */ - makeCFG_reports(*cfg, raw, joins, vertices); - - using boost::graph_bundle; - if (is_triggered(raw.kind)) { - (*cfg)[graph_bundle].initial_vertex = vertices[DEAD_STATE]; - } else { - (*cfg)[graph_bundle].initial_vertex = vertices[raw.start_anchored]; - } - - return cfg; -} - -static -void copy_propagate_report_set(vector<pair<ReportID, GoughSSAVar *> > &rep) { - vector<pair<ReportID, GoughSSAVar *> >::iterator it = rep.begin(); - while (it != rep.end()) { - GoughSSAVar *var = it->second; - if (!var) { - ++it; - continue; - } - const flat_set<GoughSSAVar *> &inputs = var->get_inputs(); - if (inputs.size() != 1) { - ++it; - continue; - } - it->second = *inputs.begin(); /* note may result in dupes, - filter later */ - } -} - -template<typename VarP> -void copy_propagate_update_vars(vector<VarP> &vars, bool *changes) { - for (u32 i = 0; i < vars.size(); i++) { - GoughSSAVar *vp = vars[i].get(); - const flat_set<GoughSSAVar *> &inputs = vp->get_inputs(); - - /* no need to worry about data coming from self; ignore self loops */ - GoughSSAVar *new_input = nullptr; - - if (inputs.size() == 1) { - new_input = *inputs.begin(); - } else if (inputs.size() == 2) { - flat_set<GoughSSAVar *>::const_iterator jt = inputs.begin(); - GoughSSAVar *i_0 = *jt; - GoughSSAVar *i_1 = *++jt; - - if (i_0 == vp) { - new_input = i_1; - } else if (i_1 == vp) { - new_input = i_0; - } - } - - if (!new_input) { - continue; - } - - assert(new_input != vp); - - /* copy set as it will be modified by iteration */ - const flat_set<GoughSSAVarWithInputs *> outputs = vp->get_outputs(); - - for (GoughSSAVar *curr : outputs) { - curr->replace_input(vp, new_input); - *changes = true; - } - } -} - -static -void copy_propagation(GoughGraph &g, const Grey &grey) { - if (!grey.goughCopyPropagate) { - return; - } - /* TODO order visit of variables sensibly */ - bool changes = false; - do { - DEBUG_PRINTF("new iteration\n"); - changes = false; - for (auto v : vertices_range(g)) { - copy_propagate_update_vars(g[v].vars, &changes); - } - for (const auto &e : edges_range(g)) { - copy_propagate_update_vars(g[e].vars, &changes); - } - } while(changes); - - /* see if any reports can also be moved along */ - for (auto v : vertices_range(g)) { - copy_propagate_report_set(g[v].reports); - copy_propagate_report_set(g[v].reports_eod); - } -} - -static -void mark_live_reports(const vector<pair<ReportID, 
GoughSSAVar *> > &reps, - vector<GoughSSAVar *> *queue) { - for (const auto &r : reps) { - GoughSSAVar *var = r.second; - if (!var || var->seen) { - continue; - } - var->seen = true; - queue->push_back(var); - } -} - -static -void remove_dead(GoughGraph &g) { - vector<GoughSSAVar *> queue; - - for (auto v : vertices_range(g)) { - mark_live_reports(g[v].reports, &queue); - mark_live_reports(g[v].reports_eod, &queue); - } - - while (!queue.empty()) { - GoughSSAVar *v = queue.back(); - queue.pop_back(); - for (GoughSSAVar *var : v->get_inputs()) { - if (var->seen) { - continue; - } - var->seen = true; - queue.push_back(var); - } - } - - /* remove unused variables */ - for (auto v : vertices_range(g)) { - for (u32 i = 0; i < g[v].vars.size(); i++) { - GoughSSAVar *var = g[v].vars[i].get(); - if (var->seen) { - continue; - } - var->clear_all(); - g[v].vars.erase(g[v].vars.begin() + i); - i--; - } - } - for (const auto &e : edges_range(g)) { - for (u32 i = 0; i < g[e].vars.size(); i++) { - GoughSSAVar *var = g[e].vars[i].get(); - if (var->seen) { - continue; - } - var->clear_all(); - g[e].vars.erase(g[e].vars.begin() + i); - i--; - } - } -} - -static -gough_ins make_gough_ins(u8 op, u32 dest = INVALID_SLOT, - u32 src = INVALID_SLOT) { - assert(dest != INVALID_SLOT || op == GOUGH_INS_END); - assert(src != INVALID_SLOT || op == GOUGH_INS_END || op == GOUGH_INS_NEW); - gough_ins rv; - rv.op = op; - rv.dest = dest; - rv.src = src; - return rv; -} - -void GoughSSAVarNew::generate(vector<gough_ins> *out) const { - assert(slot != INVALID_SLOT); - out->push_back(make_gough_ins(GOUGH_INS_NEW, slot, adjust)); -} - -#ifndef NDEBUG -template<typename C, typename K> -bool contains_loose(const C &container, const K &key) { - for (const auto &elem : container) { - if (elem == key) { - return true; - } - } - return false; -} -#endif - -void GoughSSAVarMin::generate(vector<gough_ins> *out) const { - assert(slot != INVALID_SLOT); - assert(!inputs.empty()); - // assert(inputs.size() > 1); - vector<u32> input_slots; /* for determinism */ - bool first = true; - for (const GoughSSAVar *var : inputs) { - assert(contains_loose(var->outputs, this)); - if (var->slot == slot) { - /* if the destination is one of the sources, no need to move it */ - first = false; - } else { - input_slots.push_back(var->slot); - } - } - - sort(input_slots.begin(), input_slots.end()); - - for (const u32 &input_slot : input_slots) { - if (first) { - out->push_back(make_gough_ins(GOUGH_INS_MOV, slot, input_slot)); - first = false; - } else { - out->push_back(make_gough_ins(GOUGH_INS_MIN, slot, input_slot)); - } - } -} - -void GoughSSAVarMin::remove_input_raw(GoughSSAVar *v) { - assert(contains(inputs, v)); - inputs.erase(v); -} - -void GoughSSAVarJoin::generate(UNUSED vector<gough_ins> *out) const { - assert(0); -} - -GoughSSAVar *GoughSSAVarJoin::get_input(const GoughEdge &prev) const { - for (const auto &var_edge : input_map) { - if (contains(var_edge.second, prev)) { - return var_edge.first; - } - } - assert(0); - return nullptr; -} - -const flat_set<GoughEdge> &GoughSSAVarJoin::get_edges_for_input( - GoughSSAVar *input) const { - return input_map.at(input); -} - -const map<GoughSSAVar *, flat_set<GoughEdge> > &GoughSSAVarJoin::get_input_map() - const { - return input_map; -} - -void GoughSSAVarJoin::clear_inputs() { - for (GoughSSAVar *var : input_map | map_keys) { - assert(contains(var->outputs, this)); - var->outputs.erase(this); - } - input_map.clear(); - inputs.clear(); -} - -void GoughSSAVarJoin::replace_input(GoughSSAVar *old_v, 
GoughSSAVar *new_v) { - assert(contains(input_map, old_v)); - assert(contains(inputs, old_v)); - if (old_v == new_v) { - assert(0); - return; - } - insert(&input_map[new_v], input_map[old_v]); - input_map.erase(old_v); - inputs.erase(old_v); - inputs.insert(new_v); - old_v->outputs.erase(this); - new_v->outputs.insert(this); -} - -void GoughSSAVarJoin::add_input(GoughSSAVar *v, GoughEdge prev) { - input_map[v].insert(prev); - inputs.insert(v); - v->outputs.insert(this); -} - -void GoughSSAVarJoin::remove_input_raw(GoughSSAVar *v) { - assert(contains(inputs, v)); - assert(contains(input_map, v)); - input_map.erase(v); - inputs.erase(v); -} - -static -u32 highest_slot_used(const vector<gough_ins> &program) { - u32 rv = INVALID_SLOT; - for (const gough_ins &ins : program) { - if (rv == INVALID_SLOT) { - rv = ins.dest; - } else if (ins.dest != INVALID_SLOT) { - ENSURE_AT_LEAST(&rv, ins.dest); - } - if (rv == INVALID_SLOT) { - rv = ins.src; - } else if (ins.src != INVALID_SLOT) { - ENSURE_AT_LEAST(&rv, ins.src); - } - } - assert(rv != INVALID_SLOT); - return rv; -} - -static -u32 highest_slot_used(const map<gough_edge_id, vector<gough_ins> > &blocks) { - u32 rv = INVALID_SLOT; - for (const vector<gough_ins> &ins_list : blocks | map_values) { - u32 used = highest_slot_used(ins_list); - if (rv == INVALID_SLOT) { - rv = used; - } else if (used != INVALID_SLOT) { - ENSURE_AT_LEAST(&rv, used); - } - } - return rv; -} - -static -void add_to_block(const vector<shared_ptr<GoughSSAVar> > &vars, - vector<gough_ins> *out) { - for (const auto &var : vars) { - var->generate(out); - } -} - -namespace { -struct edge_join_info { - bool empty() const { return dest_to_src.empty(); } - - void insert(u32 src, u32 dest) { - assert(!contains(dest_to_src, dest)); - assert(src != dest); - dest_to_src[dest] = src; - src_to_dest[src].insert(dest); - } - - void erase(u32 src, u32 dest) { - assert(dest_to_src.at(dest) == src); - dest_to_src.erase(dest); - src_to_dest[src].erase(dest); - - if (src_to_dest[src].empty()) { - src_to_dest.erase(src); - } - } - - bool is_src(u32 v) const { - bool rv = contains(src_to_dest, v); - assert(!rv || !src_to_dest.at(v).empty()); - return rv; - } - - bool is_dest(u32 v) const { - return contains(dest_to_src, v); - } - - void remap_src(u32 old_src, u32 new_src) { - assert(is_src(old_src)); - assert(!is_src(new_src)); - - for (const u32 &e : src_to_dest[old_src]) { - assert(e != new_src); - dest_to_src[e] = new_src; - } - src_to_dest[new_src].swap(src_to_dest[old_src]); - src_to_dest.erase(old_src); - - assert(!is_src(old_src)); - assert(is_src(new_src)); - } - - /* returns an arbitrary unresolved entry */ - void get_pending(u32 *src, u32 *dest) { - assert(!empty()); - *dest = dest_to_src.begin()->first; - *src = dest_to_src.begin()->second; - } - - const map<u32, u32> &get_dest_mapping() const { return dest_to_src; } - -private: - map<u32, set<u32> > src_to_dest; - map<u32, u32> dest_to_src; -}; - -} - -static -void prep_joins_for_generation(const GoughGraph &g, GoughVertex v, - map<GoughEdge, edge_join_info> *edge_info) { - DEBUG_PRINTF("writing out joins for %u\n", g[v].state_id); - for (const auto &var : g[v].vars) { - u32 dest_slot = var->slot; - for (const auto &var_edges : var->get_input_map()) { - u32 input = var_edges.first->slot; - if (dest_slot == input) { - continue; - } - - for (const GoughEdge &incoming_edge : var_edges.second) { - (*edge_info)[incoming_edge].insert(input, dest_slot); - DEBUG_PRINTF("need %u<-%u\n", dest_slot, input); - } - } - } -} - -static -void 
add_simple_joins(edge_join_info &eji, vector<gough_ins> *out) { - /* any slot whose value we don't need can be written to immediately */ - const map<u32, u32> &dest_to_src = eji.get_dest_mapping(); - - bool changed; - do { - changed = false; - for (map<u32, u32>::const_iterator it = dest_to_src.begin(); - it != dest_to_src.end();) { - u32 src = it->second; - u32 dest = it->first; - ++it; /* avoid iterator being invalidated */ - - if (eji.is_src(dest)) { - continue; /* conflict; not simple (yet) */ - } - - /* value of destination slot is not used by any remaining joins; - * we can output this join immediately */ - DEBUG_PRINTF("out %u<-%u\n", dest, src); - out->push_back(make_gough_ins(GOUGH_INS_MOV, dest, src)); - - eji.erase(src, dest); - - if (eji.is_dest(src) && eji.is_src(src)) { - /* we can unblock src being used as an output by shifting - * across everybody using src as input to using dest (as == src - * now) */ - eji.remap_src(src, dest); - } - changed = true; - } - } while (changed); -} - -static -void add_joins_to_block(edge_join_info &eji, vector<gough_ins> *out, - u32 base_temp_slot) { - /* joins happen concurrently: none of them should see the outputs of another - * join happening due to the same entry of the vertex. If there are - * conflicts we may have to handle things by using a temp output slot for - * each join and then copying into the final slot. - */ - - add_simple_joins(eji, out); - while (!eji.empty()) { - u32 split; - u32 input_for_split; - eji.get_pending(&input_for_split, &split); - - assert(eji.is_src(split)); /* otherwise should be handled by simple */ - - /* stash the initial value of the split register in a temp register */ - u32 temp = base_temp_slot++; - DEBUG_PRINTF("out %u<-%u\n", temp, split); - out->push_back(make_gough_ins(GOUGH_INS_MOV, temp, split)); - eji.remap_src(split, temp); /* update maps */ - - /* split can now be safely written out to as all the uses of it as an - * input now refer to temp instead */ - - DEBUG_PRINTF("out %u<-%u\n", split, input_for_split); - out->push_back(make_gough_ins(GOUGH_INS_MOV, split, input_for_split)); - eji.erase(input_for_split, split); - - /* handle any uncovered simple cases */ - add_simple_joins(eji, out); - } -} - -static -void build_blocks(const GoughGraph &g, - map<gough_edge_id, vector<gough_ins> > *blocks, - u32 base_temp_slot) { - for (const auto &e : edges_range(g)) { - if (g[e].vars.empty()) { - continue; - } - - vector<gough_ins> &block = (*blocks)[gough_edge_id(g, e)]; - add_to_block(g[e].vars, &block); - assert(!block.empty()); - } - - for (const auto t : vertices_range(g)) { - if (g[t].vars.empty()) { - continue; - } - - map<GoughEdge, edge_join_info> eji; - prep_joins_for_generation(g, t, &eji); - - for (auto &m : eji) { - vector<gough_ins> &block = (*blocks)[gough_edge_id(g, m.first)]; - u32 cur_base = base_temp_slot; - if (!block.empty()) { - /* some temp slots may already be in use by short-lived vars */ - ENSURE_AT_LEAST(&cur_base, highest_slot_used(block) + 1); - } - - add_joins_to_block(m.second, &block, cur_base); - if (block.empty()) { - blocks->erase(gough_edge_id(g, m.first)); - } - } - } - - for (vector<gough_ins> &ins_list : *blocks | map_values) { - assert(!ins_list.empty()); - ins_list.push_back(make_gough_ins(GOUGH_INS_END)); - } -} - -static -void copy_in_blocks(raw_som_dfa &raw, u8 alphaShift, const GoughGraph &cfg, - const map<gough_edge_id, vector<gough_ins> > &blocks, - u32 *edge_blocks, u32 *top_blocks, u32 base_offset, - map<vector<gough_ins>, u32> *prog_offsets, - 
vector<gough_ins> *out) { - u32 impl_alpha_size = 1U << alphaShift; - UNUSED u32 top_sym = raw.alpha_remap[TOP]; - assert(top_sym == raw.alpha_size - 1U); - map<vector<gough_ins>, u32> &processed = *prog_offsets; - - for (const auto &e : edges_range(cfg)) { - if (!contains(blocks, gough_edge_id(cfg, e))) { - continue; - } - const vector<gough_ins> &block = blocks.at(gough_edge_id(cfg, e)); - u32 prog_offset; - if (!contains(processed, block)) { - prog_offset = base_offset + byte_length(*out); - insert(out, out->end(), block); - processed[block] = prog_offset; - } else { - prog_offset = processed[block]; - } - - /* update edges */ - u32 s_id = cfg[source(e, cfg)].state_id; - UNUSED u32 t_id = cfg[target(e, cfg)].state_id; - u32 impl_src_id = raw.states[s_id].impl_id; - DEBUG_PRINTF("%u: writing out block for edge_%u_%u at %u:\n", - impl_src_id, s_id, t_id,prog_offset); - - for (u32 j = cfg[e].reach.find_first(); j != CharReach::npos; - j = cfg[e].reach.find_next(j)) { - assert(raw.states[s_id].next[j] == t_id); - u32 edge_index = impl_src_id * impl_alpha_size + j; - DEBUG_PRINTF("\tsetting on %u, %u\n", j, edge_index); - edge_blocks[edge_index] = prog_offset; - } - - if (cfg[e].top) { - assert(raw.states[s_id].next[top_sym] == t_id); - DEBUG_PRINTF("\tsetting top on %u to block at %u\n", impl_src_id, - prog_offset); - top_blocks[impl_src_id] = prog_offset; - } - } -} - -bool find_normal_self_loop(GoughVertex v, const GoughGraph &g, GoughEdge *out) { - for (const auto &e : out_edges_range(v, g)) { - if (target(e, g) != v) { - continue; - } - if (g[e].top) { - assert(g[e].reach.find_first() == CharReach::npos); - continue; /* corresponds to a top, not a normal transition */ - } - - *out = e; - return true; - } - - return false; -} - -static never_inline -void update_accel_prog_offset(const gough_build_strat &gbs, - const map<gough_edge_id, vector<gough_ins> > &blocks, - const map<vector<gough_ins>, u32> &prog_offsets) { - map<dstate_id_t, GoughVertex> verts; - for (auto v : vertices_range(gbs.gg)) { - verts[gbs.gg[v].state_id] = v; - } - - for (auto &m : gbs.built_accel) { - gough_accel *ga = m.first; - assert(!ga->prog_offset); - GoughVertex v = verts[m.second]; - GoughEdge e; - UNUSED bool rv = find_normal_self_loop(v, gbs.gg, &e); - assert(rv); - - if (!rv) { - continue; - } - - DEBUG_PRINTF("updating state %u accel with margin %hhu\n", - gbs.gg[v].state_id, ga->margin_dist); - if (contains(blocks, gough_edge_id(gbs.gg, e))) { - const vector<gough_ins> &block - = blocks.at(gough_edge_id(gbs.gg, e)); - ga->prog_offset = prog_offsets.at(block); - DEBUG_PRINTF("prog offset %u\n", ga->prog_offset); - } else { - ga->margin_dist = 0; - DEBUG_PRINTF("removing margin as no som\n"); - } - } -} - + + /* create edges, JOIN variables (on edge targets) */ + map<dstate_id_t, GoughEdge> seen; + for (u32 i = min_state; i < raw.states.size(); ++i) { + seen.clear(); /* seen is really local to each state */ + + DEBUG_PRINTF("creating edges out of %u/%zu\n", i, raw.states.size()); + GoughVertex s = vertices[i]; + const vector<dstate_id_t> &next = raw.states[i].next; + for (u32 j = 0; j < next.size(); ++j) { + if (!is_triggered(raw.kind) && j == top_sym) { + continue; + } + + dstate_id_t n = next[j]; + DEBUG_PRINTF(" edge to %hu out on %u\n", n, j); + assert(n < raw.states.size()); + GoughVertex t = vertices[n]; + + if (j == top_sym) { + GoughEdge e = add_edge(s, t, *cfg).first; + (*cfg)[e].top = true; + makeCFG_top_edge(*cfg, vertices, joins, raw.trigger_nfa_state, + raw.state_som[i].preds, 
raw.state_som[n].preds, + i, n, e); + } else { + if (contains(seen, n)) { + const GoughEdge &e = seen[n]; + (*cfg)[e].reach.set(j); + continue; + } + + GoughEdge e = add_edge(s, t, *cfg).first; + (*cfg)[e].reach.set(j); + + seen[n] = e; + + makeCFG_edge(*cfg, raw.new_som_nfa_states, vertices, joins, + raw.state_som[i].preds, raw.state_som[n].preds, + i, n, e); + } + } + } + + /* populate reports */ + makeCFG_reports(*cfg, raw, joins, vertices); + + using boost::graph_bundle; + if (is_triggered(raw.kind)) { + (*cfg)[graph_bundle].initial_vertex = vertices[DEAD_STATE]; + } else { + (*cfg)[graph_bundle].initial_vertex = vertices[raw.start_anchored]; + } + + return cfg; +} + +static +void copy_propagate_report_set(vector<pair<ReportID, GoughSSAVar *> > &rep) { + vector<pair<ReportID, GoughSSAVar *> >::iterator it = rep.begin(); + while (it != rep.end()) { + GoughSSAVar *var = it->second; + if (!var) { + ++it; + continue; + } + const flat_set<GoughSSAVar *> &inputs = var->get_inputs(); + if (inputs.size() != 1) { + ++it; + continue; + } + it->second = *inputs.begin(); /* note may result in dupes, + filter later */ + } +} + +template<typename VarP> +void copy_propagate_update_vars(vector<VarP> &vars, bool *changes) { + for (u32 i = 0; i < vars.size(); i++) { + GoughSSAVar *vp = vars[i].get(); + const flat_set<GoughSSAVar *> &inputs = vp->get_inputs(); + + /* no need to worry about data coming from self; ignore self loops */ + GoughSSAVar *new_input = nullptr; + + if (inputs.size() == 1) { + new_input = *inputs.begin(); + } else if (inputs.size() == 2) { + flat_set<GoughSSAVar *>::const_iterator jt = inputs.begin(); + GoughSSAVar *i_0 = *jt; + GoughSSAVar *i_1 = *++jt; + + if (i_0 == vp) { + new_input = i_1; + } else if (i_1 == vp) { + new_input = i_0; + } + } + + if (!new_input) { + continue; + } + + assert(new_input != vp); + + /* copy set as it will be modified by iteration */ + const flat_set<GoughSSAVarWithInputs *> outputs = vp->get_outputs(); + + for (GoughSSAVar *curr : outputs) { + curr->replace_input(vp, new_input); + *changes = true; + } + } +} + +static +void copy_propagation(GoughGraph &g, const Grey &grey) { + if (!grey.goughCopyPropagate) { + return; + } + /* TODO order visit of variables sensibly */ + bool changes = false; + do { + DEBUG_PRINTF("new iteration\n"); + changes = false; + for (auto v : vertices_range(g)) { + copy_propagate_update_vars(g[v].vars, &changes); + } + for (const auto &e : edges_range(g)) { + copy_propagate_update_vars(g[e].vars, &changes); + } + } while(changes); + + /* see if any reports can also be moved along */ + for (auto v : vertices_range(g)) { + copy_propagate_report_set(g[v].reports); + copy_propagate_report_set(g[v].reports_eod); + } +} + +static +void mark_live_reports(const vector<pair<ReportID, GoughSSAVar *> > &reps, + vector<GoughSSAVar *> *queue) { + for (const auto &r : reps) { + GoughSSAVar *var = r.second; + if (!var || var->seen) { + continue; + } + var->seen = true; + queue->push_back(var); + } +} + +static +void remove_dead(GoughGraph &g) { + vector<GoughSSAVar *> queue; + + for (auto v : vertices_range(g)) { + mark_live_reports(g[v].reports, &queue); + mark_live_reports(g[v].reports_eod, &queue); + } + + while (!queue.empty()) { + GoughSSAVar *v = queue.back(); + queue.pop_back(); + for (GoughSSAVar *var : v->get_inputs()) { + if (var->seen) { + continue; + } + var->seen = true; + queue.push_back(var); + } + } + + /* remove unused variables */ + for (auto v : vertices_range(g)) { + for (u32 i = 0; i < g[v].vars.size(); i++) { + 
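/* The copy-propagation rewrite above, reduced to a toy: a variable whose
 * only input (ignoring a self loop) is a single source is a pure copy, so
 * its readers are rewired to read the source directly. The Var type here is
 * a stand-in for the GoughSSAVar machinery, invented for illustration. */
#include <set>

struct Var {
    std::set<Var *> inputs;
    std::set<Var *> outputs;
};

bool propagate_copy(Var *v) {
    Var *src = nullptr;
    if (v->inputs.size() == 1) {
        src = *v->inputs.begin();
    } else if (v->inputs.size() == 2 && v->inputs.count(v)) {
        for (Var *in : v->inputs) {
            if (in != v) {
                src = in; /* the non-self input */
            }
        }
    }
    if (!src || src == v) {
        return false; /* not a pure copy */
    }
    /* iterate over a snapshot: the real sets change as we rewire */
    std::set<Var *> readers = v->outputs;
    for (Var *r : readers) {
        r->inputs.erase(v);
        r->inputs.insert(src);
        v->outputs.erase(r);
        src->outputs.insert(r);
    }
    return true; /* caller re-runs the pass until a fixpoint, as above */
}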
GoughSSAVar *var = g[v].vars[i].get(); + if (var->seen) { + continue; + } + var->clear_all(); + g[v].vars.erase(g[v].vars.begin() + i); + i--; + } + } + for (const auto &e : edges_range(g)) { + for (u32 i = 0; i < g[e].vars.size(); i++) { + GoughSSAVar *var = g[e].vars[i].get(); + if (var->seen) { + continue; + } + var->clear_all(); + g[e].vars.erase(g[e].vars.begin() + i); + i--; + } + } +} + +static +gough_ins make_gough_ins(u8 op, u32 dest = INVALID_SLOT, + u32 src = INVALID_SLOT) { + assert(dest != INVALID_SLOT || op == GOUGH_INS_END); + assert(src != INVALID_SLOT || op == GOUGH_INS_END || op == GOUGH_INS_NEW); + gough_ins rv; + rv.op = op; + rv.dest = dest; + rv.src = src; + return rv; +} + +void GoughSSAVarNew::generate(vector<gough_ins> *out) const { + assert(slot != INVALID_SLOT); + out->push_back(make_gough_ins(GOUGH_INS_NEW, slot, adjust)); +} + +#ifndef NDEBUG +template<typename C, typename K> +bool contains_loose(const C &container, const K &key) { + for (const auto &elem : container) { + if (elem == key) { + return true; + } + } + return false; +} +#endif + +void GoughSSAVarMin::generate(vector<gough_ins> *out) const { + assert(slot != INVALID_SLOT); + assert(!inputs.empty()); + // assert(inputs.size() > 1); + vector<u32> input_slots; /* for determinism */ + bool first = true; + for (const GoughSSAVar *var : inputs) { + assert(contains_loose(var->outputs, this)); + if (var->slot == slot) { + /* if the destination is one of the sources, no need to move it */ + first = false; + } else { + input_slots.push_back(var->slot); + } + } + + sort(input_slots.begin(), input_slots.end()); + + for (const u32 &input_slot : input_slots) { + if (first) { + out->push_back(make_gough_ins(GOUGH_INS_MOV, slot, input_slot)); + first = false; + } else { + out->push_back(make_gough_ins(GOUGH_INS_MIN, slot, input_slot)); + } + } +} + +void GoughSSAVarMin::remove_input_raw(GoughSSAVar *v) { + assert(contains(inputs, v)); + inputs.erase(v); +} + +void GoughSSAVarJoin::generate(UNUSED vector<gough_ins> *out) const { + assert(0); +} + +GoughSSAVar *GoughSSAVarJoin::get_input(const GoughEdge &prev) const { + for (const auto &var_edge : input_map) { + if (contains(var_edge.second, prev)) { + return var_edge.first; + } + } + assert(0); + return nullptr; +} + +const flat_set<GoughEdge> &GoughSSAVarJoin::get_edges_for_input( + GoughSSAVar *input) const { + return input_map.at(input); +} + +const map<GoughSSAVar *, flat_set<GoughEdge> > &GoughSSAVarJoin::get_input_map() + const { + return input_map; +} + +void GoughSSAVarJoin::clear_inputs() { + for (GoughSSAVar *var : input_map | map_keys) { + assert(contains(var->outputs, this)); + var->outputs.erase(this); + } + input_map.clear(); + inputs.clear(); +} + +void GoughSSAVarJoin::replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) { + assert(contains(input_map, old_v)); + assert(contains(inputs, old_v)); + if (old_v == new_v) { + assert(0); + return; + } + insert(&input_map[new_v], input_map[old_v]); + input_map.erase(old_v); + inputs.erase(old_v); + inputs.insert(new_v); + old_v->outputs.erase(this); + new_v->outputs.insert(this); +} + +void GoughSSAVarJoin::add_input(GoughSSAVar *v, GoughEdge prev) { + input_map[v].insert(prev); + inputs.insert(v); + v->outputs.insert(this); +} + +void GoughSSAVarJoin::remove_input_raw(GoughSSAVar *v) { + assert(contains(inputs, v)); + assert(contains(input_map, v)); + input_map.erase(v); + inputs.erase(v); +} + +static +u32 highest_slot_used(const vector<gough_ins> &program) { + u32 rv = INVALID_SLOT; + for (const 
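/* A hedged sketch of how a block of gough_ins might be applied to the SOM
 * slot array at run time, matching the instructions emitted by the
 * generate() methods above. The enum values, the struct layout, and in
 * particular the GOUGH_INS_NEW semantics (current offset plus the adjust
 * carried in the src field) are assumptions for illustration only. */
#include <algorithm>
#include <cstdint>
#include <vector>

enum SketchOp : uint8_t { OP_END, OP_MOV, OP_NEW, OP_MIN };

struct SketchIns {
    uint8_t op;
    uint32_t dest;
    uint32_t src; /* doubles as the adjust value for OP_NEW */
};

void run_block(const std::vector<SketchIns> &prog,
               std::vector<uint64_t> &slots, uint64_t offset) {
    for (const SketchIns &i : prog) {
        switch (i.op) {
        case OP_MOV:
            slots[i.dest] = slots[i.src];
            break;
        case OP_MIN: /* SOM is a minimum over competing start offsets */
            slots[i.dest] = std::min(slots[i.dest], slots[i.src]);
            break;
        case OP_NEW: /* start a fresh SOM value at the current offset */
            slots[i.dest] = offset + i.src;
            break;
        case OP_END: /* every block is terminated by an END */
            return;
        }
    }
}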
gough_ins &ins : program) { + if (rv == INVALID_SLOT) { + rv = ins.dest; + } else if (ins.dest != INVALID_SLOT) { + ENSURE_AT_LEAST(&rv, ins.dest); + } + if (rv == INVALID_SLOT) { + rv = ins.src; + } else if (ins.src != INVALID_SLOT) { + ENSURE_AT_LEAST(&rv, ins.src); + } + } + assert(rv != INVALID_SLOT); + return rv; +} + +static +u32 highest_slot_used(const map<gough_edge_id, vector<gough_ins> > &blocks) { + u32 rv = INVALID_SLOT; + for (const vector<gough_ins> &ins_list : blocks | map_values) { + u32 used = highest_slot_used(ins_list); + if (rv == INVALID_SLOT) { + rv = used; + } else if (used != INVALID_SLOT) { + ENSURE_AT_LEAST(&rv, used); + } + } + return rv; +} + +static +void add_to_block(const vector<shared_ptr<GoughSSAVar> > &vars, + vector<gough_ins> *out) { + for (const auto &var : vars) { + var->generate(out); + } +} + +namespace { +struct edge_join_info { + bool empty() const { return dest_to_src.empty(); } + + void insert(u32 src, u32 dest) { + assert(!contains(dest_to_src, dest)); + assert(src != dest); + dest_to_src[dest] = src; + src_to_dest[src].insert(dest); + } + + void erase(u32 src, u32 dest) { + assert(dest_to_src.at(dest) == src); + dest_to_src.erase(dest); + src_to_dest[src].erase(dest); + + if (src_to_dest[src].empty()) { + src_to_dest.erase(src); + } + } + + bool is_src(u32 v) const { + bool rv = contains(src_to_dest, v); + assert(!rv || !src_to_dest.at(v).empty()); + return rv; + } + + bool is_dest(u32 v) const { + return contains(dest_to_src, v); + } + + void remap_src(u32 old_src, u32 new_src) { + assert(is_src(old_src)); + assert(!is_src(new_src)); + + for (const u32 &e : src_to_dest[old_src]) { + assert(e != new_src); + dest_to_src[e] = new_src; + } + src_to_dest[new_src].swap(src_to_dest[old_src]); + src_to_dest.erase(old_src); + + assert(!is_src(old_src)); + assert(is_src(new_src)); + } + + /* returns an arbitrary unresolved entry */ + void get_pending(u32 *src, u32 *dest) { + assert(!empty()); + *dest = dest_to_src.begin()->first; + *src = dest_to_src.begin()->second; + } + + const map<u32, u32> &get_dest_mapping() const { return dest_to_src; } + +private: + map<u32, set<u32> > src_to_dest; + map<u32, u32> dest_to_src; +}; + +} + +static +void prep_joins_for_generation(const GoughGraph &g, GoughVertex v, + map<GoughEdge, edge_join_info> *edge_info) { + DEBUG_PRINTF("writing out joins for %u\n", g[v].state_id); + for (const auto &var : g[v].vars) { + u32 dest_slot = var->slot; + for (const auto &var_edges : var->get_input_map()) { + u32 input = var_edges.first->slot; + if (dest_slot == input) { + continue; + } + + for (const GoughEdge &incoming_edge : var_edges.second) { + (*edge_info)[incoming_edge].insert(input, dest_slot); + DEBUG_PRINTF("need %u<-%u\n", dest_slot, input); + } + } + } +} + +static +void add_simple_joins(edge_join_info &eji, vector<gough_ins> *out) { + /* any slot whose value we don't need can be written to immediately */ + const map<u32, u32> &dest_to_src = eji.get_dest_mapping(); + + bool changed; + do { + changed = false; + for (map<u32, u32>::const_iterator it = dest_to_src.begin(); + it != dest_to_src.end();) { + u32 src = it->second; + u32 dest = it->first; + ++it; /* avoid iterator being invalidated */ + + if (eji.is_src(dest)) { + continue; /* conflict; not simple (yet) */ + } + + /* value of destination slot is not used by any remaining joins; + * we can output this join immediately */ + DEBUG_PRINTF("out %u<-%u\n", dest, src); + out->push_back(make_gough_ins(GOUGH_INS_MOV, dest, src)); + + eji.erase(src, dest); + + if 
(eji.is_dest(src) && eji.is_src(src)) { + /* we can unblock src being used as an output by shifting + * across everybody using src as input to using dest (as == src + * now) */ + eji.remap_src(src, dest); + } + changed = true; + } + } while (changed); +} + +static +void add_joins_to_block(edge_join_info &eji, vector<gough_ins> *out, + u32 base_temp_slot) { + /* joins happen concurrently: none of them should see the outputs of another + * join happening due to the same entry of the vertex. If there are + * conflicts we may have to handle things by using a temp output slot for + * each join and then copying into the final slot. + */ + + add_simple_joins(eji, out); + while (!eji.empty()) { + u32 split; + u32 input_for_split; + eji.get_pending(&input_for_split, &split); + + assert(eji.is_src(split)); /* otherwise should be handled by simple */ + + /* stash the initial value of the split register in a temp register */ + u32 temp = base_temp_slot++; + DEBUG_PRINTF("out %u<-%u\n", temp, split); + out->push_back(make_gough_ins(GOUGH_INS_MOV, temp, split)); + eji.remap_src(split, temp); /* update maps */ + + /* split can now be safely written out to as all the uses of it as an + * input now refer to temp instead */ + + DEBUG_PRINTF("out %u<-%u\n", split, input_for_split); + out->push_back(make_gough_ins(GOUGH_INS_MOV, split, input_for_split)); + eji.erase(input_for_split, split); + + /* handle any uncovered simple cases */ + add_simple_joins(eji, out); + } +} + +static +void build_blocks(const GoughGraph &g, + map<gough_edge_id, vector<gough_ins> > *blocks, + u32 base_temp_slot) { + for (const auto &e : edges_range(g)) { + if (g[e].vars.empty()) { + continue; + } + + vector<gough_ins> &block = (*blocks)[gough_edge_id(g, e)]; + add_to_block(g[e].vars, &block); + assert(!block.empty()); + } + + for (const auto t : vertices_range(g)) { + if (g[t].vars.empty()) { + continue; + } + + map<GoughEdge, edge_join_info> eji; + prep_joins_for_generation(g, t, &eji); + + for (auto &m : eji) { + vector<gough_ins> &block = (*blocks)[gough_edge_id(g, m.first)]; + u32 cur_base = base_temp_slot; + if (!block.empty()) { + /* some temp slots may already be in use by short-lived vars */ + ENSURE_AT_LEAST(&cur_base, highest_slot_used(block) + 1); + } + + add_joins_to_block(m.second, &block, cur_base); + if (block.empty()) { + blocks->erase(gough_edge_id(g, m.first)); + } + } + } + + for (vector<gough_ins> &ins_list : *blocks | map_values) { + assert(!ins_list.empty()); + ins_list.push_back(make_gough_ins(GOUGH_INS_END)); + } +} + +static +void copy_in_blocks(raw_som_dfa &raw, u8 alphaShift, const GoughGraph &cfg, + const map<gough_edge_id, vector<gough_ins> > &blocks, + u32 *edge_blocks, u32 *top_blocks, u32 base_offset, + map<vector<gough_ins>, u32> *prog_offsets, + vector<gough_ins> *out) { + u32 impl_alpha_size = 1U << alphaShift; + UNUSED u32 top_sym = raw.alpha_remap[TOP]; + assert(top_sym == raw.alpha_size - 1U); + map<vector<gough_ins>, u32> &processed = *prog_offsets; + + for (const auto &e : edges_range(cfg)) { + if (!contains(blocks, gough_edge_id(cfg, e))) { + continue; + } + const vector<gough_ins> &block = blocks.at(gough_edge_id(cfg, e)); + u32 prog_offset; + if (!contains(processed, block)) { + prog_offset = base_offset + byte_length(*out); + insert(out, out->end(), block); + processed[block] = prog_offset; + } else { + prog_offset = processed[block]; + } + + /* update edges */ + u32 s_id = cfg[source(e, cfg)].state_id; + UNUSED u32 t_id = cfg[target(e, cfg)].state_id; + u32 impl_src_id = 
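/* The cyclic case this loop resolves, written out by hand for the smallest
 * example: the parallel joins {slot0 <- slot1, slot1 <- slot0} form a swap,
 * so the value of the split slot is stashed in a temp first. The slot
 * numbers (temp slot 2 = base_temp_slot) are invented for illustration. */
vector<gough_ins> demo;
demo.push_back(make_gough_ins(GOUGH_INS_MOV, 2, 0)); /* stash old slot0   */
demo.push_back(make_gough_ins(GOUGH_INS_MOV, 0, 1)); /* now safe: 0 <- 1  */
demo.push_back(make_gough_ins(GOUGH_INS_MOV, 1, 2)); /* reader of old 0
                                                      * reads the temp    */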
raw.states[s_id].impl_id; + DEBUG_PRINTF("%u: writing out block for edge_%u_%u at %u:\n", + impl_src_id, s_id, t_id,prog_offset); + + for (u32 j = cfg[e].reach.find_first(); j != CharReach::npos; + j = cfg[e].reach.find_next(j)) { + assert(raw.states[s_id].next[j] == t_id); + u32 edge_index = impl_src_id * impl_alpha_size + j; + DEBUG_PRINTF("\tsetting on %u, %u\n", j, edge_index); + edge_blocks[edge_index] = prog_offset; + } + + if (cfg[e].top) { + assert(raw.states[s_id].next[top_sym] == t_id); + DEBUG_PRINTF("\tsetting top on %u to block at %u\n", impl_src_id, + prog_offset); + top_blocks[impl_src_id] = prog_offset; + } + } +} + +bool find_normal_self_loop(GoughVertex v, const GoughGraph &g, GoughEdge *out) { + for (const auto &e : out_edges_range(v, g)) { + if (target(e, g) != v) { + continue; + } + if (g[e].top) { + assert(g[e].reach.find_first() == CharReach::npos); + continue; /* corresponds to a top, not a normal transition */ + } + + *out = e; + return true; + } + + return false; +} + +static never_inline +void update_accel_prog_offset(const gough_build_strat &gbs, + const map<gough_edge_id, vector<gough_ins> > &blocks, + const map<vector<gough_ins>, u32> &prog_offsets) { + map<dstate_id_t, GoughVertex> verts; + for (auto v : vertices_range(gbs.gg)) { + verts[gbs.gg[v].state_id] = v; + } + + for (auto &m : gbs.built_accel) { + gough_accel *ga = m.first; + assert(!ga->prog_offset); + GoughVertex v = verts[m.second]; + GoughEdge e; + UNUSED bool rv = find_normal_self_loop(v, gbs.gg, &e); + assert(rv); + + if (!rv) { + continue; + } + + DEBUG_PRINTF("updating state %u accel with margin %hhu\n", + gbs.gg[v].state_id, ga->margin_dist); + if (contains(blocks, gough_edge_id(gbs.gg, e))) { + const vector<gough_ins> &block + = blocks.at(gough_edge_id(gbs.gg, e)); + ga->prog_offset = prog_offsets.at(block); + DEBUG_PRINTF("prog offset %u\n", ga->prog_offset); + } else { + ga->margin_dist = 0; + DEBUG_PRINTF("removing margin as no som\n"); + } + } +} + bytecode_ptr<NFA> goughCompile(raw_som_dfa &raw, u8 somPrecision, const CompileContext &cc, const ReportManager &rm) { - assert(somPrecision == 2 || somPrecision == 4 || somPrecision == 8 - || !cc.streaming); - - if (!cc.grey.allowGough) { - return nullptr; - } - - DEBUG_PRINTF("hello world\n"); - unique_ptr<GoughGraph> cfg = makeCFG(raw); - dump(*cfg, "init", cc.grey); - copy_propagation(*cfg, cc.grey); - remove_dead(*cfg); - dump(*cfg, "prop", cc.grey); - u32 slot_count = assign_slots(*cfg, cc.grey); - dump(*cfg, "slots", cc.grey); - - map<gough_edge_id, vector<gough_ins> > blocks; - build_blocks(*cfg, &blocks, slot_count); - DEBUG_PRINTF("%u slots\n", highest_slot_used(blocks) + 1); - - u32 scratch_slot_count = highest_slot_used(blocks) + 1; - assert(slot_count <= scratch_slot_count); - - dump(*cfg, "final", cc.grey); - dump_blocks(blocks, "final", cc.grey); - - gough_info gi; - memset(&gi, 0, sizeof(gi)); - - map<dstate_id_t, gough_accel_state_info> accel_allowed; - find_allowed_accel_states(*cfg, blocks, &accel_allowed); + assert(somPrecision == 2 || somPrecision == 4 || somPrecision == 8 + || !cc.streaming); + + if (!cc.grey.allowGough) { + return nullptr; + } + + DEBUG_PRINTF("hello world\n"); + unique_ptr<GoughGraph> cfg = makeCFG(raw); + dump(*cfg, "init", cc.grey); + copy_propagation(*cfg, cc.grey); + remove_dead(*cfg); + dump(*cfg, "prop", cc.grey); + u32 slot_count = assign_slots(*cfg, cc.grey); + dump(*cfg, "slots", cc.grey); + + map<gough_edge_id, vector<gough_ins> > blocks; + build_blocks(*cfg, &blocks, slot_count); + 
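/* The program-sharing scheme used by copy_in_blocks above, in isolation:
 * each distinct instruction vector is appended to the output once, and
 * every edge whose block compares equal reuses the recorded byte offset.
 * The byte_length stand-in (element count times element size) is an
 * assumption of this sketch, and Ins must be less-than comparable to serve
 * as a map key. */
#include <cstdint>
#include <map>
#include <vector>

template <typename Ins>
uint32_t intern_block(const std::vector<Ins> &block, uint32_t base_offset,
                      std::map<std::vector<Ins>, uint32_t> &seen,
                      std::vector<Ins> &out) {
    auto it = seen.find(block);
    if (it != seen.end()) {
        return it->second; /* already written out: share its offset */
    }
    uint32_t off = base_offset + (uint32_t)(out.size() * sizeof(Ins));
    out.insert(out.end(), block.begin(), block.end());
    seen.emplace(block, off);
    return off;
}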
DEBUG_PRINTF("%u slots\n", highest_slot_used(blocks) + 1); + + u32 scratch_slot_count = highest_slot_used(blocks) + 1; + assert(slot_count <= scratch_slot_count); + + dump(*cfg, "final", cc.grey); + dump_blocks(blocks, "final", cc.grey); + + gough_info gi; + memset(&gi, 0, sizeof(gi)); + + map<dstate_id_t, gough_accel_state_info> accel_allowed; + find_allowed_accel_states(*cfg, blocks, &accel_allowed); gough_build_strat gbs(raw, *cfg, rm, accel_allowed); auto basic_dfa = mcclellanCompile_i(raw, gbs, cc); - assert(basic_dfa); - if (!basic_dfa) { - return nullptr; - } - - u8 alphaShift - = ((const mcclellan *)getImplNfa(basic_dfa.get()))->alphaShift; - u32 edge_count = (1U << alphaShift) * raw.states.size(); - - u32 curr_offset = ROUNDUP_N(basic_dfa->length, 4); - - u32 haig_offset = curr_offset; - curr_offset += sizeof(gi); - /* reserve space for edge->program mapping */ - u32 edge_prog_offset = curr_offset; - curr_offset += sizeof(u32) * edge_count; - vector<u32> edge_blocks(edge_count); - - u32 top_prog_offset = 0; - if (is_triggered(raw.kind)) { - /* reserve space for edge->program mapping */ - top_prog_offset = curr_offset; - curr_offset += sizeof(u32) * raw.states.size(); - } - gi.top_prog_offset = top_prog_offset; - vector<u32> top_blocks(raw.states.size()); - - /* reserve space for blocks */ - u32 prog_base_offset = curr_offset; - gi.prog_base_offset = prog_base_offset; - - vector<gough_ins> temp_blocks; - map<vector<gough_ins>, u32> prog_offsets; - copy_in_blocks(raw, alphaShift, *cfg, blocks, &edge_blocks[0], - &top_blocks[0], prog_base_offset, &prog_offsets, - &temp_blocks); - update_accel_prog_offset(gbs, blocks, prog_offsets); - - u32 total_prog_size = byte_length(temp_blocks); - curr_offset += total_prog_size; - - gi.stream_som_loc_count = slot_count; - gi.stream_som_loc_width = somPrecision; - - u32 gough_size = ROUNDUP_N(curr_offset, 16); + assert(basic_dfa); + if (!basic_dfa) { + return nullptr; + } + + u8 alphaShift + = ((const mcclellan *)getImplNfa(basic_dfa.get()))->alphaShift; + u32 edge_count = (1U << alphaShift) * raw.states.size(); + + u32 curr_offset = ROUNDUP_N(basic_dfa->length, 4); + + u32 haig_offset = curr_offset; + curr_offset += sizeof(gi); + /* reserve space for edge->program mapping */ + u32 edge_prog_offset = curr_offset; + curr_offset += sizeof(u32) * edge_count; + vector<u32> edge_blocks(edge_count); + + u32 top_prog_offset = 0; + if (is_triggered(raw.kind)) { + /* reserve space for edge->program mapping */ + top_prog_offset = curr_offset; + curr_offset += sizeof(u32) * raw.states.size(); + } + gi.top_prog_offset = top_prog_offset; + vector<u32> top_blocks(raw.states.size()); + + /* reserve space for blocks */ + u32 prog_base_offset = curr_offset; + gi.prog_base_offset = prog_base_offset; + + vector<gough_ins> temp_blocks; + map<vector<gough_ins>, u32> prog_offsets; + copy_in_blocks(raw, alphaShift, *cfg, blocks, &edge_blocks[0], + &top_blocks[0], prog_base_offset, &prog_offsets, + &temp_blocks); + update_accel_prog_offset(gbs, blocks, prog_offsets); + + u32 total_prog_size = byte_length(temp_blocks); + curr_offset += total_prog_size; + + gi.stream_som_loc_count = slot_count; + gi.stream_som_loc_width = somPrecision; + + u32 gough_size = ROUNDUP_N(curr_offset, 16); auto gough_dfa = make_zeroed_bytecode_ptr<NFA>(gough_size); - - memcpy(gough_dfa.get(), basic_dfa.get(), basic_dfa->length); - memcpy((char *)gough_dfa.get() + haig_offset, &gi, sizeof(gi)); - if (gough_dfa->type == MCCLELLAN_NFA_16) { - gough_dfa->type = GOUGH_NFA_16; - } else { - 
assert(gough_dfa->type == MCCLELLAN_NFA_8); - gough_dfa->type = GOUGH_NFA_8; - } - - /* update stream state requirements */ - u32 base_state_size = gough_dfa->type == GOUGH_NFA_8 ? 1 : 2; - gough_dfa->streamStateSize = base_state_size + slot_count * somPrecision; - gough_dfa->scratchStateSize = (u32)(16 + scratch_slot_count * sizeof(u64a)); - - mcclellan *m = (mcclellan *)getMutableImplNfa(gough_dfa.get()); - m->haig_offset = haig_offset; - - /* update nfa length, haig_info offset (leave mcclellan length alone) */ - gough_dfa->length = gough_size; - - /* copy in blocks */ - copy_bytes((u8 *)gough_dfa.get() + edge_prog_offset, edge_blocks); - if (top_prog_offset) { - copy_bytes((u8 *)gough_dfa.get() + top_prog_offset, top_blocks); - } - copy_bytes((u8 *)gough_dfa.get() + prog_base_offset, temp_blocks); - - return gough_dfa; -} - + + memcpy(gough_dfa.get(), basic_dfa.get(), basic_dfa->length); + memcpy((char *)gough_dfa.get() + haig_offset, &gi, sizeof(gi)); + if (gough_dfa->type == MCCLELLAN_NFA_16) { + gough_dfa->type = GOUGH_NFA_16; + } else { + assert(gough_dfa->type == MCCLELLAN_NFA_8); + gough_dfa->type = GOUGH_NFA_8; + } + + /* update stream state requirements */ + u32 base_state_size = gough_dfa->type == GOUGH_NFA_8 ? 1 : 2; + gough_dfa->streamStateSize = base_state_size + slot_count * somPrecision; + gough_dfa->scratchStateSize = (u32)(16 + scratch_slot_count * sizeof(u64a)); + + mcclellan *m = (mcclellan *)getMutableImplNfa(gough_dfa.get()); + m->haig_offset = haig_offset; + + /* update nfa length, haig_info offset (leave mcclellan length alone) */ + gough_dfa->length = gough_size; + + /* copy in blocks */ + copy_bytes((u8 *)gough_dfa.get() + edge_prog_offset, edge_blocks); + if (top_prog_offset) { + copy_bytes((u8 *)gough_dfa.get() + top_prog_offset, top_blocks); + } + copy_bytes((u8 *)gough_dfa.get() + prog_base_offset, temp_blocks); + + return gough_dfa; +} + AccelScheme gough_build_strat::find_escape_strings(dstate_id_t this_idx) const { AccelScheme rv; - if (!contains(accel_gough_info, this_idx)) { + if (!contains(accel_gough_info, this_idx)) { rv.cr = CharReach::dot(); rv.double_byte.clear(); return rv; - } - + } + rv = mcclellan_build_strat::find_escape_strings(this_idx); - + assert(!rv.offset || rv.cr.all()); /* should have been limited by strat */ if (rv.offset) { rv.cr = CharReach::dot(); rv.double_byte.clear(); return rv; - } + } if (rv.double_offset || !accel_gough_info.at(this_idx).two_byte) { @@ -1172,163 +1172,163 @@ AccelScheme gough_build_strat::find_escape_strings(dstate_id_t this_idx) const { } return rv; -} - +} + void gough_build_strat::buildAccel(dstate_id_t this_idx, const AccelScheme &info, void *accel_out) { - assert(mcclellan_build_strat::accelSize() == sizeof(AccelAux)); - gough_accel *accel = (gough_accel *)accel_out; - /* build a plain accelaux so we can work out where we can get to */ + assert(mcclellan_build_strat::accelSize() == sizeof(AccelAux)); + gough_accel *accel = (gough_accel *)accel_out; + /* build a plain accelaux so we can work out where we can get to */ mcclellan_build_strat::buildAccel(this_idx, info, &accel->accel); - DEBUG_PRINTF("state %hu is accel with type %hhu\n", this_idx, - accel->accel.accel_type); - if (accel->accel.accel_type == ACCEL_NONE) { - return; - } - + DEBUG_PRINTF("state %hu is accel with type %hhu\n", this_idx, + accel->accel.accel_type); + if (accel->accel.accel_type == ACCEL_NONE) { + return; + } + assert(!accel->accel.generic.offset); - assert(contains(accel_gough_info, this_idx)); - accel->margin_dist = 
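/* A worked instance of the state sizing above, with invented numbers: a
 * GOUGH_NFA_8 (one byte of DFA state) tracking 5 SOM slots at 4-byte stream
 * precision, with 7 slots (including temps) live in scratch. */
#include <cstdint>

uint32_t base_state_size = 1;              /* GOUGH_NFA_8 -> 1, else 2    */
uint32_t slot_count = 5, som_precision = 4;
uint32_t stream_state = base_state_size + slot_count * som_precision; /* 21 */
uint32_t scratch_slot_count = 7;           /* >= slot_count; adds temps   */
uint32_t scratch_state =
    (uint32_t)(16 + scratch_slot_count * sizeof(uint64_t)); /* 72 bytes   */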
verify_u8(accel_gough_info.at(this_idx).margin); - built_accel[accel] = this_idx; - DEBUG_PRINTF("state %hu is accel with margin %hhu\n", this_idx, - accel->margin_dist); -} - -namespace { -struct raw_gough_report_list { - set<som_report> reports; - + assert(contains(accel_gough_info, this_idx)); + accel->margin_dist = verify_u8(accel_gough_info.at(this_idx).margin); + built_accel[accel] = this_idx; + DEBUG_PRINTF("state %hu is accel with margin %hhu\n", this_idx, + accel->margin_dist); +} + +namespace { +struct raw_gough_report_list { + set<som_report> reports; + raw_gough_report_list( const vector<pair<ReportID, GoughSSAVar *>> &raw_reports, const ReportManager &rm, bool do_remap) { - for (const auto &m : raw_reports) { + for (const auto &m : raw_reports) { ReportID r = do_remap ? rm.getProgramOffset(m.first) : m.first; - u32 impl_slot = INVALID_SLOT; - if (m.second) { - impl_slot = m.second->slot; - assert(impl_slot != INVALID_SLOT); - } - reports.emplace(r, impl_slot); - } - } - - bool operator<(const raw_gough_report_list &b) const { - return reports < b.reports; - } -}; - -struct raw_gough_report_info_impl : public raw_report_info { - vector<raw_gough_report_list> rl; - u32 getReportListSize() const override; - size_t size() const override; - void fillReportLists(NFA *n, size_t base_offset, - vector<u32> &ro /* out */) const override; -}; -} - -unique_ptr<raw_report_info> gough_build_strat::gatherReports( - vector<u32> &reports, - vector<u32> &reports_eod, - u8 *isSingleReport, - ReportID *arbReport) const { - DEBUG_PRINTF("gathering reports\n"); - + u32 impl_slot = INVALID_SLOT; + if (m.second) { + impl_slot = m.second->slot; + assert(impl_slot != INVALID_SLOT); + } + reports.emplace(r, impl_slot); + } + } + + bool operator<(const raw_gough_report_list &b) const { + return reports < b.reports; + } +}; + +struct raw_gough_report_info_impl : public raw_report_info { + vector<raw_gough_report_list> rl; + u32 getReportListSize() const override; + size_t size() const override; + void fillReportLists(NFA *n, size_t base_offset, + vector<u32> &ro /* out */) const override; +}; +} + +unique_ptr<raw_report_info> gough_build_strat::gatherReports( + vector<u32> &reports, + vector<u32> &reports_eod, + u8 *isSingleReport, + ReportID *arbReport) const { + DEBUG_PRINTF("gathering reports\n"); + const bool remap_reports = has_managed_reports(rdfa.kind); auto ri = ue2::make_unique<raw_gough_report_info_impl>(); map<raw_gough_report_list, u32> rev; - assert(!rdfa.states.empty()); - - vector<GoughVertex> verts(rdfa.states.size()); - for (auto v : vertices_range(gg)) { - verts[gg[v].state_id] = v; - } - - for (u32 state_id = 0; state_id < verts.size(); state_id++) { - assert(state_id < rdfa.states.size()); - GoughVertex v = verts[state_id]; - assert(v != GoughGraph::null_vertex() || !state_id); - - DEBUG_PRINTF("i = %zu [%zu]\n", reports.size(), gg[v].reports.size()); - if (v == GoughGraph::null_vertex() || gg[v].reports.empty()) { - reports.push_back(MO_INVALID_IDX); - continue; - } - + assert(!rdfa.states.empty()); + + vector<GoughVertex> verts(rdfa.states.size()); + for (auto v : vertices_range(gg)) { + verts[gg[v].state_id] = v; + } + + for (u32 state_id = 0; state_id < verts.size(); state_id++) { + assert(state_id < rdfa.states.size()); + GoughVertex v = verts[state_id]; + assert(v != GoughGraph::null_vertex() || !state_id); + + DEBUG_PRINTF("i = %zu [%zu]\n", reports.size(), gg[v].reports.size()); + if (v == GoughGraph::null_vertex() || gg[v].reports.empty()) { + 
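/* The reverse-map dedup idiom used by gatherReports below, in isolation:
 * identical report lists get a single index, so states sharing a list also
 * share the serialized data. Types here are simplified stand-ins. */
#include <map>
#include <set>
#include <utility>
#include <vector>

using SketchList = std::set<std::pair<unsigned, unsigned>>; /* (report, slot) */

unsigned intern_report_list(const SketchList &rl,
                            std::map<SketchList, unsigned> &rev,
                            std::vector<SketchList> &all) {
    auto it = rev.find(rl);
    if (it != rev.end()) {
        return it->second; /* reuse the existing list's index */
    }
    unsigned idx = (unsigned)all.size();
    rev.emplace(rl, idx);
    all.push_back(rl);
    return idx;
}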
reports.push_back(MO_INVALID_IDX); + continue; + } + raw_gough_report_list rrl(gg[v].reports, rm, remap_reports); - DEBUG_PRINTF("non empty r %zu\n", reports.size()); - if (rev.find(rrl) != rev.end()) { - reports.push_back(rev[rrl]); - } else { - DEBUG_PRINTF("adding to rl\n"); - rev[rrl] = ri->size(); - reports.push_back(ri->size()); - ri->rl.push_back(rrl); - } - } - - for (auto v : verts) { - if (v == GoughGraph::null_vertex() || gg[v].reports_eod.empty()) { - reports_eod.push_back(MO_INVALID_IDX); - continue; - } - - DEBUG_PRINTF("non empty r eod\n"); + DEBUG_PRINTF("non empty r %zu\n", reports.size()); + if (rev.find(rrl) != rev.end()) { + reports.push_back(rev[rrl]); + } else { + DEBUG_PRINTF("adding to rl\n"); + rev[rrl] = ri->size(); + reports.push_back(ri->size()); + ri->rl.push_back(rrl); + } + } + + for (auto v : verts) { + if (v == GoughGraph::null_vertex() || gg[v].reports_eod.empty()) { + reports_eod.push_back(MO_INVALID_IDX); + continue; + } + + DEBUG_PRINTF("non empty r eod\n"); raw_gough_report_list rrl(gg[v].reports_eod, rm, remap_reports); - if (rev.find(rrl) != rev.end()) { - reports_eod.push_back(rev[rrl]); - continue; - } - - DEBUG_PRINTF("adding to rl eod %zu\n", gg[v].reports_eod.size()); - rev[rrl] = ri->size(); - reports_eod.push_back(ri->size()); - ri->rl.push_back(rrl); - } - - /* TODO: support single report in gough */ - *isSingleReport = 0; - *arbReport = MO_INVALID_IDX; - assert(!ri->rl.empty()); /* all components should be able to generate - reports */ - return move(ri); -} - -u32 raw_gough_report_info_impl::getReportListSize() const { - u32 sz = 0; - - for (const raw_gough_report_list &r : rl) { - sz += sizeof(gough_report_list); - sz += sizeof(gough_report) * r.reports.size(); - } - - return sz; -} - -size_t raw_gough_report_info_impl::size() const { - return rl.size(); -} - -void raw_gough_report_info_impl::fillReportLists(NFA *n, size_t base_offset, - vector<u32> &ro) const { - for (const raw_gough_report_list &r : rl) { - ro.push_back(base_offset); - - gough_report_list *p = (gough_report_list *)((char *)n + base_offset); - u32 i = 0; - - for (const som_report &sr : r.reports) { - p->report[i].r = sr.report; - p->report[i].som = sr.slot; - i++; - } - - p->count = verify_u32(r.reports.size()); - - base_offset += sizeof(gough_report_list); - base_offset += sizeof(gough_report) * r.reports.size(); - } -} - -} // namespace ue2 + if (rev.find(rrl) != rev.end()) { + reports_eod.push_back(rev[rrl]); + continue; + } + + DEBUG_PRINTF("adding to rl eod %zu\n", gg[v].reports_eod.size()); + rev[rrl] = ri->size(); + reports_eod.push_back(ri->size()); + ri->rl.push_back(rrl); + } + + /* TODO: support single report in gough */ + *isSingleReport = 0; + *arbReport = MO_INVALID_IDX; + assert(!ri->rl.empty()); /* all components should be able to generate + reports */ + return move(ri); +} + +u32 raw_gough_report_info_impl::getReportListSize() const { + u32 sz = 0; + + for (const raw_gough_report_list &r : rl) { + sz += sizeof(gough_report_list); + sz += sizeof(gough_report) * r.reports.size(); + } + + return sz; +} + +size_t raw_gough_report_info_impl::size() const { + return rl.size(); +} + +void raw_gough_report_info_impl::fillReportLists(NFA *n, size_t base_offset, + vector<u32> &ro) const { + for (const raw_gough_report_list &r : rl) { + ro.push_back(base_offset); + + gough_report_list *p = (gough_report_list *)((char *)n + base_offset); + u32 i = 0; + + for (const som_report &sr : r.reports) { + p->report[i].r = sr.report; + p->report[i].som = sr.slot; + i++; + } + + 
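/* The serialized shape fillReportLists writes, sketched with plain structs:
 * each list is a u32 count alongside (report, som slot) pairs, and lists
 * are laid out back to back from the base offset. The struct names and the
 * byte-buffer framing are stand-ins, not the real internal layout. */
#include <cstdint>
#include <vector>

struct SketchReport {
    uint32_t report;
    uint32_t som_slot;
};

void append_list(std::vector<uint8_t> &buf,
                 const std::vector<SketchReport> &reps) {
    uint32_t count = (uint32_t)reps.size();
    const uint8_t *p = (const uint8_t *)&count;
    buf.insert(buf.end(), p, p + sizeof(count));  /* header: entry count */
    for (const SketchReport &r : reps) {          /* then the entries    */
        const uint8_t *q = (const uint8_t *)&r;
        buf.insert(buf.end(), q, q + sizeof(r));
    }
}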
p->count = verify_u32(r.reports.size()); + + base_offset += sizeof(gough_report_list); + base_offset += sizeof(gough_report) * r.reports.size(); + } +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfa/goughcompile.h b/contrib/libs/hyperscan/src/nfa/goughcompile.h index 4d03eb6450..00da1891ec 100644 --- a/contrib/libs/hyperscan/src/nfa/goughcompile.h +++ b/contrib/libs/hyperscan/src/nfa/goughcompile.h @@ -1,97 +1,97 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef GOUGHCOMPILE_H -#define GOUGHCOMPILE_H - -#include "mcclellancompile.h" -#include "nfa_kind.h" -#include "ue2common.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef GOUGHCOMPILE_H +#define GOUGHCOMPILE_H + +#include "mcclellancompile.h" +#include "nfa_kind.h" +#include "ue2common.h" #include "util/bytecode_ptr.h" #include "util/flat_containers.h" -#include "util/order_check.h" - -#include <map> -#include <memory> -#include <set> -#include <vector> - -namespace ue2 { - -#define CREATE_NEW_SOM (~0U) - -/* dest nfa state -> som info for dest state is min of provided loc idx som - * info */ -typedef flat_map<u32, std::vector<u32>> som_tran_info; - -struct som_report { - som_report(ReportID r, u32 s) : report(r), slot(s) {} - - ReportID report; - u32 slot; - - bool operator<(const som_report &b) const { - const som_report &a = *this; - ORDER_CHECK(report); - ORDER_CHECK(slot); - return false; - } -}; - -struct dstate_som { - std::set<som_report> reports; - std::set<som_report> reports_eod; - som_tran_info preds; /* live nfa states mapped back to pred states */ -}; - -struct raw_som_dfa : public raw_dfa { - raw_som_dfa(nfa_kind k, bool unordered_som_triggers_in, u32 trigger, - u32 stream_som_loc_width_in) - : raw_dfa(k), stream_som_loc_width(stream_som_loc_width_in), - unordered_som_triggers(unordered_som_triggers_in), - trigger_nfa_state(trigger) { - assert(!unordered_som_triggers || is_triggered(kind)); - } - - std::vector<dstate_som> state_som; - u32 stream_som_loc_width; - bool unordered_som_triggers; - void stripExtraEodReports(void) override; - - std::map<u32, u32> new_som_nfa_states; /* map nfa vertex id -> offset */ - u32 trigger_nfa_state; /* for triggered cases, slot_id that contains a new - * som */ -}; - +#include "util/order_check.h" + +#include <map> +#include <memory> +#include <set> +#include <vector> + +namespace ue2 { + +#define CREATE_NEW_SOM (~0U) + +/* dest nfa state -> som info for dest state is min of provided loc idx som + * info */ +typedef flat_map<u32, std::vector<u32>> som_tran_info; + +struct som_report { + som_report(ReportID r, u32 s) : report(r), slot(s) {} + + ReportID report; + u32 slot; + + bool operator<(const som_report &b) const { + const som_report &a = *this; + ORDER_CHECK(report); + ORDER_CHECK(slot); + return false; + } +}; + +struct dstate_som { + std::set<som_report> reports; + std::set<som_report> reports_eod; + som_tran_info preds; /* live nfa states mapped back to pred states */ +}; + +struct raw_som_dfa : public raw_dfa { + raw_som_dfa(nfa_kind k, bool unordered_som_triggers_in, u32 trigger, + u32 stream_som_loc_width_in) + : raw_dfa(k), stream_som_loc_width(stream_som_loc_width_in), + unordered_som_triggers(unordered_som_triggers_in), + trigger_nfa_state(trigger) { + assert(!unordered_som_triggers || is_triggered(kind)); + } + + std::vector<dstate_som> state_som; + u32 stream_som_loc_width; + bool unordered_som_triggers; + void stripExtraEodReports(void) override; + + std::map<u32, u32> new_som_nfa_states; /* map nfa vertex id -> offset */ + u32 trigger_nfa_state; /* for triggered cases, slot_id that contains a new + * som */ +}; + bytecode_ptr<NFA> 
goughCompile(raw_som_dfa &raw, u8 somPrecision, const CompileContext &cc, const ReportManager &rm); - -} // namespace ue2 - + +} // namespace ue2 + #endif // GOUGHCOMPILE_H diff --git a/contrib/libs/hyperscan/src/nfa/goughcompile_accel.cpp b/contrib/libs/hyperscan/src/nfa/goughcompile_accel.cpp index 3a3a44498e..849202a192 100644 --- a/contrib/libs/hyperscan/src/nfa/goughcompile_accel.cpp +++ b/contrib/libs/hyperscan/src/nfa/goughcompile_accel.cpp @@ -1,281 +1,281 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "goughcompile_internal.h" -#include "gough_internal.h" -#include "grey.h" -#include "mcclellancompile.h" -#include "util/container.h" -#include "util/graph.h" -#include "util/graph_range.h" - -#include "ue2common.h" - -#include <map> -#include <vector> - -using namespace std; - -namespace ue2 { - -template<typename Graph> -void add_edge_if_not_selfloop(const typename Graph::vertex_descriptor &u, - const typename Graph::vertex_descriptor &v, - Graph &g) { - if (u != v) { - add_edge(u, v, g); - } -} - -static -bool can_accel_over_selfloop(const GoughVertexProps &vp, const GoughEdge &e, - const GoughEdgeProps &ep, u32 *margin) { - if (vp.vars.empty() && ep.vars.empty()) { - /* if we update no som information, then it is trivial to accelerate */ - *margin = 0; - return true; - } - - /* if the effect of running a self loop stabilises after a small number of - * iterations, it is possible to accelerate over the state and only then run - * the block N times. 
To model this we create a graph which shows how the - * value for a variable at the end of a self loop block is related to values - * at the start */ - - typedef boost::adjacency_list<boost::vecS, boost::vecS, - boost::bidirectionalS> basic_graph; - typedef basic_graph::vertex_descriptor basic_vertex; - basic_graph bg; - - map<const GoughSSAVar *, basic_vertex> verts; - - /* create verts */ - for (const auto &var : ep.vars) { - verts[var.get()] = add_vertex(bg); - } - - for (const auto &var : vp.vars) { - verts[var.get()] = add_vertex(bg); - } - - /* wire edges */ - set<basic_vertex> done; - for (const auto &var : ep.vars) { - assert(contains(verts, var.get())); - basic_vertex v = verts[var.get()]; - for (GoughSSAVar *pred : var->get_inputs()) { - if (!contains(verts, pred)) { - continue; - } - basic_vertex u = verts[pred]; - if (contains(done, u)) { /* u has already taken on new values this - * iteration */ - for (auto p : inv_adjacent_vertices_range(u, bg)) { - add_edge_if_not_selfloop(p, v, bg); - } - } else { - add_edge_if_not_selfloop(u, v, bg); - } - } - done.insert(v); - } - - for (const auto &var : vp.vars) { - GoughSSAVar *pred = var->get_input(e); - assert(contains(verts, var.get())); - basic_vertex v = verts[var.get()]; - if (!contains(verts, pred)) { - continue; - } - - basic_vertex u = verts[pred]; - if (contains(done, u)) { /* u has already taken on new values this - * iteration */ - for (auto p : inv_adjacent_vertices_range(u, bg)) { - add_edge_if_not_selfloop(p, v, bg); - } - } else { - add_edge_if_not_selfloop(u, v, bg); - } - /* do not add v to done as all joins happen in parallel */ - } - - /* check for loops - non self loops may prevent settling */ - - if (!is_dag(bg)) { - DEBUG_PRINTF("can not %u accel as large loops\n", vp.state_id); - return false; - } - - *margin = num_vertices(bg); /* TODO: be less conservative */ - - if (*margin > 50) { - return false; - } - - return true; -} - -static -bool verify_neighbour(const GoughGraph &g, GoughVertex u, - const map<gough_edge_id, vector<gough_ins> > &blocks, - const set<GoughVertex> &succs, - const vector<gough_ins> &block_sl) { - for (const auto &e : out_edges_range(u, g)) { - if (!g[e].reach.any()) { /* ignore top edges */ - continue; - } - - GoughVertex t = target(e, g); - if (!contains(succs, t)) { /* must be an escape string */ - continue; - } - - if (!contains(blocks, gough_edge_id(g, e))) { - return false; - } - - if (blocks.at(gough_edge_id(g, e)) != block_sl) { - return false; - } - } - - return true; -} - -static -bool verify_neighbour_no_block(const GoughGraph &g, GoughVertex u, - const map<gough_edge_id, vector<gough_ins> > &blocks, - const set<GoughVertex> &succs) { - for (const auto &e : out_edges_range(u, g)) { - if (!g[e].reach.any()) { /* ignore top edges */ - continue; - } - - GoughVertex t = target(e, g); - if (!contains(succs, t)) { /* must be an escape string */ - continue; - } - - if (contains(blocks, gough_edge_id(g, e))) { - return false; - } - } - - return true; -} - -/* Checks the som aspects of allowing two byte accel - it is expected that the - * mcclellan logic will identify escape strings. - * - * For 2 byte acceleration to be correct we require that any non-escape sequence - * characters xy from the accel state has the same effect as just the character - * of y. 
- * - * The current way of ensuring this is to require: - * (a) all edges out of the cyclic state behave identically to the cyclic self - * loop edge - * (b) edges out of the neighbouring state which do not correspond to escape - * string behave identical to the cyclic state edges. - * - * TODO: these restrictions could be relaxed by looking at the effect on - * relevant (live?) vars only, allowing additions to the escape string set, and - * considering one byte escapes. - */ -static -bool allow_two_byte_accel(const GoughGraph &g, - const map<gough_edge_id, vector<gough_ins> > &blocks, - GoughVertex v, const GoughEdge &self_loop) { - if (contains(blocks, gough_edge_id(g, self_loop))) { - DEBUG_PRINTF("edge plan on self loop\n"); - const auto &block_sl = blocks.at(gough_edge_id(g, self_loop)); - - set<GoughVertex> succs; - for (const auto &e : out_edges_range(v, g)) { - if (g[e].reach.none()) { /* ignore top edges */ - continue; - } - - gough_edge_id ged(g, e); - if (!contains(blocks, ged) || blocks.at(ged) != block_sl) { - DEBUG_PRINTF("different out-edge behaviour\n"); - return false; - } - succs.insert(target(e, g)); - } - - for (auto w : adjacent_vertices_range(v, g)) { - if (w != v && !verify_neighbour(g, w, blocks, succs, block_sl)) { - return false; - } - } - } else { - DEBUG_PRINTF("no edge plan on self loop\n"); - set<GoughVertex> succs; - for (const auto &e : out_edges_range(v, g)) { - if (g[e].reach.none()) { /* ignore top edges */ - continue; - } - - gough_edge_id ged(g, e); - if (contains(blocks, ged)) { - DEBUG_PRINTF("different out-edge behaviour\n"); - return false; - } - succs.insert(target(e, g)); - - for (auto w : adjacent_vertices_range(v, g)) { - if (w != v && !verify_neighbour_no_block(g, w, blocks, succs)) { - return false; - } - } - } - } - - DEBUG_PRINTF("allowing two byte accel for %u\n", g[v].state_id); - return true; -} - -void find_allowed_accel_states(const GoughGraph &g, - const map<gough_edge_id, vector<gough_ins> > &blocks, - map<dstate_id_t, gough_accel_state_info> *out) { - for (auto v : vertices_range(g)) { - GoughEdge e; - if (!find_normal_self_loop(v, g, &e)) { - continue; /* not accelerable */ - } - u32 margin = 0; - if (!can_accel_over_selfloop(g[v], e, g[e], &margin)) { - continue; /* not accelerable */ - } - bool tba = allow_two_byte_accel(g, blocks, v, e); - out->emplace(g[v].state_id, gough_accel_state_info(margin, tba)); - } -} - -} // namespace ue2 +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "goughcompile_internal.h" +#include "gough_internal.h" +#include "grey.h" +#include "mcclellancompile.h" +#include "util/container.h" +#include "util/graph.h" +#include "util/graph_range.h" + +#include "ue2common.h" + +#include <map> +#include <vector> + +using namespace std; + +namespace ue2 { + +template<typename Graph> +void add_edge_if_not_selfloop(const typename Graph::vertex_descriptor &u, + const typename Graph::vertex_descriptor &v, + Graph &g) { + if (u != v) { + add_edge(u, v, g); + } +} + +static +bool can_accel_over_selfloop(const GoughVertexProps &vp, const GoughEdge &e, + const GoughEdgeProps &ep, u32 *margin) { + if (vp.vars.empty() && ep.vars.empty()) { + /* if we update no som information, then it is trivial to accelerate */ + *margin = 0; + return true; + } + + /* if the effect of running a self loop stabilises after a small number of + * iterations, it is possible to accelerate over the state and only then run + * the block N times. To model this we create a graph which shows how the + * value for a variable at the end of a self loop block is related to values + * at the start */ + + typedef boost::adjacency_list<boost::vecS, boost::vecS, + boost::bidirectionalS> basic_graph; + typedef basic_graph::vertex_descriptor basic_vertex; + basic_graph bg; + + map<const GoughSSAVar *, basic_vertex> verts; + + /* create verts */ + for (const auto &var : ep.vars) { + verts[var.get()] = add_vertex(bg); + } + + for (const auto &var : vp.vars) { + verts[var.get()] = add_vertex(bg); + } + + /* wire edges */ + set<basic_vertex> done; + for (const auto &var : ep.vars) { + assert(contains(verts, var.get())); + basic_vertex v = verts[var.get()]; + for (GoughSSAVar *pred : var->get_inputs()) { + if (!contains(verts, pred)) { + continue; + } + basic_vertex u = verts[pred]; + if (contains(done, u)) { /* u has already taken on new values this + * iteration */ + for (auto p : inv_adjacent_vertices_range(u, bg)) { + add_edge_if_not_selfloop(p, v, bg); + } + } else { + add_edge_if_not_selfloop(u, v, bg); + } + } + done.insert(v); + } + + for (const auto &var : vp.vars) { + GoughSSAVar *pred = var->get_input(e); + assert(contains(verts, var.get())); + basic_vertex v = verts[var.get()]; + if (!contains(verts, pred)) { + continue; + } + + basic_vertex u = verts[pred]; + if (contains(done, u)) { /* u has already taken on new values this + * iteration */ + for (auto p : inv_adjacent_vertices_range(u, bg)) { + add_edge_if_not_selfloop(p, v, bg); + } + } else { + add_edge_if_not_selfloop(u, v, bg); + } + /* do not add v to done as all joins happen in parallel */ + } + + /* check for loops - non self loops may prevent settling */ + + if (!is_dag(bg)) { + DEBUG_PRINTF("can not %u accel as large loops\n", vp.state_id); + return false; + } + + *margin = num_vertices(bg); /* TODO: be less conservative */ + + if (*margin > 50) { + return false; + } + + return true; +} + +static +bool verify_neighbour(const GoughGraph &g, GoughVertex u, + const 
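/* The settling test above, restated without boost: model how each
 * variable's value after one pass of the self loop depends on values before
 * the pass, and accept only if that dependency graph is acyclic. Kahn's
 * algorithm stands in for is_dag(); the edge-list encoding is invented. */
#include <cstddef>
#include <queue>
#include <utility>
#include <vector>

bool value_settles(size_t n_vars,
                   const std::vector<std::pair<size_t, size_t>> &deps) {
    std::vector<std::vector<size_t>> adj(n_vars);
    std::vector<size_t> indeg(n_vars, 0);
    for (const auto &d : deps) { /* d.first feeds d.second */
        adj[d.first].push_back(d.second);
        ++indeg[d.second];
    }
    std::queue<size_t> q;
    for (size_t v = 0; v < n_vars; ++v) {
        if (!indeg[v]) {
            q.push(v);
        }
    }
    size_t visited = 0;
    while (!q.empty()) {
        size_t u = q.front();
        q.pop();
        ++visited;
        for (size_t w : adj[u]) {
            if (!--indeg[w]) {
                q.push(w);
            }
        }
    }
    return visited == n_vars; /* acyclic: values stabilise after at most
                               * n_vars iterations (the margin bound) */
}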
map<gough_edge_id, vector<gough_ins> > &blocks, + const set<GoughVertex> &succs, + const vector<gough_ins> &block_sl) { + for (const auto &e : out_edges_range(u, g)) { + if (!g[e].reach.any()) { /* ignore top edges */ + continue; + } + + GoughVertex t = target(e, g); + if (!contains(succs, t)) { /* must be an escape string */ + continue; + } + + if (!contains(blocks, gough_edge_id(g, e))) { + return false; + } + + if (blocks.at(gough_edge_id(g, e)) != block_sl) { + return false; + } + } + + return true; +} + +static +bool verify_neighbour_no_block(const GoughGraph &g, GoughVertex u, + const map<gough_edge_id, vector<gough_ins> > &blocks, + const set<GoughVertex> &succs) { + for (const auto &e : out_edges_range(u, g)) { + if (!g[e].reach.any()) { /* ignore top edges */ + continue; + } + + GoughVertex t = target(e, g); + if (!contains(succs, t)) { /* must be an escape string */ + continue; + } + + if (contains(blocks, gough_edge_id(g, e))) { + return false; + } + } + + return true; +} + +/* Checks the som aspects of allowing two byte accel - it is expected that the + * mcclellan logic will identify escape strings. + * + * For 2 byte acceleration to be correct we require that any non-escape sequence + * characters xy from the accel state has the same effect as just the character + * of y. + * + * The current way of ensuring this is to require: + * (a) all edges out of the cyclic state behave identically to the cyclic self + * loop edge + * (b) edges out of the neighbouring state which do not correspond to escape + * string behave identical to the cyclic state edges. + * + * TODO: these restrictions could be relaxed by looking at the effect on + * relevant (live?) vars only, allowing additions to the escape string set, and + * considering one byte escapes. 
+ */ +static +bool allow_two_byte_accel(const GoughGraph &g, + const map<gough_edge_id, vector<gough_ins> > &blocks, + GoughVertex v, const GoughEdge &self_loop) { + if (contains(blocks, gough_edge_id(g, self_loop))) { + DEBUG_PRINTF("edge plan on self loop\n"); + const auto &block_sl = blocks.at(gough_edge_id(g, self_loop)); + + set<GoughVertex> succs; + for (const auto &e : out_edges_range(v, g)) { + if (g[e].reach.none()) { /* ignore top edges */ + continue; + } + + gough_edge_id ged(g, e); + if (!contains(blocks, ged) || blocks.at(ged) != block_sl) { + DEBUG_PRINTF("different out-edge behaviour\n"); + return false; + } + succs.insert(target(e, g)); + } + + for (auto w : adjacent_vertices_range(v, g)) { + if (w != v && !verify_neighbour(g, w, blocks, succs, block_sl)) { + return false; + } + } + } else { + DEBUG_PRINTF("no edge plan on self loop\n"); + set<GoughVertex> succs; + for (const auto &e : out_edges_range(v, g)) { + if (g[e].reach.none()) { /* ignore top edges */ + continue; + } + + gough_edge_id ged(g, e); + if (contains(blocks, ged)) { + DEBUG_PRINTF("different out-edge behaviour\n"); + return false; + } + succs.insert(target(e, g)); + + for (auto w : adjacent_vertices_range(v, g)) { + if (w != v && !verify_neighbour_no_block(g, w, blocks, succs)) { + return false; + } + } + } + } + + DEBUG_PRINTF("allowing two byte accel for %u\n", g[v].state_id); + return true; +} + +void find_allowed_accel_states(const GoughGraph &g, + const map<gough_edge_id, vector<gough_ins> > &blocks, + map<dstate_id_t, gough_accel_state_info> *out) { + for (auto v : vertices_range(g)) { + GoughEdge e; + if (!find_normal_self_loop(v, g, &e)) { + continue; /* not accelerable */ + } + u32 margin = 0; + if (!can_accel_over_selfloop(g[v], e, g[e], &margin)) { + continue; /* not accelerable */ + } + bool tba = allow_two_byte_accel(g, blocks, v, e); + out->emplace(g[v].state_id, gough_accel_state_info(margin, tba)); + } +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfa/goughcompile_dump.h b/contrib/libs/hyperscan/src/nfa/goughcompile_dump.h index 25db6b2a39..f63983a791 100644 --- a/contrib/libs/hyperscan/src/nfa/goughcompile_dump.h +++ b/contrib/libs/hyperscan/src/nfa/goughcompile_dump.h @@ -1,63 +1,63 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef GOUGHCOMPILE_DUMP_H -#define GOUGHCOMPILE_DUMP_H - -#include "goughcompile_internal.h" - -#include <map> -#include <string> - -namespace ue2 { - -struct Grey; -#ifdef DUMP_SUPPORT - -std::string dump_name(const GoughVertexProps &vp); -std::string dump_name(const gough_edge_id &e); -void dump(const GoughGraph &g, const std::string &base, const Grey &grey); -void dump_blocks(const std::map<gough_edge_id, std::vector<gough_ins> > &blocks, - const std::string &base, const Grey &grey); -#else - -static UNUSED -void dump(UNUSED const GoughGraph &g, UNUSED const std::string &base, - UNUSED const Grey &grey) { -} -static UNUSED -void dump_blocks( - UNUSED const std::map<gough_edge_id, std::vector<gough_ins> > &blocks, - UNUSED const std::string &base, UNUSED const Grey &grey) { -} - -#endif - -} // namespace ue2 - -#endif +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef GOUGHCOMPILE_DUMP_H +#define GOUGHCOMPILE_DUMP_H + +#include "goughcompile_internal.h" + +#include <map> +#include <string> + +namespace ue2 { + +struct Grey; +#ifdef DUMP_SUPPORT + +std::string dump_name(const GoughVertexProps &vp); +std::string dump_name(const gough_edge_id &e); +void dump(const GoughGraph &g, const std::string &base, const Grey &grey); +void dump_blocks(const std::map<gough_edge_id, std::vector<gough_ins> > &blocks, + const std::string &base, const Grey &grey); +#else + +static UNUSED +void dump(UNUSED const GoughGraph &g, UNUSED const std::string &base, + UNUSED const Grey &grey) { +} +static UNUSED +void dump_blocks( + UNUSED const std::map<gough_edge_id, std::vector<gough_ins> > &blocks, + UNUSED const std::string &base, UNUSED const Grey &grey) { +} + +#endif + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/nfa/goughcompile_internal.h b/contrib/libs/hyperscan/src/nfa/goughcompile_internal.h index dbf2d1e3f7..e64540523b 100644 --- a/contrib/libs/hyperscan/src/nfa/goughcompile_internal.h +++ b/contrib/libs/hyperscan/src/nfa/goughcompile_internal.h @@ -1,225 +1,225 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef GOUGHCOMPILE_INTERNAL_H -#define GOUGHCOMPILE_INTERNAL_H - -#include "gough_internal.h" -#include "mcclellancompile.h" -#include "ue2common.h" -#include "util/charreach.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef GOUGHCOMPILE_INTERNAL_H +#define GOUGHCOMPILE_INTERNAL_H + +#include "gough_internal.h" +#include "mcclellancompile.h" +#include "ue2common.h" +#include "util/charreach.h" #include "util/flat_containers.h" #include "util/noncopyable.h" -#include "util/order_check.h" - -#include <map> -#include <memory> -#include <set> -#include <vector> - -#include <boost/graph/adjacency_list.hpp> - -namespace ue2 { - -struct Grey; -struct GoughSSAVar; -struct GoughSSAVarJoin; - -struct GoughVertexProps { - GoughVertexProps() {} - explicit GoughVertexProps(u32 state_in) : state_id(state_in) {} - u32 state_id = ~0U; - - std::vector<std::shared_ptr<GoughSSAVarJoin> > vars; /* owns variables */ - - std::vector<std::pair<ReportID, GoughSSAVar *> > reports; /**< report som, - som variable */ - std::vector<std::pair<ReportID, GoughSSAVar *> > reports_eod; -}; - -struct GoughEdgeProps { - GoughEdgeProps(void) : top(false) {} - bool top; - CharReach reach; - - std::vector<std::shared_ptr<GoughSSAVar> > vars; /* owns variables */ -}; - -struct GoughGraphProps { - boost::adjacency_list_traits<boost::vecS, boost::vecS>::vertex_descriptor - initial_vertex; /* for triggered nfas, dead state; - * for others start anchored or start floating - */ -}; - -typedef boost::adjacency_list<boost::vecS, boost::vecS, boost::bidirectionalS, - GoughVertexProps, GoughEdgeProps, GoughGraphProps> GoughGraph; - -typedef GoughGraph::vertex_descriptor GoughVertex; -typedef GoughGraph::edge_descriptor GoughEdge; - -struct gough_edge_id { - gough_edge_id(const GoughGraph &g, const GoughEdge &e) - : src(g[source(e, g)].state_id), dest(g[target(e, g)].state_id), - first_char(g[e].reach.find_first()) {} - bool operator<(const gough_edge_id &b) const { - const gough_edge_id &a = *this; - ORDER_CHECK(src); - ORDER_CHECK(dest); - ORDER_CHECK(first_char); - return false; - } - const u32 src; - const u32 dest; - const u32 first_char; /* ~0U if only top */ -}; - -struct GoughSSAVarWithInputs; -struct GoughSSAVarMin; -struct GoughSSAVarJoin; - +#include "util/order_check.h" + +#include <map> +#include <memory> +#include <set> +#include <vector> + +#include <boost/graph/adjacency_list.hpp> + +namespace ue2 { + +struct Grey; +struct GoughSSAVar; +struct GoughSSAVarJoin; + +struct GoughVertexProps { + GoughVertexProps() {} + explicit GoughVertexProps(u32 state_in) : state_id(state_in) {} + u32 state_id = ~0U; + + std::vector<std::shared_ptr<GoughSSAVarJoin> > vars; /* owns variables */ + + std::vector<std::pair<ReportID, GoughSSAVar *> > reports; /**< 
report som, + som variable */ + std::vector<std::pair<ReportID, GoughSSAVar *> > reports_eod; +}; + +struct GoughEdgeProps { + GoughEdgeProps(void) : top(false) {} + bool top; + CharReach reach; + + std::vector<std::shared_ptr<GoughSSAVar> > vars; /* owns variables */ +}; + +struct GoughGraphProps { + boost::adjacency_list_traits<boost::vecS, boost::vecS>::vertex_descriptor + initial_vertex; /* for triggered nfas, dead state; + * for others start anchored or start floating + */ +}; + +typedef boost::adjacency_list<boost::vecS, boost::vecS, boost::bidirectionalS, + GoughVertexProps, GoughEdgeProps, GoughGraphProps> GoughGraph; + +typedef GoughGraph::vertex_descriptor GoughVertex; +typedef GoughGraph::edge_descriptor GoughEdge; + +struct gough_edge_id { + gough_edge_id(const GoughGraph &g, const GoughEdge &e) + : src(g[source(e, g)].state_id), dest(g[target(e, g)].state_id), + first_char(g[e].reach.find_first()) {} + bool operator<(const gough_edge_id &b) const { + const gough_edge_id &a = *this; + ORDER_CHECK(src); + ORDER_CHECK(dest); + ORDER_CHECK(first_char); + return false; + } + const u32 src; + const u32 dest; + const u32 first_char; /* ~0U if only top */ +}; + +struct GoughSSAVarWithInputs; +struct GoughSSAVarMin; +struct GoughSSAVarJoin; + struct GoughSSAVar : noncopyable { - GoughSSAVar(void) : seen(false), slot(INVALID_SLOT) {} - virtual ~GoughSSAVar(); + GoughSSAVar(void) : seen(false), slot(INVALID_SLOT) {} + virtual ~GoughSSAVar(); const flat_set<GoughSSAVar *> &get_inputs() const { - return inputs; - } + return inputs; + } const flat_set<GoughSSAVarWithInputs *> &get_outputs() const { - return outputs; - } - virtual void replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) = 0; - - virtual void generate(std::vector<gough_ins> *out) const = 0; - - bool seen; /* for temp use by remove_dead alg */ - u32 slot; - - void clear_outputs(); - - /** remove all inputs and outputs of the vertex, call before - * removing vertex */ - virtual void clear_all() { - clear_outputs(); - } -protected: + return outputs; + } + virtual void replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) = 0; + + virtual void generate(std::vector<gough_ins> *out) const = 0; + + bool seen; /* for temp use by remove_dead alg */ + u32 slot; + + void clear_outputs(); + + /** remove all inputs and outputs of the vertex, call before + * removing vertex */ + virtual void clear_all() { + clear_outputs(); + } +protected: flat_set<GoughSSAVar *> inputs; flat_set<GoughSSAVarWithInputs *> outputs; - friend struct GoughSSAVarWithInputs; - friend struct GoughSSAVarMin; - friend struct GoughSSAVarJoin; -}; - -struct GoughSSAVarNew : public GoughSSAVar { - explicit GoughSSAVarNew(u32 adjust_in) : adjust(adjust_in) {} - - void replace_input(GoughSSAVar *, GoughSSAVar *) override { - assert(0); - } - - void generate(std::vector<gough_ins> *out) const override; - - const u32 adjust; -}; - -struct GoughSSAVarWithInputs : public GoughSSAVar { - GoughSSAVarWithInputs(void) {} - void replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) override = 0; - virtual void clear_inputs() = 0; - void clear_all() override; -protected: - virtual void remove_input_raw(GoughSSAVar *v) = 0; - friend struct GoughSSAVar; -}; - -struct GoughSSAVarMin : public GoughSSAVarWithInputs { - GoughSSAVarMin(void) {} - void generate(std::vector<gough_ins> *out) const override; - - void clear_inputs() override; - void replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) override; - - virtual void add_input(GoughSSAVar *v) { - inputs.insert(v); - 
v->outputs.insert(this); - } - -protected: - void remove_input_raw(GoughSSAVar *v) override; -}; - -struct GoughSSAVarJoin : public GoughSSAVarWithInputs { - GoughSSAVarJoin(void) {} - - /* dummy; all joins at a point must be generated simultaneously */ - void generate(std::vector<gough_ins> *out) const override; - GoughSSAVar *get_input(const GoughEdge &prev) const; - - void clear_inputs() override; - void replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) override; - - void add_input(GoughSSAVar *v, GoughEdge prev); - + friend struct GoughSSAVarWithInputs; + friend struct GoughSSAVarMin; + friend struct GoughSSAVarJoin; +}; + +struct GoughSSAVarNew : public GoughSSAVar { + explicit GoughSSAVarNew(u32 adjust_in) : adjust(adjust_in) {} + + void replace_input(GoughSSAVar *, GoughSSAVar *) override { + assert(0); + } + + void generate(std::vector<gough_ins> *out) const override; + + const u32 adjust; +}; + +struct GoughSSAVarWithInputs : public GoughSSAVar { + GoughSSAVarWithInputs(void) {} + void replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) override = 0; + virtual void clear_inputs() = 0; + void clear_all() override; +protected: + virtual void remove_input_raw(GoughSSAVar *v) = 0; + friend struct GoughSSAVar; +}; + +struct GoughSSAVarMin : public GoughSSAVarWithInputs { + GoughSSAVarMin(void) {} + void generate(std::vector<gough_ins> *out) const override; + + void clear_inputs() override; + void replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) override; + + virtual void add_input(GoughSSAVar *v) { + inputs.insert(v); + v->outputs.insert(this); + } + +protected: + void remove_input_raw(GoughSSAVar *v) override; +}; + +struct GoughSSAVarJoin : public GoughSSAVarWithInputs { + GoughSSAVarJoin(void) {} + + /* dummy; all joins at a point must be generated simultaneously */ + void generate(std::vector<gough_ins> *out) const override; + GoughSSAVar *get_input(const GoughEdge &prev) const; + + void clear_inputs() override; + void replace_input(GoughSSAVar *old_v, GoughSSAVar *new_v) override; + + void add_input(GoughSSAVar *v, GoughEdge prev); + const flat_set<GoughEdge> &get_edges_for_input(GoughSSAVar *input) const; const std::map<GoughSSAVar *, flat_set<GoughEdge>> &get_input_map() const; - -protected: - void remove_input_raw(GoughSSAVar *v) override; - -private: + +protected: + void remove_input_raw(GoughSSAVar *v) override; + +private: std::map<GoughSSAVar *, flat_set<GoughEdge>> input_map; -}; - -struct gough_accel_state_info { - u32 margin; - bool two_byte; - - gough_accel_state_info(u32 margin_in, bool two_byte_in) - : margin(margin_in), two_byte(two_byte_in) { - } -}; - -u32 assign_slots(GoughGraph &g, const Grey &grey); -void find_allowed_accel_states(const GoughGraph &g, - const std::map<gough_edge_id, std::vector<gough_ins> > &blocks, - std::map<dstate_id_t, gough_accel_state_info> *out); -bool find_normal_self_loop(GoughVertex v, const GoughGraph &g, GoughEdge *out); - -} // namespace ue2 - -// Note: C structure, can't be in namespace ue2 -static inline -bool operator==(const gough_ins &a, const gough_ins &b) { - return a.op == b.op && a.dest == b.dest && a.src == b.src; -} - -static inline -bool operator<(const gough_ins &a, const gough_ins &b) { - return std::tie(a.op, a.src, a.dest) < std::tie(b.op, b.src, b.dest); -} - -#endif +}; + +struct gough_accel_state_info { + u32 margin; + bool two_byte; + + gough_accel_state_info(u32 margin_in, bool two_byte_in) + : margin(margin_in), two_byte(two_byte_in) { + } +}; + +u32 assign_slots(GoughGraph &g, const Grey &grey); 
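/* A minimal sketch of how the SSA variable classes above fit together
 * (illustration only; the wiring shown is hypothetical but uses the members
 * declared above). add_input() keeps the def/use graph doubly linked: the
 * producer lands in the consumer's inputs and the consumer in the producer's
 * outputs, which is what lets clear_all() and replace_input() unlink a
 * variable from either side.
 *
 *     GoughSSAVarNew def(0);   // value defined as current offset + 0
 *     GoughSSAVarMin m;        // takes min() over all of its inputs
 *     m.add_input(&def);       // inserts &def into m's inputs and
 *                              // &m into def's outputs
 */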
+void find_allowed_accel_states(const GoughGraph &g, + const std::map<gough_edge_id, std::vector<gough_ins> > &blocks, + std::map<dstate_id_t, gough_accel_state_info> *out); +bool find_normal_self_loop(GoughVertex v, const GoughGraph &g, GoughEdge *out); + +} // namespace ue2 + +// Note: C structure, can't be in namespace ue2 +static inline +bool operator==(const gough_ins &a, const gough_ins &b) { + return a.op == b.op && a.dest == b.dest && a.src == b.src; +} + +static inline +bool operator<(const gough_ins &a, const gough_ins &b) { + return std::tie(a.op, a.src, a.dest) < std::tie(b.op, b.src, b.dest); +} + +#endif diff --git a/contrib/libs/hyperscan/src/nfa/goughcompile_reg.cpp b/contrib/libs/hyperscan/src/nfa/goughcompile_reg.cpp index 7d9e2e4b1c..48e515b9ad 100644 --- a/contrib/libs/hyperscan/src/nfa/goughcompile_reg.cpp +++ b/contrib/libs/hyperscan/src/nfa/goughcompile_reg.cpp @@ -1,502 +1,502 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "goughcompile.h" -#include "goughcompile_dump.h" -#include "goughcompile_internal.h" -#include "gough_internal.h" -#include "grey.h" -#include "util/container.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "goughcompile.h" +#include "goughcompile_dump.h" +#include "goughcompile_internal.h" +#include "gough_internal.h" +#include "grey.h" +#include "util/container.h" #include "util/flat_containers.h" -#include "util/graph.h" -#include "util/graph_range.h" -#include "util/order_check.h" - -#include "ue2common.h" - -#include <algorithm> -#include <boost/graph/depth_first_search.hpp> -#include <boost/range/adaptor/map.hpp> - -using namespace std; -using boost::adaptors::map_values; - -namespace ue2 { - -template<typename VarP, typename VarQ> -void push_back_all_raw(vector<VarP> *out, const vector<VarQ> &in) { - for (const auto &var : in) { - out->push_back(var.get()); - } -} - -static -void all_vars(const GoughGraph &g, vector<GoughSSAVar *> *out) { - for (auto v : vertices_range(g)) { - push_back_all_raw(out, g[v].vars); - } - for (const auto &e : edges_range(g)) { - push_back_all_raw(out, g[e].vars); - } -} - -namespace { -struct GoughGraphAux { - map<const GoughSSAVar *, GoughVertex> containing_v; - map<const GoughSSAVar *, GoughEdge> containing_e; - map<const GoughSSAVar *, set<GoughVertex> > reporters; -}; -} - -static never_inline -void fill_aux(const GoughGraph &g, GoughGraphAux *aux) { - for (auto v : vertices_range(g)) { - for (const auto &var : g[v].vars) { - aux->containing_v[var.get()] = v; - DEBUG_PRINTF("%u is on vertex %u\n", var->slot, g[v].state_id); - } - - for (GoughSSAVar *var : g[v].reports | map_values) { - aux->reporters[var].insert(v); - } - - for (GoughSSAVar *var : g[v].reports_eod | map_values) { - aux->reporters[var].insert(v); - } - } - for (const auto &e : edges_range(g)) { - for (const auto &var : g[e].vars) { - aux->containing_e[var.get()] = e; - DEBUG_PRINTF("%u is on edge %u->%u\n", var->slot, - g[source(e, g)].state_id, g[target(e, g)].state_id); - } - } -} - -static -bool is_block_local(const GoughGraph &cfg, GoughSSAVar *var, - const GoughGraphAux &aux) { - /* if var used as a report, it cannot be considered block local */ - if (contains(aux.reporters, var)) { - return false; - } - - /* (useful) vertex/join vars never local - they are terminal in blocks - * and so should be read by another block. */ - if (!contains(aux.containing_e, var)) { - return false; - } - - /* for other cases, require that all uses of var are later in the same edge - * or on the target AND if on target it is sole on flow coming from the - * edge in question. 
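 * (Hypothetical illustration: if v is defined on edge E = s->t, read once
 * more later on E, and read by a join variable at t whose only incoming flow
 * for v is E itself, then every use is accounted for and v is block local;
 * if the join at t could also receive v along a different edge, v escapes
 * the block and must keep a stream-state slot.)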
*/ - const GoughEdge &e = aux.containing_e.at(var); - GoughVertex t = target(e, cfg); - - size_t seen_outputs = 0; - const flat_set<GoughSSAVarWithInputs *> &out = var->get_outputs(); - bool seen_var = false; - for (const auto &e_var : cfg[e].vars) { - if (seen_var) { - GoughSSAVarWithInputs *w - = dynamic_cast<GoughSSAVarWithInputs *>(e_var.get()); - if (contains(out, w)) { - seen_outputs++; - } - } else { - seen_var = var == e_var.get(); - } - } - assert(seen_var); - - for (const auto &t_var : cfg[t].vars) { - if (contains(out, t_var.get())) { - seen_outputs++; - const flat_set<GoughEdge> &flow = t_var->get_edges_for_input(var); - if (flow.size() != 1 || *flow.begin() != e) { - /* this var is used by the target join var BUT on a different - * flow, so this is not a block local variable */ - return false; - } - } - } - - assert(seen_outputs <= out.size()); - return seen_outputs == out.size(); -} - -static -void handle_pending_edge(const GoughGraph &g, const GoughEdge &e, - GoughSSAVar *start, set<GoughVertex> &pending_vertex, - set<const GoughSSAVar *> &rv) { - const vector<shared_ptr<GoughSSAVar> > &vars = g[e].vars; - bool marking = !start; - DEBUG_PRINTF(" ---checking edge %u->%u %s %zu\n", g[source(e, g)].state_id, - g[target(e, g)].state_id, marking ? "full" : "partial", - vars.size()); - for (auto it = vars.rbegin(); it != vars.rend(); ++it) { - GoughSSAVar *var = it->get(); - if (contains(rv, var)) { - DEBUG_PRINTF("somebody has already processed this vertex [%u]\n", - var->slot); - return; - } - if (var == start) { - assert(!marking); - marking = true; - continue; - } - if (marking) { - rv.insert(var); - } - } - assert(marking); - GoughVertex s = source(e, g); - for (const auto &var : g[s].vars) { - DEBUG_PRINTF("interferes %u\n", var->slot); - rv.insert(var.get()); - } - pending_vertex.insert(s); -} - -static -void handle_pending_vars(GoughSSAVar *def, const GoughGraph &g, - const GoughGraphAux &aux, - const flat_set<GoughSSAVarWithInputs *> &pending_var, - set<GoughVertex> &pending_vertex, - set<const GoughSSAVar *> &rv) { - for (GoughSSAVarWithInputs *var : pending_var) { - if (contains(aux.containing_v, var)) { - /* def is used by join vertex, value only needs to be live on some - * incoming edges */ - GoughSSAVarJoin *vj = (GoughSSAVarJoin *)var; - const flat_set<GoughEdge> &live_edges - = vj->get_edges_for_input(def); - for (const auto &e : live_edges) { - handle_pending_edge(g, e, nullptr, pending_vertex, rv); - } - continue; - } - const GoughEdge &e = aux.containing_e.at(var); - handle_pending_edge(g, e, var, pending_vertex, rv); - } -} - -static -void handle_pending_vertex(GoughVertex def_v, const GoughGraph &g, - GoughVertex current, - set<GoughVertex> &pending_vertex, - set<const GoughSSAVar *> &rv) { - DEBUG_PRINTF("---checking vertex %u\n", g[current].state_id); - if (def_v == current) { - DEBUG_PRINTF("contains target vertex\n"); - return; /* we have reached def */ - } - for (const auto &e : in_edges_range(current, g)) { - handle_pending_edge(g, e, nullptr, pending_vertex, rv); - } -} - -static -void handle_pending_vertices(GoughSSAVar *def, const GoughGraph &g, - const GoughGraphAux &aux, - set<GoughVertex> &pending_vertex, - set<const GoughSSAVar *> &rv) { - if (pending_vertex.empty()) { - return; - } - - GoughVertex def_v = GoughGraph::null_vertex(); - if (contains(aux.containing_v, def)) { - def_v = aux.containing_v.at(def); - } +#include "util/graph.h" +#include "util/graph_range.h" +#include "util/order_check.h" + +#include "ue2common.h" + +#include <algorithm> 
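/* (Orientation for the register-allocation pass that follows: live_during()
 * walks each use backwards over edge and vertex variable lists via a
 * worklist to collect everything live alongside a definition, and
 * sequential_vertex_colouring() then gives every non-block-local variable
 * the smallest colour not already taken by a member of that live set.) */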
+#include <boost/graph/depth_first_search.hpp> +#include <boost/range/adaptor/map.hpp> + +using namespace std; +using boost::adaptors::map_values; + +namespace ue2 { + +template<typename VarP, typename VarQ> +void push_back_all_raw(vector<VarP> *out, const vector<VarQ> &in) { + for (const auto &var : in) { + out->push_back(var.get()); + } +} + +static +void all_vars(const GoughGraph &g, vector<GoughSSAVar *> *out) { + for (auto v : vertices_range(g)) { + push_back_all_raw(out, g[v].vars); + } + for (const auto &e : edges_range(g)) { + push_back_all_raw(out, g[e].vars); + } +} + +namespace { +struct GoughGraphAux { + map<const GoughSSAVar *, GoughVertex> containing_v; + map<const GoughSSAVar *, GoughEdge> containing_e; + map<const GoughSSAVar *, set<GoughVertex> > reporters; +}; +} + +static never_inline +void fill_aux(const GoughGraph &g, GoughGraphAux *aux) { + for (auto v : vertices_range(g)) { + for (const auto &var : g[v].vars) { + aux->containing_v[var.get()] = v; + DEBUG_PRINTF("%u is on vertex %u\n", var->slot, g[v].state_id); + } + + for (GoughSSAVar *var : g[v].reports | map_values) { + aux->reporters[var].insert(v); + } + + for (GoughSSAVar *var : g[v].reports_eod | map_values) { + aux->reporters[var].insert(v); + } + } + for (const auto &e : edges_range(g)) { + for (const auto &var : g[e].vars) { + aux->containing_e[var.get()] = e; + DEBUG_PRINTF("%u is on edge %u->%u\n", var->slot, + g[source(e, g)].state_id, g[target(e, g)].state_id); + } + } +} + +static +bool is_block_local(const GoughGraph &cfg, GoughSSAVar *var, + const GoughGraphAux &aux) { + /* if var used as a report, it cannot be considered block local */ + if (contains(aux.reporters, var)) { + return false; + } + + /* (useful) vertex/join vars never local - they are terminal in blocks + * and so should be read by another block. */ + if (!contains(aux.containing_e, var)) { + return false; + } + + /* for other cases, require that all uses of var are later in the same edge + * or on the target AND if on target it is sole on flow coming from the + * edge in question. */ + const GoughEdge &e = aux.containing_e.at(var); + GoughVertex t = target(e, cfg); + + size_t seen_outputs = 0; + const flat_set<GoughSSAVarWithInputs *> &out = var->get_outputs(); + bool seen_var = false; + for (const auto &e_var : cfg[e].vars) { + if (seen_var) { + GoughSSAVarWithInputs *w + = dynamic_cast<GoughSSAVarWithInputs *>(e_var.get()); + if (contains(out, w)) { + seen_outputs++; + } + } else { + seen_var = var == e_var.get(); + } + } + assert(seen_var); + + for (const auto &t_var : cfg[t].vars) { + if (contains(out, t_var.get())) { + seen_outputs++; + const flat_set<GoughEdge> &flow = t_var->get_edges_for_input(var); + if (flow.size() != 1 || *flow.begin() != e) { + /* this var is used by the target join var BUT on a different + * flow, so this is not a block local variable */ + return false; + } + } + } + + assert(seen_outputs <= out.size()); + return seen_outputs == out.size(); +} + +static +void handle_pending_edge(const GoughGraph &g, const GoughEdge &e, + GoughSSAVar *start, set<GoughVertex> &pending_vertex, + set<const GoughSSAVar *> &rv) { + const vector<shared_ptr<GoughSSAVar> > &vars = g[e].vars; + bool marking = !start; + DEBUG_PRINTF(" ---checking edge %u->%u %s %zu\n", g[source(e, g)].state_id, + g[target(e, g)].state_id, marking ? 
"full" : "partial", + vars.size()); + for (auto it = vars.rbegin(); it != vars.rend(); ++it) { + GoughSSAVar *var = it->get(); + if (contains(rv, var)) { + DEBUG_PRINTF("somebody has already processed this vertex [%u]\n", + var->slot); + return; + } + if (var == start) { + assert(!marking); + marking = true; + continue; + } + if (marking) { + rv.insert(var); + } + } + assert(marking); + GoughVertex s = source(e, g); + for (const auto &var : g[s].vars) { + DEBUG_PRINTF("interferes %u\n", var->slot); + rv.insert(var.get()); + } + pending_vertex.insert(s); +} + +static +void handle_pending_vars(GoughSSAVar *def, const GoughGraph &g, + const GoughGraphAux &aux, + const flat_set<GoughSSAVarWithInputs *> &pending_var, + set<GoughVertex> &pending_vertex, + set<const GoughSSAVar *> &rv) { + for (GoughSSAVarWithInputs *var : pending_var) { + if (contains(aux.containing_v, var)) { + /* def is used by join vertex, value only needs to be live on some + * incoming edges */ + GoughSSAVarJoin *vj = (GoughSSAVarJoin *)var; + const flat_set<GoughEdge> &live_edges + = vj->get_edges_for_input(def); + for (const auto &e : live_edges) { + handle_pending_edge(g, e, nullptr, pending_vertex, rv); + } + continue; + } + const GoughEdge &e = aux.containing_e.at(var); + handle_pending_edge(g, e, var, pending_vertex, rv); + } +} + +static +void handle_pending_vertex(GoughVertex def_v, const GoughGraph &g, + GoughVertex current, + set<GoughVertex> &pending_vertex, + set<const GoughSSAVar *> &rv) { + DEBUG_PRINTF("---checking vertex %u\n", g[current].state_id); + if (def_v == current) { + DEBUG_PRINTF("contains target vertex\n"); + return; /* we have reached def */ + } + for (const auto &e : in_edges_range(current, g)) { + handle_pending_edge(g, e, nullptr, pending_vertex, rv); + } +} + +static +void handle_pending_vertices(GoughSSAVar *def, const GoughGraph &g, + const GoughGraphAux &aux, + set<GoughVertex> &pending_vertex, + set<const GoughSSAVar *> &rv) { + if (pending_vertex.empty()) { + return; + } + + GoughVertex def_v = GoughGraph::null_vertex(); + if (contains(aux.containing_v, def)) { + def_v = aux.containing_v.at(def); + } unordered_set<GoughVertex> done; - while (!pending_vertex.empty()) { - GoughVertex current = *pending_vertex.begin(); - pending_vertex.erase(current); - if (contains(done, current)) { - continue; - } - done.insert(current); - handle_pending_vertex(def_v, g, current, pending_vertex, rv); - } -} - -/* returns set of labels that the given def is live at */ -static never_inline -set<const GoughSSAVar *> live_during(GoughSSAVar *def, const GoughGraph &g, - const GoughGraphAux &aux) { - DEBUG_PRINTF("checking who is defined during %u lifetime\n", def->slot); - set<GoughVertex> pending_vertex; - - set<const GoughSSAVar *> rv; - rv.insert(def); - - if (contains(aux.reporters, def)) { - DEBUG_PRINTF("--> gets reported\n"); - const set<GoughVertex> &reporters = aux.reporters.at(def); - for (auto v : reporters) { - pending_vertex.insert(v); - for (const auto &var : g[v].vars) { - DEBUG_PRINTF("interferes %u\n", var->slot); - rv.insert(var.get()); - } - } - } - - handle_pending_vars(def, g, aux, def->get_outputs(), pending_vertex, rv); - handle_pending_vertices(def, g, aux, pending_vertex, rv); - - rv.erase(def); - return rv; -} - -template<typename VarP> -void set_initial_slots(const vector<VarP> &vars, u32 *next_slot) { - for (auto &var : vars) { - assert(var->slot == INVALID_SLOT); - var->slot = (*next_slot)++; - } -} - -/* crude, deterministic assignment of symbolic register slots. 
- * returns number of slots given out - */ -static -u32 initial_slots(const GoughGraph &g) { - u32 next_slot = 0; - for (auto v : vertices_range(g)) { - set_initial_slots(g[v].vars, &next_slot); - } - for (const auto &e : edges_range(g)) { - set_initial_slots(g[e].vars, &next_slot); - } - - return next_slot; -} - -#define NO_COLOUR (~0U) - -static -u32 available_colour(const flat_set<u32> &bad_colours) { - u32 rv = 0; - for (const u32 &colour : bad_colours) { - if (colour != rv) { - assert(colour > rv); - break; - } - rv = colour + 1; - } - - assert(rv != NO_COLOUR); - return rv; -} - -static -void poison_colours(const set<const GoughSSAVar *> &live, u32 c, - const vector<u32> &colour_map, - vector<flat_set<u32> > *bad_colour) { - for (const GoughSSAVar *var : live) { - u32 var_index = var->slot; - if (colour_map[var_index] != NO_COLOUR) { - assert(c != colour_map[var_index]); - } else { - (*bad_colour)[var_index].insert(c); - } - } -} - -static -void find_bad_due_to_live(const set<const GoughSSAVar *> &live, - const vector<u32> &colour_map, flat_set<u32> *out) { - for (const GoughSSAVar *var : live) { - u32 var_index = var->slot; - if (colour_map[var_index] != NO_COLOUR) { - out->insert(colour_map[var_index]); - } - } -} - -static -void sequential_vertex_colouring(const GoughGraph &g, const GoughGraphAux &aux, - const vector<GoughSSAVar *> &order, - vector<u32> &colour_map) { - assert(order.size() < NO_COLOUR); - colour_map.clear(); - colour_map.resize(order.size(), NO_COLOUR); - vector<u32> temp(order.size(), ~0U); - vector<flat_set<u32> > bad_colour(order.size()); - - for (GoughSSAVar *var : order) { - u32 var_index = var->slot; - if (is_block_local(g, var, aux)) { - DEBUG_PRINTF("%u is block local\n", var_index); - /* ignore variable whose lifetime is limited to their local block - * there is no need to assign stream state to these variables */ - continue; - } - assert(colour_map[var_index] == NO_COLOUR); - set<const GoughSSAVar *> live = live_during(var, g, aux); - flat_set<u32> &local_bad = bad_colour[var_index]; - find_bad_due_to_live(live, colour_map, &local_bad); - DEBUG_PRINTF("colouring %u\n", var_index); - u32 c = available_colour(local_bad); - colour_map[var_index] = c; - assert(!contains(bad_colour[var_index], c)); - poison_colours(live, c, colour_map, &bad_colour); - - flat_set<u32> temp_set; - local_bad.swap(temp_set); - DEBUG_PRINTF(" %u coloured %u\n", var_index, c); - } -} - -template<typename VarP> -void add_to_dom_ordering(const vector<VarP> &vars, - vector<GoughSSAVar *> *out) { - for (const auto &var : vars) { - out->push_back(var.get()); - } -} - -namespace { -class FinishVisitor : public boost::default_dfs_visitor { -public: - explicit FinishVisitor(vector<GoughVertex> *o) : out(o) {} - void finish_vertex(const GoughVertex v, const GoughGraph &) { - out->push_back(v); - } - vector<GoughVertex> *out; -}; -} - -static -void find_dom_ordering(const GoughGraph &cfg, vector<GoughSSAVar *> *out) { - vector<GoughVertex> g_order; - - /* due to construction quirks, default vertex order provides entry points */ - depth_first_search(cfg, visitor(FinishVisitor(&g_order)) - .root_vertex(cfg[boost::graph_bundle].initial_vertex)); - - for (auto it = g_order.rbegin(); it != g_order.rend(); ++it) { - add_to_dom_ordering(cfg[*it].vars, out); - for (const auto &e : out_edges_range(*it, cfg)) { - add_to_dom_ordering(cfg[e].vars, out); - } - } -} - -static -void create_slot_mapping(const GoughGraph &cfg, UNUSED u32 old_slot_count, - vector<u32> *old_new) { - /* Interference graphs from 
SSA form are chordal -> optimally colourable in - * poly time. - * - * Chordal graphs can be coloured by walking in perfect elimination order. - * If the SSA CFG is iterated over in a way that respects dominance - * relationship, the interference graph will be iterated in a perfect - * elimination order. - * - * We can avoid creating the full interference graph and use liveness - * information as we iterate over the definitions to perform the colouring. - * - * See S Hack various 2006- - */ - vector<GoughSSAVar *> dom_order; - - GoughGraphAux aux; - fill_aux(cfg, &aux); - - find_dom_ordering(cfg, &dom_order); - assert(dom_order.size() == old_slot_count); - sequential_vertex_colouring(cfg, aux, dom_order, *old_new); -} - -static -void update_local_slots(GoughGraph &g, set<GoughSSAVar *> &locals, - u32 local_base) { - DEBUG_PRINTF("%zu local variables\n", locals.size()); - /* local variables only occur on edges (joins are never local) */ - - u32 allocated_count = 0; - for (const auto &e : edges_range(g)) { - u32 next_slot = local_base; - for (auto &var : g[e].vars) { - if (contains(locals, var.get())) { - DEBUG_PRINTF("updating slot %u using local %u\n", var->slot, - next_slot); - var->slot = next_slot++; - allocated_count++; - } - } - } - - assert(allocated_count == locals.size()); -} - -static never_inline -u32 update_slots(GoughGraph &g, const vector<u32> &old_new, - UNUSED u32 old_slot_count) { - vector<GoughSSAVar *> vars; - set<GoughSSAVar *> locals; - all_vars(g, &vars); - u32 slot_count = 0; - for (GoughSSAVar *v : vars) { - assert(v->slot < old_new.size()); - DEBUG_PRINTF("updating slot %u to %u\n", v->slot, old_new[v->slot]); - if (old_new[v->slot] != NO_COLOUR) { /* not local, assign final slot */ - v->slot = old_new[v->slot]; - ENSURE_AT_LEAST(&slot_count, v->slot + 1); - } else { - locals.insert(v); - } - } - assert(slot_count <= old_slot_count); - DEBUG_PRINTF("reduce stream slots from %u to %u\n", old_slot_count, - slot_count); - update_local_slots(g, locals, slot_count); - - return slot_count; -} - -u32 assign_slots(GoughGraph &cfg, const Grey &grey) { - u32 slot_count = initial_slots(cfg); - - if (!grey.goughRegisterAllocate) { - return slot_count; - } - dump(cfg, "slots_pre", grey); - - vector<u32> old_new; - create_slot_mapping(cfg, slot_count, &old_new); - slot_count = update_slots(cfg, old_new, slot_count); - - return slot_count; -} - -} // namespace ue2 + while (!pending_vertex.empty()) { + GoughVertex current = *pending_vertex.begin(); + pending_vertex.erase(current); + if (contains(done, current)) { + continue; + } + done.insert(current); + handle_pending_vertex(def_v, g, current, pending_vertex, rv); + } +} + +/* returns set of labels that the given def is live at */ +static never_inline +set<const GoughSSAVar *> live_during(GoughSSAVar *def, const GoughGraph &g, + const GoughGraphAux &aux) { + DEBUG_PRINTF("checking who is defined during %u lifetime\n", def->slot); + set<GoughVertex> pending_vertex; + + set<const GoughSSAVar *> rv; + rv.insert(def); + + if (contains(aux.reporters, def)) { + DEBUG_PRINTF("--> gets reported\n"); + const set<GoughVertex> &reporters = aux.reporters.at(def); + for (auto v : reporters) { + pending_vertex.insert(v); + for (const auto &var : g[v].vars) { + DEBUG_PRINTF("interferes %u\n", var->slot); + rv.insert(var.get()); + } + } + } + + handle_pending_vars(def, g, aux, def->get_outputs(), pending_vertex, rv); + handle_pending_vertices(def, g, aux, pending_vertex, rv); + + rv.erase(def); + return rv; +} + +template<typename VarP> +void 
set_initial_slots(const vector<VarP> &vars, u32 *next_slot) { + for (auto &var : vars) { + assert(var->slot == INVALID_SLOT); + var->slot = (*next_slot)++; + } +} + +/* crude, deterministic assignment of symbolic register slots. + * returns number of slots given out + */ +static +u32 initial_slots(const GoughGraph &g) { + u32 next_slot = 0; + for (auto v : vertices_range(g)) { + set_initial_slots(g[v].vars, &next_slot); + } + for (const auto &e : edges_range(g)) { + set_initial_slots(g[e].vars, &next_slot); + } + + return next_slot; +} + +#define NO_COLOUR (~0U) + +static +u32 available_colour(const flat_set<u32> &bad_colours) { + u32 rv = 0; + for (const u32 &colour : bad_colours) { + if (colour != rv) { + assert(colour > rv); + break; + } + rv = colour + 1; + } + + assert(rv != NO_COLOUR); + return rv; +} + +static +void poison_colours(const set<const GoughSSAVar *> &live, u32 c, + const vector<u32> &colour_map, + vector<flat_set<u32> > *bad_colour) { + for (const GoughSSAVar *var : live) { + u32 var_index = var->slot; + if (colour_map[var_index] != NO_COLOUR) { + assert(c != colour_map[var_index]); + } else { + (*bad_colour)[var_index].insert(c); + } + } +} + +static +void find_bad_due_to_live(const set<const GoughSSAVar *> &live, + const vector<u32> &colour_map, flat_set<u32> *out) { + for (const GoughSSAVar *var : live) { + u32 var_index = var->slot; + if (colour_map[var_index] != NO_COLOUR) { + out->insert(colour_map[var_index]); + } + } +} + +static +void sequential_vertex_colouring(const GoughGraph &g, const GoughGraphAux &aux, + const vector<GoughSSAVar *> &order, + vector<u32> &colour_map) { + assert(order.size() < NO_COLOUR); + colour_map.clear(); + colour_map.resize(order.size(), NO_COLOUR); + vector<u32> temp(order.size(), ~0U); + vector<flat_set<u32> > bad_colour(order.size()); + + for (GoughSSAVar *var : order) { + u32 var_index = var->slot; + if (is_block_local(g, var, aux)) { + DEBUG_PRINTF("%u is block local\n", var_index); + /* ignore variable whose lifetime is limited to their local block + * there is no need to assign stream state to these variables */ + continue; + } + assert(colour_map[var_index] == NO_COLOUR); + set<const GoughSSAVar *> live = live_during(var, g, aux); + flat_set<u32> &local_bad = bad_colour[var_index]; + find_bad_due_to_live(live, colour_map, &local_bad); + DEBUG_PRINTF("colouring %u\n", var_index); + u32 c = available_colour(local_bad); + colour_map[var_index] = c; + assert(!contains(bad_colour[var_index], c)); + poison_colours(live, c, colour_map, &bad_colour); + + flat_set<u32> temp_set; + local_bad.swap(temp_set); + DEBUG_PRINTF(" %u coloured %u\n", var_index, c); + } +} + +template<typename VarP> +void add_to_dom_ordering(const vector<VarP> &vars, + vector<GoughSSAVar *> *out) { + for (const auto &var : vars) { + out->push_back(var.get()); + } +} + +namespace { +class FinishVisitor : public boost::default_dfs_visitor { +public: + explicit FinishVisitor(vector<GoughVertex> *o) : out(o) {} + void finish_vertex(const GoughVertex v, const GoughGraph &) { + out->push_back(v); + } + vector<GoughVertex> *out; +}; +} + +static +void find_dom_ordering(const GoughGraph &cfg, vector<GoughSSAVar *> *out) { + vector<GoughVertex> g_order; + + /* due to construction quirks, default vertex order provides entry points */ + depth_first_search(cfg, visitor(FinishVisitor(&g_order)) + .root_vertex(cfg[boost::graph_bundle].initial_vertex)); + + for (auto it = g_order.rbegin(); it != g_order.rend(); ++it) { + add_to_dom_ordering(cfg[*it].vars, out); + for (const 
auto &e : out_edges_range(*it, cfg)) { + add_to_dom_ordering(cfg[e].vars, out); + } + } +} + +static +void create_slot_mapping(const GoughGraph &cfg, UNUSED u32 old_slot_count, + vector<u32> *old_new) { + /* Interference graphs from SSA form are chordal -> optimally colourable in + * poly time. + * + * Chordal graphs can be coloured by walking in perfect elimination order. + * If the SSA CFG is iterated over in a way that respects dominance + * relationship, the interference graph will be iterated in a perfect + * elimination order. + * + * We can avoid creating the full interference graph and use liveness + * information as we iterate over the definitions to perform the colouring. + * + * See S Hack various 2006- + */ + vector<GoughSSAVar *> dom_order; + + GoughGraphAux aux; + fill_aux(cfg, &aux); + + find_dom_ordering(cfg, &dom_order); + assert(dom_order.size() == old_slot_count); + sequential_vertex_colouring(cfg, aux, dom_order, *old_new); +} + +static +void update_local_slots(GoughGraph &g, set<GoughSSAVar *> &locals, + u32 local_base) { + DEBUG_PRINTF("%zu local variables\n", locals.size()); + /* local variables only occur on edges (joins are never local) */ + + u32 allocated_count = 0; + for (const auto &e : edges_range(g)) { + u32 next_slot = local_base; + for (auto &var : g[e].vars) { + if (contains(locals, var.get())) { + DEBUG_PRINTF("updating slot %u using local %u\n", var->slot, + next_slot); + var->slot = next_slot++; + allocated_count++; + } + } + } + + assert(allocated_count == locals.size()); +} + +static never_inline +u32 update_slots(GoughGraph &g, const vector<u32> &old_new, + UNUSED u32 old_slot_count) { + vector<GoughSSAVar *> vars; + set<GoughSSAVar *> locals; + all_vars(g, &vars); + u32 slot_count = 0; + for (GoughSSAVar *v : vars) { + assert(v->slot < old_new.size()); + DEBUG_PRINTF("updating slot %u to %u\n", v->slot, old_new[v->slot]); + if (old_new[v->slot] != NO_COLOUR) { /* not local, assign final slot */ + v->slot = old_new[v->slot]; + ENSURE_AT_LEAST(&slot_count, v->slot + 1); + } else { + locals.insert(v); + } + } + assert(slot_count <= old_slot_count); + DEBUG_PRINTF("reduce stream slots from %u to %u\n", old_slot_count, + slot_count); + update_local_slots(g, locals, slot_count); + + return slot_count; +} + +u32 assign_slots(GoughGraph &cfg, const Grey &grey) { + u32 slot_count = initial_slots(cfg); + + if (!grey.goughRegisterAllocate) { + return slot_count; + } + dump(cfg, "slots_pre", grey); + + vector<u32> old_new; + create_slot_mapping(cfg, slot_count, &old_new); + slot_count = update_slots(cfg, old_new, slot_count); + + return slot_count; +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfa/lbr.c b/contrib/libs/hyperscan/src/nfa/lbr.c index c0433f87ca..d403733a65 100644 --- a/contrib/libs/hyperscan/src/nfa/lbr.c +++ b/contrib/libs/hyperscan/src/nfa/lbr.c @@ -1,531 +1,531 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Large Bounded Repeat (LBR) engine: runtime code. - */ -#include "lbr.h" - -#include "lbr_internal.h" -#include "nfa_api.h" -#include "nfa_api_queue.h" -#include "nfa_internal.h" -#include "repeat.h" -#include "repeat_internal.h" -#include "shufti.h" -#include "truffle.h" -#include "vermicelli.h" -#include "util/partial_store.h" -#include "util/unaligned.h" - -/** \brief Sentinel value used to indicate that a repeat is dead/empty/unused. - * * */ -#define REPEAT_DEAD 0xffffffffffffffffull - -enum MatchMode { - CALLBACK_OUTPUT, - STOP_AT_MATCH, -}; - -static really_inline -const struct RepeatInfo *getRepeatInfo(const struct lbr_common *l) { - const struct RepeatInfo *repeatInfo = - (const struct RepeatInfo *)((const char *)l + l->repeatInfoOffset); - return repeatInfo; -} - -static really_inline -void lbrCompressState(const struct lbr_common *l, u64a offset, - const struct lbr_state *lstate, char *stream_state) { - assert(l && lstate && stream_state); - assert(ISALIGNED(lstate)); - - const struct RepeatInfo *info = getRepeatInfo(l); - repeatPack(stream_state, info, &lstate->ctrl, offset); -} - -static really_inline -void lbrExpandState(const struct lbr_common *l, u64a offset, - const char *stream_state, struct lbr_state *lstate) { - assert(l && stream_state && lstate); - assert(ISALIGNED(lstate)); - - const struct RepeatInfo *info = getRepeatInfo(l); - repeatUnpack(stream_state, info, offset, &lstate->ctrl); + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Large Bounded Repeat (LBR) engine: runtime code. + */ +#include "lbr.h" + +#include "lbr_internal.h" +#include "nfa_api.h" +#include "nfa_api_queue.h" +#include "nfa_internal.h" +#include "repeat.h" +#include "repeat_internal.h" +#include "shufti.h" +#include "truffle.h" +#include "vermicelli.h" +#include "util/partial_store.h" +#include "util/unaligned.h" + +/** \brief Sentinel value used to indicate that a repeat is dead/empty/unused. + * * */ +#define REPEAT_DEAD 0xffffffffffffffffull + +enum MatchMode { + CALLBACK_OUTPUT, + STOP_AT_MATCH, +}; + +static really_inline +const struct RepeatInfo *getRepeatInfo(const struct lbr_common *l) { + const struct RepeatInfo *repeatInfo = + (const struct RepeatInfo *)((const char *)l + l->repeatInfoOffset); + return repeatInfo; +} + +static really_inline +void lbrCompressState(const struct lbr_common *l, u64a offset, + const struct lbr_state *lstate, char *stream_state) { + assert(l && lstate && stream_state); + assert(ISALIGNED(lstate)); + + const struct RepeatInfo *info = getRepeatInfo(l); + repeatPack(stream_state, info, &lstate->ctrl, offset); +} + +static really_inline +void lbrExpandState(const struct lbr_common *l, u64a offset, + const char *stream_state, struct lbr_state *lstate) { + assert(l && stream_state && lstate); + assert(ISALIGNED(lstate)); + + const struct RepeatInfo *info = getRepeatInfo(l); + repeatUnpack(stream_state, info, offset, &lstate->ctrl); lstate->lastEscape = 0; -} - -static really_inline -void clearRepeat(const struct RepeatInfo *info, struct lbr_state *lstate) { - assert(info && lstate); - - DEBUG_PRINTF("clear repeat at %p\n", lstate); - - switch ((enum RepeatType)info->type) { - case REPEAT_RING: - lstate->ctrl.ring.offset = REPEAT_DEAD; - break; - case REPEAT_RANGE: - lstate->ctrl.range.offset = REPEAT_DEAD; - break; - case REPEAT_FIRST: - case REPEAT_LAST: - lstate->ctrl.offset.offset = REPEAT_DEAD; - break; - case REPEAT_BITMAP: - lstate->ctrl.bitmap.offset = REPEAT_DEAD; - break; - case REPEAT_SPARSE_OPTIMAL_P: - lstate->ctrl.ring.offset = REPEAT_DEAD; - break; - case REPEAT_TRAILER: - lstate->ctrl.trailer.offset = REPEAT_DEAD; - break; - default: - assert(0); - break; - } -} - -static really_inline -char repeatIsDead(const struct RepeatInfo *info, - const struct lbr_state *lstate) { - assert(info && lstate); - - switch ((enum RepeatType)info->type) { - case REPEAT_RING: - return lstate->ctrl.ring.offset == REPEAT_DEAD; - case REPEAT_RANGE: - return lstate->ctrl.range.offset == REPEAT_DEAD; - case REPEAT_FIRST: - case REPEAT_LAST: - return lstate->ctrl.offset.offset == REPEAT_DEAD; - case REPEAT_BITMAP: - return lstate->ctrl.bitmap.offset == REPEAT_DEAD; - case REPEAT_SPARSE_OPTIMAL_P: - return lstate->ctrl.ring.offset == REPEAT_DEAD; - case REPEAT_TRAILER: - return lstate->ctrl.trailer.offset == REPEAT_DEAD; +} + +static really_inline +void clearRepeat(const struct RepeatInfo *info, struct lbr_state *lstate) { + assert(info && lstate); + + 
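    /* (Note on the switch below: "clearing" a repeat just writes the
     * REPEAT_DEAD sentinel into the offset field matching the repeat's
     * control-block type; repeatIsDead() tests exactly the same field.) */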
DEBUG_PRINTF("clear repeat at %p\n", lstate); + + switch ((enum RepeatType)info->type) { + case REPEAT_RING: + lstate->ctrl.ring.offset = REPEAT_DEAD; + break; + case REPEAT_RANGE: + lstate->ctrl.range.offset = REPEAT_DEAD; + break; + case REPEAT_FIRST: + case REPEAT_LAST: + lstate->ctrl.offset.offset = REPEAT_DEAD; + break; + case REPEAT_BITMAP: + lstate->ctrl.bitmap.offset = REPEAT_DEAD; + break; + case REPEAT_SPARSE_OPTIMAL_P: + lstate->ctrl.ring.offset = REPEAT_DEAD; + break; + case REPEAT_TRAILER: + lstate->ctrl.trailer.offset = REPEAT_DEAD; + break; + default: + assert(0); + break; + } +} + +static really_inline +char repeatIsDead(const struct RepeatInfo *info, + const struct lbr_state *lstate) { + assert(info && lstate); + + switch ((enum RepeatType)info->type) { + case REPEAT_RING: + return lstate->ctrl.ring.offset == REPEAT_DEAD; + case REPEAT_RANGE: + return lstate->ctrl.range.offset == REPEAT_DEAD; + case REPEAT_FIRST: + case REPEAT_LAST: + return lstate->ctrl.offset.offset == REPEAT_DEAD; + case REPEAT_BITMAP: + return lstate->ctrl.bitmap.offset == REPEAT_DEAD; + case REPEAT_SPARSE_OPTIMAL_P: + return lstate->ctrl.ring.offset == REPEAT_DEAD; + case REPEAT_TRAILER: + return lstate->ctrl.trailer.offset == REPEAT_DEAD; case REPEAT_ALWAYS: assert(!"REPEAT_ALWAYS should only be used by Castle"); return 0; - } - - assert(0); - return 1; -} - -/** Returns true if the LBR can produce matches at offsets greater than the - * given one. TODO: can this be combined with lbrIsActive? */ -static really_inline -char lbrIsAlive(const struct lbr_common *l, const struct lbr_state *lstate, - const char *state, u64a offset) { - assert(l && lstate && state); - - const struct RepeatInfo *info = getRepeatInfo(l); - if (repeatIsDead(info, lstate)) { - DEBUG_PRINTF("repeat is dead\n"); - return 0; - } - - if (info->repeatMax == REPEAT_INF) { - DEBUG_PRINTF("active repeat with inf max bound, alive\n"); - return 1; - } - - assert(info->repeatMax < REPEAT_INF); - const char *repeatState = state + info->packedCtrlSize; - u64a lastTop = repeatLastTop(info, &lstate->ctrl, repeatState); - if (offset < lastTop + info->repeatMax) { - DEBUG_PRINTF("alive, as we can still produce matches after %llu\n", - offset); - return 1; - } - - DEBUG_PRINTF("dead\n"); - return 0; -} - -/** Returns true if the LBR is matching at the given offset or it could produce - * a match in the future. */ -static really_inline -char lbrIsActive(const struct lbr_common *l, const struct lbr_state *lstate, - const char *state, u64a offset) { - assert(l && lstate && state); - const struct RepeatInfo *info = getRepeatInfo(l); - assert(!repeatIsDead(info, lstate)); // Guaranteed by caller. - - const char *repeatState = state + info->packedCtrlSize; - if (repeatHasMatch(info, &lstate->ctrl, repeatState, offset) == - REPEAT_MATCH) { - DEBUG_PRINTF("currently matching\n"); - return 1; - } - - u64a i = repeatNextMatch(info, &lstate->ctrl, repeatState, offset); - if (i != 0) { - DEBUG_PRINTF("active, next match is at %llu\n", i); - return 1; - } - - DEBUG_PRINTF("no more matches\n"); - return 0; -} - -static really_inline -void lbrTop(const struct lbr_common *l, struct lbr_state *lstate, char *state, - u64a offset) { - assert(l && lstate && state); - DEBUG_PRINTF("top at %llu\n", offset); - - const struct RepeatInfo *info = getRepeatInfo(l); - char *repeatState = state + info->packedCtrlSize; - - char is_alive = !repeatIsDead(info, lstate); - if (is_alive) { - // Ignore duplicate TOPs. 
- u64a last = repeatLastTop(info, &lstate->ctrl, repeatState); - assert(last <= offset); - if (last == offset) { - return; - } - } - - repeatStore(info, &lstate->ctrl, repeatState, offset, is_alive); -} - -static really_inline -char lbrInAccept(const struct lbr_common *l, const struct lbr_state *lstate, - const char *state, u64a offset, ReportID report) { - assert(l && lstate && state); - DEBUG_PRINTF("offset=%llu, report=%u\n", offset, report); - - if (report != l->report) { - DEBUG_PRINTF("report=%u is not LBR report %u\n", report, l->report); - return 0; - } - - const struct RepeatInfo *info = getRepeatInfo(l); - assert(!repeatIsDead(info, lstate)); // Guaranteed by caller. - - const char *repeatState = state + info->packedCtrlSize; - return repeatHasMatch(info, &lstate->ctrl, repeatState, offset) == - REPEAT_MATCH; -} - -static really_inline -char lbrFindMatch(const struct lbr_common *l, const u64a begin, const u64a end, - const struct lbr_state *lstate, const char *state, - size_t *mloc) { - DEBUG_PRINTF("begin=%llu, end=%llu\n", begin, end); - assert(begin <= end); - - if (begin == end) { - return 0; - } - - const struct RepeatInfo *info = getRepeatInfo(l); - const char *repeatState = state + info->packedCtrlSize; - u64a i = repeatNextMatch(info, &lstate->ctrl, repeatState, begin); - if (i == 0) { - DEBUG_PRINTF("no more matches\n"); - return 0; - } - if (i > end) { - DEBUG_PRINTF("next match at %llu is beyond the horizon\n", i); - return 0; - } - - DEBUG_PRINTF("stop at match at %llu\n", i); - assert(mloc); - *mloc = i - begin; - return 1; -} - -static really_inline -char lbrMatchLoop(const struct lbr_common *l, const u64a begin, const u64a end, - const struct lbr_state *lstate, const char *state, - NfaCallback cb, void *ctx) { - DEBUG_PRINTF("begin=%llu, end=%llu\n", begin, end); - assert(begin <= end); - - if (begin == end) { - return MO_CONTINUE_MATCHING; - } - - const struct RepeatInfo *info = getRepeatInfo(l); - const char *repeatState = state + info->packedCtrlSize; - - u64a i = begin; - for (;;) { - i = repeatNextMatch(info, &lstate->ctrl, repeatState, i); - if (i == 0) { - DEBUG_PRINTF("no more matches\n"); - return MO_CONTINUE_MATCHING; - } - if (i > end) { - DEBUG_PRINTF("next match at %llu is beyond the horizon\n", i); - return MO_CONTINUE_MATCHING; - } - - DEBUG_PRINTF("firing match at %llu\n", i); + } + + assert(0); + return 1; +} + +/** Returns true if the LBR can produce matches at offsets greater than the + * given one. TODO: can this be combined with lbrIsActive? */ +static really_inline +char lbrIsAlive(const struct lbr_common *l, const struct lbr_state *lstate, + const char *state, u64a offset) { + assert(l && lstate && state); + + const struct RepeatInfo *info = getRepeatInfo(l); + if (repeatIsDead(info, lstate)) { + DEBUG_PRINTF("repeat is dead\n"); + return 0; + } + + if (info->repeatMax == REPEAT_INF) { + DEBUG_PRINTF("active repeat with inf max bound, alive\n"); + return 1; + } + + assert(info->repeatMax < REPEAT_INF); + const char *repeatState = state + info->packedCtrlSize; + u64a lastTop = repeatLastTop(info, &lstate->ctrl, repeatState); + if (offset < lastTop + info->repeatMax) { + DEBUG_PRINTF("alive, as we can still produce matches after %llu\n", + offset); + return 1; + } + + DEBUG_PRINTF("dead\n"); + return 0; +} + +/** Returns true if the LBR is matching at the given offset or it could produce + * a match in the future. 
*/ +static really_inline +char lbrIsActive(const struct lbr_common *l, const struct lbr_state *lstate, + const char *state, u64a offset) { + assert(l && lstate && state); + const struct RepeatInfo *info = getRepeatInfo(l); + assert(!repeatIsDead(info, lstate)); // Guaranteed by caller. + + const char *repeatState = state + info->packedCtrlSize; + if (repeatHasMatch(info, &lstate->ctrl, repeatState, offset) == + REPEAT_MATCH) { + DEBUG_PRINTF("currently matching\n"); + return 1; + } + + u64a i = repeatNextMatch(info, &lstate->ctrl, repeatState, offset); + if (i != 0) { + DEBUG_PRINTF("active, next match is at %llu\n", i); + return 1; + } + + DEBUG_PRINTF("no more matches\n"); + return 0; +} + +static really_inline +void lbrTop(const struct lbr_common *l, struct lbr_state *lstate, char *state, + u64a offset) { + assert(l && lstate && state); + DEBUG_PRINTF("top at %llu\n", offset); + + const struct RepeatInfo *info = getRepeatInfo(l); + char *repeatState = state + info->packedCtrlSize; + + char is_alive = !repeatIsDead(info, lstate); + if (is_alive) { + // Ignore duplicate TOPs. + u64a last = repeatLastTop(info, &lstate->ctrl, repeatState); + assert(last <= offset); + if (last == offset) { + return; + } + } + + repeatStore(info, &lstate->ctrl, repeatState, offset, is_alive); +} + +static really_inline +char lbrInAccept(const struct lbr_common *l, const struct lbr_state *lstate, + const char *state, u64a offset, ReportID report) { + assert(l && lstate && state); + DEBUG_PRINTF("offset=%llu, report=%u\n", offset, report); + + if (report != l->report) { + DEBUG_PRINTF("report=%u is not LBR report %u\n", report, l->report); + return 0; + } + + const struct RepeatInfo *info = getRepeatInfo(l); + assert(!repeatIsDead(info, lstate)); // Guaranteed by caller. 
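/* Illustrative sketch (hypothetical names; not part of the Hyperscan
 * sources): the liveness test in lbrIsAlive() above is plain arithmetic.
 * With a finite bound, a repeat last topped at lastTop cannot match at any
 * offset beyond lastTop + repeatMax, so past that point the engine can be
 * declared dead:
 */
#include <assert.h>
#include <stdint.h>

static int toy_can_still_match(uint64_t last_top, uint64_t repeat_max,
                               uint64_t offset) {
    /* Mirrors "offset < lastTop + info->repeatMax" in lbrIsAlive(). */
    return offset < last_top + repeat_max;
}

int main(void) {
    /* Last top at 100, repeatMax 10: no match can occur after offset 110. */
    assert(toy_can_still_match(100, 10, 105));  /* inside the window */
    assert(!toy_can_still_match(100, 10, 110)); /* window exhausted */
    return 0;
}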
+ + const char *repeatState = state + info->packedCtrlSize; + return repeatHasMatch(info, &lstate->ctrl, repeatState, offset) == + REPEAT_MATCH; +} + +static really_inline +char lbrFindMatch(const struct lbr_common *l, const u64a begin, const u64a end, + const struct lbr_state *lstate, const char *state, + size_t *mloc) { + DEBUG_PRINTF("begin=%llu, end=%llu\n", begin, end); + assert(begin <= end); + + if (begin == end) { + return 0; + } + + const struct RepeatInfo *info = getRepeatInfo(l); + const char *repeatState = state + info->packedCtrlSize; + u64a i = repeatNextMatch(info, &lstate->ctrl, repeatState, begin); + if (i == 0) { + DEBUG_PRINTF("no more matches\n"); + return 0; + } + if (i > end) { + DEBUG_PRINTF("next match at %llu is beyond the horizon\n", i); + return 0; + } + + DEBUG_PRINTF("stop at match at %llu\n", i); + assert(mloc); + *mloc = i - begin; + return 1; +} + +static really_inline +char lbrMatchLoop(const struct lbr_common *l, const u64a begin, const u64a end, + const struct lbr_state *lstate, const char *state, + NfaCallback cb, void *ctx) { + DEBUG_PRINTF("begin=%llu, end=%llu\n", begin, end); + assert(begin <= end); + + if (begin == end) { + return MO_CONTINUE_MATCHING; + } + + const struct RepeatInfo *info = getRepeatInfo(l); + const char *repeatState = state + info->packedCtrlSize; + + u64a i = begin; + for (;;) { + i = repeatNextMatch(info, &lstate->ctrl, repeatState, i); + if (i == 0) { + DEBUG_PRINTF("no more matches\n"); + return MO_CONTINUE_MATCHING; + } + if (i > end) { + DEBUG_PRINTF("next match at %llu is beyond the horizon\n", i); + return MO_CONTINUE_MATCHING; + } + + DEBUG_PRINTF("firing match at %llu\n", i); if (cb(0, i, l->report, ctx) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - - assert(0); - return MO_CONTINUE_MATCHING; -} - -static really_inline -char lbrRevScanDot(UNUSED const struct NFA *nfa, UNUSED const u8 *buf, - UNUSED size_t begin, UNUSED size_t end, - UNUSED size_t *loc) { - assert(begin <= end); + return MO_HALT_MATCHING; + } + } + + assert(0); + return MO_CONTINUE_MATCHING; +} + +static really_inline +char lbrRevScanDot(UNUSED const struct NFA *nfa, UNUSED const u8 *buf, + UNUSED size_t begin, UNUSED size_t end, + UNUSED size_t *loc) { + assert(begin <= end); assert(nfa->type == LBR_NFA_DOT); - // Nothing can kill a dot! - return 0; -} - -static really_inline -char lbrRevScanVerm(const struct NFA *nfa, const u8 *buf, - size_t begin, size_t end, size_t *loc) { - assert(begin <= end); + // Nothing can kill a dot! 
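/* Illustrative sketch (hypothetical toy_* names; not part of the Hyperscan
 * sources): the RevScan/FwdScan helpers below all share one contract: search
 * a byte range for an "escape" character that kills the repeat, delegating
 * to SIMD routines (vermicelli/shufti/truffle). A scalar stand-in for the
 * single-character case; note the real rvermicelliExec signals "not found"
 * by returning buf + begin - 1, which this sketch replaces with NULL to stay
 * strictly portable:
 */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

static const uint8_t *toy_fwd_scan(uint8_t esc, const uint8_t *begin,
                                   const uint8_t *end) {
    for (const uint8_t *p = begin; p < end; p++) {
        if (*p == esc) {
            return p; /* first escape, like vermicelliExec */
        }
    }
    return end; /* "not found", matching the forward scans below */
}

static const uint8_t *toy_rev_scan(uint8_t esc, const uint8_t *begin,
                                   const uint8_t *end) {
    for (const uint8_t *p = end; p != begin;) {
        if (*--p == esc) {
            return p; /* last escape, like rvermicelliExec */
        }
    }
    return NULL; /* real code returns buf + begin - 1 here */
}

int main(void) {
    const uint8_t buf[] = "aaaXaaa";
    assert(toy_fwd_scan('X', buf, buf + 7) == buf + 3);
    assert(toy_rev_scan('X', buf, buf + 7) == buf + 3);
    assert(toy_fwd_scan('Y', buf, buf + 7) == buf + 7);
    assert(toy_rev_scan('Y', buf, buf + 7) == NULL);
    return 0;
}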
+ return 0; +} + +static really_inline +char lbrRevScanVerm(const struct NFA *nfa, const u8 *buf, + size_t begin, size_t end, size_t *loc) { + assert(begin <= end); assert(nfa->type == LBR_NFA_VERM); - const struct lbr_verm *l = getImplNfa(nfa); - - if (begin == end) { - return 0; - } - - const u8 *ptr = rvermicelliExec(l->c, 0, buf + begin, buf + end); - if (ptr == buf + begin - 1) { - DEBUG_PRINTF("no escape found\n"); - return 0; - } - - assert(loc); - *loc = (size_t)(ptr - buf); - DEBUG_PRINTF("escape found at offset %zu\n", *loc); - assert((char)*ptr == l->c); - return 1; -} - -static really_inline -char lbrRevScanNVerm(const struct NFA *nfa, const u8 *buf, - size_t begin, size_t end, size_t *loc) { - assert(begin <= end); + const struct lbr_verm *l = getImplNfa(nfa); + + if (begin == end) { + return 0; + } + + const u8 *ptr = rvermicelliExec(l->c, 0, buf + begin, buf + end); + if (ptr == buf + begin - 1) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + *loc = (size_t)(ptr - buf); + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + assert((char)*ptr == l->c); + return 1; +} + +static really_inline +char lbrRevScanNVerm(const struct NFA *nfa, const u8 *buf, + size_t begin, size_t end, size_t *loc) { + assert(begin <= end); assert(nfa->type == LBR_NFA_NVERM); - const struct lbr_verm *l = getImplNfa(nfa); - - if (begin == end) { - return 0; - } - - const u8 *ptr = rnvermicelliExec(l->c, 0, buf + begin, buf + end); - if (ptr == buf + begin - 1) { - DEBUG_PRINTF("no escape found\n"); - return 0; - } - - assert(loc); - *loc = (size_t)(ptr - buf); - DEBUG_PRINTF("escape found at offset %zu\n", *loc); - assert((char)*ptr != l->c); - return 1; -} - -static really_inline -char lbrRevScanShuf(const struct NFA *nfa, const u8 *buf, - size_t begin, size_t end, - size_t *loc) { - assert(begin <= end); + const struct lbr_verm *l = getImplNfa(nfa); + + if (begin == end) { + return 0; + } + + const u8 *ptr = rnvermicelliExec(l->c, 0, buf + begin, buf + end); + if (ptr == buf + begin - 1) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + *loc = (size_t)(ptr - buf); + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + assert((char)*ptr != l->c); + return 1; +} + +static really_inline +char lbrRevScanShuf(const struct NFA *nfa, const u8 *buf, + size_t begin, size_t end, + size_t *loc) { + assert(begin <= end); assert(nfa->type == LBR_NFA_SHUF); - const struct lbr_shuf *l = getImplNfa(nfa); - - if (begin == end) { - return 0; - } - - const u8 *ptr = rshuftiExec(l->mask_lo, l->mask_hi, buf + begin, buf + end); - if (ptr == buf + begin - 1) { - DEBUG_PRINTF("no escape found\n"); - return 0; - } - - assert(loc); - *loc = (size_t)(ptr - buf); - DEBUG_PRINTF("escape found at offset %zu\n", *loc); - return 1; -} - -static really_inline -char lbrRevScanTruf(const struct NFA *nfa, const u8 *buf, - size_t begin, size_t end, - size_t *loc) { - assert(begin <= end); + const struct lbr_shuf *l = getImplNfa(nfa); + + if (begin == end) { + return 0; + } + + const u8 *ptr = rshuftiExec(l->mask_lo, l->mask_hi, buf + begin, buf + end); + if (ptr == buf + begin - 1) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + *loc = (size_t)(ptr - buf); + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + return 1; +} + +static really_inline +char lbrRevScanTruf(const struct NFA *nfa, const u8 *buf, + size_t begin, size_t end, + size_t *loc) { + assert(begin <= end); assert(nfa->type == LBR_NFA_TRUF); - const struct lbr_truf *l = getImplNfa(nfa); - - if 
(begin == end) { - return 0; - } - - const u8 *ptr = rtruffleExec(l->mask1, l->mask2, buf + begin, buf + end); - if (ptr == buf + begin - 1) { - DEBUG_PRINTF("no escape found\n"); - return 0; - } - - assert(loc); - *loc = (size_t)(ptr - buf); - DEBUG_PRINTF("escape found at offset %zu\n", *loc); - return 1; -} - -static really_inline -char lbrFwdScanDot(UNUSED const struct NFA *nfa, UNUSED const u8 *buf, - UNUSED size_t begin, UNUSED size_t end, - UNUSED size_t *loc) { - assert(begin <= end); + const struct lbr_truf *l = getImplNfa(nfa); + + if (begin == end) { + return 0; + } + + const u8 *ptr = rtruffleExec(l->mask1, l->mask2, buf + begin, buf + end); + if (ptr == buf + begin - 1) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + *loc = (size_t)(ptr - buf); + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + return 1; +} + +static really_inline +char lbrFwdScanDot(UNUSED const struct NFA *nfa, UNUSED const u8 *buf, + UNUSED size_t begin, UNUSED size_t end, + UNUSED size_t *loc) { + assert(begin <= end); assert(nfa->type == LBR_NFA_DOT); - // Nothing can kill a dot! - return 0; -} - -static really_inline -char lbrFwdScanVerm(const struct NFA *nfa, const u8 *buf, - size_t begin, size_t end, size_t *loc) { - assert(begin <= end); + // Nothing can kill a dot! + return 0; +} + +static really_inline +char lbrFwdScanVerm(const struct NFA *nfa, const u8 *buf, + size_t begin, size_t end, size_t *loc) { + assert(begin <= end); assert(nfa->type == LBR_NFA_VERM); - const struct lbr_verm *l = getImplNfa(nfa); - - if (begin == end) { - return 0; - } - - const u8 *ptr = vermicelliExec(l->c, 0, buf + begin, buf + end); - if (ptr == buf + end) { - DEBUG_PRINTF("no escape found\n"); - return 0; - } - - assert(loc); - *loc = (size_t)(ptr - buf); - DEBUG_PRINTF("escape found at offset %zu\n", *loc); - assert((char)*ptr == l->c); - return 1; -} - -static really_inline -char lbrFwdScanNVerm(const struct NFA *nfa, const u8 *buf, - size_t begin, size_t end, size_t *loc) { - assert(begin <= end); + const struct lbr_verm *l = getImplNfa(nfa); + + if (begin == end) { + return 0; + } + + const u8 *ptr = vermicelliExec(l->c, 0, buf + begin, buf + end); + if (ptr == buf + end) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + *loc = (size_t)(ptr - buf); + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + assert((char)*ptr == l->c); + return 1; +} + +static really_inline +char lbrFwdScanNVerm(const struct NFA *nfa, const u8 *buf, + size_t begin, size_t end, size_t *loc) { + assert(begin <= end); assert(nfa->type == LBR_NFA_NVERM); - const struct lbr_verm *l = getImplNfa(nfa); - - if (begin == end) { - return 0; - } - - const u8 *ptr = nvermicelliExec(l->c, 0, buf + begin, buf + end); - if (ptr == buf + end) { - DEBUG_PRINTF("no escape found\n"); - return 0; - } - - assert(loc); - *loc = (size_t)(ptr - buf); - DEBUG_PRINTF("escape found at offset %zu\n", *loc); - assert((char)*ptr != l->c); - return 1; -} - -static really_inline -char lbrFwdScanShuf(const struct NFA *nfa, const u8 *buf, - size_t begin, size_t end, - size_t *loc) { - assert(begin <= end); + const struct lbr_verm *l = getImplNfa(nfa); + + if (begin == end) { + return 0; + } + + const u8 *ptr = nvermicelliExec(l->c, 0, buf + begin, buf + end); + if (ptr == buf + end) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + *loc = (size_t)(ptr - buf); + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + assert((char)*ptr != l->c); + return 1; +} + +static really_inline +char 
lbrFwdScanShuf(const struct NFA *nfa, const u8 *buf, + size_t begin, size_t end, + size_t *loc) { + assert(begin <= end); assert(nfa->type == LBR_NFA_SHUF); - const struct lbr_shuf *l = getImplNfa(nfa); - - if (begin == end) { - return 0; - } - - const u8 *ptr = shuftiExec(l->mask_lo, l->mask_hi, buf + begin, buf + end); - if (ptr == buf + end) { - DEBUG_PRINTF("no escape found\n"); - return 0; - } - - assert(loc); - *loc = (size_t)(ptr - buf); - DEBUG_PRINTF("escape found at offset %zu\n", *loc); - return 1; -} - -static really_inline -char lbrFwdScanTruf(const struct NFA *nfa, const u8 *buf, - size_t begin, size_t end, - size_t *loc) { - assert(begin <= end); + const struct lbr_shuf *l = getImplNfa(nfa); + + if (begin == end) { + return 0; + } + + const u8 *ptr = shuftiExec(l->mask_lo, l->mask_hi, buf + begin, buf + end); + if (ptr == buf + end) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + *loc = (size_t)(ptr - buf); + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + return 1; +} + +static really_inline +char lbrFwdScanTruf(const struct NFA *nfa, const u8 *buf, + size_t begin, size_t end, + size_t *loc) { + assert(begin <= end); assert(nfa->type == LBR_NFA_TRUF); - const struct lbr_truf *l = getImplNfa(nfa); - - if (begin == end) { - return 0; - } - - const u8 *ptr = truffleExec(l->mask1, l->mask2, buf + begin, buf + end); - if (ptr == buf + end) { - DEBUG_PRINTF("no escape found\n"); - return 0; - } - - assert(loc); - *loc = (size_t)(ptr - buf); - DEBUG_PRINTF("escape found at offset %zu\n", *loc); - return 1; -} - -#define ENGINE_ROOT_NAME Dot -#include "lbr_common_impl.h" - -#define ENGINE_ROOT_NAME Verm -#include "lbr_common_impl.h" - -#define ENGINE_ROOT_NAME NVerm -#include "lbr_common_impl.h" - -#define ENGINE_ROOT_NAME Shuf -#include "lbr_common_impl.h" - -#define ENGINE_ROOT_NAME Truf -#include "lbr_common_impl.h" + const struct lbr_truf *l = getImplNfa(nfa); + + if (begin == end) { + return 0; + } + + const u8 *ptr = truffleExec(l->mask1, l->mask2, buf + begin, buf + end); + if (ptr == buf + end) { + DEBUG_PRINTF("no escape found\n"); + return 0; + } + + assert(loc); + *loc = (size_t)(ptr - buf); + DEBUG_PRINTF("escape found at offset %zu\n", *loc); + return 1; +} + +#define ENGINE_ROOT_NAME Dot +#include "lbr_common_impl.h" + +#define ENGINE_ROOT_NAME Verm +#include "lbr_common_impl.h" + +#define ENGINE_ROOT_NAME NVerm +#include "lbr_common_impl.h" + +#define ENGINE_ROOT_NAME Shuf +#include "lbr_common_impl.h" + +#define ENGINE_ROOT_NAME Truf +#include "lbr_common_impl.h" diff --git a/contrib/libs/hyperscan/src/nfa/lbr.h b/contrib/libs/hyperscan/src/nfa/lbr.h index 237bf8f4cb..a9e42046db 100644 --- a/contrib/libs/hyperscan/src/nfa/lbr.h +++ b/contrib/libs/hyperscan/src/nfa/lbr.h @@ -1,150 +1,150 @@ -/* +/* * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef LBR_H -#define LBR_H - -#include "ue2common.h" - -struct mq; -struct NFA; - -#ifdef __cplusplus -extern "C" -{ -#endif - -// LBR Dot - -char nfaExecLbrDot_Q(const struct NFA *n, struct mq *q, s64a end); -char nfaExecLbrDot_Q2(const struct NFA *n, struct mq *q, s64a end); -char nfaExecLbrDot_QR(const struct NFA *n, struct mq *q, ReportID report); -char nfaExecLbrDot_reportCurrent(const struct NFA *n, struct mq *q); -char nfaExecLbrDot_inAccept(const struct NFA *n, ReportID report, struct mq *q); + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef LBR_H +#define LBR_H + +#include "ue2common.h" + +struct mq; +struct NFA; + +#ifdef __cplusplus +extern "C" +{ +#endif + +// LBR Dot + +char nfaExecLbrDot_Q(const struct NFA *n, struct mq *q, s64a end); +char nfaExecLbrDot_Q2(const struct NFA *n, struct mq *q, s64a end); +char nfaExecLbrDot_QR(const struct NFA *n, struct mq *q, ReportID report); +char nfaExecLbrDot_reportCurrent(const struct NFA *n, struct mq *q); +char nfaExecLbrDot_inAccept(const struct NFA *n, ReportID report, struct mq *q); char nfaExecLbrDot_inAnyAccept(const struct NFA *n, struct mq *q); -char nfaExecLbrDot_queueInitState(const struct NFA *n, struct mq *q); -char nfaExecLbrDot_initCompressedState(const struct NFA *n, u64a offset, - void *state, u8 key); -char nfaExecLbrDot_queueCompressState(const struct NFA *nfa, const struct mq *q, - s64a loc); -char nfaExecLbrDot_expandState(const struct NFA *nfa, void *dest, - const void *src, u64a offset, u8 key); - -#define nfaExecLbrDot_testEOD NFA_API_NO_IMPL -#define nfaExecLbrDot_B_Reverse NFA_API_NO_IMPL -#define nfaExecLbrDot_zombie_status NFA_API_ZOMBIE_NO_IMPL - -// LBR Verm - -char nfaExecLbrVerm_Q(const struct NFA *n, struct mq *q, s64a end); -char nfaExecLbrVerm_Q2(const struct NFA *n, struct mq *q, s64a end); -char nfaExecLbrVerm_QR(const struct NFA *n, struct mq *q, ReportID report); -char nfaExecLbrVerm_reportCurrent(const struct NFA *n, struct mq *q); -char nfaExecLbrVerm_inAccept(const struct NFA *n, ReportID report, - struct mq *q); +char nfaExecLbrDot_queueInitState(const struct NFA *n, struct mq *q); +char nfaExecLbrDot_initCompressedState(const struct NFA *n, u64a offset, + void *state, u8 key); +char nfaExecLbrDot_queueCompressState(const struct NFA *nfa, const struct mq *q, + s64a loc); +char nfaExecLbrDot_expandState(const struct NFA *nfa, void *dest, + const void *src, u64a offset, u8 key); + +#define nfaExecLbrDot_testEOD NFA_API_NO_IMPL +#define nfaExecLbrDot_B_Reverse NFA_API_NO_IMPL +#define nfaExecLbrDot_zombie_status NFA_API_ZOMBIE_NO_IMPL + +// LBR Verm + +char nfaExecLbrVerm_Q(const struct NFA *n, struct mq *q, s64a end); +char nfaExecLbrVerm_Q2(const struct NFA *n, struct mq *q, s64a end); +char nfaExecLbrVerm_QR(const struct NFA *n, struct mq *q, ReportID report); +char nfaExecLbrVerm_reportCurrent(const struct NFA *n, struct mq *q); +char nfaExecLbrVerm_inAccept(const struct NFA *n, ReportID report, + struct mq *q); char nfaExecLbrVerm_inAnyAccept(const struct NFA *n, struct mq *q); -char nfaExecLbrVerm_queueInitState(const struct NFA *n, struct mq *q); -char nfaExecLbrVerm_initCompressedState(const struct NFA *n, u64a offset, - void *state, u8 key); -char nfaExecLbrVerm_queueCompressState(const struct NFA *nfa, - const struct mq *q, s64a loc); -char nfaExecLbrVerm_expandState(const struct NFA *nfa, void *dest, - const void *src, u64a offset, u8 key); - -#define nfaExecLbrVerm_testEOD NFA_API_NO_IMPL -#define nfaExecLbrVerm_B_Reverse NFA_API_NO_IMPL -#define nfaExecLbrVerm_zombie_status NFA_API_ZOMBIE_NO_IMPL - -// LBR Negated Verm - -char nfaExecLbrNVerm_Q(const struct NFA *n, struct mq *q, s64a end); -char nfaExecLbrNVerm_Q2(const struct NFA *n, struct mq *q, s64a end); -char nfaExecLbrNVerm_QR(const struct NFA *n, struct mq *q, ReportID report); -char nfaExecLbrNVerm_reportCurrent(const struct NFA *n, struct mq *q); -char nfaExecLbrNVerm_inAccept(const struct NFA *n, ReportID report, - struct mq *q); +char nfaExecLbrVerm_queueInitState(const struct NFA *n, struct mq *q); +char nfaExecLbrVerm_initCompressedState(const struct 
NFA *n, u64a offset, + void *state, u8 key); +char nfaExecLbrVerm_queueCompressState(const struct NFA *nfa, + const struct mq *q, s64a loc); +char nfaExecLbrVerm_expandState(const struct NFA *nfa, void *dest, + const void *src, u64a offset, u8 key); + +#define nfaExecLbrVerm_testEOD NFA_API_NO_IMPL +#define nfaExecLbrVerm_B_Reverse NFA_API_NO_IMPL +#define nfaExecLbrVerm_zombie_status NFA_API_ZOMBIE_NO_IMPL + +// LBR Negated Verm + +char nfaExecLbrNVerm_Q(const struct NFA *n, struct mq *q, s64a end); +char nfaExecLbrNVerm_Q2(const struct NFA *n, struct mq *q, s64a end); +char nfaExecLbrNVerm_QR(const struct NFA *n, struct mq *q, ReportID report); +char nfaExecLbrNVerm_reportCurrent(const struct NFA *n, struct mq *q); +char nfaExecLbrNVerm_inAccept(const struct NFA *n, ReportID report, + struct mq *q); char nfaExecLbrNVerm_inAnyAccept(const struct NFA *n, struct mq *q); -char nfaExecLbrNVerm_queueInitState(const struct NFA *n, struct mq *q); -char nfaExecLbrNVerm_initCompressedState(const struct NFA *n, u64a offset, - void *state, u8 key); -char nfaExecLbrNVerm_queueCompressState(const struct NFA *nfa, - const struct mq *q, s64a loc); -char nfaExecLbrNVerm_expandState(const struct NFA *nfa, void *dest, - const void *src, u64a offset, u8 key); - -#define nfaExecLbrNVerm_testEOD NFA_API_NO_IMPL -#define nfaExecLbrNVerm_B_Reverse NFA_API_NO_IMPL -#define nfaExecLbrNVerm_zombie_status NFA_API_ZOMBIE_NO_IMPL - -// LBR Shuf - -char nfaExecLbrShuf_Q(const struct NFA *n, struct mq *q, s64a end); -char nfaExecLbrShuf_Q2(const struct NFA *n, struct mq *q, s64a end); -char nfaExecLbrShuf_QR(const struct NFA *n, struct mq *q, ReportID report); -char nfaExecLbrShuf_reportCurrent(const struct NFA *n, struct mq *q); -char nfaExecLbrShuf_inAccept(const struct NFA *n, ReportID report, - struct mq *q); +char nfaExecLbrNVerm_queueInitState(const struct NFA *n, struct mq *q); +char nfaExecLbrNVerm_initCompressedState(const struct NFA *n, u64a offset, + void *state, u8 key); +char nfaExecLbrNVerm_queueCompressState(const struct NFA *nfa, + const struct mq *q, s64a loc); +char nfaExecLbrNVerm_expandState(const struct NFA *nfa, void *dest, + const void *src, u64a offset, u8 key); + +#define nfaExecLbrNVerm_testEOD NFA_API_NO_IMPL +#define nfaExecLbrNVerm_B_Reverse NFA_API_NO_IMPL +#define nfaExecLbrNVerm_zombie_status NFA_API_ZOMBIE_NO_IMPL + +// LBR Shuf + +char nfaExecLbrShuf_Q(const struct NFA *n, struct mq *q, s64a end); +char nfaExecLbrShuf_Q2(const struct NFA *n, struct mq *q, s64a end); +char nfaExecLbrShuf_QR(const struct NFA *n, struct mq *q, ReportID report); +char nfaExecLbrShuf_reportCurrent(const struct NFA *n, struct mq *q); +char nfaExecLbrShuf_inAccept(const struct NFA *n, ReportID report, + struct mq *q); char nfaExecLbrShuf_inAnyAccept(const struct NFA *n, struct mq *q); -char nfaExecLbrShuf_queueInitState(const struct NFA *n, struct mq *q); -char nfaExecLbrShuf_initCompressedState(const struct NFA *n, u64a offset, - void *state, u8 key); -char nfaExecLbrShuf_queueCompressState(const struct NFA *nfa, - const struct mq *q, s64a loc); -char nfaExecLbrShuf_expandState(const struct NFA *nfa, void *dest, - const void *src, u64a offset, u8 key); - -#define nfaExecLbrShuf_testEOD NFA_API_NO_IMPL -#define nfaExecLbrShuf_B_Reverse NFA_API_NO_IMPL -#define nfaExecLbrShuf_zombie_status NFA_API_ZOMBIE_NO_IMPL - -// LBR Truffle - -char nfaExecLbrTruf_Q(const struct NFA *n, struct mq *q, s64a end); -char nfaExecLbrTruf_Q2(const struct NFA *n, struct mq *q, s64a end); -char nfaExecLbrTruf_QR(const struct NFA 
*n, struct mq *q, ReportID report); -char nfaExecLbrTruf_reportCurrent(const struct NFA *n, struct mq *q); -char nfaExecLbrTruf_inAccept(const struct NFA *n, ReportID report, - struct mq *q); +char nfaExecLbrShuf_queueInitState(const struct NFA *n, struct mq *q); +char nfaExecLbrShuf_initCompressedState(const struct NFA *n, u64a offset, + void *state, u8 key); +char nfaExecLbrShuf_queueCompressState(const struct NFA *nfa, + const struct mq *q, s64a loc); +char nfaExecLbrShuf_expandState(const struct NFA *nfa, void *dest, + const void *src, u64a offset, u8 key); + +#define nfaExecLbrShuf_testEOD NFA_API_NO_IMPL +#define nfaExecLbrShuf_B_Reverse NFA_API_NO_IMPL +#define nfaExecLbrShuf_zombie_status NFA_API_ZOMBIE_NO_IMPL + +// LBR Truffle + +char nfaExecLbrTruf_Q(const struct NFA *n, struct mq *q, s64a end); +char nfaExecLbrTruf_Q2(const struct NFA *n, struct mq *q, s64a end); +char nfaExecLbrTruf_QR(const struct NFA *n, struct mq *q, ReportID report); +char nfaExecLbrTruf_reportCurrent(const struct NFA *n, struct mq *q); +char nfaExecLbrTruf_inAccept(const struct NFA *n, ReportID report, + struct mq *q); char nfaExecLbrTruf_inAnyAccept(const struct NFA *n, struct mq *q); -char nfaExecLbrTruf_queueInitState(const struct NFA *n, struct mq *q); -char nfaExecLbrTruf_initCompressedState(const struct NFA *n, u64a offset, - void *state, u8 key); -char nfaExecLbrTruf_queueCompressState(const struct NFA *nfa, - const struct mq *q, s64a loc); -char nfaExecLbrTruf_expandState(const struct NFA *nfa, void *dest, - const void *src, u64a offset, u8 key); - -#define nfaExecLbrTruf_testEOD NFA_API_NO_IMPL -#define nfaExecLbrTruf_B_Reverse NFA_API_NO_IMPL -#define nfaExecLbrTruf_zombie_status NFA_API_ZOMBIE_NO_IMPL - -#ifdef __cplusplus -} -#endif - -#endif +char nfaExecLbrTruf_queueInitState(const struct NFA *n, struct mq *q); +char nfaExecLbrTruf_initCompressedState(const struct NFA *n, u64a offset, + void *state, u8 key); +char nfaExecLbrTruf_queueCompressState(const struct NFA *nfa, + const struct mq *q, s64a loc); +char nfaExecLbrTruf_expandState(const struct NFA *nfa, void *dest, + const void *src, u64a offset, u8 key); + +#define nfaExecLbrTruf_testEOD NFA_API_NO_IMPL +#define nfaExecLbrTruf_B_Reverse NFA_API_NO_IMPL +#define nfaExecLbrTruf_zombie_status NFA_API_ZOMBIE_NO_IMPL + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/contrib/libs/hyperscan/src/nfa/lbr_common_impl.h b/contrib/libs/hyperscan/src/nfa/lbr_common_impl.h index 40516c4988..5ae35431e4 100644 --- a/contrib/libs/hyperscan/src/nfa/lbr_common_impl.h +++ b/contrib/libs/hyperscan/src/nfa/lbr_common_impl.h @@ -1,99 +1,99 @@ -/* +/* * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Large Bounded Repeat (LBR) engine: runtime impl X-macros. - */ - -#include "util/join.h" - -#define ENGINE_EXEC_NAME JOIN(nfaExecLbr, ENGINE_ROOT_NAME) -#define EXEC_FN JOIN(lbrExec, ENGINE_ROOT_NAME) -#define FWDSCAN_FN JOIN(lbrFwdScan, ENGINE_ROOT_NAME) -#define REVSCAN_FN JOIN(lbrRevScan, ENGINE_ROOT_NAME) - -char JOIN(ENGINE_EXEC_NAME, _queueCompressState)(const struct NFA *nfa, - const struct mq *q, s64a loc) { - assert(nfa && q); - assert(isLbrType(nfa->type)); - DEBUG_PRINTF("entry, q->offset=%llu, loc=%lld\n", q->offset, loc); - - const struct lbr_common *l = getImplNfa(nfa); - const struct lbr_state *lstate = (const struct lbr_state *)q->state; - - u64a offset = q->offset + loc; - lbrCompressState(l, offset, lstate, q->streamState); - return 0; -} - -char JOIN(ENGINE_EXEC_NAME, _expandState)(const struct NFA *nfa, void *dest, - const void *src, u64a offset, - UNUSED u8 key) { - assert(nfa); - assert(isLbrType(nfa->type)); - DEBUG_PRINTF("entry, offset=%llu\n", offset); - - const struct lbr_common *l = getImplNfa(nfa); - struct lbr_state *lstate = (struct lbr_state *)dest; - lbrExpandState(l, offset, src, lstate); - return 0; -} - -char JOIN(ENGINE_EXEC_NAME, _reportCurrent)(const struct NFA *nfa, - struct mq *q) { - assert(nfa && q); - assert(isLbrType(nfa->type)); - - const struct lbr_common *l = getImplNfa(nfa); - u64a offset = q_cur_offset(q); - DEBUG_PRINTF("firing match %u at %llu\n", l->report, offset); + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Large Bounded Repeat (LBR) engine: runtime impl X-macros. + */ + +#include "util/join.h" + +#define ENGINE_EXEC_NAME JOIN(nfaExecLbr, ENGINE_ROOT_NAME) +#define EXEC_FN JOIN(lbrExec, ENGINE_ROOT_NAME) +#define FWDSCAN_FN JOIN(lbrFwdScan, ENGINE_ROOT_NAME) +#define REVSCAN_FN JOIN(lbrRevScan, ENGINE_ROOT_NAME) + +char JOIN(ENGINE_EXEC_NAME, _queueCompressState)(const struct NFA *nfa, + const struct mq *q, s64a loc) { + assert(nfa && q); + assert(isLbrType(nfa->type)); + DEBUG_PRINTF("entry, q->offset=%llu, loc=%lld\n", q->offset, loc); + + const struct lbr_common *l = getImplNfa(nfa); + const struct lbr_state *lstate = (const struct lbr_state *)q->state; + + u64a offset = q->offset + loc; + lbrCompressState(l, offset, lstate, q->streamState); + return 0; +} + +char JOIN(ENGINE_EXEC_NAME, _expandState)(const struct NFA *nfa, void *dest, + const void *src, u64a offset, + UNUSED u8 key) { + assert(nfa); + assert(isLbrType(nfa->type)); + DEBUG_PRINTF("entry, offset=%llu\n", offset); + + const struct lbr_common *l = getImplNfa(nfa); + struct lbr_state *lstate = (struct lbr_state *)dest; + lbrExpandState(l, offset, src, lstate); + return 0; +} + +char JOIN(ENGINE_EXEC_NAME, _reportCurrent)(const struct NFA *nfa, + struct mq *q) { + assert(nfa && q); + assert(isLbrType(nfa->type)); + + const struct lbr_common *l = getImplNfa(nfa); + u64a offset = q_cur_offset(q); + DEBUG_PRINTF("firing match %u at %llu\n", l->report, offset); q->cb(0, offset, l->report, q->context); - return 0; -} - -char JOIN(ENGINE_EXEC_NAME, _inAccept)(const struct NFA *nfa, - ReportID report, struct mq *q) { - assert(nfa && q); - assert(isLbrType(nfa->type)); - DEBUG_PRINTF("entry\n"); - - const struct lbr_common *l = getImplNfa(nfa); - const struct RepeatInfo *info = getRepeatInfo(l); - const struct lbr_state *lstate = (const struct lbr_state *)q->state; - if (repeatIsDead(info, lstate)) { - DEBUG_PRINTF("repeat is dead\n"); - return 0; - } - - u64a offset = q->offset + q_last_loc(q); - return lbrInAccept(l, lstate, q->streamState, offset, report); -} - + return 0; +} + +char JOIN(ENGINE_EXEC_NAME, _inAccept)(const struct NFA *nfa, + ReportID report, struct mq *q) { + assert(nfa && q); + assert(isLbrType(nfa->type)); + DEBUG_PRINTF("entry\n"); + + const struct lbr_common *l = getImplNfa(nfa); + const struct RepeatInfo *info = getRepeatInfo(l); + const struct lbr_state *lstate = (const struct lbr_state *)q->state; + if (repeatIsDead(info, lstate)) { + DEBUG_PRINTF("repeat is dead\n"); + return 0; + } + + u64a offset = q->offset + q_last_loc(q); + return lbrInAccept(l, lstate, q->streamState, offset, report); +} + char JOIN(ENGINE_EXEC_NAME, _inAnyAccept)(const struct NFA *nfa, struct mq *q) { assert(nfa && q); assert(isLbrType(nfa->type)); @@ -103,360 +103,360 @@ char JOIN(ENGINE_EXEC_NAME, _inAnyAccept)(const struct NFA *nfa, struct mq *q) { return JOIN(ENGINE_EXEC_NAME, _inAccept)(nfa, l->report, q); } -char JOIN(ENGINE_EXEC_NAME, _queueInitState)(const struct NFA 
*nfa, - struct mq *q) { - assert(nfa && q); - assert(isLbrType(nfa->type)); - DEBUG_PRINTF("entry\n"); - - const struct lbr_common *l = getImplNfa(nfa); - const struct RepeatInfo *info = getRepeatInfo(l); - - assert(q->state); - struct lbr_state *lstate = (struct lbr_state *)q->state; - assert(ISALIGNED(lstate)); - - lstate->lastEscape = 0; - clearRepeat(info, lstate); - - return 0; -} - -char JOIN(ENGINE_EXEC_NAME, _initCompressedState)(const struct NFA *nfa, - u64a offset, - void *state, UNUSED u8 key) { - assert(nfa && state); - assert(isLbrType(nfa->type)); - DEBUG_PRINTF("entry\n"); - - const struct lbr_common *l = getImplNfa(nfa); - const struct RepeatInfo *info = getRepeatInfo(l); - struct lbr_state lstate; // temp control block on stack. - clearRepeat(info, &lstate); - lbrTop(l, &lstate, state, offset); - lbrCompressState(l, offset, &lstate, state); - - return 1; // LBR is alive -} - -// FIXME: this function could be much simpler for a Dot LBR, as all it needs to -// do is find the next top. -static really_inline -char JOIN(ENGINE_EXEC_NAME, _TopScan)(const struct NFA *nfa, struct mq *q, - s64a end) { - const struct lbr_common *l = getImplNfa(nfa); - const struct RepeatInfo *info = getRepeatInfo(l); - - const u64a offset = q->offset; - struct lbr_state *lstate = (struct lbr_state *)q->state; - assert(ISALIGNED(lstate)); - - assert(repeatIsDead(info, lstate)); - assert(q->cur < q->end); - - DEBUG_PRINTF("entry, end=%lld, offset=%llu, lastEscape=%llu\n", end, - offset, lstate->lastEscape); - - while (1) { - // Find the next top with location >= the last escape we saw. - for (; q->cur < q->end && q_cur_loc(q) <= end; q->cur++) { - u32 event = q_cur_type(q); - if ((event == MQE_TOP || event == MQE_TOP_FIRST) && - q_cur_offset(q) >= lstate->lastEscape) { - goto found_top; - } - DEBUG_PRINTF("skip event type=%u offset=%lld\n", event, q_cur_offset(q)); - } - - // No more tops, we're done. 
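/* Illustrative sketch (hypothetical TOY_* names; not part of the Hyperscan
 * sources): every ENGINE_EXEC_NAME function in this file is named by token
 * pasting: JOIN(nfaExecLbr, ENGINE_ROOT_NAME) becomes nfaExecLbrDot,
 * nfaExecLbrVerm, and so on, which is why lbr.c can include
 * lbr_common_impl.h once per ENGINE_ROOT_NAME to stamp out each engine.
 * The two-level paste that makes this work:
 */
#include <stdio.h>

#define TOY_JOIN_(a, b) a##b
#define TOY_JOIN(a, b) TOY_JOIN_(a, b) /* indirection expands macro args */

#define TOY_ROOT Verm
static void TOY_JOIN(toyExecLbr, TOY_ROOT)(void) {
    puts("toyExecLbrVerm called");
}

int main(void) {
    toyExecLbrVerm(); /* the pasted name is an ordinary identifier */
    return 0;
}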
- break; - -found_top:; - assert(q->cur < q->end); - - u64a sp = q_cur_offset(q); - u64a first_match = sp + info->repeatMin; - DEBUG_PRINTF("first possible match is at %llu\n", first_match); - - u64a ep = MIN(MIN(end, (s64a)q->length) + offset, first_match); - if (ep > sp && sp >= offset) { - size_t eloc; - DEBUG_PRINTF("rev b%llu e%llu/%zu\n", sp - offset, ep - offset, - q->length); - assert(ep - offset <= q->length); - if (REVSCAN_FN(nfa, q->buffer, sp - offset, ep - offset, &eloc)) { - DEBUG_PRINTF("escape found at %llu\n", offset + eloc); - lstate->lastEscape = eloc; - q->cur++; - continue; - } - } - - lbrTop(l, lstate, q->streamState, sp); - return 1; - } - - DEBUG_PRINTF("exhausted queue\n"); - return 0; -} - -static really_inline -char JOIN(ENGINE_EXEC_NAME, _Q_i)(const struct NFA *nfa, struct mq *q, - s64a end, enum MatchMode mode) { - assert(nfa && q); - assert(isLbrType(nfa->type)); - - const struct lbr_common *l = getImplNfa(nfa); - const struct RepeatInfo *info = getRepeatInfo(l); - - struct lbr_state *lstate = (struct lbr_state *)q->state; - assert(ISALIGNED(lstate)); - - - if (q->report_current) { - DEBUG_PRINTF("report_current: fire match at %llu\n", q_cur_offset(q)); +char JOIN(ENGINE_EXEC_NAME, _queueInitState)(const struct NFA *nfa, + struct mq *q) { + assert(nfa && q); + assert(isLbrType(nfa->type)); + DEBUG_PRINTF("entry\n"); + + const struct lbr_common *l = getImplNfa(nfa); + const struct RepeatInfo *info = getRepeatInfo(l); + + assert(q->state); + struct lbr_state *lstate = (struct lbr_state *)q->state; + assert(ISALIGNED(lstate)); + + lstate->lastEscape = 0; + clearRepeat(info, lstate); + + return 0; +} + +char JOIN(ENGINE_EXEC_NAME, _initCompressedState)(const struct NFA *nfa, + u64a offset, + void *state, UNUSED u8 key) { + assert(nfa && state); + assert(isLbrType(nfa->type)); + DEBUG_PRINTF("entry\n"); + + const struct lbr_common *l = getImplNfa(nfa); + const struct RepeatInfo *info = getRepeatInfo(l); + struct lbr_state lstate; // temp control block on stack. + clearRepeat(info, &lstate); + lbrTop(l, &lstate, state, offset); + lbrCompressState(l, offset, &lstate, state); + + return 1; // LBR is alive +} + +// FIXME: this function could be much simpler for a Dot LBR, as all it needs to +// do is find the next top. +static really_inline +char JOIN(ENGINE_EXEC_NAME, _TopScan)(const struct NFA *nfa, struct mq *q, + s64a end) { + const struct lbr_common *l = getImplNfa(nfa); + const struct RepeatInfo *info = getRepeatInfo(l); + + const u64a offset = q->offset; + struct lbr_state *lstate = (struct lbr_state *)q->state; + assert(ISALIGNED(lstate)); + + assert(repeatIsDead(info, lstate)); + assert(q->cur < q->end); + + DEBUG_PRINTF("entry, end=%lld, offset=%llu, lastEscape=%llu\n", end, + offset, lstate->lastEscape); + + while (1) { + // Find the next top with location >= the last escape we saw. + for (; q->cur < q->end && q_cur_loc(q) <= end; q->cur++) { + u32 event = q_cur_type(q); + if ((event == MQE_TOP || event == MQE_TOP_FIRST) && + q_cur_offset(q) >= lstate->lastEscape) { + goto found_top; + } + DEBUG_PRINTF("skip event type=%u offset=%lld\n", event, q_cur_offset(q)); + } + + // No more tops, we're done. 
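/* Illustrative sketch (hypothetical values; not part of the Hyperscan
 * sources): the reverse scan in _TopScan above is clipped to
 * ep = MIN(MIN(end, q->length) + offset, first_match): never past the data
 * actually available, and never past the first offset at which the repeat
 * could match anyway. Worked through with concrete numbers:
 */
#include <assert.h>
#include <stdint.h>

#define TOY_MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void) {
    uint64_t offset = 1000;     /* absolute offset of q->buffer[0] */
    uint64_t length = 64;       /* bytes available in q->buffer */
    uint64_t end = 50;          /* relative end of this scan call */
    uint64_t sp = 1010;         /* absolute offset of the TOP event */
    uint64_t repeat_min = 8;
    uint64_t first_match = sp + repeat_min; /* 1018 */

    uint64_t ep = TOY_MIN(TOY_MIN(end, length) + offset, first_match);
    assert(ep == 1018); /* min(50, 64) + 1000 = 1050, clipped to 1018 */

    /* Escapes are then sought in the relative buffer range [10, 18). */
    assert(sp - offset == 10 && ep - offset == 18);
    return 0;
}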
+ break; + +found_top:; + assert(q->cur < q->end); + + u64a sp = q_cur_offset(q); + u64a first_match = sp + info->repeatMin; + DEBUG_PRINTF("first possible match is at %llu\n", first_match); + + u64a ep = MIN(MIN(end, (s64a)q->length) + offset, first_match); + if (ep > sp && sp >= offset) { + size_t eloc; + DEBUG_PRINTF("rev b%llu e%llu/%zu\n", sp - offset, ep - offset, + q->length); + assert(ep - offset <= q->length); + if (REVSCAN_FN(nfa, q->buffer, sp - offset, ep - offset, &eloc)) { + DEBUG_PRINTF("escape found at %llu\n", offset + eloc); + lstate->lastEscape = eloc; + q->cur++; + continue; + } + } + + lbrTop(l, lstate, q->streamState, sp); + return 1; + } + + DEBUG_PRINTF("exhausted queue\n"); + return 0; +} + +static really_inline +char JOIN(ENGINE_EXEC_NAME, _Q_i)(const struct NFA *nfa, struct mq *q, + s64a end, enum MatchMode mode) { + assert(nfa && q); + assert(isLbrType(nfa->type)); + + const struct lbr_common *l = getImplNfa(nfa); + const struct RepeatInfo *info = getRepeatInfo(l); + + struct lbr_state *lstate = (struct lbr_state *)q->state; + assert(ISALIGNED(lstate)); + + + if (q->report_current) { + DEBUG_PRINTF("report_current: fire match at %llu\n", q_cur_offset(q)); int rv = q->cb(0, q_cur_offset(q), l->report, q->context); - q->report_current = 0; - if (rv == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - - if (q->cur == q->end) { - return 1; - } - - assert(q->cur + 1 < q->end); /* require at least two items */ - assert(q_cur_type(q) == MQE_START); - u64a sp = q_cur_offset(q); - q->cur++; - DEBUG_PRINTF("sp=%llu, abs_end=%llu\n", sp, end + q->offset); - - while (q->cur < q->end) { - DEBUG_PRINTF("q item type=%d offset=%llu\n", q_cur_type(q), - q_cur_offset(q)); - - assert(sp >= q->offset); // not in history - - if (repeatIsDead(info, lstate)) { - DEBUG_PRINTF("repeat is currently dead, skipping scan\n"); - goto scan_done; - } - - u64a ep = q_cur_offset(q); - ep = MIN(ep, q->offset + end); - if (sp < ep) { - size_t eloc = 0; - char escape_found = 0; - DEBUG_PRINTF("scanning from sp=%llu to ep=%llu\n", sp, ep); - assert(sp >= q->offset && ep >= q->offset); - if (FWDSCAN_FN(nfa, q->buffer, sp - q->offset, ep - q->offset, &eloc)) { - escape_found = 1; - ep = q->offset + eloc; - DEBUG_PRINTF("escape found at %llu\n", ep); - assert(ep >= sp); - } - - assert(sp <= ep); - - if (mode == STOP_AT_MATCH) { - size_t mloc; - if (lbrFindMatch(l, sp, ep, lstate, q->streamState, &mloc)) { - DEBUG_PRINTF("storing match at %llu\n", sp + mloc); - q->cur--; - assert(q->cur < MAX_MQE_LEN); - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = (s64a)(sp - q->offset) + mloc; - return MO_MATCHES_PENDING; - } - } else { - assert(mode == CALLBACK_OUTPUT); - char rv = lbrMatchLoop(l, sp, ep, lstate, q->streamState, q->cb, - q->context); - if (rv == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - assert(rv == MO_CONTINUE_MATCHING); - } - - if (escape_found) { - DEBUG_PRINTF("clearing repeat due to escape\n"); - clearRepeat(info, lstate); - } - } - - scan_done: - if (q_cur_loc(q) > end) { - q->cur--; - assert(q->cur < MAX_MQE_LEN); - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = end; - return MO_ALIVE; - } - - if (repeatIsDead(info, lstate)) { - if (!JOIN(ENGINE_EXEC_NAME, _TopScan)(nfa, q, end)) { - assert(repeatIsDead(info, lstate)); - if (q->cur < q->end && q_cur_loc(q) > end) { - q->cur--; - assert(q->cur < MAX_MQE_LEN); - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = end; - return MO_ALIVE; - } - return 0; - } - DEBUG_PRINTF("cur 
offset = %llu\n", q_cur_offset(q)); - } else { - switch (q_cur_type(q)) { - case MQE_TOP: - case MQE_TOP_FIRST: - lbrTop(l, lstate, q->streamState, q_cur_offset(q)); - break; - case MQE_START: - case MQE_END: - break; - default: - DEBUG_PRINTF("unhandled event %d!\n", q_cur_type(q)); - assert(0); - break; - } - } - - sp = q_cur_offset(q); - q->cur++; - } - - return lbrIsAlive(l, lstate, q->streamState, sp); -} - -char JOIN(ENGINE_EXEC_NAME, _Q)(const struct NFA *nfa, struct mq *q, s64a end) { - DEBUG_PRINTF("entry, offset=%llu, end=%lld\n", q->offset, end); - return JOIN(ENGINE_EXEC_NAME, _Q_i)(nfa, q, end, CALLBACK_OUTPUT); -} - -char JOIN(ENGINE_EXEC_NAME, _Q2)(const struct NFA *nfa, struct mq *q, s64a end) { - DEBUG_PRINTF("entry, offset=%llu, end=%lld\n", q->offset, end); - return JOIN(ENGINE_EXEC_NAME, _Q_i)(nfa, q, end, STOP_AT_MATCH); -} - -static really_inline -void JOIN(ENGINE_EXEC_NAME, _StreamSilent)(const struct NFA *nfa, struct mq *q, - const u8 *buf, size_t length) { - const struct lbr_common *l = getImplNfa(nfa); - const struct RepeatInfo *info = getRepeatInfo(l); - struct lbr_state *lstate = (struct lbr_state *)q->state; - assert(ISALIGNED(lstate)); - - assert(!repeatIsDead(info, lstate)); - - // This call doesn't produce matches, so we elide the lbrMatchLoop call - // entirely and just do escape scans to maintain the repeat. - - size_t eloc = 0; - char escaped = FWDSCAN_FN(nfa, buf, 0, length, &eloc); - if (escaped) { - assert(eloc < length); - DEBUG_PRINTF("escape found at %zu, clearing repeat\n", eloc); - clearRepeat(info, lstate); - } -} - -// Rose infix path. -char JOIN(ENGINE_EXEC_NAME, _QR)(const struct NFA *nfa, struct mq *q, - ReportID report) { - assert(nfa && q); - assert(isLbrType(nfa->type)); - - if (q->cur == q->end) { - return 1; - } - - assert(q->cur + 1 < q->end); /* require at least two items */ - assert(q_cur_type(q) == MQE_START); - u64a sp = q_cur_offset(q); - q->cur++; - DEBUG_PRINTF("sp=%llu\n", sp); - - const struct lbr_common *l = getImplNfa(nfa); - const struct RepeatInfo *info = getRepeatInfo(l); - struct lbr_state *lstate = (struct lbr_state *)q->state; - assert(ISALIGNED(lstate)); - const s64a lastLoc = q_last_loc(q); - - while (q->cur < q->end) { - DEBUG_PRINTF("q item type=%d offset=%llu\n", q_cur_type(q), - q_cur_offset(q)); - - if (repeatIsDead(info, lstate)) { - DEBUG_PRINTF("repeat is dead\n"); - goto scan_done; - } - - u64a ep = q_cur_offset(q); - - if (sp < q->offset) { - DEBUG_PRINTF("HISTORY BUFFER SCAN\n"); - assert(q->offset - sp <= q->hlength); - u64a local_ep = MIN(q->offset, ep); - const u8 *ptr = q->history + q->hlength + sp - q->offset; - JOIN(ENGINE_EXEC_NAME, _StreamSilent)(nfa, q, ptr, local_ep - sp); - sp = local_ep; - } - - if (repeatIsDead(info, lstate)) { - DEBUG_PRINTF("repeat is dead\n"); - goto scan_done; - } - - if (sp < ep) { - DEBUG_PRINTF("MAIN BUFFER SCAN\n"); - assert(ep - q->offset <= q->length); - const u8 *ptr = q->buffer + sp - q->offset; - JOIN(ENGINE_EXEC_NAME, _StreamSilent)(nfa, q, ptr, ep - sp); - } - - if (repeatIsDead(info, lstate)) { -scan_done: - if (!JOIN(ENGINE_EXEC_NAME, _TopScan)(nfa, q, lastLoc)) { - assert(repeatIsDead(info, lstate)); - assert(q->cur == q->end); - return 0; - } - } else { - switch (q_cur_type(q)) { - case MQE_TOP: - case MQE_TOP_FIRST: - lbrTop(l, lstate, q->streamState, q_cur_offset(q)); - break; - case MQE_START: - case MQE_END: - break; - default: - DEBUG_PRINTF("unhandled event %d!\n", q_cur_type(q)); - assert(0); - break; - } - } - - sp = q_cur_offset(q); - q->cur++; - } - 
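/* Illustrative sketch (hypothetical values; not part of the Hyperscan
 * sources): the _QR path splits each scan at q->offset: stream positions
 * before it live in the history buffer, whose final byte corresponds to
 * absolute offset q->offset - 1, hence the pointer expression
 * q->history + q->hlength + sp - q->offset above. Checking that mapping:
 */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

int main(void) {
    /* Stream "abcdefgh": first five bytes are history, the rest buffer. */
    const uint8_t history[] = "abcde"; /* absolute offsets 0..4 */
    const uint8_t buffer[] = "fgh";    /* absolute offsets 5..7 */
    uint64_t offset = 5;               /* q->offset */
    size_t hlength = 5;                /* q->hlength */

    uint64_t sp = 2; /* scan start inside the history window */
    /* Parenthesised form of the expression used in _QR. */
    const uint8_t *ptr = history + (hlength + sp - offset);
    assert(*ptr == 'c'); /* absolute offset 2 holds 'c' */

    sp = 6; /* scan start inside the main buffer */
    ptr = buffer + (sp - offset);
    assert(*ptr == 'g'); /* absolute offset 6 holds 'g' */
    return 0;
}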
- if (repeatIsDead(info, lstate)) { - DEBUG_PRINTF("repeat is dead\n"); - return 0; - } - - if (lbrInAccept(l, lstate, q->streamState, sp, report)) { - return MO_MATCHES_PENDING; - } - - return lbrIsActive(l, lstate, q->streamState, sp); -} - -#undef ENGINE_EXEC_NAME -#undef EXEC_FN -#undef FWDSCAN_FN -#undef REVSCAN_FN -#undef ENGINE_ROOT_NAME + q->report_current = 0; + if (rv == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } + + if (q->cur == q->end) { + return 1; + } + + assert(q->cur + 1 < q->end); /* require at least two items */ + assert(q_cur_type(q) == MQE_START); + u64a sp = q_cur_offset(q); + q->cur++; + DEBUG_PRINTF("sp=%llu, abs_end=%llu\n", sp, end + q->offset); + + while (q->cur < q->end) { + DEBUG_PRINTF("q item type=%d offset=%llu\n", q_cur_type(q), + q_cur_offset(q)); + + assert(sp >= q->offset); // not in history + + if (repeatIsDead(info, lstate)) { + DEBUG_PRINTF("repeat is currently dead, skipping scan\n"); + goto scan_done; + } + + u64a ep = q_cur_offset(q); + ep = MIN(ep, q->offset + end); + if (sp < ep) { + size_t eloc = 0; + char escape_found = 0; + DEBUG_PRINTF("scanning from sp=%llu to ep=%llu\n", sp, ep); + assert(sp >= q->offset && ep >= q->offset); + if (FWDSCAN_FN(nfa, q->buffer, sp - q->offset, ep - q->offset, &eloc)) { + escape_found = 1; + ep = q->offset + eloc; + DEBUG_PRINTF("escape found at %llu\n", ep); + assert(ep >= sp); + } + + assert(sp <= ep); + + if (mode == STOP_AT_MATCH) { + size_t mloc; + if (lbrFindMatch(l, sp, ep, lstate, q->streamState, &mloc)) { + DEBUG_PRINTF("storing match at %llu\n", sp + mloc); + q->cur--; + assert(q->cur < MAX_MQE_LEN); + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = (s64a)(sp - q->offset) + mloc; + return MO_MATCHES_PENDING; + } + } else { + assert(mode == CALLBACK_OUTPUT); + char rv = lbrMatchLoop(l, sp, ep, lstate, q->streamState, q->cb, + q->context); + if (rv == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + assert(rv == MO_CONTINUE_MATCHING); + } + + if (escape_found) { + DEBUG_PRINTF("clearing repeat due to escape\n"); + clearRepeat(info, lstate); + } + } + + scan_done: + if (q_cur_loc(q) > end) { + q->cur--; + assert(q->cur < MAX_MQE_LEN); + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = end; + return MO_ALIVE; + } + + if (repeatIsDead(info, lstate)) { + if (!JOIN(ENGINE_EXEC_NAME, _TopScan)(nfa, q, end)) { + assert(repeatIsDead(info, lstate)); + if (q->cur < q->end && q_cur_loc(q) > end) { + q->cur--; + assert(q->cur < MAX_MQE_LEN); + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = end; + return MO_ALIVE; + } + return 0; + } + DEBUG_PRINTF("cur offset = %llu\n", q_cur_offset(q)); + } else { + switch (q_cur_type(q)) { + case MQE_TOP: + case MQE_TOP_FIRST: + lbrTop(l, lstate, q->streamState, q_cur_offset(q)); + break; + case MQE_START: + case MQE_END: + break; + default: + DEBUG_PRINTF("unhandled event %d!\n", q_cur_type(q)); + assert(0); + break; + } + } + + sp = q_cur_offset(q); + q->cur++; + } + + return lbrIsAlive(l, lstate, q->streamState, sp); +} + +char JOIN(ENGINE_EXEC_NAME, _Q)(const struct NFA *nfa, struct mq *q, s64a end) { + DEBUG_PRINTF("entry, offset=%llu, end=%lld\n", q->offset, end); + return JOIN(ENGINE_EXEC_NAME, _Q_i)(nfa, q, end, CALLBACK_OUTPUT); +} + +char JOIN(ENGINE_EXEC_NAME, _Q2)(const struct NFA *nfa, struct mq *q, s64a end) { + DEBUG_PRINTF("entry, offset=%llu, end=%lld\n", q->offset, end); + return JOIN(ENGINE_EXEC_NAME, _Q_i)(nfa, q, end, STOP_AT_MATCH); +} + +static really_inline +void JOIN(ENGINE_EXEC_NAME, 
_StreamSilent)(const struct NFA *nfa, struct mq *q, + const u8 *buf, size_t length) { + const struct lbr_common *l = getImplNfa(nfa); + const struct RepeatInfo *info = getRepeatInfo(l); + struct lbr_state *lstate = (struct lbr_state *)q->state; + assert(ISALIGNED(lstate)); + + assert(!repeatIsDead(info, lstate)); + + // This call doesn't produce matches, so we elide the lbrMatchLoop call + // entirely and just do escape scans to maintain the repeat. + + size_t eloc = 0; + char escaped = FWDSCAN_FN(nfa, buf, 0, length, &eloc); + if (escaped) { + assert(eloc < length); + DEBUG_PRINTF("escape found at %zu, clearing repeat\n", eloc); + clearRepeat(info, lstate); + } +} + +// Rose infix path. +char JOIN(ENGINE_EXEC_NAME, _QR)(const struct NFA *nfa, struct mq *q, + ReportID report) { + assert(nfa && q); + assert(isLbrType(nfa->type)); + + if (q->cur == q->end) { + return 1; + } + + assert(q->cur + 1 < q->end); /* require at least two items */ + assert(q_cur_type(q) == MQE_START); + u64a sp = q_cur_offset(q); + q->cur++; + DEBUG_PRINTF("sp=%llu\n", sp); + + const struct lbr_common *l = getImplNfa(nfa); + const struct RepeatInfo *info = getRepeatInfo(l); + struct lbr_state *lstate = (struct lbr_state *)q->state; + assert(ISALIGNED(lstate)); + const s64a lastLoc = q_last_loc(q); + + while (q->cur < q->end) { + DEBUG_PRINTF("q item type=%d offset=%llu\n", q_cur_type(q), + q_cur_offset(q)); + + if (repeatIsDead(info, lstate)) { + DEBUG_PRINTF("repeat is dead\n"); + goto scan_done; + } + + u64a ep = q_cur_offset(q); + + if (sp < q->offset) { + DEBUG_PRINTF("HISTORY BUFFER SCAN\n"); + assert(q->offset - sp <= q->hlength); + u64a local_ep = MIN(q->offset, ep); + const u8 *ptr = q->history + q->hlength + sp - q->offset; + JOIN(ENGINE_EXEC_NAME, _StreamSilent)(nfa, q, ptr, local_ep - sp); + sp = local_ep; + } + + if (repeatIsDead(info, lstate)) { + DEBUG_PRINTF("repeat is dead\n"); + goto scan_done; + } + + if (sp < ep) { + DEBUG_PRINTF("MAIN BUFFER SCAN\n"); + assert(ep - q->offset <= q->length); + const u8 *ptr = q->buffer + sp - q->offset; + JOIN(ENGINE_EXEC_NAME, _StreamSilent)(nfa, q, ptr, ep - sp); + } + + if (repeatIsDead(info, lstate)) { +scan_done: + if (!JOIN(ENGINE_EXEC_NAME, _TopScan)(nfa, q, lastLoc)) { + assert(repeatIsDead(info, lstate)); + assert(q->cur == q->end); + return 0; + } + } else { + switch (q_cur_type(q)) { + case MQE_TOP: + case MQE_TOP_FIRST: + lbrTop(l, lstate, q->streamState, q_cur_offset(q)); + break; + case MQE_START: + case MQE_END: + break; + default: + DEBUG_PRINTF("unhandled event %d!\n", q_cur_type(q)); + assert(0); + break; + } + } + + sp = q_cur_offset(q); + q->cur++; + } + + if (repeatIsDead(info, lstate)) { + DEBUG_PRINTF("repeat is dead\n"); + return 0; + } + + if (lbrInAccept(l, lstate, q->streamState, sp, report)) { + return MO_MATCHES_PENDING; + } + + return lbrIsActive(l, lstate, q->streamState, sp); +} + +#undef ENGINE_EXEC_NAME +#undef EXEC_FN +#undef FWDSCAN_FN +#undef REVSCAN_FN +#undef ENGINE_ROOT_NAME diff --git a/contrib/libs/hyperscan/src/nfa/lbr_internal.h b/contrib/libs/hyperscan/src/nfa/lbr_internal.h index 51bf42865c..8ba11dd4d2 100644 --- a/contrib/libs/hyperscan/src/nfa/lbr_internal.h +++ b/contrib/libs/hyperscan/src/nfa/lbr_internal.h @@ -1,82 +1,82 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list 
of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Large Bounded Repeat (LBR): data structures. - */ - -#ifndef LBR_INTERNAL_H -#define LBR_INTERNAL_H - -#ifdef __cplusplus -extern "C" -{ -#endif - -#include "repeat_internal.h" - -/** \brief Common LBR header. */ -struct lbr_common { - u32 repeatInfoOffset; //!< offset of RepeatInfo structure relative - // to the start of lbr_common - ReportID report; //!< report to raise on match -}; - -struct lbr_dot { - struct lbr_common common; -}; - -struct lbr_verm { - struct lbr_common common; - char c; //!< escape char -}; - -struct lbr_shuf { - struct lbr_common common; - m128 mask_lo; //!< shufti lo mask for escape chars - m128 mask_hi; //!< shufti hi mask for escape chars -}; - -struct lbr_truf { - struct lbr_common common; - m128 mask1; - m128 mask2; -}; - -/** \brief Uncompressed ("full") state structure used by the LBR. This is - * stored in scratch, not in stream state. */ -struct lbr_state { - u64a lastEscape; //!< \brief offset of last escape seen. - union RepeatControl ctrl; //!< \brief repeat control block. */ -}; - -#ifdef __cplusplus -} -#endif - -#endif // LBR_INTERNAL_H +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Large Bounded Repeat (LBR): data structures. + */ + +#ifndef LBR_INTERNAL_H +#define LBR_INTERNAL_H + +#ifdef __cplusplus +extern "C" +{ +#endif + +#include "repeat_internal.h" + +/** \brief Common LBR header. */ +struct lbr_common { + u32 repeatInfoOffset; //!< offset of RepeatInfo structure relative + // to the start of lbr_common + ReportID report; //!< report to raise on match +}; + +struct lbr_dot { + struct lbr_common common; +}; + +struct lbr_verm { + struct lbr_common common; + char c; //!< escape char +}; + +struct lbr_shuf { + struct lbr_common common; + m128 mask_lo; //!< shufti lo mask for escape chars + m128 mask_hi; //!< shufti hi mask for escape chars +}; + +struct lbr_truf { + struct lbr_common common; + m128 mask1; + m128 mask2; +}; + +/** \brief Uncompressed ("full") state structure used by the LBR. This is + * stored in scratch, not in stream state. */ +struct lbr_state { + u64a lastEscape; //!< \brief offset of last escape seen. + union RepeatControl ctrl; //!< \brief repeat control block. */ +}; + +#ifdef __cplusplus +} +#endif + +#endif // LBR_INTERNAL_H diff --git a/contrib/libs/hyperscan/src/nfa/limex.h b/contrib/libs/hyperscan/src/nfa/limex.h index 0c9e276816..0223604dae 100644 --- a/contrib/libs/hyperscan/src/nfa/limex.h +++ b/contrib/libs/hyperscan/src/nfa/limex.h @@ -1,91 +1,91 @@ -/* +/* * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef LIMEX_H -#define LIMEX_H - -#ifdef __cplusplus + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef LIMEX_H +#define LIMEX_H + +#ifdef __cplusplus #include <string> -extern "C" -{ -#endif - -#include "nfa_api.h" - -#if defined(DUMP_SUPPORT) && defined(__cplusplus) -#define GENERATE_NFA_DUMP_DECL(gf_name) \ - } /* extern "C" */ \ - namespace ue2 { \ +extern "C" +{ +#endif + +#include "nfa_api.h" + +#if defined(DUMP_SUPPORT) && defined(__cplusplus) +#define GENERATE_NFA_DUMP_DECL(gf_name) \ + } /* extern "C" */ \ + namespace ue2 { \ void gf_name##_dump(const struct NFA *nfa, const std::string &base); \ - } /* namespace ue2 */ \ - extern "C" { - -#else -#define GENERATE_NFA_DUMP_DECL(gf_name) -#endif - -#define GENERATE_NFA_DECL(gf_name) \ - char gf_name##_testEOD(const struct NFA *nfa, const char *state, \ - const char *streamState, u64a offset, \ + } /* namespace ue2 */ \ + extern "C" { + +#else +#define GENERATE_NFA_DUMP_DECL(gf_name) +#endif + +#define GENERATE_NFA_DECL(gf_name) \ + char gf_name##_testEOD(const struct NFA *nfa, const char *state, \ + const char *streamState, u64a offset, \ NfaCallback callback, void *context); \ - char gf_name##_Q(const struct NFA *n, struct mq *q, s64a end); \ - char gf_name##_Q2(const struct NFA *n, struct mq *q, s64a end); \ - char gf_name##_QR(const struct NFA *n, struct mq *q, ReportID report); \ - char gf_name##_reportCurrent(const struct NFA *n, struct mq *q); \ - char gf_name##_inAccept(const struct NFA *n, ReportID report, \ - struct mq *q); \ + char gf_name##_Q(const struct NFA *n, struct mq *q, s64a end); \ + char gf_name##_Q2(const struct NFA *n, struct mq *q, s64a end); \ + char gf_name##_QR(const struct NFA *n, struct mq *q, ReportID report); \ + char gf_name##_reportCurrent(const struct NFA *n, struct mq *q); \ + char gf_name##_inAccept(const struct NFA *n, ReportID report, \ + struct mq *q); \ char gf_name##_inAnyAccept(const struct NFA *n, struct mq *q); \ - char gf_name##_queueInitState(const struct NFA *n, struct mq *q); \ - char gf_name##_initCompressedState(const struct 
NFA *n, u64a offset, \ - void *state, u8 key); \ - char gf_name##_B_Reverse(const struct NFA *n, u64a offset, const u8 *buf, \ - size_t buflen, const u8 *hbuf, size_t hlen, \ + char gf_name##_queueInitState(const struct NFA *n, struct mq *q); \ + char gf_name##_initCompressedState(const struct NFA *n, u64a offset, \ + void *state, u8 key); \ + char gf_name##_B_Reverse(const struct NFA *n, u64a offset, const u8 *buf, \ + size_t buflen, const u8 *hbuf, size_t hlen, \ NfaCallback cb, void *context); \ - char gf_name##_queueCompressState(const struct NFA *nfa, \ - const struct mq *q, s64a loc); \ - char gf_name##_expandState(const struct NFA *nfa, void *dest, \ - const void *src, u64a offset, u8 key); \ + char gf_name##_queueCompressState(const struct NFA *nfa, \ + const struct mq *q, s64a loc); \ + char gf_name##_expandState(const struct NFA *nfa, void *dest, \ + const void *src, u64a offset, u8 key); \ enum nfa_zombie_status gf_name##_zombie_status(const struct NFA *nfa, \ struct mq *q, s64a loc); \ - GENERATE_NFA_DUMP_DECL(gf_name) - + GENERATE_NFA_DUMP_DECL(gf_name) + GENERATE_NFA_DECL(nfaExecLimEx32) GENERATE_NFA_DECL(nfaExecLimEx64) GENERATE_NFA_DECL(nfaExecLimEx128) GENERATE_NFA_DECL(nfaExecLimEx256) GENERATE_NFA_DECL(nfaExecLimEx384) GENERATE_NFA_DECL(nfaExecLimEx512) - -#undef GENERATE_NFA_DECL -#undef GENERATE_NFA_DUMP_DECL - -#ifdef __cplusplus -} -#endif - -#endif + +#undef GENERATE_NFA_DECL +#undef GENERATE_NFA_DUMP_DECL + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/contrib/libs/hyperscan/src/nfa/limex_accel.c b/contrib/libs/hyperscan/src/nfa/limex_accel.c index 8553044d1a..4834b6a547 100644 --- a/contrib/libs/hyperscan/src/nfa/limex_accel.c +++ b/contrib/libs/hyperscan/src/nfa/limex_accel.c @@ -1,85 +1,85 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Limex NFA: acceleration runtime. 
- */ - -#include "limex_accel.h" - -#include "accel.h" -#include "limex_internal.h" -#include "limex_limits.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Limex NFA: acceleration runtime. + */ + +#include "limex_accel.h" + +#include "accel.h" +#include "limex_internal.h" +#include "limex_limits.h" #include "limex_shuffle.h" -#include "nfa_internal.h" -#include "shufti.h" -#include "truffle.h" -#include "ue2common.h" -#include "vermicelli.h" +#include "nfa_internal.h" +#include "shufti.h" +#include "truffle.h" +#include "ue2common.h" +#include "vermicelli.h" #include "util/arch.h" -#include "util/bitutils.h" -#include "util/simd_utils.h" - -static really_inline -size_t accelScanWrapper(const u8 *accelTable, const union AccelAux *aux, - const u8 *input, u32 idx, size_t i, size_t end) { - assert(accelTable); - assert(aux); - - DEBUG_PRINTF("shuffle returned %u -> aux %u\n", idx, accelTable[idx]); - assert(idx < (1 << NFA_MAX_ACCEL_STATES)); - if (!idx) { - return end; - } - - u8 aux_idx = accelTable[idx]; - if (!aux_idx) { - assert(aux[0].accel_type == ACCEL_NONE); - DEBUG_PRINTF("no accel, bailing\n"); - return i; - } - - aux = aux + aux_idx; +#include "util/bitutils.h" +#include "util/simd_utils.h" + +static really_inline +size_t accelScanWrapper(const u8 *accelTable, const union AccelAux *aux, + const u8 *input, u32 idx, size_t i, size_t end) { + assert(accelTable); + assert(aux); + + DEBUG_PRINTF("shuffle returned %u -> aux %u\n", idx, accelTable[idx]); + assert(idx < (1 << NFA_MAX_ACCEL_STATES)); + if (!idx) { + return end; + } + + u8 aux_idx = accelTable[idx]; + if (!aux_idx) { + assert(aux[0].accel_type == ACCEL_NONE); + DEBUG_PRINTF("no accel, bailing\n"); + return i; + } + + aux = aux + aux_idx; const u8 *ptr = run_accel(aux, &input[i], &input[end]); - assert(ptr >= &input[i]); - size_t j = (size_t)(ptr - input); - DEBUG_PRINTF("accel skipped %zu of %zu chars\n", (j - i), (end - i)); - DEBUG_PRINTF("returning j=%zu (i=%zu, end=%zu)\n", j, i, end); - return j; -} - -size_t doAccel32(u32 s, u32 accel, const u8 
*accelTable, - const union AccelAux *aux, const u8 *input, size_t i, - size_t end) { + assert(ptr >= &input[i]); + size_t j = (size_t)(ptr - input); + DEBUG_PRINTF("accel skipped %zu of %zu chars\n", (j - i), (end - i)); + DEBUG_PRINTF("returning j=%zu (i=%zu, end=%zu)\n", j, i, end); + return j; +} + +size_t doAccel32(u32 s, u32 accel, const u8 *accelTable, + const union AccelAux *aux, const u8 *input, size_t i, + size_t end) { u32 idx = pext32(s, accel); - return accelScanWrapper(accelTable, aux, input, idx, i, end); -} - + return accelScanWrapper(accelTable, aux, input, idx, i, end); +} + #ifdef ARCH_64_BIT size_t doAccel64(u64a s, u64a accel, const u8 *accelTable, const union AccelAux *aux, const u8 *input, size_t i, @@ -96,61 +96,61 @@ size_t doAccel64(m128 s, m128 accel, const u8 *accelTable, } #endif -size_t doAccel128(const m128 *state, const struct LimExNFA128 *limex, - const u8 *accelTable, const union AccelAux *aux, - const u8 *input, size_t i, size_t end) { - u32 idx; - m128 s = *state; - DEBUG_PRINTF("using PSHUFB for 128-bit shuffle\n"); - m128 accelPerm = limex->accelPermute; - m128 accelComp = limex->accelCompare; +size_t doAccel128(const m128 *state, const struct LimExNFA128 *limex, + const u8 *accelTable, const union AccelAux *aux, + const u8 *input, size_t i, size_t end) { + u32 idx; + m128 s = *state; + DEBUG_PRINTF("using PSHUFB for 128-bit shuffle\n"); + m128 accelPerm = limex->accelPermute; + m128 accelComp = limex->accelCompare; idx = packedExtract128(s, accelPerm, accelComp); - return accelScanWrapper(accelTable, aux, input, idx, i, end); -} - -size_t doAccel256(const m256 *state, const struct LimExNFA256 *limex, - const u8 *accelTable, const union AccelAux *aux, - const u8 *input, size_t i, size_t end) { - u32 idx; - m256 s = *state; - DEBUG_PRINTF("using PSHUFB for 256-bit shuffle\n"); - m256 accelPerm = limex->accelPermute; - m256 accelComp = limex->accelCompare; + return accelScanWrapper(accelTable, aux, input, idx, i, end); +} + +size_t doAccel256(const m256 *state, const struct LimExNFA256 *limex, + const u8 *accelTable, const union AccelAux *aux, + const u8 *input, size_t i, size_t end) { + u32 idx; + m256 s = *state; + DEBUG_PRINTF("using PSHUFB for 256-bit shuffle\n"); + m256 accelPerm = limex->accelPermute; + m256 accelComp = limex->accelCompare; #if !defined(HAVE_AVX2) u32 idx1 = packedExtract128(s.lo, accelPerm.lo, accelComp.lo); u32 idx2 = packedExtract128(s.hi, accelPerm.hi, accelComp.hi); assert((idx1 & idx2) == 0); // should be no shared bits idx = idx1 | idx2; -#else +#else idx = packedExtract256(s, accelPerm, accelComp); -#endif - return accelScanWrapper(accelTable, aux, input, idx, i, end); -} - -size_t doAccel384(const m384 *state, const struct LimExNFA384 *limex, - const u8 *accelTable, const union AccelAux *aux, - const u8 *input, size_t i, size_t end) { - u32 idx; - m384 s = *state; - DEBUG_PRINTF("using PSHUFB for 384-bit shuffle\n"); - m384 accelPerm = limex->accelPermute; - m384 accelComp = limex->accelCompare; +#endif + return accelScanWrapper(accelTable, aux, input, idx, i, end); +} + +size_t doAccel384(const m384 *state, const struct LimExNFA384 *limex, + const u8 *accelTable, const union AccelAux *aux, + const u8 *input, size_t i, size_t end) { + u32 idx; + m384 s = *state; + DEBUG_PRINTF("using PSHUFB for 384-bit shuffle\n"); + m384 accelPerm = limex->accelPermute; + m384 accelComp = limex->accelCompare; u32 idx1 = packedExtract128(s.lo, accelPerm.lo, accelComp.lo); u32 idx2 = packedExtract128(s.mid, accelPerm.mid, accelComp.mid); u32 
idx3 = packedExtract128(s.hi, accelPerm.hi, accelComp.hi); - assert((idx1 & idx2 & idx3) == 0); // should be no shared bits - idx = idx1 | idx2 | idx3; - return accelScanWrapper(accelTable, aux, input, idx, i, end); -} - -size_t doAccel512(const m512 *state, const struct LimExNFA512 *limex, - const u8 *accelTable, const union AccelAux *aux, - const u8 *input, size_t i, size_t end) { - u32 idx; - m512 s = *state; - DEBUG_PRINTF("using PSHUFB for 512-bit shuffle\n"); - m512 accelPerm = limex->accelPermute; - m512 accelComp = limex->accelCompare; + assert((idx1 & idx2 & idx3) == 0); // should be no shared bits + idx = idx1 | idx2 | idx3; + return accelScanWrapper(accelTable, aux, input, idx, i, end); +} + +size_t doAccel512(const m512 *state, const struct LimExNFA512 *limex, + const u8 *accelTable, const union AccelAux *aux, + const u8 *input, size_t i, size_t end) { + u32 idx; + m512 s = *state; + DEBUG_PRINTF("using PSHUFB for 512-bit shuffle\n"); + m512 accelPerm = limex->accelPermute; + m512 accelComp = limex->accelCompare; #if defined(HAVE_AVX512) idx = packedExtract512(s, accelPerm, accelComp); #elif defined(HAVE_AVX2) @@ -158,13 +158,13 @@ size_t doAccel512(const m512 *state, const struct LimExNFA512 *limex, u32 idx2 = packedExtract256(s.hi, accelPerm.hi, accelComp.hi); assert((idx1 & idx2) == 0); // should be no shared bits idx = idx1 | idx2; -#else +#else u32 idx1 = packedExtract128(s.lo.lo, accelPerm.lo.lo, accelComp.lo.lo); u32 idx2 = packedExtract128(s.lo.hi, accelPerm.lo.hi, accelComp.lo.hi); u32 idx3 = packedExtract128(s.hi.lo, accelPerm.hi.lo, accelComp.hi.lo); u32 idx4 = packedExtract128(s.hi.hi, accelPerm.hi.hi, accelComp.hi.hi); - assert((idx1 & idx2 & idx3 & idx4) == 0); // should be no shared bits - idx = idx1 | idx2 | idx3 | idx4; + assert((idx1 & idx2 & idx3 & idx4) == 0); // should be no shared bits + idx = idx1 | idx2 | idx3 | idx4; #endif - return accelScanWrapper(accelTable, aux, input, idx, i, end); -} + return accelScanWrapper(accelTable, aux, input, idx, i, end); +} diff --git a/contrib/libs/hyperscan/src/nfa/limex_accel.h b/contrib/libs/hyperscan/src/nfa/limex_accel.h index 5c3f379cbf..e5c94e82ad 100644 --- a/contrib/libs/hyperscan/src/nfa/limex_accel.h +++ b/contrib/libs/hyperscan/src/nfa/limex_accel.h @@ -1,55 +1,55 @@ -/* +/* * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Limex NFA: acceleration runtime. - * - * For the SIMD types (128 bits and above), we pass a pointer to the - * implementation NFA structure instead of three masks: otherwise we spend all - * our time building stack frames. - */ - -#ifndef LIMEX_ACCEL_H -#define LIMEX_ACCEL_H - -#include "util/simd_utils.h" // for m128 etc - -union AccelAux; + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Limex NFA: acceleration runtime. + * + * For the SIMD types (128 bits and above), we pass a pointer to the + * implementation NFA structure instead of three masks: otherwise we spend all + * our time building stack frames. 
+ */ + +#ifndef LIMEX_ACCEL_H +#define LIMEX_ACCEL_H + +#include "util/simd_utils.h" // for m128 etc + +union AccelAux; struct LimExNFA64; -struct LimExNFA128; -struct LimExNFA256; -struct LimExNFA384; -struct LimExNFA512; - -size_t doAccel32(u32 s, u32 accel, const u8 *accelTable, - const union AccelAux *aux, const u8 *input, size_t i, - size_t end); - +struct LimExNFA128; +struct LimExNFA256; +struct LimExNFA384; +struct LimExNFA512; + +size_t doAccel32(u32 s, u32 accel, const u8 *accelTable, + const union AccelAux *aux, const u8 *input, size_t i, + size_t end); + #ifdef ARCH_64_BIT size_t doAccel64(u64a s, u64a accel, const u8 *accelTable, const union AccelAux *aux, const u8 *input, size_t i, @@ -60,20 +60,20 @@ size_t doAccel64(m128 s, m128 accel, const u8 *accelTable, size_t end); #endif -size_t doAccel128(const m128 *s, const struct LimExNFA128 *limex, - const u8 *accelTable, const union AccelAux *aux, - const u8 *input, size_t i, size_t end); - -size_t doAccel256(const m256 *s, const struct LimExNFA256 *limex, - const u8 *accelTable, const union AccelAux *aux, - const u8 *input, size_t i, size_t end); - -size_t doAccel384(const m384 *s, const struct LimExNFA384 *limex, - const u8 *accelTable, const union AccelAux *aux, - const u8 *input, size_t i, size_t end); - -size_t doAccel512(const m512 *s, const struct LimExNFA512 *limex, - const u8 *accelTable, const union AccelAux *aux, - const u8 *input, size_t i, size_t end); - -#endif +size_t doAccel128(const m128 *s, const struct LimExNFA128 *limex, + const u8 *accelTable, const union AccelAux *aux, + const u8 *input, size_t i, size_t end); + +size_t doAccel256(const m256 *s, const struct LimExNFA256 *limex, + const u8 *accelTable, const union AccelAux *aux, + const u8 *input, size_t i, size_t end); + +size_t doAccel384(const m384 *s, const struct LimExNFA384 *limex, + const u8 *accelTable, const union AccelAux *aux, + const u8 *input, size_t i, size_t end); + +size_t doAccel512(const m512 *s, const struct LimExNFA512 *limex, + const u8 *accelTable, const union AccelAux *aux, + const u8 *input, size_t i, size_t end); + +#endif diff --git a/contrib/libs/hyperscan/src/nfa/limex_common_impl.h b/contrib/libs/hyperscan/src/nfa/limex_common_impl.h index 2b429aeb04..e441945d70 100644 --- a/contrib/libs/hyperscan/src/nfa/limex_common_impl.h +++ b/contrib/libs/hyperscan/src/nfa/limex_common_impl.h @@ -1,66 +1,66 @@ -/* +/* * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "repeat.h" -#include "util/join.h" - -/* impl of limex functions which depend only on state size */ - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "repeat.h" +#include "util/join.h" + +/* impl of limex functions which depend only on state size */ + #if !defined(SIZE) || !defined(STATE_T) || !defined(LOAD_FROM_ENG) \ || !defined(INLINE_ATTR) # error Must define SIZE, STATE_T, LOAD_FROM_ENG and INLINE_ATTR in includer. 
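/*
 * A minimal instantiation sketch (the macro values below are assumptions
 * for illustration, not quoted from the includers in this diff): a user of
 * this header fixes the state width before including it, and JOIN() then
 * pastes SIZE into every function name, so e.g. JOIN(moNfaTestEod, SIZE)
 * expands to moNfaTestEod128.
 *
 *     #define SIZE          128
 *     #define STATE_T       m128
 *     #define ENG_STATE_T   m128
 *     #define LOAD_FROM_ENG load_m128
 *     #define INLINE_ATTR   really_inline
 *     #include "limex_common_impl.h"
 */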
-#endif - -#define IMPL_NFA_T JOIN(struct LimExNFA, SIZE) - -#define TESTEOD_FN JOIN(moNfaTestEod, SIZE) -#define LIMEX_INACCEPT_FN JOIN(limexInAccept, SIZE) +#endif + +#define IMPL_NFA_T JOIN(struct LimExNFA, SIZE) + +#define TESTEOD_FN JOIN(moNfaTestEod, SIZE) +#define LIMEX_INACCEPT_FN JOIN(limexInAccept, SIZE) #define LIMEX_INANYACCEPT_FN JOIN(limexInAnyAccept, SIZE) -#define EXPIRE_ESTATE_FN JOIN(limexExpireExtendedState, SIZE) -#define REPORTCURRENT_FN JOIN(moNfaReportCurrent, SIZE) -#define INITIAL_FN JOIN(moNfaInitial, SIZE) -#define TOP_FN JOIN(moNfaTop, SIZE) -#define TOPN_FN JOIN(moNfaTopN, SIZE) +#define EXPIRE_ESTATE_FN JOIN(limexExpireExtendedState, SIZE) +#define REPORTCURRENT_FN JOIN(moNfaReportCurrent, SIZE) +#define INITIAL_FN JOIN(moNfaInitial, SIZE) +#define TOP_FN JOIN(moNfaTop, SIZE) +#define TOPN_FN JOIN(moNfaTopN, SIZE) #define PROCESS_ACCEPTS_IMPL_FN JOIN(moProcessAcceptsImpl, SIZE) -#define PROCESS_ACCEPTS_FN JOIN(moProcessAccepts, SIZE) -#define PROCESS_ACCEPTS_NOSQUASH_FN JOIN(moProcessAcceptsNoSquash, SIZE) -#define CONTEXT_T JOIN(NFAContext, SIZE) -#define ONES_STATE JOIN(ones_, STATE_T) -#define AND_STATE JOIN(and_, STATE_T) -#define OR_STATE JOIN(or_, STATE_T) -#define ANDNOT_STATE JOIN(andnot_, STATE_T) -#define CLEARBIT_STATE JOIN(clearbit_, STATE_T) -#define TESTBIT_STATE JOIN(testbit_, STATE_T) -#define ISNONZERO_STATE JOIN(isNonZero_, STATE_T) -#define ISZERO_STATE JOIN(isZero_, STATE_T) -#define SQUASH_UNTUG_BR_FN JOIN(lazyTug, SIZE) -#define GET_NFA_REPEAT_INFO_FN JOIN(getNfaRepeatInfo, SIZE) - +#define PROCESS_ACCEPTS_FN JOIN(moProcessAccepts, SIZE) +#define PROCESS_ACCEPTS_NOSQUASH_FN JOIN(moProcessAcceptsNoSquash, SIZE) +#define CONTEXT_T JOIN(NFAContext, SIZE) +#define ONES_STATE JOIN(ones_, STATE_T) +#define AND_STATE JOIN(and_, STATE_T) +#define OR_STATE JOIN(or_, STATE_T) +#define ANDNOT_STATE JOIN(andnot_, STATE_T) +#define CLEARBIT_STATE JOIN(clearbit_, STATE_T) +#define TESTBIT_STATE JOIN(testbit_, STATE_T) +#define ISNONZERO_STATE JOIN(isNonZero_, STATE_T) +#define ISZERO_STATE JOIN(isZero_, STATE_T) +#define SQUASH_UNTUG_BR_FN JOIN(lazyTug, SIZE) +#define GET_NFA_REPEAT_INFO_FN JOIN(getNfaRepeatInfo, SIZE) + #if defined(ARCH_64_BIT) && (SIZE >= 64) #define CHUNK_T u64a #define FIND_AND_CLEAR_FN findAndClearLSB_64 @@ -75,56 +75,56 @@ #define NUM_STATE_CHUNKS (sizeof(STATE_T) / sizeof(CHUNK_T)) -static really_inline -void SQUASH_UNTUG_BR_FN(const IMPL_NFA_T *limex, - const union RepeatControl *repeat_ctrl, - const char *repeat_state, u64a offset, - STATE_T *accstate) { - // switch off cyclic tug-accepts which aren't tuggable right now. - - /* TODO: might be nice to work which br to examine based on accstate rather - * than iterating overall br */ - - if (!limex->repeatCount) { - return; - } - - assert(repeat_ctrl); - assert(repeat_state); - - for (u32 i = 0; i < limex->repeatCount; i++) { - const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i); - - u32 cyclicState = info->cyclicState; +static really_inline +void SQUASH_UNTUG_BR_FN(const IMPL_NFA_T *limex, + const union RepeatControl *repeat_ctrl, + const char *repeat_state, u64a offset, + STATE_T *accstate) { + // switch off cyclic tug-accepts which aren't tuggable right now. 
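/*
 * Sketch of the predicate enforced by the loop below (all names are those
 * already in scope in this function): an accept bit driven by a cyclic
 * bounded repeat ("tug") survives in *accstate only if
 *
 *     repeatHasMatch(repeat, ctrl, state, offset) == REPEAT_MATCH
 *
 * otherwise CLEARBIT_STATE(accstate, cyclicState) switches the tug off so
 * a stale cyclic state cannot raise an accept.
 */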
+ + /* TODO: might be nice to work which br to examine based on accstate rather + * than iterating overall br */ + + if (!limex->repeatCount) { + return; + } + + assert(repeat_ctrl); + assert(repeat_state); + + for (u32 i = 0; i < limex->repeatCount; i++) { + const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i); + + u32 cyclicState = info->cyclicState; if (!TESTBIT_STATE(*accstate, cyclicState)) { - continue; - } - - DEBUG_PRINTF("repeat %u (cyclic state %u) is active\n", i, cyclicState); - DEBUG_PRINTF("checking if offset %llu would match\n", offset); - - const union RepeatControl *ctrl = repeat_ctrl + i; - const char *state = repeat_state + info->stateOffset; - const struct RepeatInfo *repeat = getRepeatInfo(info); - if (repeatHasMatch(repeat, ctrl, state, offset) != REPEAT_MATCH) { - DEBUG_PRINTF("not ready to accept yet\n"); - CLEARBIT_STATE(accstate, cyclicState); - } - } -} - + continue; + } + + DEBUG_PRINTF("repeat %u (cyclic state %u) is active\n", i, cyclicState); + DEBUG_PRINTF("checking if offset %llu would match\n", offset); + + const union RepeatControl *ctrl = repeat_ctrl + i; + const char *state = repeat_state + info->stateOffset; + const struct RepeatInfo *repeat = getRepeatInfo(info); + if (repeatHasMatch(repeat, ctrl, state, offset) != REPEAT_MATCH) { + DEBUG_PRINTF("not ready to accept yet\n"); + CLEARBIT_STATE(accstate, cyclicState); + } + } +} + static really_inline char PROCESS_ACCEPTS_IMPL_FN(const IMPL_NFA_T *limex, const STATE_T *s, STATE_T *squash, const STATE_T *acceptMask, const struct NFAAccept *acceptTable, u64a offset, NfaCallback callback, void *context) { - assert(s); - assert(limex); - assert(callback); - + assert(s); + assert(limex); + assert(callback); + const STATE_T accept_mask = *acceptMask; STATE_T accepts = AND_STATE(*s, accept_mask); - + // Caller must ensure that we have at least one accept state on. assert(ISNONZERO_STATE(accepts)); @@ -146,23 +146,23 @@ char PROCESS_ACCEPTS_IMPL_FN(const IMPL_NFA_T *limex, const STATE_T *s, bit + i * (u32)sizeof(chunk) * 8, a->reports, offset); int rv = limexRunAccept((const char *)limex, a, callback, context, offset); - if (unlikely(rv == MO_HALT_MATCHING)) { - return 1; - } + if (unlikely(rv == MO_HALT_MATCHING)) { + return 1; + } if (squash != NULL && a->squash != MO_INVALID_IDX) { DEBUG_PRINTF("applying squash mask at offset %u\n", a->squash); const ENG_STATE_T *sq = (const ENG_STATE_T *)((const char *)limex + a->squash); *squash = AND_STATE(*squash, LOAD_FROM_ENG(sq)); - } - } + } + } base_index += POPCOUNT_FN(mask_chunks[i]); - } - - return 0; -} - -static never_inline + } + + return 0; +} + +static never_inline char PROCESS_ACCEPTS_FN(const IMPL_NFA_T *limex, STATE_T *s, const STATE_T *acceptMask, const struct NFAAccept *acceptTable, u64a offset, @@ -171,10 +171,10 @@ char PROCESS_ACCEPTS_FN(const IMPL_NFA_T *limex, STATE_T *s, STATE_T squash = ONES_STATE; return PROCESS_ACCEPTS_IMPL_FN(limex, s, &squash, acceptMask, acceptTable, offset, callback, context); - + *s = AND_STATE(*s, squash); -} - +} + static never_inline char PROCESS_ACCEPTS_NOSQUASH_FN(const IMPL_NFA_T *limex, const STATE_T *s, const STATE_T *acceptMask, @@ -188,169 +188,169 @@ char PROCESS_ACCEPTS_NOSQUASH_FN(const IMPL_NFA_T *limex, const STATE_T *s, // Run EOD accepts. Note that repeat_ctrl and repeat_state may be NULL if this // LimEx contains no repeat structures. 
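/*
 * A hedged caller sketch: the engine-facing _testEOD entry point declared
 * via GENERATE_NFA_DECL in limex.h plausibly just unpacks its arguments
 * and forwards to TESTEOD_FN; the body below is an assumption for
 * illustration, not code from this diff.
 *
 *     char nfaExecLimEx128_testEOD(const struct NFA *n, const char *state,
 *                                  const char *streamState, u64a offset,
 *                                  NfaCallback cb, void *ctx) {
 *         const struct LimExNFA128 *limex = getImplNfa(n);
 *         ... derive repeat_ctrl / repeat_state from state and streamState
 *             when limex->repeatCount > 0, else pass NULL ...
 *         return moNfaTestEod128(limex, (const m128 *)state, repeat_ctrl,
 *                                repeat_state, offset, cb, ctx);
 *     }
 */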
-static really_inline -char TESTEOD_FN(const IMPL_NFA_T *limex, const STATE_T *s, - const union RepeatControl *repeat_ctrl, +static really_inline +char TESTEOD_FN(const IMPL_NFA_T *limex, const STATE_T *s, + const union RepeatControl *repeat_ctrl, const char *repeat_state, u64a offset, - NfaCallback callback, void *context) { - assert(limex && s); - - // There may not be any EOD accepts in this NFA. - if (!limex->acceptEodCount) { - return MO_CONTINUE_MATCHING; - } - + NfaCallback callback, void *context) { + assert(limex && s); + + // There may not be any EOD accepts in this NFA. + if (!limex->acceptEodCount) { + return MO_CONTINUE_MATCHING; + } + const STATE_T acceptEodMask = LOAD_FROM_ENG(&limex->acceptAtEOD); STATE_T foundAccepts = AND_STATE(*s, acceptEodMask); - + SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state, offset + 1 /* EOD 'symbol' */, &foundAccepts); - - if (unlikely(ISNONZERO_STATE(foundAccepts))) { - const struct NFAAccept *acceptEodTable = getAcceptEodTable(limex); + + if (unlikely(ISNONZERO_STATE(foundAccepts))) { + const struct NFAAccept *acceptEodTable = getAcceptEodTable(limex); if (PROCESS_ACCEPTS_NOSQUASH_FN(limex, &foundAccepts, &acceptEodMask, acceptEodTable, offset, callback, - context)) { - return MO_HALT_MATCHING; - } - } - - return MO_CONTINUE_MATCHING; -} - -// Run accepts corresponding to current state. -static really_inline -char REPORTCURRENT_FN(const IMPL_NFA_T *limex, const struct mq *q) { - assert(limex && q); - assert(q->state); - assert(q_cur_type(q) == MQE_START); - + context)) { + return MO_HALT_MATCHING; + } + } + + return MO_CONTINUE_MATCHING; +} + +// Run accepts corresponding to current state. +static really_inline +char REPORTCURRENT_FN(const IMPL_NFA_T *limex, const struct mq *q) { + assert(limex && q); + assert(q->state); + assert(q_cur_type(q) == MQE_START); + STATE_T s = *(STATE_T *)q->state; STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept); - STATE_T foundAccepts = AND_STATE(s, acceptMask); - - if (unlikely(ISNONZERO_STATE(foundAccepts))) { - DEBUG_PRINTF("found accepts\n"); - DEBUG_PRINTF("for nfa %p\n", limex); - const struct NFAAccept *acceptTable = getAcceptTable(limex); - u64a offset = q_cur_offset(q); - + STATE_T foundAccepts = AND_STATE(s, acceptMask); + + if (unlikely(ISNONZERO_STATE(foundAccepts))) { + DEBUG_PRINTF("found accepts\n"); + DEBUG_PRINTF("for nfa %p\n", limex); + const struct NFAAccept *acceptTable = getAcceptTable(limex); + u64a offset = q_cur_offset(q); + if (PROCESS_ACCEPTS_NOSQUASH_FN(limex, &foundAccepts, &acceptMask, acceptTable, offset, q->cb, - q->context)) { - return MO_HALT_MATCHING; - } - } - - return MO_CONTINUE_MATCHING; -} - -static really_inline -STATE_T INITIAL_FN(const IMPL_NFA_T *impl, char onlyDs) { + q->context)) { + return MO_HALT_MATCHING; + } + } + + return MO_CONTINUE_MATCHING; +} + +static really_inline +STATE_T INITIAL_FN(const IMPL_NFA_T *impl, char onlyDs) { return LOAD_FROM_ENG(onlyDs ? 
&impl->initDS : &impl->init); -} - -static really_inline -STATE_T TOP_FN(const IMPL_NFA_T *impl, char onlyDs, STATE_T state) { - return OR_STATE(INITIAL_FN(impl, onlyDs), state); -} - -static really_inline -STATE_T TOPN_FN(const IMPL_NFA_T *limex, STATE_T state, u32 n) { - assert(n < limex->topCount); +} + +static really_inline +STATE_T TOP_FN(const IMPL_NFA_T *impl, char onlyDs, STATE_T state) { + return OR_STATE(INITIAL_FN(impl, onlyDs), state); +} + +static really_inline +STATE_T TOPN_FN(const IMPL_NFA_T *limex, STATE_T state, u32 n) { + assert(n < limex->topCount); const ENG_STATE_T *topsptr = (const ENG_STATE_T *)((const char *)limex + limex->topOffset); STATE_T top = LOAD_FROM_ENG(&topsptr[n]); - return OR_STATE(top, state); -} - -static really_inline -void EXPIRE_ESTATE_FN(const IMPL_NFA_T *limex, struct CONTEXT_T *ctx, - u64a offset) { - assert(limex); - assert(ctx); - - if (!limex->repeatCount) { - return; - } - - DEBUG_PRINTF("expire estate at offset %llu\n", offset); - + return OR_STATE(top, state); +} + +static really_inline +void EXPIRE_ESTATE_FN(const IMPL_NFA_T *limex, struct CONTEXT_T *ctx, + u64a offset) { + assert(limex); + assert(ctx); + + if (!limex->repeatCount) { + return; + } + + DEBUG_PRINTF("expire estate at offset %llu\n", offset); + const STATE_T cyclics = AND_STATE(ctx->s, LOAD_FROM_ENG(&limex->repeatCyclicMask)); - if (ISZERO_STATE(cyclics)) { - DEBUG_PRINTF("no cyclic states are on\n"); - return; - } - - for (u32 i = 0; i < limex->repeatCount; i++) { - const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i); - - u32 cyclicState = info->cyclicState; + if (ISZERO_STATE(cyclics)) { + DEBUG_PRINTF("no cyclic states are on\n"); + return; + } + + for (u32 i = 0; i < limex->repeatCount; i++) { + const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i); + + u32 cyclicState = info->cyclicState; if (!TESTBIT_STATE(cyclics, cyclicState)) { - continue; - } - - DEBUG_PRINTF("repeat %u (cyclic state %u) is active\n", i, - cyclicState); - - const struct RepeatInfo *repeat = getRepeatInfo(info); - if (repeat->repeatMax == REPEAT_INF) { - continue; // can't expire - } - - const union RepeatControl *repeat_ctrl = ctx->repeat_ctrl + i; - const char *repeat_state = ctx->repeat_state + info->stateOffset; - u64a last_top = repeatLastTop(repeat, repeat_ctrl, repeat_state); - assert(repeat->repeatMax < REPEAT_INF); - DEBUG_PRINTF("offset %llu, last_top %llu repeatMax %u\n", offset, - last_top, repeat->repeatMax); - u64a adj = 0; - /* if the cycle's tugs are active at repeat max, it is still alive */ + continue; + } + + DEBUG_PRINTF("repeat %u (cyclic state %u) is active\n", i, + cyclicState); + + const struct RepeatInfo *repeat = getRepeatInfo(info); + if (repeat->repeatMax == REPEAT_INF) { + continue; // can't expire + } + + const union RepeatControl *repeat_ctrl = ctx->repeat_ctrl + i; + const char *repeat_state = ctx->repeat_state + info->stateOffset; + u64a last_top = repeatLastTop(repeat, repeat_ctrl, repeat_state); + assert(repeat->repeatMax < REPEAT_INF); + DEBUG_PRINTF("offset %llu, last_top %llu repeatMax %u\n", offset, + last_top, repeat->repeatMax); + u64a adj = 0; + /* if the cycle's tugs are active at repeat max, it is still alive */ if (TESTBIT_STATE(LOAD_FROM_ENG(&limex->accept), cyclicState) || TESTBIT_STATE(LOAD_FROM_ENG(&limex->acceptAtEOD), cyclicState)) { - DEBUG_PRINTF("lazy tug possible - may still be inspected\n"); - adj = 1; - } else { + DEBUG_PRINTF("lazy tug possible - may still be inspected\n"); + adj = 1; + } else { const ENG_STATE_T 
*tug_mask = (const ENG_STATE_T *)((const char *)info + info->tugMaskOffset); if (ISNONZERO_STATE(AND_STATE(ctx->s, LOAD_FROM_ENG(tug_mask)))) { - DEBUG_PRINTF("tug possible - may still be inspected\n"); - adj = 1; - } - } - - if (offset >= last_top + repeat->repeatMax + adj) { - DEBUG_PRINTF("repeat state is stale, squashing state %u\n", - cyclicState); - CLEARBIT_STATE(&ctx->s, cyclicState); - } - } -} - -// Specialised inAccept call: LimEx NFAs with the "lazy tug" optimisation (see -// UE-1636) need to guard cyclic tug-accepts as well. -static really_inline -char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state, - union RepeatControl *repeat_ctrl, char *repeat_state, - u64a offset, ReportID report) { - assert(limex); - + DEBUG_PRINTF("tug possible - may still be inspected\n"); + adj = 1; + } + } + + if (offset >= last_top + repeat->repeatMax + adj) { + DEBUG_PRINTF("repeat state is stale, squashing state %u\n", + cyclicState); + CLEARBIT_STATE(&ctx->s, cyclicState); + } + } +} + +// Specialised inAccept call: LimEx NFAs with the "lazy tug" optimisation (see +// UE-1636) need to guard cyclic tug-accepts as well. +static really_inline +char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state, + union RepeatControl *repeat_ctrl, char *repeat_state, + u64a offset, ReportID report) { + assert(limex); + const STATE_T accept_mask = LOAD_FROM_ENG(&limex->accept); STATE_T accepts = AND_STATE(state, accept_mask); - - // Are we in an accept state? + + // Are we in an accept state? if (ISZERO_STATE(accepts)) { - DEBUG_PRINTF("no accept states are on\n"); - return 0; - } - + DEBUG_PRINTF("no accept states are on\n"); + return 0; + } + SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state, offset, &accepts); - - DEBUG_PRINTF("looking for report %u\n", report); - + + DEBUG_PRINTF("looking for report %u\n", report); + const struct NFAAccept *acceptTable = getAcceptTable(limex); - + CHUNK_T chunks[NUM_STATE_CHUNKS]; memcpy(chunks, &accepts, sizeof(accepts)); @@ -373,13 +373,13 @@ char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state, DEBUG_PRINTF("report %u is on\n", report); return 1; } - } + } base_index += POPCOUNT_FN(mask_chunks[i]); - } - - return 0; -} - + } + + return 0; +} + static really_inline char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state, union RepeatControl *repeat_ctrl, char *repeat_state, @@ -400,30 +400,30 @@ char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state, return ISNONZERO_STATE(accstate); } -#undef TESTEOD_FN -#undef REPORTCURRENT_FN -#undef EXPIRE_ESTATE_FN -#undef LIMEX_INACCEPT_FN +#undef TESTEOD_FN +#undef REPORTCURRENT_FN +#undef EXPIRE_ESTATE_FN +#undef LIMEX_INACCEPT_FN #undef LIMEX_INANYACCEPT_FN -#undef INITIAL_FN -#undef TOP_FN -#undef TOPN_FN -#undef CONTEXT_T -#undef IMPL_NFA_T -#undef ONES_STATE -#undef AND_STATE -#undef OR_STATE -#undef ANDNOT_STATE -#undef CLEARBIT_STATE -#undef TESTBIT_STATE -#undef ISNONZERO_STATE -#undef ISZERO_STATE +#undef INITIAL_FN +#undef TOP_FN +#undef TOPN_FN +#undef CONTEXT_T +#undef IMPL_NFA_T +#undef ONES_STATE +#undef AND_STATE +#undef OR_STATE +#undef ANDNOT_STATE +#undef CLEARBIT_STATE +#undef TESTBIT_STATE +#undef ISNONZERO_STATE +#undef ISZERO_STATE #undef PROCESS_ACCEPTS_IMPL_FN -#undef PROCESS_ACCEPTS_FN -#undef PROCESS_ACCEPTS_NOSQUASH_FN -#undef SQUASH_UNTUG_BR_FN -#undef GET_NFA_REPEAT_INFO_FN - +#undef PROCESS_ACCEPTS_FN +#undef PROCESS_ACCEPTS_NOSQUASH_FN +#undef SQUASH_UNTUG_BR_FN +#undef GET_NFA_REPEAT_INFO_FN + #undef CHUNK_T #undef FIND_AND_CLEAR_FN #undef POPCOUNT_FN diff 
--git a/contrib/libs/hyperscan/src/nfa/limex_compile.cpp b/contrib/libs/hyperscan/src/nfa/limex_compile.cpp index fcf90538b0..9233ae515e 100644 --- a/contrib/libs/hyperscan/src/nfa/limex_compile.cpp +++ b/contrib/libs/hyperscan/src/nfa/limex_compile.cpp @@ -1,84 +1,84 @@ -/* +/* * Copyright (c) 2015-2020, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + /** * \file - * \brief Main NFA build code. 
- */ - -#include "limex_compile.h" - -#include "accel.h" -#include "accelcompile.h" -#include "grey.h" -#include "limex_internal.h" -#include "limex_limits.h" -#include "nfa_build_util.h" + * \brief Main NFA build code. + */ + +#include "limex_compile.h" + +#include "accel.h" +#include "accelcompile.h" +#include "grey.h" +#include "limex_internal.h" +#include "limex_limits.h" +#include "nfa_build_util.h" #include "nfagraph/ng_dominators.h" -#include "nfagraph/ng_holder.h" -#include "nfagraph/ng_limex_accel.h" -#include "nfagraph/ng_repeat.h" -#include "nfagraph/ng_squash.h" -#include "nfagraph/ng_util.h" -#include "ue2common.h" -#include "repeatcompile.h" -#include "util/alloc.h" -#include "util/bitutils.h" +#include "nfagraph/ng_holder.h" +#include "nfagraph/ng_limex_accel.h" +#include "nfagraph/ng_repeat.h" +#include "nfagraph/ng_squash.h" +#include "nfagraph/ng_util.h" +#include "ue2common.h" +#include "repeatcompile.h" +#include "util/alloc.h" +#include "util/bitutils.h" #include "util/bytecode_ptr.h" -#include "util/charreach.h" -#include "util/compile_context.h" -#include "util/container.h" +#include "util/charreach.h" +#include "util/compile_context.h" +#include "util/container.h" #include "util/flat_containers.h" -#include "util/graph.h" -#include "util/graph_range.h" +#include "util/graph.h" +#include "util/graph_range.h" #include "util/graph_small_color_map.h" -#include "util/order_check.h" +#include "util/order_check.h" #include "util/unordered.h" -#include "util/verify_types.h" - -#include <algorithm> -#include <cassert> -#include <cstddef> -#include <cstdlib> -#include <cstring> -#include <map> -#include <set> -#include <vector> - -#include <boost/graph/breadth_first_search.hpp> +#include "util/verify_types.h" + +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdlib> +#include <cstring> +#include <map> +#include <set> +#include <vector> + +#include <boost/graph/breadth_first_search.hpp> #include <boost/graph/depth_first_search.hpp> #include <boost/range/adaptor/map.hpp> - -using namespace std; + +using namespace std; using boost::adaptors::map_values; - -namespace ue2 { - + +namespace ue2 { + /** * \brief Special state index value meaning that the vertex will not * participate in an (NFA/DFA/etc) implementation. 
@@ -97,197 +97,197 @@ static constexpr u32 MAX_REPEAT_CHAR_REACH = 26; /* Minimum bounded repeat trigger distance to consider as a fast NFA */ static constexpr u8 MIN_REPEAT_TRIGGER_DISTANCE = 6; -namespace { - -struct precalcAccel { - precalcAccel() : single_offset(0), double_offset(0) {} - CharReach single_cr; - u32 single_offset; - - CharReach double_cr; - flat_set<pair<u8, u8>> double_lits; /* double-byte accel stop literals */ - u32 double_offset; -}; - +namespace { + +struct precalcAccel { + precalcAccel() : single_offset(0), double_offset(0) {} + CharReach single_cr; + u32 single_offset; + + CharReach double_cr; + flat_set<pair<u8, u8>> double_lits; /* double-byte accel stop literals */ + u32 double_offset; +}; + struct limex_accel_info { unordered_set<NFAVertex> accelerable; - map<NFAStateSet, precalcAccel> precalc; + map<NFAStateSet, precalcAccel> precalc; unordered_map<NFAVertex, flat_set<NFAVertex>> friends; unordered_map<NFAVertex, AccelScheme> accel_map; -}; - -static +}; + +static unordered_map<NFAVertex, NFAStateSet> reindexByStateId(const unordered_map<NFAVertex, NFAStateSet> &in, const NGHolder &g, const unordered_map<NFAVertex, u32> &state_ids, - const u32 num_states) { + const u32 num_states) { unordered_map<NFAVertex, NFAStateSet> out; out.reserve(in.size()); - - vector<u32> indexToState(num_vertices(g), NO_STATE); - for (const auto &m : state_ids) { - u32 vert_id = g[m.first].index; - assert(vert_id < indexToState.size()); - indexToState[vert_id] = m.second; - } - - for (const auto &m : in) { - NFAVertex v = m.first; - assert(m.second.size() <= indexToState.size()); - - NFAStateSet mask(num_states); - for (size_t i = m.second.find_first(); i != m.second.npos; - i = m.second.find_next(i)) { - u32 state_id = indexToState[i]; - if (state_id == NO_STATE) { - continue; - } - mask.set(state_id); - } - out.emplace(v, mask); - } - - return out; -} - -struct build_info { - build_info(NGHolder &hi, + + vector<u32> indexToState(num_vertices(g), NO_STATE); + for (const auto &m : state_ids) { + u32 vert_id = g[m.first].index; + assert(vert_id < indexToState.size()); + indexToState[vert_id] = m.second; + } + + for (const auto &m : in) { + NFAVertex v = m.first; + assert(m.second.size() <= indexToState.size()); + + NFAStateSet mask(num_states); + for (size_t i = m.second.find_first(); i != m.second.npos; + i = m.second.find_next(i)) { + u32 state_id = indexToState[i]; + if (state_id == NO_STATE) { + continue; + } + mask.set(state_id); + } + out.emplace(v, mask); + } + + return out; +} + +struct build_info { + build_info(NGHolder &hi, const unordered_map<NFAVertex, u32> &states_in, - const vector<BoundedRepeatData> &ri, + const vector<BoundedRepeatData> &ri, const unordered_map<NFAVertex, NFAStateSet> &rsmi, const unordered_map<NFAVertex, NFAStateSet> &smi, const map<u32, set<NFAVertex>> &ti, const set<NFAVertex> &zi, bool dai, bool sci, const CompileContext &cci, u32 nsi) : h(hi), state_ids(states_in), repeats(ri), tops(ti), tugs(nsi), zombies(zi), do_accel(dai), stateCompression(sci), cc(cci), - num_states(nsi) { - for (const auto &br : repeats) { + num_states(nsi) { + for (const auto &br : repeats) { for (auto v : br.tug_triggers) { assert(state_ids.at(v) != NO_STATE); tugs.set(state_ids.at(v)); } - br_cyclic[br.cyclic] = - BoundedRepeatSummary(br.repeatMin, br.repeatMax); - } - - // Convert squash maps to be indexed by state index rather than - // vertex_index. 
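/*
 * Worked example of the reindexing performed by reindexByStateId above
 * (values invented for illustration): suppose g has vertices with indices
 * {2, 5, 9}, state_ids maps vertex#2 -> state 0 and vertex#9 -> state 1,
 * and vertex#5 has NO_STATE (it will not participate in the
 * implementation). Then an input mask over vertex indices {2, 5} becomes
 * the state-indexed mask {0}: bit 5 is dropped because
 * indexToState[5] == NO_STATE, and bit 2 is renumbered to state 0.
 */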
- squashMap = reindexByStateId(smi, h, state_ids, num_states); - reportSquashMap = reindexByStateId(rsmi, h, state_ids, num_states); - } - - NGHolder &h; + br_cyclic[br.cyclic] = + BoundedRepeatSummary(br.repeatMin, br.repeatMax); + } + + // Convert squash maps to be indexed by state index rather than + // vertex_index. + squashMap = reindexByStateId(smi, h, state_ids, num_states); + reportSquashMap = reindexByStateId(rsmi, h, state_ids, num_states); + } + + NGHolder &h; const unordered_map<NFAVertex, u32> &state_ids; - const vector<BoundedRepeatData> &repeats; - - // Squash maps; state sets are indexed by state_id. + const vector<BoundedRepeatData> &repeats; + + // Squash maps; state sets are indexed by state_id. unordered_map<NFAVertex, NFAStateSet> reportSquashMap; unordered_map<NFAVertex, NFAStateSet> squashMap; - + const map<u32, set<NFAVertex>> &tops; NFAStateSet tugs; - map<NFAVertex, BoundedRepeatSummary> br_cyclic; - const set<NFAVertex> &zombies; - bool do_accel; - bool stateCompression; - const CompileContext &cc; - u32 num_states; + map<NFAVertex, BoundedRepeatSummary> br_cyclic; + const set<NFAVertex> &zombies; + bool do_accel; + bool stateCompression; + const CompileContext &cc; + u32 num_states; limex_accel_info accel; -}; - +}; + #define LAST_LIMEX_NFA LIMEX_NFA_512 -// Constants for scoring mechanism +// Constants for scoring mechanism const int SHIFT_COST = 10; // limex: cost per shift mask -const int EXCEPTION_COST = 4; // limex: per exception - -template<NFAEngineType t> struct NFATraits { }; - -template<template<NFAEngineType t> class sfunc, typename rv_t, typename arg_t, - NFAEngineType lb> -struct DISPATCH_BY_LIMEX_TYPE_INT { - static rv_t doOp(NFAEngineType i, const arg_t &arg) { - if (i == lb) { - return sfunc<lb>::call(arg); - } else { - return DISPATCH_BY_LIMEX_TYPE_INT<sfunc, rv_t, arg_t, - (NFAEngineType)(lb + 1)> - ::doOp(i, arg); - } - } -}; - -template<template<NFAEngineType t> class sfunc, typename rv_t, typename arg_t> -struct DISPATCH_BY_LIMEX_TYPE_INT<sfunc, rv_t, arg_t, - (NFAEngineType)(LAST_LIMEX_NFA + 1)> { - // dummy - static rv_t doOp(NFAEngineType, const arg_t &) { - assert(0); - throw std::logic_error("Unreachable"); - } -}; - -#define DISPATCH_BY_LIMEX_TYPE(i, op, arg) \ - DISPATCH_BY_LIMEX_TYPE_INT<op, decltype(op<(NFAEngineType)0>::call(arg)), \ - decltype(arg), (NFAEngineType)0>::doOp(i, arg) - -// Given a number of states, find the size of the smallest container NFA it -// will fit in. We support NFAs of the following sizes: 32, 64, 128, 256, 384, -// 512. -size_t findContainerSize(size_t states) { - if (states > 256 && states <= 384) { - return 384; - } - return 1ULL << (lg2(states - 1) + 1); -} - -bool isLimitedTransition(int from, int to, int maxshift) { - int diff = to - from; - - // within our shift? 
- if (diff < 0 || diff > maxshift) { - return false; - } - - // can't jump over a bollard - return (from & ~63) == (to & ~63); -} - -// Fill a bit mask -template<class Mask> +const int EXCEPTION_COST = 4; // limex: per exception + +template<NFAEngineType t> struct NFATraits { }; + +template<template<NFAEngineType t> class sfunc, typename rv_t, typename arg_t, + NFAEngineType lb> +struct DISPATCH_BY_LIMEX_TYPE_INT { + static rv_t doOp(NFAEngineType i, const arg_t &arg) { + if (i == lb) { + return sfunc<lb>::call(arg); + } else { + return DISPATCH_BY_LIMEX_TYPE_INT<sfunc, rv_t, arg_t, + (NFAEngineType)(lb + 1)> + ::doOp(i, arg); + } + } +}; + +template<template<NFAEngineType t> class sfunc, typename rv_t, typename arg_t> +struct DISPATCH_BY_LIMEX_TYPE_INT<sfunc, rv_t, arg_t, + (NFAEngineType)(LAST_LIMEX_NFA + 1)> { + // dummy + static rv_t doOp(NFAEngineType, const arg_t &) { + assert(0); + throw std::logic_error("Unreachable"); + } +}; + +#define DISPATCH_BY_LIMEX_TYPE(i, op, arg) \ + DISPATCH_BY_LIMEX_TYPE_INT<op, decltype(op<(NFAEngineType)0>::call(arg)), \ + decltype(arg), (NFAEngineType)0>::doOp(i, arg) + +// Given a number of states, find the size of the smallest container NFA it +// will fit in. We support NFAs of the following sizes: 32, 64, 128, 256, 384, +// 512. +size_t findContainerSize(size_t states) { + if (states > 256 && states <= 384) { + return 384; + } + return 1ULL << (lg2(states - 1) + 1); +} + +bool isLimitedTransition(int from, int to, int maxshift) { + int diff = to - from; + + // within our shift? + if (diff < 0 || diff > maxshift) { + return false; + } + + // can't jump over a bollard + return (from & ~63) == (to & ~63); +} + +// Fill a bit mask +template<class Mask> void maskFill(Mask &m, u8 c) { - memset(&m, c, sizeof(m)); -} - -// Clear a bit mask. -template<class Mask> -void maskClear(Mask &m) { - memset(&m, 0, sizeof(m)); -} - -template<class Mask> -u8 *maskGetByte(Mask &m, u32 bit) { - assert(bit < sizeof(m)*8); - u8 *m8 = (u8 *)&m; - - return m8 + bit/8; -} - -// Set a bit in a mask, starting from the little end. -template<class Mask> -void maskSetBit(Mask &m, const unsigned int bit) { - u8 *byte = maskGetByte(m, bit); - *byte |= 1U << (bit % 8); -} - -template<class Mask> -void maskSetBits(Mask &m, const NFAStateSet &bits) { - for (size_t i = bits.find_first(); i != bits.npos; i = bits.find_next(i)) { - maskSetBit(m, i); - } -} - + memset(&m, c, sizeof(m)); +} + +// Clear a bit mask. +template<class Mask> +void maskClear(Mask &m) { + memset(&m, 0, sizeof(m)); +} + +template<class Mask> +u8 *maskGetByte(Mask &m, u32 bit) { + assert(bit < sizeof(m)*8); + u8 *m8 = (u8 *)&m; + + return m8 + bit/8; +} + +// Set a bit in a mask, starting from the little end. +template<class Mask> +void maskSetBit(Mask &m, const unsigned int bit) { + u8 *byte = maskGetByte(m, bit); + *byte |= 1U << (bit % 8); +} + +template<class Mask> +void maskSetBits(Mask &m, const NFAStateSet &bits) { + for (size_t i = bits.find_first(); i != bits.npos; i = bits.find_next(i)) { + maskSetBit(m, i); + } +} + template<class Mask> bool isMaskZero(Mask &m) { u8 *m8 = (u8 *)&m; @@ -299,251 +299,251 @@ bool isMaskZero(Mask &m) { return true; } -// Sets an entire byte in a mask to the given value -template<class Mask> -void maskSetByte(Mask &m, const unsigned int idx, const char val) { - assert(idx < sizeof(m)); - char *m8 = (char *)&m; - char &byte = m8[idx]; - byte = val; -} - -// Clear a bit in the mask, starting from the little end. 
-template<class Mask> -void maskClearBit(Mask &m, const u32 bit) { - u8 *byte = maskGetByte(m, bit); - *byte &= ~(1U << (bit % 8)); -} - -/* - * Common code: the following code operates on parts of the NFA that are common - * to both the (defunct) General and the LimEx models. - */ - -static -void buildReachMapping(const build_info &args, vector<NFAStateSet> &reach, - vector<u8> &reachMap) { - const NGHolder &h = args.h; - const auto &state_ids = args.state_ids; - - // Build a list of vertices with a state index assigned. - vector<NFAVertex> verts; - verts.reserve(args.num_states); - for (auto v : vertices_range(h)) { - if (state_ids.at(v) != NO_STATE) { - verts.push_back(v); - } - } - - // Build a mapping from set-of-states -> reachability. - map<NFAStateSet, CharReach> mapping; - NFAStateSet states(args.num_states); - for (size_t i = 0; i < N_CHARS; i++) { - states.reset(); - for (auto v : verts) { - const CharReach &cr = h[v].char_reach; - if (cr.test(i)) { - u32 state_id = state_ids.at(v); - states.set(state_id); - } - } - mapping[states].set(i); - } - - DEBUG_PRINTF("%zu distinct reachability entries\n", mapping.size()); - assert(!mapping.empty()); - - // Build a vector of distinct reachability entries and a mapping from every - // character to one of those entries. - - reach.reserve(mapping.size()); - reachMap.assign(N_CHARS, 0); - - u8 num = 0; - for (auto mi = mapping.begin(), me = mapping.end(); mi != me; ++mi, ++num) { - // Reach entry. - reach.push_back(mi->first); - - // Character mapping. - const CharReach &cr = mi->second; - for (size_t i = cr.find_first(); i != CharReach::npos; - i = cr.find_next(i)) { - reachMap[i] = num; - } - } -} - -struct AccelBuild { +// Sets an entire byte in a mask to the given value +template<class Mask> +void maskSetByte(Mask &m, const unsigned int idx, const char val) { + assert(idx < sizeof(m)); + char *m8 = (char *)&m; + char &byte = m8[idx]; + byte = val; +} + +// Clear a bit in the mask, starting from the little end. +template<class Mask> +void maskClearBit(Mask &m, const u32 bit) { + u8 *byte = maskGetByte(m, bit); + *byte &= ~(1U << (bit % 8)); +} + +/* + * Common code: the following code operates on parts of the NFA that are common + * to both the (defunct) General and the LimEx models. + */ + +static +void buildReachMapping(const build_info &args, vector<NFAStateSet> &reach, + vector<u8> &reachMap) { + const NGHolder &h = args.h; + const auto &state_ids = args.state_ids; + + // Build a list of vertices with a state index assigned. + vector<NFAVertex> verts; + verts.reserve(args.num_states); + for (auto v : vertices_range(h)) { + if (state_ids.at(v) != NO_STATE) { + verts.push_back(v); + } + } + + // Build a mapping from set-of-states -> reachability. + map<NFAStateSet, CharReach> mapping; + NFAStateSet states(args.num_states); + for (size_t i = 0; i < N_CHARS; i++) { + states.reset(); + for (auto v : verts) { + const CharReach &cr = h[v].char_reach; + if (cr.test(i)) { + u32 state_id = state_ids.at(v); + states.set(state_id); + } + } + mapping[states].set(i); + } + + DEBUG_PRINTF("%zu distinct reachability entries\n", mapping.size()); + assert(!mapping.empty()); + + // Build a vector of distinct reachability entries and a mapping from every + // character to one of those entries. + + reach.reserve(mapping.size()); + reachMap.assign(N_CHARS, 0); + + u8 num = 0; + for (auto mi = mapping.begin(), me = mapping.end(); mi != me; ++mi, ++num) { + // Reach entry. + reach.push_back(mi->first); + + // Character mapping. 
+ const CharReach &cr = mi->second; + for (size_t i = cr.find_first(); i != CharReach::npos; + i = cr.find_next(i)) { + reachMap[i] = num; + } + } +} + +struct AccelBuild { AccelBuild() : v(NGHolder::null_vertex()), state(0), offset(0) {} - NFAVertex v; - u32 state; - u32 offset; // offset correction to apply - CharReach stop1; // single-byte accel stop literals - flat_set<pair<u8, u8>> stop2; // double-byte accel stop literals -}; - -static -void findStopLiterals(const build_info &bi, NFAVertex v, AccelBuild &build) { - u32 state = bi.state_ids.at(v); - build.v = v; - build.state = state; - NFAStateSet ss(bi.num_states); - ss.set(state); - - if (!contains(bi.accel.precalc, ss)) { - build.stop1 = CharReach::dot(); - } else { - const precalcAccel &precalc = bi.accel.precalc.at(ss); - if (precalc.double_lits.empty()) { - build.stop1 = precalc.single_cr; - build.offset = precalc.single_offset; - } else { - build.stop1 = precalc.double_cr; - build.stop2 = precalc.double_lits; - build.offset = precalc.double_offset; - } - } - -#ifdef DEBUG - printf("state %u stop1:", state); - for (size_t j = build.stop1.find_first(); j != build.stop1.npos; - j = build.stop1.find_next(j)) { - printf(" 0x%02x", (u32)j); - } - printf("\n"); - printf("state %u stop2:", state); - for (auto it = build.stop2.begin(); it != build.stop2.end(); ++it) { - printf(" 0x%02hhx%02hhx", it->first, it->second); - } - printf("\n"); -#endif -} - -// Generate all the data we need for at most NFA_MAX_ACCEL_STATES accelerable -// states. -static -void gatherAccelStates(const build_info &bi, vector<AccelBuild> &accelStates) { - for (auto v : bi.accel.accelerable) { - DEBUG_PRINTF("state %u is accelerable\n", bi.state_ids.at(v)); - AccelBuild a; - findStopLiterals(bi, v, a); - accelStates.push_back(a); - } - - // AccelStates should be sorted by state number, so that we build our accel - // masks correctly. - sort(accelStates.begin(), accelStates.end(), - [](const AccelBuild &a, const AccelBuild &b) { - return a.state < b.state; - }); - - // Our caller shouldn't have fed us too many accel states. 
- assert(accelStates.size() <= NFA_MAX_ACCEL_STATES); - if (accelStates.size() > NFA_MAX_ACCEL_STATES) { - accelStates.resize(NFA_MAX_ACCEL_STATES); - } -} - -static -void combineAccel(const AccelBuild &in, AccelBuild &out) { - // stop1 and stop2 union - out.stop1 |= in.stop1; - out.stop2.insert(in.stop2.begin(), in.stop2.end()); - // offset is maximum of the two - out.offset = max(out.offset, in.offset); -} - -static -void minimiseAccel(AccelBuild &build) { - flat_set<pair<u8, u8>> new_stop2; - // Any two-byte accels beginning with a one-byte accel should be removed - for (const auto &si : build.stop2) { - if (!build.stop1.test(si.first)) { - new_stop2.insert(si); - } - } - build.stop2 = new_stop2; -} - -struct AccelAuxCmp { - explicit AccelAuxCmp(const AccelAux &aux_in) : aux(aux_in) {} - bool operator()(const AccelAux &a) const { - return !memcmp(&a, &aux, sizeof(AccelAux)); - } -private: - const AccelAux &aux; -}; - -static -bool allow_wide_accel(NFAVertex v, const NGHolder &g, NFAVertex sds_or_proxy) { - return v == sds_or_proxy || edge(g.start, v, g).second; -} - -static -bool allow_wide_accel(const vector<NFAVertex> &vv, const NGHolder &g, - NFAVertex sds_or_proxy) { - for (auto v : vv) { - if (allow_wide_accel(v, g, sds_or_proxy)) { - return true; - } - } - - return false; -} - -// identify and mark states that we feel are accelerable (for a limex NFA) -/* Note: leftfix nfas allow accepts to be accelerated */ -static -void nfaFindAccelSchemes(const NGHolder &g, - const map<NFAVertex, BoundedRepeatSummary> &br_cyclic, + NFAVertex v; + u32 state; + u32 offset; // offset correction to apply + CharReach stop1; // single-byte accel stop literals + flat_set<pair<u8, u8>> stop2; // double-byte accel stop literals +}; + +static +void findStopLiterals(const build_info &bi, NFAVertex v, AccelBuild &build) { + u32 state = bi.state_ids.at(v); + build.v = v; + build.state = state; + NFAStateSet ss(bi.num_states); + ss.set(state); + + if (!contains(bi.accel.precalc, ss)) { + build.stop1 = CharReach::dot(); + } else { + const precalcAccel &precalc = bi.accel.precalc.at(ss); + if (precalc.double_lits.empty()) { + build.stop1 = precalc.single_cr; + build.offset = precalc.single_offset; + } else { + build.stop1 = precalc.double_cr; + build.stop2 = precalc.double_lits; + build.offset = precalc.double_offset; + } + } + +#ifdef DEBUG + printf("state %u stop1:", state); + for (size_t j = build.stop1.find_first(); j != build.stop1.npos; + j = build.stop1.find_next(j)) { + printf(" 0x%02x", (u32)j); + } + printf("\n"); + printf("state %u stop2:", state); + for (auto it = build.stop2.begin(); it != build.stop2.end(); ++it) { + printf(" 0x%02hhx%02hhx", it->first, it->second); + } + printf("\n"); +#endif +} + +// Generate all the data we need for at most NFA_MAX_ACCEL_STATES accelerable +// states. +static +void gatherAccelStates(const build_info &bi, vector<AccelBuild> &accelStates) { + for (auto v : bi.accel.accelerable) { + DEBUG_PRINTF("state %u is accelerable\n", bi.state_ids.at(v)); + AccelBuild a; + findStopLiterals(bi, v, a); + accelStates.push_back(a); + } + + // AccelStates should be sorted by state number, so that we build our accel + // masks correctly. + sort(accelStates.begin(), accelStates.end(), + [](const AccelBuild &a, const AccelBuild &b) { + return a.state < b.state; + }); + + // Our caller shouldn't have fed us too many accel states. 
+ assert(accelStates.size() <= NFA_MAX_ACCEL_STATES); + if (accelStates.size() > NFA_MAX_ACCEL_STATES) { + accelStates.resize(NFA_MAX_ACCEL_STATES); + } +} + +static +void combineAccel(const AccelBuild &in, AccelBuild &out) { + // stop1 and stop2 union + out.stop1 |= in.stop1; + out.stop2.insert(in.stop2.begin(), in.stop2.end()); + // offset is maximum of the two + out.offset = max(out.offset, in.offset); +} + +static +void minimiseAccel(AccelBuild &build) { + flat_set<pair<u8, u8>> new_stop2; + // Any two-byte accels beginning with a one-byte accel should be removed + for (const auto &si : build.stop2) { + if (!build.stop1.test(si.first)) { + new_stop2.insert(si); + } + } + build.stop2 = new_stop2; +} + +struct AccelAuxCmp { + explicit AccelAuxCmp(const AccelAux &aux_in) : aux(aux_in) {} + bool operator()(const AccelAux &a) const { + return !memcmp(&a, &aux, sizeof(AccelAux)); + } +private: + const AccelAux &aux; +}; + +static +bool allow_wide_accel(NFAVertex v, const NGHolder &g, NFAVertex sds_or_proxy) { + return v == sds_or_proxy || edge(g.start, v, g).second; +} + +static +bool allow_wide_accel(const vector<NFAVertex> &vv, const NGHolder &g, + NFAVertex sds_or_proxy) { + for (auto v : vv) { + if (allow_wide_accel(v, g, sds_or_proxy)) { + return true; + } + } + + return false; +} + +// identify and mark states that we feel are accelerable (for a limex NFA) +/* Note: leftfix nfas allow accepts to be accelerated */ +static +void nfaFindAccelSchemes(const NGHolder &g, + const map<NFAVertex, BoundedRepeatSummary> &br_cyclic, unordered_map<NFAVertex, AccelScheme> *out) { - vector<CharReach> refined_cr = reduced_cr(g, br_cyclic); - - NFAVertex sds_or_proxy = get_sds_or_proxy(g); - - for (auto v : vertices_range(g)) { - // We want to skip any vertices that don't lead to at least one other - // (self-loops don't count) vertex. - if (!has_proper_successor(v, g)) { + vector<CharReach> refined_cr = reduced_cr(g, br_cyclic); + + NFAVertex sds_or_proxy = get_sds_or_proxy(g); + + for (auto v : vertices_range(g)) { + // We want to skip any vertices that don't lead to at least one other + // (self-loops don't count) vertex. 
+ if (!has_proper_successor(v, g)) { DEBUG_PRINTF("skipping vertex %zu\n", g[v].index); - continue; - } - - bool allow_wide = allow_wide_accel(v, g, sds_or_proxy); - - AccelScheme as; - if (nfaCheckAccel(g, v, refined_cr, br_cyclic, &as, allow_wide)) { + continue; + } + + bool allow_wide = allow_wide_accel(v, g, sds_or_proxy); + + AccelScheme as; + if (nfaCheckAccel(g, v, refined_cr, br_cyclic, &as, allow_wide)) { DEBUG_PRINTF("graph vertex %zu is accelerable with offset %u.\n", - g[v].index, as.offset); - (*out)[v] = as; - } - } -} - -struct fas_visitor : public boost::default_bfs_visitor { + g[v].index, as.offset); + (*out)[v] = as; + } + } +} + +struct fas_visitor : public boost::default_bfs_visitor { fas_visitor(const unordered_map<NFAVertex, AccelScheme> &am_in, unordered_map<NFAVertex, AccelScheme> *out_in) - : accel_map(am_in), out(out_in) {} - + : accel_map(am_in), out(out_in) {} + void discover_vertex(NFAVertex v, const NGHolder &) { - if (accel_map.find(v) != accel_map.end()) { - (*out)[v] = accel_map.find(v)->second; - } - if (out->size() >= NFA_MAX_ACCEL_STATES) { - throw this; /* done */ - } - } + if (accel_map.find(v) != accel_map.end()) { + (*out)[v] = accel_map.find(v)->second; + } + if (out->size() >= NFA_MAX_ACCEL_STATES) { + throw this; /* done */ + } + } const unordered_map<NFAVertex, AccelScheme> &accel_map; unordered_map<NFAVertex, AccelScheme> *out; -}; - -static +}; + +static void filterAccelStates(NGHolder &g, const map<u32, set<NFAVertex>> &tops, unordered_map<NFAVertex, AccelScheme> *accel_map) { - /* We want the NFA_MAX_ACCEL_STATES best acceleration states, everything - * else should be ditched. We use a simple BFS to choose accel states near - * the start. */ - + /* We want the NFA_MAX_ACCEL_STATES best acceleration states, everything + * else should be ditched. We use a simple BFS to choose accel states near + * the start. */ + vector<NFAEdge> tempEdges; for (const auto &vv : tops | map_values) { for (NFAVertex v : vv) { @@ -552,51 +552,51 @@ void filterAccelStates(NGHolder &g, const map<u32, set<NFAVertex>> &tops, } } } - - // Similarly, connect (start, startDs) if necessary. - if (!edge(g.start, g.startDs, g).second) { + + // Similarly, connect (start, startDs) if necessary. + if (!edge(g.start, g.startDs, g).second) { NFAEdge e = add_edge(g.start, g.startDs, g); tempEdges.push_back(e); // Remove edge later. 
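Aside: filterAccelStates here keeps only the NFA_MAX_ACCEL_STATES accelerable states nearest the start vertices, running boost::breadth_first_search with fas_visitor throwing once the quota is reached. A minimal sketch of the same "closest candidates in BFS order" selection, with a plain queue standing in for the boost search and the throw replaced by a loop bound:

#include <cstddef>
#include <cstdint>
#include <queue>
#include <unordered_set>
#include <vector>

using u32 = std::uint32_t;
static constexpr std::size_t MAX_ACCEL = 8; // stand-in for NFA_MAX_ACCEL_STATES

// Walk the graph breadth-first from 'start' and keep the first MAX_ACCEL
// vertices that are acceleration candidates.
std::unordered_set<u32>
pickNearestAccel(const std::vector<std::vector<u32>> &adj, u32 start,
                 const std::unordered_set<u32> &candidates) {
    std::unordered_set<u32> out;
    std::unordered_set<u32> seen{start};
    std::queue<u32> q;
    q.push(start);
    while (!q.empty() && out.size() < MAX_ACCEL) {
        u32 v = q.front();
        q.pop();
        if (candidates.count(v)) {
            out.insert(v);
        }
        for (u32 w : adj[v]) {
            if (seen.insert(w).second) { // enqueue each vertex once
                q.push(w);
            }
        }
    }
    return out;
}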
- } - + } + unordered_map<NFAVertex, AccelScheme> out; - - try { + + try { boost::breadth_first_search(g, g.start, visitor(fas_visitor(*accel_map, &out)) .color_map(make_small_color_map(g))); - } catch (fas_visitor *) { - ; /* found max accel_states */ - } - + } catch (fas_visitor *) { + ; /* found max accel_states */ + } + remove_edges(tempEdges, g); - - assert(out.size() <= NFA_MAX_ACCEL_STATES); - accel_map->swap(out); -} - -static + + assert(out.size() <= NFA_MAX_ACCEL_STATES); + accel_map->swap(out); +} + +static bool containsBadSubset(const limex_accel_info &accel, - const NFAStateSet &state_set, const u32 effective_sds) { - NFAStateSet subset(state_set.size()); - for (size_t j = state_set.find_first(); j != state_set.npos; - j = state_set.find_next(j)) { - subset = state_set; - subset.reset(j); - - if (effective_sds != NO_STATE && subset.count() == 1 && - subset.test(effective_sds)) { - continue; - } - - if (subset.any() && !contains(accel.precalc, subset)) { - return true; - } - } - return false; -} - -static + const NFAStateSet &state_set, const u32 effective_sds) { + NFAStateSet subset(state_set.size()); + for (size_t j = state_set.find_first(); j != state_set.npos; + j = state_set.find_next(j)) { + subset = state_set; + subset.reset(j); + + if (effective_sds != NO_STATE && subset.count() == 1 && + subset.test(effective_sds)) { + continue; + } + + if (subset.any() && !contains(accel.precalc, subset)) { + return true; + } + } + return false; +} + +static bool is_too_wide(const AccelScheme &as) { return as.cr.count() > MAX_MERGED_ACCEL_STOPS; } @@ -619,86 +619,86 @@ void fillAccelInfo(build_info &bi) { assert(accel_map.size() <= NFA_MAX_ACCEL_STATES); - vector<CharReach> refined_cr = reduced_cr(g, br_cyclic); - - vector<NFAVertex> astates; - for (const auto &m : accel_map) { - astates.push_back(m.first); - } - - NFAStateSet useful(num_states); - NFAStateSet state_set(num_states); - vector<NFAVertex> states; - - NFAVertex sds_or_proxy = get_sds_or_proxy(g); - const u32 effective_sds = state_ids.at(sds_or_proxy); - - /* for each subset of the accel keys need to find an accel scheme */ - assert(astates.size() < 32); + vector<CharReach> refined_cr = reduced_cr(g, br_cyclic); + + vector<NFAVertex> astates; + for (const auto &m : accel_map) { + astates.push_back(m.first); + } + + NFAStateSet useful(num_states); + NFAStateSet state_set(num_states); + vector<NFAVertex> states; + + NFAVertex sds_or_proxy = get_sds_or_proxy(g); + const u32 effective_sds = state_ids.at(sds_or_proxy); + + /* for each subset of the accel keys need to find an accel scheme */ + assert(astates.size() < 32); sort(astates.begin(), astates.end()); - - for (u32 i = 1, i_end = 1U << astates.size(); i < i_end; i++) { - DEBUG_PRINTF("saving info for accel %u\n", i); - states.clear(); - state_set.reset(); - for (u32 j = 0, j_end = astates.size(); j < j_end; j++) { - if (i & (1U << j)) { - NFAVertex v = astates[j]; - states.push_back(v); - state_set.set(state_ids.at(v)); - } - } - + + for (u32 i = 1, i_end = 1U << astates.size(); i < i_end; i++) { + DEBUG_PRINTF("saving info for accel %u\n", i); + states.clear(); + state_set.reset(); + for (u32 j = 0, j_end = astates.size(); j < j_end; j++) { + if (i & (1U << j)) { + NFAVertex v = astates[j]; + states.push_back(v); + state_set.set(state_ids.at(v)); + } + } + if (containsBadSubset(accel, state_set, effective_sds)) { - DEBUG_PRINTF("accel %u has bad subset\n", i); - continue; /* if a subset failed to build we would too */ - } - - const bool allow_wide = 
allow_wide_accel(states, g, sds_or_proxy); - - AccelScheme as = nfaFindAccel(g, states, refined_cr, br_cyclic, + DEBUG_PRINTF("accel %u has bad subset\n", i); + continue; /* if a subset failed to build we would too */ + } + + const bool allow_wide = allow_wide_accel(states, g, sds_or_proxy); + + AccelScheme as = nfaFindAccel(g, states, refined_cr, br_cyclic, allow_wide, true); if (is_too_wide(as)) { - DEBUG_PRINTF("accel %u too wide (%zu, %d)\n", i, - as.cr.count(), MAX_MERGED_ACCEL_STOPS); - continue; - } - + DEBUG_PRINTF("accel %u too wide (%zu, %d)\n", i, + as.cr.count(), MAX_MERGED_ACCEL_STOPS); + continue; + } + DEBUG_PRINTF("accel %u ok with offset s%u, d%u\n", i, as.offset, as.double_offset); - + precalcAccel &pa = accel.precalc[state_set]; - pa.single_offset = as.offset; - pa.single_cr = as.cr; - + pa.single_offset = as.offset; + pa.single_cr = as.cr; + if (as.double_byte.size() != 0) { pa.double_offset = as.double_offset; pa.double_lits = as.double_byte; pa.double_cr = as.double_cr; - } + } useful |= state_set; - } - - for (const auto &m : accel_map) { - NFAVertex v = m.first; - const u32 state_id = state_ids.at(v); - - /* if we we unable to make a scheme out of the state in any context, - * there is not point marking it as accelerable */ - if (!useful.test(state_id)) { - continue; - } - - u32 offset = 0; - state_set.reset(); - state_set.set(state_id); - + } + + for (const auto &m : accel_map) { + NFAVertex v = m.first; + const u32 state_id = state_ids.at(v); + + /* if we we unable to make a scheme out of the state in any context, + * there is not point marking it as accelerable */ + if (!useful.test(state_id)) { + continue; + } + + u32 offset = 0; + state_set.reset(); + state_set.set(state_id); + accel.accelerable.insert(v); findAccelFriends(g, v, br_cyclic, offset, &accel.friends[v]); - } -} - + } +} + /** The AccelAux structure has large alignment specified, and this makes some * compilers do odd things unless we specify a custom allocator. */ typedef vector<AccelAux, AlignedAllocator<AccelAux, alignof(AccelAux)>> @@ -706,7 +706,7 @@ typedef vector<AccelAux, AlignedAllocator<AccelAux, alignof(AccelAux)>> #define IMPOSSIBLE_ACCEL_MASK (~0U) -static +static u32 getEffectiveAccelStates(const build_info &args, const unordered_map<NFAVertex, NFAVertex> &dom_map, u32 active_accel_mask, @@ -752,8 +752,8 @@ u32 getEffectiveAccelStates(const build_info &args, for (u32 accel_id = 0; accel_id < accelStates.size(); accel_id++) { NFAVertex v = accelStates[accel_id].v; accel_id_map[v] = accel_id; - } - + } + /* Note: we want a slightly less strict defn of dominate as skip edges * prevent .* 'truly' dominating */ for (u32 local_accel_mask = active_accel_mask; local_accel_mask; ) { @@ -850,45 +850,45 @@ u32 getEffectiveAccelStates(const build_info &args, } return active_accel_mask & ~ignored; -} - -static -void buildAccel(const build_info &args, NFAStateSet &accelMask, - NFAStateSet &accelFriendsMask, AccelAuxVector &auxvec, - vector<u8> &accelTable) { +} + +static +void buildAccel(const build_info &args, NFAStateSet &accelMask, + NFAStateSet &accelFriendsMask, AccelAuxVector &auxvec, + vector<u8> &accelTable) { const limex_accel_info &accel = args.accel; - - // Init, all zeroes. - accelMask.resize(args.num_states); - accelFriendsMask.resize(args.num_states); - - if (!args.do_accel) { - return; - } - - vector<AccelBuild> accelStates; - gatherAccelStates(args, accelStates); - - if (accelStates.empty()) { - DEBUG_PRINTF("no accelerable states\n"); - return; - } - + + // Init, all zeroes. 
+ accelMask.resize(args.num_states); + accelFriendsMask.resize(args.num_states); + + if (!args.do_accel) { + return; + } + + vector<AccelBuild> accelStates; + gatherAccelStates(args, accelStates); + + if (accelStates.empty()) { + DEBUG_PRINTF("no accelerable states\n"); + return; + } + const auto dom_map = findDominators(args.h); - // We have 2^n different accel entries, one for each possible - // combination of accelerable states. - assert(accelStates.size() < 32); - const u32 accelCount = 1U << accelStates.size(); - assert(accelCount <= 256); - - // Set up a unioned AccelBuild for every possible combination of the set - // bits in accelStates. - vector<AccelBuild> accelOuts(accelCount); + // We have 2^n different accel entries, one for each possible + // combination of accelerable states. + assert(accelStates.size() < 32); + const u32 accelCount = 1U << accelStates.size(); + assert(accelCount <= 256); + + // Set up a unioned AccelBuild for every possible combination of the set + // bits in accelStates. + vector<AccelBuild> accelOuts(accelCount); vector<u32> effective_accel_set; effective_accel_set.push_back(0); /* empty is effectively empty */ - for (u32 i = 1; i < accelCount; i++) { + for (u32 i = 1; i < accelCount; i++) { u32 effective_i = getEffectiveAccelStates(args, dom_map, i, accelStates); effective_accel_set.push_back(effective_i); @@ -897,38 +897,38 @@ void buildAccel(const build_info &args, NFAStateSet &accelMask, DEBUG_PRINTF("this combination of accel states is not possible\n"); accelOuts[i].stop1 = CharReach::dot(); continue; - } + } while (effective_i) { u32 base_accel_state = findAndClearLSB_32(&effective_i); combineAccel(accelStates[base_accel_state], accelOuts[i]); } - minimiseAccel(accelOuts[i]); - } - - accelTable.resize(accelCount); - - // We dedupe our AccelAux structures here, so that we only write one copy - // of each unique accel scheme into the bytecode, using the accelTable as - // an index. - - // Start with the NONE case. - auxvec.push_back(AccelAux()); - memset(&auxvec[0], 0, sizeof(AccelAux)); - auxvec[0].accel_type = ACCEL_NONE; // no states on. - - AccelAux aux; - for (u32 i = 1; i < accelCount; i++) { - memset(&aux, 0, sizeof(aux)); - + minimiseAccel(accelOuts[i]); + } + + accelTable.resize(accelCount); + + // We dedupe our AccelAux structures here, so that we only write one copy + // of each unique accel scheme into the bytecode, using the accelTable as + // an index. + + // Start with the NONE case. + auxvec.push_back(AccelAux()); + memset(&auxvec[0], 0, sizeof(AccelAux)); + auxvec[0].accel_type = ACCEL_NONE; // no states on. + + AccelAux aux; + for (u32 i = 1; i < accelCount; i++) { + memset(&aux, 0, sizeof(aux)); + NFAStateSet effective_states(args.num_states); u32 effective_i = effective_accel_set[i]; - - AccelInfo ainfo; - ainfo.double_offset = accelOuts[i].offset; - ainfo.double_stop1 = accelOuts[i].stop1; - ainfo.double_stop2 = accelOuts[i].stop2; - + + AccelInfo ainfo; + ainfo.double_offset = accelOuts[i].offset; + ainfo.double_stop1 = accelOuts[i].stop1; + ainfo.double_stop2 = accelOuts[i].stop2; + if (effective_i != IMPOSSIBLE_ACCEL_MASK) { while (effective_i) { u32 base_accel_id = findAndClearLSB_32(&effective_i); @@ -940,47 +940,47 @@ void buildAccel(const build_info &args, NFAStateSet &accelMask, ainfo.single_offset = precalc.single_offset; ainfo.single_stops = precalc.single_cr; } - } - - buildAccelAux(ainfo, &aux); - - // FIXME: We may want a faster way to find AccelAux structures that - // we've already built before. 
- auto it = find_if(auxvec.begin(), auxvec.end(), AccelAuxCmp(aux)); - if (it == auxvec.end()) { - accelTable[i] = verify_u8(auxvec.size()); - auxvec.push_back(aux); - } else { - accelTable[i] = verify_u8(it - auxvec.begin()); - } - } - - DEBUG_PRINTF("%zu unique accel schemes (of max %u)\n", auxvec.size(), - accelCount); - - // XXX: ACCEL_NONE? - for (const auto &as : accelStates) { - NFAVertex v = as.v; - assert(v && args.state_ids.at(v) == as.state); - - accelMask.set(as.state); - accelFriendsMask.set(as.state); - - if (!contains(accel.friends, v)) { - continue; - } - // Add the friends of this state to the friends mask. - const flat_set<NFAVertex> &friends = accel.friends.at(v); - DEBUG_PRINTF("%u has %zu friends\n", as.state, friends.size()); - for (auto friend_v : friends) { - u32 state_id = args.state_ids.at(friend_v); - DEBUG_PRINTF("--> %u\n", state_id); - accelFriendsMask.set(state_id); - } - } -} - -static + } + + buildAccelAux(ainfo, &aux); + + // FIXME: We may want a faster way to find AccelAux structures that + // we've already built before. + auto it = find_if(auxvec.begin(), auxvec.end(), AccelAuxCmp(aux)); + if (it == auxvec.end()) { + accelTable[i] = verify_u8(auxvec.size()); + auxvec.push_back(aux); + } else { + accelTable[i] = verify_u8(it - auxvec.begin()); + } + } + + DEBUG_PRINTF("%zu unique accel schemes (of max %u)\n", auxvec.size(), + accelCount); + + // XXX: ACCEL_NONE? + for (const auto &as : accelStates) { + NFAVertex v = as.v; + assert(v && args.state_ids.at(v) == as.state); + + accelMask.set(as.state); + accelFriendsMask.set(as.state); + + if (!contains(accel.friends, v)) { + continue; + } + // Add the friends of this state to the friends mask. + const flat_set<NFAVertex> &friends = accel.friends.at(v); + DEBUG_PRINTF("%u has %zu friends\n", as.state, friends.size()); + for (auto friend_v : friends) { + u32 state_id = args.state_ids.at(friend_v); + DEBUG_PRINTF("--> %u\n", state_id); + accelFriendsMask.set(state_id); + } + } +} + +static u32 addSquashMask(const build_info &args, const NFAVertex &v, vector<NFAStateSet> &squash) { auto sit = args.reportSquashMap.find(v); @@ -1049,7 +1049,7 @@ void buildAcceptsList(const build_info &args, ReportListCache &reports_cache, sort(begin(verts), end(verts), cmp_state_id); - const NGHolder &h = args.h; + const NGHolder &h = args.h; for (const auto &v : verts) { DEBUG_PRINTF("state=%u, reports: [%s]\n", args.state_ids.at(v), as_string_list(h[v].reports).c_str()); @@ -1067,7 +1067,7 @@ void buildAcceptsList(const build_info &args, ReportListCache &reports_cache, accepts.push_back(move(a)); } } - + static void buildAccepts(const build_info &args, ReportListCache &reports_cache, NFAStateSet &acceptMask, NFAStateSet &acceptEodMask, @@ -1075,463 +1075,463 @@ void buildAccepts(const build_info &args, ReportListCache &reports_cache, vector<ReportID> &reports, vector<NFAStateSet> &squash) { const NGHolder &h = args.h; - acceptMask.resize(args.num_states); - acceptEodMask.resize(args.num_states); - + acceptMask.resize(args.num_states); + acceptEodMask.resize(args.num_states); + vector<NFAVertex> verts_accept, verts_accept_eod; - for (auto v : vertices_range(h)) { - u32 state_id = args.state_ids.at(v); - - if (state_id == NO_STATE || !is_match_vertex(v, h)) { - continue; - } - - if (edge(v, h.accept, h).second) { - acceptMask.set(state_id); + for (auto v : vertices_range(h)) { + u32 state_id = args.state_ids.at(v); + + if (state_id == NO_STATE || !is_match_vertex(v, h)) { + continue; + } + + if (edge(v, h.accept, h).second) { + 
acceptMask.set(state_id); verts_accept.push_back(v); - } else { - assert(edge(v, h.acceptEod, h).second); - acceptEodMask.set(state_id); + } else { + assert(edge(v, h.acceptEod, h).second); + acceptEodMask.set(state_id); verts_accept_eod.push_back(v); - } + } } - + buildAcceptsList(args, reports_cache, verts_accept, accepts, reports, squash); buildAcceptsList(args, reports_cache, verts_accept_eod, acceptsEod, reports, squash); -} - -static -void buildTopMasks(const build_info &args, vector<NFAStateSet> &topMasks) { - if (args.tops.empty()) { - return; // No tops, probably an outfix NFA. - } - - u32 numMasks = args.tops.rbegin()->first + 1; // max mask index - DEBUG_PRINTF("we have %u top masks\n", numMasks); - - topMasks.assign(numMasks, NFAStateSet(args.num_states)); // all zeroes - - for (const auto &m : args.tops) { - u32 mask_idx = m.first; +} + +static +void buildTopMasks(const build_info &args, vector<NFAStateSet> &topMasks) { + if (args.tops.empty()) { + return; // No tops, probably an outfix NFA. + } + + u32 numMasks = args.tops.rbegin()->first + 1; // max mask index + DEBUG_PRINTF("we have %u top masks\n", numMasks); + + topMasks.assign(numMasks, NFAStateSet(args.num_states)); // all zeroes + + for (const auto &m : args.tops) { + u32 mask_idx = m.first; for (NFAVertex v : m.second) { u32 state_id = args.state_ids.at(v); DEBUG_PRINTF("state %u is in top mask %u\n", state_id, mask_idx); - + assert(mask_idx < numMasks); assert(state_id != NO_STATE); - + topMasks[mask_idx].set(state_id); } - } -} - -static -u32 uncompressedStateSize(u32 num_states) { - // Number of bytes required to store all our states. - return ROUNDUP_N(num_states, 8)/8; -} - -static -u32 compressedStateSize(const NGHolder &h, const NFAStateSet &maskedStates, + } +} + +static +u32 uncompressedStateSize(u32 num_states) { + // Number of bytes required to store all our states. + return ROUNDUP_N(num_states, 8)/8; +} + +static +u32 compressedStateSize(const NGHolder &h, const NFAStateSet &maskedStates, const unordered_map<NFAVertex, u32> &state_ids) { - // Shrink state requirement to enough to fit the compressed largest reach. - vector<u32> allreach(N_CHARS, 0); - - for (auto v : vertices_range(h)) { - u32 i = state_ids.at(v); - if (i == NO_STATE || maskedStates.test(i)) { - continue; - } - const CharReach &cr = h[v].char_reach; - for (size_t j = cr.find_first(); j != cr.npos; j = cr.find_next(j)) { - allreach[j]++; // state 'i' can reach character 'j'. - } - } - - u32 maxreach = *max_element(allreach.begin(), allreach.end()); - DEBUG_PRINTF("max reach is %u\n", maxreach); - return (maxreach + 7) / 8; -} - -static -bool hasSquashableInitDs(const build_info &args) { - const NGHolder &h = args.h; - - if (args.squashMap.empty()) { - DEBUG_PRINTF("squash map is empty\n"); - return false; - } - - NFAStateSet initDs(args.num_states); - u32 sds_state = args.state_ids.at(h.startDs); - if (sds_state == NO_STATE) { - DEBUG_PRINTF("no states in initds\n"); - return false; - } - - initDs.set(sds_state); - - /* TODO: simplify */ - - // Check normal squash map. - for (const auto &m : args.squashMap) { - DEBUG_PRINTF("checking squash mask for state %u\n", - args.state_ids.at(m.first)); - NFAStateSet squashed = ~(m.second); // flip mask - assert(squashed.size() == initDs.size()); - if (squashed.intersects(initDs)) { - DEBUG_PRINTF("state %u squashes initds states\n", - args.state_ids.at(m.first)); - return true; - } - } - - // Check report squash map. 
- for (const auto &m : args.reportSquashMap) { - DEBUG_PRINTF("checking report squash mask for state %u\n", - args.state_ids.at(m.first)); - NFAStateSet squashed = ~(m.second); // flip mask - assert(squashed.size() == initDs.size()); - if (squashed.intersects(initDs)) { - DEBUG_PRINTF("state %u squashes initds states\n", - args.state_ids.at(m.first)); - return true; - } - } - - return false; -} - -static -bool hasInitDsStates(const NGHolder &h, + // Shrink state requirement to enough to fit the compressed largest reach. + vector<u32> allreach(N_CHARS, 0); + + for (auto v : vertices_range(h)) { + u32 i = state_ids.at(v); + if (i == NO_STATE || maskedStates.test(i)) { + continue; + } + const CharReach &cr = h[v].char_reach; + for (size_t j = cr.find_first(); j != cr.npos; j = cr.find_next(j)) { + allreach[j]++; // state 'i' can reach character 'j'. + } + } + + u32 maxreach = *max_element(allreach.begin(), allreach.end()); + DEBUG_PRINTF("max reach is %u\n", maxreach); + return (maxreach + 7) / 8; +} + +static +bool hasSquashableInitDs(const build_info &args) { + const NGHolder &h = args.h; + + if (args.squashMap.empty()) { + DEBUG_PRINTF("squash map is empty\n"); + return false; + } + + NFAStateSet initDs(args.num_states); + u32 sds_state = args.state_ids.at(h.startDs); + if (sds_state == NO_STATE) { + DEBUG_PRINTF("no states in initds\n"); + return false; + } + + initDs.set(sds_state); + + /* TODO: simplify */ + + // Check normal squash map. + for (const auto &m : args.squashMap) { + DEBUG_PRINTF("checking squash mask for state %u\n", + args.state_ids.at(m.first)); + NFAStateSet squashed = ~(m.second); // flip mask + assert(squashed.size() == initDs.size()); + if (squashed.intersects(initDs)) { + DEBUG_PRINTF("state %u squashes initds states\n", + args.state_ids.at(m.first)); + return true; + } + } + + // Check report squash map. + for (const auto &m : args.reportSquashMap) { + DEBUG_PRINTF("checking report squash mask for state %u\n", + args.state_ids.at(m.first)); + NFAStateSet squashed = ~(m.second); // flip mask + assert(squashed.size() == initDs.size()); + if (squashed.intersects(initDs)) { + DEBUG_PRINTF("state %u squashes initds states\n", + args.state_ids.at(m.first)); + return true; + } + } + + return false; +} + +static +bool hasInitDsStates(const NGHolder &h, const unordered_map<NFAVertex, u32> &state_ids) { - if (state_ids.at(h.startDs) != NO_STATE) { - return true; - } - - if (is_triggered(h) && state_ids.at(h.start) != NO_STATE) { - return true; - } - - return false; -} - -static -void findMaskedCompressionStates(const build_info &args, - NFAStateSet &maskedStates) { - const NGHolder &h = args.h; - if (!generates_callbacks(h)) { - // Rose leftfixes can mask out initds, which is worth doing if it will - // stay on forever (i.e. it's not squashable). - u32 sds_i = args.state_ids.at(h.startDs); - if (sds_i != NO_STATE && !hasSquashableInitDs(args)) { - maskedStates.set(sds_i); - DEBUG_PRINTF("masking out initds state\n"); - } - } - - // Suffixes and outfixes can mask out leaf states, which should all be - // accepts. Right now we can only do this when there is nothing in initDs, - // as we switch that on unconditionally in the expand call. 
+ if (state_ids.at(h.startDs) != NO_STATE) { + return true; + } + + if (is_triggered(h) && state_ids.at(h.start) != NO_STATE) { + return true; + } + + return false; +} + +static +void findMaskedCompressionStates(const build_info &args, + NFAStateSet &maskedStates) { + const NGHolder &h = args.h; + if (!generates_callbacks(h)) { + // Rose leftfixes can mask out initds, which is worth doing if it will + // stay on forever (i.e. it's not squashable). + u32 sds_i = args.state_ids.at(h.startDs); + if (sds_i != NO_STATE && !hasSquashableInitDs(args)) { + maskedStates.set(sds_i); + DEBUG_PRINTF("masking out initds state\n"); + } + } + + // Suffixes and outfixes can mask out leaf states, which should all be + // accepts. Right now we can only do this when there is nothing in initDs, + // as we switch that on unconditionally in the expand call. if (!inspects_states_for_accepts(h) && !hasInitDsStates(h, args.state_ids)) { - NFAStateSet nonleaf(args.num_states); - for (const auto &e : edges_range(h)) { - u32 from = args.state_ids.at(source(e, h)); - u32 to = args.state_ids.at(target(e, h)); - if (from == NO_STATE) { - continue; - } - - // We cannot mask out EOD accepts, as they have to perform an - // action after they're switched on that may be delayed until the - // next stream write. - if (to == NO_STATE && target(e, h) != h.acceptEod) { - continue; - } - - nonleaf.set(from); - } - - for (u32 i = 0; i < args.num_states; i++) { - if (!nonleaf.test(i)) { - maskedStates.set(i); - } - } - - DEBUG_PRINTF("masking out %zu leaf states\n", maskedStates.count()); - } -} - -/** \brief Sets a given flag in the LimEx structure. */ -template<class implNFA_t> -static -void setLimexFlag(implNFA_t *limex, u32 flag) { - assert(flag); - assert((flag & (flag - 1)) == 0); - limex->flags |= flag; -} - -/** \brief Sets a given flag in the NFA structure */ -static -void setNfaFlag(NFA *nfa, u32 flag) { - assert(flag); - assert((flag & (flag - 1)) == 0); - nfa->flags |= flag; -} - -// Some of our NFA types support compressing the state down if we're not using -// all of it. -template<class implNFA_t> -static -void findStateSize(const build_info &args, implNFA_t *limex) { - // Nothing is masked off by default. - maskFill(limex->compressMask, 0xff); - - u32 sizeUncompressed = uncompressedStateSize(args.num_states); - assert(sizeUncompressed <= sizeof(limex->compressMask)); - - if (!args.stateCompression) { - DEBUG_PRINTF("compression disabled, uncompressed state size %u\n", - sizeUncompressed); - limex->stateSize = sizeUncompressed; - return; - } - - NFAStateSet maskedStates(args.num_states); - findMaskedCompressionStates(args, maskedStates); - - u32 sizeCompressed = compressedStateSize(args.h, maskedStates, args.state_ids); - assert(sizeCompressed <= sizeof(limex->compressMask)); - - DEBUG_PRINTF("compressed=%u, uncompressed=%u\n", sizeCompressed, - sizeUncompressed); - - // Must be at least a 10% saving. 
- if ((sizeCompressed * 100) <= (sizeUncompressed * 90)) { - DEBUG_PRINTF("using compression, state size %u\n", - sizeCompressed); - setLimexFlag(limex, LIMEX_FLAG_COMPRESS_STATE); - limex->stateSize = sizeCompressed; - - if (maskedStates.any()) { - DEBUG_PRINTF("masking %zu states\n", maskedStates.count()); - setLimexFlag(limex, LIMEX_FLAG_COMPRESS_MASKED); - for (size_t i = maskedStates.find_first(); i != NFAStateSet::npos; - i = maskedStates.find_next(i)) { - maskClearBit(limex->compressMask, i); - } - } - } else { - DEBUG_PRINTF("not using compression, state size %u\n", - sizeUncompressed); - limex->stateSize = sizeUncompressed; - } -} - -/* - * LimEx NFA: code for building NFAs in the Limited+Exceptional model. Most - * transitions are limited, with transitions outside the constraints of our - * shifts taken care of as 'exceptions'. Exceptions are also used to handle - * accepts and squash behaviour. - */ - -/** - * \brief Prototype exception class. - * - * Used to build up the map of exceptions before being converted to real - * NFAException32 (etc) structures. - */ -struct ExceptionProto { - u32 reports_index = MO_INVALID_IDX; - NFAStateSet succ_states; - NFAStateSet squash_states; - u32 repeat_index = MO_INVALID_IDX; - enum LimExTrigger trigger = LIMEX_TRIGGER_NONE; - enum LimExSquash squash = LIMEX_SQUASH_NONE; - - explicit ExceptionProto(u32 num_states) - : succ_states(num_states), squash_states(num_states) { - // Squash states are represented as the set of states to leave on, - // so we start with all-ones. - squash_states.set(); - } - - bool operator<(const ExceptionProto &b) const { - const ExceptionProto &a = *this; - - ORDER_CHECK(reports_index); - ORDER_CHECK(repeat_index); - ORDER_CHECK(trigger); - ORDER_CHECK(squash); - ORDER_CHECK(succ_states); - ORDER_CHECK(squash_states); - - return false; - } -}; - -static + NFAStateSet nonleaf(args.num_states); + for (const auto &e : edges_range(h)) { + u32 from = args.state_ids.at(source(e, h)); + u32 to = args.state_ids.at(target(e, h)); + if (from == NO_STATE) { + continue; + } + + // We cannot mask out EOD accepts, as they have to perform an + // action after they're switched on that may be delayed until the + // next stream write. + if (to == NO_STATE && target(e, h) != h.acceptEod) { + continue; + } + + nonleaf.set(from); + } + + for (u32 i = 0; i < args.num_states; i++) { + if (!nonleaf.test(i)) { + maskedStates.set(i); + } + } + + DEBUG_PRINTF("masking out %zu leaf states\n", maskedStates.count()); + } +} + +/** \brief Sets a given flag in the LimEx structure. */ +template<class implNFA_t> +static +void setLimexFlag(implNFA_t *limex, u32 flag) { + assert(flag); + assert((flag & (flag - 1)) == 0); + limex->flags |= flag; +} + +/** \brief Sets a given flag in the NFA structure */ +static +void setNfaFlag(NFA *nfa, u32 flag) { + assert(flag); + assert((flag & (flag - 1)) == 0); + nfa->flags |= flag; +} + +// Some of our NFA types support compressing the state down if we're not using +// all of it. +template<class implNFA_t> +static +void findStateSize(const build_info &args, implNFA_t *limex) { + // Nothing is masked off by default. 
+ maskFill(limex->compressMask, 0xff); + + u32 sizeUncompressed = uncompressedStateSize(args.num_states); + assert(sizeUncompressed <= sizeof(limex->compressMask)); + + if (!args.stateCompression) { + DEBUG_PRINTF("compression disabled, uncompressed state size %u\n", + sizeUncompressed); + limex->stateSize = sizeUncompressed; + return; + } + + NFAStateSet maskedStates(args.num_states); + findMaskedCompressionStates(args, maskedStates); + + u32 sizeCompressed = compressedStateSize(args.h, maskedStates, args.state_ids); + assert(sizeCompressed <= sizeof(limex->compressMask)); + + DEBUG_PRINTF("compressed=%u, uncompressed=%u\n", sizeCompressed, + sizeUncompressed); + + // Must be at least a 10% saving. + if ((sizeCompressed * 100) <= (sizeUncompressed * 90)) { + DEBUG_PRINTF("using compression, state size %u\n", + sizeCompressed); + setLimexFlag(limex, LIMEX_FLAG_COMPRESS_STATE); + limex->stateSize = sizeCompressed; + + if (maskedStates.any()) { + DEBUG_PRINTF("masking %zu states\n", maskedStates.count()); + setLimexFlag(limex, LIMEX_FLAG_COMPRESS_MASKED); + for (size_t i = maskedStates.find_first(); i != NFAStateSet::npos; + i = maskedStates.find_next(i)) { + maskClearBit(limex->compressMask, i); + } + } + } else { + DEBUG_PRINTF("not using compression, state size %u\n", + sizeUncompressed); + limex->stateSize = sizeUncompressed; + } +} + +/* + * LimEx NFA: code for building NFAs in the Limited+Exceptional model. Most + * transitions are limited, with transitions outside the constraints of our + * shifts taken care of as 'exceptions'. Exceptions are also used to handle + * accepts and squash behaviour. + */ + +/** + * \brief Prototype exception class. + * + * Used to build up the map of exceptions before being converted to real + * NFAException32 (etc) structures. + */ +struct ExceptionProto { + u32 reports_index = MO_INVALID_IDX; + NFAStateSet succ_states; + NFAStateSet squash_states; + u32 repeat_index = MO_INVALID_IDX; + enum LimExTrigger trigger = LIMEX_TRIGGER_NONE; + enum LimExSquash squash = LIMEX_SQUASH_NONE; + + explicit ExceptionProto(u32 num_states) + : succ_states(num_states), squash_states(num_states) { + // Squash states are represented as the set of states to leave on, + // so we start with all-ones. 
+ squash_states.set(); + } + + bool operator<(const ExceptionProto &b) const { + const ExceptionProto &a = *this; + + ORDER_CHECK(reports_index); + ORDER_CHECK(repeat_index); + ORDER_CHECK(trigger); + ORDER_CHECK(squash); + ORDER_CHECK(succ_states); + ORDER_CHECK(squash_states); + + return false; + } +}; + +static u32 buildExceptionMap(const build_info &args, ReportListCache &reports_cache, const unordered_set<NFAEdge> &exceptional, map<ExceptionProto, vector<u32>> &exceptionMap, vector<ReportID> &reportList) { - const NGHolder &h = args.h; - const u32 num_states = args.num_states; + const NGHolder &h = args.h; + const u32 num_states = args.num_states; u32 exceptionCount = 0; - + unordered_map<NFAVertex, u32> pos_trigger; unordered_map<NFAVertex, u32> tug_trigger; - - for (u32 i = 0; i < args.repeats.size(); i++) { - const BoundedRepeatData &br = args.repeats[i]; - assert(!contains(pos_trigger, br.pos_trigger)); - pos_trigger[br.pos_trigger] = i; - for (auto v : br.tug_triggers) { - assert(!contains(tug_trigger, v)); - tug_trigger[v] = i; - } - } - - for (auto v : vertices_range(h)) { - const u32 i = args.state_ids.at(v); - - if (i == NO_STATE) { - continue; - } - - bool addMe = false; - ExceptionProto e(num_states); - - if (edge(v, h.accept, h).second && generates_callbacks(h)) { - /* if nfa is never used to produce callbacks, no need to mark - * states as exceptional */ - const auto &reports = h[v].reports; - - DEBUG_PRINTF("state %u is exceptional due to accept " - "(%zu reports)\n", i, reports.size()); - + + for (u32 i = 0; i < args.repeats.size(); i++) { + const BoundedRepeatData &br = args.repeats[i]; + assert(!contains(pos_trigger, br.pos_trigger)); + pos_trigger[br.pos_trigger] = i; + for (auto v : br.tug_triggers) { + assert(!contains(tug_trigger, v)); + tug_trigger[v] = i; + } + } + + for (auto v : vertices_range(h)) { + const u32 i = args.state_ids.at(v); + + if (i == NO_STATE) { + continue; + } + + bool addMe = false; + ExceptionProto e(num_states); + + if (edge(v, h.accept, h).second && generates_callbacks(h)) { + /* if nfa is never used to produce callbacks, no need to mark + * states as exceptional */ + const auto &reports = h[v].reports; + + DEBUG_PRINTF("state %u is exceptional due to accept " + "(%zu reports)\n", i, reports.size()); + if (reports.empty()) { e.reports_index = MO_INVALID_IDX; } else { e.reports_index = addReports(reports, reportList, reports_cache); } - - // We may be applying a report squash too. - auto mi = args.reportSquashMap.find(v); - if (mi != args.reportSquashMap.end()) { - DEBUG_PRINTF("report squashes states\n"); - assert(e.squash_states.size() == mi->second.size()); - e.squash_states = mi->second; - e.squash = LIMEX_SQUASH_REPORT; - } - - addMe = true; - } - - if (contains(pos_trigger, v)) { - u32 repeat_index = pos_trigger[v]; - assert(e.trigger == LIMEX_TRIGGER_NONE); - e.trigger = LIMEX_TRIGGER_POS; - e.repeat_index = repeat_index; - DEBUG_PRINTF("state %u has pos trigger for repeat %u\n", i, - repeat_index); - addMe = true; - } - - if (contains(tug_trigger, v)) { - u32 repeat_index = tug_trigger[v]; - assert(e.trigger == LIMEX_TRIGGER_NONE); - e.trigger = LIMEX_TRIGGER_TUG; - e.repeat_index = repeat_index; - - // TUG triggers can squash the preceding cyclic state. 
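Aside: buildExceptionMap here groups states by identical exception behaviour; because ExceptionProto carries the total order defined above, it can key a std::map, and every state sharing a prototype lands in one bucket, so a single exception record is emitted per distinct behaviour. A reduced sketch of that bucketing, with a hypothetical two-field key standing in for the full prototype:

#include <cstdint>
#include <map>
#include <tuple>
#include <vector>

using u32 = std::uint32_t;

// Simplified stand-in for ExceptionProto: just enough fields to show the
// ordering-based dedupe.
struct ProtoKey {
    u32 reports_index;
    u32 repeat_index;
    bool operator<(const ProtoKey &b) const {
        return std::tie(reports_index, repeat_index) <
               std::tie(b.reports_index, b.repeat_index);
    }
};

// States with identical exception behaviour share one map entry, so one
// exception structure can cover all of them.
void addException(std::map<ProtoKey, std::vector<u32>> &exceptionMap,
                  const ProtoKey &key, u32 state_id) {
    exceptionMap[key].push_back(state_id);
}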
- u32 cyclic = args.state_ids.at(args.repeats[repeat_index].cyclic); - e.squash_states.reset(cyclic); - e.squash = LIMEX_SQUASH_TUG; - DEBUG_PRINTF("state %u has tug trigger for repeat %u, can squash " - "state %u\n", i, repeat_index, cyclic); - addMe = true; - } - - // are we a non-limited transition? - for (const auto &oe : out_edges_range(v, h)) { - if (contains(exceptional, oe)) { - NFAVertex w = target(oe, h); - u32 w_idx = args.state_ids.at(w); - assert(w_idx != NO_STATE); - e.succ_states.set(w_idx); - DEBUG_PRINTF("exceptional transition %u->%u\n", i, w_idx); - addMe = true; - } - } - - // do we lead SOLELY to a squasher state? (we use the successors as - // a proxy for the out-edge here, so there must be only one for us - // to do this safely) - /* The above comment is IMHO bogus and would result in all squashing - * being disabled around stars */ - if (e.trigger != LIMEX_TRIGGER_TUG) { - for (auto w : adjacent_vertices_range(v, h)) { - if (w == v) { - continue; - } - u32 j = args.state_ids.at(w); - if (j == NO_STATE) { - continue; - } - DEBUG_PRINTF("we are checking if succ %u is a squasher\n", j); - auto mi = args.squashMap.find(w); - if (mi != args.squashMap.end()) { - DEBUG_PRINTF("squasher edge (%u, %u)\n", i, j); - DEBUG_PRINTF("e.squash_states.size() == %zu, " - "mi->second.size() = %zu\n", - e.squash_states.size(), mi->second.size()); - assert(e.squash_states.size() == mi->second.size()); - e.squash_states = mi->second; - - // NOTE: this might be being combined with the report - // squashing above. - - e.squash = LIMEX_SQUASH_CYCLIC; - DEBUG_PRINTF("squashing succ %u (turns off %zu states)\n", - j, mi->second.size() - mi->second.count()); - addMe = true; - } - } - } - - if (addMe) { - // Add 'e' if it isn't in the map, and push state i on to its list - // of states. - assert(e.succ_states.size() == num_states); - assert(e.squash_states.size() == num_states); - exceptionMap[e].push_back(i); + + // We may be applying a report squash too. + auto mi = args.reportSquashMap.find(v); + if (mi != args.reportSquashMap.end()) { + DEBUG_PRINTF("report squashes states\n"); + assert(e.squash_states.size() == mi->second.size()); + e.squash_states = mi->second; + e.squash = LIMEX_SQUASH_REPORT; + } + + addMe = true; + } + + if (contains(pos_trigger, v)) { + u32 repeat_index = pos_trigger[v]; + assert(e.trigger == LIMEX_TRIGGER_NONE); + e.trigger = LIMEX_TRIGGER_POS; + e.repeat_index = repeat_index; + DEBUG_PRINTF("state %u has pos trigger for repeat %u\n", i, + repeat_index); + addMe = true; + } + + if (contains(tug_trigger, v)) { + u32 repeat_index = tug_trigger[v]; + assert(e.trigger == LIMEX_TRIGGER_NONE); + e.trigger = LIMEX_TRIGGER_TUG; + e.repeat_index = repeat_index; + + // TUG triggers can squash the preceding cyclic state. + u32 cyclic = args.state_ids.at(args.repeats[repeat_index].cyclic); + e.squash_states.reset(cyclic); + e.squash = LIMEX_SQUASH_TUG; + DEBUG_PRINTF("state %u has tug trigger for repeat %u, can squash " + "state %u\n", i, repeat_index, cyclic); + addMe = true; + } + + // are we a non-limited transition? + for (const auto &oe : out_edges_range(v, h)) { + if (contains(exceptional, oe)) { + NFAVertex w = target(oe, h); + u32 w_idx = args.state_ids.at(w); + assert(w_idx != NO_STATE); + e.succ_states.set(w_idx); + DEBUG_PRINTF("exceptional transition %u->%u\n", i, w_idx); + addMe = true; + } + } + + // do we lead SOLELY to a squasher state? 
(we use the successors as + // a proxy for the out-edge here, so there must be only one for us + // to do this safely) + /* The above comment is IMHO bogus and would result in all squashing + * being disabled around stars */ + if (e.trigger != LIMEX_TRIGGER_TUG) { + for (auto w : adjacent_vertices_range(v, h)) { + if (w == v) { + continue; + } + u32 j = args.state_ids.at(w); + if (j == NO_STATE) { + continue; + } + DEBUG_PRINTF("we are checking if succ %u is a squasher\n", j); + auto mi = args.squashMap.find(w); + if (mi != args.squashMap.end()) { + DEBUG_PRINTF("squasher edge (%u, %u)\n", i, j); + DEBUG_PRINTF("e.squash_states.size() == %zu, " + "mi->second.size() = %zu\n", + e.squash_states.size(), mi->second.size()); + assert(e.squash_states.size() == mi->second.size()); + e.squash_states = mi->second; + + // NOTE: this might be being combined with the report + // squashing above. + + e.squash = LIMEX_SQUASH_CYCLIC; + DEBUG_PRINTF("squashing succ %u (turns off %zu states)\n", + j, mi->second.size() - mi->second.count()); + addMe = true; + } + } + } + + if (addMe) { + // Add 'e' if it isn't in the map, and push state i on to its list + // of states. + assert(e.succ_states.size() == num_states); + assert(e.squash_states.size() == num_states); + exceptionMap[e].push_back(i); exceptionCount++; - } - } - + } + } + DEBUG_PRINTF("%u exceptions found (%zu unique)\n", exceptionCount, exceptionMap.size()); return exceptionCount; -} - -static -u32 depth_to_u32(const depth &d) { - assert(d.is_reachable()); - if (d.is_infinite()) { - return REPEAT_INF; - } - - u32 d_val = d; - assert(d_val < REPEAT_INF); - return d_val; -} - +} + +static +u32 depth_to_u32(const depth &d) { + assert(d.is_reachable()); + if (d.is_infinite()) { + return REPEAT_INF; + } + + u32 d_val = d; + assert(d_val < REPEAT_INF); + return d_val; +} + static bool isExceptionalTransition(u32 from, u32 to, const build_info &args, u32 maxShift) { @@ -1690,196 +1690,196 @@ bool cannotDie(const build_info &args) { }); } -template<NFAEngineType dtype> -struct Factory { - // typedefs for readability, for types derived from traits - typedef typename NFATraits<dtype>::exception_t exception_t; - typedef typename NFATraits<dtype>::implNFA_t implNFA_t; - typedef typename NFATraits<dtype>::tableRow_t tableRow_t; - - static - void allocState(NFA *nfa, u32 repeatscratchStateSize, - u32 repeatStreamState) { - implNFA_t *limex = (implNFA_t *)getMutableImplNfa(nfa); - - // LimEx NFAs now store the following in state: - // 1. state bitvector (always present) - // 2. space associated with repeats - // This function just needs to size these correctly. - - u32 stateSize = limex->stateSize; - - DEBUG_PRINTF("bitvector=%zu/%u, repeat full=%u, stream=%u\n", - sizeof(limex->init), stateSize, repeatscratchStateSize, - repeatStreamState); - +template<NFAEngineType dtype> +struct Factory { + // typedefs for readability, for types derived from traits + typedef typename NFATraits<dtype>::exception_t exception_t; + typedef typename NFATraits<dtype>::implNFA_t implNFA_t; + typedef typename NFATraits<dtype>::tableRow_t tableRow_t; + + static + void allocState(NFA *nfa, u32 repeatscratchStateSize, + u32 repeatStreamState) { + implNFA_t *limex = (implNFA_t *)getMutableImplNfa(nfa); + + // LimEx NFAs now store the following in state: + // 1. state bitvector (always present) + // 2. space associated with repeats + // This function just needs to size these correctly. 
+ + u32 stateSize = limex->stateSize; + + DEBUG_PRINTF("bitvector=%zu/%u, repeat full=%u, stream=%u\n", + sizeof(limex->init), stateSize, repeatscratchStateSize, + repeatStreamState); + size_t scratchStateSize = NFATraits<dtype>::scratch_state_size; - if (repeatscratchStateSize) { - scratchStateSize - = ROUNDUP_N(scratchStateSize, alignof(RepeatControl)); - scratchStateSize += repeatscratchStateSize; - } - size_t streamStateSize = stateSize + repeatStreamState; - - nfa->scratchStateSize = verify_u32(scratchStateSize); - nfa->streamStateSize = verify_u32(streamStateSize); - } - - static - size_t repeatAllocSize(const BoundedRepeatData &br, u32 *tableOffset, - u32 *tugMaskOffset) { - size_t len = sizeof(NFARepeatInfo) + sizeof(RepeatInfo); - - // sparse lookup table. - if (br.type == REPEAT_SPARSE_OPTIMAL_P) { - len = ROUNDUP_N(len, alignof(u64a)); - *tableOffset = verify_u32(len); - len += sizeof(u64a) * (br.repeatMax + 1); - } else { - *tableOffset = 0; - } - - // tug mask. - len = ROUNDUP_N(len, alignof(tableRow_t)); - *tugMaskOffset = verify_u32(len); - len += sizeof(tableRow_t); - - // to simplify layout. - len = ROUNDUP_CL(len); - - return len; - } - - static - void buildRepeats(const build_info &args, + if (repeatscratchStateSize) { + scratchStateSize + = ROUNDUP_N(scratchStateSize, alignof(RepeatControl)); + scratchStateSize += repeatscratchStateSize; + } + size_t streamStateSize = stateSize + repeatStreamState; + + nfa->scratchStateSize = verify_u32(scratchStateSize); + nfa->streamStateSize = verify_u32(streamStateSize); + } + + static + size_t repeatAllocSize(const BoundedRepeatData &br, u32 *tableOffset, + u32 *tugMaskOffset) { + size_t len = sizeof(NFARepeatInfo) + sizeof(RepeatInfo); + + // sparse lookup table. + if (br.type == REPEAT_SPARSE_OPTIMAL_P) { + len = ROUNDUP_N(len, alignof(u64a)); + *tableOffset = verify_u32(len); + len += sizeof(u64a) * (br.repeatMax + 1); + } else { + *tableOffset = 0; + } + + // tug mask. + len = ROUNDUP_N(len, alignof(tableRow_t)); + *tugMaskOffset = verify_u32(len); + len += sizeof(tableRow_t); + + // to simplify layout. + len = ROUNDUP_CL(len); + + return len; + } + + static + void buildRepeats(const build_info &args, vector<bytecode_ptr<NFARepeatInfo>> &out, u32 *scratchStateSize, u32 *streamState) { - out.reserve(args.repeats.size()); - - u32 repeat_idx = 0; - for (auto it = args.repeats.begin(), ite = args.repeats.end(); - it != ite; ++it, ++repeat_idx) { - const BoundedRepeatData &br = *it; - assert(args.state_ids.at(br.cyclic) != NO_STATE); - - u32 tableOffset, tugMaskOffset; - size_t len = repeatAllocSize(br, &tableOffset, &tugMaskOffset); + out.reserve(args.repeats.size()); + + u32 repeat_idx = 0; + for (auto it = args.repeats.begin(), ite = args.repeats.end(); + it != ite; ++it, ++repeat_idx) { + const BoundedRepeatData &br = *it; + assert(args.state_ids.at(br.cyclic) != NO_STATE); + + u32 tableOffset, tugMaskOffset; + size_t len = repeatAllocSize(br, &tableOffset, &tugMaskOffset); auto info = make_zeroed_bytecode_ptr<NFARepeatInfo>(len); - char *info_ptr = (char *)info.get(); - - // Collect state space info. - RepeatStateInfo rsi(br.type, br.repeatMin, br.repeatMax, br.minPeriod); - u32 streamStateLen = rsi.packedCtrlSize + rsi.stateSize; - - // Fill the NFARepeatInfo structure. 
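-            // (Annotation: each repeat's stream state is laid out as a
-            //  packed control block at packedCtrlOffset immediately
-            //  followed by rsi.stateSize bytes at stateOffset;
-            //  streamStateLen above is the sum of the two parts.)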
- info->cyclicState = args.state_ids.at(br.cyclic); - info->ctrlIndex = repeat_idx; - info->packedCtrlOffset = *streamState; - info->stateOffset = *streamState + rsi.packedCtrlSize; - info->stateSize = streamStateLen; - info->tugMaskOffset = tugMaskOffset; - - // Fill the RepeatInfo structure. - RepeatInfo *repeat = - (RepeatInfo *)(info_ptr + sizeof(NFARepeatInfo)); - repeat->type = br.type; - repeat->repeatMin = depth_to_u32(br.repeatMin); - repeat->repeatMax = depth_to_u32(br.repeatMax); - repeat->horizon = rsi.horizon; - repeat->packedCtrlSize = rsi.packedCtrlSize; - repeat->stateSize = rsi.stateSize; - copy_bytes(repeat->packedFieldSizes, rsi.packedFieldSizes); - repeat->patchCount = rsi.patchCount; - repeat->patchSize = rsi.patchSize; - repeat->encodingSize = rsi.encodingSize; - repeat->patchesOffset = rsi.patchesOffset; - - u32 repeat_len = sizeof(RepeatInfo); - if (br.type == REPEAT_SPARSE_OPTIMAL_P) { - repeat_len += sizeof(u64a) * (rsi.patchSize + 1); - } - repeat->length = repeat_len; - - // Copy in the sparse lookup table. - if (br.type == REPEAT_SPARSE_OPTIMAL_P) { - assert(!rsi.table.empty()); - copy_bytes(info_ptr + tableOffset, rsi.table); - } - - // Fill the tug mask. - tableRow_t *tugMask = (tableRow_t *)(info_ptr + tugMaskOffset); - for (auto v : br.tug_triggers) { - u32 state_id = args.state_ids.at(v); - assert(state_id != NO_STATE); - maskSetBit(*tugMask, state_id); - } - - assert(streamStateLen); - *streamState += streamStateLen; - *scratchStateSize += sizeof(RepeatControl); - + char *info_ptr = (char *)info.get(); + + // Collect state space info. + RepeatStateInfo rsi(br.type, br.repeatMin, br.repeatMax, br.minPeriod); + u32 streamStateLen = rsi.packedCtrlSize + rsi.stateSize; + + // Fill the NFARepeatInfo structure. + info->cyclicState = args.state_ids.at(br.cyclic); + info->ctrlIndex = repeat_idx; + info->packedCtrlOffset = *streamState; + info->stateOffset = *streamState + rsi.packedCtrlSize; + info->stateSize = streamStateLen; + info->tugMaskOffset = tugMaskOffset; + + // Fill the RepeatInfo structure. + RepeatInfo *repeat = + (RepeatInfo *)(info_ptr + sizeof(NFARepeatInfo)); + repeat->type = br.type; + repeat->repeatMin = depth_to_u32(br.repeatMin); + repeat->repeatMax = depth_to_u32(br.repeatMax); + repeat->horizon = rsi.horizon; + repeat->packedCtrlSize = rsi.packedCtrlSize; + repeat->stateSize = rsi.stateSize; + copy_bytes(repeat->packedFieldSizes, rsi.packedFieldSizes); + repeat->patchCount = rsi.patchCount; + repeat->patchSize = rsi.patchSize; + repeat->encodingSize = rsi.encodingSize; + repeat->patchesOffset = rsi.patchesOffset; + + u32 repeat_len = sizeof(RepeatInfo); + if (br.type == REPEAT_SPARSE_OPTIMAL_P) { + repeat_len += sizeof(u64a) * (rsi.patchSize + 1); + } + repeat->length = repeat_len; + + // Copy in the sparse lookup table. + if (br.type == REPEAT_SPARSE_OPTIMAL_P) { + assert(!rsi.table.empty()); + copy_bytes(info_ptr + tableOffset, rsi.table); + } + + // Fill the tug mask. + tableRow_t *tugMask = (tableRow_t *)(info_ptr + tugMaskOffset); + for (auto v : br.tug_triggers) { + u32 state_id = args.state_ids.at(v); + assert(state_id != NO_STATE); + maskSetBit(*tugMask, state_id); + } + + assert(streamStateLen); + *streamState += streamStateLen; + *scratchStateSize += sizeof(RepeatControl); + out.emplace_back(move(info)); - } - } - - static - void writeLimexMasks(const build_info &args, implNFA_t *limex) { - const NGHolder &h = args.h; - - // Init masks. 
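-        // (Annotation: limex->init is the state set switched on when the
-        //  NFA starts up; initDS appears to be the subset kept alive for
-        //  unanchored operation -- startDs, the self-looping start, lands
-        //  in both, as the code below shows.)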
- u32 s_i = args.state_ids.at(h.start); - u32 sds_i = args.state_ids.at(h.startDs); - - if (s_i != NO_STATE) { - maskSetBit(limex->init, s_i); - if (is_triggered(h)) { - maskSetBit(limex->initDS, s_i); - } - } - - if (sds_i != NO_STATE) { - maskSetBit(limex->init, sds_i); - maskSetBit(limex->initDS, sds_i); - } - - // Zombie mask. - for (auto v : args.zombies) { - u32 state_id = args.state_ids.at(v); - assert(state_id != NO_STATE); - maskSetBit(limex->zombieMask, state_id); - } - - // Repeat cyclic mask. - for (const auto &br : args.repeats) { - u32 cyclic = args.state_ids.at(br.cyclic); - assert(cyclic != NO_STATE); - maskSetBit(limex->repeatCyclicMask, cyclic); - } + } + } + + static + void writeLimexMasks(const build_info &args, implNFA_t *limex) { + const NGHolder &h = args.h; + + // Init masks. + u32 s_i = args.state_ids.at(h.start); + u32 sds_i = args.state_ids.at(h.startDs); + + if (s_i != NO_STATE) { + maskSetBit(limex->init, s_i); + if (is_triggered(h)) { + maskSetBit(limex->initDS, s_i); + } + } + + if (sds_i != NO_STATE) { + maskSetBit(limex->init, sds_i); + maskSetBit(limex->initDS, sds_i); + } + + // Zombie mask. + for (auto v : args.zombies) { + u32 state_id = args.state_ids.at(v); + assert(state_id != NO_STATE); + maskSetBit(limex->zombieMask, state_id); + } + + // Repeat cyclic mask. + for (const auto &br : args.repeats) { + u32 cyclic = args.state_ids.at(br.cyclic); + assert(cyclic != NO_STATE); + maskSetBit(limex->repeatCyclicMask, cyclic); + } /* also include tugs in repeat cyclic mask */ for (size_t i = args.tugs.find_first(); i != args.tugs.npos; i = args.tugs.find_next(i)) { maskSetBit(limex->repeatCyclicMask, i); } - } - - static - void writeShiftMasks(const build_info &args, implNFA_t *limex) { - const NGHolder &h = args.h; + } + + static + void writeShiftMasks(const build_info &args, implNFA_t *limex) { + const NGHolder &h = args.h; u32 maxShift = findMaxVarShift(args, limex->shiftCount); u32 shiftMask = 0; int shiftMaskIdx = 0; - - for (const auto &e : edges_range(h)) { - u32 from = args.state_ids.at(source(e, h)); - u32 to = args.state_ids.at(target(e, h)); - if (from == NO_STATE || to == NO_STATE) { - continue; - } - - // We check for exceptional transitions here, as we don't want tug - // trigger transitions emitted as limited transitions (even if they - // could be in this model). + + for (const auto &e : edges_range(h)) { + u32 from = args.state_ids.at(source(e, h)); + u32 to = args.state_ids.at(target(e, h)); + if (from == NO_STATE || to == NO_STATE) { + continue; + } + + // We check for exceptional transitions here, as we don't want tug + // trigger transitions emitted as limited transitions (even if they + // could be in this model). 
if (!isExceptionalTransition(from, to, args, maxShift)) { u32 shift = to - from; if ((shiftMask & (1UL << shift)) == 0UL) { @@ -1893,55 +1893,55 @@ struct Factory { break; } } - } - } + } + } if (maxShift && limex->shiftCount > 1) { for (u32 i = 0; i < limex->shiftCount; i++) { assert(!isMaskZero(limex->shift[i])); } } - } - - static - void findExceptionalTransitions(const build_info &args, + } + + static + void findExceptionalTransitions(const build_info &args, unordered_set<NFAEdge> &exceptional, u32 maxShift) { - const NGHolder &h = args.h; - - for (const auto &e : edges_range(h)) { - u32 from = args.state_ids.at(source(e, h)); - u32 to = args.state_ids.at(target(e, h)); - if (from == NO_STATE || to == NO_STATE) { - continue; - } - + const NGHolder &h = args.h; + + for (const auto &e : edges_range(h)) { + u32 from = args.state_ids.at(source(e, h)); + u32 to = args.state_ids.at(target(e, h)); + if (from == NO_STATE || to == NO_STATE) { + continue; + } + if (isExceptionalTransition(from, to, args, maxShift)) { - exceptional.insert(e); - } - } - } - - static + exceptional.insert(e); + } + } + } + + static void writeExceptions(const build_info &args, const map<ExceptionProto, vector<u32>> &exceptionMap, const vector<u32> &repeatOffsets, implNFA_t *limex, const u32 exceptionsOffset, const u32 reportListOffset) { - DEBUG_PRINTF("exceptionsOffset=%u\n", exceptionsOffset); - - exception_t *etable = (exception_t *)((char *)limex + exceptionsOffset); - assert(ISALIGNED(etable)); - + DEBUG_PRINTF("exceptionsOffset=%u\n", exceptionsOffset); + + exception_t *etable = (exception_t *)((char *)limex + exceptionsOffset); + assert(ISALIGNED(etable)); + map<u32, ExceptionProto> exception_by_state; - for (const auto &m : exceptionMap) { - const ExceptionProto &proto = m.first; - const vector<u32> &states = m.second; + for (const auto &m : exceptionMap) { + const ExceptionProto &proto = m.first; + const vector<u32> &states = m.second; for (u32 i : states) { assert(!contains(exception_by_state, i)); exception_by_state.emplace(i, proto); } } - + u32 ecount = 0; for (const auto &m : exception_by_state) { const ExceptionProto &proto = m.second; @@ -1949,32 +1949,32 @@ struct Factory { DEBUG_PRINTF("exception %u, triggered by state %u\n", ecount, state_id); - // Write the exception entry. - exception_t &e = etable[ecount]; - maskSetBits(e.squash, proto.squash_states); - maskSetBits(e.successors, proto.succ_states); + // Write the exception entry. + exception_t &e = etable[ecount]; + maskSetBits(e.squash, proto.squash_states); + maskSetBits(e.successors, proto.succ_states); if (proto.reports_index == MO_INVALID_IDX) { e.reports = MO_INVALID_IDX; } else { e.reports = reportListOffset + proto.reports_index * sizeof(ReportID); } - e.hasSquash = verify_u8(proto.squash); - e.trigger = verify_u8(proto.trigger); - u32 repeat_offset = proto.repeat_index == MO_INVALID_IDX - ? MO_INVALID_IDX - : repeatOffsets[proto.repeat_index]; - e.repeatOffset = repeat_offset; - + e.hasSquash = verify_u8(proto.squash); + e.trigger = verify_u8(proto.trigger); + u32 repeat_offset = proto.repeat_index == MO_INVALID_IDX + ? 
MO_INVALID_IDX + : repeatOffsets[proto.repeat_index]; + e.repeatOffset = repeat_offset; + // for the state that can switch it on // set this bit in the exception mask maskSetBit(limex->exceptionMask, state_id); - ecount++; - } - - limex->exceptionOffset = exceptionsOffset; - limex->exceptionCount = ecount; + ecount++; + } + + limex->exceptionOffset = exceptionsOffset; + limex->exceptionCount = ecount; if (args.num_states > 64 && args.cc.target_info.has_avx512vbmi()) { const u8 *exceptionMask = (const u8 *)(&limex->exceptionMask); @@ -2028,118 +2028,118 @@ struct Factory { setLimexFlag(limex, LIMEX_FLAG_EXTRACT_EXP); } } - } - - static - void writeReachMapping(const vector<NFAStateSet> &reach, - const vector<u8> &reachMap, implNFA_t *limex, - const u32 reachOffset) { - DEBUG_PRINTF("reachOffset=%u\n", reachOffset); - - // Reach mapping is inside the LimEx structure. - copy(reachMap.begin(), reachMap.end(), &limex->reachMap[0]); - - // Reach table is right after the LimEx structure. - tableRow_t *reachMask = (tableRow_t *)((char *)limex + reachOffset); - assert(ISALIGNED(reachMask)); - for (size_t i = 0, end = reach.size(); i < end; i++) { - maskSetBits(reachMask[i], reach[i]); - } - limex->reachSize = verify_u32(reach.size()); - } - - static - void writeTopMasks(const vector<NFAStateSet> &tops, implNFA_t *limex, - const u32 topsOffset) { - DEBUG_PRINTF("topsOffset=%u\n", topsOffset); - - limex->topOffset = topsOffset; - tableRow_t *topMasks = (tableRow_t *)((char *)limex + topsOffset); - assert(ISALIGNED(topMasks)); - - for (size_t i = 0, end = tops.size(); i < end; i++) { - maskSetBits(topMasks[i], tops[i]); - } - - limex->topCount = verify_u32(tops.size()); - } - - static - void writeAccelSsse3Masks(const NFAStateSet &accelMask, implNFA_t *limex) { - char *perm_base = (char *)&limex->accelPermute; - char *comp_base = (char *)&limex->accelCompare; - - u32 num = 0; // index in accel table. - for (size_t i = accelMask.find_first(); i != accelMask.npos; - i = accelMask.find_next(i), ++num) { - u32 state_id = verify_u32(i); - DEBUG_PRINTF("accel num=%u, state=%u\n", num, state_id); - - // PSHUFB permute and compare masks - size_t mask_idx = sizeof(u_128) * (state_id / 128U); - DEBUG_PRINTF("mask_idx=%zu\n", mask_idx); - u_128 *perm = (u_128 *)(perm_base + mask_idx); - u_128 *comp = (u_128 *)(comp_base + mask_idx); - maskSetByte(*perm, num, ((state_id % 128U) / 8U)); - maskSetByte(*comp, num, ~(1U << (state_id % 8U))); - } - } - - static - void writeAccel(const NFAStateSet &accelMask, - const NFAStateSet &accelFriendsMask, - const AccelAuxVector &accelAux, - const vector<u8> &accelTable, implNFA_t *limex, - const u32 accelTableOffset, const u32 accelAuxOffset) { - DEBUG_PRINTF("accelTableOffset=%u, accelAuxOffset=%u\n", - accelTableOffset, accelAuxOffset); - - // Write accel lookup table. - limex->accelTableOffset = accelTableOffset; - copy(accelTable.begin(), accelTable.end(), - (u8 *)((char *)limex + accelTableOffset)); - - // Write accel aux structures. - limex->accelAuxOffset = accelAuxOffset; - AccelAux *auxTable = (AccelAux *)((char *)limex + accelAuxOffset); - assert(ISALIGNED(auxTable)); - copy(accelAux.begin(), accelAux.end(), auxTable); - - // Write LimEx structure members. - limex->accelCount = verify_u32(accelTable.size()); - // FIXME: accelAuxCount is unused? - limex->accelAuxCount = verify_u32(accelAux.size()); - - // Write LimEx masks. 
- maskSetBits(limex->accel, accelMask); - maskSetBits(limex->accel_and_friends, accelFriendsMask); - - // We can use PSHUFB-based shuffles for models >= 128 states. These - // require some additional masks in the bytecode. - maskClear(limex->accelCompare); - maskFill(limex->accelPermute, (char)0x80); - if (NFATraits<dtype>::maxStates >= 128) { - writeAccelSsse3Masks(accelMask, limex); - } - } - - static - void writeAccepts(const NFAStateSet &acceptMask, - const NFAStateSet &acceptEodMask, - const vector<NFAAccept> &accepts, - const vector<NFAAccept> &acceptsEod, - const vector<NFAStateSet> &squash, implNFA_t *limex, - const u32 acceptsOffset, const u32 acceptsEodOffset, + } + + static + void writeReachMapping(const vector<NFAStateSet> &reach, + const vector<u8> &reachMap, implNFA_t *limex, + const u32 reachOffset) { + DEBUG_PRINTF("reachOffset=%u\n", reachOffset); + + // Reach mapping is inside the LimEx structure. + copy(reachMap.begin(), reachMap.end(), &limex->reachMap[0]); + + // Reach table is right after the LimEx structure. + tableRow_t *reachMask = (tableRow_t *)((char *)limex + reachOffset); + assert(ISALIGNED(reachMask)); + for (size_t i = 0, end = reach.size(); i < end; i++) { + maskSetBits(reachMask[i], reach[i]); + } + limex->reachSize = verify_u32(reach.size()); + } + + static + void writeTopMasks(const vector<NFAStateSet> &tops, implNFA_t *limex, + const u32 topsOffset) { + DEBUG_PRINTF("topsOffset=%u\n", topsOffset); + + limex->topOffset = topsOffset; + tableRow_t *topMasks = (tableRow_t *)((char *)limex + topsOffset); + assert(ISALIGNED(topMasks)); + + for (size_t i = 0, end = tops.size(); i < end; i++) { + maskSetBits(topMasks[i], tops[i]); + } + + limex->topCount = verify_u32(tops.size()); + } + + static + void writeAccelSsse3Masks(const NFAStateSet &accelMask, implNFA_t *limex) { + char *perm_base = (char *)&limex->accelPermute; + char *comp_base = (char *)&limex->accelCompare; + + u32 num = 0; // index in accel table. + for (size_t i = accelMask.find_first(); i != accelMask.npos; + i = accelMask.find_next(i), ++num) { + u32 state_id = verify_u32(i); + DEBUG_PRINTF("accel num=%u, state=%u\n", num, state_id); + + // PSHUFB permute and compare masks + size_t mask_idx = sizeof(u_128) * (state_id / 128U); + DEBUG_PRINTF("mask_idx=%zu\n", mask_idx); + u_128 *perm = (u_128 *)(perm_base + mask_idx); + u_128 *comp = (u_128 *)(comp_base + mask_idx); + maskSetByte(*perm, num, ((state_id % 128U) / 8U)); + maskSetByte(*comp, num, ~(1U << (state_id % 8U))); + } + } + + static + void writeAccel(const NFAStateSet &accelMask, + const NFAStateSet &accelFriendsMask, + const AccelAuxVector &accelAux, + const vector<u8> &accelTable, implNFA_t *limex, + const u32 accelTableOffset, const u32 accelAuxOffset) { + DEBUG_PRINTF("accelTableOffset=%u, accelAuxOffset=%u\n", + accelTableOffset, accelAuxOffset); + + // Write accel lookup table. + limex->accelTableOffset = accelTableOffset; + copy(accelTable.begin(), accelTable.end(), + (u8 *)((char *)limex + accelTableOffset)); + + // Write accel aux structures. + limex->accelAuxOffset = accelAuxOffset; + AccelAux *auxTable = (AccelAux *)((char *)limex + accelAuxOffset); + assert(ISALIGNED(auxTable)); + copy(accelAux.begin(), accelAux.end(), auxTable); + + // Write LimEx structure members. + limex->accelCount = verify_u32(accelTable.size()); + // FIXME: accelAuxCount is unused? + limex->accelAuxCount = verify_u32(accelAux.size()); + + // Write LimEx masks. 
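+        // (Annotation: accel marks the accelerable states themselves;
+        //  judging by its name and the build code, accel_and_friends
+        //  additionally admits states whose presence does not spoil
+        //  acceleration.)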
+ maskSetBits(limex->accel, accelMask); + maskSetBits(limex->accel_and_friends, accelFriendsMask); + + // We can use PSHUFB-based shuffles for models >= 128 states. These + // require some additional masks in the bytecode. + maskClear(limex->accelCompare); + maskFill(limex->accelPermute, (char)0x80); + if (NFATraits<dtype>::maxStates >= 128) { + writeAccelSsse3Masks(accelMask, limex); + } + } + + static + void writeAccepts(const NFAStateSet &acceptMask, + const NFAStateSet &acceptEodMask, + const vector<NFAAccept> &accepts, + const vector<NFAAccept> &acceptsEod, + const vector<NFAStateSet> &squash, implNFA_t *limex, + const u32 acceptsOffset, const u32 acceptsEodOffset, const u32 squashOffset, const u32 reportListOffset) { char *limex_base = (char *)limex; - DEBUG_PRINTF("acceptsOffset=%u, acceptsEodOffset=%u, squashOffset=%u\n", - acceptsOffset, acceptsEodOffset, squashOffset); - - // LimEx masks (in structure) - maskSetBits(limex->accept, acceptMask); - maskSetBits(limex->acceptAtEOD, acceptEodMask); - + DEBUG_PRINTF("acceptsOffset=%u, acceptsEodOffset=%u, squashOffset=%u\n", + acceptsOffset, acceptsEodOffset, squashOffset); + + // LimEx masks (in structure) + maskSetBits(limex->accept, acceptMask); + maskSetBits(limex->acceptAtEOD, acceptEodMask); + // Transforms the indices (report list, squash mask) into offsets // relative to the base of the limex. auto transform_offset_fn = [&](NFAAccept a) { @@ -2150,272 +2150,272 @@ struct Factory { return a; }; - // Write accept table. - limex->acceptOffset = acceptsOffset; - limex->acceptCount = verify_u32(accepts.size()); - DEBUG_PRINTF("NFA has %zu accepts\n", accepts.size()); + // Write accept table. + limex->acceptOffset = acceptsOffset; + limex->acceptCount = verify_u32(accepts.size()); + DEBUG_PRINTF("NFA has %zu accepts\n", accepts.size()); NFAAccept *acceptsTable = (NFAAccept *)(limex_base + acceptsOffset); - assert(ISALIGNED(acceptsTable)); + assert(ISALIGNED(acceptsTable)); transform(accepts.begin(), accepts.end(), acceptsTable, transform_offset_fn); - - // Write eod accept table. - limex->acceptEodOffset = acceptsEodOffset; - limex->acceptEodCount = verify_u32(acceptsEod.size()); - DEBUG_PRINTF("NFA has %zu EOD accepts\n", acceptsEod.size()); + + // Write eod accept table. + limex->acceptEodOffset = acceptsEodOffset; + limex->acceptEodCount = verify_u32(acceptsEod.size()); + DEBUG_PRINTF("NFA has %zu EOD accepts\n", acceptsEod.size()); NFAAccept *acceptsEodTable = (NFAAccept *)(limex_base + acceptsEodOffset); - assert(ISALIGNED(acceptsEodTable)); + assert(ISALIGNED(acceptsEodTable)); transform(acceptsEod.begin(), acceptsEod.end(), acceptsEodTable, transform_offset_fn); - - // Write squash mask table. - limex->squashCount = verify_u32(squash.size()); - limex->squashOffset = squashOffset; - DEBUG_PRINTF("NFA has %zu report squash masks\n", squash.size()); + + // Write squash mask table. 
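+        // (Annotation: the intent, inferred from the reportSquashMap
+        //  plumbing above, is that a squash mask is ANDed into the state
+        //  set when its owning report fires, switching off the states it
+        //  marks as squashable.)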
+ limex->squashCount = verify_u32(squash.size()); + limex->squashOffset = squashOffset; + DEBUG_PRINTF("NFA has %zu report squash masks\n", squash.size()); tableRow_t *mask = (tableRow_t *)(limex_base + squashOffset); - assert(ISALIGNED(mask)); - for (size_t i = 0, end = squash.size(); i < end; i++) { - maskSetBits(mask[i], squash[i]); - } - } - - static + assert(ISALIGNED(mask)); + for (size_t i = 0, end = squash.size(); i < end; i++) { + maskSetBits(mask[i], squash[i]); + } + } + + static void writeRepeats(const vector<bytecode_ptr<NFARepeatInfo>> &repeats, - vector<u32> &repeatOffsets, implNFA_t *limex, - const u32 repeatOffsetsOffset, const u32 repeatOffset) { - const u32 num_repeats = verify_u32(repeats.size()); - - DEBUG_PRINTF("repeatOffsetsOffset=%u, repeatOffset=%u\n", - repeatOffsetsOffset, repeatOffset); - - repeatOffsets.resize(num_repeats); - u32 offset = repeatOffset; - - for (u32 i = 0; i < num_repeats; i++) { - repeatOffsets[i] = offset; + vector<u32> &repeatOffsets, implNFA_t *limex, + const u32 repeatOffsetsOffset, const u32 repeatOffset) { + const u32 num_repeats = verify_u32(repeats.size()); + + DEBUG_PRINTF("repeatOffsetsOffset=%u, repeatOffset=%u\n", + repeatOffsetsOffset, repeatOffset); + + repeatOffsets.resize(num_repeats); + u32 offset = repeatOffset; + + for (u32 i = 0; i < num_repeats; i++) { + repeatOffsets[i] = offset; assert(repeats[i]); memcpy((char *)limex + offset, repeats[i].get(), repeats[i].size()); offset += repeats[i].size(); - } - - // Write repeat offset lookup table. - assert(ISALIGNED_N((char *)limex + repeatOffsetsOffset, alignof(u32))); - copy_bytes((char *)limex + repeatOffsetsOffset, repeatOffsets); - - limex->repeatOffset = repeatOffsetsOffset; - limex->repeatCount = num_repeats; - } - - static + } + + // Write repeat offset lookup table. + assert(ISALIGNED_N((char *)limex + repeatOffsetsOffset, alignof(u32))); + copy_bytes((char *)limex + repeatOffsetsOffset, repeatOffsets); + + limex->repeatOffset = repeatOffsetsOffset; + limex->repeatCount = num_repeats; + } + + static void writeReportList(const vector<ReportID> &reports, implNFA_t *limex, const u32 reportListOffset) { DEBUG_PRINTF("reportListOffset=%u\n", reportListOffset); assert(ISALIGNED_N((char *)limex + reportListOffset, - alignof(ReportID))); + alignof(ReportID))); copy_bytes((char *)limex + reportListOffset, reports); - } - - static + } + + static bytecode_ptr<NFA> generateNfa(const build_info &args) { - if (args.num_states > NFATraits<dtype>::maxStates) { - return nullptr; - } - - // Build bounded repeat structures. + if (args.num_states > NFATraits<dtype>::maxStates) { + return nullptr; + } + + // Build bounded repeat structures. vector<bytecode_ptr<NFARepeatInfo>> repeats; - u32 repeats_full_state = 0; - u32 repeats_stream_state = 0; - buildRepeats(args, repeats, &repeats_full_state, &repeats_stream_state); - size_t repeatSize = 0; - for (size_t i = 0; i < repeats.size(); i++) { + u32 repeats_full_state = 0; + u32 repeats_stream_state = 0; + buildRepeats(args, repeats, &repeats_full_state, &repeats_stream_state); + size_t repeatSize = 0; + for (size_t i = 0; i < repeats.size(); i++) { repeatSize += repeats[i].size(); - } - + } + // We track report lists that have already been written into the global // list in case we can reuse them. 
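        // (Annotation: ReportListCache keys on the list contents, so two
        // states with identical report lists end up sharing one offset in
        // the global list rather than writing the same run of ReportIDs
        // twice.)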
ReportListCache reports_cache; - + unordered_set<NFAEdge> exceptional; u32 shiftCount = findBestNumOfVarShifts(args); assert(shiftCount); u32 maxShift = findMaxVarShift(args, shiftCount); findExceptionalTransitions(args, exceptional, maxShift); - + map<ExceptionProto, vector<u32>> exceptionMap; vector<ReportID> reportList; - + u32 exceptionCount = buildExceptionMap(args, reports_cache, exceptional, exceptionMap, reportList); assert(exceptionCount <= args.num_states); - // Build reach table and character mapping. - vector<NFAStateSet> reach; - vector<u8> reachMap; - buildReachMapping(args, reach, reachMap); - - // Build top masks. - vector<NFAStateSet> tops; - buildTopMasks(args, tops); - - // Build all our accept info. - NFAStateSet acceptMask, acceptEodMask; - vector<NFAAccept> accepts, acceptsEod; - vector<NFAStateSet> squash; + // Build reach table and character mapping. + vector<NFAStateSet> reach; + vector<u8> reachMap; + buildReachMapping(args, reach, reachMap); + + // Build top masks. + vector<NFAStateSet> tops; + buildTopMasks(args, tops); + + // Build all our accept info. + NFAStateSet acceptMask, acceptEodMask; + vector<NFAAccept> accepts, acceptsEod; + vector<NFAStateSet> squash; buildAccepts(args, reports_cache, acceptMask, acceptEodMask, accepts, acceptsEod, reportList, squash); - - // Build all our accel info. - NFAStateSet accelMask, accelFriendsMask; - AccelAuxVector accelAux; - vector<u8> accelTable; - buildAccel(args, accelMask, accelFriendsMask, accelAux, accelTable); - - // Compute the offsets in the bytecode for this LimEx NFA for all of - // our structures. First, the NFA and LimEx structures. All other - // offsets are relative to the start of the LimEx struct, starting with - // the reach table. - u32 offset = sizeof(implNFA_t); - - const u32 reachOffset = offset; - offset += sizeof(tableRow_t) * reach.size(); - - const u32 topsOffset = offset; - offset += sizeof(tableRow_t) * tops.size(); - - const u32 accelTableOffset = offset; - offset += sizeof(u8) * accelTable.size(); - - offset = ROUNDUP_N(offset, alignof(AccelAux)); - const u32 accelAuxOffset = offset; - offset += sizeof(AccelAux) * accelAux.size(); - - offset = ROUNDUP_N(offset, alignof(NFAAccept)); - const u32 acceptsOffset = offset; - offset += sizeof(NFAAccept) * accepts.size(); - const u32 acceptsEodOffset = offset; - offset += sizeof(NFAAccept) * acceptsEod.size(); - - offset = ROUNDUP_CL(offset); - const u32 squashOffset = offset; - offset += sizeof(tableRow_t) * squash.size(); - - offset = ROUNDUP_CL(offset); - const u32 exceptionsOffset = offset; + + // Build all our accel info. + NFAStateSet accelMask, accelFriendsMask; + AccelAuxVector accelAux; + vector<u8> accelTable; + buildAccel(args, accelMask, accelFriendsMask, accelAux, accelTable); + + // Compute the offsets in the bytecode for this LimEx NFA for all of + // our structures. First, the NFA and LimEx structures. All other + // offsets are relative to the start of the LimEx struct, starting with + // the reach table. 
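+        // (Annotation, summarising the offset arithmetic below: reach
+        //  table, top masks, accel table, AccelAux array, accept and
+        //  EOD-accept tables, squash masks, exception table, report list,
+        //  repeat offset table, then the repeat structures themselves,
+        //  with ROUNDUP_N/ROUNDUP_CL alignment padding between groups.)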
+ u32 offset = sizeof(implNFA_t); + + const u32 reachOffset = offset; + offset += sizeof(tableRow_t) * reach.size(); + + const u32 topsOffset = offset; + offset += sizeof(tableRow_t) * tops.size(); + + const u32 accelTableOffset = offset; + offset += sizeof(u8) * accelTable.size(); + + offset = ROUNDUP_N(offset, alignof(AccelAux)); + const u32 accelAuxOffset = offset; + offset += sizeof(AccelAux) * accelAux.size(); + + offset = ROUNDUP_N(offset, alignof(NFAAccept)); + const u32 acceptsOffset = offset; + offset += sizeof(NFAAccept) * accepts.size(); + const u32 acceptsEodOffset = offset; + offset += sizeof(NFAAccept) * acceptsEod.size(); + + offset = ROUNDUP_CL(offset); + const u32 squashOffset = offset; + offset += sizeof(tableRow_t) * squash.size(); + + offset = ROUNDUP_CL(offset); + const u32 exceptionsOffset = offset; offset += sizeof(exception_t) * exceptionCount; - + const u32 reportListOffset = offset; offset += sizeof(ReportID) * reportList.size(); - - const u32 repeatOffsetsOffset = offset; - offset += sizeof(u32) * args.repeats.size(); - - offset = ROUNDUP_CL(offset); - const u32 repeatsOffset = offset; - offset += repeatSize; - - // Now we can allocate space for the NFA and get to work on layout. - - size_t nfaSize = sizeof(NFA) + offset; - DEBUG_PRINTF("nfa size %zu\n", nfaSize); + + const u32 repeatOffsetsOffset = offset; + offset += sizeof(u32) * args.repeats.size(); + + offset = ROUNDUP_CL(offset); + const u32 repeatsOffset = offset; + offset += repeatSize; + + // Now we can allocate space for the NFA and get to work on layout. + + size_t nfaSize = sizeof(NFA) + offset; + DEBUG_PRINTF("nfa size %zu\n", nfaSize); auto nfa = make_zeroed_bytecode_ptr<NFA>(nfaSize); - assert(nfa); // otherwise we would have thrown std::bad_alloc - - implNFA_t *limex = (implNFA_t *)getMutableImplNfa(nfa.get()); - assert(ISALIGNED(limex)); - - writeReachMapping(reach, reachMap, limex, reachOffset); - - writeTopMasks(tops, limex, topsOffset); - - writeAccel(accelMask, accelFriendsMask, accelAux, accelTable, - limex, accelTableOffset, accelAuxOffset); - - writeAccepts(acceptMask, acceptEodMask, accepts, acceptsEod, squash, + assert(nfa); // otherwise we would have thrown std::bad_alloc + + implNFA_t *limex = (implNFA_t *)getMutableImplNfa(nfa.get()); + assert(ISALIGNED(limex)); + + writeReachMapping(reach, reachMap, limex, reachOffset); + + writeTopMasks(tops, limex, topsOffset); + + writeAccel(accelMask, accelFriendsMask, accelAux, accelTable, + limex, accelTableOffset, accelAuxOffset); + + writeAccepts(acceptMask, acceptEodMask, accepts, acceptsEod, squash, limex, acceptsOffset, acceptsEodOffset, squashOffset, reportListOffset); - + limex->shiftCount = shiftCount; - writeShiftMasks(args, limex); - + writeShiftMasks(args, limex); + if (cannotDie(args)) { DEBUG_PRINTF("nfa cannot die\n"); setLimexFlag(limex, LIMEX_FLAG_CANNOT_DIE); } - // Determine the state required for our state vector. - findStateSize(args, limex); - + // Determine the state required for our state vector. + findStateSize(args, limex); + writeReportList(reportList, limex, reportListOffset); - - // Repeat structures and offset table. - vector<u32> repeatOffsets; - writeRepeats(repeats, repeatOffsets, limex, repeatOffsetsOffset, - repeatsOffset); - + + // Repeat structures and offset table. 
+ vector<u32> repeatOffsets; + writeRepeats(repeats, repeatOffsets, limex, repeatOffsetsOffset, + repeatsOffset); + writeExceptions(args, exceptionMap, repeatOffsets, limex, exceptionsOffset, reportListOffset); - - writeLimexMasks(args, limex); - - allocState(nfa.get(), repeats_full_state, repeats_stream_state); - - nfa->type = dtype; - nfa->length = verify_u32(nfaSize); - nfa->nPositions = args.num_states; - - if (!args.zombies.empty()) { - setNfaFlag(nfa.get(), NFA_ZOMBIE); - } - if (!acceptsEod.empty()) { - setNfaFlag(nfa.get(), NFA_ACCEPTS_EOD); - } - - return nfa; - } - - static int score(const build_info &args) { - // LimEx NFAs are available in sizes from 32 to 512-bit. - size_t num_states = args.num_states; - - size_t sz = findContainerSize(num_states); - if (sz < 32) { - sz = 32; - } - - if (args.cc.grey.nfaForceSize) { - sz = args.cc.grey.nfaForceSize; - } - - if (sz != NFATraits<dtype>::maxStates) { - return -1; // fail, size not appropriate - } - - // We are of the right size, calculate a score based on the number - // of exceptions and the number of shifts used by this LimEx. + + writeLimexMasks(args, limex); + + allocState(nfa.get(), repeats_full_state, repeats_stream_state); + + nfa->type = dtype; + nfa->length = verify_u32(nfaSize); + nfa->nPositions = args.num_states; + + if (!args.zombies.empty()) { + setNfaFlag(nfa.get(), NFA_ZOMBIE); + } + if (!acceptsEod.empty()) { + setNfaFlag(nfa.get(), NFA_ACCEPTS_EOD); + } + + return nfa; + } + + static int score(const build_info &args) { + // LimEx NFAs are available in sizes from 32 to 512-bit. + size_t num_states = args.num_states; + + size_t sz = findContainerSize(num_states); + if (sz < 32) { + sz = 32; + } + + if (args.cc.grey.nfaForceSize) { + sz = args.cc.grey.nfaForceSize; + } + + if (sz != NFATraits<dtype>::maxStates) { + return -1; // fail, size not appropriate + } + + // We are of the right size, calculate a score based on the number + // of exceptions and the number of shifts used by this LimEx. int score; u32 shiftCount = findBestNumOfVarShifts(args, &score); if (shiftCount == 0) { return -1; - } - return score; - } -}; - -template<NFAEngineType dtype> -struct generateNfa { + } + return score; + } +}; + +template<NFAEngineType dtype> +struct generateNfa { static bytecode_ptr<NFA> call(const build_info &args) { - return Factory<dtype>::generateNfa(args); - } -}; - -template<NFAEngineType dtype> -struct scoreNfa { - static int call(const build_info &args) { - return Factory<dtype>::score(args); - } -}; - + return Factory<dtype>::generateNfa(args); + } +}; + +template<NFAEngineType dtype> +struct scoreNfa { + static int call(const build_info &args) { + return Factory<dtype>::score(args); + } +}; + #define MAKE_LIMEX_TRAITS(mlt_size) \ template<> struct NFATraits<LIMEX_NFA_##mlt_size> { \ typedef LimExNFA##mlt_size implNFA_t; \ @@ -2425,70 +2425,70 @@ struct scoreNfa { static const size_t scratch_state_size = mlt_size == 64 ? sizeof(m128) \ : sizeof(tableRow_t); \ }; - + MAKE_LIMEX_TRAITS(32) MAKE_LIMEX_TRAITS(64) MAKE_LIMEX_TRAITS(128) MAKE_LIMEX_TRAITS(256) MAKE_LIMEX_TRAITS(384) MAKE_LIMEX_TRAITS(512) - -} // namespace - -#ifndef NDEBUG -// Some sanity tests, called by an assertion in generate(). -static UNUSED + +} // namespace + +#ifndef NDEBUG +// Some sanity tests, called by an assertion in generate(). 
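+// (Annotation: the checks below amount to requiring that every vertex has
+// a state-map entry and that the assigned IDs are a bijection onto
+// [0, num_states), with non-empty reachability and a proper predecessor
+// for every non-start, non-top state.)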
+static UNUSED bool isSane(const NGHolder &h, const map<u32, set<NFAVertex>> &tops, const unordered_map<NFAVertex, u32> &state_ids, - u32 num_states) { + u32 num_states) { unordered_set<u32> seen; unordered_set<NFAVertex> top_starts; for (const auto &vv : tops | map_values) { insert(&top_starts, vv); - } - - for (auto v : vertices_range(h)) { - if (!contains(state_ids, v)) { + } + + for (auto v : vertices_range(h)) { + if (!contains(state_ids, v)) { DEBUG_PRINTF("no entry for vertex %zu in state map\n", h[v].index); - return false; - } - const u32 i = state_ids.at(v); - if (i == NO_STATE) { - continue; - } - + return false; + } + const u32 i = state_ids.at(v); + if (i == NO_STATE) { + continue; + } + DEBUG_PRINTF("checking vertex %zu (state %u)\n", h[v].index, i); - - if (i >= num_states || contains(seen, i)) { - DEBUG_PRINTF("vertex %u/%u has invalid state\n", i, num_states); - return false; - } - seen.insert(i); - - // All our states should be reachable and have a state assigned. - if (h[v].char_reach.none()) { + + if (i >= num_states || contains(seen, i)) { + DEBUG_PRINTF("vertex %u/%u has invalid state\n", i, num_states); + return false; + } + seen.insert(i); + + // All our states should be reachable and have a state assigned. + if (h[v].char_reach.none()) { DEBUG_PRINTF("vertex %zu has empty reachability\n", h[v].index); - return false; - } - - // Every state that isn't a start state (or top, in triggered NFAs) - // must have at least one predecessor that is not itself. - if (v != h.start && v != h.startDs && !contains(top_starts, v) - && !proper_in_degree(v, h)) { + return false; + } + + // Every state that isn't a start state (or top, in triggered NFAs) + // must have at least one predecessor that is not itself. + if (v != h.start && v != h.startDs && !contains(top_starts, v) + && !proper_in_degree(v, h)) { DEBUG_PRINTF("vertex %zu has no pred\n", h[v].index); - return false; - } - } - - if (seen.size() != num_states) { - return false; - } - - return true; -} -#endif // NDEBUG - -static + return false; + } + } + + if (seen.size() != num_states) { + return false; + } + + return true; +} +#endif // NDEBUG + +static bool isFast(const build_info &args) { const NGHolder &h = args.h; const u32 num_states = args.num_states; @@ -2552,17 +2552,17 @@ bool isFast(const build_info &args) { static u32 max_state(const unordered_map<NFAVertex, u32> &state_ids) { - u32 rv = 0; - for (const auto &m : state_ids) { - DEBUG_PRINTF("state %u\n", m.second); - if (m.second != NO_STATE) { - rv = max(m.second, rv); - } - } - DEBUG_PRINTF("max %u\n", rv); - return rv; -} - + u32 rv = 0; + for (const auto &m : state_ids) { + DEBUG_PRINTF("state %u\n", m.second); + if (m.second != NO_STATE) { + rv = max(m.second, rv); + } + } + DEBUG_PRINTF("max %u\n", rv); + return rv; +} + bytecode_ptr<NFA> generate(NGHolder &h, const unordered_map<NFAVertex, u32> &states, const vector<BoundedRepeatData> &repeats, @@ -2572,52 +2572,52 @@ bytecode_ptr<NFA> generate(NGHolder &h, const set<NFAVertex> &zombies, bool do_accel, bool stateCompression, bool &fast, u32 hint, const CompileContext &cc) { - const u32 num_states = max_state(states) + 1; - DEBUG_PRINTF("total states: %u\n", num_states); - - if (!cc.grey.allowLimExNFA) { - DEBUG_PRINTF("limex not allowed\n"); - return nullptr; - } - - // If you ask for a particular type, it had better be an NFA. - assert(hint == INVALID_NFA || hint <= LAST_LIMEX_NFA); - DEBUG_PRINTF("hint=%u\n", hint); - - // Sanity check the input data. 
- assert(isSane(h, tops, states, num_states)); - - // Build arguments used in the rest of this file. - build_info arg(h, states, repeats, reportSquashMap, squashMap, tops, - zombies, do_accel, stateCompression, cc, num_states); - - // Acceleration analysis. - fillAccelInfo(arg); - + const u32 num_states = max_state(states) + 1; + DEBUG_PRINTF("total states: %u\n", num_states); + + if (!cc.grey.allowLimExNFA) { + DEBUG_PRINTF("limex not allowed\n"); + return nullptr; + } + + // If you ask for a particular type, it had better be an NFA. + assert(hint == INVALID_NFA || hint <= LAST_LIMEX_NFA); + DEBUG_PRINTF("hint=%u\n", hint); + + // Sanity check the input data. + assert(isSane(h, tops, states, num_states)); + + // Build arguments used in the rest of this file. + build_info arg(h, states, repeats, reportSquashMap, squashMap, tops, + zombies, do_accel, stateCompression, cc, num_states); + + // Acceleration analysis. + fillAccelInfo(arg); + vector<pair<int, NFAEngineType>> scores; - - if (hint != INVALID_NFA) { - // The caller has told us what to (attempt to) build. + + if (hint != INVALID_NFA) { + // The caller has told us what to (attempt to) build. scores.emplace_back(0, (NFAEngineType)hint); - } else { - for (size_t i = 0; i <= LAST_LIMEX_NFA; i++) { - NFAEngineType ntype = (NFAEngineType)i; - int score = DISPATCH_BY_LIMEX_TYPE(ntype, scoreNfa, arg); - if (score >= 0) { - DEBUG_PRINTF("%s scores %d\n", nfa_type_name(ntype), score); + } else { + for (size_t i = 0; i <= LAST_LIMEX_NFA; i++) { + NFAEngineType ntype = (NFAEngineType)i; + int score = DISPATCH_BY_LIMEX_TYPE(ntype, scoreNfa, arg); + if (score >= 0) { + DEBUG_PRINTF("%s scores %d\n", nfa_type_name(ntype), score); scores.emplace_back(score, ntype); - } - } - } - - if (scores.empty()) { - DEBUG_PRINTF("No NFA returned a valid score for this case.\n"); - return nullptr; - } - + } + } + } + + if (scores.empty()) { + DEBUG_PRINTF("No NFA returned a valid score for this case.\n"); + return nullptr; + } + // Sort acceptable models in priority order, lowest score first. sort(scores.begin(), scores.end()); - + for (const auto &elem : scores) { assert(elem.first >= 0); NFAEngineType limex_model = elem.second; @@ -2628,13 +2628,13 @@ bytecode_ptr<NFA> generate(NGHolder &h, fast = isFast(arg); return nfa; } - } - + } + DEBUG_PRINTF("NFA build failed.\n"); return nullptr; -} - -u32 countAccelStates(NGHolder &h, +} + +u32 countAccelStates(NGHolder &h, const unordered_map<NFAVertex, u32> &states, const vector<BoundedRepeatData> &repeats, const unordered_map<NFAVertex, NFAStateSet> &reportSquashMap, @@ -2642,30 +2642,30 @@ u32 countAccelStates(NGHolder &h, const map<u32, set<NFAVertex>> &tops, const set<NFAVertex> &zombies, const CompileContext &cc) { - const u32 num_states = max_state(states) + 1; - DEBUG_PRINTF("total states: %u\n", num_states); - - if (!cc.grey.allowLimExNFA) { - DEBUG_PRINTF("limex not allowed\n"); + const u32 num_states = max_state(states) + 1; + DEBUG_PRINTF("total states: %u\n", num_states); + + if (!cc.grey.allowLimExNFA) { + DEBUG_PRINTF("limex not allowed\n"); return 0; - } - - // Sanity check the input data. - assert(isSane(h, tops, states, num_states)); - - const bool do_accel = true; - const bool state_compression = false; - - // Build arguments used in the rest of this file. - build_info bi(h, states, repeats, reportSquashMap, squashMap, tops, zombies, - do_accel, state_compression, cc, num_states); - - // Acceleration analysis. + } + + // Sanity check the input data. 
+ assert(isSane(h, tops, states, num_states)); + + const bool do_accel = true; + const bool state_compression = false; + + // Build arguments used in the rest of this file. + build_info bi(h, states, repeats, reportSquashMap, squashMap, tops, zombies, + do_accel, state_compression, cc, num_states); + + // Acceleration analysis. nfaFindAccelSchemes(bi.h, bi.br_cyclic, &bi.accel.accel_map); - + u32 num_accel = verify_u32(bi.accel.accel_map.size()); - DEBUG_PRINTF("found %u accel states\n", num_accel); + DEBUG_PRINTF("found %u accel states\n", num_accel); return num_accel; -} - -} // namespace ue2 +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfa/limex_compile.h b/contrib/libs/hyperscan/src/nfa/limex_compile.h index fc3f461fbf..4afdcdb3e4 100644 --- a/contrib/libs/hyperscan/src/nfa/limex_compile.h +++ b/contrib/libs/hyperscan/src/nfa/limex_compile.h @@ -1,74 +1,74 @@ -/* +/* * Copyright (c) 2015-2020, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + /** * \file - * \brief Main NFA build code. - */ - -#ifndef LIMEX_COMPILE_H -#define LIMEX_COMPILE_H - + * \brief Main NFA build code. + */ + +#ifndef LIMEX_COMPILE_H +#define LIMEX_COMPILE_H + #include "nfagraph/ng_holder.h" #include "nfagraph/ng_squash.h" // for NFAStateSet #include "ue2common.h" #include "util/bytecode_ptr.h" #include <set> -#include <map> -#include <memory> +#include <map> +#include <memory> #include <unordered_map> -#include <vector> - -struct NFA; - -namespace ue2 { - -struct BoundedRepeatData; -struct CompileContext; - +#include <vector> + +struct NFA; + +namespace ue2 { + +struct BoundedRepeatData; +struct CompileContext; + /** * \brief Construct a LimEx NFA from an NGHolder. - * - * \param g Input NFA graph. Must have state IDs assigned. - * \param repeats Bounded repeat information, if any. - * \param reportSquashMap Single-match mode squash map. - * \param squashMap More general squash map. - * \param tops Tops and their start vertices, - * \param zombies The set of zombifying states. - * \param do_accel Calculate acceleration schemes. - * \param stateCompression Allow (and calculate masks for) state compression. - * \param hint If not INVALID_NFA, this allows a particular LimEx NFA model - to be requested. - * \param cc Compile context. - * \return a built NFA, or nullptr if no NFA could be constructed for this - * graph. - */ + * + * \param g Input NFA graph. Must have state IDs assigned. + * \param repeats Bounded repeat information, if any. + * \param reportSquashMap Single-match mode squash map. + * \param squashMap More general squash map. + * \param tops Tops and their start vertices, + * \param zombies The set of zombifying states. + * \param do_accel Calculate acceleration schemes. + * \param stateCompression Allow (and calculate masks for) state compression. + * \param hint If not INVALID_NFA, this allows a particular LimEx NFA model + to be requested. + * \param cc Compile context. + * \return a built NFA, or nullptr if no NFA could be constructed for this + * graph. + */ bytecode_ptr<NFA> generate(NGHolder &g, const std::unordered_map<NFAVertex, u32> &states, const std::vector<BoundedRepeatData> &repeats, @@ -81,14 +81,14 @@ bytecode_ptr<NFA> generate(NGHolder &g, bool &fast, u32 hint, const CompileContext &cc); - -/** + +/** * \brief For a given graph, count the number of accelerable states it has. - * + * * Note that this number may be greater than the number that are actually * implementable. 
- */ -u32 countAccelStates(NGHolder &h, + */ +u32 countAccelStates(NGHolder &h, const std::unordered_map<NFAVertex, u32> &states, const std::vector<BoundedRepeatData> &repeats, const std::unordered_map<NFAVertex, NFAStateSet> &reportSquashMap, @@ -96,7 +96,7 @@ u32 countAccelStates(NGHolder &h, const std::map<u32, std::set<NFAVertex>> &tops, const std::set<NFAVertex> &zombies, const CompileContext &cc); - -} // namespace ue2 - -#endif + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/nfa/limex_context.h b/contrib/libs/hyperscan/src/nfa/limex_context.h index 06be3c9461..60d2087935 100644 --- a/contrib/libs/hyperscan/src/nfa/limex_context.h +++ b/contrib/libs/hyperscan/src/nfa/limex_context.h @@ -1,44 +1,44 @@ -/* +/* * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Runtime context structures (NFAContext128 and friends) for the NFA. - */ - -#ifndef LIMEX_CONTEXT_H -#define LIMEX_CONTEXT_H - -#include "ue2common.h" -#include "callback.h" -#include "util/simd_utils.h" // for m128 etc - -// Runtime context structures. - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Runtime context structures (NFAContext128 and friends) for the NFA. + */ + +#ifndef LIMEX_CONTEXT_H +#define LIMEX_CONTEXT_H + +#include "ue2common.h" +#include "callback.h" +#include "util/simd_utils.h" // for m128 etc + +// Runtime context structures. + /* Note: The size of the context structures may vary from platform to platform * (notably, for the Limex64 structure). As a result, information based on the * size and other detail of these structures should not be written into the @@ -49,43 +49,43 @@ #error ue2 runtime only file #endif -/* cached_estate/esucc etc... - * - * If the exception state matches the cached_estate we will apply - * the or in the cached_esucc to the successor states rather than processing - * the exceptions. - * - * If the current exception state is a superset of the cached_estate, the - * cache is NOT used at all. - * - * The cache is updated when we see a different cacheable estate. - */ - -#define GEN_CONTEXT_STRUCT(nsize, ntype) \ -struct ALIGN_CL_DIRECTIVE NFAContext##nsize { \ - ntype s; /**< state bitvector (on entry/exit) */ \ - ntype local_succ; /**< used by exception handling for large models */ \ - ntype cached_estate; /* inited to 0 */ \ - ntype cached_esucc; \ - char cached_br; /**< cached_estate contains a br state */ \ - const ReportID *cached_reports; \ - union RepeatControl *repeat_ctrl; \ - char *repeat_state; \ - NfaCallback callback; \ - void *context; \ -}; - -GEN_CONTEXT_STRUCT(32, u32) +/* cached_estate/esucc etc... + * + * If the exception state matches the cached_estate we will apply + * the or in the cached_esucc to the successor states rather than processing + * the exceptions. + * + * If the current exception state is a superset of the cached_estate, the + * cache is NOT used at all. + * + * The cache is updated when we see a different cacheable estate. 
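+ *
+ * (Annotation with a small example: if cached_estate and estate are both
+ * exactly {s3}, the handler can simply OR cached_esucc into the successor
+ * set and skip per-exception processing; if estate is {s3, s7}, a strict
+ * superset, the cache is bypassed entirely, since cached_esucc says
+ * nothing about s7's exceptions.)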
+ */ + +#define GEN_CONTEXT_STRUCT(nsize, ntype) \ +struct ALIGN_CL_DIRECTIVE NFAContext##nsize { \ + ntype s; /**< state bitvector (on entry/exit) */ \ + ntype local_succ; /**< used by exception handling for large models */ \ + ntype cached_estate; /* inited to 0 */ \ + ntype cached_esucc; \ + char cached_br; /**< cached_estate contains a br state */ \ + const ReportID *cached_reports; \ + union RepeatControl *repeat_ctrl; \ + char *repeat_state; \ + NfaCallback callback; \ + void *context; \ +}; + +GEN_CONTEXT_STRUCT(32, u32) #ifdef ARCH_64_BIT GEN_CONTEXT_STRUCT(64, u64a) #else GEN_CONTEXT_STRUCT(64, m128) #endif -GEN_CONTEXT_STRUCT(128, m128) -GEN_CONTEXT_STRUCT(256, m256) -GEN_CONTEXT_STRUCT(384, m384) -GEN_CONTEXT_STRUCT(512, m512) - -#undef GEN_CONTEXT_STRUCT - -#endif +GEN_CONTEXT_STRUCT(128, m128) +GEN_CONTEXT_STRUCT(256, m256) +GEN_CONTEXT_STRUCT(384, m384) +GEN_CONTEXT_STRUCT(512, m512) + +#undef GEN_CONTEXT_STRUCT + +#endif diff --git a/contrib/libs/hyperscan/src/nfa/limex_exceptional.h b/contrib/libs/hyperscan/src/nfa/limex_exceptional.h index 2d1123dca0..6c7335f1b9 100644 --- a/contrib/libs/hyperscan/src/nfa/limex_exceptional.h +++ b/contrib/libs/hyperscan/src/nfa/limex_exceptional.h @@ -1,240 +1,240 @@ -/* +/* * Copyright (c) 2015-2020, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief LimEx NFA: runtime exception processing code. - * - * X-macro generic impl, included into the various LimEx model implementations. - */ - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief LimEx NFA: runtime exception processing code. + * + * X-macro generic impl, included into the various LimEx model implementations. + */ + #if !defined(SIZE) || !defined(STATE_T) || !defined(LOAD_FROM_ENG) # error Must define SIZE, STATE_T, LOAD_FROM_ENG in includer. -#endif - -#include "config.h" -#include "limex_ring.h" -#include "util/join.h" -#include "util/uniform_ops.h" - -#define PE_FN JOIN(processExceptional, SIZE) -#define RUN_EXCEPTION_FN JOIN(runException, SIZE) -#define ZERO_STATE JOIN(zero_, STATE_T) -#define AND_STATE JOIN(and_, STATE_T) -#define EQ_STATE(a, b) (!JOIN(noteq_, STATE_T)((a), (b))) -#define OR_STATE JOIN(or_, STATE_T) +#endif + +#include "config.h" +#include "limex_ring.h" +#include "util/join.h" +#include "util/uniform_ops.h" + +#define PE_FN JOIN(processExceptional, SIZE) +#define RUN_EXCEPTION_FN JOIN(runException, SIZE) +#define ZERO_STATE JOIN(zero_, STATE_T) +#define AND_STATE JOIN(and_, STATE_T) +#define EQ_STATE(a, b) (!JOIN(noteq_, STATE_T)((a), (b))) +#define OR_STATE JOIN(or_, STATE_T) #define EXPAND_STATE JOIN(expand_, STATE_T) #define SHUFFLE_BYTE_STATE JOIN(shuffle_byte_, STATE_T) -#define TESTBIT_STATE JOIN(testbit_, STATE_T) -#define EXCEPTION_T JOIN(struct NFAException, SIZE) -#define CONTEXT_T JOIN(NFAContext, SIZE) -#define IMPL_NFA_T JOIN(LimExNFA, SIZE) -#define GET_NFA_REPEAT_INFO_FN JOIN(getNfaRepeatInfo, SIZE) - -#ifdef ESTATE_ON_STACK -#define ESTATE_ARG STATE_T estate -#else -#define ESTATE_ARG const STATE_T *estatep +#define TESTBIT_STATE JOIN(testbit_, STATE_T) +#define EXCEPTION_T JOIN(struct NFAException, SIZE) +#define CONTEXT_T JOIN(NFAContext, SIZE) +#define IMPL_NFA_T JOIN(LimExNFA, SIZE) +#define GET_NFA_REPEAT_INFO_FN JOIN(getNfaRepeatInfo, SIZE) + +#ifdef ESTATE_ON_STACK +#define ESTATE_ARG STATE_T estate +#else +#define ESTATE_ARG const STATE_T *estatep #define estate (*estatep) -#endif - -#ifdef STATE_ON_STACK -#define STATE_ARG_NAME s -#define STATE_ARG STATE_T STATE_ARG_NAME -#define STATE_ARG_P &s -#else -#define STATE_ARG_NAME sp -#define STATE_ARG const STATE_T *STATE_ARG_NAME -#define STATE_ARG_P sp -#endif - -#ifndef STATE_ON_STACK -#define BIG_MODEL -#endif - -#ifdef ARCH_64_BIT -#define CHUNK_T u64a -#define FIND_AND_CLEAR_FN findAndClearLSB_64 +#endif + +#ifdef STATE_ON_STACK +#define STATE_ARG_NAME s +#define STATE_ARG STATE_T STATE_ARG_NAME +#define STATE_ARG_P &s +#else +#define STATE_ARG_NAME sp +#define STATE_ARG const STATE_T *STATE_ARG_NAME +#define STATE_ARG_P sp +#endif + +#ifndef STATE_ON_STACK 
+#define BIG_MODEL +#endif + +#ifdef ARCH_64_BIT +#define CHUNK_T u64a +#define FIND_AND_CLEAR_FN findAndClearLSB_64 #define POPCOUNT_FN popcount64 #define RANK_IN_MASK_FN rank_in_mask64 -#else -#define CHUNK_T u32 -#define FIND_AND_CLEAR_FN findAndClearLSB_32 +#else +#define CHUNK_T u32 +#define FIND_AND_CLEAR_FN findAndClearLSB_32 #define POPCOUNT_FN popcount32 #define RANK_IN_MASK_FN rank_in_mask32 -#endif - -/** \brief Process a single exception. Returns 1 if exception handling should - * continue, 0 if an accept callback has instructed us to halt. */ -static really_inline -int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG, - STATE_T *succ, -#ifndef BIG_MODEL - STATE_T *local_succ, -#endif - const struct IMPL_NFA_T *limex, - u64a offset, - struct CONTEXT_T *ctx, - struct proto_cache *new_cache, - enum CacheResult *cacheable, - char in_rev, - const char flags) { - assert(e); - -#ifdef DEBUG_EXCEPTIONS - printf("EXCEPTION e=%p reports=%u trigger=", e, e->reports); - if (e->trigger == LIMEX_TRIGGER_NONE) { - printf("none"); - } else if (e->trigger == LIMEX_TRIGGER_POS) { - printf("pos"); - } else if (e->trigger == LIMEX_TRIGGER_TUG) { - printf("tug"); - } else { - printf("unknown!"); - } - printf("\n"); -#endif - - // Trigger exceptions, used in bounded repeats. - assert(!in_rev || e->trigger == LIMEX_TRIGGER_NONE); - if (!in_rev && e->trigger != LIMEX_TRIGGER_NONE) { - assert(e->repeatOffset != MO_INVALID_IDX); - const struct NFARepeatInfo *info = - (const struct NFARepeatInfo *)((const char *)limex + - e->repeatOffset); - const struct RepeatInfo *repeat = getRepeatInfo(info); - assert(ctx->repeat_ctrl && ctx->repeat_state); - union RepeatControl *repeat_ctrl = ctx->repeat_ctrl + info->ctrlIndex; - char *repeat_state = ctx->repeat_state + info->stateOffset; - - if (e->trigger == LIMEX_TRIGGER_POS) { +#endif + +/** \brief Process a single exception. Returns 1 if exception handling should + * continue, 0 if an accept callback has instructed us to halt. */ +static really_inline +int RUN_EXCEPTION_FN(const EXCEPTION_T *e, STATE_ARG, + STATE_T *succ, +#ifndef BIG_MODEL + STATE_T *local_succ, +#endif + const struct IMPL_NFA_T *limex, + u64a offset, + struct CONTEXT_T *ctx, + struct proto_cache *new_cache, + enum CacheResult *cacheable, + char in_rev, + const char flags) { + assert(e); + +#ifdef DEBUG_EXCEPTIONS + printf("EXCEPTION e=%p reports=%u trigger=", e, e->reports); + if (e->trigger == LIMEX_TRIGGER_NONE) { + printf("none"); + } else if (e->trigger == LIMEX_TRIGGER_POS) { + printf("pos"); + } else if (e->trigger == LIMEX_TRIGGER_TUG) { + printf("tug"); + } else { + printf("unknown!"); + } + printf("\n"); +#endif + + // Trigger exceptions, used in bounded repeats. 
+ assert(!in_rev || e->trigger == LIMEX_TRIGGER_NONE); + if (!in_rev && e->trigger != LIMEX_TRIGGER_NONE) { + assert(e->repeatOffset != MO_INVALID_IDX); + const struct NFARepeatInfo *info = + (const struct NFARepeatInfo *)((const char *)limex + + e->repeatOffset); + const struct RepeatInfo *repeat = getRepeatInfo(info); + assert(ctx->repeat_ctrl && ctx->repeat_state); + union RepeatControl *repeat_ctrl = ctx->repeat_ctrl + info->ctrlIndex; + char *repeat_state = ctx->repeat_state + info->stateOffset; + + if (e->trigger == LIMEX_TRIGGER_POS) { char cyclic_on = TESTBIT_STATE(*STATE_ARG_P, info->cyclicState); - processPosTrigger(repeat, repeat_ctrl, repeat_state, offset, - cyclic_on); - *cacheable = DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES; - } else { - assert(e->trigger == LIMEX_TRIGGER_TUG); - enum TriggerResult rv = - processTugTrigger(repeat, repeat_ctrl, repeat_state, offset); - if (rv == TRIGGER_FAIL) { - *cacheable = DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES; - DEBUG_PRINTF("tug found no valid matches in repeat state\n"); - return 1; // continue - } else if (rv == TRIGGER_STALE) { - *cacheable = DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES; - DEBUG_PRINTF("stale history, squashing cyclic state\n"); - assert(e->hasSquash == LIMEX_SQUASH_TUG); + processPosTrigger(repeat, repeat_ctrl, repeat_state, offset, + cyclic_on); + *cacheable = DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES; + } else { + assert(e->trigger == LIMEX_TRIGGER_TUG); + enum TriggerResult rv = + processTugTrigger(repeat, repeat_ctrl, repeat_state, offset); + if (rv == TRIGGER_FAIL) { + *cacheable = DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES; + DEBUG_PRINTF("tug found no valid matches in repeat state\n"); + return 1; // continue + } else if (rv == TRIGGER_STALE) { + *cacheable = DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES; + DEBUG_PRINTF("stale history, squashing cyclic state\n"); + assert(e->hasSquash == LIMEX_SQUASH_TUG); *succ = AND_STATE(*succ, LOAD_FROM_ENG(&e->squash)); - return 1; // continue - } else if (rv == TRIGGER_SUCCESS_CACHE) { - new_cache->br = 1; - } else { - assert(rv == TRIGGER_SUCCESS); - *cacheable = DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES; - } - } - } - - // Some exceptions fire accepts. - if (e->reports != MO_INVALID_IDX) { - if (flags & CALLBACK_OUTPUT) { + return 1; // continue + } else if (rv == TRIGGER_SUCCESS_CACHE) { + new_cache->br = 1; + } else { + assert(rv == TRIGGER_SUCCESS); + *cacheable = DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES; + } + } + } + + // Some exceptions fire accepts. + if (e->reports != MO_INVALID_IDX) { + if (flags & CALLBACK_OUTPUT) { const ReportID *reports = (const ReportID *)((const char *)limex + e->reports); - if (unlikely(limexRunReports(reports, ctx->callback, - ctx->context, offset) - == MO_HALT_MATCHING)) { - DEBUG_PRINTF("callback instructed us to stop\n"); - return 0; // halt - } - if (*cacheable == CACHE_RESULT) { - if (!new_cache->reports || new_cache->reports == reports) { - new_cache->reports = reports; - } else { - *cacheable = DO_NOT_CACHE_RESULT; - } - } - } else { - if ((flags & FIRST_BYTE) && *cacheable == CACHE_RESULT) { - *cacheable = DO_NOT_CACHE_RESULT; - } /* otherwise we can cache as we never care about accepts */ - } - } - - // Most exceptions have a set of successors to switch on. `local_succ' is - // ORed into `succ' at the end of the caller's loop. 
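One consequence of the comment above is worth spelling out: because squashes are applied to succ immediately while successors are deferred in local_succ and ORed in afterwards, an exception's successors always survive squashes applied by other exceptions in the same pass. A toy restatement with u32 masks and invented fields (not the generated NFAException layout):

#include <stdint.h>
typedef uint32_t u32;

struct toy_exc {
    u32 successors; /* states to switch on */
    u32 squash;     /* states allowed to survive the squash */
    char has_squash;
};

static void toy_run_exceptions(const struct toy_exc *excs, u32 n, u32 *succ) {
    u32 local_succ = 0;
    for (u32 i = 0; i < n; i++) {
        local_succ |= excs[i].successors; /* deferred until after the loop */
        if (excs[i].has_squash) {
            *succ &= excs[i].squash;      /* squashes hit succ directly */
        }
    }
    *succ |= local_succ; /* successors are applied after every squash */
}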
-#ifndef BIG_MODEL + if (unlikely(limexRunReports(reports, ctx->callback, + ctx->context, offset) + == MO_HALT_MATCHING)) { + DEBUG_PRINTF("callback instructed us to stop\n"); + return 0; // halt + } + if (*cacheable == CACHE_RESULT) { + if (!new_cache->reports || new_cache->reports == reports) { + new_cache->reports = reports; + } else { + *cacheable = DO_NOT_CACHE_RESULT; + } + } + } else { + if ((flags & FIRST_BYTE) && *cacheable == CACHE_RESULT) { + *cacheable = DO_NOT_CACHE_RESULT; + } /* otherwise we can cache as we never care about accepts */ + } + } + + // Most exceptions have a set of successors to switch on. `local_succ' is + // ORed into `succ' at the end of the caller's loop. +#ifndef BIG_MODEL *local_succ = OR_STATE(*local_succ, LOAD_FROM_ENG(&e->successors)); -#else +#else ctx->local_succ = OR_STATE(ctx->local_succ, LOAD_FROM_ENG(&e->successors)); -#endif - - // Some exceptions squash states behind them. Note that we squash states in - // 'succ', not local_succ. +#endif + + // Some exceptions squash states behind them. Note that we squash states in + // 'succ', not local_succ. if (e->hasSquash == LIMEX_SQUASH_CYCLIC || e->hasSquash == LIMEX_SQUASH_REPORT) { *succ = AND_STATE(*succ, LOAD_FROM_ENG(&e->squash)); - if (*cacheable == CACHE_RESULT) { - *cacheable = DO_NOT_CACHE_RESULT; - } - } - - return 1; // continue -} - -#ifndef RUN_EXCEPTION_FN_ONLY - + if (*cacheable == CACHE_RESULT) { + *cacheable = DO_NOT_CACHE_RESULT; + } + } + + return 1; // continue +} + +#ifndef RUN_EXCEPTION_FN_ONLY + /** \brief Process all of the exceptions associated with the states in the \a * estate. */ -static really_inline +static really_inline int PE_FN(STATE_ARG, ESTATE_ARG, UNUSED u32 diffmask, STATE_T *succ, const struct IMPL_NFA_T *limex, const EXCEPTION_T *exceptions, - u64a offset, struct CONTEXT_T *ctx, char in_rev, char flags) { - assert(diffmask > 0); // guaranteed by caller macro - + u64a offset, struct CONTEXT_T *ctx, char in_rev, char flags) { + assert(diffmask > 0); // guaranteed by caller macro + if (EQ_STATE(estate, ctx->cached_estate)) { - DEBUG_PRINTF("using cached succ from previous state\n"); + DEBUG_PRINTF("using cached succ from previous state\n"); *succ = OR_STATE(*succ, ctx->cached_esucc); - if (ctx->cached_reports && (flags & CALLBACK_OUTPUT)) { - DEBUG_PRINTF("firing cached reports from previous state\n"); - if (unlikely(limexRunReports(ctx->cached_reports, ctx->callback, - ctx->context, offset) - == MO_HALT_MATCHING)) { - return PE_RV_HALT; // halt; - } - } - return 0; - } - -#ifndef BIG_MODEL - STATE_T local_succ = ZERO_STATE; -#else + if (ctx->cached_reports && (flags & CALLBACK_OUTPUT)) { + DEBUG_PRINTF("firing cached reports from previous state\n"); + if (unlikely(limexRunReports(ctx->cached_reports, ctx->callback, + ctx->context, offset) + == MO_HALT_MATCHING)) { + return PE_RV_HALT; // halt; + } + } + return 0; + } + +#ifndef BIG_MODEL + STATE_T local_succ = ZERO_STATE; +#else ctx->local_succ = ZERO_STATE; -#endif - +#endif + struct proto_cache new_cache = {0, NULL}; enum CacheResult cacheable = CACHE_RESULT; @@ -301,101 +301,101 @@ int PE_FN(STATE_ARG, ESTATE_ARG, UNUSED u32 diffmask, STATE_T *succ, } while (diffmask); } #else - // A copy of the estate as an array of GPR-sized chunks. - CHUNK_T chunks[sizeof(STATE_T) / sizeof(CHUNK_T)]; + // A copy of the estate as an array of GPR-sized chunks. 
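The chunk walk coming up maps each set bit of the exception state to a dense index into the exception table by ranking the bit within the exception mask (rank_in_mask32/rank_in_mask64 from util/bitutils.h). A self-contained sketch of the 32-bit ranking step, assuming GCC/Clang builtins:

#include <stdint.h>
typedef uint32_t u32;

/* Rank of a set bit within a mask: the count of set bits strictly below
 * it. If 'bit' is the k-th lowest set bit of 'mask', this returns k,
 * which is exactly the exception's position in the dense table. */
static inline u32 toy_rank_in_mask32(u32 mask, u32 bit) {
    return (u32)__builtin_popcount(mask & ((1u << bit) - 1));
}

The base_index[] array built just below then re-bases the per-chunk rank, so ranks from successive chunks concatenate into a single index into the exception table.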
+ CHUNK_T chunks[sizeof(STATE_T) / sizeof(CHUNK_T)]; CHUNK_T emask_chunks[sizeof(STATE_T) / sizeof(CHUNK_T)]; -#ifdef ESTATE_ON_STACK - memcpy(chunks, &estate, sizeof(STATE_T)); -#else - memcpy(chunks, estatep, sizeof(STATE_T)); -#endif +#ifdef ESTATE_ON_STACK + memcpy(chunks, &estate, sizeof(STATE_T)); +#else + memcpy(chunks, estatep, sizeof(STATE_T)); +#endif memcpy(emask_chunks, &limex->exceptionMask, sizeof(STATE_T)); - + u32 base_index[sizeof(STATE_T) / sizeof(CHUNK_T)]; base_index[0] = 0; for (s32 i = 0; i < (s32)ARRAY_LENGTH(base_index) - 1; i++) { base_index[i + 1] = base_index[i] + POPCOUNT_FN(emask_chunks[i]); } - do { - u32 t = findAndClearLSB_32(&diffmask); -#ifdef ARCH_64_BIT - t >>= 1; // Due to diffmask64, which leaves holes in the bitmask. -#endif - assert(t < ARRAY_LENGTH(chunks)); - CHUNK_T word = chunks[t]; - assert(word != 0); - do { + do { + u32 t = findAndClearLSB_32(&diffmask); +#ifdef ARCH_64_BIT + t >>= 1; // Due to diffmask64, which leaves holes in the bitmask. +#endif + assert(t < ARRAY_LENGTH(chunks)); + CHUNK_T word = chunks[t]; + assert(word != 0); + do { u32 bit = FIND_AND_CLEAR_FN(&word); u32 local_index = RANK_IN_MASK_FN(emask_chunks[t], bit); u32 idx = local_index + base_index[t]; - const EXCEPTION_T *e = &exceptions[idx]; - - if (!RUN_EXCEPTION_FN(e, STATE_ARG_NAME, succ, -#ifndef BIG_MODEL - &local_succ, -#endif + const EXCEPTION_T *e = &exceptions[idx]; + + if (!RUN_EXCEPTION_FN(e, STATE_ARG_NAME, succ, +#ifndef BIG_MODEL + &local_succ, +#endif limex, offset, ctx, &new_cache, &cacheable, in_rev, flags)) { - return PE_RV_HALT; - } - } while (word); - } while (diffmask); + return PE_RV_HALT; + } + } while (word); + } while (diffmask); #endif - -#ifndef BIG_MODEL + +#ifndef BIG_MODEL *succ = OR_STATE(*succ, local_succ); -#else +#else *succ = OR_STATE(*succ, ctx->local_succ); -#endif - - if (cacheable == CACHE_RESULT) { +#endif + + if (cacheable == CACHE_RESULT) { ctx->cached_estate = estate; -#ifndef BIG_MODEL - ctx->cached_esucc = local_succ; -#else +#ifndef BIG_MODEL + ctx->cached_esucc = local_succ; +#else ctx->cached_esucc = ctx->local_succ; -#endif - ctx->cached_reports = new_cache.reports; - ctx->cached_br = new_cache.br; - } else if (cacheable == DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES) { - if (ctx->cached_br) { +#endif + ctx->cached_reports = new_cache.reports; + ctx->cached_br = new_cache.br; + } else if (cacheable == DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES) { + if (ctx->cached_br) { ctx->cached_estate = ZERO_STATE; - } - } - - return 0; -} - -#endif - -#undef ZERO_STATE -#undef AND_STATE -#undef EQ_STATE -#undef OR_STATE + } + } + + return 0; +} + +#endif + +#undef ZERO_STATE +#undef AND_STATE +#undef EQ_STATE +#undef OR_STATE #undef EXPAND_STATE #undef SHUFFLE_BYTE_STATE -#undef TESTBIT_STATE -#undef PE_FN -#undef RUN_EXCEPTION_FN -#undef CONTEXT_T -#undef EXCEPTION_T - -#ifdef estate -#undef estate -#endif - -#ifdef BIG_MODEL -#undef BIG_MODEL -#endif - -#undef STATE_ARG -#undef STATE_ARG_NAME -#undef STATE_ARG_P - +#undef TESTBIT_STATE +#undef PE_FN +#undef RUN_EXCEPTION_FN +#undef CONTEXT_T +#undef EXCEPTION_T + +#ifdef estate +#undef estate +#endif + +#ifdef BIG_MODEL +#undef BIG_MODEL +#endif + +#undef STATE_ARG +#undef STATE_ARG_NAME +#undef STATE_ARG_P + #undef IMPL_NFA_T -#undef CHUNK_T -#undef FIND_AND_CLEAR_FN +#undef CHUNK_T +#undef FIND_AND_CLEAR_FN #undef POPCOUNT_FN #undef RANK_IN_MASK_FN diff --git a/contrib/libs/hyperscan/src/nfa/limex_internal.h b/contrib/libs/hyperscan/src/nfa/limex_internal.h index 412f507d7c..23b1bd9707 
100644 --- a/contrib/libs/hyperscan/src/nfa/limex_internal.h +++ b/contrib/libs/hyperscan/src/nfa/limex_internal.h @@ -1,193 +1,193 @@ -/* +/* * Copyright (c) 2015-2020, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - This file provides the internal structures and definitions required for the - real NFAs (aka limex NFAs ); - - Limex NFAs now have variable length in memory. They look like this: - - LimExNFA structure - Fixed length, e.g. LimExNFA256. - Reachability table - Variable length array of state bitvectors, mapped into by - NFACommonXXX.reachMap. - Tops - Variable length array of state bitvectors, used for TOP_N events. - Acceleration structures - Variable length array of AccelAux structs. - Accepts - Variable length array of NFAAccept structs. - EOD Accepts - Variable length array of NFAAccept structs. - Exceptions - Variable length array of NFAExceptionXXX structs. - Repeat Structure Offsets - Array of u32 offsets that point at each "Repeat Structure" (below) - Repeat Structures - Variable length repeat structures, addressed via - NFAException32::repeatOffset etc. - - The state associated with the NFA is split into: - - -# The "traditional" NFA state as a bitvector. This is stored in the - first N bytes of the state space (length given in - NFACommonXXX.stateSize), and may be stored shrunk to CEIL(stateSize/8) - or compressed. If it is stored compressed, than the - LIMEX_FLAG_COMPRESS_STATE flag is set in NFACommonXXX.flags. - -# Extended NFA state, only used in some LimEx NFAs. This consists of a - variable length array of LimExNFAExtendedState structures, each with - pointers to a packed list of mmbit structures that follows them. Only - present when used. - - The value of NFA.stateSize gives the total state size in bytes (the sum of - all the above). 
- + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + This file provides the internal structures and definitions required for the + real NFAs (aka limex NFAs ); + + Limex NFAs now have variable length in memory. They look like this: + + LimExNFA structure + Fixed length, e.g. LimExNFA256. + Reachability table + Variable length array of state bitvectors, mapped into by + NFACommonXXX.reachMap. + Tops + Variable length array of state bitvectors, used for TOP_N events. + Acceleration structures + Variable length array of AccelAux structs. + Accepts + Variable length array of NFAAccept structs. + EOD Accepts + Variable length array of NFAAccept structs. + Exceptions + Variable length array of NFAExceptionXXX structs. + Repeat Structure Offsets + Array of u32 offsets that point at each "Repeat Structure" (below) + Repeat Structures + Variable length repeat structures, addressed via + NFAException32::repeatOffset etc. + + The state associated with the NFA is split into: + + -# The "traditional" NFA state as a bitvector. This is stored in the + first N bytes of the state space (length given in + NFACommonXXX.stateSize), and may be stored shrunk to CEIL(stateSize/8) + or compressed. If it is stored compressed, than the + LIMEX_FLAG_COMPRESS_STATE flag is set in NFACommonXXX.flags. + -# Extended NFA state, only used in some LimEx NFAs. This consists of a + variable length array of LimExNFAExtendedState structures, each with + pointers to a packed list of mmbit structures that follows them. Only + present when used. + + The value of NFA.stateSize gives the total state size in bytes (the sum of + all the above). + Number of shifts should be always greater or equal to 1 Number of shifts 0 means that no appropriate NFA engine was found. 
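Every variable-length region listed in the layout above is located by adding a byte offset to the start of the fixed LimExNFA header; the getExceptionTable/getAcceptTable macros in limex_runtime.h (later in this diff) are instances of the idiom. A toy sketch with invented types:

#include <stdint.h>

struct toy_header {
    uint32_t acceptOffset;    /* bytes from the start of the header */
    uint32_t exceptionOffset; /* bytes from the start of the header */
};

/* A region is simply base + offset; the build side is responsible for
 * emitting offsets that keep each region suitably aligned (note the
 * ISALIGNED asserts on the runtime side). */
static inline const void *toy_region(const struct toy_header *hdr,
                                     uint32_t offset) {
    return (const char *)hdr + offset;
}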
-*/ - -#ifndef LIMEX_INTERNAL_H -#define LIMEX_INTERNAL_H - -#include "nfa_internal.h" -#include "repeat_internal.h" - -// Constants +*/ + +#ifndef LIMEX_INTERNAL_H +#define LIMEX_INTERNAL_H + +#include "nfa_internal.h" +#include "repeat_internal.h" + +// Constants #define MAX_SHIFT_COUNT 8 /**< largest number of shifts used by a LimEx NFA */ #define MAX_SHIFT_AMOUNT 16 /**< largest shift amount used by a LimEx NFA */ - -#define LIMEX_FLAG_COMPRESS_STATE 1 /**< pack state into stream state */ -#define LIMEX_FLAG_COMPRESS_MASKED 2 /**< use reach mask-based compression */ + +#define LIMEX_FLAG_COMPRESS_STATE 1 /**< pack state into stream state */ +#define LIMEX_FLAG_COMPRESS_MASKED 2 /**< use reach mask-based compression */ #define LIMEX_FLAG_CANNOT_DIE 4 /**< limex cannot have no states on */ #define LIMEX_FLAG_EXTRACT_EXP 8 /**< use limex exception bit extraction */ - -enum LimExTrigger { - LIMEX_TRIGGER_NONE = 0, - LIMEX_TRIGGER_POS = 1, - LIMEX_TRIGGER_TUG = 2 -}; - -enum LimExSquash { - LIMEX_SQUASH_NONE = 0, //!< no squash for you! - LIMEX_SQUASH_CYCLIC = 1, //!< squash due to cyclic state - LIMEX_SQUASH_TUG = 2, //!< squash due to tug trigger with stale estate - LIMEX_SQUASH_REPORT = 3 //!< squash when report is raised -}; - -/* uniform looking types for the macros */ -typedef u8 u_8; -typedef u16 u_16; -typedef u32 u_32; -typedef u64a u_64; -typedef m128 u_128; -typedef m256 u_256; -typedef m384 u_384; -typedef m512 u_512; - -#define CREATE_NFA_LIMEX(size) \ -struct NFAException##size { \ - u_##size squash; /**< mask of states to leave on */ \ - u_##size successors; /**< mask of states to switch on */ \ - u32 reports; /**< offset to start of reports list, or MO_INVALID_IDX */ \ - u32 repeatOffset; /**< offset to NFARepeatInfo, or MO_INVALID_IDX */ \ - u8 hasSquash; /**< from enum LimExSquash */ \ - u8 trigger; /**< from enum LimExTrigger */ \ -}; \ - \ + +enum LimExTrigger { + LIMEX_TRIGGER_NONE = 0, + LIMEX_TRIGGER_POS = 1, + LIMEX_TRIGGER_TUG = 2 +}; + +enum LimExSquash { + LIMEX_SQUASH_NONE = 0, //!< no squash for you! + LIMEX_SQUASH_CYCLIC = 1, //!< squash due to cyclic state + LIMEX_SQUASH_TUG = 2, //!< squash due to tug trigger with stale estate + LIMEX_SQUASH_REPORT = 3 //!< squash when report is raised +}; + +/* uniform looking types for the macros */ +typedef u8 u_8; +typedef u16 u_16; +typedef u32 u_32; +typedef u64a u_64; +typedef m128 u_128; +typedef m256 u_256; +typedef m384 u_384; +typedef m512 u_512; + +#define CREATE_NFA_LIMEX(size) \ +struct NFAException##size { \ + u_##size squash; /**< mask of states to leave on */ \ + u_##size successors; /**< mask of states to switch on */ \ + u32 reports; /**< offset to start of reports list, or MO_INVALID_IDX */ \ + u32 repeatOffset; /**< offset to NFARepeatInfo, or MO_INVALID_IDX */ \ + u8 hasSquash; /**< from enum LimExSquash */ \ + u8 trigger; /**< from enum LimExTrigger */ \ +}; \ + \ struct LimExNFA##size { \ - u8 reachMap[N_CHARS]; /**< map of char -> entry in reach[] */ \ - u32 reachSize; /**< number of reach masks */ \ - u32 accelCount; /**< number of entries in accel table */ \ - u32 accelTableOffset; /* rel. to start of LimExNFA */ \ - u32 accelAuxCount; /**< number of entries in aux table */ \ - u32 accelAuxOffset; /* rel. to start of LimExNFA */ \ - u32 acceptCount; \ - u32 acceptOffset; /* rel. to start of LimExNFA */ \ - u32 acceptEodCount; \ - u32 acceptEodOffset; /* rel. to start of LimExNFA */ \ - u32 exceptionCount; \ - u32 exceptionOffset; /* rel. 
to start of LimExNFA */ \ - u32 repeatCount; \ - u32 repeatOffset; \ - u32 squashOffset; /* rel. to start of LimExNFA; for accept squashing */ \ - u32 squashCount; \ - u32 topCount; \ - u32 topOffset; /* rel. to start of LimExNFA */ \ - u32 stateSize; /**< not including extended history */ \ - u32 flags; \ - u_##size init; \ - u_##size initDS; \ - u_##size accept; /**< mask of accept states */ \ - u_##size acceptAtEOD; /**< mask of states that accept at EOD */ \ - u_##size accel; /**< mask of accelerable states */ \ - u_##size accelPermute; /**< pshufb permute mask (not GPR) */ \ - u_##size accelCompare; /**< pshufb compare mask (not GPR) */ \ - u_##size accel_and_friends; /**< mask of accelerable states + likely - * followers */ \ - u_##size compressMask; /**< switch off before compress */ \ - u_##size exceptionMask; \ + u8 reachMap[N_CHARS]; /**< map of char -> entry in reach[] */ \ + u32 reachSize; /**< number of reach masks */ \ + u32 accelCount; /**< number of entries in accel table */ \ + u32 accelTableOffset; /* rel. to start of LimExNFA */ \ + u32 accelAuxCount; /**< number of entries in aux table */ \ + u32 accelAuxOffset; /* rel. to start of LimExNFA */ \ + u32 acceptCount; \ + u32 acceptOffset; /* rel. to start of LimExNFA */ \ + u32 acceptEodCount; \ + u32 acceptEodOffset; /* rel. to start of LimExNFA */ \ + u32 exceptionCount; \ + u32 exceptionOffset; /* rel. to start of LimExNFA */ \ + u32 repeatCount; \ + u32 repeatOffset; \ + u32 squashOffset; /* rel. to start of LimExNFA; for accept squashing */ \ + u32 squashCount; \ + u32 topCount; \ + u32 topOffset; /* rel. to start of LimExNFA */ \ + u32 stateSize; /**< not including extended history */ \ + u32 flags; \ + u_##size init; \ + u_##size initDS; \ + u_##size accept; /**< mask of accept states */ \ + u_##size acceptAtEOD; /**< mask of states that accept at EOD */ \ + u_##size accel; /**< mask of accelerable states */ \ + u_##size accelPermute; /**< pshufb permute mask (not GPR) */ \ + u_##size accelCompare; /**< pshufb compare mask (not GPR) */ \ + u_##size accel_and_friends; /**< mask of accelerable states + likely + * followers */ \ + u_##size compressMask; /**< switch off before compress */ \ + u_##size exceptionMask; \ u_##size repeatCyclicMask; /**< also includes tug states */ \ - u_##size zombieMask; /**< zombie if in any of the set states */ \ + u_##size zombieMask; /**< zombie if in any of the set states */ \ u_##size shift[MAX_SHIFT_COUNT]; \ u32 shiftCount; /**< number of shift masks used */ \ u8 shiftAmount[MAX_SHIFT_COUNT]; /**< shift amount for each mask */ \ m512 exceptionShufMask; /**< exception byte shuffle mask */ \ m512 exceptionBitMask; /**< exception bit mask */ \ m512 exceptionAndMask; /**< exception and mask */ \ -}; - -CREATE_NFA_LIMEX(32) +}; + +CREATE_NFA_LIMEX(32) CREATE_NFA_LIMEX(64) -CREATE_NFA_LIMEX(128) -CREATE_NFA_LIMEX(256) -CREATE_NFA_LIMEX(384) -CREATE_NFA_LIMEX(512) - -/** \brief Structure describing a bounded repeat within the LimEx NFA. 
- * - * This struct is followed in memory by: - * - * -# a RepeatInfo structure - * -# a variable-sized lookup table for REPEAT_SPARSE_OPTIMAL_P repeats - * -# a TUG mask - */ -struct NFARepeatInfo { - u32 cyclicState; //!< index of this repeat's cyclic state - u32 ctrlIndex; //!< index of this repeat's control block - u32 packedCtrlOffset; //!< offset to packed control block in stream state - u32 stateOffset; //!< offset to repeat state in stream state - u32 stateSize; //!< total size of packed stream state for this repeat - u32 tugMaskOffset; //!< offset to tug mask (rel. to NFARepeatInfo) -}; - -struct NFAAccept { +CREATE_NFA_LIMEX(128) +CREATE_NFA_LIMEX(256) +CREATE_NFA_LIMEX(384) +CREATE_NFA_LIMEX(512) + +/** \brief Structure describing a bounded repeat within the LimEx NFA. + * + * This struct is followed in memory by: + * + * -# a RepeatInfo structure + * -# a variable-sized lookup table for REPEAT_SPARSE_OPTIMAL_P repeats + * -# a TUG mask + */ +struct NFARepeatInfo { + u32 cyclicState; //!< index of this repeat's cyclic state + u32 ctrlIndex; //!< index of this repeat's control block + u32 packedCtrlOffset; //!< offset to packed control block in stream state + u32 stateOffset; //!< offset to repeat state in stream state + u32 stateSize; //!< total size of packed stream state for this repeat + u32 tugMaskOffset; //!< offset to tug mask (rel. to NFARepeatInfo) +}; + +struct NFAAccept { u8 single_report; //!< If true, 'reports' is report id. /** @@ -198,6 +198,6 @@ struct NFAAccept { u32 reports; u32 squash; //!< Offset (from LimEx) into squash masks, or MO_INVALID_IDX. -}; - -#endif +}; + +#endif diff --git a/contrib/libs/hyperscan/src/nfa/limex_limits.h b/contrib/libs/hyperscan/src/nfa/limex_limits.h index 1ceaf6fa0b..f4df54a4b0 100644 --- a/contrib/libs/hyperscan/src/nfa/limex_limits.h +++ b/contrib/libs/hyperscan/src/nfa/limex_limits.h @@ -1,35 +1,35 @@ -/* +/* * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifndef LIMEX_LIMITS_H -#define LIMEX_LIMITS_H - -#define NFA_MAX_STATES 512 /**< max states in an NFA */ -#define NFA_MAX_ACCEL_STATES 8 /**< max accel states in a NFA */ - -#endif + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef LIMEX_LIMITS_H +#define LIMEX_LIMITS_H + +#define NFA_MAX_STATES 512 /**< max states in an NFA */ +#define NFA_MAX_ACCEL_STATES 8 /**< max accel states in a NFA */ + +#endif diff --git a/contrib/libs/hyperscan/src/nfa/limex_native.c b/contrib/libs/hyperscan/src/nfa/limex_native.c index c66cef599e..f6f5809c36 100644 --- a/contrib/libs/hyperscan/src/nfa/limex_native.c +++ b/contrib/libs/hyperscan/src/nfa/limex_native.c @@ -1,129 +1,129 @@ -/* +/* * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief LimEx NFA: native GPR runtime implementations. - */ - -//#define DEBUG -//#define DEBUG_INPUT -//#define DEBUG_EXCEPTIONS - -#include "limex.h" - -#include "accel.h" -#include "limex_internal.h" -#include "nfa_internal.h" -#include "ue2common.h" -#include "util/bitutils.h" - -// Common code -#define STATE_ON_STACK -#define ESTATE_ON_STACK - -#include "limex_runtime.h" - -// Other implementation code from X-Macro impl. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief LimEx NFA: native GPR runtime implementations. + */ + +//#define DEBUG +//#define DEBUG_INPUT +//#define DEBUG_EXCEPTIONS + +#include "limex.h" + +#include "accel.h" +#include "limex_internal.h" +#include "nfa_internal.h" +#include "ue2common.h" +#include "util/bitutils.h" + +// Common code +#define STATE_ON_STACK +#define ESTATE_ON_STACK + +#include "limex_runtime.h" + +// Other implementation code from X-Macro impl. 
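The #define block that follows configures an include-based X-macro: limex_state_impl.h, limex_common_impl.h and (further down) limex_exceptional.h are compiled once per state size, with SIZE, STATE_T and LOAD_FROM_ENG supplied by the includer and JOIN (util/join.h) pasting the size onto each symbol name. A minimal stand-alone illustration of the mechanism, with toy file and function names:

/* toy_impl.h -- generic body, parameterised by the includer */
#define TOY_STEP JOIN(toyStep, SIZE)
static inline void TOY_STEP(STATE_T *s) {
    /* ... size-generic logic over *s ... */
}
#undef TOY_STEP

/* includer -- stamps out a 32-bit instance named toyStep32 */
#define SIZE 32
#define STATE_T u32
#include "toy_impl.h"
#undef SIZE
#undef STATE_T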
#define SIZE 32 #define STATE_T u32 #define ENG_STATE_T u32 #define LOAD_FROM_ENG load_u32 -#include "limex_state_impl.h" - -#define INLINE_ATTR really_inline -#include "limex_common_impl.h" - -//////////////////////////////////////////////////////////////////////////// -// LimEx NFA implementation code - general purpose registers -//////////////////////////////////////////////////////////////////////////// - -// Process exceptional states - -#define STATE_ON_STACK -#define ESTATE_ON_STACK -#define RUN_EXCEPTION_FN_ONLY -#include "limex_exceptional.h" - -static really_inline -int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ, - const struct LimExNFA32 *limex, +#include "limex_state_impl.h" + +#define INLINE_ATTR really_inline +#include "limex_common_impl.h" + +//////////////////////////////////////////////////////////////////////////// +// LimEx NFA implementation code - general purpose registers +//////////////////////////////////////////////////////////////////////////// + +// Process exceptional states + +#define STATE_ON_STACK +#define ESTATE_ON_STACK +#define RUN_EXCEPTION_FN_ONLY +#include "limex_exceptional.h" + +static really_inline +int processExceptional32(u32 s, u32 estate, UNUSED u32 diffmask, u32 *succ, + const struct LimExNFA32 *limex, const struct NFAException32 *exceptions, u64a offset, - struct NFAContext32 *ctx, char in_rev, char flags) { - assert(estate != 0); // guaranteed by calling macro - - if (estate == ctx->cached_estate) { - DEBUG_PRINTF("using cached succ from previous state\n"); - *succ |= ctx->cached_esucc; - if (ctx->cached_reports && (flags & CALLBACK_OUTPUT)) { - DEBUG_PRINTF("firing cached reports from previous state\n"); - if (unlikely(limexRunReports(ctx->cached_reports, ctx->callback, - ctx->context, offset) - == MO_HALT_MATCHING)) { - return PE_RV_HALT; // halt; - } - } - return 0; - } - - u32 orig_estate = estate; // for caching - u32 local_succ = 0; - struct proto_cache new_cache = {0, NULL}; - enum CacheResult cacheable = CACHE_RESULT; - - /* Note that only exception-states that consist of exceptions that _only_ - * set successors (not fire accepts or squash states) are cacheable. */ - - do { - u32 bit = findAndClearLSB_32(&estate); + struct NFAContext32 *ctx, char in_rev, char flags) { + assert(estate != 0); // guaranteed by calling macro + + if (estate == ctx->cached_estate) { + DEBUG_PRINTF("using cached succ from previous state\n"); + *succ |= ctx->cached_esucc; + if (ctx->cached_reports && (flags & CALLBACK_OUTPUT)) { + DEBUG_PRINTF("firing cached reports from previous state\n"); + if (unlikely(limexRunReports(ctx->cached_reports, ctx->callback, + ctx->context, offset) + == MO_HALT_MATCHING)) { + return PE_RV_HALT; // halt; + } + } + return 0; + } + + u32 orig_estate = estate; // for caching + u32 local_succ = 0; + struct proto_cache new_cache = {0, NULL}; + enum CacheResult cacheable = CACHE_RESULT; + + /* Note that only exception-states that consist of exceptions that _only_ + * set successors (not fire accepts or squash states) are cacheable. 
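The do/while loop just below consumes estate one set bit at a time via findAndClearLSB_32. A self-contained equivalent of that idiom, assuming GCC/Clang builtins:

#include <stdint.h>
typedef uint32_t u32;

/* Return the index of the lowest set bit of *v and clear it in place.
 * Precondition: *v != 0, matching the callers here, which only loop
 * while the exception state is non-zero. */
static inline u32 toy_find_and_clear_lsb32(u32 *v) {
    u32 bit = (u32)__builtin_ctz(*v);
    *v &= *v - 1;
    return bit;
}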
*/ + + do { + u32 bit = findAndClearLSB_32(&estate); u32 idx = rank_in_mask32(limex->exceptionMask, bit); - const struct NFAException32 *e = &exceptions[idx]; + const struct NFAException32 *e = &exceptions[idx]; if (!runException32(e, s, succ, &local_succ, limex, offset, ctx, &new_cache, &cacheable, in_rev, flags)) { - return PE_RV_HALT; - } - } while (estate != 0); - - *succ |= local_succ; - - if (cacheable == CACHE_RESULT) { - ctx->cached_estate = orig_estate; - ctx->cached_esucc = local_succ; - ctx->cached_reports = new_cache.reports; - ctx->cached_br = new_cache.br; - } else if (cacheable == DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES) { - if (ctx->cached_br) { - ctx->cached_estate = 0U; - } - } - - return 0; -} - -// 32-bit models. -#include "limex_runtime_impl.h" + return PE_RV_HALT; + } + } while (estate != 0); + + *succ |= local_succ; + + if (cacheable == CACHE_RESULT) { + ctx->cached_estate = orig_estate; + ctx->cached_esucc = local_succ; + ctx->cached_reports = new_cache.reports; + ctx->cached_br = new_cache.br; + } else if (cacheable == DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES) { + if (ctx->cached_br) { + ctx->cached_estate = 0U; + } + } + + return 0; +} + +// 32-bit models. +#include "limex_runtime_impl.h" diff --git a/contrib/libs/hyperscan/src/nfa/limex_ring.h b/contrib/libs/hyperscan/src/nfa/limex_ring.h index 4aac689a73..522cfa12bc 100644 --- a/contrib/libs/hyperscan/src/nfa/limex_ring.h +++ b/contrib/libs/hyperscan/src/nfa/limex_ring.h @@ -1,106 +1,106 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Bounded Repeat implementation for the LimEx NFA. - */ - -#ifndef LIMEX_RING_H -#define LIMEX_RING_H - -#include "ue2common.h" -#include "repeat.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -/** \brief Return values from \ref processTugTrigger, used to provide feedback - * about a bounded repeat to the caller. - * - * TRIGGER_FAIL does not get cached as we prefer to use TRIGGER_STALE which - * allows the exception to squash the cyclic state as well. 
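A concrete reading of the caching note above, under the semantics spelled out by processTugTrigger below: a repeat with no upper bound, e.g. {2,}, keeps matching at every later offset once it has matched, so its TUG result is safe to cache (TRIGGER_SUCCESS_CACHE); a bounded repeat such as {2,4} has a match window that closes, so each query must be re-evaluated (TRIGGER_SUCCESS). And when the history is stale, returning TRIGGER_STALE rather than TRIGGER_FAIL additionally lets the exception squash the now-dead cyclic state.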
*/ -enum TriggerResult { - TRIGGER_FAIL, /**< no valid matches, but history still valid */ - TRIGGER_SUCCESS, /**< valid match found */ - TRIGGER_STALE, /**< no valid matches and history is invalid (stale) */ - TRIGGER_SUCCESS_CACHE /**< valid match found; can cache as the repeat has no - upper bound. */ -}; - -/** \brief Handle a TUG trigger: given an \p offset, returns whether a repeat - * matches or not. */ -static really_inline -enum TriggerResult processTugTrigger(const struct RepeatInfo *info, - const union RepeatControl *ctrl, - const char *state, u64a offset) { - DEBUG_PRINTF("tug trigger, %s history, repeat={%u,%u}, offset=%llu, " - "ctrl=%p, state=%p\n", - repeatTypeName(info->type), info->repeatMin, info->repeatMax, - offset, ctrl, state); - - assert(ISALIGNED(ctrl)); - - enum RepeatMatch rv = repeatHasMatch(info, ctrl, state, offset); - switch (rv) { - case REPEAT_NOMATCH: - return TRIGGER_FAIL; - case REPEAT_STALE: - return TRIGGER_STALE; - case REPEAT_MATCH: - if (info->repeatMax == REPEAT_INF) { - // {N,} repeats can be cached. - return TRIGGER_SUCCESS_CACHE; - } else { - return TRIGGER_SUCCESS; - } - } - - assert(0); // unreachable - return TRIGGER_FAIL; -} - -/** \brief Handle a POS trigger: stores a top in the repeat. */ -static really_inline -void processPosTrigger(const struct RepeatInfo *info, union RepeatControl *ctrl, - char *state, u64a offset, char is_alive) { - DEBUG_PRINTF("pos trigger, %s history, repeat={%u,%u}, offset=%llu, " - "is_alive=%d\n", repeatTypeName(info->type), - info->repeatMin, info->repeatMax, offset, is_alive); - - assert(ISALIGNED(ctrl)); - - repeatStore(info, ctrl, state, offset, is_alive); -} - -#ifdef __cplusplus -} -#endif - -#endif +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Bounded Repeat implementation for the LimEx NFA. 
+ */ + +#ifndef LIMEX_RING_H +#define LIMEX_RING_H + +#include "ue2common.h" +#include "repeat.h" + +#ifdef __cplusplus +extern "C" +{ +#endif + +/** \brief Return values from \ref processTugTrigger, used to provide feedback + * about a bounded repeat to the caller. + * + * TRIGGER_FAIL does not get cached as we prefer to use TRIGGER_STALE which + * allows the exception to squash the cyclic state as well. */ +enum TriggerResult { + TRIGGER_FAIL, /**< no valid matches, but history still valid */ + TRIGGER_SUCCESS, /**< valid match found */ + TRIGGER_STALE, /**< no valid matches and history is invalid (stale) */ + TRIGGER_SUCCESS_CACHE /**< valid match found; can cache as the repeat has no + upper bound. */ +}; + +/** \brief Handle a TUG trigger: given an \p offset, returns whether a repeat + * matches or not. */ +static really_inline +enum TriggerResult processTugTrigger(const struct RepeatInfo *info, + const union RepeatControl *ctrl, + const char *state, u64a offset) { + DEBUG_PRINTF("tug trigger, %s history, repeat={%u,%u}, offset=%llu, " + "ctrl=%p, state=%p\n", + repeatTypeName(info->type), info->repeatMin, info->repeatMax, + offset, ctrl, state); + + assert(ISALIGNED(ctrl)); + + enum RepeatMatch rv = repeatHasMatch(info, ctrl, state, offset); + switch (rv) { + case REPEAT_NOMATCH: + return TRIGGER_FAIL; + case REPEAT_STALE: + return TRIGGER_STALE; + case REPEAT_MATCH: + if (info->repeatMax == REPEAT_INF) { + // {N,} repeats can be cached. + return TRIGGER_SUCCESS_CACHE; + } else { + return TRIGGER_SUCCESS; + } + } + + assert(0); // unreachable + return TRIGGER_FAIL; +} + +/** \brief Handle a POS trigger: stores a top in the repeat. */ +static really_inline +void processPosTrigger(const struct RepeatInfo *info, union RepeatControl *ctrl, + char *state, u64a offset, char is_alive) { + DEBUG_PRINTF("pos trigger, %s history, repeat={%u,%u}, offset=%llu, " + "is_alive=%d\n", repeatTypeName(info->type), + info->repeatMin, info->repeatMax, offset, is_alive); + + assert(ISALIGNED(ctrl)); + + repeatStore(info, ctrl, state, offset, is_alive); +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/contrib/libs/hyperscan/src/nfa/limex_runtime.h b/contrib/libs/hyperscan/src/nfa/limex_runtime.h index 3395a44830..6109d382d8 100644 --- a/contrib/libs/hyperscan/src/nfa/limex_runtime.h +++ b/contrib/libs/hyperscan/src/nfa/limex_runtime.h @@ -1,108 +1,108 @@ -/* +/* * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - \brief Limex Execution Engine Or: - How I Learned To Stop Worrying And Love The Preprocessor - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + \brief Limex Execution Engine Or: + How I Learned To Stop Worrying And Love The Preprocessor + This file includes utility functions which do not depend on the size of the state or shift masks directly. -*/ - -#ifndef LIMEX_RUNTIME_H -#define LIMEX_RUNTIME_H - -#include "limex_accel.h" -#include "limex_context.h" -#include "limex_internal.h" -#include "nfa_api_util.h" -#include "nfa_internal.h" -#include "util/uniform_ops.h" - -//////////////////////////////////////////////////////////////////////////// -// LimEx NFA implementation code - common macros -//////////////////////////////////////////////////////////////////////////// - -#ifdef DEBUG_INPUT -#include <ctype.h> -#define DUMP_INPUT(index) DEBUG_PRINTF("input %p i=%zu: %02hhx (%c)\n", \ - &input[index], index, input[index], \ - isprint(input[index]) ? 
input[index] : ' ') -#else -#define DUMP_INPUT(index) do { } while(0) -#endif - -#define NO_OUTPUT 0 -#define CALLBACK_OUTPUT 1 -#define FIRST_BYTE 16 - -enum CacheResult { - DO_NOT_CACHE_RESULT, - CACHE_RESULT, - DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES -}; - -struct proto_cache { - char br; - const ReportID *reports; -}; - -#define PE_RV_HALT 1 - -#ifdef STATE_ON_STACK -#define pass_state s -#else -#define pass_state &s -#endif - -#ifdef ESTATE_ON_STACK -#define pass_estate estate -#else -#define pass_estate &estate -#endif - -static really_inline -int limexRunReports(const ReportID *reports, NfaCallback callback, - void *context, u64a offset) { - assert(reports); - assert(callback); - - for (; *reports != MO_INVALID_IDX; ++reports) { - DEBUG_PRINTF("firing report for id %u at offset %llu\n", - *reports, offset); +*/ + +#ifndef LIMEX_RUNTIME_H +#define LIMEX_RUNTIME_H + +#include "limex_accel.h" +#include "limex_context.h" +#include "limex_internal.h" +#include "nfa_api_util.h" +#include "nfa_internal.h" +#include "util/uniform_ops.h" + +//////////////////////////////////////////////////////////////////////////// +// LimEx NFA implementation code - common macros +//////////////////////////////////////////////////////////////////////////// + +#ifdef DEBUG_INPUT +#include <ctype.h> +#define DUMP_INPUT(index) DEBUG_PRINTF("input %p i=%zu: %02hhx (%c)\n", \ + &input[index], index, input[index], \ + isprint(input[index]) ? input[index] : ' ') +#else +#define DUMP_INPUT(index) do { } while(0) +#endif + +#define NO_OUTPUT 0 +#define CALLBACK_OUTPUT 1 +#define FIRST_BYTE 16 + +enum CacheResult { + DO_NOT_CACHE_RESULT, + CACHE_RESULT, + DO_NOT_CACHE_RESULT_AND_FLUSH_BR_ENTRIES +}; + +struct proto_cache { + char br; + const ReportID *reports; +}; + +#define PE_RV_HALT 1 + +#ifdef STATE_ON_STACK +#define pass_state s +#else +#define pass_state &s +#endif + +#ifdef ESTATE_ON_STACK +#define pass_estate estate +#else +#define pass_estate &estate +#endif + +static really_inline +int limexRunReports(const ReportID *reports, NfaCallback callback, + void *context, u64a offset) { + assert(reports); + assert(callback); + + for (; *reports != MO_INVALID_IDX; ++reports) { + DEBUG_PRINTF("firing report for id %u at offset %llu\n", + *reports, offset); int rv = callback(0, offset, *reports, context); - if (rv == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - return MO_CONTINUE_MATCHING; // continue -} - + if (rv == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } + return MO_CONTINUE_MATCHING; // continue +} + static really_inline int limexRunAccept(const char *limex_base, const struct NFAAccept *accept, NfaCallback callback, void *context, u64a offset) { @@ -135,67 +135,67 @@ int limexAcceptHasReport(const char *limex_base, const struct NFAAccept *accept, return 0; } -/** \brief Return a (correctly typed) pointer to the exception table. */ -#define getExceptionTable(exc_type, lim) \ - ((const exc_type *)((const char *)(lim) + (lim)->exceptionOffset)) - -/** \brief Return a pointer to the ordinary accepts table. */ -#define getAcceptTable(lim) \ - ((const struct NFAAccept *)((const char *)(lim) + (lim)->acceptOffset)) - -/** \brief Return a pointer to the EOD accepts table. 
*/ -#define getAcceptEodTable(lim) \ - ((const struct NFAAccept *)((const char *)(lim) + (lim)->acceptEodOffset)) - -#define MAKE_GET_NFA_REPEAT_INFO(size) \ - static really_inline const struct NFARepeatInfo *getNfaRepeatInfo##size( \ - const struct LimExNFA##size *limex, unsigned num) { \ - assert(num < limex->repeatCount); \ - \ - const char *base = (const char *)limex; \ - const u32 *repeatOffset = (const u32 *)(base + limex->repeatOffset); \ - assert(ISALIGNED(repeatOffset)); \ - \ - const struct NFARepeatInfo *info = \ - (const struct NFARepeatInfo *)(base + repeatOffset[num]); \ - assert(ISALIGNED(info)); \ - return info; \ - } - -MAKE_GET_NFA_REPEAT_INFO(32) +/** \brief Return a (correctly typed) pointer to the exception table. */ +#define getExceptionTable(exc_type, lim) \ + ((const exc_type *)((const char *)(lim) + (lim)->exceptionOffset)) + +/** \brief Return a pointer to the ordinary accepts table. */ +#define getAcceptTable(lim) \ + ((const struct NFAAccept *)((const char *)(lim) + (lim)->acceptOffset)) + +/** \brief Return a pointer to the EOD accepts table. */ +#define getAcceptEodTable(lim) \ + ((const struct NFAAccept *)((const char *)(lim) + (lim)->acceptEodOffset)) + +#define MAKE_GET_NFA_REPEAT_INFO(size) \ + static really_inline const struct NFARepeatInfo *getNfaRepeatInfo##size( \ + const struct LimExNFA##size *limex, unsigned num) { \ + assert(num < limex->repeatCount); \ + \ + const char *base = (const char *)limex; \ + const u32 *repeatOffset = (const u32 *)(base + limex->repeatOffset); \ + assert(ISALIGNED(repeatOffset)); \ + \ + const struct NFARepeatInfo *info = \ + (const struct NFARepeatInfo *)(base + repeatOffset[num]); \ + assert(ISALIGNED(info)); \ + return info; \ + } + +MAKE_GET_NFA_REPEAT_INFO(32) MAKE_GET_NFA_REPEAT_INFO(64) -MAKE_GET_NFA_REPEAT_INFO(128) -MAKE_GET_NFA_REPEAT_INFO(256) -MAKE_GET_NFA_REPEAT_INFO(384) -MAKE_GET_NFA_REPEAT_INFO(512) - -static really_inline -const struct RepeatInfo *getRepeatInfo(const struct NFARepeatInfo *info) { - const struct RepeatInfo *repeat = - (const struct RepeatInfo *)((const char *)info + sizeof(*info)); - assert(ISALIGNED(repeat)); - return repeat; -} - -static really_inline -union RepeatControl *getRepeatControlBase(char *state, size_t nfa_state_size) { - union RepeatControl *ctrl_base = - (union RepeatControl *)(state + - ROUNDUP_N(nfa_state_size, - alignof(union RepeatControl))); - assert(ISALIGNED(ctrl_base)); - return ctrl_base; -} - -static really_inline -const union RepeatControl *getRepeatControlBaseConst(const char *state, - size_t nfa_state_size) { - const union RepeatControl *ctrl_base = - (const union RepeatControl *)(state + - ROUNDUP_N(nfa_state_size, - alignof(union RepeatControl))); - assert(ISALIGNED(ctrl_base)); - return ctrl_base; -} - -#endif +MAKE_GET_NFA_REPEAT_INFO(128) +MAKE_GET_NFA_REPEAT_INFO(256) +MAKE_GET_NFA_REPEAT_INFO(384) +MAKE_GET_NFA_REPEAT_INFO(512) + +static really_inline +const struct RepeatInfo *getRepeatInfo(const struct NFARepeatInfo *info) { + const struct RepeatInfo *repeat = + (const struct RepeatInfo *)((const char *)info + sizeof(*info)); + assert(ISALIGNED(repeat)); + return repeat; +} + +static really_inline +union RepeatControl *getRepeatControlBase(char *state, size_t nfa_state_size) { + union RepeatControl *ctrl_base = + (union RepeatControl *)(state + + ROUNDUP_N(nfa_state_size, + alignof(union RepeatControl))); + assert(ISALIGNED(ctrl_base)); + return ctrl_base; +} + +static really_inline +const union RepeatControl *getRepeatControlBaseConst(const char *state, 
+ size_t nfa_state_size) { + const union RepeatControl *ctrl_base = + (const union RepeatControl *)(state + + ROUNDUP_N(nfa_state_size, + alignof(union RepeatControl))); + assert(ISALIGNED(ctrl_base)); + return ctrl_base; +} + +#endif diff --git a/contrib/libs/hyperscan/src/nfa/limex_runtime_impl.h b/contrib/libs/hyperscan/src/nfa/limex_runtime_impl.h index 541744cec0..7b89182bea 100644 --- a/contrib/libs/hyperscan/src/nfa/limex_runtime_impl.h +++ b/contrib/libs/hyperscan/src/nfa/limex_runtime_impl.h @@ -1,164 +1,164 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "util/join.h" -#include <string.h> - -/** \file - * \brief Limex Execution Engine Or: - * How I Learned To Stop Worrying And Love The Preprocessor - * - * Version 2.0: now with X-Macros, so you get line numbers in your debugger. - */ - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "util/join.h" +#include <string.h> + +/** \file + * \brief Limex Execution Engine Or: + * How I Learned To Stop Worrying And Love The Preprocessor + * + * Version 2.0: now with X-Macros, so you get line numbers in your debugger. + */ + #if !defined(SIZE) || !defined(STATE_T) || !defined(LOAD_FROM_ENG) # error Must define SIZE, STATE_T, LOAD_FROM_ENG in includer. -#endif - +#endif + #define LIMEX_API_ROOT JOIN(nfaExecLimEx, SIZE) - -#define IMPL_NFA_T JOIN(struct LimExNFA, SIZE) - -#define TESTEOD_FN JOIN(moNfaTestEod, SIZE) -#define INITIAL_FN JOIN(moNfaInitial, SIZE) -#define TOP_FN JOIN(moNfaTop, SIZE) -#define TOPN_FN JOIN(moNfaTopN, SIZE) -#define REPORTCURRENT_FN JOIN(moNfaReportCurrent, SIZE) -#define COMPRESS_FN JOIN(moNfaCompressState, SIZE) -#define EXPAND_FN JOIN(moNfaExpandState, SIZE) -#define COMPRESS_REPEATS_FN JOIN(LIMEX_API_ROOT, _Compress_Repeats) -#define EXPAND_REPEATS_FN JOIN(LIMEX_API_ROOT, _Expand_Repeats) -#define PROCESS_ACCEPTS_FN JOIN(moProcessAccepts, SIZE) -#define PROCESS_ACCEPTS_NOSQUASH_FN JOIN(moProcessAcceptsNoSquash, SIZE) -#define GET_NFA_REPEAT_INFO_FN JOIN(getNfaRepeatInfo, SIZE) -#define RUN_ACCEL_FN JOIN(LIMEX_API_ROOT, _Run_Accel) -#define RUN_EXCEPTIONS_FN JOIN(LIMEX_API_ROOT, _Run_Exceptions) -#define REV_STREAM_FN JOIN(LIMEX_API_ROOT, _Rev_Stream) + +#define IMPL_NFA_T JOIN(struct LimExNFA, SIZE) + +#define TESTEOD_FN JOIN(moNfaTestEod, SIZE) +#define INITIAL_FN JOIN(moNfaInitial, SIZE) +#define TOP_FN JOIN(moNfaTop, SIZE) +#define TOPN_FN JOIN(moNfaTopN, SIZE) +#define REPORTCURRENT_FN JOIN(moNfaReportCurrent, SIZE) +#define COMPRESS_FN JOIN(moNfaCompressState, SIZE) +#define EXPAND_FN JOIN(moNfaExpandState, SIZE) +#define COMPRESS_REPEATS_FN JOIN(LIMEX_API_ROOT, _Compress_Repeats) +#define EXPAND_REPEATS_FN JOIN(LIMEX_API_ROOT, _Expand_Repeats) +#define PROCESS_ACCEPTS_FN JOIN(moProcessAccepts, SIZE) +#define PROCESS_ACCEPTS_NOSQUASH_FN JOIN(moProcessAcceptsNoSquash, SIZE) +#define GET_NFA_REPEAT_INFO_FN JOIN(getNfaRepeatInfo, SIZE) +#define RUN_ACCEL_FN JOIN(LIMEX_API_ROOT, _Run_Accel) +#define RUN_EXCEPTIONS_FN JOIN(LIMEX_API_ROOT, _Run_Exceptions) +#define REV_STREAM_FN JOIN(LIMEX_API_ROOT, _Rev_Stream) #define LOOP_NOACCEL_FN JOIN(LIMEX_API_ROOT, _Loop_No_Accel) -#define STREAM_FN JOIN(LIMEX_API_ROOT, _Stream) -#define STREAMCB_FN JOIN(LIMEX_API_ROOT, _Stream_CB) -#define STREAMFIRST_FN JOIN(LIMEX_API_ROOT, _Stream_First) -#define STREAMSILENT_FN JOIN(LIMEX_API_ROOT, _Stream_Silent) -#define CONTEXT_T JOIN(NFAContext, SIZE) -#define EXCEPTION_T JOIN(struct NFAException, SIZE) -#define AND_STATE JOIN(and_, STATE_T) -#define ANDNOT_STATE JOIN(andnot_, STATE_T) -#define OR_STATE JOIN(or_, STATE_T) +#define STREAM_FN JOIN(LIMEX_API_ROOT, _Stream) +#define STREAMCB_FN JOIN(LIMEX_API_ROOT, _Stream_CB) +#define STREAMFIRST_FN JOIN(LIMEX_API_ROOT, _Stream_First) +#define STREAMSILENT_FN JOIN(LIMEX_API_ROOT, _Stream_Silent) +#define CONTEXT_T JOIN(NFAContext, SIZE) +#define EXCEPTION_T JOIN(struct 
NFAException, SIZE) +#define AND_STATE JOIN(and_, STATE_T) +#define ANDNOT_STATE JOIN(andnot_, STATE_T) +#define OR_STATE JOIN(or_, STATE_T) #define LSHIFT_STATE JOIN(lshift_, STATE_T) -#define TESTBIT_STATE JOIN(testbit_, STATE_T) +#define TESTBIT_STATE JOIN(testbit_, STATE_T) #define CLEARBIT_STATE JOIN(clearbit_, STATE_T) -#define ZERO_STATE JOIN(zero_, STATE_T) -#define ISNONZERO_STATE JOIN(isNonZero_, STATE_T) -#define ISZERO_STATE JOIN(isZero_, STATE_T) -#define NOTEQ_STATE JOIN(noteq_, STATE_T) - -// Pick an appropriate diffrich function for this platform. -#ifdef ARCH_64_BIT -#define DIFFRICH_STATE JOIN(diffrich64_, STATE_T) -#else -#define DIFFRICH_STATE JOIN(diffrich_, STATE_T) -#endif - -#define EXPIRE_ESTATE_FN JOIN(limexExpireExtendedState, SIZE) -#define SQUASH_UNTUG_BR_FN JOIN(lazyTug, SIZE) - -// Acceleration and exception masks: we load them on the fly for really big -// models. -#if SIZE < 256 -#define ACCEL_MASK accelMask -#define ACCEL_AND_FRIENDS_MASK accel_and_friendsMask -#define EXCEPTION_MASK exceptionMask -#else +#define ZERO_STATE JOIN(zero_, STATE_T) +#define ISNONZERO_STATE JOIN(isNonZero_, STATE_T) +#define ISZERO_STATE JOIN(isZero_, STATE_T) +#define NOTEQ_STATE JOIN(noteq_, STATE_T) + +// Pick an appropriate diffrich function for this platform. +#ifdef ARCH_64_BIT +#define DIFFRICH_STATE JOIN(diffrich64_, STATE_T) +#else +#define DIFFRICH_STATE JOIN(diffrich_, STATE_T) +#endif + +#define EXPIRE_ESTATE_FN JOIN(limexExpireExtendedState, SIZE) +#define SQUASH_UNTUG_BR_FN JOIN(lazyTug, SIZE) + +// Acceleration and exception masks: we load them on the fly for really big +// models. +#if SIZE < 256 +#define ACCEL_MASK accelMask +#define ACCEL_AND_FRIENDS_MASK accel_and_friendsMask +#define EXCEPTION_MASK exceptionMask +#else #define ACCEL_MASK LOAD_FROM_ENG(&limex->accel) #define ACCEL_AND_FRIENDS_MASK LOAD_FROM_ENG(&limex->accel_and_friends) #define EXCEPTION_MASK LOAD_FROM_ENG(&limex->exceptionMask) -#endif - -// Run exception processing, if necessary. Returns 0 if scanning should -// continue, 1 if an accept was fired and the user instructed us to halt. -static really_inline -char RUN_EXCEPTIONS_FN(const IMPL_NFA_T *limex, const EXCEPTION_T *exceptions, - STATE_T s, const STATE_T emask, size_t i, u64a offset, - STATE_T *succ, u64a *final_loc, struct CONTEXT_T *ctx, - const char flags, const char in_rev, - const char first_match) { - STATE_T estate = AND_STATE(s, emask); - u32 diffmask = DIFFRICH_STATE(ZERO_STATE, estate); - if (likely(!diffmask)) { - return 0; // No exceptions to process. - } - - if (first_match && i) { +#endif + +// Run exception processing, if necessary. Returns 0 if scanning should +// continue, 1 if an accept was fired and the user instructed us to halt. +static really_inline +char RUN_EXCEPTIONS_FN(const IMPL_NFA_T *limex, const EXCEPTION_T *exceptions, + STATE_T s, const STATE_T emask, size_t i, u64a offset, + STATE_T *succ, u64a *final_loc, struct CONTEXT_T *ctx, + const char flags, const char in_rev, + const char first_match) { + STATE_T estate = AND_STATE(s, emask); + u32 diffmask = DIFFRICH_STATE(ZERO_STATE, estate); + if (likely(!diffmask)) { + return 0; // No exceptions to process. 
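    /* (The limited/exceptional split keeps this path off the hot loop:
     * ordinary transitions are handled wholesale by the shift masks, and
     * only states flagged in emask reach here. DIFFRICH_STATE compares
     * estate against zero one 32- or 64-bit chunk at a time, depending on
     * platform, so the common no-exception case costs just the AND above
     * plus this early return; a nonzero diffmask tells processExceptional
     * which chunks hold live exceptional states.) */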
+ } + + if (first_match && i) { STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept); - STATE_T foundAccepts = AND_STATE(s, acceptMask); - if (unlikely(ISNONZERO_STATE(foundAccepts))) { - DEBUG_PRINTF("first match at %zu\n", i); - DEBUG_PRINTF("for nfa %p\n", limex); - assert(final_loc); + STATE_T foundAccepts = AND_STATE(s, acceptMask); + if (unlikely(ISNONZERO_STATE(foundAccepts))) { + DEBUG_PRINTF("first match at %zu\n", i); + DEBUG_PRINTF("for nfa %p\n", limex); + assert(final_loc); ctx->s = s; - *final_loc = i; - return 1; // Halt matching. - } - } - - u64a callback_offset = i + offset; - char localflags = (!i && !in_rev) ? NO_OUTPUT | FIRST_BYTE : flags; - - int rv = JOIN(processExceptional, SIZE)( + *final_loc = i; + return 1; // Halt matching. + } + } + + u64a callback_offset = i + offset; + char localflags = (!i && !in_rev) ? NO_OUTPUT | FIRST_BYTE : flags; + + int rv = JOIN(processExceptional, SIZE)( pass_state, pass_estate, diffmask, succ, limex, exceptions, callback_offset, ctx, in_rev, localflags); - if (rv == PE_RV_HALT) { - return 1; // Halt matching. - } - - return 0; -} - -static really_inline -size_t RUN_ACCEL_FN(const STATE_T s, UNUSED const STATE_T accelMask, - UNUSED const IMPL_NFA_T *limex, const u8 *accelTable, - const union AccelAux *accelAux, const u8 *input, size_t i, - size_t length) { - size_t j; -#if SIZE < 128 - // For small cases, we pass the state by value. - j = JOIN(doAccel, SIZE)(s, accelMask, accelTable, accelAux, input, i, - length); -#else - j = JOIN(doAccel, SIZE)(&s, limex, accelTable, accelAux, input, i, length); -#endif - - assert(j >= i); - assert(i <= length); - return j; -} - + if (rv == PE_RV_HALT) { + return 1; // Halt matching. + } + + return 0; +} + +static really_inline +size_t RUN_ACCEL_FN(const STATE_T s, UNUSED const STATE_T accelMask, + UNUSED const IMPL_NFA_T *limex, const u8 *accelTable, + const union AccelAux *accelAux, const u8 *input, size_t i, + size_t length) { + size_t j; +#if SIZE < 128 + // For small cases, we pass the state by value. + j = JOIN(doAccel, SIZE)(s, accelMask, accelTable, accelAux, input, i, + length); +#else + j = JOIN(doAccel, SIZE)(&s, limex, accelTable, accelAux, input, i, length); +#endif + + assert(j >= i); + assert(i <= length); + return j; +} + // Shift macros for Limited NFAs. Defined in terms of uniform ops. // LimExNFAxxx ptr in 'limex' and the current state in 's' #define NFA_EXEC_LIM_SHIFT(limex_m, curr_m, shift_idx) \ @@ -206,7 +206,7 @@ size_t RUN_ACCEL_FN(const STATE_T s, UNUSED const STATE_T accelMask, * true. 
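 *
 * (Callers instantiate this loop twice, once with can_die fixed to 0 for
 * NFAs carrying LIMEX_FLAG_CANNOT_DIE and once with it fixed to 1; since
 * the flag is a compile-time constant in each branch, the dead-state
 * check can presumably be folded away in the first case.)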
* */ -static really_inline +static really_inline char LOOP_NOACCEL_FN(const IMPL_NFA_T *limex, const u8 *input, size_t *loc, size_t length, STATE_T *s_ptr, struct CONTEXT_T *ctx, u64a offset, const char flags, u64a *final_loc, @@ -244,203 +244,203 @@ char LOOP_NOACCEL_FN(const IMPL_NFA_T *limex, const u8 *input, size_t *loc, } static really_inline -char STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, - struct CONTEXT_T *ctx, u64a offset, const char flags, - u64a *final_loc, const char first_match) { +char STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, + struct CONTEXT_T *ctx, u64a offset, const char flags, + u64a *final_loc, const char first_match) { const ENG_STATE_T *reach = get_reach_table(limex); -#if SIZE < 256 +#if SIZE < 256 const STATE_T accelMask = LOAD_FROM_ENG(&limex->accel); const STATE_T accel_and_friendsMask = LOAD_FROM_ENG(&limex->accel_and_friends); const STATE_T exceptionMask = LOAD_FROM_ENG(&limex->exceptionMask); -#endif +#endif const u8 *accelTable = (const u8 *)((const char *)limex + limex->accelTableOffset); - const union AccelAux *accelAux = - (const union AccelAux *)((const char *)limex + limex->accelAuxOffset); - const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex); + const union AccelAux *accelAux = + (const union AccelAux *)((const char *)limex + limex->accelAuxOffset); + const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex); STATE_T s = ctx->s; - - /* assert(ISALIGNED_16(exceptions)); */ - /* assert(ISALIGNED_16(reach)); */ - - size_t i = 0; - size_t min_accel_offset = 0; - if (!limex->accelCount || length < ACCEL_MIN_LEN) { - min_accel_offset = length; - goto without_accel; - } else { - goto with_accel; - } - -without_accel: + + /* assert(ISALIGNED_16(exceptions)); */ + /* assert(ISALIGNED_16(reach)); */ + + size_t i = 0; + size_t min_accel_offset = 0; + if (!limex->accelCount || length < ACCEL_MIN_LEN) { + min_accel_offset = length; + goto without_accel; + } else { + goto with_accel; + } + +without_accel: if (limex->flags & LIMEX_FLAG_CANNOT_DIE) { const char can_die = 0; if (LOOP_NOACCEL_FN(limex, input, &i, min_accel_offset, &s, ctx, offset, flags, final_loc, first_match, can_die) == MO_HALT_MATCHING) { return MO_HALT_MATCHING; - } + } } else { const char can_die = 1; if (LOOP_NOACCEL_FN(limex, input, &i, min_accel_offset, &s, ctx, offset, flags, final_loc, first_match, can_die) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - -with_accel: - for (; i != length; i++) { - DUMP_INPUT(i); - if (i + 16 <= length && - ISZERO_STATE(ANDNOT_STATE(ACCEL_AND_FRIENDS_MASK, s))) { - DEBUG_PRINTF("current states are all accelerable\n"); - assert(i + 16 <= length); - size_t post_idx = - RUN_ACCEL_FN(s, ACCEL_MASK, limex, accelTable, accelAux, input, - i, length); - if (post_idx != i) { - /* squashing any friends as they may no longer be valid; - * offset back off should ensure they weren't doing anything - * important */ - s = AND_STATE(ACCEL_MASK, s); - } - - if (i && post_idx < min_accel_offset + BAD_ACCEL_DIST) { - min_accel_offset = post_idx + BIG_ACCEL_PENALTY; - } else { - min_accel_offset = post_idx + SMALL_ACCEL_PENALTY; - } - - if (min_accel_offset >= length - ACCEL_MIN_LEN) { - min_accel_offset = length; - } - - DEBUG_PRINTF("advanced %zd, next accel chance in %zd/%zd\n", - post_idx - i, min_accel_offset - post_idx, - length - post_idx); - - i = post_idx; - if (i == length) { - break; /* all chars eaten, break out of loop */ - } - goto without_accel; - } - - STATE_T succ; + return 
MO_HALT_MATCHING; + } + } + +with_accel: + for (; i != length; i++) { + DUMP_INPUT(i); + if (i + 16 <= length && + ISZERO_STATE(ANDNOT_STATE(ACCEL_AND_FRIENDS_MASK, s))) { + DEBUG_PRINTF("current states are all accelerable\n"); + assert(i + 16 <= length); + size_t post_idx = + RUN_ACCEL_FN(s, ACCEL_MASK, limex, accelTable, accelAux, input, + i, length); + if (post_idx != i) { + /* squashing any friends as they may no longer be valid; + * offset back off should ensure they weren't doing anything + * important */ + s = AND_STATE(ACCEL_MASK, s); + } + + if (i && post_idx < min_accel_offset + BAD_ACCEL_DIST) { + min_accel_offset = post_idx + BIG_ACCEL_PENALTY; + } else { + min_accel_offset = post_idx + SMALL_ACCEL_PENALTY; + } + + if (min_accel_offset >= length - ACCEL_MIN_LEN) { + min_accel_offset = length; + } + + DEBUG_PRINTF("advanced %zd, next accel chance in %zd/%zd\n", + post_idx - i, min_accel_offset - post_idx, + length - post_idx); + + i = post_idx; + if (i == length) { + break; /* all chars eaten, break out of loop */ + } + goto without_accel; + } + + STATE_T succ; NFA_EXEC_GET_LIM_SUCC(limex, s, succ); - + if (RUN_EXCEPTIONS_FN(limex, exceptions, s, EXCEPTION_MASK, i, offset, &succ, final_loc, ctx, flags, 0, first_match)) { - return MO_HALT_MATCHING; - } - + return MO_HALT_MATCHING; + } + u8 c = input[i]; s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]])); - } - + } + ctx->s = s; - - if ((first_match || (flags & CALLBACK_OUTPUT)) && limex->acceptCount) { + + if ((first_match || (flags & CALLBACK_OUTPUT)) && limex->acceptCount) { STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept); - const struct NFAAccept *acceptTable = getAcceptTable(limex); - STATE_T foundAccepts = AND_STATE(s, acceptMask); - if (unlikely(ISNONZERO_STATE(foundAccepts))) { - if (first_match) { + const struct NFAAccept *acceptTable = getAcceptTable(limex); + STATE_T foundAccepts = AND_STATE(s, acceptMask); + if (unlikely(ISNONZERO_STATE(foundAccepts))) { + if (first_match) { ctx->s = s; - assert(final_loc); - *final_loc = length; - return MO_HALT_MATCHING; + assert(final_loc); + *final_loc = length; + return MO_HALT_MATCHING; } else if (PROCESS_ACCEPTS_FN(limex, &ctx->s, &acceptMask, acceptTable, offset + length, - ctx->callback, ctx->context)) { - return MO_HALT_MATCHING; - } - } - } - if (first_match) { - assert(final_loc); - *final_loc = length; - } - return MO_CONTINUE_MATCHING; -} - -static never_inline -char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, - struct CONTEXT_T *ctx, u64a offset) { + ctx->callback, ctx->context)) { + return MO_HALT_MATCHING; + } + } + } + if (first_match) { + assert(final_loc); + *final_loc = length; + } + return MO_CONTINUE_MATCHING; +} + +static never_inline +char REV_STREAM_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, + struct CONTEXT_T *ctx, u64a offset) { const ENG_STATE_T *reach = get_reach_table(limex); -#if SIZE < 256 +#if SIZE < 256 const STATE_T exceptionMask = LOAD_FROM_ENG(&limex->exceptionMask); -#endif - const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex); +#endif + const EXCEPTION_T *exceptions = getExceptionTable(EXCEPTION_T, limex); STATE_T s = ctx->s; - - /* assert(ISALIGNED_16(exceptions)); */ - /* assert(ISALIGNED_16(reach)); */ - const char flags = CALLBACK_OUTPUT; - u64a *final_loc = NULL; - - for (size_t i = length; i != 0; i--) { + + /* assert(ISALIGNED_16(exceptions)); */ + /* assert(ISALIGNED_16(reach)); */ + const char flags = CALLBACK_OUTPUT; + u64a *final_loc = NULL; + + for (size_t i 
= length; i != 0; i--) { DUMP_INPUT(i - 1); - if (ISZERO_STATE(s)) { - DEBUG_PRINTF("no states are switched on, early exit\n"); + if (ISZERO_STATE(s)) { + DEBUG_PRINTF("no states are switched on, early exit\n"); ctx->s = s; - return MO_CONTINUE_MATCHING; - } - - STATE_T succ; + return MO_CONTINUE_MATCHING; + } + + STATE_T succ; NFA_EXEC_GET_LIM_SUCC(limex, s, succ); - + if (RUN_EXCEPTIONS_FN(limex, exceptions, s, EXCEPTION_MASK, i, offset, &succ, final_loc, ctx, flags, 1, 0)) { - return MO_HALT_MATCHING; - } - + return MO_HALT_MATCHING; + } + u8 c = input[i - 1]; s = AND_STATE(succ, LOAD_FROM_ENG(&reach[limex->reachMap[c]])); - } - + } + ctx->s = s; - + STATE_T acceptMask = LOAD_FROM_ENG(&limex->accept); - const struct NFAAccept *acceptTable = getAcceptTable(limex); - const u32 acceptCount = limex->acceptCount; - assert(flags & CALLBACK_OUTPUT); - if (acceptCount) { - STATE_T foundAccepts = AND_STATE(s, acceptMask); - if (unlikely(ISNONZERO_STATE(foundAccepts))) { + const struct NFAAccept *acceptTable = getAcceptTable(limex); + const u32 acceptCount = limex->acceptCount; + assert(flags & CALLBACK_OUTPUT); + if (acceptCount) { + STATE_T foundAccepts = AND_STATE(s, acceptMask); + if (unlikely(ISNONZERO_STATE(foundAccepts))) { if (PROCESS_ACCEPTS_NOSQUASH_FN(limex, &ctx->s, &acceptMask, acceptTable, offset, ctx->callback, - ctx->context)) { - return MO_HALT_MATCHING; - } - } - } - return MO_CONTINUE_MATCHING; -} - -static really_inline + ctx->context)) { + return MO_HALT_MATCHING; + } + } + } + return MO_CONTINUE_MATCHING; +} + +static really_inline void COMPRESS_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, void *src, - u64a offset) { - if (!limex->repeatCount) { - return; - } - + u64a offset) { + if (!limex->repeatCount) { + return; + } + STATE_T s = *(STATE_T *)src; - + if (ISZERO_STATE(AND_STATE(LOAD_FROM_ENG(&limex->repeatCyclicMask), s))) { DEBUG_PRINTF("no cyclics are on\n"); return; } - const union RepeatControl *ctrl = - getRepeatControlBaseConst((const char *)src, sizeof(STATE_T)); - char *state_base = (char *)dest + limex->stateSize; - - for (u32 i = 0; i < limex->repeatCount; i++) { + const union RepeatControl *ctrl = + getRepeatControlBaseConst((const char *)src, sizeof(STATE_T)); + char *state_base = (char *)dest + limex->stateSize; + + for (u32 i = 0; i < limex->repeatCount; i++) { DEBUG_PRINTF("repeat %u\n", i); - const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i); + const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i); const ENG_STATE_T *tug_mask = (const ENG_STATE_T *)((const char *)info + info->tugMaskOffset); @@ -451,34 +451,34 @@ void COMPRESS_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, void *src, continue; } - const struct RepeatInfo *repeat = getRepeatInfo(info); + const struct RepeatInfo *repeat = getRepeatInfo(info); DEBUG_PRINTF("packing state (packedCtrlOffset=%u)\n", info->packedCtrlOffset); - repeatPack(state_base + info->packedCtrlOffset, repeat, &ctrl[i], - offset); - } + repeatPack(state_base + info->packedCtrlOffset, repeat, &ctrl[i], + offset); + } *(STATE_T *)src = s; -} - -char JOIN(LIMEX_API_ROOT, _queueCompressState)(const struct NFA *n, +} + +char JOIN(LIMEX_API_ROOT, _queueCompressState)(const struct NFA *n, const struct mq *q, s64a loc) { - void *dest = q->streamState; + void *dest = q->streamState; void *src = q->state; - u8 key = queue_prev_byte(q, loc); - const IMPL_NFA_T *limex = getImplNfa(n); + u8 key = queue_prev_byte(q, loc); + const IMPL_NFA_T *limex = getImplNfa(n); COMPRESS_REPEATS_FN(limex, dest, src, 
q->offset + loc); - COMPRESS_FN(limex, dest, src, key); - return 0; -} - -static really_inline -void EXPAND_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, const void *src, - u64a offset) { - if (!limex->repeatCount) { - return; - } - + COMPRESS_FN(limex, dest, src, key); + return 0; +} + +static really_inline +void EXPAND_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, const void *src, + u64a offset) { + if (!limex->repeatCount) { + return; + } + // Note: state has already been expanded into 'dest'. const STATE_T cyclics = AND_STATE(*(STATE_T *)dest, LOAD_FROM_ENG(&limex->repeatCyclicMask)); @@ -486,14 +486,14 @@ void EXPAND_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, const void *src, DEBUG_PRINTF("no cyclics are on\n"); return; } - - union RepeatControl *ctrl = - getRepeatControlBase((char *)dest, sizeof(STATE_T)); - const char *state_base = (const char *)src + limex->stateSize; - - for (u32 i = 0; i < limex->repeatCount; i++) { + + union RepeatControl *ctrl = + getRepeatControlBase((char *)dest, sizeof(STATE_T)); + const char *state_base = (const char *)src + limex->stateSize; + + for (u32 i = 0; i < limex->repeatCount; i++) { DEBUG_PRINTF("repeat %u\n", i); - const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i); + const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i); const ENG_STATE_T *tug_mask = (const ENG_STATE_T *)((const char *)info + info->tugMaskOffset); @@ -505,137 +505,137 @@ void EXPAND_REPEATS_FN(const IMPL_NFA_T *limex, void *dest, const void *src, DEBUG_PRINTF("unpacking state (packedCtrlOffset=%u)\n", info->packedCtrlOffset); - const struct RepeatInfo *repeat = getRepeatInfo(info); - repeatUnpack(state_base + info->packedCtrlOffset, repeat, offset, - &ctrl[i]); - } -} - -char JOIN(LIMEX_API_ROOT, _expandState)(const struct NFA *n, void *dest, - const void *src, u64a offset, - u8 key) { - const IMPL_NFA_T *limex = getImplNfa(n); - EXPAND_FN(limex, dest, src, key); - EXPAND_REPEATS_FN(limex, dest, src, offset); - return 0; -} - + const struct RepeatInfo *repeat = getRepeatInfo(info); + repeatUnpack(state_base + info->packedCtrlOffset, repeat, offset, + &ctrl[i]); + } +} + +char JOIN(LIMEX_API_ROOT, _expandState)(const struct NFA *n, void *dest, + const void *src, u64a offset, + u8 key) { + const IMPL_NFA_T *limex = getImplNfa(n); + EXPAND_FN(limex, dest, src, key); + EXPAND_REPEATS_FN(limex, dest, src, offset); + return 0; +} + char JOIN(LIMEX_API_ROOT, _queueInitState)(const struct NFA *n, struct mq *q) { *(STATE_T *)q->state = ZERO_STATE; - - // Zero every bounded repeat control block in state. - const IMPL_NFA_T *limex = getImplNfa(n); - union RepeatControl *ctrl = getRepeatControlBase(q->state, sizeof(STATE_T)); - for (u32 i = 0; i < limex->repeatCount; i++) { - memset(&ctrl[i], 0, sizeof(*ctrl)); - } - - return 0; -} - -char JOIN(LIMEX_API_ROOT, _initCompressedState)(const struct NFA *n, - u64a offset, void *state, - u8 key) { - const IMPL_NFA_T *limex = getImplNfa(n); - - STATE_T s = INITIAL_FN(limex, !!offset); - if (ISZERO_STATE(s)) { - DEBUG_PRINTF("state went to zero\n"); - return 0; - } - - // NFA is still active, compress its state and ship it out. - COMPRESS_FN(limex, state, &s, key); - - // Zero every packed bounded repeat control block in stream state. 
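    /* (All repeats are necessarily inactive at stream start, so rather
     * than packing live control blocks the way COMPRESS_REPEATS_FN does
     * via repeatPack, this just zero-fills each packedCtrlSize region,
     * which also leaves the stream state bytes deterministic.) */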
- char *repeat_region = (char *)state + limex->stateSize; - for (u32 i = 0; i < limex->repeatCount; i++) { - const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i); - const struct RepeatInfo *repeat = getRepeatInfo(info); - - memset(repeat_region + info->packedCtrlOffset, 0, - repeat->packedCtrlSize); - } - - return 1; -} - -// Helper for history buffer scans, which catch up the NFA state but don't emit -// matches. -static never_inline -void STREAMSILENT_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, - struct CONTEXT_T *ctx, u64a offset) { - const char first_match = 0; - - UNUSED char rv = STREAM_FN(limex, input, length, ctx, offset, NO_OUTPUT, - NULL, first_match); - assert(rv != MO_HALT_MATCHING); -} - -static never_inline -char STREAMCB_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, - struct CONTEXT_T *ctx, u64a offset) { - const char first_match = 0; - assert(ISALIGNED_CL(ctx)); - return STREAM_FN(limex, input, length, ctx, offset, CALLBACK_OUTPUT, NULL, - first_match); -} - -static never_inline -char STREAMFIRST_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, - struct CONTEXT_T *ctx, u64a offset, u64a *final_loc) { - const char first_match = 1; // Run to first match and stop, no callbacks. - return STREAM_FN(limex, input, length, ctx, offset, NO_OUTPUT, final_loc, - first_match); -} - -// Common code for handling the current event on the queue. -static really_inline -void JOIN(LIMEX_API_ROOT, _HandleEvent)(const IMPL_NFA_T *limex, - struct mq *q, struct CONTEXT_T *ctx, - u64a sp) { -#define DEFINE_CASE(ee) \ - case ee: \ - DEBUG_PRINTF(#ee "\n"); - - u32 e = q->items[q->cur].type; - switch (e) { - DEFINE_CASE(MQE_TOP) + + // Zero every bounded repeat control block in state. + const IMPL_NFA_T *limex = getImplNfa(n); + union RepeatControl *ctrl = getRepeatControlBase(q->state, sizeof(STATE_T)); + for (u32 i = 0; i < limex->repeatCount; i++) { + memset(&ctrl[i], 0, sizeof(*ctrl)); + } + + return 0; +} + +char JOIN(LIMEX_API_ROOT, _initCompressedState)(const struct NFA *n, + u64a offset, void *state, + u8 key) { + const IMPL_NFA_T *limex = getImplNfa(n); + + STATE_T s = INITIAL_FN(limex, !!offset); + if (ISZERO_STATE(s)) { + DEBUG_PRINTF("state went to zero\n"); + return 0; + } + + // NFA is still active, compress its state and ship it out. + COMPRESS_FN(limex, state, &s, key); + + // Zero every packed bounded repeat control block in stream state. + char *repeat_region = (char *)state + limex->stateSize; + for (u32 i = 0; i < limex->repeatCount; i++) { + const struct NFARepeatInfo *info = GET_NFA_REPEAT_INFO_FN(limex, i); + const struct RepeatInfo *repeat = getRepeatInfo(info); + + memset(repeat_region + info->packedCtrlOffset, 0, + repeat->packedCtrlSize); + } + + return 1; +} + +// Helper for history buffer scans, which catch up the NFA state but don't emit +// matches. 
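// (The three stream entry points below are thin wrappers around STREAM_FN,
// differing only in the flags/first_match arguments: NO_OUTPUT for silent
// history catch-up, CALLBACK_OUTPUT for normal reporting, and NO_OUTPUT
// with first_match set to stop at the first accept and hand back its
// location through final_loc.)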
+static never_inline +void STREAMSILENT_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, + struct CONTEXT_T *ctx, u64a offset) { + const char first_match = 0; + + UNUSED char rv = STREAM_FN(limex, input, length, ctx, offset, NO_OUTPUT, + NULL, first_match); + assert(rv != MO_HALT_MATCHING); +} + +static never_inline +char STREAMCB_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, + struct CONTEXT_T *ctx, u64a offset) { + const char first_match = 0; + assert(ISALIGNED_CL(ctx)); + return STREAM_FN(limex, input, length, ctx, offset, CALLBACK_OUTPUT, NULL, + first_match); +} + +static never_inline +char STREAMFIRST_FN(const IMPL_NFA_T *limex, const u8 *input, size_t length, + struct CONTEXT_T *ctx, u64a offset, u64a *final_loc) { + const char first_match = 1; // Run to first match and stop, no callbacks. + return STREAM_FN(limex, input, length, ctx, offset, NO_OUTPUT, final_loc, + first_match); +} + +// Common code for handling the current event on the queue. +static really_inline +void JOIN(LIMEX_API_ROOT, _HandleEvent)(const IMPL_NFA_T *limex, + struct mq *q, struct CONTEXT_T *ctx, + u64a sp) { +#define DEFINE_CASE(ee) \ + case ee: \ + DEBUG_PRINTF(#ee "\n"); + + u32 e = q->items[q->cur].type; + switch (e) { + DEFINE_CASE(MQE_TOP) ctx->s = TOP_FN(limex, !!sp, ctx->s); - break; - DEFINE_CASE(MQE_START) - break; - DEFINE_CASE(MQE_END) - break; - default: - assert(e >= MQE_TOP_FIRST); - assert(e < MQE_INVALID); - DEBUG_PRINTF("MQE_TOP + %d\n", ((int)e - MQE_TOP_FIRST)); + break; + DEFINE_CASE(MQE_START) + break; + DEFINE_CASE(MQE_END) + break; + default: + assert(e >= MQE_TOP_FIRST); + assert(e < MQE_INVALID); + DEBUG_PRINTF("MQE_TOP + %d\n", ((int)e - MQE_TOP_FIRST)); ctx->s = TOPN_FN(limex, ctx->s, e - MQE_TOP_FIRST); - } -#undef DEFINE_CASE -} - -// "Classic" queue call, used by outfixes -char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) { - const IMPL_NFA_T *limex = getImplNfa(n); - - if (q->report_current) { - char rv = REPORTCURRENT_FN(limex, q); - - q->report_current = 0; - - if (rv == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - - if (q->cur == q->end) { - return 1; - } - - assert(q->cur + 1 < q->end); /* require at least two items */ - + } +#undef DEFINE_CASE +} + +// "Classic" queue call, used by outfixes +char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) { + const IMPL_NFA_T *limex = getImplNfa(n); + + if (q->report_current) { + char rv = REPORTCURRENT_FN(limex, q); + + q->report_current = 0; + + if (rv == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } + + if (q->cur == q->end) { + return 1; + } + + assert(q->cur + 1 < q->end); /* require at least two items */ + struct CONTEXT_T ctx; ctx.repeat_ctrl = getRepeatControlBase(q->state, sizeof(STATE_T)); ctx.repeat_state = q->streamState + limex->stateSize; @@ -643,94 +643,94 @@ char JOIN(LIMEX_API_ROOT, _Q)(const struct NFA *n, struct mq *q, s64a end) { ctx.context = q->context; ctx.cached_estate = ZERO_STATE; ctx.cached_br = 0; - - assert(q->items[q->cur].location >= 0); - DEBUG_PRINTF("LOAD STATE\n"); + + assert(q->items[q->cur].location >= 0); + DEBUG_PRINTF("LOAD STATE\n"); ctx.s = *(STATE_T *)q->state; - assert(q->items[q->cur].type == MQE_START); - - u64a offset = q->offset; - u64a sp = offset + q->items[q->cur].location; - u64a end_abs = offset + end; - q->cur++; - - while (q->cur < q->end && sp <= end_abs) { - u64a ep = offset + q->items[q->cur].location; - ep = MIN(ep, end_abs); - assert(ep >= sp); - - assert(sp >= offset); // We no longer 
do history buffer scans here. - - if (sp >= ep) { - goto scan_done; - } - - /* do main buffer region */ - DEBUG_PRINTF("MAIN BUFFER SCAN\n"); - assert(ep - offset <= q->length); + assert(q->items[q->cur].type == MQE_START); + + u64a offset = q->offset; + u64a sp = offset + q->items[q->cur].location; + u64a end_abs = offset + end; + q->cur++; + + while (q->cur < q->end && sp <= end_abs) { + u64a ep = offset + q->items[q->cur].location; + ep = MIN(ep, end_abs); + assert(ep >= sp); + + assert(sp >= offset); // We no longer do history buffer scans here. + + if (sp >= ep) { + goto scan_done; + } + + /* do main buffer region */ + DEBUG_PRINTF("MAIN BUFFER SCAN\n"); + assert(ep - offset <= q->length); if (STREAMCB_FN(limex, q->buffer + sp - offset, ep - sp, &ctx, sp) - == MO_HALT_MATCHING) { + == MO_HALT_MATCHING) { *(STATE_T *)q->state = ZERO_STATE; - return 0; - } - - DEBUG_PRINTF("SCAN DONE\n"); - scan_done: - sp = ep; - - if (sp != offset + q->items[q->cur].location) { - assert(q->cur); - DEBUG_PRINTF("bail: sp = %llu end_abs == %llu offset == %llu\n", - sp, end_abs, offset); - assert(sp == end_abs); - q->cur--; - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = sp - offset; - DEBUG_PRINTF("bailing q->cur %u q->end %u\n", q->cur, q->end); + return 0; + } + + DEBUG_PRINTF("SCAN DONE\n"); + scan_done: + sp = ep; + + if (sp != offset + q->items[q->cur].location) { + assert(q->cur); + DEBUG_PRINTF("bail: sp = %llu end_abs == %llu offset == %llu\n", + sp, end_abs, offset); + assert(sp == end_abs); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = sp - offset; + DEBUG_PRINTF("bailing q->cur %u q->end %u\n", q->cur, q->end); *(STATE_T *)q->state = ctx.s; - return MO_ALIVE; - } - + return MO_ALIVE; + } + JOIN(LIMEX_API_ROOT, _HandleEvent)(limex, q, &ctx, sp); - - q->cur++; - } - + + q->cur++; + } + EXPIRE_ESTATE_FN(limex, &ctx, sp); - - DEBUG_PRINTF("END\n"); + + DEBUG_PRINTF("END\n"); *(STATE_T *)q->state = ctx.s; - - if (q->cur != q->end) { - q->cur--; - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = sp - offset; - return MO_ALIVE; - } - + + if (q->cur != q->end) { + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = sp - offset; + return MO_ALIVE; + } + return ISNONZERO_STATE(ctx.s); -} - -/* used by suffix execution in Rose */ -char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) { - const IMPL_NFA_T *limex = getImplNfa(n); - - if (q->report_current) { - char rv = REPORTCURRENT_FN(limex, q); - - q->report_current = 0; - - if (rv == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; - } - } - - if (q->cur == q->end) { - return 1; - } - - assert(q->cur + 1 < q->end); /* require at least two items */ - +} + +/* used by suffix execution in Rose */ +char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) { + const IMPL_NFA_T *limex = getImplNfa(n); + + if (q->report_current) { + char rv = REPORTCURRENT_FN(limex, q); + + q->report_current = 0; + + if (rv == MO_HALT_MATCHING) { + return MO_HALT_MATCHING; + } + } + + if (q->cur == q->end) { + return 1; + } + + assert(q->cur + 1 < q->end); /* require at least two items */ + struct CONTEXT_T ctx; ctx.repeat_ctrl = getRepeatControlBase(q->state, sizeof(STATE_T)); ctx.repeat_state = q->streamState + limex->stateSize; @@ -738,23 +738,23 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) { ctx.context = q->context; ctx.cached_estate = ZERO_STATE; ctx.cached_br = 0; - - DEBUG_PRINTF("LOAD STATE\n"); + + 
DEBUG_PRINTF("LOAD STATE\n"); ctx.s = *(STATE_T *)q->state; - assert(q->items[q->cur].type == MQE_START); - - u64a offset = q->offset; - u64a sp = offset + q->items[q->cur].location; - u64a end_abs = offset + end; - q->cur++; - - while (q->cur < q->end && sp <= end_abs) { - u64a ep = offset + q->items[q->cur].location; - DEBUG_PRINTF("sp = %llu, ep = %llu, end_abs = %llu\n", - sp, ep, end_abs); - ep = MIN(ep, end_abs); - assert(ep >= sp); - + assert(q->items[q->cur].type == MQE_START); + + u64a offset = q->offset; + u64a sp = offset + q->items[q->cur].location; + u64a end_abs = offset + end; + q->cur++; + + while (q->cur < q->end && sp <= end_abs) { + u64a ep = offset + q->items[q->cur].location; + DEBUG_PRINTF("sp = %llu, ep = %llu, end_abs = %llu\n", + sp, ep, end_abs); + ep = MIN(ep, end_abs); + assert(ep >= sp); + if (sp < offset) { DEBUG_PRINTF("HISTORY BUFFER SCAN\n"); assert(offset - sp <= q->hlength); @@ -773,76 +773,76 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) { *(STATE_T *)q->state = ctx.s; return MO_MATCHES_PENDING; } - + sp = local_ep; } - if (sp >= ep) { - goto scan_done; - } - - /* do main buffer region */ - u64a final_look = 0; - assert(ep - offset <= q->length); + if (sp >= ep) { + goto scan_done; + } + + /* do main buffer region */ + u64a final_look = 0; + assert(ep - offset <= q->length); if (STREAMFIRST_FN(limex, q->buffer + sp - offset, ep - sp, &ctx, sp, - &final_look) == MO_HALT_MATCHING) { - DEBUG_PRINTF("final_look:%llu sp:%llu end_abs:%llu offset:%llu\n", - final_look, sp, end_abs, offset); - assert(q->cur); - q->cur--; - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = sp + final_look - offset; + &final_look) == MO_HALT_MATCHING) { + DEBUG_PRINTF("final_look:%llu sp:%llu end_abs:%llu offset:%llu\n", + final_look, sp, end_abs, offset); + assert(q->cur); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = sp + final_look - offset; *(STATE_T *)q->state = ctx.s; - return MO_MATCHES_PENDING; - } - - scan_done: - sp = ep; - - if (sp != offset + q->items[q->cur].location) { - assert(q->cur); - DEBUG_PRINTF("bail: sp = %llu end_abs == %llu offset == %llu\n", - sp, end_abs, offset); - assert(sp == end_abs); - q->cur--; - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = sp - offset; - DEBUG_PRINTF("bailing q->cur %u q->end %u\n", q->cur, q->end); + return MO_MATCHES_PENDING; + } + + scan_done: + sp = ep; + + if (sp != offset + q->items[q->cur].location) { + assert(q->cur); + DEBUG_PRINTF("bail: sp = %llu end_abs == %llu offset == %llu\n", + sp, end_abs, offset); + assert(sp == end_abs); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = sp - offset; + DEBUG_PRINTF("bailing q->cur %u q->end %u\n", q->cur, q->end); *(STATE_T *)q->state = ctx.s; - return MO_ALIVE; - } - + return MO_ALIVE; + } + JOIN(LIMEX_API_ROOT, _HandleEvent)(limex, q, &ctx, sp); - - q->cur++; - } - + + q->cur++; + } + EXPIRE_ESTATE_FN(limex, &ctx, sp); - - DEBUG_PRINTF("END\n"); + + DEBUG_PRINTF("END\n"); *(STATE_T *)q->state = ctx.s; - - if (q->cur != q->end) { - q->cur--; - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = sp - offset; - return MO_ALIVE; - } - + + if (q->cur != q->end) { + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = sp - offset; + return MO_ALIVE; + } + return ISNONZERO_STATE(ctx.s); -} - -// Used for execution Rose prefix/infixes. 
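// (Unlike _Q/_Q2 above, this variant may restart the NFA mid-scan: when
// the distance between queue events exceeds n->maxWidth, it advances sp to
// ep - maxWidth and reloads INITIAL_FN, since an engine with a bounded
// match width evidently cannot still be matching across a longer gap.)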
-char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q, - ReportID report) { - const IMPL_NFA_T *limex = getImplNfa(n); - - if (q->cur == q->end) { - return 1; - } - - assert(q->cur + 1 < q->end); /* require at least two items */ - +} + +// Used for execution Rose prefix/infixes. +char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q, + ReportID report) { + const IMPL_NFA_T *limex = getImplNfa(n); + + if (q->cur == q->end) { + return 1; + } + + assert(q->cur + 1 < q->end); /* require at least two items */ + struct CONTEXT_T ctx; ctx.repeat_ctrl = getRepeatControlBase(q->state, sizeof(STATE_T)); ctx.repeat_state = q->streamState + limex->stateSize; @@ -850,97 +850,97 @@ char JOIN(LIMEX_API_ROOT, _QR)(const struct NFA *n, struct mq *q, ctx.context = NULL; ctx.cached_estate = ZERO_STATE; ctx.cached_br = 0; - - DEBUG_PRINTF("LOAD STATE\n"); + + DEBUG_PRINTF("LOAD STATE\n"); ctx.s = *(STATE_T *)q->state; - assert(q->items[q->cur].type == MQE_START); - - u64a offset = q->offset; - u64a sp = offset + q->items[q->cur].location; - q->cur++; - - while (q->cur < q->end) { - u64a ep = offset + q->items[q->cur].location; - if (n->maxWidth) { - if (ep - sp > n->maxWidth) { - sp = ep - n->maxWidth; + assert(q->items[q->cur].type == MQE_START); + + u64a offset = q->offset; + u64a sp = offset + q->items[q->cur].location; + q->cur++; + + while (q->cur < q->end) { + u64a ep = offset + q->items[q->cur].location; + if (n->maxWidth) { + if (ep - sp > n->maxWidth) { + sp = ep - n->maxWidth; ctx.s = INITIAL_FN(limex, !!sp); - } - } - assert(ep >= sp); - - if (sp < offset) { - DEBUG_PRINTF("HISTORY BUFFER SCAN\n"); - assert(offset - sp <= q->hlength); - u64a local_ep = MIN(offset, ep); - /* we are starting inside the history buffer */ - STREAMSILENT_FN(limex, q->history + q->hlength + sp - offset, + } + } + assert(ep >= sp); + + if (sp < offset) { + DEBUG_PRINTF("HISTORY BUFFER SCAN\n"); + assert(offset - sp <= q->hlength); + u64a local_ep = MIN(offset, ep); + /* we are starting inside the history buffer */ + STREAMSILENT_FN(limex, q->history + q->hlength + sp - offset, local_ep - sp, &ctx, sp); - - sp = local_ep; - } - - if (sp >= ep) { - goto scan_done; - } - - /* do main buffer region */ - DEBUG_PRINTF("MAIN BUFFER SCAN\n"); - assert(ep - offset <= q->length); + + sp = local_ep; + } + + if (sp >= ep) { + goto scan_done; + } + + /* do main buffer region */ + DEBUG_PRINTF("MAIN BUFFER SCAN\n"); + assert(ep - offset <= q->length); STREAMSILENT_FN(limex, q->buffer + sp - offset, ep - sp, &ctx, sp); - - DEBUG_PRINTF("SCAN DONE\n"); - scan_done: - sp = ep; - + + DEBUG_PRINTF("SCAN DONE\n"); + scan_done: + sp = ep; + JOIN(LIMEX_API_ROOT, _HandleEvent)(limex, q, &ctx, sp); - - q->cur++; - } - + + q->cur++; + } + EXPIRE_ESTATE_FN(limex, &ctx, sp); - - DEBUG_PRINTF("END, nfa is %s\n", + + DEBUG_PRINTF("END, nfa is %s\n", ISNONZERO_STATE(ctx.s) ? 
"still alive" : "dead"); - + *(STATE_T *)q->state = ctx.s; - + if (JOIN(limexInAccept, SIZE)(limex, ctx.s, ctx.repeat_ctrl, ctx.repeat_state, sp + 1, report)) { - return MO_MATCHES_PENDING; - } - + return MO_MATCHES_PENDING; + } + return ISNONZERO_STATE(ctx.s); -} - -char JOIN(LIMEX_API_ROOT, _testEOD)(const struct NFA *n, const char *state, +} + +char JOIN(LIMEX_API_ROOT, _testEOD)(const struct NFA *n, const char *state, const char *streamState, u64a offset, NfaCallback callback, void *context) { - assert(n && state); - - const IMPL_NFA_T *limex = getImplNfa(n); - const STATE_T *sptr = (const STATE_T *)state; - const union RepeatControl *repeat_ctrl = - getRepeatControlBaseConst(state, sizeof(STATE_T)); - const char *repeat_state = streamState + limex->stateSize; + assert(n && state); + + const IMPL_NFA_T *limex = getImplNfa(n); + const STATE_T *sptr = (const STATE_T *)state; + const union RepeatControl *repeat_ctrl = + getRepeatControlBaseConst(state, sizeof(STATE_T)); + const char *repeat_state = streamState + limex->stateSize; return TESTEOD_FN(limex, sptr, repeat_ctrl, repeat_state, offset, callback, context); -} - -char JOIN(LIMEX_API_ROOT, _reportCurrent)(const struct NFA *n, struct mq *q) { - const IMPL_NFA_T *limex = getImplNfa(n); - REPORTCURRENT_FN(limex, q); - return 1; -} - -// Block mode reverse scan. -char JOIN(LIMEX_API_ROOT, _B_Reverse)(const struct NFA *n, u64a offset, +} + +char JOIN(LIMEX_API_ROOT, _reportCurrent)(const struct NFA *n, struct mq *q) { + const IMPL_NFA_T *limex = getImplNfa(n); + REPORTCURRENT_FN(limex, q); + return 1; +} + +// Block mode reverse scan. +char JOIN(LIMEX_API_ROOT, _B_Reverse)(const struct NFA *n, u64a offset, const u8 *buf, size_t buflen, const u8 *hbuf, size_t hlen, NfaCallback cb, void *context) { - assert(buf || hbuf); - assert(buflen || hlen); - + assert(buf || hbuf); + assert(buflen || hlen); + struct CONTEXT_T ctx; ctx.repeat_ctrl = NULL; ctx.repeat_state = NULL; @@ -948,52 +948,52 @@ char JOIN(LIMEX_API_ROOT, _B_Reverse)(const struct NFA *n, u64a offset, ctx.context = context; ctx.cached_estate = ZERO_STATE; ctx.cached_br = 0; - - const IMPL_NFA_T *limex = getImplNfa(n); + + const IMPL_NFA_T *limex = getImplNfa(n); ctx.s = INITIAL_FN(limex, 0); // always anchored - - // 'buf' may be null, for example when we're scanning at EOD time. - if (buflen) { - assert(buf); - DEBUG_PRINTF("MAIN BUFFER SCAN, %zu bytes\n", buflen); - offset -= buflen; + + // 'buf' may be null, for example when we're scanning at EOD time. + if (buflen) { + assert(buf); + DEBUG_PRINTF("MAIN BUFFER SCAN, %zu bytes\n", buflen); + offset -= buflen; REV_STREAM_FN(limex, buf, buflen, &ctx, offset); - } - - if (hlen) { - assert(hbuf); - DEBUG_PRINTF("HISTORY BUFFER SCAN, %zu bytes\n", hlen); - offset -= hlen; + } + + if (hlen) { + assert(hbuf); + DEBUG_PRINTF("HISTORY BUFFER SCAN, %zu bytes\n", hlen); + offset -= hlen; REV_STREAM_FN(limex, hbuf, hlen, &ctx, offset); - } - + } + if (offset == 0 && limex->acceptEodCount && ISNONZERO_STATE(ctx.s)) { const union RepeatControl *repeat_ctrl = NULL; const char *repeat_state = NULL; TESTEOD_FN(limex, &ctx.s, repeat_ctrl, repeat_state, offset, cb, context); - } - - // NOTE: return value is unused. 
- return 0; -} - -char JOIN(LIMEX_API_ROOT, _inAccept)(const struct NFA *nfa, - ReportID report, struct mq *q) { - assert(nfa && q); - assert(q->state && q->streamState); - - const IMPL_NFA_T *limex = getImplNfa(nfa); - union RepeatControl *repeat_ctrl = - getRepeatControlBase(q->state, sizeof(STATE_T)); - char *repeat_state = q->streamState + limex->stateSize; + } + + // NOTE: return value is unused. + return 0; +} + +char JOIN(LIMEX_API_ROOT, _inAccept)(const struct NFA *nfa, + ReportID report, struct mq *q) { + assert(nfa && q); + assert(q->state && q->streamState); + + const IMPL_NFA_T *limex = getImplNfa(nfa); + union RepeatControl *repeat_ctrl = + getRepeatControlBase(q->state, sizeof(STATE_T)); + char *repeat_state = q->streamState + limex->stateSize; STATE_T state = *(STATE_T *)q->state; - u64a offset = q->offset + q_last_loc(q) + 1; - - return JOIN(limexInAccept, SIZE)(limex, state, repeat_ctrl, repeat_state, - offset, report); -} - + u64a offset = q->offset + q_last_loc(q) + 1; + + return JOIN(limexInAccept, SIZE)(limex, state, repeat_ctrl, repeat_state, + offset, report); +} + char JOIN(LIMEX_API_ROOT, _inAnyAccept)(const struct NFA *nfa, struct mq *q) { assert(nfa && q); assert(q->state && q->streamState); @@ -1009,67 +1009,67 @@ char JOIN(LIMEX_API_ROOT, _inAnyAccept)(const struct NFA *nfa, struct mq *q) { offset); } -enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)( - const struct NFA *nfa, - struct mq *q, - s64a loc) { - assert(nfa->flags & NFA_ZOMBIE); - const IMPL_NFA_T *limex = getImplNfa(nfa); +enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)( + const struct NFA *nfa, + struct mq *q, + s64a loc) { + assert(nfa->flags & NFA_ZOMBIE); + const IMPL_NFA_T *limex = getImplNfa(nfa); STATE_T state = *(STATE_T *)q->state; STATE_T zmask = LOAD_FROM_ENG(&limex->zombieMask); - - if (limex->repeatCount) { - u64a offset = q->offset + loc + 1; - union RepeatControl *repeat_ctrl = - getRepeatControlBase(q->state, sizeof(STATE_T)); - char *repeat_state = q->streamState + limex->stateSize; - SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state, offset, &state); - } - - if (ISNONZERO_STATE(AND_STATE(state, zmask))) { - return NFA_ZOMBIE_ALWAYS_YES; - } - - return NFA_ZOMBIE_NO; -} - -#undef TESTEOD_FN -#undef INITIAL_FN -#undef TOP_FN -#undef TOPN_FN -#undef REPORTCURRENT_FN -#undef COMPRESS_FN -#undef EXPAND_FN -#undef COMPRESS_REPEATS_FN -#undef EXPAND_REPEATS_FN -#undef PROCESS_ACCEPTS_FN -#undef PROCESS_ACCEPTS_NOSQUASH_FN -#undef GET_NFA_REPEAT_INFO_FN -#undef RUN_ACCEL_FN -#undef RUN_EXCEPTIONS_FN -#undef REV_STREAM_FN + + if (limex->repeatCount) { + u64a offset = q->offset + loc + 1; + union RepeatControl *repeat_ctrl = + getRepeatControlBase(q->state, sizeof(STATE_T)); + char *repeat_state = q->streamState + limex->stateSize; + SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state, offset, &state); + } + + if (ISNONZERO_STATE(AND_STATE(state, zmask))) { + return NFA_ZOMBIE_ALWAYS_YES; + } + + return NFA_ZOMBIE_NO; +} + +#undef TESTEOD_FN +#undef INITIAL_FN +#undef TOP_FN +#undef TOPN_FN +#undef REPORTCURRENT_FN +#undef COMPRESS_FN +#undef EXPAND_FN +#undef COMPRESS_REPEATS_FN +#undef EXPAND_REPEATS_FN +#undef PROCESS_ACCEPTS_FN +#undef PROCESS_ACCEPTS_NOSQUASH_FN +#undef GET_NFA_REPEAT_INFO_FN +#undef RUN_ACCEL_FN +#undef RUN_EXCEPTIONS_FN +#undef REV_STREAM_FN #undef LOOP_NOACCEL_FN -#undef STREAM_FN -#undef STREAMCB_FN -#undef STREAMFIRST_FN -#undef STREAMSILENT_FN -#undef CONTEXT_T -#undef EXCEPTION_T -#undef AND_STATE -#undef ANDNOT_STATE -#undef OR_STATE 
+#undef STREAM_FN +#undef STREAMCB_FN +#undef STREAMFIRST_FN +#undef STREAMSILENT_FN +#undef CONTEXT_T +#undef EXCEPTION_T +#undef AND_STATE +#undef ANDNOT_STATE +#undef OR_STATE #undef LSHIFT_STATE -#undef TESTBIT_STATE +#undef TESTBIT_STATE #undef CLEARBIT_STATE -#undef ZERO_STATE -#undef ISNONZERO_STATE -#undef ISZERO_STATE -#undef NOTEQ_STATE -#undef DIFFRICH_STATE -#undef INLINE_ATTR_INT -#undef IMPL_NFA_T -#undef SQUASH_UNTUG_BR_FN -#undef ACCEL_MASK -#undef ACCEL_AND_FRIENDS_MASK -#undef EXCEPTION_MASK -#undef LIMEX_API_ROOT +#undef ZERO_STATE +#undef ISNONZERO_STATE +#undef ISZERO_STATE +#undef NOTEQ_STATE +#undef DIFFRICH_STATE +#undef INLINE_ATTR_INT +#undef IMPL_NFA_T +#undef SQUASH_UNTUG_BR_FN +#undef ACCEL_MASK +#undef ACCEL_AND_FRIENDS_MASK +#undef EXCEPTION_MASK +#undef LIMEX_API_ROOT diff --git a/contrib/libs/hyperscan/src/nfa/limex_simd128.c b/contrib/libs/hyperscan/src/nfa/limex_simd128.c index 2076423172..c5f2b33e3e 100644 --- a/contrib/libs/hyperscan/src/nfa/limex_simd128.c +++ b/contrib/libs/hyperscan/src/nfa/limex_simd128.c @@ -1,63 +1,63 @@ -/* +/* * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief LimEx NFA: 128-bit SIMD runtime implementations. - */ - -//#define DEBUG_INPUT -//#define DEBUG_EXCEPTIONS - -#include "limex.h" - -#include "accel.h" -#include "limex_internal.h" -#include "nfa_internal.h" -#include "ue2common.h" -#include "util/bitutils.h" -#include "util/simd_utils.h" - -// Common code -#define STATE_ON_STACK -#define ESTATE_ON_STACK - -#include "limex_runtime.h" - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief LimEx NFA: 128-bit SIMD runtime implementations. + */ + +//#define DEBUG_INPUT +//#define DEBUG_EXCEPTIONS + +#include "limex.h" + +#include "accel.h" +#include "limex_internal.h" +#include "nfa_internal.h" +#include "ue2common.h" +#include "util/bitutils.h" +#include "util/simd_utils.h" + +// Common code +#define STATE_ON_STACK +#define ESTATE_ON_STACK + +#include "limex_runtime.h" + #define SIZE 128 #define STATE_T m128 #define ENG_STATE_T m128 #define LOAD_FROM_ENG load_m128 -#include "limex_exceptional.h" - -#include "limex_state_impl.h" - -#define INLINE_ATTR really_inline -#include "limex_common_impl.h" - -#include "limex_runtime_impl.h" +#include "limex_exceptional.h" + +#include "limex_state_impl.h" + +#define INLINE_ATTR really_inline +#include "limex_common_impl.h" + +#include "limex_runtime_impl.h" diff --git a/contrib/libs/hyperscan/src/nfa/limex_simd256.c b/contrib/libs/hyperscan/src/nfa/limex_simd256.c index ebe1e6bc81..cc23290810 100644 --- a/contrib/libs/hyperscan/src/nfa/limex_simd256.c +++ b/contrib/libs/hyperscan/src/nfa/limex_simd256.c @@ -1,60 +1,60 @@ -/* +/* * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief LimEx NFA: 256-bit SIMD runtime implementations. - */ - -//#define DEBUG_INPUT -//#define DEBUG_EXCEPTIONS - -#include "limex.h" - -#include "accel.h" -#include "limex_internal.h" -#include "nfa_internal.h" -#include "ue2common.h" -#include "util/bitutils.h" -#include "util/simd_utils.h" - -// Common code -#include "limex_runtime.h" - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief LimEx NFA: 256-bit SIMD runtime implementations. 
+ */ + +//#define DEBUG_INPUT +//#define DEBUG_EXCEPTIONS + +#include "limex.h" + +#include "accel.h" +#include "limex_internal.h" +#include "nfa_internal.h" +#include "ue2common.h" +#include "util/bitutils.h" +#include "util/simd_utils.h" + +// Common code +#include "limex_runtime.h" + #define SIZE 256 #define STATE_T m256 #define ENG_STATE_T m256 #define LOAD_FROM_ENG load_m256 -#include "limex_exceptional.h" - -#include "limex_state_impl.h" - -#define INLINE_ATTR really_inline -#include "limex_common_impl.h" - -#include "limex_runtime_impl.h" +#include "limex_exceptional.h" + +#include "limex_state_impl.h" + +#define INLINE_ATTR really_inline +#include "limex_common_impl.h" + +#include "limex_runtime_impl.h" diff --git a/contrib/libs/hyperscan/src/nfa/limex_simd384.c b/contrib/libs/hyperscan/src/nfa/limex_simd384.c index 0474e0706a..7e596e48b0 100644 --- a/contrib/libs/hyperscan/src/nfa/limex_simd384.c +++ b/contrib/libs/hyperscan/src/nfa/limex_simd384.c @@ -1,60 +1,60 @@ -/* +/* * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief LimEx NFA: 384-bit SIMD runtime implementations. - */ - -//#define DEBUG_INPUT -//#define DEBUG_EXCEPTIONS - -#include "limex.h" - -#include "accel.h" -#include "limex_internal.h" -#include "nfa_internal.h" -#include "ue2common.h" -#include "util/bitutils.h" -#include "util/simd_utils.h" - -// Common code -#include "limex_runtime.h" - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief LimEx NFA: 384-bit SIMD runtime implementations. + */ + +//#define DEBUG_INPUT +//#define DEBUG_EXCEPTIONS + +#include "limex.h" + +#include "accel.h" +#include "limex_internal.h" +#include "nfa_internal.h" +#include "ue2common.h" +#include "util/bitutils.h" +#include "util/simd_utils.h" + +// Common code +#include "limex_runtime.h" + #define SIZE 384 #define STATE_T m384 #define ENG_STATE_T m384 #define LOAD_FROM_ENG load_m384 -#include "limex_exceptional.h" - -#include "limex_state_impl.h" - -#define INLINE_ATTR really_inline -#include "limex_common_impl.h" - -#include "limex_runtime_impl.h" +#include "limex_exceptional.h" + +#include "limex_state_impl.h" + +#define INLINE_ATTR really_inline +#include "limex_common_impl.h" + +#include "limex_runtime_impl.h" diff --git a/contrib/libs/hyperscan/src/nfa/limex_state_impl.h b/contrib/libs/hyperscan/src/nfa/limex_state_impl.h index 0a71678792..81153f7171 100644 --- a/contrib/libs/hyperscan/src/nfa/limex_state_impl.h +++ b/contrib/libs/hyperscan/src/nfa/limex_state_impl.h @@ -1,141 +1,141 @@ -/* +/* * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief NFA stream state handling. - */ - -#include "util/join.h" -#include "util/partial_store.h" -#include "util/state_compress.h" -#include <string.h> - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief NFA stream state handling. + */ + +#include "util/join.h" +#include "util/partial_store.h" +#include "util/state_compress.h" +#include <string.h> + #if !defined(SIZE) || !defined(STATE_T) || !defined(LOAD_FROM_ENG) # error Must define SIZE, STATE_T, LOAD_FROM_ENG in includer. 
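/*
 * A sketch of how this includer-parameterised template is used (hedged:
 * the 128/256/384-bit instantiations appear elsewhere in this commit; the
 * 512-bit bindings shown below follow the same pattern but are an
 * assumption, not part of this diff):
 *
 *     #define SIZE 512
 *     #define STATE_T m512
 *     #define ENG_STATE_T m512
 *     #define LOAD_FROM_ENG load_m512
 *     #include "limex_state_impl.h"
 *
 * JOIN() token-pastes SIZE into each identifier, so every includer gets
 * its own specialisations, e.g. moNfaCompressState512 and
 * moNfaExpandState512, compiled against its own STATE_T.
 */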
-#endif - -#define IMPL_NFA_T JOIN(struct LimExNFA, SIZE) -#define COMMON_T JOIN(NFACommon, SIZE) -#define REACHMASK_FN JOIN(moNfaReachMask, SIZE) -#define COMPRESS_FN JOIN(moNfaCompressState, SIZE) -#define EXPAND_FN JOIN(moNfaExpandState, SIZE) +#endif + +#define IMPL_NFA_T JOIN(struct LimExNFA, SIZE) +#define COMMON_T JOIN(NFACommon, SIZE) +#define REACHMASK_FN JOIN(moNfaReachMask, SIZE) +#define COMPRESS_FN JOIN(moNfaCompressState, SIZE) +#define EXPAND_FN JOIN(moNfaExpandState, SIZE) #define COMPRESSED_STORE_FN JOIN(store_compressed_, STATE_T) #define COMPRESSED_LOAD_FN JOIN(load_compressed_, STATE_T) -#define PARTIAL_STORE_FN JOIN(partial_store_, STATE_T) -#define PARTIAL_LOAD_FN JOIN(partial_load_, STATE_T) -#define OR_STATE JOIN(or_, STATE_T) -#define AND_STATE JOIN(and_, STATE_T) -#define ISZERO_STATE JOIN(isZero_, STATE_T) - -static really_inline +#define PARTIAL_STORE_FN JOIN(partial_store_, STATE_T) +#define PARTIAL_LOAD_FN JOIN(partial_load_, STATE_T) +#define OR_STATE JOIN(or_, STATE_T) +#define AND_STATE JOIN(and_, STATE_T) +#define ISZERO_STATE JOIN(isZero_, STATE_T) + +static really_inline const ENG_STATE_T *get_reach_table(const IMPL_NFA_T *limex) { const ENG_STATE_T *reach = (const ENG_STATE_T *)((const char *)limex + sizeof(*limex)); assert(ISALIGNED_N(reach, alignof(ENG_STATE_T))); return reach; -} - -static really_inline +} + +static really_inline STATE_T REACHMASK_FN(const IMPL_NFA_T *limex, const u8 key) { const ENG_STATE_T *reach = get_reach_table(limex); return LOAD_FROM_ENG(&reach[limex->reachMap[key]]); } static really_inline -void COMPRESS_FN(const IMPL_NFA_T *limex, u8 *dest, const STATE_T *src, - u8 key) { - assert(ISALIGNED_N(src, alignof(STATE_T))); +void COMPRESS_FN(const IMPL_NFA_T *limex, u8 *dest, const STATE_T *src, + u8 key) { + assert(ISALIGNED_N(src, alignof(STATE_T))); STATE_T a_src = *src; - - DEBUG_PRINTF("compress state: %p -> %p\n", src, dest); - - if (!(limex->flags & LIMEX_FLAG_COMPRESS_STATE)) { - // No key-based compression, just a partial store. - DEBUG_PRINTF("store state into %u bytes\n", limex->stateSize); - PARTIAL_STORE_FN(dest, a_src, limex->stateSize); - } else { - DEBUG_PRINTF("compress state, key=%hhx\n", key); - + + DEBUG_PRINTF("compress state: %p -> %p\n", src, dest); + + if (!(limex->flags & LIMEX_FLAG_COMPRESS_STATE)) { + // No key-based compression, just a partial store. + DEBUG_PRINTF("store state into %u bytes\n", limex->stateSize); + PARTIAL_STORE_FN(dest, a_src, limex->stateSize); + } else { + DEBUG_PRINTF("compress state, key=%hhx\n", key); + STATE_T reachmask = REACHMASK_FN(limex, key); - - // Masked compression means that we mask off the initDs states and - // provide a shortcut for the all-zeroes case. Note that these must be - // switched on in the EXPAND call below. - if (limex->flags & LIMEX_FLAG_COMPRESS_MASKED) { + + // Masked compression means that we mask off the initDs states and + // provide a shortcut for the all-zeroes case. Note that these must be + // switched on in the EXPAND call below. 
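/*
 * Worked toy example of the masked scheme (illustrative values only,
 * assuming an 8-bit state for readability):
 *
 *     a_src        = 0x36        // live states
 *     compressMask = 0x0f        // initDs states fall outside the mask
 *     s = a_src & compressMask;  // s == 0x06
 *
 * If s were zero, the stream state would be stored as all-zero bytes
 * (the shortcut below). Otherwise only the bits selected by
 * (compressMask & reachmask) are packed into dest, and EXPAND_FN later
 * ORs limex->initDS back in; that is why the masked-off states must be
 * switched on again on expansion.
 */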
+ if (limex->flags & LIMEX_FLAG_COMPRESS_MASKED) { STATE_T s = AND_STATE(LOAD_FROM_ENG(&limex->compressMask), a_src); - if (ISZERO_STATE(s)) { - DEBUG_PRINTF("after compression mask, all states are zero\n"); - memset(dest, 0, limex->stateSize); - return; - } - + if (ISZERO_STATE(s)) { + DEBUG_PRINTF("after compression mask, all states are zero\n"); + memset(dest, 0, limex->stateSize); + return; + } + STATE_T mask = AND_STATE(LOAD_FROM_ENG(&limex->compressMask), reachmask); - COMPRESSED_STORE_FN(dest, &s, &mask, limex->stateSize); - } else { + COMPRESSED_STORE_FN(dest, &s, &mask, limex->stateSize); + } else { COMPRESSED_STORE_FN(dest, src, &reachmask, limex->stateSize); - } - } -} - -static really_inline + } + } +} + +static really_inline void EXPAND_FN(const IMPL_NFA_T *limex, STATE_T *dest, const u8 *src, u8 key) { - assert(ISALIGNED_N(dest, alignof(STATE_T))); - DEBUG_PRINTF("expand state: %p -> %p\n", src, dest); - - if (!(limex->flags & LIMEX_FLAG_COMPRESS_STATE)) { - // No key-based compression, just a partial load. - DEBUG_PRINTF("load state from %u bytes\n", limex->stateSize); - *dest = PARTIAL_LOAD_FN(src, limex->stateSize); - } else { - DEBUG_PRINTF("expand state, key=%hhx\n", key); + assert(ISALIGNED_N(dest, alignof(STATE_T))); + DEBUG_PRINTF("expand state: %p -> %p\n", src, dest); + + if (!(limex->flags & LIMEX_FLAG_COMPRESS_STATE)) { + // No key-based compression, just a partial load. + DEBUG_PRINTF("load state from %u bytes\n", limex->stateSize); + *dest = PARTIAL_LOAD_FN(src, limex->stateSize); + } else { + DEBUG_PRINTF("expand state, key=%hhx\n", key); STATE_T reachmask = REACHMASK_FN(limex, key); - - if (limex->flags & LIMEX_FLAG_COMPRESS_MASKED) { + + if (limex->flags & LIMEX_FLAG_COMPRESS_MASKED) { STATE_T mask = AND_STATE(LOAD_FROM_ENG(&limex->compressMask), reachmask); - COMPRESSED_LOAD_FN(dest, src, &mask, limex->stateSize); + COMPRESSED_LOAD_FN(dest, src, &mask, limex->stateSize); *dest = OR_STATE(LOAD_FROM_ENG(&limex->initDS), *dest); - } else { + } else { COMPRESSED_LOAD_FN(dest, src, &reachmask, limex->stateSize); - } - } -} - -#undef IMPL_NFA_T -#undef COMMON_T -#undef REACHMASK_FN -#undef COMPRESS_FN -#undef EXPAND_FN -#undef COMPRESSED_STORE_FN -#undef COMPRESSED_LOAD_FN -#undef PARTIAL_STORE_FN -#undef PARTIAL_LOAD_FN -#undef OR_STATE -#undef AND_STATE -#undef ISZERO_STATE + } + } +} + +#undef IMPL_NFA_T +#undef COMMON_T +#undef REACHMASK_FN +#undef COMPRESS_FN +#undef EXPAND_FN +#undef COMPRESSED_STORE_FN +#undef COMPRESSED_LOAD_FN +#undef PARTIAL_STORE_FN +#undef PARTIAL_LOAD_FN +#undef OR_STATE +#undef AND_STATE +#undef ISZERO_STATE diff --git a/contrib/libs/hyperscan/src/nfa/mcclellan.c b/contrib/libs/hyperscan/src/nfa/mcclellan.c index d0b2f8bbbd..71f71e3275 100644 --- a/contrib/libs/hyperscan/src/nfa/mcclellan.c +++ b/contrib/libs/hyperscan/src/nfa/mcclellan.c @@ -1,101 +1,101 @@ -/* +/* * Copyright (c) 2015-2018, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "mcclellan.h" - -#include "accel.h" -#include "mcclellan_internal.h" -#include "nfa_api.h" -#include "nfa_api_queue.h" -#include "nfa_internal.h" -#include "util/bitutils.h" -#include "util/compare.h" -#include "util/simd_utils.h" -#include "ue2common.h" - -#include "mcclellan_common_impl.h" - -static really_inline -char doComplexReport(NfaCallback cb, void *ctxt, const struct mcclellan *m, + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "mcclellan.h" + +#include "accel.h" +#include "mcclellan_internal.h" +#include "nfa_api.h" +#include "nfa_api_queue.h" +#include "nfa_internal.h" +#include "util/bitutils.h" +#include "util/compare.h" +#include "util/simd_utils.h" +#include "ue2common.h" + +#include "mcclellan_common_impl.h" + +static really_inline +char doComplexReport(NfaCallback cb, void *ctxt, const struct mcclellan *m, u32 s, u64a loc, char eod, u32 *cached_accept_state, u32 *cached_accept_id) { DEBUG_PRINTF("reporting state = %u, loc=%llu, eod %hhu\n", s & STATE_MASK, loc, eod); - - if (!eod && s == *cached_accept_state) { + + if (!eod && s == *cached_accept_state) { if (cb(0, loc, *cached_accept_id, ctxt) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; /* termination requested */ - } - - return MO_CONTINUE_MATCHING; /* continue execution */ - } - - const struct mstate_aux *aux = get_aux(m, s); - size_t offset = eod ? aux->accept_eod : aux->accept; - - assert(offset); - const struct report_list *rl - = (const void *)((const char *)m + offset - sizeof(struct NFA)); - assert(ISALIGNED(rl)); - - DEBUG_PRINTF("report list size %u\n", rl->count); - u32 count = rl->count; - - if (!eod && count == 1) { - *cached_accept_state = s; - *cached_accept_id = rl->report[0]; - - DEBUG_PRINTF("reporting %u\n", rl->report[0]); + return MO_HALT_MATCHING; /* termination requested */ + } + + return MO_CONTINUE_MATCHING; /* continue execution */ + } + + const struct mstate_aux *aux = get_aux(m, s); + size_t offset = eod ? aux->accept_eod : aux->accept; + + assert(offset); + const struct report_list *rl + = (const void *)((const char *)m + offset - sizeof(struct NFA)); + assert(ISALIGNED(rl)); + + DEBUG_PRINTF("report list size %u\n", rl->count); + u32 count = rl->count; + + if (!eod && count == 1) { + *cached_accept_state = s; + *cached_accept_id = rl->report[0]; + + DEBUG_PRINTF("reporting %u\n", rl->report[0]); if (cb(0, loc, rl->report[0], ctxt) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; /* termination requested */ - } - - return MO_CONTINUE_MATCHING; /* continue execution */ - } - - for (u32 i = 0; i < count; i++) { - DEBUG_PRINTF("reporting %u\n", rl->report[i]); + return MO_HALT_MATCHING; /* termination requested */ + } + + return MO_CONTINUE_MATCHING; /* continue execution */ + } + + for (u32 i = 0; i < count; i++) { + DEBUG_PRINTF("reporting %u\n", rl->report[i]); if (cb(0, loc, rl->report[i], ctxt) == MO_HALT_MATCHING) { - return MO_HALT_MATCHING; /* termination requested */ - } - } - - return MO_CONTINUE_MATCHING; /* continue execution */ -} - -static really_inline + return MO_HALT_MATCHING; /* termination requested */ + } + } + + return MO_CONTINUE_MATCHING; /* continue execution */ +} + +static really_inline const u8 *run_mcclellan_accel(const struct mcclellan *m, const struct mstate_aux *aux, u32 s, const u8 **min_accel_offset, const u8 *c, const u8 *c_end) { DEBUG_PRINTF("skipping\n"); u32 accel_offset = aux[s].accel_offset; - + assert(aux[s].accel_offset); assert(accel_offset >= m->aux_offset); assert(!m->sherman_offset || accel_offset < m->sherman_offset); @@ -126,14 +126,14 @@ u32 doNormal16(const struct mcclellan *m, const u8 **c_inout, const u8 *end, const u16 *succ_table = (const u16 *)((const char *)m + sizeof(struct mcclellan)); - assert(ISALIGNED_N(succ_table, 2)); + assert(ISALIGNED_N(succ_table, 2)); u32 sherman_base = m->sherman_limit; - const char *sherman_base_offset - = (const char *)m - sizeof(struct NFA) + m->sherman_offset; + const char *sherman_base_offset + = (const char *)m - 
sizeof(struct NFA) + m->sherman_offset; u32 as = m->alphaShift; - - s &= STATE_MASK; - + + s &= STATE_MASK; + while (c < end && s) { u8 cprime = m->remap[*c]; DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx (s=%u)\n", *c, @@ -247,26 +247,26 @@ char mcclellanExec16_i(const struct mcclellan *m, u32 *state, char *qstate, s &= STATE_MASK; - u32 cached_accept_id = 0; + u32 cached_accept_id = 0; u32 cached_accept_state = 0; - + DEBUG_PRINTF("s: %u, len %zu\n", s, len); - - const u8 *min_accel_offset = c; - if (!m->has_accel || len < ACCEL_MIN_LEN) { - min_accel_offset = c_end; - goto without_accel; - } - - goto with_accel; - -without_accel: + + const u8 *min_accel_offset = c; + if (!m->has_accel || len < ACCEL_MIN_LEN) { + min_accel_offset = c_end; + goto without_accel; + } + + goto with_accel; + +without_accel: do { assert(c < min_accel_offset); if (!s) { goto exit; - } - + } + if (unlikely(m->has_wide)) { s = doNormalWide16(m, &c, min_accel_offset, s, qstate, &offset, 0, mode); @@ -274,27 +274,27 @@ without_accel: s = doNormal16(m, &c, min_accel_offset, s, 0, mode); } - if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) { - if (mode == STOP_AT_MATCH) { - *state = s & STATE_MASK; - *c_final = c - 1; + if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) { + if (mode == STOP_AT_MATCH) { + *state = s & STATE_MASK; + *c_final = c - 1; return MO_MATCHES_PENDING; - } - - u64a loc = (c - 1) - buf + offAdj + 1; - - if (single) { - DEBUG_PRINTF("reporting %u\n", m->arb_report); + } + + u64a loc = (c - 1) - buf + offAdj + 1; + + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { return MO_DEAD; /* termination requested */ - } - } else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0, + } + } else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0, &cached_accept_state, &cached_accept_id) == MO_HALT_MATCHING) { return MO_DEAD; - } - } - + } + } + assert(c <= min_accel_offset); } while (c < min_accel_offset); @@ -304,15 +304,15 @@ without_accel: goto exit; } else { goto with_accel; - } - -with_accel: + } + +with_accel: do { assert(c < c_end); if (!s) { goto exit; - } - + } + if (s & ACCEL_FLAG) { DEBUG_PRINTF("skipping\n"); s &= STATE_MASK; @@ -330,87 +330,87 @@ with_accel: s = doNormal16(m, &c, c_end, s, 1, mode); } - if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) { - if (mode == STOP_AT_MATCH) { - *state = s & STATE_MASK; - *c_final = c - 1; + if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) { + if (mode == STOP_AT_MATCH) { + *state = s & STATE_MASK; + *c_final = c - 1; return MO_MATCHES_PENDING; - } - - u64a loc = (c - 1) - buf + offAdj + 1; - - if (single) { - DEBUG_PRINTF("reporting %u\n", m->arb_report); + } + + u64a loc = (c - 1) - buf + offAdj + 1; + + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { return MO_DEAD; /* termination requested */ - } - } else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0, + } + } else if (doComplexReport(cb, ctxt, m, s & STATE_MASK, loc, 0, &cached_accept_state, &cached_accept_id) == MO_HALT_MATCHING) { return MO_DEAD; - } + } } - + assert(c <= c_end); } while (c < c_end); - + exit: s &= STATE_MASK; - - if (mode == STOP_AT_MATCH) { - *c_final = c_end; - } - *state = s; - + + if (mode == STOP_AT_MATCH) { + *c_final = c_end; + } + *state = s; + return MO_ALIVE; -} - -static never_inline +} + +static never_inline char mcclellanExec16_i_cb(const struct mcclellan *m, u32 *state, char *qstate, const u8 *buf, size_t len, u64a offAdj, 
NfaCallback cb, void *ctxt, char single, const u8 **final_point) { return mcclellanExec16_i(m, state, qstate, buf, len, offAdj, cb, ctxt, single, final_point, CALLBACK_OUTPUT); -} - -static never_inline +} + +static never_inline char mcclellanExec16_i_sam(const struct mcclellan *m, u32 *state, char *qstate, const u8 *buf, size_t len, u64a offAdj, NfaCallback cb, void *ctxt, char single, const u8 **final_point) { return mcclellanExec16_i(m, state, qstate, buf, len, offAdj, cb, ctxt, single, final_point, STOP_AT_MATCH); -} - -static never_inline +} + +static never_inline char mcclellanExec16_i_nm(const struct mcclellan *m, u32 *state, char *qstate, const u8 *buf, size_t len, u64a offAdj, NfaCallback cb, void *ctxt, char single, const u8 **final_point) { return mcclellanExec16_i(m, state, qstate, buf, len, offAdj, cb, ctxt, single, final_point, NO_MATCHES); -} - -static really_inline +} + +static really_inline char mcclellanExec16_i_ni(const struct mcclellan *m, u32 *state, char *qstate, const u8 *buf, size_t len, u64a offAdj, NfaCallback cb, void *ctxt, char single, const u8 **final_point, enum MatchMode mode) { - if (mode == CALLBACK_OUTPUT) { + if (mode == CALLBACK_OUTPUT) { return mcclellanExec16_i_cb(m, state, qstate, buf, len, offAdj, cb, ctxt, single, final_point); - } else if (mode == STOP_AT_MATCH) { + } else if (mode == STOP_AT_MATCH) { return mcclellanExec16_i_sam(m, state, qstate, buf, len, offAdj, cb, ctxt, single, final_point); - } else { + } else { assert(mode == NO_MATCHES); return mcclellanExec16_i_nm(m, state, qstate, buf, len, offAdj, cb, ctxt, single, final_point); - } -} - -static really_inline + } +} + +static really_inline u32 doNormal8(const struct mcclellan *m, const u8 **c_inout, const u8 *end, u32 s, char do_accel, enum MatchMode mode) { const u8 *c = *c_inout; @@ -418,14 +418,14 @@ u32 doNormal8(const struct mcclellan *m, const u8 **c_inout, const u8 *end, u32 accept_limit = m->accept_limit_8; const u32 as = m->alphaShift; - const u8 *succ_table = (const u8 *)((const char *)m - + sizeof(struct mcclellan)); + const u8 *succ_table = (const u8 *)((const char *)m + + sizeof(struct mcclellan)); while (c < end && s) { u8 cprime = m->remap[*c]; DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx\n", *c, ourisprint(*c) ? 
*c : '?', cprime); s = succ_table[(s << as) + cprime]; - + DEBUG_PRINTF("s: %u\n", s); c++; if (do_accel) { @@ -458,70 +458,70 @@ char mcclellanExec8_i(const struct mcclellan *m, u32 *state, const u8 *buf, const struct mstate_aux *aux = (const struct mstate_aux *)((const char *)m + m->aux_offset - - sizeof(struct NFA)); + - sizeof(struct NFA)); u32 accept_limit = m->accept_limit_8; - - u32 cached_accept_id = 0; + + u32 cached_accept_id = 0; u32 cached_accept_state = 0; - + DEBUG_PRINTF("accel %hu, accept %u\n", m->accel_limit_8, accept_limit); - + DEBUG_PRINTF("s: %u, len %zu\n", s, len); - - const u8 *min_accel_offset = c; - if (!m->has_accel || len < ACCEL_MIN_LEN) { - min_accel_offset = c_end; - goto without_accel; - } - - goto with_accel; - -without_accel: + + const u8 *min_accel_offset = c; + if (!m->has_accel || len < ACCEL_MIN_LEN) { + min_accel_offset = c_end; + goto without_accel; + } + + goto with_accel; + +without_accel: do { assert(c < min_accel_offset); if (!s) { goto exit; } - + s = doNormal8(m, &c, min_accel_offset, s, 0, mode); - if (mode != NO_MATCHES && s >= accept_limit) { - if (mode == STOP_AT_MATCH) { - DEBUG_PRINTF("match - pausing\n"); - *state = s; - *c_final = c - 1; + if (mode != NO_MATCHES && s >= accept_limit) { + if (mode == STOP_AT_MATCH) { + DEBUG_PRINTF("match - pausing\n"); + *state = s; + *c_final = c - 1; return MO_MATCHES_PENDING; - } - - u64a loc = (c - 1) - buf + offAdj + 1; - if (single) { - DEBUG_PRINTF("reporting %u\n", m->arb_report); + } + + u64a loc = (c - 1) - buf + offAdj + 1; + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { return MO_DEAD; - } - } else if (doComplexReport(cb, ctxt, m, s, loc, 0, + } + } else if (doComplexReport(cb, ctxt, m, s, loc, 0, &cached_accept_state, &cached_accept_id) - == MO_HALT_MATCHING) { + == MO_HALT_MATCHING) { return MO_DEAD; - } - } + } + } assert(c <= min_accel_offset); } while (c < min_accel_offset); if (c == c_end) { goto exit; - } - -with_accel: + } + +with_accel: do { u32 accel_limit = m->accel_limit_8; assert(c < c_end); - + if (!s) { goto exit; } - + if (s >= accel_limit && aux[s].accel_offset) { c = run_mcclellan_accel(m, aux, s, &min_accel_offset, c, c_end); if (c == c_end) { @@ -531,7 +531,7 @@ with_accel: } } s = doNormal8(m, &c, c_end, s, 1, mode); - + if (mode != NO_MATCHES && s >= accept_limit) { if (mode == STOP_AT_MATCH) { DEBUG_PRINTF("match - pausing\n"); @@ -539,548 +539,548 @@ with_accel: *c_final = c - 1; return MO_MATCHES_PENDING; } - + u64a loc = (c - 1) - buf + offAdj + 1; if (single) { DEBUG_PRINTF("reporting %u\n", m->arb_report); if (cb(0, loc, m->arb_report, ctxt) == MO_HALT_MATCHING) { return MO_DEAD; - } + } } else if (doComplexReport(cb, ctxt, m, s, loc, 0, &cached_accept_state, &cached_accept_id) == MO_HALT_MATCHING) { return MO_DEAD; - } - } - + } + } + assert(c <= c_end); } while (c < c_end); exit: - *state = s; - if (mode == STOP_AT_MATCH) { - *c_final = c_end; - } + *state = s; + if (mode == STOP_AT_MATCH) { + *c_final = c_end; + } return MO_ALIVE; -} - -static never_inline +} + +static never_inline char mcclellanExec8_i_cb(const struct mcclellan *m, u32 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **final_point) { - return mcclellanExec8_i(m, state, buf, len, offAdj, cb, ctxt, single, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point) { + return mcclellanExec8_i(m, state, buf, len, offAdj, cb, ctxt, 
single, final_point, CALLBACK_OUTPUT); -} - -static never_inline +} + +static never_inline char mcclellanExec8_i_sam(const struct mcclellan *m, u32 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **final_point) { - return mcclellanExec8_i(m, state, buf, len, offAdj, cb, ctxt, single, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point) { + return mcclellanExec8_i(m, state, buf, len, offAdj, cb, ctxt, single, final_point, STOP_AT_MATCH); -} - -static never_inline +} + +static never_inline char mcclellanExec8_i_nm(const struct mcclellan *m, u32 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **final_point) { - return mcclellanExec8_i(m, state, buf, len, offAdj, cb, ctxt, single, + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point) { + return mcclellanExec8_i(m, state, buf, len, offAdj, cb, ctxt, single, final_point, NO_MATCHES); -} - -static really_inline +} + +static really_inline char mcclellanExec8_i_ni(const struct mcclellan *m, u32 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **final_point, - enum MatchMode mode) { - if (mode == CALLBACK_OUTPUT) { - return mcclellanExec8_i_cb(m, state, buf, len, offAdj, cb, ctxt, single, - final_point); - } else if (mode == STOP_AT_MATCH) { - return mcclellanExec8_i_sam(m, state, buf, len, offAdj, cb, ctxt, - single, final_point); - } else { - assert(mode == NO_MATCHES); - return mcclellanExec8_i_nm(m, state, buf, len, offAdj, cb, ctxt, single, - final_point); - } -} - -static really_inline + size_t len, u64a offAdj, NfaCallback cb, void *ctxt, + char single, const u8 **final_point, + enum MatchMode mode) { + if (mode == CALLBACK_OUTPUT) { + return mcclellanExec8_i_cb(m, state, buf, len, offAdj, cb, ctxt, single, + final_point); + } else if (mode == STOP_AT_MATCH) { + return mcclellanExec8_i_sam(m, state, buf, len, offAdj, cb, ctxt, + single, final_point); + } else { + assert(mode == NO_MATCHES); + return mcclellanExec8_i_nm(m, state, buf, len, offAdj, cb, ctxt, single, + final_point); + } +} + +static really_inline char mcclellanCheckEOD(const struct NFA *nfa, u32 s, u64a offset, - NfaCallback cb, void *ctxt) { - const struct mcclellan *m = getImplNfa(nfa); - const struct mstate_aux *aux = get_aux(m, s); - + NfaCallback cb, void *ctxt) { + const struct mcclellan *m = getImplNfa(nfa); + const struct mstate_aux *aux = get_aux(m, s); + if (m->has_wide == 1 && s >= m->wide_limit) { return MO_CONTINUE_MATCHING; } if (!aux->accept_eod) { return MO_CONTINUE_MATCHING; - } + } return doComplexReport(cb, ctxt, m, s, offset, 1, NULL, NULL); -} - -static really_inline -char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, - const u8 *hend, NfaCallback cb, void *context, - struct mq *q, char single, s64a end, - enum MatchMode mode) { - assert(n->type == MCCLELLAN_NFA_16); - const struct mcclellan *m = getImplNfa(n); - s64a sp; - - assert(ISALIGNED_N(q->state, 2)); +} + +static really_inline +char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, + const u8 *hend, NfaCallback cb, void *context, + struct mq *q, char single, s64a end, + enum MatchMode mode) { + assert(n->type == MCCLELLAN_NFA_16); + const struct mcclellan *m = getImplNfa(n); + s64a sp; + + assert(ISALIGNED_N(q->state, 2)); u32 s = *(u16 *)q->state; - - if (q->report_current) { - assert(s); - assert(get_aux(m, 
s)->accept); - - int rv; - if (single) { - DEBUG_PRINTF("reporting %u\n", m->arb_report); + + if (q->report_current) { + assert(s); + assert(get_aux(m, s)->accept); + + int rv; + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); rv = cb(0, q_cur_offset(q), m->arb_report, context); - } else { - u32 cached_accept_id = 0; + } else { + u32 cached_accept_id = 0; u32 cached_accept_state = 0; - - rv = doComplexReport(cb, context, m, s, q_cur_offset(q), 0, - &cached_accept_state, &cached_accept_id); - } - - q->report_current = 0; - - if (rv == MO_HALT_MATCHING) { + + rv = doComplexReport(cb, context, m, s, q_cur_offset(q), 0, + &cached_accept_state, &cached_accept_id); + } + + q->report_current = 0; + + if (rv == MO_HALT_MATCHING) { return MO_DEAD; - } - } - - sp = q_cur_loc(q); - q->cur++; - - const u8 *cur_buf = sp < 0 ? hend : buffer; - - assert(q->cur); - if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) { - DEBUG_PRINTF("this is as far as we go\n"); - q->cur--; - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = end; - *(u16 *)q->state = s; - return MO_ALIVE; - } - - while (1) { - assert(q->cur < q->end); - s64a ep = q->items[q->cur].location; - if (mode != NO_MATCHES) { - ep = MIN(ep, end); - } - - assert(ep >= sp); - - s64a local_ep = ep; - if (sp < 0) { - local_ep = MIN(0, ep); - } - - /* do main buffer region */ - const u8 *final_look; + } + } + + sp = q_cur_loc(q); + q->cur++; + + const u8 *cur_buf = sp < 0 ? hend : buffer; + + assert(q->cur); + if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) { + DEBUG_PRINTF("this is as far as we go\n"); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = end; + *(u16 *)q->state = s; + return MO_ALIVE; + } + + while (1) { + assert(q->cur < q->end); + s64a ep = q->items[q->cur].location; + if (mode != NO_MATCHES) { + ep = MIN(ep, end); + } + + assert(ep >= sp); + + s64a local_ep = ep; + if (sp < 0) { + local_ep = MIN(0, ep); + } + + /* do main buffer region */ + const u8 *final_look; char rv = mcclellanExec16_i_ni(m, &s, q->state, cur_buf + sp, local_ep - sp, offset + sp, cb, context, single, &final_look, mode); if (rv == MO_DEAD) { - *(u16 *)q->state = 0; + *(u16 *)q->state = 0; return MO_DEAD; - } + } if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) { - DEBUG_PRINTF("this is as far as we go\n"); + DEBUG_PRINTF("this is as far as we go\n"); DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf); - assert(q->cur); + assert(q->cur); assert(final_look != cur_buf + local_ep); - q->cur--; - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = final_look - cur_buf + 1; /* due to - * early -1 */ - *(u16 *)q->state = s; - return MO_MATCHES_PENDING; - } - + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = final_look - cur_buf + 1; /* due to + * early -1 */ + *(u16 *)q->state = s; + return MO_MATCHES_PENDING; + } + assert(rv == MO_ALIVE); - assert(q->cur); - if (mode != NO_MATCHES && q->items[q->cur].location > end) { - DEBUG_PRINTF("this is as far as we go\n"); - q->cur--; - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = end; - *(u16 *)q->state = s; - return MO_ALIVE; - } - - sp = local_ep; - - if (sp == 0) { - cur_buf = buffer; - } - - if (sp != ep) { - continue; - } - - switch (q->items[q->cur].type) { - case MQE_TOP: - assert(sp + offset || !s); - if (sp + offset == 0) { - s = m->start_anchored; - break; - } - s = mcclellanEnableStarts(m, s); - break; - case MQE_END: - *(u16 *)q->state = s; - q->cur++; 
+ assert(q->cur); + if (mode != NO_MATCHES && q->items[q->cur].location > end) { + DEBUG_PRINTF("this is as far as we go\n"); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = end; + *(u16 *)q->state = s; + return MO_ALIVE; + } + + sp = local_ep; + + if (sp == 0) { + cur_buf = buffer; + } + + if (sp != ep) { + continue; + } + + switch (q->items[q->cur].type) { + case MQE_TOP: + assert(sp + offset || !s); + if (sp + offset == 0) { + s = m->start_anchored; + break; + } + s = mcclellanEnableStarts(m, s); + break; + case MQE_END: + *(u16 *)q->state = s; + q->cur++; return s ? MO_ALIVE : MO_DEAD; - default: - assert(!"invalid queue event"); - } - - q->cur++; - } -} - + default: + assert(!"invalid queue event"); + } + + q->cur++; + } +} + static really_inline char nfaExecMcClellan16_Bi(const struct NFA *n, u64a offset, const u8 *buffer, size_t length, NfaCallback cb, void *context, char single) { - assert(n->type == MCCLELLAN_NFA_16); - const struct mcclellan *m = getImplNfa(n); + assert(n->type == MCCLELLAN_NFA_16); + const struct mcclellan *m = getImplNfa(n); u32 s = m->start_anchored; - + if (mcclellanExec16_i(m, &s, NULL, buffer, length, offset, cb, context, single, NULL, CALLBACK_OUTPUT) == MO_DEAD) { return s ? MO_ALIVE : MO_DEAD; - } - + } + if (m->has_wide == 1 && s >= m->wide_limit) { return MO_ALIVE; } - const struct mstate_aux *aux = get_aux(m, s); - - if (aux->accept_eod) { - doComplexReport(cb, context, m, s, offset + length, 1, NULL, NULL); - } - + const struct mstate_aux *aux = get_aux(m, s); + + if (aux->accept_eod) { + doComplexReport(cb, context, m, s, offset + length, 1, NULL, NULL); + } + return MO_ALIVE; -} - -static really_inline -char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, +} + +static really_inline +char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, const u8 *hend, NfaCallback cb, void *context, struct mq *q, char single, s64a end, enum MatchMode mode) { - assert(n->type == MCCLELLAN_NFA_8); - const struct mcclellan *m = getImplNfa(n); - s64a sp; - + assert(n->type == MCCLELLAN_NFA_8); + const struct mcclellan *m = getImplNfa(n); + s64a sp; + u32 s = *(u8 *)q->state; - - if (q->report_current) { - assert(s); - assert(s >= m->accept_limit_8); - - int rv; - if (single) { - DEBUG_PRINTF("reporting %u\n", m->arb_report); + + if (q->report_current) { + assert(s); + assert(s >= m->accept_limit_8); + + int rv; + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); rv = cb(0, q_cur_offset(q), m->arb_report, context); - } else { - u32 cached_accept_id = 0; + } else { + u32 cached_accept_id = 0; u32 cached_accept_state = 0; - - rv = doComplexReport(cb, context, m, s, q_cur_offset(q), 0, - &cached_accept_state, &cached_accept_id); - } - - q->report_current = 0; - - if (rv == MO_HALT_MATCHING) { + + rv = doComplexReport(cb, context, m, s, q_cur_offset(q), 0, + &cached_accept_state, &cached_accept_id); + } + + q->report_current = 0; + + if (rv == MO_HALT_MATCHING) { return MO_DEAD; - } - } - - sp = q_cur_loc(q); - q->cur++; - - const u8 *cur_buf = sp < 0 ? hend : buffer; - - if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) { - DEBUG_PRINTF("this is as far as we go\n"); - q->cur--; - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = end; - *(u8 *)q->state = s; - return MO_ALIVE; - } - - while (1) { - DEBUG_PRINTF("%s @ %llu\n", q->items[q->cur].type == MQE_TOP ? "TOP" : - q->items[q->cur].type == MQE_END ? 
"END" : "???", - q->items[q->cur].location + offset); - assert(q->cur < q->end); - s64a ep = q->items[q->cur].location; - if (mode != NO_MATCHES) { - ep = MIN(ep, end); - } - - assert(ep >= sp); - - s64a local_ep = ep; - if (sp < 0) { - local_ep = MIN(0, ep); - } - - const u8 *final_look; + } + } + + sp = q_cur_loc(q); + q->cur++; + + const u8 *cur_buf = sp < 0 ? hend : buffer; + + if (mode != NO_MATCHES && q->items[q->cur - 1].location > end) { + DEBUG_PRINTF("this is as far as we go\n"); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = end; + *(u8 *)q->state = s; + return MO_ALIVE; + } + + while (1) { + DEBUG_PRINTF("%s @ %llu\n", q->items[q->cur].type == MQE_TOP ? "TOP" : + q->items[q->cur].type == MQE_END ? "END" : "???", + q->items[q->cur].location + offset); + assert(q->cur < q->end); + s64a ep = q->items[q->cur].location; + if (mode != NO_MATCHES) { + ep = MIN(ep, end); + } + + assert(ep >= sp); + + s64a local_ep = ep; + if (sp < 0) { + local_ep = MIN(0, ep); + } + + const u8 *final_look; char rv = mcclellanExec8_i_ni(m, &s, cur_buf + sp, local_ep - sp, offset + sp, cb, context, single, &final_look, mode); if (rv == MO_HALT_MATCHING) { - *(u8 *)q->state = 0; + *(u8 *)q->state = 0; return MO_DEAD; - } + } if (mode == STOP_AT_MATCH && rv == MO_MATCHES_PENDING) { DEBUG_PRINTF("this is as far as we go\n"); DEBUG_PRINTF("state %u final_look %zd\n", s, final_look - cur_buf); - assert(q->cur); + assert(q->cur); assert(final_look != cur_buf + local_ep); - q->cur--; - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = final_look - cur_buf + 1; /* due to - * early -1 */ - *(u8 *)q->state = s; - return MO_MATCHES_PENDING; - } - + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = final_look - cur_buf + 1; /* due to + * early -1 */ + *(u8 *)q->state = s; + return MO_MATCHES_PENDING; + } + assert(rv == MO_ALIVE); - assert(q->cur); - if (mode != NO_MATCHES && q->items[q->cur].location > end) { - DEBUG_PRINTF("this is as far as we go\n"); - assert(q->cur); - q->cur--; - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = end; - *(u8 *)q->state = s; - return MO_ALIVE; - } - - sp = local_ep; - - if (sp == 0) { - cur_buf = buffer; - } - - if (sp != ep) { - continue; - } - - switch (q->items[q->cur].type) { - case MQE_TOP: - assert(sp + offset || !s); - if (sp + offset == 0) { - s = (u8)m->start_anchored; - break; - } - s = mcclellanEnableStarts(m, s); - break; - case MQE_END: - *(u8 *)q->state = s; - q->cur++; + assert(q->cur); + if (mode != NO_MATCHES && q->items[q->cur].location > end) { + DEBUG_PRINTF("this is as far as we go\n"); + assert(q->cur); + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = end; + *(u8 *)q->state = s; + return MO_ALIVE; + } + + sp = local_ep; + + if (sp == 0) { + cur_buf = buffer; + } + + if (sp != ep) { + continue; + } + + switch (q->items[q->cur].type) { + case MQE_TOP: + assert(sp + offset || !s); + if (sp + offset == 0) { + s = (u8)m->start_anchored; + break; + } + s = mcclellanEnableStarts(m, s); + break; + case MQE_END: + *(u8 *)q->state = s; + q->cur++; return s ? 
MO_ALIVE : MO_DEAD; - default: - assert(!"invalid queue event"); - } - - q->cur++; - } -} - + default: + assert(!"invalid queue event"); + } + + q->cur++; + } +} + static really_inline -char nfaExecMcClellan8_Bi(const struct NFA *n, u64a offset, const u8 *buffer, - size_t length, NfaCallback cb, void *context, - char single) { - assert(n->type == MCCLELLAN_NFA_8); - const struct mcclellan *m = getImplNfa(n); +char nfaExecMcClellan8_Bi(const struct NFA *n, u64a offset, const u8 *buffer, + size_t length, NfaCallback cb, void *context, + char single) { + assert(n->type == MCCLELLAN_NFA_8); + const struct mcclellan *m = getImplNfa(n); u32 s = m->start_anchored; - - if (mcclellanExec8_i(m, &s, buffer, length, offset, cb, context, single, - NULL, CALLBACK_OUTPUT) + + if (mcclellanExec8_i(m, &s, buffer, length, offset, cb, context, single, + NULL, CALLBACK_OUTPUT) == MO_DEAD) { return MO_DEAD; - } - - const struct mstate_aux *aux = get_aux(m, s); - - if (aux->accept_eod) { - doComplexReport(cb, context, m, s, offset + length, 1, NULL, NULL); - } - + } + + const struct mstate_aux *aux = get_aux(m, s); + + if (aux->accept_eod) { + doComplexReport(cb, context, m, s, offset + length, 1, NULL, NULL); + } + return s ? MO_ALIVE : MO_DEAD; -} - -char nfaExecMcClellan8_B(const struct NFA *n, u64a offset, const u8 *buffer, - size_t length, NfaCallback cb, void *context) { - assert(n->type == MCCLELLAN_NFA_8); - const struct mcclellan *m = getImplNfa(n); - - if (m->flags & MCCLELLAN_FLAG_SINGLE) { - return nfaExecMcClellan8_Bi(n, offset, buffer, length, cb, context, 1); - } else { - return nfaExecMcClellan8_Bi(n, offset, buffer, length, cb, context, 0); - } -} - -char nfaExecMcClellan8_Q(const struct NFA *n, struct mq *q, s64a end) { - u64a offset = q->offset; - const u8 *buffer = q->buffer; - NfaCallback cb = q->cb; - void *context = q->context; - assert(n->type == MCCLELLAN_NFA_8); - const struct mcclellan *m = getImplNfa(n); - const u8 *hend = q->history + q->hlength; - - return nfaExecMcClellan8_Q2i(n, offset, buffer, hend, cb, context, q, - m->flags & MCCLELLAN_FLAG_SINGLE, end, - CALLBACK_OUTPUT); -} - -char nfaExecMcClellan16_B(const struct NFA *n, u64a offset, const u8 *buffer, - size_t length, NfaCallback cb, void *context) { - assert(n->type == MCCLELLAN_NFA_16); - const struct mcclellan *m = getImplNfa(n); - - if (m->flags & MCCLELLAN_FLAG_SINGLE) { - return nfaExecMcClellan16_Bi(n, offset, buffer, length, cb, context, 1); - } else { - return nfaExecMcClellan16_Bi(n, offset, buffer, length, cb, context, 0); - } -} - -char nfaExecMcClellan16_Q(const struct NFA *n, struct mq *q, s64a end) { - u64a offset = q->offset; - const u8 *buffer = q->buffer; - NfaCallback cb = q->cb; - void *context = q->context; - assert(n->type == MCCLELLAN_NFA_16); - const struct mcclellan *m = getImplNfa(n); - const u8 *hend = q->history + q->hlength; - - return nfaExecMcClellan16_Q2i(n, offset, buffer, hend, cb, context, q, - m->flags & MCCLELLAN_FLAG_SINGLE, end, - CALLBACK_OUTPUT); -} - -char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q) { - const struct mcclellan *m = getImplNfa(n); - NfaCallback cb = q->cb; - void *ctxt = q->context; +} + +char nfaExecMcClellan8_B(const struct NFA *n, u64a offset, const u8 *buffer, + size_t length, NfaCallback cb, void *context) { + assert(n->type == MCCLELLAN_NFA_8); + const struct mcclellan *m = getImplNfa(n); + + if (m->flags & MCCLELLAN_FLAG_SINGLE) { + return nfaExecMcClellan8_Bi(n, offset, buffer, length, cb, context, 1); + } else { + return 
nfaExecMcClellan8_Bi(n, offset, buffer, length, cb, context, 0); + } +} + +char nfaExecMcClellan8_Q(const struct NFA *n, struct mq *q, s64a end) { + u64a offset = q->offset; + const u8 *buffer = q->buffer; + NfaCallback cb = q->cb; + void *context = q->context; + assert(n->type == MCCLELLAN_NFA_8); + const struct mcclellan *m = getImplNfa(n); + const u8 *hend = q->history + q->hlength; + + return nfaExecMcClellan8_Q2i(n, offset, buffer, hend, cb, context, q, + m->flags & MCCLELLAN_FLAG_SINGLE, end, + CALLBACK_OUTPUT); +} + +char nfaExecMcClellan16_B(const struct NFA *n, u64a offset, const u8 *buffer, + size_t length, NfaCallback cb, void *context) { + assert(n->type == MCCLELLAN_NFA_16); + const struct mcclellan *m = getImplNfa(n); + + if (m->flags & MCCLELLAN_FLAG_SINGLE) { + return nfaExecMcClellan16_Bi(n, offset, buffer, length, cb, context, 1); + } else { + return nfaExecMcClellan16_Bi(n, offset, buffer, length, cb, context, 0); + } +} + +char nfaExecMcClellan16_Q(const struct NFA *n, struct mq *q, s64a end) { + u64a offset = q->offset; + const u8 *buffer = q->buffer; + NfaCallback cb = q->cb; + void *context = q->context; + assert(n->type == MCCLELLAN_NFA_16); + const struct mcclellan *m = getImplNfa(n); + const u8 *hend = q->history + q->hlength; + + return nfaExecMcClellan16_Q2i(n, offset, buffer, hend, cb, context, q, + m->flags & MCCLELLAN_FLAG_SINGLE, end, + CALLBACK_OUTPUT); +} + +char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q) { + const struct mcclellan *m = getImplNfa(n); + NfaCallback cb = q->cb; + void *ctxt = q->context; u32 s = *(u8 *)q->state; - u8 single = m->flags & MCCLELLAN_FLAG_SINGLE; - u64a offset = q_cur_offset(q); - assert(q_cur_type(q) == MQE_START); - assert(s); - - if (s >= m->accept_limit_8) { - if (single) { - DEBUG_PRINTF("reporting %u\n", m->arb_report); + u8 single = m->flags & MCCLELLAN_FLAG_SINGLE; + u64a offset = q_cur_offset(q); + assert(q_cur_type(q) == MQE_START); + assert(s); + + if (s >= m->accept_limit_8) { + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); cb(0, offset, m->arb_report, ctxt); - } else { - u32 cached_accept_id = 0; + } else { + u32 cached_accept_id = 0; u32 cached_accept_state = 0; - - doComplexReport(cb, ctxt, m, s, offset, 0, &cached_accept_state, - &cached_accept_id); - } - } - - return 0; -} - -char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q) { + + doComplexReport(cb, ctxt, m, s, offset, 0, &cached_accept_state, + &cached_accept_id); + } + } + + return 0; +} + +char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q) { const struct mcclellan *m = getImplNfa(n); - NfaCallback cb = q->cb; - void *ctxt = q->context; + NfaCallback cb = q->cb; + void *ctxt = q->context; u32 s = *(u16 *)q->state; - const struct mstate_aux *aux = get_aux(m, s); - u8 single = m->flags & MCCLELLAN_FLAG_SINGLE; - u64a offset = q_cur_offset(q); - assert(q_cur_type(q) == MQE_START); + const struct mstate_aux *aux = get_aux(m, s); + u8 single = m->flags & MCCLELLAN_FLAG_SINGLE; + u64a offset = q_cur_offset(q); + assert(q_cur_type(q) == MQE_START); DEBUG_PRINTF("state %u\n", s); - assert(s); - - if (aux->accept) { - if (single) { - DEBUG_PRINTF("reporting %u\n", m->arb_report); + assert(s); + + if (aux->accept) { + if (single) { + DEBUG_PRINTF("reporting %u\n", m->arb_report); cb(0, offset, m->arb_report, ctxt); - } else { - u32 cached_accept_id = 0; + } else { + u32 cached_accept_id = 0; u32 cached_accept_state = 0; - - doComplexReport(cb, ctxt, m, s, offset, 0, 
&cached_accept_state, - &cached_accept_id); - } - } - - return 0; -} - -static -char mcclellanHasAccept(const struct mcclellan *m, const struct mstate_aux *aux, - ReportID report) { - assert(m && aux); - - if (!aux->accept) { - return 0; - } - - const struct report_list *rl = (const struct report_list *) - ((const char *)m + aux->accept - sizeof(struct NFA)); - assert(ISALIGNED_N(rl, 4)); - - DEBUG_PRINTF("report list has %u entries\n", rl->count); - - for (u32 i = 0; i < rl->count; i++) { - if (rl->report[i] == report) { - return 1; - } - } - - return 0; -} - -char nfaExecMcClellan8_inAccept(const struct NFA *n, ReportID report, - struct mq *q) { - assert(n && q); - + + doComplexReport(cb, ctxt, m, s, offset, 0, &cached_accept_state, + &cached_accept_id); + } + } + + return 0; +} + +static +char mcclellanHasAccept(const struct mcclellan *m, const struct mstate_aux *aux, + ReportID report) { + assert(m && aux); + + if (!aux->accept) { + return 0; + } + + const struct report_list *rl = (const struct report_list *) + ((const char *)m + aux->accept - sizeof(struct NFA)); + assert(ISALIGNED_N(rl, 4)); + + DEBUG_PRINTF("report list has %u entries\n", rl->count); + + for (u32 i = 0; i < rl->count; i++) { + if (rl->report[i] == report) { + return 1; + } + } + + return 0; +} + +char nfaExecMcClellan8_inAccept(const struct NFA *n, ReportID report, + struct mq *q) { + assert(n && q); + const struct mcclellan *m = getImplNfa(n); - u8 s = *(u8 *)q->state; - DEBUG_PRINTF("checking accepts for %hhu\n", s); - if (s < m->accept_limit_8) { - return 0; - } - - return mcclellanHasAccept(m, get_aux(m, s), report); -} - + u8 s = *(u8 *)q->state; + DEBUG_PRINTF("checking accepts for %hhu\n", s); + if (s < m->accept_limit_8) { + return 0; + } + + return mcclellanHasAccept(m, get_aux(m, s), report); +} + char nfaExecMcClellan8_inAnyAccept(const struct NFA *n, struct mq *q) { assert(n && q); - + const struct mcclellan *m = getImplNfa(n); u8 s = *(u8 *)q->state; DEBUG_PRINTF("checking accepts for %hhu\n", s); @@ -1089,18 +1089,18 @@ char nfaExecMcClellan8_inAnyAccept(const struct NFA *n, struct mq *q) { return s >= m->accept_limit_8; } -char nfaExecMcClellan16_inAccept(const struct NFA *n, ReportID report, - struct mq *q) { - assert(n && q); - +char nfaExecMcClellan16_inAccept(const struct NFA *n, ReportID report, + struct mq *q) { + assert(n && q); + const struct mcclellan *m = getImplNfa(n); - u16 s = *(u16 *)q->state; - DEBUG_PRINTF("checking accepts for %hu\n", s); - + u16 s = *(u16 *)q->state; + DEBUG_PRINTF("checking accepts for %hu\n", s); + return (m->has_wide == 1 && s >= m->wide_limit) ? 
0 : mcclellanHasAccept(m, get_aux(m, s), report); -} - +} + char nfaExecMcClellan16_inAnyAccept(const struct NFA *n, struct mq *q) { assert(n && q); @@ -1112,125 +1112,125 @@ char nfaExecMcClellan16_inAnyAccept(const struct NFA *n, struct mq *q) { 0 : !!get_aux(m, s)->accept; } -char nfaExecMcClellan8_Q2(const struct NFA *n, struct mq *q, s64a end) { - u64a offset = q->offset; - const u8 *buffer = q->buffer; - NfaCallback cb = q->cb; - void *context = q->context; - assert(n->type == MCCLELLAN_NFA_8); +char nfaExecMcClellan8_Q2(const struct NFA *n, struct mq *q, s64a end) { + u64a offset = q->offset; + const u8 *buffer = q->buffer; + NfaCallback cb = q->cb; + void *context = q->context; + assert(n->type == MCCLELLAN_NFA_8); const struct mcclellan *m = getImplNfa(n); - const u8 *hend = q->history + q->hlength; - - return nfaExecMcClellan8_Q2i(n, offset, buffer, hend, cb, context, q, - m->flags & MCCLELLAN_FLAG_SINGLE, end, - STOP_AT_MATCH); -} - -char nfaExecMcClellan16_Q2(const struct NFA *n, struct mq *q, s64a end) { - u64a offset = q->offset; - const u8 *buffer = q->buffer; - NfaCallback cb = q->cb; - void *context = q->context; - assert(n->type == MCCLELLAN_NFA_16); + const u8 *hend = q->history + q->hlength; + + return nfaExecMcClellan8_Q2i(n, offset, buffer, hend, cb, context, q, + m->flags & MCCLELLAN_FLAG_SINGLE, end, + STOP_AT_MATCH); +} + +char nfaExecMcClellan16_Q2(const struct NFA *n, struct mq *q, s64a end) { + u64a offset = q->offset; + const u8 *buffer = q->buffer; + NfaCallback cb = q->cb; + void *context = q->context; + assert(n->type == MCCLELLAN_NFA_16); const struct mcclellan *m = getImplNfa(n); - const u8 *hend = q->history + q->hlength; - - return nfaExecMcClellan16_Q2i(n, offset, buffer, hend, cb, context, q, - m->flags & MCCLELLAN_FLAG_SINGLE, end, - STOP_AT_MATCH); -} - -char nfaExecMcClellan8_QR(const struct NFA *n, struct mq *q, ReportID report) { - u64a offset = q->offset; - const u8 *buffer = q->buffer; - NfaCallback cb = q->cb; - void *context = q->context; - assert(n->type == MCCLELLAN_NFA_8); + const u8 *hend = q->history + q->hlength; + + return nfaExecMcClellan16_Q2i(n, offset, buffer, hend, cb, context, q, + m->flags & MCCLELLAN_FLAG_SINGLE, end, + STOP_AT_MATCH); +} + +char nfaExecMcClellan8_QR(const struct NFA *n, struct mq *q, ReportID report) { + u64a offset = q->offset; + const u8 *buffer = q->buffer; + NfaCallback cb = q->cb; + void *context = q->context; + assert(n->type == MCCLELLAN_NFA_8); const struct mcclellan *m = getImplNfa(n); - const u8 *hend = q->history + q->hlength; - - char rv = nfaExecMcClellan8_Q2i(n, offset, buffer, hend, cb, context, q, - m->flags & MCCLELLAN_FLAG_SINGLE, 0 /* end */, - NO_MATCHES); - if (rv && nfaExecMcClellan8_inAccept(n, report, q)) { - return MO_MATCHES_PENDING; - } else { - return rv; - } -} - -char nfaExecMcClellan16_QR(const struct NFA *n, struct mq *q, ReportID report) { - u64a offset = q->offset; - const u8 *buffer = q->buffer; - NfaCallback cb = q->cb; - void *context = q->context; - assert(n->type == MCCLELLAN_NFA_16); + const u8 *hend = q->history + q->hlength; + + char rv = nfaExecMcClellan8_Q2i(n, offset, buffer, hend, cb, context, q, + m->flags & MCCLELLAN_FLAG_SINGLE, 0 /* end */, + NO_MATCHES); + if (rv && nfaExecMcClellan8_inAccept(n, report, q)) { + return MO_MATCHES_PENDING; + } else { + return rv; + } +} + +char nfaExecMcClellan16_QR(const struct NFA *n, struct mq *q, ReportID report) { + u64a offset = q->offset; + const u8 *buffer = q->buffer; + NfaCallback cb = q->cb; + void *context = 
q->context; + assert(n->type == MCCLELLAN_NFA_16); const struct mcclellan *m = getImplNfa(n); - const u8 *hend = q->history + q->hlength; - - char rv = nfaExecMcClellan16_Q2i(n, offset, buffer, hend, cb, context, q, - m->flags & MCCLELLAN_FLAG_SINGLE, - 0 /* end */, NO_MATCHES); - - if (rv && nfaExecMcClellan16_inAccept(n, report, q)) { - return MO_MATCHES_PENDING; - } else { - return rv; - } -} - -char nfaExecMcClellan8_initCompressedState(const struct NFA *nfa, u64a offset, - void *state, UNUSED u8 key) { + const u8 *hend = q->history + q->hlength; + + char rv = nfaExecMcClellan16_Q2i(n, offset, buffer, hend, cb, context, q, + m->flags & MCCLELLAN_FLAG_SINGLE, + 0 /* end */, NO_MATCHES); + + if (rv && nfaExecMcClellan16_inAccept(n, report, q)) { + return MO_MATCHES_PENDING; + } else { + return rv; + } +} + +char nfaExecMcClellan8_initCompressedState(const struct NFA *nfa, u64a offset, + void *state, UNUSED u8 key) { const struct mcclellan *m = getImplNfa(nfa); - u8 s = offset ? m->start_floating : m->start_anchored; - if (s) { - *(u8 *)state = s; - return 1; - } - return 0; -} - -char nfaExecMcClellan16_initCompressedState(const struct NFA *nfa, u64a offset, - void *state, UNUSED u8 key) { + u8 s = offset ? m->start_floating : m->start_anchored; + if (s) { + *(u8 *)state = s; + return 1; + } + return 0; +} + +char nfaExecMcClellan16_initCompressedState(const struct NFA *nfa, u64a offset, + void *state, UNUSED u8 key) { const struct mcclellan *m = getImplNfa(nfa); - u16 s = offset ? m->start_floating : m->start_anchored; + u16 s = offset ? m->start_floating : m->start_anchored; // new byte if (m->has_wide) { unaligned_store_u16((u16 *)state + 1, 0); } - if (s) { - unaligned_store_u16(state, s); - return 1; - } - return 0; -} - -void nfaExecMcClellan8_SimpStream(const struct NFA *nfa, char *state, - const u8 *buf, char top, size_t start_off, - size_t len, NfaCallback cb, void *ctxt) { + if (s) { + unaligned_store_u16(state, s); + return 1; + } + return 0; +} + +void nfaExecMcClellan8_SimpStream(const struct NFA *nfa, char *state, + const u8 *buf, char top, size_t start_off, + size_t len, NfaCallback cb, void *ctxt) { const struct mcclellan *m = getImplNfa(nfa); - + u32 s = top ? 
m->start_anchored : *(u8 *)state; - if (m->flags & MCCLELLAN_FLAG_SINGLE) { + if (m->flags & MCCLELLAN_FLAG_SINGLE) { mcclellanExec8_i(m, &s, buf + start_off, len - start_off, - start_off, cb, ctxt, 1, NULL, CALLBACK_OUTPUT); - } else { + start_off, cb, ctxt, 1, NULL, CALLBACK_OUTPUT); + } else { mcclellanExec8_i(m, &s, buf + start_off, len - start_off, - start_off, cb, ctxt, 0, NULL, CALLBACK_OUTPUT); - } + start_off, cb, ctxt, 0, NULL, CALLBACK_OUTPUT); + } *(u8 *)state = s; -} - -void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state, - const u8 *buf, char top, size_t start_off, - size_t len, NfaCallback cb, void *ctxt) { +} + +void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state, + const u8 *buf, char top, size_t start_off, + size_t len, NfaCallback cb, void *ctxt) { const struct mcclellan *m = getImplNfa(nfa); u32 s; - + if (top) { s = m->start_anchored; @@ -1242,109 +1242,109 @@ void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state, s = unaligned_load_u16(state); } - if (m->flags & MCCLELLAN_FLAG_SINGLE) { + if (m->flags & MCCLELLAN_FLAG_SINGLE) { mcclellanExec16_i(m, &s, state, buf + start_off, len - start_off, start_off, cb, ctxt, 1, NULL, CALLBACK_OUTPUT); - } else { + } else { mcclellanExec16_i(m, &s, state, buf + start_off, len - start_off, start_off, cb, ctxt, 0, NULL, CALLBACK_OUTPUT); - } + } unaligned_store_u16(state, s); -} - -char nfaExecMcClellan8_testEOD(const struct NFA *nfa, const char *state, +} + +char nfaExecMcClellan8_testEOD(const struct NFA *nfa, const char *state, UNUSED const char *streamState, u64a offset, NfaCallback callback, void *context) { return mcclellanCheckEOD(nfa, *(const u8 *)state, offset, callback, context); -} - -char nfaExecMcClellan16_testEOD(const struct NFA *nfa, const char *state, +} + +char nfaExecMcClellan16_testEOD(const struct NFA *nfa, const char *state, UNUSED const char *streamState, u64a offset, NfaCallback callback, void *context) { - assert(ISALIGNED_N(state, 2)); + assert(ISALIGNED_N(state, 2)); return mcclellanCheckEOD(nfa, *(const u16 *)state, offset, callback, context); -} - +} + char nfaExecMcClellan8_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) { - assert(nfa->scratchStateSize == 1); - *(u8 *)q->state = 0; - return 0; -} - + assert(nfa->scratchStateSize == 1); + *(u8 *)q->state = 0; + return 0; +} + char nfaExecMcClellan16_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) { const struct mcclellan *m = getImplNfa(nfa); assert(m->has_wide == 1 ? 
nfa->scratchStateSize == 4 : nfa->scratchStateSize == 2); - assert(ISALIGNED_N(q->state, 2)); - *(u16 *)q->state = 0; + assert(ISALIGNED_N(q->state, 2)); + *(u16 *)q->state = 0; // new byte if (m->has_wide) { unaligned_store_u16((u16 *)q->state + 1, 0); } - return 0; -} - -char nfaExecMcClellan8_queueCompressState(UNUSED const struct NFA *nfa, - const struct mq *q, UNUSED s64a loc) { - void *dest = q->streamState; - const void *src = q->state; - assert(nfa->scratchStateSize == 1); - assert(nfa->streamStateSize == 1); - *(u8 *)dest = *(const u8 *)src; - return 0; -} - -char nfaExecMcClellan8_expandState(UNUSED const struct NFA *nfa, void *dest, - const void *src, UNUSED u64a offset, - UNUSED u8 key) { - assert(nfa->scratchStateSize == 1); - assert(nfa->streamStateSize == 1); - *(u8 *)dest = *(const u8 *)src; - return 0; -} - -char nfaExecMcClellan16_queueCompressState(UNUSED const struct NFA *nfa, - const struct mq *q, - UNUSED s64a loc) { + return 0; +} + +char nfaExecMcClellan8_queueCompressState(UNUSED const struct NFA *nfa, + const struct mq *q, UNUSED s64a loc) { + void *dest = q->streamState; + const void *src = q->state; + assert(nfa->scratchStateSize == 1); + assert(nfa->streamStateSize == 1); + *(u8 *)dest = *(const u8 *)src; + return 0; +} + +char nfaExecMcClellan8_expandState(UNUSED const struct NFA *nfa, void *dest, + const void *src, UNUSED u64a offset, + UNUSED u8 key) { + assert(nfa->scratchStateSize == 1); + assert(nfa->streamStateSize == 1); + *(u8 *)dest = *(const u8 *)src; + return 0; +} + +char nfaExecMcClellan16_queueCompressState(UNUSED const struct NFA *nfa, + const struct mq *q, + UNUSED s64a loc) { const struct mcclellan *m = getImplNfa(nfa); - void *dest = q->streamState; - const void *src = q->state; + void *dest = q->streamState; + const void *src = q->state; assert(m->has_wide == 1 ? nfa->scratchStateSize == 4 : nfa->scratchStateSize == 2); assert(m->has_wide == 1 ? nfa->streamStateSize == 4 : nfa->streamStateSize == 2); - assert(ISALIGNED_N(src, 2)); - unaligned_store_u16(dest, *(const u16 *)(src)); + assert(ISALIGNED_N(src, 2)); + unaligned_store_u16(dest, *(const u16 *)(src)); // new byte if (m->has_wide) { unaligned_store_u16((u16 *)dest + 1, *((const u16 *)src + 1)); } - return 0; -} - -char nfaExecMcClellan16_expandState(UNUSED const struct NFA *nfa, void *dest, - const void *src, UNUSED u64a offset, - UNUSED u8 key) { + return 0; +} + +char nfaExecMcClellan16_expandState(UNUSED const struct NFA *nfa, void *dest, + const void *src, UNUSED u64a offset, + UNUSED u8 key) { const struct mcclellan *m = getImplNfa(nfa); assert(m->has_wide == 1 ? nfa->scratchStateSize == 4 : nfa->scratchStateSize == 2); assert(m->has_wide == 1 ? 
nfa->streamStateSize == 4 : nfa->streamStateSize == 2); - assert(ISALIGNED_N(dest, 2)); - *(u16 *)dest = unaligned_load_u16(src); + assert(ISALIGNED_N(dest, 2)); + *(u16 *)dest = unaligned_load_u16(src); // new byte if (m->has_wide) { *((u16 *)dest + 1) = unaligned_load_u16((const u16 *)src + 1); } - return 0; -} + return 0; +} diff --git a/contrib/libs/hyperscan/src/nfa/mcclellan.h b/contrib/libs/hyperscan/src/nfa/mcclellan.h index 92a6dd8f2c..9c6b3eecb1 100644 --- a/contrib/libs/hyperscan/src/nfa/mcclellan.h +++ b/contrib/libs/hyperscan/src/nfa/mcclellan.h @@ -1,109 +1,109 @@ -/* +/* * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef MCCLELLAN_H -#define MCCLELLAN_H - -#include "callback.h" -#include "ue2common.h" - -struct mq; -struct NFA; - -// 8-bit McClellan - -char nfaExecMcClellan8_testEOD(const struct NFA *nfa, const char *state, - const char *streamState, u64a offset, + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
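The queueCompressState/expandState pairs above define the whole streaming footprint of a McClellan DFA: one byte for the 8-bit engine, an unaligned u16 for the 16-bit engine, plus one extra slot when has_wide is set. A minimal sketch of that layout, assuming only what the asserts above state; store16/load16 and the function names are illustrative stand-ins, not the library API:

    #include <stdint.h>
    #include <string.h>

    /* stand-ins for unaligned_store_u16 / unaligned_load_u16 */
    static void store16(void *p, uint16_t v) { memcpy(p, &v, sizeof(v)); }
    static uint16_t load16(const void *p) { uint16_t v; memcpy(&v, p, sizeof(v)); return v; }

    /* pack the aligned scratch state into the unaligned stream state */
    static void compress16(char *stream, const char *scratch, int has_wide) {
        store16(stream, *(const uint16_t *)scratch);   /* current state id */
        if (has_wide) {                                /* extra wide-chain slot */
            store16(stream + 2, load16(scratch + 2));
        }
    }

    /* unpack the stream state back into aligned scratch space */
    static void expand16(char *scratch, const char *stream, int has_wide) {
        *(uint16_t *)scratch = load16(stream);
        if (has_wide) {
            *((uint16_t *)scratch + 1) = load16(stream + 2);
        }
    }

The 8-bit variant needs no alignment handling at all, which is why its compress and expand bodies above are single byte copies.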
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef MCCLELLAN_H +#define MCCLELLAN_H + +#include "callback.h" +#include "ue2common.h" + +struct mq; +struct NFA; + +// 8-bit McClellan + +char nfaExecMcClellan8_testEOD(const struct NFA *nfa, const char *state, + const char *streamState, u64a offset, NfaCallback callback, void *context); -char nfaExecMcClellan8_Q(const struct NFA *n, struct mq *q, s64a end); -char nfaExecMcClellan8_Q2(const struct NFA *n, struct mq *q, s64a end); -char nfaExecMcClellan8_QR(const struct NFA *n, struct mq *q, ReportID report); -char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q); -char nfaExecMcClellan8_inAccept(const struct NFA *n, ReportID report, - struct mq *q); +char nfaExecMcClellan8_Q(const struct NFA *n, struct mq *q, s64a end); +char nfaExecMcClellan8_Q2(const struct NFA *n, struct mq *q, s64a end); +char nfaExecMcClellan8_QR(const struct NFA *n, struct mq *q, ReportID report); +char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q); +char nfaExecMcClellan8_inAccept(const struct NFA *n, ReportID report, + struct mq *q); char nfaExecMcClellan8_inAnyAccept(const struct NFA *n, struct mq *q); -char nfaExecMcClellan8_queueInitState(const struct NFA *n, struct mq *q); -char nfaExecMcClellan8_initCompressedState(const struct NFA *n, u64a offset, - void *state, u8 key); -char nfaExecMcClellan8_queueCompressState(const struct NFA *nfa, - const struct mq *q, s64a loc); -char nfaExecMcClellan8_expandState(const struct NFA *nfa, void *dest, - const void *src, u64a offset, u8 key); - -#define nfaExecMcClellan8_B_Reverse NFA_API_NO_IMPL -#define nfaExecMcClellan8_zombie_status NFA_API_ZOMBIE_NO_IMPL - -// 16-bit McClellan - -char nfaExecMcClellan16_testEOD(const struct NFA *nfa, const char *state, - const char *streamState, u64a offset, +char nfaExecMcClellan8_queueInitState(const struct NFA *n, struct mq *q); +char nfaExecMcClellan8_initCompressedState(const struct NFA *n, u64a offset, + void *state, u8 key); +char nfaExecMcClellan8_queueCompressState(const struct NFA *nfa, + const struct mq *q, s64a loc); +char nfaExecMcClellan8_expandState(const struct NFA *nfa, void *dest, + const void *src, u64a offset, u8 key); + +#define nfaExecMcClellan8_B_Reverse NFA_API_NO_IMPL +#define nfaExecMcClellan8_zombie_status NFA_API_ZOMBIE_NO_IMPL + +// 16-bit McClellan + +char nfaExecMcClellan16_testEOD(const struct NFA *nfa, const char *state, + const char *streamState, u64a offset, NfaCallback callback, void *context); -char nfaExecMcClellan16_Q(const struct NFA *n, struct mq *q, s64a end); -char nfaExecMcClellan16_Q2(const struct NFA *n, struct mq *q, s64a end); -char nfaExecMcClellan16_QR(const struct NFA *n, struct mq *q, ReportID report); -char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q); -char nfaExecMcClellan16_inAccept(const struct NFA *n, ReportID report, - struct mq *q); +char nfaExecMcClellan16_Q(const struct NFA *n, struct mq *q, s64a end); +char nfaExecMcClellan16_Q2(const struct NFA *n, struct mq *q, s64a 
end); +char nfaExecMcClellan16_QR(const struct NFA *n, struct mq *q, ReportID report); +char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q); +char nfaExecMcClellan16_inAccept(const struct NFA *n, ReportID report, + struct mq *q); char nfaExecMcClellan16_inAnyAccept(const struct NFA *n, struct mq *q); -char nfaExecMcClellan16_queueInitState(const struct NFA *n, struct mq *q); -char nfaExecMcClellan16_initCompressedState(const struct NFA *n, u64a offset, - void *state, u8 key); -char nfaExecMcClellan16_queueCompressState(const struct NFA *nfa, - const struct mq *q, s64a loc); -char nfaExecMcClellan16_expandState(const struct NFA *nfa, void *dest, - const void *src, u64a offset, u8 key); - -#define nfaExecMcClellan16_B_Reverse NFA_API_NO_IMPL -#define nfaExecMcClellan16_zombie_status NFA_API_ZOMBIE_NO_IMPL - -/** - * Simple streaming mode calls: - * - always uses the anchored start state, regardless of whether top is set and - * regardless of start_off - * - never checks eod - */ -void nfaExecMcClellan8_SimpStream(const struct NFA *nfa, char *state, - const u8 *buf, char top, size_t start_off, - size_t len, NfaCallback cb, void *ctxt); - -void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state, - const u8 *buf, char top, size_t start_off, - size_t len, NfaCallback cb, void *ctxt); - -/** - * Simple block mode calls: - * - always uses the anchored start state regardless of initial start - */ - -char nfaExecMcClellan8_B(const struct NFA *n, u64a offset, const u8 *buffer, - size_t length, NfaCallback cb, void *context); - -char nfaExecMcClellan16_B(const struct NFA *n, u64a offset, const u8 *buffer, - size_t length, NfaCallback cb, void *context); - -#endif +char nfaExecMcClellan16_queueInitState(const struct NFA *n, struct mq *q); +char nfaExecMcClellan16_initCompressedState(const struct NFA *n, u64a offset, + void *state, u8 key); +char nfaExecMcClellan16_queueCompressState(const struct NFA *nfa, + const struct mq *q, s64a loc); +char nfaExecMcClellan16_expandState(const struct NFA *nfa, void *dest, + const void *src, u64a offset, u8 key); + +#define nfaExecMcClellan16_B_Reverse NFA_API_NO_IMPL +#define nfaExecMcClellan16_zombie_status NFA_API_ZOMBIE_NO_IMPL + +/** + * Simple streaming mode calls: + * - always uses the anchored start state, regardless of whether top is set and + * regardless of start_off + * - never checks eod + */ +void nfaExecMcClellan8_SimpStream(const struct NFA *nfa, char *state, + const u8 *buf, char top, size_t start_off, + size_t len, NfaCallback cb, void *ctxt); + +void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state, + const u8 *buf, char top, size_t start_off, + size_t len, NfaCallback cb, void *ctxt); + +/** + * Simple block mode calls: + * - always uses the anchored start state regardless of initial start + */ + +char nfaExecMcClellan8_B(const struct NFA *n, u64a offset, const u8 *buffer, + size_t length, NfaCallback cb, void *context); + +char nfaExecMcClellan16_B(const struct NFA *n, u64a offset, const u8 *buffer, + size_t length, NfaCallback cb, void *context); + +#endif diff --git a/contrib/libs/hyperscan/src/nfa/mcclellan_common_impl.h b/contrib/libs/hyperscan/src/nfa/mcclellan_common_impl.h index 74b27e7534..7b0e7f48cd 100644 --- a/contrib/libs/hyperscan/src/nfa/mcclellan_common_impl.h +++ b/contrib/libs/hyperscan/src/nfa/mcclellan_common_impl.h @@ -1,87 +1,87 @@ -/* +/* * Copyright (c) 2015-2018, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided
that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -enum MatchMode { - CALLBACK_OUTPUT, - STOP_AT_MATCH, - NO_MATCHES -}; - -static really_inline + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
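The MatchMode enum restored below is what distinguishes the entry points seen earlier in this diff: _Q runs the shared exec loop with CALLBACK_OUTPUT, _Q2 with STOP_AT_MATCH, and _QR with NO_MATCHES before checking inAccept for MO_MATCHES_PENDING. A small sketch of that mapping; describe() is purely illustrative and not part of the library:

    enum MatchMode { CALLBACK_OUTPUT, STOP_AT_MATCH, NO_MATCHES };

    /* how the three queue entry points use the shared exec loop */
    static const char *describe(enum MatchMode mode) {
        switch (mode) {
        case CALLBACK_OUTPUT:
            return "_Q: fire the callback for every match seen";
        case STOP_AT_MATCH:
            return "_Q2: stop scanning at the first match location";
        case NO_MATCHES:
            return "_QR: just advance state; accepts are checked afterwards";
        }
        return "unknown";
    }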
+ */ + +enum MatchMode { + CALLBACK_OUTPUT, + STOP_AT_MATCH, + NO_MATCHES +}; + +static really_inline const struct mstate_aux *get_aux(const struct mcclellan *m, u32 s) { - const char *nfa = (const char *)m - sizeof(struct NFA); - const struct mstate_aux *aux - = s + (const struct mstate_aux *)(nfa + m->aux_offset); - - assert(ISALIGNED(aux)); - return aux; -} - -static really_inline + const char *nfa = (const char *)m - sizeof(struct NFA); + const struct mstate_aux *aux + = s + (const struct mstate_aux *)(nfa + m->aux_offset); + + assert(ISALIGNED(aux)); + return aux; +} + +static really_inline u32 mcclellanEnableStarts(const struct mcclellan *m, u32 s) { - const struct mstate_aux *aux = get_aux(m, s); - + const struct mstate_aux *aux = get_aux(m, s); + DEBUG_PRINTF("enabling starts %u->%hu\n", s, aux->top); - return aux->top; -} - -static really_inline + return aux->top; +} + +static really_inline u32 doSherman16(const char *sherman_state, u8 cprime, const u16 *succ_table, - u32 as) { - assert(ISALIGNED_N(sherman_state, 16)); - - u8 len = *(const u8 *)(sherman_state + SHERMAN_LEN_OFFSET); - - if (len) { - m128 ss_char = load128(sherman_state); - m128 cur_char = set16x8(cprime); - - u32 z = movemask128(eq128(ss_char, cur_char)); - - /* remove header cruft: type 1, len 1, daddy 2*/ - z &= ~0xf; - z &= (1U << (len + 4)) - 1; - - if (z) { - u32 i = ctz32(z & ~0xf) - 4; - + u32 as) { + assert(ISALIGNED_N(sherman_state, 16)); + + u8 len = *(const u8 *)(sherman_state + SHERMAN_LEN_OFFSET); + + if (len) { + m128 ss_char = load128(sherman_state); + m128 cur_char = set16x8(cprime); + + u32 z = movemask128(eq128(ss_char, cur_char)); + + /* remove header cruft: type 1, len 1, daddy 2*/ + z &= ~0xf; + z &= (1U << (len + 4)) - 1; + + if (z) { + u32 i = ctz32(z & ~0xf) - 4; + u32 s_out = unaligned_load_u16((const u8 *)sherman_state - + SHERMAN_STATES_OFFSET(len) - + sizeof(u16) * i); + + SHERMAN_STATES_OFFSET(len) + + sizeof(u16) * i); DEBUG_PRINTF("found sherman match at %u/%u for c'=%hhu s=%u\n", i, len, cprime, s_out); - return s_out; - } - } - + return s_out; + } + } + u32 daddy = *(const u16 *)(sherman_state + SHERMAN_DADDY_OFFSET); return succ_table[(daddy << as) + cprime]; -} +} static really_inline u16 doWide16(const char *wide_entry, const u8 **c_inout, const u8 *end, diff --git a/contrib/libs/hyperscan/src/nfa/mcclellan_internal.h b/contrib/libs/hyperscan/src/nfa/mcclellan_internal.h index 3c237f5153..482fdb1bc9 100644 --- a/contrib/libs/hyperscan/src/nfa/mcclellan_internal.h +++ b/contrib/libs/hyperscan/src/nfa/mcclellan_internal.h @@ -1,55 +1,55 @@ -/* +/* * Copyright (c) 2015-2018, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
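doSherman16() above probes a Sherman state's explicit edge list with a 16-byte vector compare and falls back to the daddy state's dense row on a miss. A scalar sketch of the same lookup, assuming the SHERMAN_* byte offsets used by the real code (type at 0, len at 1, daddy at 2, chars at 4, successors at 4 + len); this is illustrative, not the shipped SIMD path:

    #include <stdint.h>
    #include <string.h>

    static uint16_t load_le16(const void *p) { uint16_t v; memcpy(&v, p, 2); return v; }

    static uint32_t sherman_next(const char *rec, uint8_t cprime,
                                 const uint16_t *succ_table, uint32_t alpha_shift) {
        uint8_t len = (uint8_t)rec[1];                 /* SHERMAN_LEN_OFFSET */
        for (uint8_t i = 0; i < len; i++) {
            if ((uint8_t)rec[4 + i] == cprime) {       /* explicit edge hit */
                /* SHERMAN_STATES_OFFSET(len) + sizeof(u16) * i */
                return load_le16(rec + 4 + len + 2 * i);
            }
        }
        uint16_t daddy = load_le16(rec + 2);           /* SHERMAN_DADDY_OFFSET */
        return succ_table[((uint32_t)daddy << alpha_shift) + cprime];
    }

The daddy fallback is the point of the scheme: a Sherman state stores only the few transitions that differ from its parent's dense row.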
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef MCCLELLAN_INTERNAL_H -#define MCCLELLAN_INTERNAL_H - -#include "nfa_internal.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -#define ACCEPT_FLAG 0x8000 -#define ACCEL_FLAG 0x4000 -#define STATE_MASK 0x3fff - -#define SHERMAN_STATE 1 - -#define SHERMAN_TYPE_OFFSET 0 -#define SHERMAN_FIXED_SIZE 32 - -#define SHERMAN_LEN_OFFSET 1 -#define SHERMAN_DADDY_OFFSET 2 -#define SHERMAN_CHARS_OFFSET 4 -#define SHERMAN_STATES_OFFSET(sso_len) (4 + (sso_len)) - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
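The ACCEPT_FLAG/ACCEL_FLAG/STATE_MASK constants in this header pack two status bits into every 16-bit successor entry, leaving a 14-bit state id (which is what the STATE_MASK check in allocateFSN16(), later in this diff, guards). A sketch of decoding such an entry, assuming the 0x8000/0x4000/0x3fff values shown:

    #include <stdint.h>

    /* a 16-bit successor entry: [accept][accel][14-bit next state] */
    static void decode_succ(uint16_t entry, uint16_t *next_state,
                            int *is_accept, int *is_accel) {
        *next_state = entry & 0x3fff;          /* STATE_MASK */
        *is_accept  = (entry & 0x8000) != 0;   /* ACCEPT_FLAG */
        *is_accel   = (entry & 0x4000) != 0;   /* ACCEL_FLAG */
    }

Keeping the flags inside the transition word lets the run loop detect accept and accel states without touching the mstate_aux array on the hot path.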
+ */ + +#ifndef MCCLELLAN_INTERNAL_H +#define MCCLELLAN_INTERNAL_H + +#include "nfa_internal.h" + +#ifdef __cplusplus +extern "C" +{ +#endif + +#define ACCEPT_FLAG 0x8000 +#define ACCEL_FLAG 0x4000 +#define STATE_MASK 0x3fff + +#define SHERMAN_STATE 1 + +#define SHERMAN_TYPE_OFFSET 0 +#define SHERMAN_FIXED_SIZE 32 + +#define SHERMAN_LEN_OFFSET 1 +#define SHERMAN_DADDY_OFFSET 2 +#define SHERMAN_CHARS_OFFSET 4 +#define SHERMAN_STATES_OFFSET(sso_len) (4 + (sso_len)) + #define WIDE_STATE 2 #define WIDE_ENTRY_OFFSET8(weo_pos) (2 + (weo_pos)) #define WIDE_ENTRY_OFFSET16(weo_pos) (4 + (weo_pos)) @@ -60,66 +60,66 @@ extern "C" #define WIDE_SYMBOL_OFFSET16 2 #define WIDE_TRANSITION_OFFSET16(wto_width) (2 + ROUNDUP_N(wto_width, 2)) -struct report_list { - u32 count; - ReportID report[]; -}; - -struct mstate_aux { - u32 accept; - u32 accept_eod; - u16 top; - u32 accel_offset; /* relative to start of struct mcclellan; 0 if no accel */ -}; - -#define MCCLELLAN_FLAG_SINGLE 1 /**< we raise only single accept id */ - -struct mcclellan { - u16 state_count; /**< total number of states */ - u32 length; /**< length of dfa in bytes */ - u16 start_anchored; /**< anchored start state */ - u16 start_floating; /**< floating start state */ - u32 aux_offset; /**< offset of the aux structures relative to the start of - * the nfa structure */ +struct report_list { + u32 count; + ReportID report[]; +}; + +struct mstate_aux { + u32 accept; + u32 accept_eod; + u16 top; + u32 accel_offset; /* relative to start of struct mcclellan; 0 if no accel */ +}; + +#define MCCLELLAN_FLAG_SINGLE 1 /**< we raise only single accept id */ + +struct mcclellan { + u16 state_count; /**< total number of states */ + u32 length; /**< length of dfa in bytes */ + u16 start_anchored; /**< anchored start state */ + u16 start_floating; /**< floating start state */ + u32 aux_offset; /**< offset of the aux structures relative to the start of + * the nfa structure */ u32 sherman_offset; /**< offset of array of sherman state offsets the * state_info structures relative to the start of the * nfa structure */ u32 sherman_end; /**< offset of the end of the state_info structures * relative to the start of the nfa structure */ - u16 accel_limit_8; /**< 8 bit, lowest accelerable state */ - u16 accept_limit_8; /**< 8 bit, lowest accept state */ - u16 sherman_limit; /**< lowest sherman state */ + u16 accel_limit_8; /**< 8 bit, lowest accelerable state */ + u16 accept_limit_8; /**< 8 bit, lowest accept state */ + u16 sherman_limit; /**< lowest sherman state */ u16 wide_limit; /**< 8/16 bit, lowest wide head state */ - u8 alphaShift; - u8 flags; + u8 alphaShift; + u8 flags; u8 has_accel; /**< 1 iff there are any accel plans */ u8 has_wide; /**< 1 iff there exists any wide state */ - u8 remap[256]; /**< remaps characters to a smaller alphabet */ - ReportID arb_report; /**< one of the accepts that this dfa may raise */ + u8 remap[256]; /**< remaps characters to a smaller alphabet */ + ReportID arb_report; /**< one of the accepts that this dfa may raise */ u32 accel_offset; /**< offset of accel structures from start of McClellan */ - u32 haig_offset; /**< reserved for use by Haig, relative to start of NFA */ + u32 haig_offset; /**< reserved for use by Haig, relative to start of NFA */ u32 wide_offset; /**< offset of the wide state entries to the start of the * nfa structure */ -}; - -static really_inline -const char *findShermanState(UNUSED const struct mcclellan *m, +}; + +static really_inline +const char *findShermanState(UNUSED const struct mcclellan *m, const 
char *sherman_base_offset, u32 sherman_base, u32 s) { - const char *rv - = sherman_base_offset + SHERMAN_FIXED_SIZE * (s - sherman_base); - assert(rv < (const char *)m + m->length - sizeof(struct NFA)); - UNUSED u8 type = *(const u8 *)(rv + SHERMAN_TYPE_OFFSET); - assert(type == SHERMAN_STATE); - return rv; -} - -static really_inline -char *findMutableShermanState(char *sherman_base_offset, u16 sherman_base, + const char *rv + = sherman_base_offset + SHERMAN_FIXED_SIZE * (s - sherman_base); + assert(rv < (const char *)m + m->length - sizeof(struct NFA)); + UNUSED u8 type = *(const u8 *)(rv + SHERMAN_TYPE_OFFSET); + assert(type == SHERMAN_STATE); + return rv; +} + +static really_inline +char *findMutableShermanState(char *sherman_base_offset, u16 sherman_base, u32 s) { - return sherman_base_offset + SHERMAN_FIXED_SIZE * (s - sherman_base); -} - + return sherman_base_offset + SHERMAN_FIXED_SIZE * (s - sherman_base); +} + static really_inline const char *findWideEntry8(UNUSED const struct mcclellan *m, const char *wide_base, u32 wide_limit, u32 s) { @@ -157,8 +157,8 @@ char *findMutableWideEntry16(char *wide_base, u32 wide_limit, u32 s) { return wide_base + entry_offset; } -#ifdef __cplusplus -} -#endif - -#endif +#ifdef __cplusplus +} +#endif + +#endif diff --git a/contrib/libs/hyperscan/src/nfa/mcclellancompile.cpp b/contrib/libs/hyperscan/src/nfa/mcclellancompile.cpp index 4612e53d3f..27ec1716e9 100644 --- a/contrib/libs/hyperscan/src/nfa/mcclellancompile.cpp +++ b/contrib/libs/hyperscan/src/nfa/mcclellancompile.cpp @@ -1,127 +1,127 @@ -/* +/* * Copyright (c) 2015-2020, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "mcclellancompile.h" - -#include "accel.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
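struct mcclellan above partitions the state id space with plain limits: dense states first, Sherman states from sherman_limit, wide states from wide_limit, which is how findShermanState() and findWideEntry8() can locate a record from an id alone. A sketch of that classification, reusing the struct's field names (illustrative only):

    #include <stdint.h>

    enum kind { KIND_NORMAL, KIND_SHERMAN, KIND_WIDE };

    /* mirror of the id ranges implied by struct mcclellan */
    static enum kind classify_state(uint32_t s, uint16_t sherman_limit,
                                    uint16_t wide_limit, uint8_t has_wide) {
        if (has_wide && s >= wide_limit) {
            return KIND_WIDE;      /* inline wide-entry record */
        }
        if (s >= sherman_limit) {
            return KIND_SHERMAN;   /* 32-byte Sherman record */
        }
        return KIND_NORMAL;        /* dense row in the succ table */
    }

allocateFSN16() later in this diff numbers states in exactly this order (normal, Sherman, wide head, wide body), which keeps these comparisons cheap.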
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "mcclellancompile.h" + +#include "accel.h" #include "accelcompile.h" -#include "grey.h" -#include "mcclellan_internal.h" +#include "grey.h" +#include "mcclellan_internal.h" #include "mcclellancompile_util.h" -#include "nfa_internal.h" -#include "shufticompile.h" -#include "trufflecompile.h" -#include "ue2common.h" -#include "util/alloc.h" -#include "util/bitutils.h" -#include "util/charreach.h" -#include "util/compare.h" -#include "util/compile_context.h" -#include "util/container.h" -#include "util/make_unique.h" -#include "util/order_check.h" +#include "nfa_internal.h" +#include "shufticompile.h" +#include "trufflecompile.h" +#include "ue2common.h" +#include "util/alloc.h" +#include "util/bitutils.h" +#include "util/charreach.h" +#include "util/compare.h" +#include "util/compile_context.h" +#include "util/container.h" +#include "util/make_unique.h" +#include "util/order_check.h" #include "util/report_manager.h" #include "util/flat_containers.h" -#include "util/unaligned.h" -#include "util/verify_types.h" - -#include <algorithm> -#include <cstdio> -#include <cstdlib> -#include <cstring> -#include <map> -#include <memory> +#include "util/unaligned.h" +#include "util/verify_types.h" + +#include <algorithm> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <map> +#include <memory> #include <queue> -#include <set> -#include <vector> - +#include <set> +#include <vector> + #include <boost/range/adaptor/map.hpp> #include "mcclellandump.h" #include "util/dump_util.h" #include "util/dump_charclass.h" -using namespace std; +using namespace std; using boost::adaptors::map_keys; using boost::dynamic_bitset; - #define ACCEL_DFA_MAX_OFFSET_DEPTH 4 - /** Maximum tolerated number of escape characters from an accel state. * This is larger than for an nfa, as we don't have a budget and the nfa cheats on stop * characters for sets of states */ #define ACCEL_DFA_MAX_STOP_CHAR 160 - /** Maximum tolerated number of escape characters from a sds accel state. Larger * than for normal states, as accelerating sds is important.
Matches NFA value */ #define ACCEL_DFA_MAX_FLOATING_STOP_CHAR 192 - + namespace ue2 { -namespace /* anon */ { - -struct dstate_extra { +namespace /* anon */ { + +struct dstate_extra { u16 daddytaken = 0; bool shermanState = false; bool wideState = false; bool wideHead = false; -}; - -struct dfa_info { +}; + +struct dfa_info { accel_dfa_build_strat &strat; - raw_dfa &raw; - vector<dstate> &states; - vector<dstate_extra> extra; + raw_dfa &raw; + vector<dstate> &states; + vector<dstate_extra> extra; vector<vector<dstate_id_t>> wide_state_chain; vector<vector<symbol_t>> wide_symbol_chain; - const u16 alpha_size; /* including special symbols */ - const array<u16, ALPHABET_SIZE> &alpha_remap; - const u16 impl_alpha_size; - - u8 getAlphaShift() const; - + const u16 alpha_size; /* including special symbols */ + const array<u16, ALPHABET_SIZE> &alpha_remap; + const u16 impl_alpha_size; + + u8 getAlphaShift() const; + explicit dfa_info(accel_dfa_build_strat &s) - : strat(s), - raw(s.get_raw()), - states(raw.states), - extra(raw.states.size()), - alpha_size(raw.alpha_size), - alpha_remap(raw.alpha_remap), - impl_alpha_size(raw.getImplAlphaSize()) {} - - dstate_id_t implId(dstate_id_t raw_id) const { - return states[raw_id].impl_id; - } - - bool is_sherman(dstate_id_t raw_id) const { - return extra[raw_id].shermanState; - } - + : strat(s), + raw(s.get_raw()), + states(raw.states), + extra(raw.states.size()), + alpha_size(raw.alpha_size), + alpha_remap(raw.alpha_remap), + impl_alpha_size(raw.getImplAlphaSize()) {} + + dstate_id_t implId(dstate_id_t raw_id) const { + return states[raw_id].impl_id; + } + + bool is_sherman(dstate_id_t raw_id) const { + return extra[raw_id].shermanState; + } + bool is_widestate(dstate_id_t raw_id) const { return extra[raw_id].wideState; } @@ -130,18 +130,18 @@ struct dfa_info { return extra[raw_id].wideHead; } - size_t size(void) const { return states.size(); } -}; - -u8 dfa_info::getAlphaShift() const { - if (impl_alpha_size < 2) { - return 1; - } else { - /* log2 round up */ - return 32 - clz32(impl_alpha_size - 1); - } -} - + size_t size(void) const { return states.size(); } +}; + +u8 dfa_info::getAlphaShift() const { + if (impl_alpha_size < 2) { + return 1; + } else { + /* log2 round up */ + return 32 - clz32(impl_alpha_size - 1); + } +} + struct state_prev_info { vector<vector<dstate_id_t>> prev_vec; explicit state_prev_info(size_t alpha_size) : prev_vec(alpha_size) {} @@ -171,80 +171,80 @@ DfaPrevInfo::DfaPrevInfo(raw_dfa &rdfa) } } } -} // namespace - -static -mstate_aux *getAux(NFA *n, dstate_id_t i) { - assert(isMcClellanType(n->type)); - - mcclellan *m = (mcclellan *)getMutableImplNfa(n); - mstate_aux *aux_base = (mstate_aux *)((char *)n + m->aux_offset); - - mstate_aux *aux = aux_base + i; - assert((const char *)aux < (const char *)n + m->length); - return aux; -} - -static -void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) { - assert((size_t)succ_table % 2 == 0); - assert(n->type == MCCLELLAN_NFA_16); - u8 alphaShift = info.getAlphaShift(); - u16 alphaSize = info.impl_alpha_size; - mcclellan *m = (mcclellan *)getMutableImplNfa(n); - - /* handle the normal states */ - for (u32 i = 0; i < m->sherman_limit; i++) { - for (size_t j = 0; j < alphaSize; j++) { - size_t c_prime = (i << alphaShift) + j; - +} // namespace + +static +mstate_aux *getAux(NFA *n, dstate_id_t i) { + assert(isMcClellanType(n->type)); + + mcclellan *m = (mcclellan *)getMutableImplNfa(n); + mstate_aux *aux_base = (mstate_aux *)((char *)n + m->aux_offset); + + mstate_aux *aux = 
aux_base + i; + assert((const char *)aux < (const char *)n + m->length); + return aux; +} + +static +void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) { + assert((size_t)succ_table % 2 == 0); + assert(n->type == MCCLELLAN_NFA_16); + u8 alphaShift = info.getAlphaShift(); + u16 alphaSize = info.impl_alpha_size; + mcclellan *m = (mcclellan *)getMutableImplNfa(n); + + /* handle the normal states */ + for (u32 i = 0; i < m->sherman_limit; i++) { + for (size_t j = 0; j < alphaSize; j++) { + size_t c_prime = (i << alphaShift) + j; + // wide state has no aux structure. if (m->has_wide && succ_table[c_prime] >= m->wide_limit) { continue; } - mstate_aux *aux = getAux(n, succ_table[c_prime]); - - if (aux->accept) { - succ_table[c_prime] |= ACCEPT_FLAG; - } - - if (aux->accel_offset) { - succ_table[c_prime] |= ACCEL_FLAG; - } - } - } - - /* handle the sherman states */ - char *sherman_base_offset = (char *)n + m->sherman_offset; + mstate_aux *aux = getAux(n, succ_table[c_prime]); + + if (aux->accept) { + succ_table[c_prime] |= ACCEPT_FLAG; + } + + if (aux->accel_offset) { + succ_table[c_prime] |= ACCEL_FLAG; + } + } + } + + /* handle the sherman states */ + char *sherman_base_offset = (char *)n + m->sherman_offset; u16 sherman_ceil = m->has_wide == 1 ? m->wide_limit : m->state_count; for (u16 j = m->sherman_limit; j < sherman_ceil; j++) { - char *sherman_cur - = findMutableShermanState(sherman_base_offset, m->sherman_limit, j); - assert(*(sherman_cur + SHERMAN_TYPE_OFFSET) == SHERMAN_STATE); - u8 len = *(u8 *)(sherman_cur + SHERMAN_LEN_OFFSET); - u16 *succs = (u16 *)(sherman_cur + SHERMAN_STATES_OFFSET(len)); - - for (u8 i = 0; i < len; i++) { - u16 succ_i = unaligned_load_u16((u8 *)&succs[i]); + char *sherman_cur + = findMutableShermanState(sherman_base_offset, m->sherman_limit, j); + assert(*(sherman_cur + SHERMAN_TYPE_OFFSET) == SHERMAN_STATE); + u8 len = *(u8 *)(sherman_cur + SHERMAN_LEN_OFFSET); + u16 *succs = (u16 *)(sherman_cur + SHERMAN_STATES_OFFSET(len)); + + for (u8 i = 0; i < len; i++) { + u16 succ_i = unaligned_load_u16((u8 *)&succs[i]); // wide state has no aux structure. 
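// (Wide states are numbered at or above wide_limit and keep their
// transitions inline in the wide-entry region rather than in the aux
// array, so there is no mstate_aux to read here; skipping them also
// leaves wide ids free of the ACCEPT_FLAG/ACCEL_FLAG bits.)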
if (m->has_wide && succ_i >= m->wide_limit) { continue; } - mstate_aux *aux = getAux(n, succ_i); - - if (aux->accept) { - succ_i |= ACCEPT_FLAG; - } - - if (aux->accel_offset) { - succ_i |= ACCEL_FLAG; - } - - unaligned_store_u16((u8 *)&succs[i], succ_i); - } - } + mstate_aux *aux = getAux(n, succ_i); + + if (aux->accept) { + succ_i |= ACCEPT_FLAG; + } + + if (aux->accel_offset) { + succ_i |= ACCEL_FLAG; + } + + unaligned_store_u16((u8 *)&succs[i], succ_i); + } + } /* handle the wide states */ if (m->has_wide) { @@ -290,53 +290,53 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) { } } } -} - +} + u32 mcclellan_build_strat::max_allowed_offset_accel() const { return ACCEL_DFA_MAX_OFFSET_DEPTH; -} - +} + u32 mcclellan_build_strat::max_stop_char() const { return ACCEL_DFA_MAX_STOP_CHAR; -} - +} + u32 mcclellan_build_strat::max_floating_stop_char() const { return ACCEL_DFA_MAX_FLOATING_STOP_CHAR; -} - -static -void populateBasicInfo(size_t state_size, const dfa_info &info, - u32 total_size, u32 aux_offset, u32 accel_offset, - u32 accel_count, ReportID arb, bool single, NFA *nfa) { - assert(state_size == sizeof(u16) || state_size == sizeof(u8)); - - nfa->length = total_size; - nfa->nPositions = info.states.size(); - - nfa->scratchStateSize = verify_u32(state_size); - nfa->streamStateSize = verify_u32(state_size); - - if (state_size == sizeof(u8)) { - nfa->type = MCCLELLAN_NFA_8; - } else { - nfa->type = MCCLELLAN_NFA_16; - } - - mcclellan *m = (mcclellan *)getMutableImplNfa(nfa); - for (u32 i = 0; i < 256; i++) { - m->remap[i] = verify_u8(info.alpha_remap[i]); - } - m->alphaShift = info.getAlphaShift(); - m->length = total_size; - m->aux_offset = aux_offset; - m->accel_offset = accel_offset; - m->arb_report = arb; - m->state_count = verify_u16(info.size()); - m->start_anchored = info.implId(info.raw.start_anchored); - m->start_floating = info.implId(info.raw.start_floating); - m->has_accel = accel_count ? 1 : 0; +} + +static +void populateBasicInfo(size_t state_size, const dfa_info &info, + u32 total_size, u32 aux_offset, u32 accel_offset, + u32 accel_count, ReportID arb, bool single, NFA *nfa) { + assert(state_size == sizeof(u16) || state_size == sizeof(u8)); + + nfa->length = total_size; + nfa->nPositions = info.states.size(); + + nfa->scratchStateSize = verify_u32(state_size); + nfa->streamStateSize = verify_u32(state_size); + + if (state_size == sizeof(u8)) { + nfa->type = MCCLELLAN_NFA_8; + } else { + nfa->type = MCCLELLAN_NFA_16; + } + + mcclellan *m = (mcclellan *)getMutableImplNfa(nfa); + for (u32 i = 0; i < 256; i++) { + m->remap[i] = verify_u8(info.alpha_remap[i]); + } + m->alphaShift = info.getAlphaShift(); + m->length = total_size; + m->aux_offset = aux_offset; + m->accel_offset = accel_offset; + m->arb_report = arb; + m->state_count = verify_u16(info.size()); + m->start_anchored = info.implId(info.raw.start_anchored); + m->start_floating = info.implId(info.raw.start_floating); + m->has_accel = accel_count ? 1 : 0; m->has_wide = info.wide_state_chain.size() > 0 ? 1 : 0; - + if (state_size == sizeof(u8) && m->has_wide == 1) { // allocate 1 more byte for wide state use. 
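// (The extra slot sits after the state id proper and presumably records
// the resume position within a wide-state symbol chain: it is zeroed by
// nfaExecMcClellan16_initCompressedState and carried along by the
// queueCompressState/expandState pair earlier in this diff. The 16-bit
// case grows by a u16 instead, as the next hunk shows.)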
nfa->scratchStateSize += sizeof(u8); @@ -349,16 +349,16 @@ void populateBasicInfo(size_t state_size, const dfa_info &info, nfa->streamStateSize += sizeof(u16); } - if (single) { - m->flags |= MCCLELLAN_FLAG_SINGLE; - } -} - -namespace { - -struct raw_report_list { - flat_set<ReportID> reports; - + if (single) { + m->flags |= MCCLELLAN_FLAG_SINGLE; + } +} + +namespace { + +struct raw_report_list { + flat_set<ReportID> reports; + raw_report_list(const flat_set<ReportID> &reports_in, const ReportManager &rm, bool do_remap) { if (do_remap) { @@ -369,137 +369,137 @@ struct raw_report_list { reports = reports_in; } } - - bool operator<(const raw_report_list &b) const { - return reports < b.reports; - } -}; - -struct raw_report_info_impl : public raw_report_info { - vector<raw_report_list> rl; - u32 getReportListSize() const override; - size_t size() const override; - void fillReportLists(NFA *n, size_t base_offset, - std::vector<u32> &ro /* out */) const override; -}; -} - -unique_ptr<raw_report_info> mcclellan_build_strat::gatherReports( - vector<u32> &reports, - vector<u32> &reports_eod, - u8 *isSingleReport, - ReportID *arbReport) const { - DEBUG_PRINTF("gathering reports\n"); - + + bool operator<(const raw_report_list &b) const { + return reports < b.reports; + } +}; + +struct raw_report_info_impl : public raw_report_info { + vector<raw_report_list> rl; + u32 getReportListSize() const override; + size_t size() const override; + void fillReportLists(NFA *n, size_t base_offset, + std::vector<u32> &ro /* out */) const override; +}; +} + +unique_ptr<raw_report_info> mcclellan_build_strat::gatherReports( + vector<u32> &reports, + vector<u32> &reports_eod, + u8 *isSingleReport, + ReportID *arbReport) const { + DEBUG_PRINTF("gathering reports\n"); + const bool remap_reports = has_managed_reports(rdfa.kind); - auto ri = ue2::make_unique<raw_report_info_impl>(); - map<raw_report_list, u32> rev; - - for (const dstate &s : rdfa.states) { - if (s.reports.empty()) { - reports.push_back(MO_INVALID_IDX); - continue; - } - + auto ri = ue2::make_unique<raw_report_info_impl>(); + map<raw_report_list, u32> rev; + + for (const dstate &s : rdfa.states) { + if (s.reports.empty()) { + reports.push_back(MO_INVALID_IDX); + continue; + } + raw_report_list rrl(s.reports, rm, remap_reports); - DEBUG_PRINTF("non empty r\n"); + DEBUG_PRINTF("non empty r\n"); auto it = rev.find(rrl); if (it != rev.end()) { reports.push_back(it->second); - } else { - DEBUG_PRINTF("adding to rl %zu\n", ri->size()); + } else { + DEBUG_PRINTF("adding to rl %zu\n", ri->size()); rev.emplace(rrl, ri->size()); - reports.push_back(ri->size()); - ri->rl.push_back(rrl); - } - } - - for (const dstate &s : rdfa.states) { - if (s.reports_eod.empty()) { - reports_eod.push_back(MO_INVALID_IDX); - continue; - } - - DEBUG_PRINTF("non empty r eod\n"); + reports.push_back(ri->size()); + ri->rl.push_back(rrl); + } + } + + for (const dstate &s : rdfa.states) { + if (s.reports_eod.empty()) { + reports_eod.push_back(MO_INVALID_IDX); + continue; + } + + DEBUG_PRINTF("non empty r eod\n"); raw_report_list rrl(s.reports_eod, rm, remap_reports); auto it = rev.find(rrl); if (it != rev.end()) { reports_eod.push_back(it->second); - continue; - } - - DEBUG_PRINTF("adding to rl eod %zu\n", s.reports_eod.size()); + continue; + } + + DEBUG_PRINTF("adding to rl eod %zu\n", s.reports_eod.size()); rev.emplace(rrl, ri->size()); - reports_eod.push_back(ri->size()); - ri->rl.push_back(rrl); - } - - assert(!ri->rl.empty()); /* all components should be able to generate - reports */ 
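gatherReports() above interns each state's accept set: a map keyed by the report list's value hands back an existing index whenever the same set recurs, so identical report_list blobs are emitted only once in the bytecode. A C-flavoured sketch of that reuse-or-append step, with a linear scan standing in for the std::map and an illustrative fixed-capacity set type:

    #include <stdint.h>
    #include <string.h>

    struct rset { uint32_t n; uint32_t ids[8]; };  /* illustrative only */

    /* return the index of an equal set in rl[0..count), or count if the
     * caller should append cand as a new entry; this is the
     * rev.find()/rev.emplace() pattern above */
    static uint32_t intern_rset(const struct rset *rl, uint32_t count,
                                const struct rset *cand) {
        for (uint32_t i = 0; i < count; i++) {
            if (rl[i].n == cand->n &&
                memcmp(rl[i].ids, cand->ids,
                       cand->n * sizeof(uint32_t)) == 0) {
                return i;                          /* reuse existing list */
            }
        }
        return count;                              /* append at this index */
    }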
- if (!ri->rl.empty()) { - *arbReport = *ri->rl.begin()->reports.begin(); - } else { - *arbReport = 0; - } - - /* if we have only a single report id generated from all accepts (not eod) - * we can take some short cuts */ + reports_eod.push_back(ri->size()); + ri->rl.push_back(rrl); + } + + assert(!ri->rl.empty()); /* all components should be able to generate + reports */ + if (!ri->rl.empty()) { + *arbReport = *ri->rl.begin()->reports.begin(); + } else { + *arbReport = 0; + } + + /* if we have only a single report id generated from all accepts (not eod) + * we can take some short cuts */ flat_set<ReportID> reps; - - for (u32 rl_index : reports) { - if (rl_index == MO_INVALID_IDX) { - continue; - } - assert(rl_index < ri->size()); - insert(&reps, ri->rl[rl_index].reports); - } - - if (reps.size() == 1) { - *isSingleReport = 1; - *arbReport = *reps.begin(); - DEBUG_PRINTF("single -- %u\n", *arbReport); - } else { - *isSingleReport = 0; - } - - return move(ri); -} - -u32 raw_report_info_impl::getReportListSize() const { - u32 rv = 0; - - for (const auto &reps : rl) { - rv += sizeof(report_list); - rv += sizeof(ReportID) * reps.reports.size(); - } - - return rv; -} - -size_t raw_report_info_impl::size() const { - return rl.size(); -} - -void raw_report_info_impl::fillReportLists(NFA *n, size_t base_offset, - vector<u32> &ro) const { - for (const auto &reps : rl) { - ro.push_back(base_offset); - - report_list *p = (report_list *)((char *)n + base_offset); - - u32 i = 0; - for (const ReportID report : reps.reports) { - p->report[i++] = report; - } - p->count = verify_u32(reps.reports.size()); - - base_offset += sizeof(report_list); - base_offset += sizeof(ReportID) * reps.reports.size(); - } -} - -static + + for (u32 rl_index : reports) { + if (rl_index == MO_INVALID_IDX) { + continue; + } + assert(rl_index < ri->size()); + insert(&reps, ri->rl[rl_index].reports); + } + + if (reps.size() == 1) { + *isSingleReport = 1; + *arbReport = *reps.begin(); + DEBUG_PRINTF("single -- %u\n", *arbReport); + } else { + *isSingleReport = 0; + } + + return move(ri); +} + +u32 raw_report_info_impl::getReportListSize() const { + u32 rv = 0; + + for (const auto &reps : rl) { + rv += sizeof(report_list); + rv += sizeof(ReportID) * reps.reports.size(); + } + + return rv; +} + +size_t raw_report_info_impl::size() const { + return rl.size(); +} + +void raw_report_info_impl::fillReportLists(NFA *n, size_t base_offset, + vector<u32> &ro) const { + for (const auto &reps : rl) { + ro.push_back(base_offset); + + report_list *p = (report_list *)((char *)n + base_offset); + + u32 i = 0; + for (const ReportID report : reps.reports) { + p->report[i++] = report; + } + p->count = verify_u32(reps.reports.size()); + + base_offset += sizeof(report_list); + base_offset += sizeof(ReportID) * reps.reports.size(); + } +} + +static void fillAccelOut(const map<dstate_id_t, AccelScheme> &accel_escape_info, set<dstate_id_t> *accel_states) { for (dstate_id_t i : accel_escape_info | map_keys) { @@ -508,19 +508,19 @@ void fillAccelOut(const map<dstate_id_t, AccelScheme> &accel_escape_info, } static -size_t calcShermanRegionSize(const dfa_info &info) { - size_t rv = 0; - - for (size_t i = 0; i < info.size(); i++) { - if (info.is_sherman(i)) { - rv += SHERMAN_FIXED_SIZE; - } - } - - return ROUNDUP_16(rv); -} - -static +size_t calcShermanRegionSize(const dfa_info &info) { + size_t rv = 0; + + for (size_t i = 0; i < info.size(); i++) { + if (info.is_sherman(i)) { + rv += SHERMAN_FIXED_SIZE; + } + } + + return ROUNDUP_16(rv); +} + +static size_t 
calcWideRegionSize(const dfa_info &info) { if (info.wide_state_chain.empty()) { return 0; @@ -539,58 +539,58 @@ size_t calcWideRegionSize(const dfa_info &info) { } static -void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info, - const vector<u32> &reports, const vector<u32> &reports_eod, - vector<u32> &reportOffsets) { - const dstate &raw_state = info.states[i]; - aux->accept = raw_state.reports.empty() ? 0 : reportOffsets[reports[i]]; - aux->accept_eod = raw_state.reports_eod.empty() ? 0 - : reportOffsets[reports_eod[i]]; - aux->top = info.implId(i ? raw_state.next[info.alpha_remap[TOP]] - : info.raw.start_floating); -} - +void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info, + const vector<u32> &reports, const vector<u32> &reports_eod, + vector<u32> &reportOffsets) { + const dstate &raw_state = info.states[i]; + aux->accept = raw_state.reports.empty() ? 0 : reportOffsets[reports[i]]; + aux->accept_eod = raw_state.reports_eod.empty() ? 0 + : reportOffsets[reports_eod[i]]; + aux->top = info.implId(i ? raw_state.next[info.alpha_remap[TOP]] + : info.raw.start_floating); +} + /* returns false on error */ -static +static bool allocateFSN16(dfa_info &info, dstate_id_t *sherman_base, dstate_id_t *wide_limit) { - info.states[0].impl_id = 0; /* dead is always 0 */ - - vector<dstate_id_t> norm; - vector<dstate_id_t> sherm; + info.states[0].impl_id = 0; /* dead is always 0 */ + + vector<dstate_id_t> norm; + vector<dstate_id_t> sherm; vector<dstate_id_t> wideHead; vector<dstate_id_t> wideState; - - if (info.size() > (1 << 16)) { - DEBUG_PRINTF("too many states\n"); + + if (info.size() > (1 << 16)) { + DEBUG_PRINTF("too many states\n"); *wide_limit = 0; return false; - } - - for (u32 i = 1; i < info.size(); i++) { + } + + for (u32 i = 1; i < info.size(); i++) { if (info.is_widehead(i)) { wideHead.push_back(i); } else if (info.is_widestate(i)) { wideState.push_back(i); } else if (info.is_sherman(i)) { - sherm.push_back(i); - } else { - norm.push_back(i); - } - } - + sherm.push_back(i); + } else { + norm.push_back(i); + } + } + dstate_id_t next = 1; - for (const dstate_id_t &s : norm) { + for (const dstate_id_t &s : norm) { DEBUG_PRINTF("[norm] mapping state %u to %u\n", s, next); info.states[s].impl_id = next++; - } - + } + *sherman_base = next; - for (const dstate_id_t &s : sherm) { + for (const dstate_id_t &s : sherm) { DEBUG_PRINTF("[sherm] mapping state %u to %u\n", s, next); info.states[s].impl_id = next++; - } - + } + *wide_limit = next; for (const dstate_id_t &s : wideHead) { DEBUG_PRINTF("[widehead] mapping state %u to %u\n", s, next); @@ -602,58 +602,58 @@ bool allocateFSN16(dfa_info &info, dstate_id_t *sherman_base, info.states[s].impl_id = next++; } - /* Check to see if we haven't over allocated our states */ + /* Check to see if we haven't over allocated our states */ DEBUG_PRINTF("next sherman %u masked %u\n", next, (dstate_id_t)(next & STATE_MASK)); return (next - 1) == ((next - 1) & STATE_MASK); -} - -static +} + +static bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc, set<dstate_id_t> *accel_states) { - DEBUG_PRINTF("building mcclellan 16\n"); - - vector<u32> reports; /* index in ri for the appropriate report list */ - vector<u32> reports_eod; /* as above */ - ReportID arb; - u8 single; - - u8 alphaShift = info.getAlphaShift(); - assert(alphaShift <= 8); - - u16 count_real_states; + DEBUG_PRINTF("building mcclellan 16\n"); + + vector<u32> reports; /* index in ri for the appropriate report list */ + vector<u32> reports_eod; /* as 
above */ + ReportID arb; + u8 single; + + u8 alphaShift = info.getAlphaShift(); + assert(alphaShift <= 8); + + u16 count_real_states; u16 wide_limit; if (!allocateFSN16(info, &count_real_states, &wide_limit)) { - DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n", - info.size()); - return nullptr; - } - + DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n", + info.size()); + return nullptr; + } + DEBUG_PRINTF("count_real_states: %d\n", count_real_states); DEBUG_PRINTF("non_wide_states: %d\n", wide_limit); auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb); map<dstate_id_t, AccelScheme> accel_escape_info = info.strat.getAccelInfo(cc.grey); - - size_t tran_size = (1 << info.getAlphaShift()) - * sizeof(u16) * count_real_states; - + + size_t tran_size = (1 << info.getAlphaShift()) + * sizeof(u16) * count_real_states; + size_t aux_size = sizeof(mstate_aux) * wide_limit; - - size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcclellan) + tran_size); + + size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcclellan) + tran_size); size_t accel_size = info.strat.accelSize() * accel_escape_info.size(); - size_t accel_offset = ROUNDUP_N(aux_offset + aux_size - + ri->getReportListSize(), 32); - size_t sherman_offset = ROUNDUP_16(accel_offset + accel_size); - size_t sherman_size = calcShermanRegionSize(info); + size_t accel_offset = ROUNDUP_N(aux_offset + aux_size + + ri->getReportListSize(), 32); + size_t sherman_offset = ROUNDUP_16(accel_offset + accel_size); + size_t sherman_size = calcShermanRegionSize(info); size_t wide_offset = ROUNDUP_16(sherman_offset + sherman_size); size_t wide_size = calcWideRegionSize(info); size_t total_size = wide_offset + wide_size; - - accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */ - assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); - + + accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */ + assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); + DEBUG_PRINTF("aux_offset %zu\n", aux_offset); DEBUG_PRINTF("aux_size %zu\n", aux_size); DEBUG_PRINTF("rl size %u\n", ri->getReportListSize()); @@ -666,111 +666,111 @@ bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc, DEBUG_PRINTF("total_size %zu\n", total_size); auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size); - char *nfa_base = (char *)nfa.get(); - - populateBasicInfo(sizeof(u16), info, total_size, aux_offset, accel_offset, + char *nfa_base = (char *)nfa.get(); + + populateBasicInfo(sizeof(u16), info, total_size, aux_offset, accel_offset, accel_escape_info.size(), arb, single, nfa.get()); - - vector<u32> reportOffsets; - - ri->fillReportLists(nfa.get(), aux_offset + aux_size, reportOffsets); - - u16 *succ_table = (u16 *)(nfa_base + sizeof(NFA) + sizeof(mcclellan)); - mstate_aux *aux = (mstate_aux *)(nfa_base + aux_offset); - mcclellan *m = (mcclellan *)getMutableImplNfa(nfa.get()); - + + vector<u32> reportOffsets; + + ri->fillReportLists(nfa.get(), aux_offset + aux_size, reportOffsets); + + u16 *succ_table = (u16 *)(nfa_base + sizeof(NFA) + sizeof(mcclellan)); + mstate_aux *aux = (mstate_aux *)(nfa_base + aux_offset); + mcclellan *m = (mcclellan *)getMutableImplNfa(nfa.get()); + m->wide_limit = wide_limit; m->wide_offset = wide_offset; - /* copy in the mc header information */ - m->sherman_offset = sherman_offset; - m->sherman_end = total_size; - m->sherman_limit = count_real_states; - - /* do normal states */ - for (size_t i = 0; i < info.size(); i++) { + /* copy in the mc 
header information */ + m->sherman_offset = sherman_offset; + m->sherman_end = total_size; + m->sherman_limit = count_real_states; + + /* do normal states */ + for (size_t i = 0; i < info.size(); i++) { if (info.is_sherman(i) || info.is_widestate(i)) { - continue; - } - - u16 fs = info.implId(i); - mstate_aux *this_aux = getAux(nfa.get(), fs); - - assert(fs < count_real_states); - - for (size_t j = 0; j < info.impl_alpha_size; j++) { - succ_table[(fs << alphaShift) + j] = - info.implId(info.states[i].next[j]); - } - - fillInAux(&aux[fs], i, info, reports, reports_eod, reportOffsets); - + continue; + } + + u16 fs = info.implId(i); + mstate_aux *this_aux = getAux(nfa.get(), fs); + + assert(fs < count_real_states); + + for (size_t j = 0; j < info.impl_alpha_size; j++) { + succ_table[(fs << alphaShift) + j] = + info.implId(info.states[i].next[j]); + } + + fillInAux(&aux[fs], i, info, reports, reports_eod, reportOffsets); + if (contains(accel_escape_info, i)) { - this_aux->accel_offset = accel_offset; - accel_offset += info.strat.accelSize(); - assert(accel_offset + sizeof(NFA) <= sherman_offset); - assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); + this_aux->accel_offset = accel_offset; + accel_offset += info.strat.accelSize(); + assert(accel_offset + sizeof(NFA) <= sherman_offset); + assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); info.strat.buildAccel(i, accel_escape_info.at(i), - (void *)((char *)m + this_aux->accel_offset)); - } - } - - /* do sherman states */ - char *sherman_table = nfa_base + m->sherman_offset; - assert(ISALIGNED_16(sherman_table)); - for (size_t i = 0; i < info.size(); i++) { - if (!info.is_sherman(i)) { - continue; - } - - u16 fs = verify_u16(info.implId(i)); - mstate_aux *this_aux = getAux(nfa.get(), fs); - - assert(fs >= count_real_states); + (void *)((char *)m + this_aux->accel_offset)); + } + } + + /* do sherman states */ + char *sherman_table = nfa_base + m->sherman_offset; + assert(ISALIGNED_16(sherman_table)); + for (size_t i = 0; i < info.size(); i++) { + if (!info.is_sherman(i)) { + continue; + } + + u16 fs = verify_u16(info.implId(i)); + mstate_aux *this_aux = getAux(nfa.get(), fs); + + assert(fs >= count_real_states); assert(fs < wide_limit); - - char *curr_sherman_entry - = sherman_table + (fs - m->sherman_limit) * SHERMAN_FIXED_SIZE; - assert(curr_sherman_entry <= nfa_base + m->length); - - fillInAux(this_aux, i, info, reports, reports_eod, reportOffsets); - + + char *curr_sherman_entry + = sherman_table + (fs - m->sherman_limit) * SHERMAN_FIXED_SIZE; + assert(curr_sherman_entry <= nfa_base + m->length); + + fillInAux(this_aux, i, info, reports, reports_eod, reportOffsets); + if (contains(accel_escape_info, i)) { - this_aux->accel_offset = accel_offset; - accel_offset += info.strat.accelSize(); - assert(accel_offset + sizeof(NFA) <= sherman_offset); - assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); + this_aux->accel_offset = accel_offset; + accel_offset += info.strat.accelSize(); + assert(accel_offset + sizeof(NFA) <= sherman_offset); + assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); info.strat.buildAccel(i, accel_escape_info.at(i), - (void *)((char *)m + this_aux->accel_offset)); - } - - u8 len = verify_u8(info.impl_alpha_size - info.extra[i].daddytaken); - assert(len <= 9); - dstate_id_t d = info.states[i].daddy; - - *(u8 *)(curr_sherman_entry + SHERMAN_TYPE_OFFSET) = SHERMAN_STATE; - *(u8 *)(curr_sherman_entry + SHERMAN_LEN_OFFSET) = len; - *(u16 *)(curr_sherman_entry + SHERMAN_DADDY_OFFSET) = 
info.implId(d); - u8 *chars = (u8 *)(curr_sherman_entry + SHERMAN_CHARS_OFFSET); - - for (u16 s = 0; s < info.impl_alpha_size; s++) { - if (info.states[i].next[s] != info.states[d].next[s]) { - *(chars++) = (u8)s; - } - } - - u16 *states = (u16 *)(curr_sherman_entry + SHERMAN_STATES_OFFSET(len)); - for (u16 s = 0; s < info.impl_alpha_size; s++) { - if (info.states[i].next[s] != info.states[d].next[s]) { - DEBUG_PRINTF("s overrider %hu dad %hu char next %hu\n", - fs, info.implId(d), - info.implId(info.states[i].next[s])); - unaligned_store_u16((u8 *)states++, - info.implId(info.states[i].next[s])); - } - } - } - + (void *)((char *)m + this_aux->accel_offset)); + } + + u8 len = verify_u8(info.impl_alpha_size - info.extra[i].daddytaken); + assert(len <= 9); + dstate_id_t d = info.states[i].daddy; + + *(u8 *)(curr_sherman_entry + SHERMAN_TYPE_OFFSET) = SHERMAN_STATE; + *(u8 *)(curr_sherman_entry + SHERMAN_LEN_OFFSET) = len; + *(u16 *)(curr_sherman_entry + SHERMAN_DADDY_OFFSET) = info.implId(d); + u8 *chars = (u8 *)(curr_sherman_entry + SHERMAN_CHARS_OFFSET); + + for (u16 s = 0; s < info.impl_alpha_size; s++) { + if (info.states[i].next[s] != info.states[d].next[s]) { + *(chars++) = (u8)s; + } + } + + u16 *states = (u16 *)(curr_sherman_entry + SHERMAN_STATES_OFFSET(len)); + for (u16 s = 0; s < info.impl_alpha_size; s++) { + if (info.states[i].next[s] != info.states[d].next[s]) { + DEBUG_PRINTF("s overrider %hu dad %hu char next %hu\n", + fs, info.implId(d), + info.implId(info.states[i].next[s])); + unaligned_store_u16((u8 *)states++, + info.implId(info.states[i].next[s])); + } + } + } + if (!info.wide_state_chain.empty()) { /* do wide states using info */ u16 wide_number = verify_u16(info.wide_symbol_chain.size()); @@ -836,185 +836,185 @@ bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc, } } - markEdges(nfa.get(), succ_table, info); - + markEdges(nfa.get(), succ_table, info); + if (accel_states && nfa) { fillAccelOut(accel_escape_info, accel_states); } - return nfa; -} - -static -void fillInBasicState8(const dfa_info &info, mstate_aux *aux, u8 *succ_table, - const vector<u32> &reportOffsets, - const vector<u32> &reports, - const vector<u32> &reports_eod, u32 i) { - dstate_id_t j = info.implId(i); - u8 alphaShift = info.getAlphaShift(); - assert(alphaShift <= 8); - - for (size_t s = 0; s < info.impl_alpha_size; s++) { - dstate_id_t raw_succ = info.states[i].next[s]; - succ_table[(j << alphaShift) + s] = info.implId(raw_succ); - } - - aux[j].accept = 0; - aux[j].accept_eod = 0; - - if (!info.states[i].reports.empty()) { - DEBUG_PRINTF("i=%u r[i]=%u\n", i, reports[i]); - assert(reports[i] != MO_INVALID_IDX); - aux[j].accept = reportOffsets[reports[i]]; - } - - if (!info.states[i].reports_eod.empty()) { - DEBUG_PRINTF("i=%u re[i]=%u\n", i, reports_eod[i]); - aux[j].accept_eod = reportOffsets[reports_eod[i]]; - } - - dstate_id_t raw_top = i ? 
info.states[i].next[info.alpha_remap[TOP]] - : info.raw.start_floating; - - aux[j].top = info.implId(raw_top); -} - -static + return nfa; +} + +static +void fillInBasicState8(const dfa_info &info, mstate_aux *aux, u8 *succ_table, + const vector<u32> &reportOffsets, + const vector<u32> &reports, + const vector<u32> &reports_eod, u32 i) { + dstate_id_t j = info.implId(i); + u8 alphaShift = info.getAlphaShift(); + assert(alphaShift <= 8); + + for (size_t s = 0; s < info.impl_alpha_size; s++) { + dstate_id_t raw_succ = info.states[i].next[s]; + succ_table[(j << alphaShift) + s] = info.implId(raw_succ); + } + + aux[j].accept = 0; + aux[j].accept_eod = 0; + + if (!info.states[i].reports.empty()) { + DEBUG_PRINTF("i=%u r[i]=%u\n", i, reports[i]); + assert(reports[i] != MO_INVALID_IDX); + aux[j].accept = reportOffsets[reports[i]]; + } + + if (!info.states[i].reports_eod.empty()) { + DEBUG_PRINTF("i=%u re[i]=%u\n", i, reports_eod[i]); + aux[j].accept_eod = reportOffsets[reports_eod[i]]; + } + + dstate_id_t raw_top = i ? info.states[i].next[info.alpha_remap[TOP]] + : info.raw.start_floating; + + aux[j].top = info.implId(raw_top); +} + +static void allocateFSN8(dfa_info &info, const map<dstate_id_t, AccelScheme> &accel_escape_info, u16 *accel_limit, u16 *accept_limit) { - info.states[0].impl_id = 0; /* dead is always 0 */ - - vector<dstate_id_t> norm; - vector<dstate_id_t> accel; - vector<dstate_id_t> accept; - - assert(info.size() <= (1 << 8)); - - for (u32 i = 1; i < info.size(); i++) { - if (!info.states[i].reports.empty()) { - accept.push_back(i); + info.states[0].impl_id = 0; /* dead is always 0 */ + + vector<dstate_id_t> norm; + vector<dstate_id_t> accel; + vector<dstate_id_t> accept; + + assert(info.size() <= (1 << 8)); + + for (u32 i = 1; i < info.size(); i++) { + if (!info.states[i].reports.empty()) { + accept.push_back(i); } else if (contains(accel_escape_info, i)) { - accel.push_back(i); - } else { - norm.push_back(i); - } - } - - u32 j = 1; /* dead is already at 0 */ - for (const dstate_id_t &s : norm) { - assert(j <= 256); - DEBUG_PRINTF("mapping state %u to %u\n", s, j); - info.states[s].impl_id = j++; - } - *accel_limit = j; - for (const dstate_id_t &s : accel) { - assert(j <= 256); - DEBUG_PRINTF("mapping state %u to %u\n", s, j); - info.states[s].impl_id = j++; - } - *accept_limit = j; - for (const dstate_id_t &s : accept) { - assert(j <= 256); - DEBUG_PRINTF("mapping state %u to %u\n", s, j); - info.states[s].impl_id = j++; - } -} - -static + accel.push_back(i); + } else { + norm.push_back(i); + } + } + + u32 j = 1; /* dead is already at 0 */ + for (const dstate_id_t &s : norm) { + assert(j <= 256); + DEBUG_PRINTF("mapping state %u to %u\n", s, j); + info.states[s].impl_id = j++; + } + *accel_limit = j; + for (const dstate_id_t &s : accel) { + assert(j <= 256); + DEBUG_PRINTF("mapping state %u to %u\n", s, j); + info.states[s].impl_id = j++; + } + *accept_limit = j; + for (const dstate_id_t &s : accept) { + assert(j <= 256); + DEBUG_PRINTF("mapping state %u to %u\n", s, j); + info.states[s].impl_id = j++; + } +} + +static bytecode_ptr<NFA> mcclellanCompile8(dfa_info &info, const CompileContext &cc, set<dstate_id_t> *accel_states) { - DEBUG_PRINTF("building mcclellan 8\n"); - - vector<u32> reports; - vector<u32> reports_eod; - ReportID arb; - u8 single; - + DEBUG_PRINTF("building mcclellan 8\n"); + + vector<u32> reports; + vector<u32> reports_eod; + ReportID arb; + u8 single; + auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb); map<dstate_id_t, AccelScheme> 
accel_escape_info = info.strat.getAccelInfo(cc.grey); - - size_t tran_size = sizeof(u8) * (1 << info.getAlphaShift()) * info.size(); - size_t aux_size = sizeof(mstate_aux) * info.size(); - size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcclellan) + tran_size); + + size_t tran_size = sizeof(u8) * (1 << info.getAlphaShift()) * info.size(); + size_t aux_size = sizeof(mstate_aux) * info.size(); + size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcclellan) + tran_size); size_t accel_size = info.strat.accelSize() * accel_escape_info.size(); - size_t accel_offset = ROUNDUP_N(aux_offset + aux_size - + ri->getReportListSize(), 32); - size_t total_size = accel_offset + accel_size; - - DEBUG_PRINTF("aux_size %zu\n", aux_size); - DEBUG_PRINTF("aux_offset %zu\n", aux_offset); - DEBUG_PRINTF("rl size %u\n", ri->getReportListSize()); - DEBUG_PRINTF("accel_size %zu\n", accel_size); - DEBUG_PRINTF("accel_offset %zu\n", accel_offset); - DEBUG_PRINTF("total_size %zu\n", total_size); - - accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */ - assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); - + size_t accel_offset = ROUNDUP_N(aux_offset + aux_size + + ri->getReportListSize(), 32); + size_t total_size = accel_offset + accel_size; + + DEBUG_PRINTF("aux_size %zu\n", aux_size); + DEBUG_PRINTF("aux_offset %zu\n", aux_offset); + DEBUG_PRINTF("rl size %u\n", ri->getReportListSize()); + DEBUG_PRINTF("accel_size %zu\n", accel_size); + DEBUG_PRINTF("accel_offset %zu\n", accel_offset); + DEBUG_PRINTF("total_size %zu\n", total_size); + + accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */ + assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); + auto nfa = make_zeroed_bytecode_ptr<NFA>(total_size); - char *nfa_base = (char *)nfa.get(); - - mcclellan *m = (mcclellan *)getMutableImplNfa(nfa.get()); - + char *nfa_base = (char *)nfa.get(); + + mcclellan *m = (mcclellan *)getMutableImplNfa(nfa.get()); + allocateFSN8(info, accel_escape_info, &m->accel_limit_8, &m->accept_limit_8); - populateBasicInfo(sizeof(u8), info, total_size, aux_offset, accel_offset, + populateBasicInfo(sizeof(u8), info, total_size, aux_offset, accel_offset, accel_escape_info.size(), arb, single, nfa.get()); - - vector<u32> reportOffsets; - - ri->fillReportLists(nfa.get(), aux_offset + aux_size, reportOffsets); - - /* copy in the state information */ - u8 *succ_table = (u8 *)(nfa_base + sizeof(NFA) + sizeof(mcclellan)); - mstate_aux *aux = (mstate_aux *)(nfa_base + aux_offset); - - for (size_t i = 0; i < info.size(); i++) { + + vector<u32> reportOffsets; + + ri->fillReportLists(nfa.get(), aux_offset + aux_size, reportOffsets); + + /* copy in the state information */ + u8 *succ_table = (u8 *)(nfa_base + sizeof(NFA) + sizeof(mcclellan)); + mstate_aux *aux = (mstate_aux *)(nfa_base + aux_offset); + + for (size_t i = 0; i < info.size(); i++) { if (contains(accel_escape_info, i)) { - u32 j = info.implId(i); - - aux[j].accel_offset = accel_offset; - accel_offset += info.strat.accelSize(); - + u32 j = info.implId(i); + + aux[j].accel_offset = accel_offset; + accel_offset += info.strat.accelSize(); + info.strat.buildAccel(i, accel_escape_info.at(i), (void *)((char *)m + aux[j].accel_offset)); - } - - fillInBasicState8(info, aux, succ_table, reportOffsets, reports, - reports_eod, i); - } - - assert(accel_offset + sizeof(NFA) <= total_size); - - DEBUG_PRINTF("rl size %zu\n", ri->size()); - + } + + fillInBasicState8(info, aux, succ_table, reportOffsets, reports, + reports_eod, i); + } + + 
assert(accel_offset + sizeof(NFA) <= total_size); + + DEBUG_PRINTF("rl size %zu\n", ri->size()); + if (accel_states && nfa) { fillAccelOut(accel_escape_info, accel_states); } - return nfa; -} - + return nfa; +} + #define MAX_SHERMAN_LIST_LEN 9 - -static + +static void addIfEarlier(flat_set<dstate_id_t> &dest, dstate_id_t candidate, - dstate_id_t max) { - if (candidate < max) { - dest.insert(candidate); - } -} - -static + dstate_id_t max) { + if (candidate < max) { + dest.insert(candidate); + } +} + +static void addSuccessors(flat_set<dstate_id_t> &dest, const dstate &source, - u16 alphasize, dstate_id_t curr_id) { - for (symbol_t s = 0; s < alphasize; s++) { - addIfEarlier(dest, source.next[s], curr_id); - } -} - + u16 alphasize, dstate_id_t curr_id) { + for (symbol_t s = 0; s < alphasize; s++) { + addIfEarlier(dest, source.next[s], curr_id); + } +} + /* \brief Returns a set of states to search for a better daddy. */ static flat_set<dstate_id_t> find_daddy_candidates(const dfa_info &info, @@ -1037,46 +1037,46 @@ flat_set<dstate_id_t> find_daddy_candidates(const dfa_info &info, return hinted; } -#define MAX_SHERMAN_SELF_LOOP 20 - -static +#define MAX_SHERMAN_SELF_LOOP 20 + +static void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit, bool any_cyclic_near_anchored_state, bool trust_daddy_states, const Grey &grey) { - if (!grey.allowShermanStates) { - return; - } - - const u16 width = using8bit ? sizeof(u8) : sizeof(u16); - const u16 alphasize = info.impl_alpha_size; - - if (info.raw.start_anchored != DEAD_STATE - && any_cyclic_near_anchored_state - && curr_id < alphasize * 3) { - /* crude attempt to prevent frequent states from being sherman'ed - * depends on the fact that states are numbers are currently in bfs - * order */ - DEBUG_PRINTF("%hu is banned\n", curr_id); - return; - } - - if (info.raw.start_floating != DEAD_STATE - && curr_id >= info.raw.start_floating - && curr_id < info.raw.start_floating + alphasize * 3) { - /* crude attempt to prevent frequent states from being sherman'ed - * depends on the fact that states are numbers are currently in bfs - * order */ - DEBUG_PRINTF("%hu is banned (%hu)\n", curr_id, info.raw.start_floating); - return; - } - - const u16 full_state_size = width * alphasize; - const u16 max_list_len = MIN(MAX_SHERMAN_LIST_LEN, - (full_state_size - 2)/(width + 1)); - u16 best_score = 0; - dstate_id_t best_daddy = 0; - dstate &currState = info.states[curr_id]; - + if (!grey.allowShermanStates) { + return; + } + + const u16 width = using8bit ? 
sizeof(u8) : sizeof(u16); + const u16 alphasize = info.impl_alpha_size; + + if (info.raw.start_anchored != DEAD_STATE + && any_cyclic_near_anchored_state + && curr_id < alphasize * 3) { + /* crude attempt to prevent frequent states from being sherman'ed + * depends on the fact that states are numbers are currently in bfs + * order */ + DEBUG_PRINTF("%hu is banned\n", curr_id); + return; + } + + if (info.raw.start_floating != DEAD_STATE + && curr_id >= info.raw.start_floating + && curr_id < info.raw.start_floating + alphasize * 3) { + /* crude attempt to prevent frequent states from being sherman'ed + * depends on the fact that states are numbers are currently in bfs + * order */ + DEBUG_PRINTF("%hu is banned (%hu)\n", curr_id, info.raw.start_floating); + return; + } + + const u16 full_state_size = width * alphasize; + const u16 max_list_len = MIN(MAX_SHERMAN_LIST_LEN, + (full_state_size - 2)/(width + 1)); + u16 best_score = 0; + dstate_id_t best_daddy = 0; + dstate &currState = info.states[curr_id]; + flat_set<dstate_id_t> hinted; if (trust_daddy_states) { // Use the daddy already set for this state so long as it isn't already @@ -1093,88 +1093,88 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit, } assert(!info.is_sherman(granddaddy)); hinted.insert(granddaddy); - } + } } else { hinted = find_daddy_candidates(info, curr_id); - } - - for (const dstate_id_t &donor : hinted) { - assert(donor < curr_id); - u32 score = 0; - + } + + for (const dstate_id_t &donor : hinted) { + assert(donor < curr_id); + u32 score = 0; + if (info.is_sherman(donor) || info.is_widestate(donor)) { - continue; - } - - const dstate &donorState = info.states[donor]; - for (symbol_t s = 0; s < alphasize; s++) { - if (currState.next[s] == donorState.next[s]) { - score++; - } - } - - /* prefer lower ids to provide some stability amongst potential - * siblings */ - if (score > best_score || (score == best_score && donor < best_daddy)) { - best_daddy = donor; - best_score = score; - - if (score == alphasize) { - break; - } - } - } - - currState.daddy = best_daddy; - info.extra[curr_id].daddytaken = best_score; - DEBUG_PRINTF("%hu -> daddy %hu: %u/%u BF\n", curr_id, best_daddy, - best_score, alphasize); - - if (best_score + max_list_len < alphasize) { - return; /* ??? */ - } - - if (info.is_sherman(currState.daddy)) { - return; - } - - u32 self_loop_width = 0; + continue; + } + + const dstate &donorState = info.states[donor]; + for (symbol_t s = 0; s < alphasize; s++) { + if (currState.next[s] == donorState.next[s]) { + score++; + } + } + + /* prefer lower ids to provide some stability amongst potential + * siblings */ + if (score > best_score || (score == best_score && donor < best_daddy)) { + best_daddy = donor; + best_score = score; + + if (score == alphasize) { + break; + } + } + } + + currState.daddy = best_daddy; + info.extra[curr_id].daddytaken = best_score; + DEBUG_PRINTF("%hu -> daddy %hu: %u/%u BF\n", curr_id, best_daddy, + best_score, alphasize); + + if (best_score + max_list_len < alphasize) { + return; /* ??? 
*/ + } + + if (info.is_sherman(currState.daddy)) { + return; + } + + u32 self_loop_width = 0; const dstate &curr_raw = info.states[curr_id]; - for (unsigned i = 0; i < N_CHARS; i++) { - if (curr_raw.next[info.alpha_remap[i]] == curr_id) { - self_loop_width++; - } - } - - if (self_loop_width > MAX_SHERMAN_SELF_LOOP) { - DEBUG_PRINTF("%hu is banned wide self loop (%u)\n", curr_id, + for (unsigned i = 0; i < N_CHARS; i++) { + if (curr_raw.next[info.alpha_remap[i]] == curr_id) { + self_loop_width++; + } + } + + if (self_loop_width > MAX_SHERMAN_SELF_LOOP) { + DEBUG_PRINTF("%hu is banned wide self loop (%u)\n", curr_id, self_loop_width); - return; - } - - DEBUG_PRINTF("%hu is sherman\n", curr_id); - info.extra[curr_id].shermanState = true; -} - -static -bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) { - symbol_t alphasize = raw.getImplAlphaSize(); - for (symbol_t s = 0; s < alphasize; s++) { - dstate_id_t succ_id = raw.states[root].next[s]; - if (succ_id == DEAD_STATE) { - continue; - } - - const dstate &succ = raw.states[succ_id]; - for (symbol_t t = 0; t < alphasize; t++) { - if (succ.next[t] == root || succ.next[t] == succ_id) { - return true; - } - } - } - return false; -} - + return; + } + + DEBUG_PRINTF("%hu is sherman\n", curr_id); + info.extra[curr_id].shermanState = true; +} + +static +bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) { + symbol_t alphasize = raw.getImplAlphaSize(); + for (symbol_t s = 0; s < alphasize; s++) { + dstate_id_t succ_id = raw.states[root].next[s]; + if (succ_id == DEAD_STATE) { + continue; + } + + const dstate &succ = raw.states[succ_id]; + for (symbol_t t = 0; t < alphasize; t++) { + if (succ.next[t] == root || succ.next[t] == succ_id) { + return true; + } + } + } + return false; +} + /* \brief Test for only-one-predecessor property. 
*/ static bool check_property1(const DfaPrevInfo &info, const u16 impl_alpha_size, @@ -1464,17 +1464,17 @@ bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat, bool trust_daddy_states, set<dstate_id_t> *accel_states) { assert(!is_dead(raw)); - - dfa_info info(strat); - bool using8bit = cc.grey.allowMcClellan8 && info.size() <= 256; - - if (!cc.streaming) { /* TODO: work out if we can do the strip in streaming - * mode with our semantics */ - raw.stripExtraEodReports(); - } - - bool has_eod_reports = raw.hasEodReports(); - + + dfa_info info(strat); + bool using8bit = cc.grey.allowMcClellan8 && info.size() <= 256; + + if (!cc.streaming) { /* TODO: work out if we can do the strip in streaming + * mode with our semantics */ + raw.stripExtraEodReports(); + } + + bool has_eod_reports = raw.hasEodReports(); + bytecode_ptr<NFA> nfa; if (!using8bit) { // Wide state optimization @@ -1486,7 +1486,7 @@ bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat, u16 total_daddy = 0; bool any_cyclic_near_anchored_state = is_cyclic_near(raw, raw.start_anchored); - + // Sherman optimization if (info.impl_alpha_size > 16) { for (u32 i = 0; i < info.size(); i++) { @@ -1503,20 +1503,20 @@ bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat, info.size() * info.impl_alpha_size, info.size(), info.impl_alpha_size); } - + nfa = mcclellanCompile16(info, cc, accel_states); - } else { + } else { nfa = mcclellanCompile8(info, cc, accel_states); - } - - if (has_eod_reports) { - nfa->flags |= NFA_ACCEPTS_EOD; - } - - DEBUG_PRINTF("compile done\n"); - return nfa; -} - + } + + if (has_eod_reports) { + nfa->flags |= NFA_ACCEPTS_EOD; + } + + DEBUG_PRINTF("compile done\n"); + return nfa; +} + bytecode_ptr<NFA> mcclellanCompile(raw_dfa &raw, const CompileContext &cc, const ReportManager &rm, bool only_accel_init, @@ -1524,33 +1524,33 @@ bytecode_ptr<NFA> mcclellanCompile(raw_dfa &raw, const CompileContext &cc, set<dstate_id_t> *accel_states) { mcclellan_build_strat mbs(raw, rm, only_accel_init); return mcclellanCompile_i(raw, mbs, cc, trust_daddy_states, accel_states); -} - -size_t mcclellan_build_strat::accelSize(void) const { - return sizeof(AccelAux); /* McClellan accel structures are just bare - * accelaux */ -} - -u32 mcclellanStartReachSize(const raw_dfa *raw) { - if (raw->states.size() < 2) { - return 0; - } - - const dstate &ds = raw->states[raw->start_anchored]; - - CharReach out; - for (unsigned i = 0; i < N_CHARS; i++) { - if (ds.next[raw->alpha_remap[i]] != DEAD_STATE) { - out.set(i); - } - } - - return out.count(); -} - +} + +size_t mcclellan_build_strat::accelSize(void) const { + return sizeof(AccelAux); /* McClellan accel structures are just bare + * accelaux */ +} + +u32 mcclellanStartReachSize(const raw_dfa *raw) { + if (raw->states.size() < 2) { + return 0; + } + + const dstate &ds = raw->states[raw->start_anchored]; + + CharReach out; + for (unsigned i = 0; i < N_CHARS; i++) { + if (ds.next[raw->alpha_remap[i]] != DEAD_STATE) { + out.set(i); + } + } + + return out.count(); +} + bool has_accel_mcclellan(const NFA *nfa) { - const mcclellan *m = (const mcclellan *)getImplNfa(nfa); - return m->has_accel; -} - -} // namespace ue2 + const mcclellan *m = (const mcclellan *)getImplNfa(nfa); + return m->has_accel; +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfa/mcclellancompile.h b/contrib/libs/hyperscan/src/nfa/mcclellancompile.h index d819a86f2d..73cb9fd775 100644 --- 
a/contrib/libs/hyperscan/src/nfa/mcclellancompile.h +++ b/contrib/libs/hyperscan/src/nfa/mcclellancompile.h @@ -1,71 +1,71 @@ -/* +/* * Copyright (c) 2015-2018, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef MCCLELLANCOMPILE_H -#define MCCLELLANCOMPILE_H - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef MCCLELLANCOMPILE_H +#define MCCLELLANCOMPILE_H + #include "accel_dfa_build_strat.h" -#include "rdfa.h" -#include "ue2common.h" +#include "rdfa.h" +#include "ue2common.h" #include "util/bytecode_ptr.h" - -#include <memory> -#include <vector> -#include <set> - -struct NFA; - -namespace ue2 { - + +#include <memory> +#include <vector> +#include <set> + +struct NFA; + +namespace ue2 { + class ReportManager; -struct CompileContext; - +struct CompileContext; + class mcclellan_build_strat : public accel_dfa_build_strat { -public: +public: mcclellan_build_strat(raw_dfa &rdfa_in, const ReportManager &rm_in, bool only_accel_init_in) : accel_dfa_build_strat(rm_in, only_accel_init_in), rdfa(rdfa_in) {} - raw_dfa &get_raw() const override { return rdfa; } - std::unique_ptr<raw_report_info> gatherReports( + raw_dfa &get_raw() const override { return rdfa; } + std::unique_ptr<raw_report_info> gatherReports( std::vector<u32> &reports /* out */, std::vector<u32> &reports_eod /* out */, u8 *isSingleReport /* out */, ReportID *arbReport /* out */) const override; - size_t accelSize(void) const override; + size_t accelSize(void) const override; u32 max_allowed_offset_accel() const override; u32 max_stop_char() const override; u32 max_floating_stop_char() const override; DfaType getType() const override { return McClellan; } - -private: - raw_dfa &rdfa; -}; - + +private: + raw_dfa &rdfa; +}; + /** * \brief Construct an implementation DFA. * @@ -81,26 +81,26 @@ private: * accelerable states */ bytecode_ptr<NFA> -mcclellanCompile(raw_dfa &raw, const CompileContext &cc, +mcclellanCompile(raw_dfa &raw, const CompileContext &cc, const ReportManager &rm, bool only_accel_init, bool trust_daddy_states = false, - std::set<dstate_id_t> *accel_states = nullptr); - -/* used internally by mcclellan/haig/gough compile process */ + std::set<dstate_id_t> *accel_states = nullptr); + +/* used internally by mcclellan/haig/gough compile process */ bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat, const CompileContext &cc, bool trust_daddy_states = false, - std::set<dstate_id_t> *accel_states = nullptr); - -/** - * \brief Returns the width of the character reach at start. - */ -u32 mcclellanStartReachSize(const raw_dfa *raw); - -std::set<ReportID> all_reports(const raw_dfa &rdfa); - + std::set<dstate_id_t> *accel_states = nullptr); + +/** + * \brief Returns the width of the character reach at start. + */ +u32 mcclellanStartReachSize(const raw_dfa *raw); + +std::set<ReportID> all_reports(const raw_dfa &rdfa); + bool has_accel_mcclellan(const NFA *nfa); - -} // namespace ue2 - + +} // namespace ue2 + #endif // MCCLELLANCOMPILE_H diff --git a/contrib/libs/hyperscan/src/nfa/mcclellancompile_util.cpp b/contrib/libs/hyperscan/src/nfa/mcclellancompile_util.cpp index 064f0c86e0..3e299b81e2 100644 --- a/contrib/libs/hyperscan/src/nfa/mcclellancompile_util.cpp +++ b/contrib/libs/hyperscan/src/nfa/mcclellancompile_util.cpp @@ -1,192 +1,192 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "mcclellancompile_util.h" - -#include "rdfa.h" -#include "util/container.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "mcclellancompile_util.h" + +#include "rdfa.h" +#include "util/container.h" #include "util/hash.h" -#include "ue2common.h" - -#include <deque> +#include "ue2common.h" + +#include <deque> #include <map> - -using namespace std; - -namespace ue2 { - -#define INIT_STATE 1 - -static + +using namespace std; + +namespace ue2 { + +#define INIT_STATE 1 + +static bool state_has_reports(const raw_dfa &raw, dstate_id_t s) { const auto &ds = raw.states[s]; return !ds.reports.empty() || !ds.reports_eod.empty(); } static -u32 count_dots(const raw_dfa &raw) { - assert(raw.start_anchored == INIT_STATE); - - u32 i = INIT_STATE; - for (; i < raw.states.size() && i != raw.start_floating; i++) { - DEBUG_PRINTF("checking %u\n", i); - assert(raw.states[i].reports.empty()); - assert(raw.states[i].reports_eod.empty()); - - for (symbol_t s = 0; s < raw.getImplAlphaSize(); s++) { - DEBUG_PRINTF("%hu -> %hu\n", s, raw.states[i].next[s]); - if (raw.states[i].next[s] != i + 1) { - goto validate; - } - } - +u32 count_dots(const raw_dfa &raw) { + assert(raw.start_anchored == INIT_STATE); + + u32 i = INIT_STATE; + for (; i < raw.states.size() && i != raw.start_floating; i++) { + DEBUG_PRINTF("checking %u\n", i); + assert(raw.states[i].reports.empty()); + assert(raw.states[i].reports_eod.empty()); + + for (symbol_t s = 0; s < raw.getImplAlphaSize(); s++) { + DEBUG_PRINTF("%hu -> %hu\n", s, raw.states[i].next[s]); + if (raw.states[i].next[s] != i + 1) { + goto validate; + } + } + if (state_has_reports(raw, raw.states[i].next[0])) { - goto validate; - } - - DEBUG_PRINTF("got dot\n"); - } - - validate: - u32 dot_count = i - INIT_STATE; - - /* we need to check that no later state has a transition into these leading - * dots */ - for (; i < raw.states.size(); i++) { - for (symbol_t s = 0; s < raw.getImplAlphaSize(); s++) { - DEBUG_PRINTF("%hu -> %hu\n", s, raw.states[i].next[s]); - dstate_id_t n = raw.states[i].next[s]; - if (n != DEAD_STATE && n <= dot_count) { - return 0; - } - } - } - - return dot_count; -} - -static -void prune_leading_states(raw_dfa &raw, u32 count) { - if (!count) { - return; - } - - for (u32 i = INIT_STATE + count; i < raw.states.size(); i++) { - dstate &curr = raw.states[i - count]; - curr = raw.states[i]; - if (curr.daddy > count) { - curr.daddy -= count; - } else { - curr.daddy = DEAD_STATE; - } - - for (u32 j = 0; j < raw.alpha_size; j++) { - assert(curr.next[j] == DEAD_STATE || curr.next[j] > count); - if (curr.next[j]) { - curr.next[j] -= count; - } - } - } - - raw.states.erase(raw.states.end() - count, raw.states.end()); -} - -u32 remove_leading_dots(raw_dfa &raw) { - u32 count = count_dots(raw); - prune_leading_states(raw, count); - DEBUG_PRINTF("removed %u leading dots\n", count); - return count; -} - -static never_inline -u32 calc_min_dist_from_bob(raw_dfa &raw, vector<u32> *dist_in) { - vector<u32> &dist = *dist_in; + goto validate; + } + + DEBUG_PRINTF("got dot\n"); + } + + validate: + u32 dot_count = i - INIT_STATE; + + /* we need to check that no later state has a transition into these leading + * dots */ + for (; i < raw.states.size(); i++) { + for (symbol_t s = 0; s < raw.getImplAlphaSize(); s++) { + DEBUG_PRINTF("%hu -> %hu\n", s, raw.states[i].next[s]); + dstate_id_t n = raw.states[i].next[s]; + if (n != DEAD_STATE && n <= dot_count) { + return 0; + } + } + } + + return dot_count; +} + +static +void prune_leading_states(raw_dfa &raw, u32 count) { + if (!count) { + return; + } + + for (u32 i = INIT_STATE + count; i < raw.states.size(); i++) { + dstate &curr = 
raw.states[i - count]; + curr = raw.states[i]; + if (curr.daddy > count) { + curr.daddy -= count; + } else { + curr.daddy = DEAD_STATE; + } + + for (u32 j = 0; j < raw.alpha_size; j++) { + assert(curr.next[j] == DEAD_STATE || curr.next[j] > count); + if (curr.next[j]) { + curr.next[j] -= count; + } + } + } + + raw.states.erase(raw.states.end() - count, raw.states.end()); +} + +u32 remove_leading_dots(raw_dfa &raw) { + u32 count = count_dots(raw); + prune_leading_states(raw, count); + DEBUG_PRINTF("removed %u leading dots\n", count); + return count; +} + +static never_inline +u32 calc_min_dist_from_bob(raw_dfa &raw, vector<u32> *dist_in) { + vector<u32> &dist = *dist_in; dist.assign(raw.states.size(), ~0U); - - assert(raw.start_anchored != DEAD_STATE); - + + assert(raw.start_anchored != DEAD_STATE); + deque<dstate_id_t> to_visit = { raw.start_anchored }; - dist[raw.start_anchored] = 0; - - u32 last_d = 0; - - while (!to_visit.empty()) { - dstate_id_t s = to_visit.front(); - DEBUG_PRINTF("inspecting %u\n", s); - to_visit.pop_front(); - assert(s != DEAD_STATE); - - u32 d = dist[s]; - assert(d >= last_d); - assert(d != ~0U); - + dist[raw.start_anchored] = 0; + + u32 last_d = 0; + + while (!to_visit.empty()) { + dstate_id_t s = to_visit.front(); + DEBUG_PRINTF("inspecting %u\n", s); + to_visit.pop_front(); + assert(s != DEAD_STATE); + + u32 d = dist[s]; + assert(d >= last_d); + assert(d != ~0U); + for (dstate_id_t t : raw.states[s].next) { - if (t == DEAD_STATE) { - continue; - } - if (dist[t] == ~0U) { - to_visit.push_back(t); - dist[t] = d + 1; - } else { - assert(dist[t] <= d + 1); - } - } - - last_d = d; - } - - return last_d; -} - + if (t == DEAD_STATE) { + continue; + } + if (dist[t] == ~0U) { + to_visit.push_back(t); + dist[t] = d + 1; + } else { + assert(dist[t] <= d + 1); + } + } + + last_d = d; + } + + return last_d; +} + bool clear_deeper_reports(raw_dfa &raw, u32 max_offset) { DEBUG_PRINTF("clearing reports on states deeper than %u\n", max_offset); - vector<u32> bob_dist; - u32 max_min_dist_bob = calc_min_dist_from_bob(raw, &bob_dist); - - if (max_min_dist_bob <= max_offset) { + vector<u32> bob_dist; + u32 max_min_dist_bob = calc_min_dist_from_bob(raw, &bob_dist); + + if (max_min_dist_bob <= max_offset) { return false; - } - + } + bool changed = false; - for (u32 s = DEAD_STATE + 1; s < raw.states.size(); s++) { + for (u32 s = DEAD_STATE + 1; s < raw.states.size(); s++) { if (bob_dist[s] > max_offset && state_has_reports(raw, s)) { DEBUG_PRINTF("clearing reports on %u (depth %u)\n", s, bob_dist[s]); auto &ds = raw.states[s]; ds.reports.clear(); ds.reports_eod.clear(); changed = true; - } - } - + } + } + if (!changed) { return false; } - + // We may have cleared all reports from the DFA, in which case it should // become empty. 
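/* calc_min_dist_from_bob above is a plain breadth-first search over the DFA's
 * transition table: each state gets its minimum distance from the anchored
 * start, and clear_deeper_reports then drops reports on states whose distance
 * exceeds max_offset. A minimal sketch of that BFS follows, with illustrative
 * types (next[s] lists the successor per symbol, state 0 is dead), not the
 * Hyperscan ones. */
#include <cstdint>
#include <deque>
#include <limits>
#include <vector>

std::vector<uint32_t>
min_dist_from_start(const std::vector<std::vector<uint32_t>> &next,
                    uint32_t start) {
    const uint32_t UNSEEN = std::numeric_limits<uint32_t>::max();
    std::vector<uint32_t> dist(next.size(), UNSEEN);
    std::deque<uint32_t> q = { start };
    dist[start] = 0;
    while (!q.empty()) {
        uint32_t s = q.front();
        q.pop_front();
        for (uint32_t t : next[s]) {
            if (t != 0 /* dead state */ && dist[t] == UNSEEN) {
                dist[t] = dist[s] + 1; /* first BFS visit is the shortest path */
                q.push_back(t);
            }
        }
    }
    return dist; /* unreachable states keep UNSEEN */
}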
if (all_of_in(raw.states, [](const dstate &ds) { @@ -195,57 +195,57 @@ bool clear_deeper_reports(raw_dfa &raw, u32 max_offset) { DEBUG_PRINTF("no reports left at all, dfa is dead\n"); raw.start_anchored = DEAD_STATE; raw.start_floating = DEAD_STATE; - } - + } + return true; -} - -set<ReportID> all_reports(const raw_dfa &rdfa) { - set<ReportID> all; - for (const auto &ds : rdfa.states) { - insert(&all, ds.reports); - insert(&all, ds.reports_eod); - } - return all; -} - -bool has_eod_accepts(const raw_dfa &rdfa) { - for (const auto &ds : rdfa.states) { - if (!ds.reports_eod.empty()) { - return true; - } - } - return false; -} - -bool has_non_eod_accepts(const raw_dfa &rdfa) { - for (const auto &ds : rdfa.states) { - if (!ds.reports.empty()) { - return true; - } - } - return false; -} - -size_t hash_dfa_no_reports(const raw_dfa &rdfa) { - size_t v = 0; - hash_combine(v, rdfa.alpha_size); +} + +set<ReportID> all_reports(const raw_dfa &rdfa) { + set<ReportID> all; + for (const auto &ds : rdfa.states) { + insert(&all, ds.reports); + insert(&all, ds.reports_eod); + } + return all; +} + +bool has_eod_accepts(const raw_dfa &rdfa) { + for (const auto &ds : rdfa.states) { + if (!ds.reports_eod.empty()) { + return true; + } + } + return false; +} + +bool has_non_eod_accepts(const raw_dfa &rdfa) { + for (const auto &ds : rdfa.states) { + if (!ds.reports.empty()) { + return true; + } + } + return false; +} + +size_t hash_dfa_no_reports(const raw_dfa &rdfa) { + size_t v = 0; + hash_combine(v, rdfa.alpha_size); hash_combine(v, rdfa.alpha_remap); - - for (const auto &ds : rdfa.states) { + + for (const auto &ds : rdfa.states) { hash_combine(v, ds.next); - } - - return v; -} - -size_t hash_dfa(const raw_dfa &rdfa) { - size_t v = 0; - hash_combine(v, hash_dfa_no_reports(rdfa)); - hash_combine(v, all_reports(rdfa)); - return v; -} - + } + + return v; +} + +size_t hash_dfa(const raw_dfa &rdfa) { + size_t v = 0; + hash_combine(v, hash_dfa_no_reports(rdfa)); + hash_combine(v, all_reports(rdfa)); + return v; +} + static bool can_die_early(const raw_dfa &raw, dstate_id_t s, map<dstate_id_t, u32> &visited, u32 age_limit) { @@ -283,4 +283,4 @@ bool is_dead(const raw_dfa &rdfa) { rdfa.start_floating == DEAD_STATE; } -} // namespace ue2 +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfa/mcclellancompile_util.h b/contrib/libs/hyperscan/src/nfa/mcclellancompile_util.h index 0dc58533a1..bc730cddea 100644 --- a/contrib/libs/hyperscan/src/nfa/mcclellancompile_util.h +++ b/contrib/libs/hyperscan/src/nfa/mcclellancompile_util.h @@ -1,43 +1,43 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef MCCLELLAN_COMPILE_UTIL_H -#define MCCLELLAN_COMPILE_UTIL_H - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef MCCLELLAN_COMPILE_UTIL_H +#define MCCLELLAN_COMPILE_UTIL_H + #include "rdfa.h" -#include "ue2common.h" - -#include <set> - -namespace ue2 { - +#include "ue2common.h" + +#include <set> + +namespace ue2 { + u32 remove_leading_dots(raw_dfa &raw); - + /** * \brief Clear reports on any states that are deeper than \a max_offset from * start of stream. @@ -46,17 +46,17 @@ u32 remove_leading_dots(raw_dfa &raw); */ bool clear_deeper_reports(raw_dfa &raw, u32 max_offset); -std::set<ReportID> all_reports(const raw_dfa &rdfa); -bool has_eod_accepts(const raw_dfa &rdfa); -bool has_non_eod_accepts(const raw_dfa &rdfa); - -/** \brief Compute a simple hash of this raw_dfa. Does not include report - * information. */ -size_t hash_dfa_no_reports(const raw_dfa &rdfa); - -/** \brief Compute a simple hash of this raw_dfa, including its reports. */ -size_t hash_dfa(const raw_dfa &rdfa); - +std::set<ReportID> all_reports(const raw_dfa &rdfa); +bool has_eod_accepts(const raw_dfa &rdfa); +bool has_non_eod_accepts(const raw_dfa &rdfa); + +/** \brief Compute a simple hash of this raw_dfa. Does not include report + * information. 
*/ +size_t hash_dfa_no_reports(const raw_dfa &rdfa); + +/** \brief Compute a simple hash of this raw_dfa, including its reports. */ +size_t hash_dfa(const raw_dfa &rdfa); + bool can_die_early(const raw_dfa &raw, u32 age_limit); /** @@ -66,6 +66,6 @@ bool can_die_early(const raw_dfa &raw, u32 age_limit); bool is_dead(const raw_dfa &rdfa); -} // namespace ue2 - -#endif +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/nfa/mpv.c b/contrib/libs/hyperscan/src/nfa/mpv.c index a3245267ca..552754d608 100644 --- a/contrib/libs/hyperscan/src/nfa/mpv.c +++ b/contrib/libs/hyperscan/src/nfa/mpv.c @@ -1,1096 +1,1096 @@ -/* +/* * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "mpv.h" - -#include "mpv_internal.h" -#include "nfa_api.h" -#include "nfa_api_queue.h" -#include "nfa_internal.h" -#include "shufti.h" -#include "truffle.h" -#include "ue2common.h" -#include "vermicelli.h" -#include "vermicelli_run.h" -#include "util/multibit.h" -#include "util/partial_store.h" -#include "util/simd_utils.h" -#include "util/unaligned.h" - -#include <string.h> - -#define MIN_SKIP_REPEAT 32 - -typedef struct mpv_pq_item PQ_T; -#define PQ_COMP(pqc_items, a, b) \ - ((pqc_items)[a].trigger_loc < (pqc_items)[b].trigger_loc) -#define PQ_COMP_B(pqc_items, a, b_fixed) \ - ((pqc_items)[a].trigger_loc < (b_fixed).trigger_loc) - -#include "util/pqueue.h" - -static really_inline -u64a *get_counter_n(struct mpv_decomp_state *s, - const struct mpv *m, u32 n) { - return (u64a *)((char *)s + get_counter_info(m)[n].counter_offset); -} - -static really_inline -u64a *get_counter_for_kilo(struct mpv_decomp_state *s, - const struct mpv_kilopuff *kp) { - return (u64a *)((char *)s + kp->counter_offset); -} - -static really_inline -u64a get_counter_value_for_kilo(struct mpv_decomp_state *s, - const struct mpv_kilopuff *kp) { - return *get_counter_for_kilo(s, kp) + s->counter_adj; -} - -static really_inline -const u64a *get_counter_for_kilo_c(const struct mpv_decomp_state *s, - const struct mpv_kilopuff *kp) { - return (const u64a *)((const char *)s + kp->counter_offset); -} - - -static never_inline -void normalize_counters(struct mpv_decomp_state *dstate, const struct mpv *m) { - u64a adj = dstate->counter_adj; - u64a *counters = get_counter_n(dstate, m, 0); - - if (!adj) { - return; - } - - for (u32 i = 0; i < m->counter_count; i++) { - /* update all counters - alive or dead */ - counters[i] += adj; - DEBUG_PRINTF("counter %u: %llu\n", i, counters[i]); - } - - dstate->counter_adj = 0; -} - -static really_inline -char processReports(const struct mpv *m, u8 *reporters, - const struct mpv_decomp_state *dstate, u64a counter_adj, - u64a report_offset, NfaCallback cb, void *ctxt, - ReportID *rl, u32 *rl_count_out) { - DEBUG_PRINTF("reporting at offset %llu\n", report_offset); - const struct mpv_kilopuff *kp = (const void *)(m + 1); - u32 rl_count = 0; - - for (u32 i = mmbit_iterate(reporters, m->kilo_count, MMB_INVALID); - i != MMB_INVALID; i = mmbit_iterate(reporters, m->kilo_count, i)) { - const struct mpv_puffette *curr = dstate->active[i].curr; - u64a curr_counter_val = *get_counter_for_kilo_c(dstate, &kp[i]) - + counter_adj; - DEBUG_PRINTF("kilo %u, underlying counter: %llu current: %llu\n", i, - *get_counter_for_kilo_c(dstate, &kp[i]), curr_counter_val); - assert(curr_counter_val != MPV_DEAD_VALUE); /* counter_adj should take - * care if underlying value - * is -1 */ - char did_stuff = 0; - - while (curr->report != INVALID_REPORT) { - assert(curr_counter_val >= curr->repeats); - if (curr->unbounded || curr_counter_val == curr->repeats) { - DEBUG_PRINTF("report %u at %llu\n", curr->report, - report_offset); - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "mpv.h" + +#include "mpv_internal.h" +#include "nfa_api.h" +#include "nfa_api_queue.h" +#include "nfa_internal.h" +#include "shufti.h" +#include "truffle.h" +#include "ue2common.h" +#include "vermicelli.h" +#include "vermicelli_run.h" +#include "util/multibit.h" +#include "util/partial_store.h" +#include "util/simd_utils.h" +#include "util/unaligned.h" + +#include <string.h> + +#define MIN_SKIP_REPEAT 32 + +typedef struct mpv_pq_item PQ_T; +#define PQ_COMP(pqc_items, a, b) \ + ((pqc_items)[a].trigger_loc < (pqc_items)[b].trigger_loc) +#define PQ_COMP_B(pqc_items, a, b_fixed) \ + ((pqc_items)[a].trigger_loc < (b_fixed).trigger_loc) + +#include "util/pqueue.h" + +static really_inline +u64a *get_counter_n(struct mpv_decomp_state *s, + const struct mpv *m, u32 n) { + return (u64a *)((char *)s + get_counter_info(m)[n].counter_offset); +} + +static really_inline +u64a *get_counter_for_kilo(struct mpv_decomp_state *s, + const struct mpv_kilopuff *kp) { + return (u64a *)((char *)s + kp->counter_offset); +} + +static really_inline +u64a get_counter_value_for_kilo(struct mpv_decomp_state *s, + const struct mpv_kilopuff *kp) { + return *get_counter_for_kilo(s, kp) + s->counter_adj; +} + +static really_inline +const u64a *get_counter_for_kilo_c(const struct mpv_decomp_state *s, + const struct mpv_kilopuff *kp) { + return (const u64a *)((const char *)s + kp->counter_offset); +} + + +static never_inline +void normalize_counters(struct mpv_decomp_state *dstate, const struct mpv *m) { + u64a adj = dstate->counter_adj; + u64a *counters = get_counter_n(dstate, m, 0); + + if (!adj) { + return; + } + + for (u32 i = 0; i < m->counter_count; i++) { + /* update all counters - alive or dead */ + counters[i] += adj; + DEBUG_PRINTF("counter %u: %llu\n", i, counters[i]); + } + + dstate->counter_adj = 0; +} + +static really_inline +char processReports(const struct mpv *m, u8 *reporters, + const struct mpv_decomp_state *dstate, u64a counter_adj, + u64a report_offset, NfaCallback cb, void *ctxt, + ReportID *rl, u32 *rl_count_out) { + DEBUG_PRINTF("reporting at offset %llu\n", report_offset); + const struct mpv_kilopuff *kp = (const void *)(m + 1); + u32 rl_count = 0; + + for (u32 i = mmbit_iterate(reporters, m->kilo_count, MMB_INVALID); + i != MMB_INVALID; i = mmbit_iterate(reporters, m->kilo_count, i)) { + const struct mpv_puffette *curr = dstate->active[i].curr; + u64a curr_counter_val = *get_counter_for_kilo_c(dstate, &kp[i]) + + counter_adj; + DEBUG_PRINTF("kilo %u, underlying counter: %llu current: %llu\n", i, + 
*get_counter_for_kilo_c(dstate, &kp[i]), curr_counter_val); + assert(curr_counter_val != MPV_DEAD_VALUE); /* counter_adj should take + * care if underlying value + * is -1 */ + char did_stuff = 0; + + while (curr->report != INVALID_REPORT) { + assert(curr_counter_val >= curr->repeats); + if (curr->unbounded || curr_counter_val == curr->repeats) { + DEBUG_PRINTF("report %u at %llu\n", curr->report, + report_offset); + if (curr->unbounded && !curr->simple_exhaust) { - assert(rl_count < m->puffette_count); - *rl = curr->report; - ++rl; - rl_count++; - } - + assert(rl_count < m->puffette_count); + *rl = curr->report; + ++rl; + rl_count++; + } + if (cb(0, report_offset, curr->report, ctxt) == MO_HALT_MATCHING) { - DEBUG_PRINTF("bailing\n"); - return MO_HALT_MATCHING; - } - did_stuff = 1; - } - - curr--; - } - - if (!did_stuff) { - mmbit_unset(reporters, m->kilo_count, i); - } - } - - *rl_count_out = rl_count; - return MO_CONTINUE_MATCHING; -} - -static -ReportID *get_report_list(const struct mpv *m, struct mpv_decomp_state *s) { - return (ReportID *)((char *)s + m->report_list_offset); -} - -static really_inline -char processReportsForRange(const struct mpv *m, u8 *reporters, - struct mpv_decomp_state *dstate, u64a first_offset, - size_t length, NfaCallback cb, void *ctxt) { - if (!length) { - return MO_CONTINUE_MATCHING; - } - - u64a counter_adj = dstate->counter_adj; - u32 rl_count = 0; - ReportID *rl = get_report_list(m, dstate); - char rv = processReports(m, reporters, dstate, 1 + counter_adj, - first_offset + 1, cb, ctxt, rl, &rl_count); - if (rv != MO_CONTINUE_MATCHING) { - DEBUG_PRINTF("bailing\n"); - return rv; - } - if (!rl_count) { - return MO_CONTINUE_MATCHING; - } - + DEBUG_PRINTF("bailing\n"); + return MO_HALT_MATCHING; + } + did_stuff = 1; + } + + curr--; + } + + if (!did_stuff) { + mmbit_unset(reporters, m->kilo_count, i); + } + } + + *rl_count_out = rl_count; + return MO_CONTINUE_MATCHING; +} + +static +ReportID *get_report_list(const struct mpv *m, struct mpv_decomp_state *s) { + return (ReportID *)((char *)s + m->report_list_offset); +} + +static really_inline +char processReportsForRange(const struct mpv *m, u8 *reporters, + struct mpv_decomp_state *dstate, u64a first_offset, + size_t length, NfaCallback cb, void *ctxt) { + if (!length) { + return MO_CONTINUE_MATCHING; + } + + u64a counter_adj = dstate->counter_adj; + u32 rl_count = 0; + ReportID *rl = get_report_list(m, dstate); + char rv = processReports(m, reporters, dstate, 1 + counter_adj, + first_offset + 1, cb, ctxt, rl, &rl_count); + if (rv != MO_CONTINUE_MATCHING) { + DEBUG_PRINTF("bailing\n"); + return rv; + } + if (!rl_count) { + return MO_CONTINUE_MATCHING; + } + DEBUG_PRINTF("length=%zu, rl_count=%u\n", length, rl_count); for (size_t i = 2; i <= length; i++) { - for (u32 j = 0; j < rl_count; j++) { + for (u32 j = 0; j < rl_count; j++) { if (cb(0, first_offset + i, rl[j], ctxt) == MO_HALT_MATCHING) { - DEBUG_PRINTF("bailing\n"); - return MO_HALT_MATCHING; - } - } - } - - return MO_CONTINUE_MATCHING; -} - -/* returns last puffette that we have satisfied */ -static -const struct mpv_puffette *get_curr_puff(const struct mpv *m, - const struct mpv_kilopuff *kp, - struct mpv_decomp_state *dstate) { - u64a counter = *get_counter_for_kilo(dstate, kp); - assert(counter != MPV_DEAD_VALUE); - - const struct mpv_puffette *p = get_puff_array(m, kp); - DEBUG_PRINTF("looking for current puffette (counter = %llu)\n", counter); - DEBUG_PRINTF("next: (%u, %u)\n", p->repeats, p->report); - while (counter + 1 >= p->repeats && 
p->report != INVALID_REPORT) { - DEBUG_PRINTF("advancing\n"); - ++p; - DEBUG_PRINTF("next: (%u, %u)\n", p->repeats, p->report); - } - - return p - 1; -} - -static -const struct mpv_puffette *get_init_puff(const struct mpv *m, - const struct mpv_kilopuff *kp) { - const struct mpv_puffette *p = get_puff_array(m, kp); - while (p->repeats == 1) { - ++p; - } - return p - 1; -} - - -/* returns the last puffette whose repeats have been satisfied */ -static really_inline -const struct mpv_puffette *update_curr_puff(const struct mpv *m, u8 *reporters, - u64a counter, - const struct mpv_puffette *in, - u32 kilo_index) { - assert(counter != MPV_DEAD_VALUE); - - const struct mpv_puffette *p = in; - DEBUG_PRINTF("looking for current puffette (counter = %llu)\n", counter); - DEBUG_PRINTF("curr: (%u, %u)\n", p->repeats, p->report); - while (counter + 1 >= p[1].repeats && p[1].report != INVALID_REPORT) { - DEBUG_PRINTF("advancing\n"); - ++p; - DEBUG_PRINTF("curr: (%u, %u)\n", p->repeats, p->report); - } - - if (p != in) { - mmbit_set(reporters, m->kilo_count, kilo_index); - } - - return p; -} - -static really_inline -size_t limitByReach(const struct mpv_kilopuff *kp, const u8 *buf, - size_t length) { - if (kp->type == MPV_VERM) { - return vermicelliExec(kp->u.verm.c, 0, buf, buf + length) - buf; - } else if (kp->type == MPV_SHUFTI) { - m128 mask_lo = kp->u.shuf.mask_lo; - m128 mask_hi = kp->u.shuf.mask_hi; - return shuftiExec(mask_lo, mask_hi, buf, buf + length) - buf; - } else if (kp->type == MPV_TRUFFLE) { - return truffleExec(kp->u.truffle.mask1, kp->u.truffle.mask2, buf, buf + length) - buf; - } else if (kp->type == MPV_NVERM) { - return nvermicelliExec(kp->u.verm.c, 0, buf, buf + length) - buf; - } - - assert(kp->type == MPV_DOT); - return length; -} - -static never_inline -void fillLimits(const struct mpv *m, u8 *active, u8 *reporters, - struct mpv_decomp_state *dstate, struct mpv_pq_item *pq, - const u8 *buf, size_t length) { - DEBUG_PRINTF("filling limits %zu\n", length); - assert(!dstate->pq_size); - - if (!length) { - DEBUG_PRINTF("0 length\n"); - return; - } - - const struct mpv_kilopuff *kp = (const void *)(m + 1); - - for (u32 i = mmbit_iterate(active, m->kilo_count, MMB_INVALID); - i != MMB_INVALID; i = mmbit_iterate(active, m->kilo_count, i)) { - dstate->active[i].curr = get_curr_puff(m, &kp[i], dstate); - if (dstate->active[i].curr->report != INVALID_REPORT) { - /* this kilo puff may fire reports */ - mmbit_set(reporters, m->kilo_count, i); - } - - u64a lim = limitByReach(&kp[i], buf, length); - DEBUG_PRINTF("lim %llu/%zu\n", lim, length); - - if (kp[i].dead_point != MPV_DEAD_VALUE) { - assert(!kp[i].auto_restart); - u64a counter = get_counter_value_for_kilo(dstate, &kp[i]); - u64a dp_trigger = kp[i].dead_point - counter; - if (dp_trigger < lim) { - DEBUG_PRINTF("dead point trigger %llu\n", dp_trigger); - lim = dp_trigger; - } - } - - if (kp[i].auto_restart && !lim) { - *get_counter_for_kilo(dstate, &kp[i]) = MPV_DEAD_VALUE; - mmbit_unset(reporters, m->kilo_count, i); - /* the counter value will cause the nex_trigger calculation below to - * adjust correctly */ - if (length == 1) { - dstate->active[i].limit = 0; - continue; - } - - lim = limitByReach(&kp[i], buf + 1, length - 1) + 1; - - - /* restart active counters */ - dstate->active[i].curr = get_init_puff(m, &kp[i]); - assert(dstate->active[i].curr[0].report == INVALID_REPORT); - - DEBUG_PRINTF("lim now %llu/%zu\n", lim, length); - } - - dstate->active[i].limit = lim; - if (!lim) { - mmbit_unset(active, m->kilo_count, i); - 
mmbit_unset(reporters, m->kilo_count, i); - continue; - } - if (dstate->active[i].curr[1].report != INVALID_REPORT) { - u32 next_trigger = dstate->active[i].curr[1].repeats - 1ULL - - *get_counter_for_kilo(dstate, &kp[i]); - DEBUG_PRINTF("next trigger %u\n", next_trigger); - lim = MIN(lim, next_trigger); - } - - if (lim != length) { - struct mpv_pq_item temp = { - .trigger_loc = lim, - .kilo = i - }; - - DEBUG_PRINTF("push for %u at %llu\n", i, lim); - pq_insert(pq, dstate->pq_size, temp); - ++dstate->pq_size; - } - - assert(lim || kp[i].auto_restart); - } - - DEBUG_PRINTF("filled\n"); - dstate->filled = 1; -} - -static never_inline -void handleTopN(const struct mpv *m, s64a loc, u8 *active, u8 *reporters, - struct mpv_decomp_state *dstate, struct mpv_pq_item *pq, - const u8 *buf, size_t length, u32 i) { - assert(i < m->kilo_count); - DEBUG_PRINTF("MQE_TOP + %u @%lld\n", i, loc); - if (mmbit_set(active, m->kilo_count, i)) { - DEBUG_PRINTF("kilo is already alive and kicking\n"); - return; - } - - const struct mpv_kilopuff *kp = (const struct mpv_kilopuff *)(m + 1); - - assert(!kp[i].auto_restart); /* handle later/never */ - - /* we need to ensure that the counters are upto date */ - normalize_counters(dstate, m); - - /* reset counter */ - *get_counter_for_kilo(dstate, &kp[i]) = 0; - - if ((size_t)loc == length) { - /* end of buffer, just make sure it is active */ - dstate->active[i].limit = loc; - dstate->active[i].curr = get_init_puff(m, &kp[i]); - return; - } - - /* find the limit */ - u64a lim = limitByReach(&kp[i], buf + loc, length - loc) + loc; - - /* no need to worry about dead_point triggers here as kilopuff must first - * update chain (to fire a report) before it goes dead. */ - - if (lim == (u64a)loc) { - DEBUG_PRINTF("dead on arrival\n"); - mmbit_unset(active, m->kilo_count, i); - return; - } - dstate->active[i].limit = lim; - - /* setup puffette, find next trigger */ - dstate->active[i].curr = get_init_puff(m, &kp[i]); - if (dstate->active[i].curr[1].report != INVALID_REPORT) { - u32 next_trigger = dstate->active[i].curr[1].repeats - 1ULL + loc; - lim = MIN(lim, next_trigger); - } - - assert(dstate->active[i].curr[0].repeats == 1 - || dstate->active[i].curr[0].report == INVALID_REPORT); - if (dstate->active[i].curr[0].repeats == 1) { - DEBUG_PRINTF("yippee\n"); - mmbit_set(reporters, m->kilo_count, i); - } - - assert(lim > (u64a)loc); - - /* add to pq */ - if (lim != length) { - struct mpv_pq_item temp = { - .trigger_loc = lim, - .kilo = i - }; - - DEBUG_PRINTF("push for %u at %llu\n", i, lim); - pq_insert(pq, dstate->pq_size, temp); - ++dstate->pq_size; - } -} - -static really_inline -void killKilo(const struct mpv *m, u8 *active, u8 *reporters, - struct mpv_decomp_state *dstate, struct mpv_pq_item *pq, u32 i) { - DEBUG_PRINTF("squashing kilo %u (progress %llu, limit %llu)\n", - i, pq_top(pq)->trigger_loc, dstate->active[i].limit); - mmbit_unset(active, m->kilo_count, i); - mmbit_unset(reporters, m->kilo_count, i); - - pq_pop(pq, dstate->pq_size); - dstate->pq_size--; -} - -static really_inline -void updateKiloChains(const struct mpv *m, u8 *reporters, - struct mpv_decomp_state *dstate, struct mpv_pq_item *pq, - u64a curr_loc, size_t buf_length, u32 i) { - const struct mpv_kilopuff *kp = (const void *)(m + 1); - u64a counter = get_counter_value_for_kilo(dstate, &kp[i]); - - DEBUG_PRINTF("updating active puff for kilo %u\n", i); - dstate->active[i].curr = update_curr_puff(m, reporters, counter, - dstate->active[i].curr, i); - - u64a next_trigger = dstate->active[i].limit; - - if 
(dstate->active[i].curr[1].report != INVALID_REPORT) { - u64a next_rep_trigger = dstate->active[i].curr[1].repeats - 1 - counter - + curr_loc; - - next_trigger = MIN(next_trigger, next_rep_trigger); - } else if (kp[i].dead_point != MPV_DEAD_VALUE) { - u64a dp_trigger = kp[i].dead_point - counter + curr_loc; - DEBUG_PRINTF("dead point trigger %llu\n", dp_trigger); - if (dp_trigger < dstate->active[i].limit) { - dstate->active[i].limit = dp_trigger; - next_trigger = dp_trigger; - } - } - - DEBUG_PRINTF("next trigger location is %llu\n", next_trigger); - - if (next_trigger < buf_length) { - assert(dstate->pq_size <= m->kilo_count); - assert(next_trigger > pq_top(pq)->trigger_loc); - struct mpv_pq_item temp = { - .trigger_loc = next_trigger, - .kilo = i - }; - - DEBUG_PRINTF("(replace) push for %u at %llu\n", i, next_trigger); - pq_replace_top(pq, dstate->pq_size, temp); - } else { - pq_pop(pq, dstate->pq_size); - dstate->pq_size--; - DEBUG_PRINTF("PQ_POP\n"); - } - DEBUG_PRINTF("pq size now %u next top %llu\n", dstate->pq_size, - pq_top(pq)->trigger_loc); -} - -static really_inline -u8 do_single_shufti(const m128 l, const m128 h, u8 c) { - const u8 *lo = (const u8 *)&l; - const u8 *hi = (const u8 *)&h; - return lo[c & 0xf] & hi[c >> 4]; -} - -static really_inline -size_t find_last_bad(const struct mpv_kilopuff *kp, const u8 *buf, - size_t length, size_t curr, u32 min_rep) { - assert(kp->type != MPV_DOT); - - DEBUG_PRINTF("repeats = %u\n", min_rep); - /* TODO: this should be replace by some sort of simd stuff */ - - if (kp->type == MPV_VERM) { - if (min_rep < MIN_SKIP_REPEAT) { - return find_nverm_run(kp->u.verm.c, 0, min_rep, buf, buf + curr, - buf + length) - buf - 1; - } - - verm_restart:; - assert(buf[curr] == kp->u.verm.c); - size_t test = curr; - if (curr + min_rep < length) { - test = curr + min_rep; - } else { - test = length - 1; - } - - while (test > curr) { - if (buf[test] == kp->u.verm.c) { - curr = test; - if (curr == length - 1) { - return curr; - } - goto verm_restart; - } - --test; - } - } else if (kp->type == MPV_SHUFTI) { - m128 lo = kp->u.shuf.mask_lo; - m128 hi = kp->u.shuf.mask_hi; - shuf_restart: - assert(do_single_shufti(lo, hi, buf[curr])); - size_t test = curr; - if (curr + min_rep < length) { - test = curr + min_rep; - } else { - test = length - 1; - } - - while (test > curr) { - if (do_single_shufti(lo, hi, buf[test])) { - DEBUG_PRINTF("updating curr from %zu to %zu\n", curr, test); - curr = test; - if (curr == length - 1) { - return curr; - } - goto shuf_restart; - } - --test; - } - } else if (kp->type == MPV_TRUFFLE) { - const m128 mask1 = kp->u.truffle.mask1; - const m128 mask2 = kp->u.truffle.mask2; - truffle_restart:; - size_t test = curr; - if (curr + min_rep < length) { - test = curr + min_rep; - } else { - test = length - 1; - } - - while (test > curr) { - const u8 *rv = truffleExec(mask1, mask2, buf + test, buf + test + 1); - if (rv == buf + test) { - curr = test; - if (curr == length - 1) { - return curr; - } - goto truffle_restart; - } - --test; - } - } else if (kp->type == MPV_NVERM) { - if (min_rep < MIN_SKIP_REPEAT) { - return find_verm_run(kp->u.verm.c, 0, min_rep, buf, buf + curr, - buf + length) - buf - 1; - } - - nverm_restart:; - assert(buf[curr] != kp->u.verm.c); - size_t test = curr; - if (curr + min_rep < length) { - test = curr + min_rep; - } else { - test = length - 1; - } - - while (test > curr) { - if (buf[test] != kp->u.verm.c) { - curr = test; - if (curr == length - 1) { - return curr; - } - goto nverm_restart; - } - --test; - } - } else { 
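-        /* unreachable: MPV_DOT is ruled out by the assert at entry and all
-         * other kilopuff accel types are handled above */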
- assert(0); - } - - return curr; -} - -static really_inline -void restartKilo(const struct mpv *m, UNUSED u8 *active, u8 *reporters, - struct mpv_decomp_state *dstate, struct mpv_pq_item *pq, - const u8 *buf, u64a prev_limit, size_t buf_length, u32 i) { - const struct mpv_kilopuff *kp = (const void *)(m + 1); - assert(kp[i].auto_restart); - assert(mmbit_isset(active, m->kilo_count, i)); - - DEBUG_PRINTF("we got to %llu,%llu\n", prev_limit, dstate->active[i].limit); - assert(prev_limit == dstate->active[i].limit); - - DEBUG_PRINTF("resetting counter\n"); - - /* we need to ensure that the counters are upto date */ - normalize_counters(dstate, m); - - /* current byte is dead, will wrap to 0 after processing this byte */ - assert(MPV_DEAD_VALUE + 1 == 0); - *get_counter_for_kilo(dstate, &kp[i]) = MPV_DEAD_VALUE; - - DEBUG_PRINTF("resetting puffettes\n"); - dstate->active[i].curr = get_init_puff(m, &kp[i]); - - assert(dstate->active[i].curr[0].report == INVALID_REPORT); - /* TODO: handle restart .{1,}s */ - - mmbit_unset(reporters, m->kilo_count, i); - - if (prev_limit != buf_length - 1) { - size_t last_bad = find_last_bad(&kp[i], buf, buf_length, prev_limit, - dstate->active[i].curr[1].repeats); - assert(last_bad >= prev_limit && last_bad < buf_length); - if (last_bad != prev_limit) { - /* there is no point in getting restarted at this location */ - dstate->active[i].limit = last_bad; - assert(dstate->pq_size <= m->kilo_count); - struct mpv_pq_item temp = { - .trigger_loc = last_bad, - .kilo = i - }; - - pq_replace_top(pq, dstate->pq_size, temp); - return; - } - } - - /* TODO: skipping would really come in handy about now */ - u64a lim; - if (buf_length > prev_limit + 1) { - lim = limitByReach(&kp[i], buf + prev_limit + 1, - buf_length - (prev_limit + 1)) + - prev_limit + 1; - } else { - assert(buf_length == prev_limit + 1); - lim = buf_length; - } - DEBUG_PRINTF("next limit is %llu\n", lim); - - assert(lim > prev_limit); - - dstate->active[i].limit = lim; - - if (dstate->active[i].curr[1].report != INVALID_REPORT) { - u32 next_trigger = dstate->active[i].curr[1].repeats + prev_limit; - lim = MIN(lim, next_trigger); - } - - DEBUG_PRINTF("next trigger for kilo at %llu\n", lim); - - if (lim < buf_length) { - assert(dstate->pq_size <= m->kilo_count); - assert(lim >= prev_limit); - struct mpv_pq_item temp = { - .trigger_loc = lim, - .kilo = i - }; - - pq_replace_top(pq, dstate->pq_size, temp); - } else { - pq_pop(pq, dstate->pq_size); - dstate->pq_size--; - } -} - -static really_inline -void handle_events(const struct mpv *m, u8 *active, u8 *reporters, - struct mpv_decomp_state *dstate, struct mpv_pq_item *pq, - u64a loc, const u8 *buf, size_t buf_length) { - const struct mpv_kilopuff *kp = (const void *)(m + 1); - - while (dstate->pq_size && pq_top(pq)->trigger_loc <= loc) { - assert(pq_top(pq)->trigger_loc == loc); - - u32 kilo = pq_top(pq)->kilo; - - DEBUG_PRINTF("pop for kilo %u at %llu\n", kilo, - pq_top(pq)->trigger_loc); - - if (dstate->active[kilo].limit <= loc) { - if (!kp[kilo].auto_restart) { - killKilo(m, active, reporters, dstate, pq, kilo); - } else { - restartKilo(m, active, reporters, dstate, pq, buf, loc, - buf_length, kilo); - } - } else { - updateKiloChains(m, reporters, dstate, pq, loc, buf_length, kilo); - } - } -} - -static really_inline -u64a find_next_limit(const struct mpv *m, u8 *active, u8 *reporters, - struct mpv_decomp_state *dstate, struct mpv_pq_item *pq, - const u8 *buf, u64a prev_limit, u64a ep, - size_t buf_length) { - u64a limit = ep; - - DEBUG_PRINTF("length 
%llu (prev %llu), pq %u\n", limit, prev_limit, - dstate->pq_size); - - handle_events(m, active, reporters, dstate, pq, prev_limit, buf, - buf_length); - - if (dstate->pq_size) { - limit = MIN(pq_top(pq)->trigger_loc, limit); - assert(limit > prev_limit); - } - - DEBUG_PRINTF("limit now %llu\n", limit); - return limit; -} - -static really_inline -char mpvExec(const struct mpv *m, u8 *active, u8 *reporters, - struct mpv_decomp_state *dstate, struct mpv_pq_item *pq, - const u8 *buf, s64a start, size_t length, size_t buf_length, - u64a offsetAdj, NfaCallback cb, void *ctxt) { - DEBUG_PRINTF("running mpv (s %lliu, l %zu, o %llu)\n", - *get_counter_n(dstate, m, 0) + dstate->counter_adj, length, - offsetAdj); - - u64a progress = start; /* progress is relative to buffer offsets */ - - while (progress < length) { - DEBUG_PRINTF("progress %llu\n", progress); - - /* find next limit and update chains */ - u64a limit = find_next_limit(m, active, reporters, dstate, pq, buf, - progress, length, buf_length); - assert(limit != progress); - u64a incr = limit - progress; - DEBUG_PRINTF("incr = %llu\n", incr); - - /* report matches upto next limit */ - char rv = processReportsForRange(m, reporters, dstate, - offsetAdj + progress, limit - progress, - cb, ctxt); - - if (rv != MO_CONTINUE_MATCHING) { - DEBUG_PRINTF("mpvExec done %llu/%zu\n", progress, length); - return rv; - } - - dstate->counter_adj += incr; - progress = limit; - } - - assert(progress == length); - - DEBUG_PRINTF("mpvExec done\n"); - return MO_CONTINUE_MATCHING; -} - -static really_inline -void mpvLoadState(struct mpv_decomp_state *out, const struct NFA *n, - const char *state) { - assert(16 >= sizeof(struct mpv_decomp_kilo)); - assert(sizeof(*out) <= n->scratchStateSize); - assert(ISALIGNED(out)); - - const struct mpv *m = getImplNfa(n); - const struct mpv_counter_info *counter_info = get_counter_info(m); - u64a *counters = get_counter_n(out, m, 0); - const char *comp_counter = state; - for (u32 i = 0; i < m->counter_count; i++) { - u32 counter_size = counter_info[i].counter_size; - counters[i] = partial_load_u64a(comp_counter, counter_size); - DEBUG_PRINTF("loaded %llu counter %u\n", counters[i], i); - comp_counter += counter_size; - } - - out->filled = 0; /* _Q_i will fill limits, curr puffetes, and populate pq - * on first call */ - out->counter_adj = 0; - out->pq_size = 0; - - u8 *reporters = (u8 *)out + m->reporter_offset; - - mmbit_clear(reporters, m->kilo_count); -} - -static really_inline -void mpvStoreState(const struct NFA *n, char *state, - const struct mpv_decomp_state *in) { - assert(ISALIGNED(in)); - const struct mpv *m = getImplNfa(n); - const struct mpv_counter_info *counter_info = get_counter_info(m); - - const u64a *counters = (const u64a *)((const char *)in - + get_counter_info(m)[0].counter_offset); - u64a adj = in->counter_adj; - char *comp_counter = state; - for (u32 i = 0; i < m->counter_count; i++) { - /* clamp counter to allow storage in smaller ints */ - u64a curr_counter = MIN(counters[i] + adj, counter_info[i].max_counter); - - u32 counter_size = counter_info[i].counter_size; - partial_store_u64a(comp_counter, curr_counter, counter_size); - DEBUG_PRINTF("stored %llu counter %u (orig %llu)\n", curr_counter, i, - counters[i]); - /* assert(counters[i] != MPV_DEAD_VALUE); /\* should have process 1 byte */ - /* * since a clear *\/ */ - comp_counter += counter_size; - } -} - + DEBUG_PRINTF("bailing\n"); + return MO_HALT_MATCHING; + } + } + } + + return MO_CONTINUE_MATCHING; +} + +/* returns last puffette that we have 
satisfied */ +static +const struct mpv_puffette *get_curr_puff(const struct mpv *m, + const struct mpv_kilopuff *kp, + struct mpv_decomp_state *dstate) { + u64a counter = *get_counter_for_kilo(dstate, kp); + assert(counter != MPV_DEAD_VALUE); + + const struct mpv_puffette *p = get_puff_array(m, kp); + DEBUG_PRINTF("looking for current puffette (counter = %llu)\n", counter); + DEBUG_PRINTF("next: (%u, %u)\n", p->repeats, p->report); + while (counter + 1 >= p->repeats && p->report != INVALID_REPORT) { + DEBUG_PRINTF("advancing\n"); + ++p; + DEBUG_PRINTF("next: (%u, %u)\n", p->repeats, p->report); + } + + return p - 1; +} + +static +const struct mpv_puffette *get_init_puff(const struct mpv *m, + const struct mpv_kilopuff *kp) { + const struct mpv_puffette *p = get_puff_array(m, kp); + while (p->repeats == 1) { + ++p; + } + return p - 1; +} + + +/* returns the last puffette whose repeats have been satisfied */ +static really_inline +const struct mpv_puffette *update_curr_puff(const struct mpv *m, u8 *reporters, + u64a counter, + const struct mpv_puffette *in, + u32 kilo_index) { + assert(counter != MPV_DEAD_VALUE); + + const struct mpv_puffette *p = in; + DEBUG_PRINTF("looking for current puffette (counter = %llu)\n", counter); + DEBUG_PRINTF("curr: (%u, %u)\n", p->repeats, p->report); + while (counter + 1 >= p[1].repeats && p[1].report != INVALID_REPORT) { + DEBUG_PRINTF("advancing\n"); + ++p; + DEBUG_PRINTF("curr: (%u, %u)\n", p->repeats, p->report); + } + + if (p != in) { + mmbit_set(reporters, m->kilo_count, kilo_index); + } + + return p; +} + +static really_inline +size_t limitByReach(const struct mpv_kilopuff *kp, const u8 *buf, + size_t length) { + if (kp->type == MPV_VERM) { + return vermicelliExec(kp->u.verm.c, 0, buf, buf + length) - buf; + } else if (kp->type == MPV_SHUFTI) { + m128 mask_lo = kp->u.shuf.mask_lo; + m128 mask_hi = kp->u.shuf.mask_hi; + return shuftiExec(mask_lo, mask_hi, buf, buf + length) - buf; + } else if (kp->type == MPV_TRUFFLE) { + return truffleExec(kp->u.truffle.mask1, kp->u.truffle.mask2, buf, buf + length) - buf; + } else if (kp->type == MPV_NVERM) { + return nvermicelliExec(kp->u.verm.c, 0, buf, buf + length) - buf; + } + + assert(kp->type == MPV_DOT); + return length; +} + +static never_inline +void fillLimits(const struct mpv *m, u8 *active, u8 *reporters, + struct mpv_decomp_state *dstate, struct mpv_pq_item *pq, + const u8 *buf, size_t length) { + DEBUG_PRINTF("filling limits %zu\n", length); + assert(!dstate->pq_size); + + if (!length) { + DEBUG_PRINTF("0 length\n"); + return; + } + + const struct mpv_kilopuff *kp = (const void *)(m + 1); + + for (u32 i = mmbit_iterate(active, m->kilo_count, MMB_INVALID); + i != MMB_INVALID; i = mmbit_iterate(active, m->kilo_count, i)) { + dstate->active[i].curr = get_curr_puff(m, &kp[i], dstate); + if (dstate->active[i].curr->report != INVALID_REPORT) { + /* this kilo puff may fire reports */ + mmbit_set(reporters, m->kilo_count, i); + } + + u64a lim = limitByReach(&kp[i], buf, length); + DEBUG_PRINTF("lim %llu/%zu\n", lim, length); + + if (kp[i].dead_point != MPV_DEAD_VALUE) { + assert(!kp[i].auto_restart); + u64a counter = get_counter_value_for_kilo(dstate, &kp[i]); + u64a dp_trigger = kp[i].dead_point - counter; + if (dp_trigger < lim) { + DEBUG_PRINTF("dead point trigger %llu\n", dp_trigger); + lim = dp_trigger; + } + } + + if (kp[i].auto_restart && !lim) { + *get_counter_for_kilo(dstate, &kp[i]) = MPV_DEAD_VALUE; + mmbit_unset(reporters, m->kilo_count, i); + /* the counter value will cause the nex_trigger 
calculation below to + * adjust correctly */ + if (length == 1) { + dstate->active[i].limit = 0; + continue; + } + + lim = limitByReach(&kp[i], buf + 1, length - 1) + 1; + + + /* restart active counters */ + dstate->active[i].curr = get_init_puff(m, &kp[i]); + assert(dstate->active[i].curr[0].report == INVALID_REPORT); + + DEBUG_PRINTF("lim now %llu/%zu\n", lim, length); + } + + dstate->active[i].limit = lim; + if (!lim) { + mmbit_unset(active, m->kilo_count, i); + mmbit_unset(reporters, m->kilo_count, i); + continue; + } + if (dstate->active[i].curr[1].report != INVALID_REPORT) { + u32 next_trigger = dstate->active[i].curr[1].repeats - 1ULL + - *get_counter_for_kilo(dstate, &kp[i]); + DEBUG_PRINTF("next trigger %u\n", next_trigger); + lim = MIN(lim, next_trigger); + } + + if (lim != length) { + struct mpv_pq_item temp = { + .trigger_loc = lim, + .kilo = i + }; + + DEBUG_PRINTF("push for %u at %llu\n", i, lim); + pq_insert(pq, dstate->pq_size, temp); + ++dstate->pq_size; + } + + assert(lim || kp[i].auto_restart); + } + + DEBUG_PRINTF("filled\n"); + dstate->filled = 1; +} + +static never_inline +void handleTopN(const struct mpv *m, s64a loc, u8 *active, u8 *reporters, + struct mpv_decomp_state *dstate, struct mpv_pq_item *pq, + const u8 *buf, size_t length, u32 i) { + assert(i < m->kilo_count); + DEBUG_PRINTF("MQE_TOP + %u @%lld\n", i, loc); + if (mmbit_set(active, m->kilo_count, i)) { + DEBUG_PRINTF("kilo is already alive and kicking\n"); + return; + } + + const struct mpv_kilopuff *kp = (const struct mpv_kilopuff *)(m + 1); + + assert(!kp[i].auto_restart); /* handle later/never */ + + /* we need to ensure that the counters are upto date */ + normalize_counters(dstate, m); + + /* reset counter */ + *get_counter_for_kilo(dstate, &kp[i]) = 0; + + if ((size_t)loc == length) { + /* end of buffer, just make sure it is active */ + dstate->active[i].limit = loc; + dstate->active[i].curr = get_init_puff(m, &kp[i]); + return; + } + + /* find the limit */ + u64a lim = limitByReach(&kp[i], buf + loc, length - loc) + loc; + + /* no need to worry about dead_point triggers here as kilopuff must first + * update chain (to fire a report) before it goes dead. 
*/ + + if (lim == (u64a)loc) { + DEBUG_PRINTF("dead on arrival\n"); + mmbit_unset(active, m->kilo_count, i); + return; + } + dstate->active[i].limit = lim; + + /* setup puffette, find next trigger */ + dstate->active[i].curr = get_init_puff(m, &kp[i]); + if (dstate->active[i].curr[1].report != INVALID_REPORT) { + u32 next_trigger = dstate->active[i].curr[1].repeats - 1ULL + loc; + lim = MIN(lim, next_trigger); + } + + assert(dstate->active[i].curr[0].repeats == 1 + || dstate->active[i].curr[0].report == INVALID_REPORT); + if (dstate->active[i].curr[0].repeats == 1) { + DEBUG_PRINTF("yippee\n"); + mmbit_set(reporters, m->kilo_count, i); + } + + assert(lim > (u64a)loc); + + /* add to pq */ + if (lim != length) { + struct mpv_pq_item temp = { + .trigger_loc = lim, + .kilo = i + }; + + DEBUG_PRINTF("push for %u at %llu\n", i, lim); + pq_insert(pq, dstate->pq_size, temp); + ++dstate->pq_size; + } +} + +static really_inline +void killKilo(const struct mpv *m, u8 *active, u8 *reporters, + struct mpv_decomp_state *dstate, struct mpv_pq_item *pq, u32 i) { + DEBUG_PRINTF("squashing kilo %u (progress %llu, limit %llu)\n", + i, pq_top(pq)->trigger_loc, dstate->active[i].limit); + mmbit_unset(active, m->kilo_count, i); + mmbit_unset(reporters, m->kilo_count, i); + + pq_pop(pq, dstate->pq_size); + dstate->pq_size--; +} + +static really_inline +void updateKiloChains(const struct mpv *m, u8 *reporters, + struct mpv_decomp_state *dstate, struct mpv_pq_item *pq, + u64a curr_loc, size_t buf_length, u32 i) { + const struct mpv_kilopuff *kp = (const void *)(m + 1); + u64a counter = get_counter_value_for_kilo(dstate, &kp[i]); + + DEBUG_PRINTF("updating active puff for kilo %u\n", i); + dstate->active[i].curr = update_curr_puff(m, reporters, counter, + dstate->active[i].curr, i); + + u64a next_trigger = dstate->active[i].limit; + + if (dstate->active[i].curr[1].report != INVALID_REPORT) { + u64a next_rep_trigger = dstate->active[i].curr[1].repeats - 1 - counter + + curr_loc; + + next_trigger = MIN(next_trigger, next_rep_trigger); + } else if (kp[i].dead_point != MPV_DEAD_VALUE) { + u64a dp_trigger = kp[i].dead_point - counter + curr_loc; + DEBUG_PRINTF("dead point trigger %llu\n", dp_trigger); + if (dp_trigger < dstate->active[i].limit) { + dstate->active[i].limit = dp_trigger; + next_trigger = dp_trigger; + } + } + + DEBUG_PRINTF("next trigger location is %llu\n", next_trigger); + + if (next_trigger < buf_length) { + assert(dstate->pq_size <= m->kilo_count); + assert(next_trigger > pq_top(pq)->trigger_loc); + struct mpv_pq_item temp = { + .trigger_loc = next_trigger, + .kilo = i + }; + + DEBUG_PRINTF("(replace) push for %u at %llu\n", i, next_trigger); + pq_replace_top(pq, dstate->pq_size, temp); + } else { + pq_pop(pq, dstate->pq_size); + dstate->pq_size--; + DEBUG_PRINTF("PQ_POP\n"); + } + DEBUG_PRINTF("pq size now %u next top %llu\n", dstate->pq_size, + pq_top(pq)->trigger_loc); +} + +static really_inline +u8 do_single_shufti(const m128 l, const m128 h, u8 c) { + const u8 *lo = (const u8 *)&l; + const u8 *hi = (const u8 *)&h; + return lo[c & 0xf] & hi[c >> 4]; +} + +static really_inline +size_t find_last_bad(const struct mpv_kilopuff *kp, const u8 *buf, + size_t length, size_t curr, u32 min_rep) { + assert(kp->type != MPV_DOT); + + DEBUG_PRINTF("repeats = %u\n", min_rep); + /* TODO: this should be replace by some sort of simd stuff */ + + if (kp->type == MPV_VERM) { + if (min_rep < MIN_SKIP_REPEAT) { + return find_nverm_run(kp->u.verm.c, 0, min_rep, buf, buf + curr, + buf + length) - buf - 1; + } + + 
verm_restart:; + assert(buf[curr] == kp->u.verm.c); + size_t test = curr; + if (curr + min_rep < length) { + test = curr + min_rep; + } else { + test = length - 1; + } + + while (test > curr) { + if (buf[test] == kp->u.verm.c) { + curr = test; + if (curr == length - 1) { + return curr; + } + goto verm_restart; + } + --test; + } + } else if (kp->type == MPV_SHUFTI) { + m128 lo = kp->u.shuf.mask_lo; + m128 hi = kp->u.shuf.mask_hi; + shuf_restart: + assert(do_single_shufti(lo, hi, buf[curr])); + size_t test = curr; + if (curr + min_rep < length) { + test = curr + min_rep; + } else { + test = length - 1; + } + + while (test > curr) { + if (do_single_shufti(lo, hi, buf[test])) { + DEBUG_PRINTF("updating curr from %zu to %zu\n", curr, test); + curr = test; + if (curr == length - 1) { + return curr; + } + goto shuf_restart; + } + --test; + } + } else if (kp->type == MPV_TRUFFLE) { + const m128 mask1 = kp->u.truffle.mask1; + const m128 mask2 = kp->u.truffle.mask2; + truffle_restart:; + size_t test = curr; + if (curr + min_rep < length) { + test = curr + min_rep; + } else { + test = length - 1; + } + + while (test > curr) { + const u8 *rv = truffleExec(mask1, mask2, buf + test, buf + test + 1); + if (rv == buf + test) { + curr = test; + if (curr == length - 1) { + return curr; + } + goto truffle_restart; + } + --test; + } + } else if (kp->type == MPV_NVERM) { + if (min_rep < MIN_SKIP_REPEAT) { + return find_verm_run(kp->u.verm.c, 0, min_rep, buf, buf + curr, + buf + length) - buf - 1; + } + + nverm_restart:; + assert(buf[curr] != kp->u.verm.c); + size_t test = curr; + if (curr + min_rep < length) { + test = curr + min_rep; + } else { + test = length - 1; + } + + while (test > curr) { + if (buf[test] != kp->u.verm.c) { + curr = test; + if (curr == length - 1) { + return curr; + } + goto nverm_restart; + } + --test; + } + } else { + assert(0); + } + + return curr; +} + +static really_inline +void restartKilo(const struct mpv *m, UNUSED u8 *active, u8 *reporters, + struct mpv_decomp_state *dstate, struct mpv_pq_item *pq, + const u8 *buf, u64a prev_limit, size_t buf_length, u32 i) { + const struct mpv_kilopuff *kp = (const void *)(m + 1); + assert(kp[i].auto_restart); + assert(mmbit_isset(active, m->kilo_count, i)); + + DEBUG_PRINTF("we got to %llu,%llu\n", prev_limit, dstate->active[i].limit); + assert(prev_limit == dstate->active[i].limit); + + DEBUG_PRINTF("resetting counter\n"); + + /* we need to ensure that the counters are upto date */ + normalize_counters(dstate, m); + + /* current byte is dead, will wrap to 0 after processing this byte */ + assert(MPV_DEAD_VALUE + 1 == 0); + *get_counter_for_kilo(dstate, &kp[i]) = MPV_DEAD_VALUE; + + DEBUG_PRINTF("resetting puffettes\n"); + dstate->active[i].curr = get_init_puff(m, &kp[i]); + + assert(dstate->active[i].curr[0].report == INVALID_REPORT); + /* TODO: handle restart .{1,}s */ + + mmbit_unset(reporters, m->kilo_count, i); + + if (prev_limit != buf_length - 1) { + size_t last_bad = find_last_bad(&kp[i], buf, buf_length, prev_limit, + dstate->active[i].curr[1].repeats); + assert(last_bad >= prev_limit && last_bad < buf_length); + if (last_bad != prev_limit) { + /* there is no point in getting restarted at this location */ + dstate->active[i].limit = last_bad; + assert(dstate->pq_size <= m->kilo_count); + struct mpv_pq_item temp = { + .trigger_loc = last_bad, + .kilo = i + }; + + pq_replace_top(pq, dstate->pq_size, temp); + return; + } + } + + /* TODO: skipping would really come in handy about now */ + u64a lim; + if (buf_length > prev_limit + 1) { + 
lim = limitByReach(&kp[i], buf + prev_limit + 1, + buf_length - (prev_limit + 1)) + + prev_limit + 1; + } else { + assert(buf_length == prev_limit + 1); + lim = buf_length; + } + DEBUG_PRINTF("next limit is %llu\n", lim); + + assert(lim > prev_limit); + + dstate->active[i].limit = lim; + + if (dstate->active[i].curr[1].report != INVALID_REPORT) { + u32 next_trigger = dstate->active[i].curr[1].repeats + prev_limit; + lim = MIN(lim, next_trigger); + } + + DEBUG_PRINTF("next trigger for kilo at %llu\n", lim); + + if (lim < buf_length) { + assert(dstate->pq_size <= m->kilo_count); + assert(lim >= prev_limit); + struct mpv_pq_item temp = { + .trigger_loc = lim, + .kilo = i + }; + + pq_replace_top(pq, dstate->pq_size, temp); + } else { + pq_pop(pq, dstate->pq_size); + dstate->pq_size--; + } +} + +static really_inline +void handle_events(const struct mpv *m, u8 *active, u8 *reporters, + struct mpv_decomp_state *dstate, struct mpv_pq_item *pq, + u64a loc, const u8 *buf, size_t buf_length) { + const struct mpv_kilopuff *kp = (const void *)(m + 1); + + while (dstate->pq_size && pq_top(pq)->trigger_loc <= loc) { + assert(pq_top(pq)->trigger_loc == loc); + + u32 kilo = pq_top(pq)->kilo; + + DEBUG_PRINTF("pop for kilo %u at %llu\n", kilo, + pq_top(pq)->trigger_loc); + + if (dstate->active[kilo].limit <= loc) { + if (!kp[kilo].auto_restart) { + killKilo(m, active, reporters, dstate, pq, kilo); + } else { + restartKilo(m, active, reporters, dstate, pq, buf, loc, + buf_length, kilo); + } + } else { + updateKiloChains(m, reporters, dstate, pq, loc, buf_length, kilo); + } + } +} + +static really_inline +u64a find_next_limit(const struct mpv *m, u8 *active, u8 *reporters, + struct mpv_decomp_state *dstate, struct mpv_pq_item *pq, + const u8 *buf, u64a prev_limit, u64a ep, + size_t buf_length) { + u64a limit = ep; + + DEBUG_PRINTF("length %llu (prev %llu), pq %u\n", limit, prev_limit, + dstate->pq_size); + + handle_events(m, active, reporters, dstate, pq, prev_limit, buf, + buf_length); + + if (dstate->pq_size) { + limit = MIN(pq_top(pq)->trigger_loc, limit); + assert(limit > prev_limit); + } + + DEBUG_PRINTF("limit now %llu\n", limit); + return limit; +} + +static really_inline +char mpvExec(const struct mpv *m, u8 *active, u8 *reporters, + struct mpv_decomp_state *dstate, struct mpv_pq_item *pq, + const u8 *buf, s64a start, size_t length, size_t buf_length, + u64a offsetAdj, NfaCallback cb, void *ctxt) { + DEBUG_PRINTF("running mpv (s %lliu, l %zu, o %llu)\n", + *get_counter_n(dstate, m, 0) + dstate->counter_adj, length, + offsetAdj); + + u64a progress = start; /* progress is relative to buffer offsets */ + + while (progress < length) { + DEBUG_PRINTF("progress %llu\n", progress); + + /* find next limit and update chains */ + u64a limit = find_next_limit(m, active, reporters, dstate, pq, buf, + progress, length, buf_length); + assert(limit != progress); + u64a incr = limit - progress; + DEBUG_PRINTF("incr = %llu\n", incr); + + /* report matches upto next limit */ + char rv = processReportsForRange(m, reporters, dstate, + offsetAdj + progress, limit - progress, + cb, ctxt); + + if (rv != MO_CONTINUE_MATCHING) { + DEBUG_PRINTF("mpvExec done %llu/%zu\n", progress, length); + return rv; + } + + dstate->counter_adj += incr; + progress = limit; + } + + assert(progress == length); + + DEBUG_PRINTF("mpvExec done\n"); + return MO_CONTINUE_MATCHING; +} + +static really_inline +void mpvLoadState(struct mpv_decomp_state *out, const struct NFA *n, + const char *state) { + assert(16 >= sizeof(struct mpv_decomp_kilo)); + 
assert(sizeof(*out) <= n->scratchStateSize); + assert(ISALIGNED(out)); + + const struct mpv *m = getImplNfa(n); + const struct mpv_counter_info *counter_info = get_counter_info(m); + u64a *counters = get_counter_n(out, m, 0); + const char *comp_counter = state; + for (u32 i = 0; i < m->counter_count; i++) { + u32 counter_size = counter_info[i].counter_size; + counters[i] = partial_load_u64a(comp_counter, counter_size); + DEBUG_PRINTF("loaded %llu counter %u\n", counters[i], i); + comp_counter += counter_size; + } + + out->filled = 0; /* _Q_i will fill limits, curr puffetes, and populate pq + * on first call */ + out->counter_adj = 0; + out->pq_size = 0; + + u8 *reporters = (u8 *)out + m->reporter_offset; + + mmbit_clear(reporters, m->kilo_count); +} + +static really_inline +void mpvStoreState(const struct NFA *n, char *state, + const struct mpv_decomp_state *in) { + assert(ISALIGNED(in)); + const struct mpv *m = getImplNfa(n); + const struct mpv_counter_info *counter_info = get_counter_info(m); + + const u64a *counters = (const u64a *)((const char *)in + + get_counter_info(m)[0].counter_offset); + u64a adj = in->counter_adj; + char *comp_counter = state; + for (u32 i = 0; i < m->counter_count; i++) { + /* clamp counter to allow storage in smaller ints */ + u64a curr_counter = MIN(counters[i] + adj, counter_info[i].max_counter); + + u32 counter_size = counter_info[i].counter_size; + partial_store_u64a(comp_counter, curr_counter, counter_size); + DEBUG_PRINTF("stored %llu counter %u (orig %llu)\n", curr_counter, i, + counters[i]); + /* assert(counters[i] != MPV_DEAD_VALUE); /\* should have process 1 byte */ + /* * since a clear *\/ */ + comp_counter += counter_size; + } +} + char nfaExecMpv_queueCompressState(const struct NFA *nfa, const struct mq *q, UNUSED s64a loc) { - void *dest = q->streamState; - const void *src = q->state; - mpvStoreState(nfa, dest, src); - return 0; -} - + void *dest = q->streamState; + const void *src = q->state; + mpvStoreState(nfa, dest, src); + return 0; +} + char nfaExecMpv_expandState(const struct NFA *nfa, void *dest, const void *src, UNUSED u64a offset, UNUSED u8 key) { - mpvLoadState(dest, nfa, src); - return 0; -} - + mpvLoadState(dest, nfa, src); + return 0; +} + char nfaExecMpv_reportCurrent(const struct NFA *n, struct mq *q) { - const struct mpv *m = getImplNfa(n); - u64a offset = q_cur_offset(q); - struct mpv_decomp_state *s = (struct mpv_decomp_state *)q->state; - - DEBUG_PRINTF("report current: offset %llu\n", offset); - - u8 *active = (u8 *)q->streamState + m->active_offset; - u32 rl_count = 0; - ReportID *rl = get_report_list(m, s); - - processReports(m, active, s, s->counter_adj, offset, q->cb, q->context, rl, - &rl_count); - return 0; -} - + const struct mpv *m = getImplNfa(n); + u64a offset = q_cur_offset(q); + struct mpv_decomp_state *s = (struct mpv_decomp_state *)q->state; + + DEBUG_PRINTF("report current: offset %llu\n", offset); + + u8 *active = (u8 *)q->streamState + m->active_offset; + u32 rl_count = 0; + ReportID *rl = get_report_list(m, s); + + processReports(m, active, s, s->counter_adj, offset, q->cb, q->context, rl, + &rl_count); + return 0; +} + char nfaExecMpv_queueInitState(const struct NFA *n, struct mq *q) { - struct mpv_decomp_state *out = (void *)q->state; - const struct mpv *m = getImplNfa(n); - assert(sizeof(*out) <= n->scratchStateSize); - - DEBUG_PRINTF("queue init state\n"); - - u64a *counters = get_counter_n(out, m, 0); - for (u32 i = 0; i < m->counter_count; i++) { - counters[i] = MPV_DEAD_VALUE; - } - - out->filled = 0; 
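-        /* filled == 0: limits, current puffettes and the pq are populated
-         * lazily on the first _Q_i call, as in mpvLoadState */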
- out->counter_adj = 0; - out->pq_size = 0; - out->active[0].curr = NULL; - - assert(q->streamState); - u8 *active_kpuff = (u8 *)q->streamState + m->active_offset; - u8 *reporters = (u8 *)q->state + m->reporter_offset; - mmbit_clear(active_kpuff, m->kilo_count); - mmbit_clear(reporters, m->kilo_count); - return 0; -} - + struct mpv_decomp_state *out = (void *)q->state; + const struct mpv *m = getImplNfa(n); + assert(sizeof(*out) <= n->scratchStateSize); + + DEBUG_PRINTF("queue init state\n"); + + u64a *counters = get_counter_n(out, m, 0); + for (u32 i = 0; i < m->counter_count; i++) { + counters[i] = MPV_DEAD_VALUE; + } + + out->filled = 0; + out->counter_adj = 0; + out->pq_size = 0; + out->active[0].curr = NULL; + + assert(q->streamState); + u8 *active_kpuff = (u8 *)q->streamState + m->active_offset; + u8 *reporters = (u8 *)q->state + m->reporter_offset; + mmbit_clear(active_kpuff, m->kilo_count); + mmbit_clear(reporters, m->kilo_count); + return 0; +} + char nfaExecMpv_initCompressedState(const struct NFA *n, u64a offset, void *state, UNUSED u8 key) { - const struct mpv *m = getImplNfa(n); - memset(state, 0, m->active_offset); /* active_offset marks end of comp - * counters */ - u8 *active_kpuff = (u8 *)state + m->active_offset; - if (!offset) { - mmbit_init_range(active_kpuff, m->kilo_count, m->top_kilo_begin, - m->top_kilo_end); - return 1; - } else { - return 0; - } -} - -static really_inline + const struct mpv *m = getImplNfa(n); + memset(state, 0, m->active_offset); /* active_offset marks end of comp + * counters */ + u8 *active_kpuff = (u8 *)state + m->active_offset; + if (!offset) { + mmbit_init_range(active_kpuff, m->kilo_count, m->top_kilo_begin, + m->top_kilo_end); + return 1; + } else { + return 0; + } +} + +static really_inline char nfaExecMpv_Q_i(const struct NFA *n, struct mq *q, s64a end) { - u64a offset = q->offset; - const u8 *buffer = q->buffer; - size_t length = q->length; - NfaCallback cb = q->cb; - void *context = q->context; - s64a sp; - const struct mpv *m = getImplNfa(n); - struct mpv_decomp_state *s = (struct mpv_decomp_state *)q->state; - u8 *active = (u8 *)q->streamState + m->active_offset; - u8 *reporters = (u8 *)q->state + m->reporter_offset; - struct mpv_pq_item *pq = (struct mpv_pq_item *)(q->state + m->pq_offset); - - if (!s->filled) { - fillLimits(m, active, reporters, s, pq, q->buffer, q->length); - } - - assert(!q->report_current); - - if (q->cur == q->end) { - return 1; - } - - assert(q->cur + 1 < q->end); /* require at least two items */ - - assert(q_cur_type(q) == MQE_START); - assert(q_cur_loc(q) >= 0); - sp = q->items[q->cur].location; - q->cur++; - - if (q->items[q->cur - 1].location > end) { - /* this is as far as we go */ - q->cur--; - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = end; - return MO_ALIVE; - } - - while (q->cur < q->end) { - s64a ep = q->items[q->cur].location; - - ep = MIN(ep, end); - - assert(ep >= sp); - - assert(sp >= 0); /* mpv should be an outfix; outfixes are not lazy */ - - if (sp >= ep) { - goto scan_done; - } - - /* do main buffer region */ - assert((u64a)ep <= length); - char rv = mpvExec(m, active, reporters, s, pq, buffer, sp, ep, length, - offset, cb, context); - if (rv == MO_HALT_MATCHING) { - q->cur = q->end; - return 0; - } - - scan_done: - if (q->items[q->cur].location > end) { - /* this is as far as we go */ - q->cur--; - q->items[q->cur].type = MQE_START; - q->items[q->cur].location = end; - return MO_ALIVE; - } - - sp = ep; - - switch (q->items[q->cur].type) { - case MQE_TOP: - 
DEBUG_PRINTF("top %u %u\n", m->top_kilo_begin, m->top_kilo_end); - /* MQE_TOP initialise all counters to 0; activates all kilos */ - { - u64a *counters = get_counter_n(s, m, 0); - assert(counters[0] == MPV_DEAD_VALUE); - assert(!s->counter_adj); - for (u32 i = 0; i < m->counter_count; i++) { - counters[i] = 0; - } - mmbit_init_range(active, m->kilo_count, m->top_kilo_begin, - m->top_kilo_end); - fillLimits(m, active, reporters, s, pq, buffer, length); - } - break; - case MQE_START: - case MQE_END: - break; - default: - /* MQE_TOP_N --> switch on kilo puff N */ - assert(q->items[q->cur].type >= MQE_TOP_FIRST); - assert(q->items[q->cur].type < MQE_INVALID); - u32 i = q->items[q->cur].type - MQE_TOP_FIRST; - handleTopN(m, sp, active, reporters, s, pq, buffer, length, i); - break; - } - - q->cur++; - } - - char alive = 0; - assert(q->items[q->cur - 1].type == MQE_END); - if (q->items[q->cur - 1].location == (s64a)q->length) { - normalize_counters(s, m); - - const struct mpv_kilopuff *kp = (const struct mpv_kilopuff *)(m + 1); - for (u32 i = mmbit_iterate(active, m->kilo_count, MMB_INVALID); - i != MMB_INVALID; i = mmbit_iterate(active, m->kilo_count, i)) { - if (*get_counter_for_kilo(s, &kp[i]) >= kp[i].dead_point) { - mmbit_unset(active, m->kilo_count, i); - } else { - alive = 1; - } - } - } else { - alive - = mmbit_iterate(active, m->kilo_count, MMB_INVALID) != MMB_INVALID; - } - - DEBUG_PRINTF("finished %d\n", (int)alive); - return alive; -} - + u64a offset = q->offset; + const u8 *buffer = q->buffer; + size_t length = q->length; + NfaCallback cb = q->cb; + void *context = q->context; + s64a sp; + const struct mpv *m = getImplNfa(n); + struct mpv_decomp_state *s = (struct mpv_decomp_state *)q->state; + u8 *active = (u8 *)q->streamState + m->active_offset; + u8 *reporters = (u8 *)q->state + m->reporter_offset; + struct mpv_pq_item *pq = (struct mpv_pq_item *)(q->state + m->pq_offset); + + if (!s->filled) { + fillLimits(m, active, reporters, s, pq, q->buffer, q->length); + } + + assert(!q->report_current); + + if (q->cur == q->end) { + return 1; + } + + assert(q->cur + 1 < q->end); /* require at least two items */ + + assert(q_cur_type(q) == MQE_START); + assert(q_cur_loc(q) >= 0); + sp = q->items[q->cur].location; + q->cur++; + + if (q->items[q->cur - 1].location > end) { + /* this is as far as we go */ + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = end; + return MO_ALIVE; + } + + while (q->cur < q->end) { + s64a ep = q->items[q->cur].location; + + ep = MIN(ep, end); + + assert(ep >= sp); + + assert(sp >= 0); /* mpv should be an outfix; outfixes are not lazy */ + + if (sp >= ep) { + goto scan_done; + } + + /* do main buffer region */ + assert((u64a)ep <= length); + char rv = mpvExec(m, active, reporters, s, pq, buffer, sp, ep, length, + offset, cb, context); + if (rv == MO_HALT_MATCHING) { + q->cur = q->end; + return 0; + } + + scan_done: + if (q->items[q->cur].location > end) { + /* this is as far as we go */ + q->cur--; + q->items[q->cur].type = MQE_START; + q->items[q->cur].location = end; + return MO_ALIVE; + } + + sp = ep; + + switch (q->items[q->cur].type) { + case MQE_TOP: + DEBUG_PRINTF("top %u %u\n", m->top_kilo_begin, m->top_kilo_end); + /* MQE_TOP initialise all counters to 0; activates all kilos */ + { + u64a *counters = get_counter_n(s, m, 0); + assert(counters[0] == MPV_DEAD_VALUE); + assert(!s->counter_adj); + for (u32 i = 0; i < m->counter_count; i++) { + counters[i] = 0; + } + mmbit_init_range(active, m->kilo_count, m->top_kilo_begin, + 
m->top_kilo_end); + fillLimits(m, active, reporters, s, pq, buffer, length); + } + break; + case MQE_START: + case MQE_END: + break; + default: + /* MQE_TOP_N --> switch on kilo puff N */ + assert(q->items[q->cur].type >= MQE_TOP_FIRST); + assert(q->items[q->cur].type < MQE_INVALID); + u32 i = q->items[q->cur].type - MQE_TOP_FIRST; + handleTopN(m, sp, active, reporters, s, pq, buffer, length, i); + break; + } + + q->cur++; + } + + char alive = 0; + assert(q->items[q->cur - 1].type == MQE_END); + if (q->items[q->cur - 1].location == (s64a)q->length) { + normalize_counters(s, m); + + const struct mpv_kilopuff *kp = (const struct mpv_kilopuff *)(m + 1); + for (u32 i = mmbit_iterate(active, m->kilo_count, MMB_INVALID); + i != MMB_INVALID; i = mmbit_iterate(active, m->kilo_count, i)) { + if (*get_counter_for_kilo(s, &kp[i]) >= kp[i].dead_point) { + mmbit_unset(active, m->kilo_count, i); + } else { + alive = 1; + } + } + } else { + alive + = mmbit_iterate(active, m->kilo_count, MMB_INVALID) != MMB_INVALID; + } + + DEBUG_PRINTF("finished %d\n", (int)alive); + return alive; +} + char nfaExecMpv_Q(const struct NFA *n, struct mq *q, s64a end) { - DEBUG_PRINTF("_Q %lld\n", end); + DEBUG_PRINTF("_Q %lld\n", end); return nfaExecMpv_Q_i(n, q, end); -} - +} + s64a nfaExecMpv_QueueExecRaw(const struct NFA *nfa, struct mq *q, s64a end) { - DEBUG_PRINTF("nfa=%p end=%lld\n", nfa, end); -#ifdef DEBUG - debugQueue(q); -#endif - + DEBUG_PRINTF("nfa=%p end=%lld\n", nfa, end); +#ifdef DEBUG + debugQueue(q); +#endif + assert(nfa->type == MPV_NFA); - assert(q && q->context && q->state); - assert(end >= 0); - assert(q->cur < q->end); - assert(q->end <= MAX_MQE_LEN); - assert(ISALIGNED_16(nfa) && ISALIGNED_16(getImplNfa(nfa))); - assert(end < q->items[q->end - 1].location - || q->items[q->end - 1].type == MQE_END); - - if (q->items[q->cur].location > end) { - return 1; - } - - char q_trimmed = 0; - - assert(end <= (s64a)q->length || !q->hlength); - /* due to reverse accel in block mode some queues may work on a truncated - * buffer */ - if (end > (s64a)q->length) { - end = q->length; - q_trimmed = 1; - } - - /* TODO: restore max offset stuff, if/when _interesting_ max offset stuff - * is filled in */ - + assert(q && q->context && q->state); + assert(end >= 0); + assert(q->cur < q->end); + assert(q->end <= MAX_MQE_LEN); + assert(ISALIGNED_16(nfa) && ISALIGNED_16(getImplNfa(nfa))); + assert(end < q->items[q->end - 1].location + || q->items[q->end - 1].type == MQE_END); + + if (q->items[q->cur].location > end) { + return 1; + } + + char q_trimmed = 0; + + assert(end <= (s64a)q->length || !q->hlength); + /* due to reverse accel in block mode some queues may work on a truncated + * buffer */ + if (end > (s64a)q->length) { + end = q->length; + q_trimmed = 1; + } + + /* TODO: restore max offset stuff, if/when _interesting_ max offset stuff + * is filled in */ + char rv = nfaExecMpv_Q_i(nfa, q, end); - - assert(!q->report_current); - DEBUG_PRINTF("returned rv=%d, q_trimmed=%d\n", rv, q_trimmed); - if (q_trimmed || !rv) { - return 0; - } else { - const struct mpv *m = getImplNfa(nfa); - u8 *reporters = (u8 *)q->state + m->reporter_offset; - - if (mmbit_any_precise(reporters, m->kilo_count)) { - DEBUG_PRINTF("next byte\n"); - return 1; /* need to match at next byte */ - } else { - s64a next_event = q->length; - s64a next_pq = q->length; - - if (q->cur < q->end) { - next_event = q->items[q->cur].location; - } - - struct mpv_decomp_state *s = (struct mpv_decomp_state *)q->state; - struct mpv_pq_item *pq - = (struct mpv_pq_item 
*)(q->state + m->pq_offset); - if (s->pq_size) { - next_pq = pq_top(pq)->trigger_loc; - } - - assert(next_event); - assert(next_pq); - - DEBUG_PRINTF("next pq %lld event %lld\n", next_pq, next_event); - return MIN(next_pq, next_event); - } - } -} + + assert(!q->report_current); + DEBUG_PRINTF("returned rv=%d, q_trimmed=%d\n", rv, q_trimmed); + if (q_trimmed || !rv) { + return 0; + } else { + const struct mpv *m = getImplNfa(nfa); + u8 *reporters = (u8 *)q->state + m->reporter_offset; + + if (mmbit_any_precise(reporters, m->kilo_count)) { + DEBUG_PRINTF("next byte\n"); + return 1; /* need to match at next byte */ + } else { + s64a next_event = q->length; + s64a next_pq = q->length; + + if (q->cur < q->end) { + next_event = q->items[q->cur].location; + } + + struct mpv_decomp_state *s = (struct mpv_decomp_state *)q->state; + struct mpv_pq_item *pq + = (struct mpv_pq_item *)(q->state + m->pq_offset); + if (s->pq_size) { + next_pq = pq_top(pq)->trigger_loc; + } + + assert(next_event); + assert(next_pq); + + DEBUG_PRINTF("next pq %lld event %lld\n", next_pq, next_event); + return MIN(next_pq, next_event); + } + } +} diff --git a/contrib/libs/hyperscan/src/nfa/mpv.h b/contrib/libs/hyperscan/src/nfa/mpv.h index 244dfe800d..3780728d7f 100644 --- a/contrib/libs/hyperscan/src/nfa/mpv.h +++ b/contrib/libs/hyperscan/src/nfa/mpv.h @@ -1,39 +1,39 @@ -/* +/* * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef MPV_H -#define MPV_H - -#include "ue2common.h" - -struct mq; -struct NFA; - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef MPV_H +#define MPV_H + +#include "ue2common.h" + +struct mq; +struct NFA; + char nfaExecMpv_Q(const struct NFA *n, struct mq *q, s64a end); char nfaExecMpv_reportCurrent(const struct NFA *n, struct mq *q); char nfaExecMpv_queueInitState(const struct NFA *n, struct mq *q); @@ -43,7 +43,7 @@ char nfaExecMpv_queueCompressState(const struct NFA *nfa, const struct mq *q, s64a loc); char nfaExecMpv_expandState(const struct NFA *nfa, void *dest, const void *src, u64a offset, u8 key); - + #define nfaExecMpv_testEOD NFA_API_NO_IMPL #define nfaExecMpv_inAccept NFA_API_NO_IMPL #define nfaExecMpv_inAnyAccept NFA_API_NO_IMPL @@ -51,10 +51,10 @@ char nfaExecMpv_expandState(const struct NFA *nfa, void *dest, const void *src, #define nfaExecMpv_Q2 NFA_API_NO_IMPL /* for non-chained suffixes. */ #define nfaExecMpv_B_Reverse NFA_API_NO_IMPL #define nfaExecMpv_zombie_status NFA_API_ZOMBIE_NO_IMPL - -/** - * return 0 if the mpv dies, otherwise returns the location of the next possible - * match (given the currently known events). */ + +/** + * return 0 if the mpv dies, otherwise returns the location of the next possible + * match (given the currently known events). */ s64a nfaExecMpv_QueueExecRaw(const struct NFA *nfa, struct mq *q, s64a end); - -#endif + +#endif diff --git a/contrib/libs/hyperscan/src/nfa/mpv_internal.h b/contrib/libs/hyperscan/src/nfa/mpv_internal.h index 527a691cf8..a52853dce2 100644 --- a/contrib/libs/hyperscan/src/nfa/mpv_internal.h +++ b/contrib/libs/hyperscan/src/nfa/mpv_internal.h @@ -1,45 +1,45 @@ -/* +/* * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef MPV_INTERNAL_H -#define MPV_INTERNAL_H - -#include "ue2common.h" - -#define MPV_DOT 0 -#define MPV_VERM 1 -#define MPV_SHUFTI 2 -#define MPV_TRUFFLE 3 -#define MPV_NVERM 4 - -struct mpv_puffette { - u32 repeats; - char unbounded; + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef MPV_INTERNAL_H +#define MPV_INTERNAL_H + +#include "ue2common.h" + +#define MPV_DOT 0 +#define MPV_VERM 1 +#define MPV_SHUFTI 2 +#define MPV_TRUFFLE 3 +#define MPV_NVERM 4 + +struct mpv_puffette { + u32 repeats; + char unbounded; /** * \brief Report is simple-exhaustible. 
@@ -49,149 +49,149 @@ struct mpv_puffette { */ char simple_exhaust; - ReportID report; -}; - -struct mpv_kilopuff { - u32 counter_offset; /**< offset (in full stream state) to the counter that - * this kilopuff refers to */ - u32 count; /**< number of real (non sentinel mpv puffettes) */ - u32 puffette_offset; /**< relative to base of mpv, points past the 1st - * sent */ - u64a dead_point; - u8 auto_restart; - u8 type; /* MPV_DOT, MPV_VERM, etc */ - union { - struct { - char c; - } verm; - struct { - m128 mask_lo; - m128 mask_hi; - } shuf; - struct { - m128 mask1; - m128 mask2; - } truffle; - } u; -}; - -struct mpv_counter_info { - u64a max_counter; /**< maximum value this counter needs to track */ - u32 counter_size; /**< number of bytes to represent the counter in stream - * state */ - u32 counter_offset; /**< offset that this counter is stored at in the - * full stream state */ - u32 kilo_begin; /**< first kilo to turn on when the counter is started */ - u32 kilo_end; /**< 1 + last kilo to turn on when the counter is started */ -}; - -struct ALIGN_AVX_DIRECTIVE mpv { - u32 kilo_count; /**< number of kilopuffs following */ - u32 counter_count; /**< number of counters managed by the mpv */ - u32 puffette_count; /**< total number of puffettes under all the kilos */ - u32 pq_offset; /**< offset to the priority queue in the decompressed - * state */ - u32 reporter_offset; /**< offset to the reporter mmbit in the decompressed - * state */ - u32 report_list_offset; /**< offset to the report list scratch space in the - * decompressed state */ - u32 active_offset; /**< offset to the active kp mmbit in the compressed - * state */ - u32 top_kilo_begin; /**< first kilo to switch on when top arrives */ - u32 top_kilo_end; /**< one past the last kilo to switch on when top - * arrives */ -}; - -struct mpv_decomp_kilo { - u64a limit; - const struct mpv_puffette *curr; -}; - -/* note: size varies on different platforms */ -struct mpv_decomp_state { - u32 pq_size; - char filled; - u64a counter_adj; /**< progress not yet written to the real counters */ - struct mpv_decomp_kilo active[]; -}; - -/* --- - * | | mpv - * --- - * | | - * | | kilo_count * mpv_kilopuffs - * | | - * ... - * | | - * --- - * | | - * | | counter_count * mpv_counter_infos - * | | - * ... - * | | - * --- - * | | sentinel mpv_puffette - * --- - * | | mpv_puffettes for 1st kilopuff - * | | (mpv_puffettes are ordered by minimum number of repeats) - * | | - * --- - * | | sentinel mpv_puffette - * --- - * | | mpv_puffettes for 2nd kilopuff - * ... - * | | - * --- - * | | sentinel mpv_puffette - * --- - */ - -/* - * Stream State - * [Compressed Counter 0] - * [Compressed Counter 1] - * ... - * [Compressed Counter N] - * [mmbit of active kilopuffs] - * - * Decompressed State - * [header (limit pq_size)] - * [ - * [kilo 1 current reports] - * ... - * [kilo N current reports] - * ] - * [ - * [Full Counter 0] - * [Full Counter 1] - * ... 
- * [Full Counter N] - * ] - * [pq of kilo changes] - * [scratch space for current report lists (total number of puffettes)] - * [mmbit of kilopuffs with active reports] - */ - -struct mpv_pq_item { - u64a trigger_loc; - u32 kilo; -}; - -/* returns pointer to first non sentinel mpv_puff */ -static really_inline -const struct mpv_puffette *get_puff_array(const struct mpv *m, - const struct mpv_kilopuff *kp) { - return (const struct mpv_puffette *)((const char *)m + kp->puffette_offset); -} - -static really_inline -const struct mpv_counter_info *get_counter_info(const struct mpv *m) { - return (const struct mpv_counter_info *)((const char *)(m + 1) - + m->kilo_count * sizeof(struct mpv_kilopuff)); -} - -#define MPV_DEAD_VALUE (~0ULL) -#define INVALID_REPORT (~0U) - -#endif + ReportID report; +}; + +struct mpv_kilopuff { + u32 counter_offset; /**< offset (in full stream state) to the counter that + * this kilopuff refers to */ + u32 count; /**< number of real (non sentinel mpv puffettes) */ + u32 puffette_offset; /**< relative to base of mpv, points past the 1st + * sent */ + u64a dead_point; + u8 auto_restart; + u8 type; /* MPV_DOT, MPV_VERM, etc */ + union { + struct { + char c; + } verm; + struct { + m128 mask_lo; + m128 mask_hi; + } shuf; + struct { + m128 mask1; + m128 mask2; + } truffle; + } u; +}; + +struct mpv_counter_info { + u64a max_counter; /**< maximum value this counter needs to track */ + u32 counter_size; /**< number of bytes to represent the counter in stream + * state */ + u32 counter_offset; /**< offset that this counter is stored at in the + * full stream state */ + u32 kilo_begin; /**< first kilo to turn on when the counter is started */ + u32 kilo_end; /**< 1 + last kilo to turn on when the counter is started */ +}; + +struct ALIGN_AVX_DIRECTIVE mpv { + u32 kilo_count; /**< number of kilopuffs following */ + u32 counter_count; /**< number of counters managed by the mpv */ + u32 puffette_count; /**< total number of puffettes under all the kilos */ + u32 pq_offset; /**< offset to the priority queue in the decompressed + * state */ + u32 reporter_offset; /**< offset to the reporter mmbit in the decompressed + * state */ + u32 report_list_offset; /**< offset to the report list scratch space in the + * decompressed state */ + u32 active_offset; /**< offset to the active kp mmbit in the compressed + * state */ + u32 top_kilo_begin; /**< first kilo to switch on when top arrives */ + u32 top_kilo_end; /**< one past the last kilo to switch on when top + * arrives */ +}; + +struct mpv_decomp_kilo { + u64a limit; + const struct mpv_puffette *curr; +}; + +/* note: size varies on different platforms */ +struct mpv_decomp_state { + u32 pq_size; + char filled; + u64a counter_adj; /**< progress not yet written to the real counters */ + struct mpv_decomp_kilo active[]; +}; + +/* --- + * | | mpv + * --- + * | | + * | | kilo_count * mpv_kilopuffs + * | | + * ... + * | | + * --- + * | | + * | | counter_count * mpv_counter_infos + * | | + * ... + * | | + * --- + * | | sentinel mpv_puffette + * --- + * | | mpv_puffettes for 1st kilopuff + * | | (mpv_puffettes are ordered by minimum number of repeats) + * | | + * --- + * | | sentinel mpv_puffette + * --- + * | | mpv_puffettes for 2nd kilopuff + * ... + * | | + * --- + * | | sentinel mpv_puffette + * --- + */ + +/* + * Stream State + * [Compressed Counter 0] + * [Compressed Counter 1] + * ... 
+ * [Compressed Counter N] + * [mmbit of active kilopuffs] + * + * Decompressed State + * [header (limit pq_size)] + * [ + * [kilo 1 current reports] + * ... + * [kilo N current reports] + * ] + * [ + * [Full Counter 0] + * [Full Counter 1] + * ... + * [Full Counter N] + * ] + * [pq of kilo changes] + * [scratch space for current report lists (total number of puffettes)] + * [mmbit of kilopuffs with active reports] + */ + +struct mpv_pq_item { + u64a trigger_loc; + u32 kilo; +}; + +/* returns pointer to first non sentinel mpv_puff */ +static really_inline +const struct mpv_puffette *get_puff_array(const struct mpv *m, + const struct mpv_kilopuff *kp) { + return (const struct mpv_puffette *)((const char *)m + kp->puffette_offset); +} + +static really_inline +const struct mpv_counter_info *get_counter_info(const struct mpv *m) { + return (const struct mpv_counter_info *)((const char *)(m + 1) + + m->kilo_count * sizeof(struct mpv_kilopuff)); +} + +#define MPV_DEAD_VALUE (~0ULL) +#define INVALID_REPORT (~0U) + +#endif diff --git a/contrib/libs/hyperscan/src/nfa/mpvcompile.cpp b/contrib/libs/hyperscan/src/nfa/mpvcompile.cpp index 653c7ac78a..8497c64870 100644 --- a/contrib/libs/hyperscan/src/nfa/mpvcompile.cpp +++ b/contrib/libs/hyperscan/src/nfa/mpvcompile.cpp @@ -1,99 +1,99 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "mpvcompile.h" - -#include "mpv_internal.h" -#include "nfa_api_queue.h" -#include "nfa_internal.h" -#include "shufticompile.h" -#include "trufflecompile.h" -#include "util/alloc.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
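To make the layout diagram concrete, the variable-length regions are reached through the two accessors defined in this header; a sketch (walk_mpv and its debug output are illustrative):

    static void walk_mpv(const struct mpv *m) {
        /* kilopuffs sit immediately after the mpv header */
        const struct mpv_kilopuff *kp = (const struct mpv_kilopuff *)(m + 1);
        /* counter infos follow the kilopuff array */
        const struct mpv_counter_info *ci = get_counter_info(m);
        for (u32 i = 0; i < m->kilo_count; i++) {
            /* puffette_offset points just past the run's leading sentinel */
            const struct mpv_puffette *p = get_puff_array(m, &kp[i]);
            DEBUG_PRINTF("kilo %u: %u puffettes at offset %u\n", i,
                         kp[i].count, kp[i].puffette_offset);
            (void)p;
        }
        (void)ci;
    }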
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "mpvcompile.h" + +#include "mpv_internal.h" +#include "nfa_api_queue.h" +#include "nfa_internal.h" +#include "shufticompile.h" +#include "trufflecompile.h" +#include "util/alloc.h" #include "util/multibit_build.h" -#include "util/order_check.h" +#include "util/order_check.h" #include "util/report_manager.h" -#include "util/verify_types.h" - -#include <algorithm> -#include <iterator> -#include <map> - -#include <boost/range/adaptor/map.hpp> - -using namespace std; -using boost::adaptors::map_values; -using boost::adaptors::map_keys; - -namespace ue2 { - -namespace { -struct pcomp { - bool operator()(const raw_puff &a, const raw_puff &b) const { +#include "util/verify_types.h" + +#include <algorithm> +#include <iterator> +#include <map> + +#include <boost/range/adaptor/map.hpp> + +using namespace std; +using boost::adaptors::map_values; +using boost::adaptors::map_keys; + +namespace ue2 { + +namespace { +struct pcomp { + bool operator()(const raw_puff &a, const raw_puff &b) const { return tie(a.repeats, a.unbounded, a.simple_exhaust, a.report) < tie(b.repeats, b.unbounded, b.simple_exhaust, b.report); - } -}; - -struct ClusterKey { - explicit ClusterKey(const raw_puff &src) - : trigger_event(MQE_INVALID), reach(src.reach), - auto_restart(src.auto_restart) {} - ClusterKey(u32 event, const raw_puff &src) - : trigger_event(event), reach(src.reach), - auto_restart(src.auto_restart) {} - - u32 trigger_event; - CharReach reach; - bool auto_restart; - - bool operator<(const ClusterKey &b) const { - const ClusterKey &a = *this; - ORDER_CHECK(trigger_event); /* want triggered puffs first */ - ORDER_CHECK(auto_restart); - ORDER_CHECK(reach); - return false; - } -}; - -} // namespace - -static + } +}; + +struct ClusterKey { + explicit ClusterKey(const raw_puff &src) + : trigger_event(MQE_INVALID), reach(src.reach), + auto_restart(src.auto_restart) {} + ClusterKey(u32 event, const raw_puff &src) + : trigger_event(event), reach(src.reach), + auto_restart(src.auto_restart) {} + + u32 trigger_event; + CharReach reach; + bool auto_restart; + + bool operator<(const ClusterKey &b) const { + const ClusterKey &a = *this; + ORDER_CHECK(trigger_event); /* want triggered puffs first */ + ORDER_CHECK(auto_restart); + ORDER_CHECK(reach); + return false; + } +}; + +} // namespace + +static void 
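ClusterKey deliberately groups puffs that can share a kilopuff (same trigger event, reach, and auto_restart flag), and its ordering puts triggered clusters first so that a cluster's position lines up with its MQE_TOP_N event. Illustratively (populateClusters is file-static, so this shows data flow rather than a public entry point; report IDs are placeholders, and CharReach::dot() is assumed to denote the any-byte class):

    vector<raw_puff> puffs = {
        raw_puff(10, false, /* report */ 1, CharReach::dot()),
        raw_puff(3,  false, /* report */ 2, CharReach::dot()),
    };
    map<ClusterKey, vector<raw_puff>> clusters;
    populateClusters(puffs, {}, &clusters);
    /* one cluster results; pcomp orders its puffs {repeats=3, repeats=10} */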
writePuffette(mpv_puffette *out, const raw_puff &rp, const ReportManager &rm) { - DEBUG_PRINTF("outputting %u %d %u to %p\n", rp.repeats, (int)rp.unbounded, - rp.report, out); - out->repeats = rp.repeats; - out->unbounded = rp.unbounded; + DEBUG_PRINTF("outputting %u %d %u to %p\n", rp.repeats, (int)rp.unbounded, + rp.report, out); + out->repeats = rp.repeats; + out->unbounded = rp.unbounded; out->simple_exhaust = rp.simple_exhaust; out->report = rm.getProgramOffset(rp.report); -} - -static +} + +static void writeSentinel(mpv_puffette *out) { DEBUG_PRINTF("outputting sentinel to %p\n", out); memset(out, 0, sizeof(*out)); @@ -101,300 +101,300 @@ void writeSentinel(mpv_puffette *out) { } static -void writeDeadPoint(mpv_kilopuff *out, const vector<raw_puff> &puffs) { - for (const auto &puff : puffs) { - if (puff.unbounded) { /* mpv can never die */ - out->dead_point = MPV_DEAD_VALUE; - return; - } - } - - out->dead_point = puffs.back().repeats + 1; -} - -static -size_t calcSize(const map<ClusterKey, vector<raw_puff>> &raw, - const vector<mpv_counter_info> &counters) { - size_t len = sizeof(NFA) + sizeof(mpv); - - len += sizeof(mpv_kilopuff) * raw.size(); /* need a kilopuff for each - distinct reach */ - - len += sizeof(mpv_counter_info) * counters.size(); - - len += sizeof(mpv_puffette); /* initial sent */ - - for (const vector<raw_puff> &puffs : raw | map_values) { - len += sizeof(mpv_puffette) * puffs.size(); - len += sizeof(mpv_puffette); /* terminal sent */ - } - - return len; -} - -static -void populateClusters(const vector<raw_puff> &puffs_in, - const vector<raw_puff> &triggered_puffs, - map<ClusterKey, vector<raw_puff>> *raw) { - map<ClusterKey, vector<raw_puff>> &puff_clusters = *raw; - - u32 e = MQE_TOP_FIRST; - for (const auto &puff : triggered_puffs) { - puff_clusters[ClusterKey(e, puff)].push_back(puff); - e++; - } - - for (const auto &puff : puffs_in) { - puff_clusters[ClusterKey(puff)].push_back(puff); - } - - - for (vector<raw_puff> &puffs : puff_clusters | map_values) { - sort(puffs.begin(), puffs.end(), pcomp()); - } -} - -static -void writeKiloPuff(const map<ClusterKey, vector<raw_puff>>::const_iterator &it, +void writeDeadPoint(mpv_kilopuff *out, const vector<raw_puff> &puffs) { + for (const auto &puff : puffs) { + if (puff.unbounded) { /* mpv can never die */ + out->dead_point = MPV_DEAD_VALUE; + return; + } + } + + out->dead_point = puffs.back().repeats + 1; +} + +static +size_t calcSize(const map<ClusterKey, vector<raw_puff>> &raw, + const vector<mpv_counter_info> &counters) { + size_t len = sizeof(NFA) + sizeof(mpv); + + len += sizeof(mpv_kilopuff) * raw.size(); /* need a kilopuff for each + distinct reach */ + + len += sizeof(mpv_counter_info) * counters.size(); + + len += sizeof(mpv_puffette); /* initial sent */ + + for (const vector<raw_puff> &puffs : raw | map_values) { + len += sizeof(mpv_puffette) * puffs.size(); + len += sizeof(mpv_puffette); /* terminal sent */ + } + + return len; +} + +static +void populateClusters(const vector<raw_puff> &puffs_in, + const vector<raw_puff> &triggered_puffs, + map<ClusterKey, vector<raw_puff>> *raw) { + map<ClusterKey, vector<raw_puff>> &puff_clusters = *raw; + + u32 e = MQE_TOP_FIRST; + for (const auto &puff : triggered_puffs) { + puff_clusters[ClusterKey(e, puff)].push_back(puff); + e++; + } + + for (const auto &puff : puffs_in) { + puff_clusters[ClusterKey(puff)].push_back(puff); + } + + + for (vector<raw_puff> &puffs : puff_clusters | map_values) { + sort(puffs.begin(), puffs.end(), pcomp()); + } +} + +static +void 
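writeDeadPoint records when a kilopuff can be switched off for good: once its counter passes the largest bounded repeat, no puffette can ever fire again. The rule restated standalone (a sketch; `repeats` is the cluster's ascending repeat list):

    static u64a dead_point_for(const u32 *repeats, size_t n,
                               char any_unbounded) {
        if (any_unbounded) {
            return MPV_DEAD_VALUE; /* some puff never stops matching */
        }
        /* one past the largest repeat, e.g. repeats {3, 10} -> dead at 11 */
        return (u64a)repeats[n - 1] + 1;
    }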
writeKiloPuff(const map<ClusterKey, vector<raw_puff>>::const_iterator &it, const ReportManager &rm, u32 counter_offset, mpv *m, mpv_kilopuff *kp, mpv_puffette **pa) { - const CharReach &reach = it->first.reach; - const vector<raw_puff> &puffs = it->second; - - kp->auto_restart = it->first.auto_restart; - - if (reach.all()) { - kp->type = MPV_DOT; - } else if (reach.count() == 255) { - kp->type = MPV_VERM; - size_t unset = (~reach).find_first(); - assert(unset != CharReach::npos); - kp->u.verm.c = (char)unset; - } else if (reach.count() == 1) { - kp->type = MPV_NVERM; - size_t set = reach.find_first(); - assert(set != CharReach::npos); - kp->u.verm.c = (char)set; + const CharReach &reach = it->first.reach; + const vector<raw_puff> &puffs = it->second; + + kp->auto_restart = it->first.auto_restart; + + if (reach.all()) { + kp->type = MPV_DOT; + } else if (reach.count() == 255) { + kp->type = MPV_VERM; + size_t unset = (~reach).find_first(); + assert(unset != CharReach::npos); + kp->u.verm.c = (char)unset; + } else if (reach.count() == 1) { + kp->type = MPV_NVERM; + size_t set = reach.find_first(); + assert(set != CharReach::npos); + kp->u.verm.c = (char)set; } else if (shuftiBuildMasks(~reach, (u8 *)&kp->u.shuf.mask_lo, (u8 *)&kp->u.shuf.mask_hi) != -1) { - kp->type = MPV_SHUFTI; - } else { - kp->type = MPV_TRUFFLE; + kp->type = MPV_SHUFTI; + } else { + kp->type = MPV_TRUFFLE; truffleBuildMasks(~reach, (u8 *)&kp->u.truffle.mask1, (u8 *)&kp->u.truffle.mask2); - } - - kp->count = verify_u32(puffs.size()); - kp->counter_offset = counter_offset; - - /* start of real puffette array */ - kp->puffette_offset = verify_u32((char *)*pa - (char *)m); - for (size_t i = 0; i < puffs.size(); i++) { - assert(!it->first.auto_restart || puffs[i].unbounded); + } + + kp->count = verify_u32(puffs.size()); + kp->counter_offset = counter_offset; + + /* start of real puffette array */ + kp->puffette_offset = verify_u32((char *)*pa - (char *)m); + for (size_t i = 0; i < puffs.size(); i++) { + assert(!it->first.auto_restart || puffs[i].unbounded); writePuffette(*pa + i, puffs[i], rm); - } - - *pa += puffs.size(); + } + + *pa += puffs.size(); writeSentinel(*pa); - ++*pa; - - writeDeadPoint(kp, puffs); -} - -static -void writeCoreNfa(NFA *nfa, u32 len, u32 min_width, u32 max_counter, - u32 streamStateSize, u32 scratchStateSize) { - assert(nfa); - - nfa->length = len; - nfa->nPositions = max_counter - 1; + ++*pa; + + writeDeadPoint(kp, puffs); +} + +static +void writeCoreNfa(NFA *nfa, u32 len, u32 min_width, u32 max_counter, + u32 streamStateSize, u32 scratchStateSize) { + assert(nfa); + + nfa->length = len; + nfa->nPositions = max_counter - 1; nfa->type = MPV_NFA; - nfa->streamStateSize = streamStateSize; - assert(16 >= sizeof(mpv_decomp_kilo)); - nfa->scratchStateSize = scratchStateSize; - nfa->minWidth = min_width; -} - -static -void findCounterSize(map<ClusterKey, vector<raw_puff>>::const_iterator kp_it, - map<ClusterKey, vector<raw_puff>>::const_iterator kp_ite, - u64a *max_counter_out, u32 *counter_size) { - u32 max_counter = 0; /* max counter that we may need to know about is one - more than largest repeat */ - for (; kp_it != kp_ite; ++kp_it) { - max_counter = MAX(max_counter, kp_it->second.back().repeats + 1); - } - - if (max_counter < (1U << 8)) { - *counter_size = 1; - } else if (max_counter < (1U << 16)) { - *counter_size = 2; - } else if (max_counter < (1U << 24)) { - *counter_size = 3; - } else { - *counter_size = 4; - } - - *max_counter_out = max_counter; -} - -static -void 
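The reach classification in writeKiloPuff picks the cheapest byte scanner that can verify a cluster's character class; roughly, with illustrative patterns:

    /.{40,}/      -> MPV_DOT     (every byte matches)
    /[^\n]{40,}/  -> MPV_VERM    (scan for the one excluded byte, '\n')
    /z{40,}/      -> MPV_NVERM   (scan for the one included byte, 'z')
    /[^a-z]{40,}/ -> MPV_SHUFTI, when masks can be built over ~reach
    otherwise     -> MPV_TRUFFLE (can represent any class)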
fillCounterInfo(mpv_counter_info *out, u32 *curr_decomp_offset, - u32 *curr_comp_offset, - const map<ClusterKey, vector<raw_puff>> &kilopuffs, - map<ClusterKey, vector<raw_puff>>::const_iterator kp_it, - map<ClusterKey, vector<raw_puff>>::const_iterator kp_ite) { - - out->kilo_begin = distance(kilopuffs.begin(), kp_it); - out->kilo_end = distance(kilopuffs.begin(), kp_ite); - findCounterSize(kp_it, kp_ite, &out->max_counter, &out->counter_size); - out->counter_offset = *curr_decomp_offset; - *curr_decomp_offset += sizeof(u64a); - *curr_comp_offset += out->counter_size; -} - -static -void fillCounterInfos(vector<mpv_counter_info> *out, u32 *curr_decomp_offset, - u32 *curr_comp_offset, - const map<ClusterKey, vector<raw_puff>> &kilopuffs) { - /* first the triggered puffs */ - map<ClusterKey, vector<raw_puff>>::const_iterator it = kilopuffs.begin(); - while (it != kilopuffs.end() && it->first.trigger_event != MQE_INVALID) { - assert(!it->first.auto_restart); - assert(it->first.trigger_event - == MQE_TOP_FIRST + distance(kilopuffs.begin(), it)); - - out->push_back(mpv_counter_info()); - map<ClusterKey, vector<raw_puff>>::const_iterator it_o = it; - ++it; - fillCounterInfo(&out->back(), curr_decomp_offset, curr_comp_offset, - kilopuffs, it_o, it); - } - - /* we may have 2 sets of non triggered puffs: - * 1) always started with no auto_restart - * 2) always started with auto_restart - */ - map<ClusterKey, vector<raw_puff>>::const_iterator trig_ite = it; - while (it != kilopuffs.end() && !it->first.auto_restart) { - assert(it->first.trigger_event == MQE_INVALID); - - ++it; - } - if (it != trig_ite) { - out->push_back(mpv_counter_info()); - fillCounterInfo(&out->back(), curr_decomp_offset, curr_comp_offset, - kilopuffs, kilopuffs.begin(), it); - } - while (it != kilopuffs.end() && it->first.auto_restart) { - assert(it->first.trigger_event == MQE_INVALID); - - out->push_back(mpv_counter_info()); - map<ClusterKey, vector<raw_puff>>::const_iterator it_o = it; - ++it; - fillCounterInfo(&out->back(), curr_decomp_offset, curr_comp_offset, - kilopuffs, it_o, it); - } -} - -static -const mpv_counter_info &findCounter(const vector<mpv_counter_info> &counters, - u32 i) { - for (const auto &counter : counters) { - if (i >= counter.kilo_begin && i < counter.kilo_end) { - return counter; - } - } - assert(0); - return counters.front(); -} - + nfa->streamStateSize = streamStateSize; + assert(16 >= sizeof(mpv_decomp_kilo)); + nfa->scratchStateSize = scratchStateSize; + nfa->minWidth = min_width; +} + +static +void findCounterSize(map<ClusterKey, vector<raw_puff>>::const_iterator kp_it, + map<ClusterKey, vector<raw_puff>>::const_iterator kp_ite, + u64a *max_counter_out, u32 *counter_size) { + u32 max_counter = 0; /* max counter that we may need to know about is one + more than largest repeat */ + for (; kp_it != kp_ite; ++kp_it) { + max_counter = MAX(max_counter, kp_it->second.back().repeats + 1); + } + + if (max_counter < (1U << 8)) { + *counter_size = 1; + } else if (max_counter < (1U << 16)) { + *counter_size = 2; + } else if (max_counter < (1U << 24)) { + *counter_size = 3; + } else { + *counter_size = 4; + } + + *max_counter_out = max_counter; +} + +static +void fillCounterInfo(mpv_counter_info *out, u32 *curr_decomp_offset, + u32 *curr_comp_offset, + const map<ClusterKey, vector<raw_puff>> &kilopuffs, + map<ClusterKey, vector<raw_puff>>::const_iterator kp_it, + map<ClusterKey, vector<raw_puff>>::const_iterator kp_ite) { + + out->kilo_begin = distance(kilopuffs.begin(), kp_it); + out->kilo_end = 
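findCounterSize then narrows the stream-state footprint: max_counter is one more than the largest repeat the counter must distinguish, and the byte width follows directly. For example:

    largest repeat 200     -> max_counter 201      -> 1-byte counter
    largest repeat 60000   -> max_counter 60001    -> 2-byte counter
    largest repeat 70000   -> max_counter 70001    -> 3-byte counter
    largest repeat 2^24    -> max_counter 2^24 + 1 -> 4-byte counter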
distance(kilopuffs.begin(), kp_ite); + findCounterSize(kp_it, kp_ite, &out->max_counter, &out->counter_size); + out->counter_offset = *curr_decomp_offset; + *curr_decomp_offset += sizeof(u64a); + *curr_comp_offset += out->counter_size; +} + +static +void fillCounterInfos(vector<mpv_counter_info> *out, u32 *curr_decomp_offset, + u32 *curr_comp_offset, + const map<ClusterKey, vector<raw_puff>> &kilopuffs) { + /* first the triggered puffs */ + map<ClusterKey, vector<raw_puff>>::const_iterator it = kilopuffs.begin(); + while (it != kilopuffs.end() && it->first.trigger_event != MQE_INVALID) { + assert(!it->first.auto_restart); + assert(it->first.trigger_event + == MQE_TOP_FIRST + distance(kilopuffs.begin(), it)); + + out->push_back(mpv_counter_info()); + map<ClusterKey, vector<raw_puff>>::const_iterator it_o = it; + ++it; + fillCounterInfo(&out->back(), curr_decomp_offset, curr_comp_offset, + kilopuffs, it_o, it); + } + + /* we may have 2 sets of non triggered puffs: + * 1) always started with no auto_restart + * 2) always started with auto_restart + */ + map<ClusterKey, vector<raw_puff>>::const_iterator trig_ite = it; + while (it != kilopuffs.end() && !it->first.auto_restart) { + assert(it->first.trigger_event == MQE_INVALID); + + ++it; + } + if (it != trig_ite) { + out->push_back(mpv_counter_info()); + fillCounterInfo(&out->back(), curr_decomp_offset, curr_comp_offset, + kilopuffs, kilopuffs.begin(), it); + } + while (it != kilopuffs.end() && it->first.auto_restart) { + assert(it->first.trigger_event == MQE_INVALID); + + out->push_back(mpv_counter_info()); + map<ClusterKey, vector<raw_puff>>::const_iterator it_o = it; + ++it; + fillCounterInfo(&out->back(), curr_decomp_offset, curr_comp_offset, + kilopuffs, it_o, it); + } +} + +static +const mpv_counter_info &findCounter(const vector<mpv_counter_info> &counters, + u32 i) { + for (const auto &counter : counters) { + if (i >= counter.kilo_begin && i < counter.kilo_end) { + return counter; + } + } + assert(0); + return counters.front(); +} + bytecode_ptr<NFA> mpvCompile(const vector<raw_puff> &puffs_in, const vector<raw_puff> &triggered_puffs, const ReportManager &rm) { - assert(!puffs_in.empty() || !triggered_puffs.empty()); - u32 puffette_count = puffs_in.size() + triggered_puffs.size(); - - map<ClusterKey, vector<raw_puff>> puff_clusters; - populateClusters(puffs_in, triggered_puffs, &puff_clusters); - - u32 curr_comp_offset = 0; - - u32 curr_decomp_offset = sizeof(mpv_decomp_state); - curr_decomp_offset += 16 * puff_clusters.size(); - - vector<mpv_counter_info> counters; - fillCounterInfos(&counters, &curr_decomp_offset, &curr_comp_offset, - puff_clusters); - - u32 pq_offset = curr_decomp_offset; - curr_decomp_offset += sizeof(mpv_pq_item) * puff_clusters.size(); - - u32 rl_offset = curr_decomp_offset; - curr_decomp_offset += sizeof(ReportID) * puffette_count; - - u32 reporter_offset = curr_decomp_offset; - curr_decomp_offset += mmbit_size(puff_clusters.size()); - - u32 active_offset = curr_comp_offset; - curr_comp_offset += mmbit_size(puff_clusters.size()); - - u32 len = calcSize(puff_clusters, counters); - - DEBUG_PRINTF("%u puffs, len = %u\n", puffette_count, len); - + assert(!puffs_in.empty() || !triggered_puffs.empty()); + u32 puffette_count = puffs_in.size() + triggered_puffs.size(); + + map<ClusterKey, vector<raw_puff>> puff_clusters; + populateClusters(puffs_in, triggered_puffs, &puff_clusters); + + u32 curr_comp_offset = 0; + + u32 curr_decomp_offset = sizeof(mpv_decomp_state); + curr_decomp_offset += 16 * puff_clusters.size(); + + 
vector<mpv_counter_info> counters; + fillCounterInfos(&counters, &curr_decomp_offset, &curr_comp_offset, + puff_clusters); + + u32 pq_offset = curr_decomp_offset; + curr_decomp_offset += sizeof(mpv_pq_item) * puff_clusters.size(); + + u32 rl_offset = curr_decomp_offset; + curr_decomp_offset += sizeof(ReportID) * puffette_count; + + u32 reporter_offset = curr_decomp_offset; + curr_decomp_offset += mmbit_size(puff_clusters.size()); + + u32 active_offset = curr_comp_offset; + curr_comp_offset += mmbit_size(puff_clusters.size()); + + u32 len = calcSize(puff_clusters, counters); + + DEBUG_PRINTF("%u puffs, len = %u\n", puffette_count, len); + auto nfa = make_zeroed_bytecode_ptr<NFA>(len); - - mpv_puffette *pa_base = (mpv_puffette *) - ((char *)nfa.get() + sizeof(NFA) + sizeof(mpv) - + sizeof(mpv_kilopuff) * puff_clusters.size() - + sizeof(mpv_counter_info) * counters.size()); - mpv_puffette *pa = pa_base; - + + mpv_puffette *pa_base = (mpv_puffette *) + ((char *)nfa.get() + sizeof(NFA) + sizeof(mpv) + + sizeof(mpv_kilopuff) * puff_clusters.size() + + sizeof(mpv_counter_info) * counters.size()); + mpv_puffette *pa = pa_base; + writeSentinel(pa); - - ++pa; /* skip init sentinel */ - - u32 min_repeat = ~0U; - u32 max_counter = 0; /* max counter that we may need to know about is one - more than largest repeat */ - for (const vector<raw_puff> &puffs : puff_clusters | map_values) { - max_counter = max(max_counter, puffs.back().repeats + 1); - min_repeat = min(min_repeat, puffs.front().repeats); - } - - mpv *m = (mpv *)getMutableImplNfa(nfa.get()); - m->kilo_count = verify_u32(puff_clusters.size()); - m->counter_count = verify_u32(counters.size()); - m->puffette_count = puffette_count; - m->pq_offset = pq_offset; - m->reporter_offset = reporter_offset; - m->report_list_offset = rl_offset; - m->active_offset = active_offset; - m->top_kilo_begin = verify_u32(triggered_puffs.size()); - m->top_kilo_end = verify_u32(puff_clusters.size()); - - mpv_kilopuff *kp_begin = (mpv_kilopuff *)(m + 1); - mpv_kilopuff *kp = kp_begin; - for (auto it = puff_clusters.begin(); it != puff_clusters.end(); ++it) { + + ++pa; /* skip init sentinel */ + + u32 min_repeat = ~0U; + u32 max_counter = 0; /* max counter that we may need to know about is one + more than largest repeat */ + for (const vector<raw_puff> &puffs : puff_clusters | map_values) { + max_counter = max(max_counter, puffs.back().repeats + 1); + min_repeat = min(min_repeat, puffs.front().repeats); + } + + mpv *m = (mpv *)getMutableImplNfa(nfa.get()); + m->kilo_count = verify_u32(puff_clusters.size()); + m->counter_count = verify_u32(counters.size()); + m->puffette_count = puffette_count; + m->pq_offset = pq_offset; + m->reporter_offset = reporter_offset; + m->report_list_offset = rl_offset; + m->active_offset = active_offset; + m->top_kilo_begin = verify_u32(triggered_puffs.size()); + m->top_kilo_end = verify_u32(puff_clusters.size()); + + mpv_kilopuff *kp_begin = (mpv_kilopuff *)(m + 1); + mpv_kilopuff *kp = kp_begin; + for (auto it = puff_clusters.begin(); it != puff_clusters.end(); ++it) { writeKiloPuff(it, rm, findCounter(counters, kp - kp_begin).counter_offset, m, kp, &pa); - ++kp; - } - assert((char *)pa == (char *)nfa.get() + len); - - mpv_counter_info *out_ci = (mpv_counter_info *)kp; - for (const auto &counter : counters) { - *out_ci = counter; - ++out_ci; - } - assert((char *)out_ci == (char *)pa_base); - - writeCoreNfa(nfa.get(), len, min_repeat, max_counter, curr_comp_offset, - curr_decomp_offset); - - return nfa; -} - -} // namespace ue2 + ++kp; + } + 
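Putting calcSize together with the write loop in mpvCompile, the bytecode is laid out header-first with the puffette runs at the tail. A sketch of the arithmetic only, for two clusters holding two and one puffettes and one shared counter:

    len = sizeof(NFA) + sizeof(mpv)
        + 2 * sizeof(mpv_kilopuff)
        + 1 * sizeof(mpv_counter_info)
        + (1 + (2 + 1) + (1 + 1)) * sizeof(mpv_puffette)
          /* one initial sentinel, then each run plus its terminal sentinel */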
assert((char *)pa == (char *)nfa.get() + len); + + mpv_counter_info *out_ci = (mpv_counter_info *)kp; + for (const auto &counter : counters) { + *out_ci = counter; + ++out_ci; + } + assert((char *)out_ci == (char *)pa_base); + + writeCoreNfa(nfa.get(), len, min_repeat, max_counter, curr_comp_offset, + curr_decomp_offset); + + return nfa; +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfa/mpvcompile.h b/contrib/libs/hyperscan/src/nfa/mpvcompile.h index 497b52358e..4f820e4365 100644 --- a/contrib/libs/hyperscan/src/nfa/mpvcompile.h +++ b/contrib/libs/hyperscan/src/nfa/mpvcompile.h @@ -1,70 +1,70 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef MPV_COMPILE_H -#define MPV_COMPILE_H - -#include "ue2common.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef MPV_COMPILE_H +#define MPV_COMPILE_H + +#include "ue2common.h" #include "util/bytecode_ptr.h" -#include "util/charreach.h" - -#include <memory> -#include <vector> - -struct NFA; - -namespace ue2 { - +#include "util/charreach.h" + +#include <memory> +#include <vector> + +struct NFA; + +namespace ue2 { + class ReportManager; -struct raw_puff { - raw_puff(u32 repeats_in, bool unbounded_in, ReportID report_in, +struct raw_puff { + raw_puff(u32 repeats_in, bool unbounded_in, ReportID report_in, const CharReach &reach_in, bool auto_restart_in = false, bool simple_exhaust_in = false) - : repeats(repeats_in), unbounded(unbounded_in), + : repeats(repeats_in), unbounded(unbounded_in), auto_restart(auto_restart_in), simple_exhaust(simple_exhaust_in), report(report_in), reach(reach_in) {} - u32 repeats; /**< report match after this many matching bytes */ - bool unbounded; /**< keep producing matches after repeats are reached */ - bool auto_restart; /**< for /[^X]{n}/ type patterns */ + u32 repeats; /**< report match after this many matching bytes */ + bool unbounded; /**< keep producing matches after repeats are reached */ + bool auto_restart; /**< for /[^X]{n}/ type patterns */ bool simple_exhaust; /* first report will exhaust us */ - ReportID report; - CharReach reach; /**< = ~escapes */ -}; - -/* - * puffs in the triggered_puffs vector are enabled when an TOP_N event is - * delivered corresponding to their index in the vector - */ + ReportID report; + CharReach reach; /**< = ~escapes */ +}; + +/* + * puffs in the triggered_puffs vector are enabled when an TOP_N event is + * delivered corresponding to their index in the vector + */ bytecode_ptr<NFA> mpvCompile(const std::vector<raw_puff> &puffs, const std::vector<raw_puff> &triggered_puffs, const ReportManager &rm); - -} // namespace ue2 - -#endif + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/nfa/nfa_api.h b/contrib/libs/hyperscan/src/nfa/nfa_api.h index 020f44682a..e3f7f74311 100644 --- a/contrib/libs/hyperscan/src/nfa/nfa_api.h +++ b/contrib/libs/hyperscan/src/nfa/nfa_api.h @@ -1,125 +1,125 @@ -/* +/* * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
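For orientation, a hedged sketch of driving the compile entry point declared above (the report ID and ReportManager are placeholders supplied during rose building in real use; the single-character CharReach constructor is assumed):

    // builds an MPV for something like /a{100}/ (sketch only)
    std::vector<ue2::raw_puff> puffs;
    puffs.emplace_back(100u, /* unbounded */ false,
                       some_report /* placeholder ReportID */,
                       ue2::CharReach('a'));
    auto nfa = ue2::mpvCompile(puffs, {}, rm /* a ReportManager */);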
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Declarations for the main NFA Engine API. - * - * This file provides the internal API for all runtime engines ("NFAs", even if - * they're not strictly NFA implementations). - */ - -#ifndef NFA_API_H -#define NFA_API_H - -#ifdef __cplusplus -extern "C" -{ -#endif - -#include "callback.h" -#include "ue2common.h" - -struct mq; -struct NFA; - -/** - * Indicates if an nfa is a zombie. Note: that there were plans for a more - * nuanced view of zombiehood but this never eventuated. - */ -enum nfa_zombie_status { - NFA_ZOMBIE_NO, /**< nfa is not a zombie and will respond to top events */ - NFA_ZOMBIE_ALWAYS_YES /**< nfa is a zombie and will always be a zombie */ -}; - -/** - * Compresses an engine's state. - * The expanded state (@ref mq::state, @ref mq::streamState) is reduced purely - * to a corresponding compressed stream state (@ref mq::streamState). - * - * @param nfa engine the state belongs to - * @param q queue for the engine. The final compressed stream stream is placed - * in the location indicated by @ref mq::streamState - * @param loc the location corresponding to the engine's current state - */ -char nfaQueueCompressState(const struct NFA *nfa, const struct mq *q, s64a loc); - -/** - * Expands an engine's compressed stream state, into its scratch space - * representation. This is required before an engine starts operating over its - * queue. - * - * @param nfa engine the state belongs to - * @param dest location in scratch for decompressed state - * @param src compressed stream state - * @param offset the current stream offset. - * @param key byte corresponding to the location where the compressed state was - * created. - */ -char nfaExpandState(const struct NFA *nfa, void *dest, const void *src, - u64a offset, u8 key); - -/** - * Gives us a properly initialised dead state suitable for later @ref - * nfaQueueExec calls. - */ -char nfaQueueInitState(const struct NFA *nfa, struct mq *q); - -/** - * Initialise the state, applying a TOP appropriate for the offset. If the - * NFA becomes inactive, return zero. Otherwise, write out its compressed - * representation to `state' and return non-zero. - * - * @param nfa engine the state belongs to - * @param offset offset in the stream (relative to start of stream) - * @param state pointer indicating where the state is to be written - * @param key byte corresponding to the location where the compressed state is - * to be created. - */ -char nfaInitCompressedState(const struct NFA *nfa, u64a offset, void *state, - u8 key); - -/** - * Process the queued commands on the given NFA. - * - * @param nfa the NFA to execute - * @param q the queued commands. It must start with some variant of start and - * end with some variant of end. 
The location field of the events must - * be monotonically increasing. - * @param end stop processing command queue when we reach this point - * - * @return non-zero if the nfa is still active, if the nfa is not active the - * state data is undefined - * - * Note: this function can not process events from the past: the location field - * of each event must be >= current offset. - */ -char nfaQueueExec(const struct NFA *nfa, struct mq *q, s64a end); - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Declarations for the main NFA Engine API. + * + * This file provides the internal API for all runtime engines ("NFAs", even if + * they're not strictly NFA implementations). + */ + +#ifndef NFA_API_H +#define NFA_API_H + +#ifdef __cplusplus +extern "C" +{ +#endif + +#include "callback.h" +#include "ue2common.h" + +struct mq; +struct NFA; + +/** + * Indicates if an nfa is a zombie. Note: that there were plans for a more + * nuanced view of zombiehood but this never eventuated. + */ +enum nfa_zombie_status { + NFA_ZOMBIE_NO, /**< nfa is not a zombie and will respond to top events */ + NFA_ZOMBIE_ALWAYS_YES /**< nfa is a zombie and will always be a zombie */ +}; + +/** + * Compresses an engine's state. + * The expanded state (@ref mq::state, @ref mq::streamState) is reduced purely + * to a corresponding compressed stream state (@ref mq::streamState). + * + * @param nfa engine the state belongs to + * @param q queue for the engine. The final compressed stream stream is placed + * in the location indicated by @ref mq::streamState + * @param loc the location corresponding to the engine's current state + */ +char nfaQueueCompressState(const struct NFA *nfa, const struct mq *q, s64a loc); + +/** + * Expands an engine's compressed stream state, into its scratch space + * representation. This is required before an engine starts operating over its + * queue. 
+ * + * @param nfa engine the state belongs to + * @param dest location in scratch for decompressed state + * @param src compressed stream state + * @param offset the current stream offset. + * @param key byte corresponding to the location where the compressed state was + * created. + */ +char nfaExpandState(const struct NFA *nfa, void *dest, const void *src, + u64a offset, u8 key); + +/** + * Gives us a properly initialised dead state suitable for later @ref + * nfaQueueExec calls. + */ +char nfaQueueInitState(const struct NFA *nfa, struct mq *q); + +/** + * Initialise the state, applying a TOP appropriate for the offset. If the + * NFA becomes inactive, return zero. Otherwise, write out its compressed + * representation to `state' and return non-zero. + * + * @param nfa engine the state belongs to + * @param offset offset in the stream (relative to start of stream) + * @param state pointer indicating where the state is to be written + * @param key byte corresponding to the location where the compressed state is + * to be created. + */ +char nfaInitCompressedState(const struct NFA *nfa, u64a offset, void *state, + u8 key); + +/** + * Process the queued commands on the given NFA. + * + * @param nfa the NFA to execute + * @param q the queued commands. It must start with some variant of start and + * end with some variant of end. The location field of the events must + * be monotonically increasing. + * @param end stop processing command queue when we reach this point + * + * @return non-zero if the nfa is still active, if the nfa is not active the + * state data is undefined + * + * Note: this function can not process events from the past: the location field + * of each event must be >= current offset. + */ +char nfaQueueExec(const struct NFA *nfa, struct mq *q, s64a end); + /** * Main execution function that doesn't perform the checks and optimisations of * nfaQueueExec() and just dispatches directly to the nfa implementations. It is @@ -130,42 +130,42 @@ char nfaQueueExec_raw(const struct NFA *nfa, struct mq *q, s64a end); /** Return value indicating that the engine is dead. */ #define MO_DEAD 0 -/** Return value indicating that the engine is alive. */ -#define MO_ALIVE 1 - -/** Return value from @ref nfaQueueExecToMatch indicating that engine progress - * stopped as a match state was reached. */ -#define MO_MATCHES_PENDING 2 - -/** - * Process the queued commands on the given nfa up to end or the first match. - * This function will only fire the callback in response to an report_current - * being set and accepts at the starting offset, in all other situations accepts - * will result in the queue pausing with a return value of - * @ref MO_MATCHES_PENDING. - * - * @param nfa the NFA to execute - * @param q the queued commands. It must start with some variant of start and - * end with some variant of end. The location field of the events must - * be monotonically increasing. If not all the data was processed during - * the call, the queue is updated to reflect the remaining work. 
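The compress/expand pair documented above forms a round trip between scratch and stream state; schematically (buffer setup elided; `loc`, `offset`, and `key` as described in the parameter docs):

    /* pause a stream: scratch state -> compressed stream state */
    nfaQueueCompressState(nfa, q, loc);
    /* ... stream is swapped out; later, on resume ... */
    /* compressed stream state -> scratch state, ready for nfaQueueExec */
    nfaExpandState(nfa, q->state, q->streamState, offset, key);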
- * @param end stop processing command queue when we reach this point - * - * @return @ref MO_ALIVE if the nfa is still active with no matches pending, - * and @ref MO_MATCHES_PENDING if there are matches pending, 0 if not - * alive - * - * Note: if it can be determined that the stream can never match, the nfa - * may be reported as dead even if not all the data was scanned - * - * Note: if the nfa is not alive the state data is undefined - * - * Note: this function can not process events from the past: the location field - * of each event must be >= current offset. - */ -char nfaQueueExecToMatch(const struct NFA *nfa, struct mq *q, s64a end); - -/** +/** Return value indicating that the engine is alive. */ +#define MO_ALIVE 1 + +/** Return value from @ref nfaQueueExecToMatch indicating that engine progress + * stopped as a match state was reached. */ +#define MO_MATCHES_PENDING 2 + +/** + * Process the queued commands on the given nfa up to end or the first match. + * This function will only fire the callback in response to an report_current + * being set and accepts at the starting offset, in all other situations accepts + * will result in the queue pausing with a return value of + * @ref MO_MATCHES_PENDING. + * + * @param nfa the NFA to execute + * @param q the queued commands. It must start with some variant of start and + * end with some variant of end. The location field of the events must + * be monotonically increasing. If not all the data was processed during + * the call, the queue is updated to reflect the remaining work. + * @param end stop processing command queue when we reach this point + * + * @return @ref MO_ALIVE if the nfa is still active with no matches pending, + * and @ref MO_MATCHES_PENDING if there are matches pending, 0 if not + * alive + * + * Note: if it can be determined that the stream can never match, the nfa + * may be reported as dead even if not all the data was scanned + * + * Note: if the nfa is not alive the state data is undefined + * + * Note: this function can not process events from the past: the location field + * of each event must be >= current offset. + */ +char nfaQueueExecToMatch(const struct NFA *nfa, struct mq *q, s64a end); + +/** * Main execution function that doesn't perform the checks and optimisations of * nfaQueueExecToMatch() and just dispatches directly to the nfa * implementations. It is intended to be used by the Tamarama engine. @@ -173,108 +173,108 @@ char nfaQueueExecToMatch(const struct NFA *nfa, struct mq *q, s64a end); char nfaQueueExec2_raw(const struct NFA *nfa, struct mq *q, s64a end); /** - * Report matches at the current queue location. - * - * @param nfa the NFA to execute - * @param q the queued commands. It must start with some variant of start and - * end with some variant of end. The location field of the events must - * be monotonically increasing. - * - * Note: the queue MUST be located at position where @ref nfaQueueExecToMatch - * returned @ref MO_MATCHES_PENDING. - * - * Note: the return value of this call is undefined, and should be ignored. - */ -char nfaReportCurrentMatches(const struct NFA *nfa, struct mq *q); - -/** - * Returns non-zero if the NFA is in an accept state with the given report ID. - */ -char nfaInAcceptState(const struct NFA *nfa, ReportID report, struct mq *q); - -/** + * Report matches at the current queue location. + * + * @param nfa the NFA to execute + * @param q the queued commands. It must start with some variant of start and + * end with some variant of end. 
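The MO_MATCHES_PENDING protocol described above is consumed in a report-and-resume loop; a minimal sketch (the driver function is illustrative, the calls are the API declared in this header):

    static char drain_matches(const struct NFA *nfa, struct mq *q, s64a end) {
        char rv = nfaQueueExecToMatch(nfa, q, end);
        while (rv == MO_MATCHES_PENDING) {
            /* queue is paused on an accept: fire callbacks, then resume */
            nfaReportCurrentMatches(nfa, q);
            rv = nfaQueueExecToMatch(nfa, q, end);
        }
        return rv; /* MO_ALIVE or MO_DEAD */
    }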
The location field of the events must + * be monotonically increasing. + * + * Note: the queue MUST be located at position where @ref nfaQueueExecToMatch + * returned @ref MO_MATCHES_PENDING. + * + * Note: the return value of this call is undefined, and should be ignored. + */ +char nfaReportCurrentMatches(const struct NFA *nfa, struct mq *q); + +/** + * Returns non-zero if the NFA is in an accept state with the given report ID. + */ +char nfaInAcceptState(const struct NFA *nfa, ReportID report, struct mq *q); + +/** * Returns non-zero if the NFA is in any accept state regardless of report * ID. */ char nfaInAnyAcceptState(const struct NFA *nfa, struct mq *q); /** - * Process the queued commands on the given NFA up to end or the first match. - * + * Process the queued commands on the given NFA up to end or the first match. + * * Note: This version is meant for rose prefix/infix NFAs: - * - never uses a callback - * - loading of state at a point in history is not special cased - * - * @param nfa the NFA to execute - * @param q the queued commands. It must start with some variant of start and - * end with some variant of end. The location field of the events must - * be monotonically increasing. If not all the data was processed during - * the call, the queue is updated to reflect the remaining work. + * - never uses a callback + * - loading of state at a point in history is not special cased + * + * @param nfa the NFA to execute + * @param q the queued commands. It must start with some variant of start and + * end with some variant of end. The location field of the events must + * be monotonically increasing. If not all the data was processed during + * the call, the queue is updated to reflect the remaining work. * @param report we are interested in. If the given report will be raised at * the end location, the function returns @ref MO_MATCHES_PENDING. If no * match information is desired, MO_INVALID_IDX should be passed in. - * @return @ref MO_ALIVE if the nfa is still active with no matches pending, - * and @ref MO_MATCHES_PENDING if there are matches pending, 0 if not - * alive - * - * Note: if it can be determined that the stream can never match, the nfa - * may be reported as dead even if not all the data was scanned - * - * Note: if the NFA is not active the state data is undefined. - */ -char nfaQueueExecRose(const struct NFA *nfa, struct mq *q, ReportID report); - -/** - * Runs an NFA in reverse from (buf + buflen) to buf and then from (hbuf + hlen) - * to hbuf (main buffer and history buffer). - * + * @return @ref MO_ALIVE if the nfa is still active with no matches pending, + * and @ref MO_MATCHES_PENDING if there are matches pending, 0 if not + * alive + * + * Note: if it can be determined that the stream can never match, the nfa + * may be reported as dead even if not all the data was scanned + * + * Note: if the NFA is not active the state data is undefined. + */ +char nfaQueueExecRose(const struct NFA *nfa, struct mq *q, ReportID report); + +/** + * Runs an NFA in reverse from (buf + buflen) to buf and then from (hbuf + hlen) + * to hbuf (main buffer and history buffer). + * * Note: provides the match location as the "end" offset when the callback is * called. 
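As a concrete illustration of the reverse-execution entry point, here is a hypothetical caller. It assumes the NfaCallback signature from callback.h takes (start, end, report id, context) and returns MO_CONTINUE_MATCHING to keep scanning; the counting callback itself is invented for the example.

#include "nfa_api.h"

static
int countCb(u64a start, u64a end, ReportID id, void *context) {
    (void)start; (void)id;
    size_t *count = context;
    DEBUG_PRINTF("reverse match ending at %llu\n", end);
    (*count)++;
    return MO_CONTINUE_MATCHING;
}

static
size_t reverseScan(const struct NFA *nfa, u64a offset, const u8 *buf,
                   size_t buflen, const u8 *hbuf, size_t hlen) {
    size_t count = 0;
    nfaBlockExecReverse(nfa, offset, buf, buflen, hbuf, hlen, countCb,
                        &count);
    return count;
}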
* - * @param nfa engine to run - * @param offset base offset of buf - * @param buf main buffer - * @param buflen length of buf - * @param hbuf history buf - * @param hlen length of hbuf - * @param callback the callback to call for each match raised - * @param context context pointer passed to each callback - */ -char nfaBlockExecReverse(const struct NFA *nfa, u64a offset, const u8 *buf, - size_t buflen, const u8 *hbuf, size_t hlen, + * @param nfa engine to run + * @param offset base offset of buf + * @param buf main buffer + * @param buflen length of buf + * @param hbuf history buf + * @param hlen length of hbuf + * @param callback the callback to call for each match raised + * @param context context pointer passed to each callback + */ +char nfaBlockExecReverse(const struct NFA *nfa, u64a offset, const u8 *buf, + size_t buflen, const u8 *hbuf, size_t hlen, NfaCallback callback, void *context); - -/** - * Check whether the given NFA's state indicates that it is in one or more - * final (accept at end of data) state. If so, call the callback for each - * match. - * - * @param nfa the NFA to execute - * @param state current state associated with this NFA - * @param streamState stream version of the state associated with this NFA - * (including br region) - * @param offset the offset to return (via the callback) with each match - * @param callback the callback to call for each match raised - * @param context context pointer passed to each callback + +/** + * Check whether the given NFA's state indicates that it is in one or more + * final (accept at end of data) state. If so, call the callback for each + * match. + * + * @param nfa the NFA to execute + * @param state current state associated with this NFA + * @param streamState stream version of the state associated with this NFA + * (including br region) + * @param offset the offset to return (via the callback) with each match + * @param callback the callback to call for each match raised + * @param context context pointer passed to each callback * * @return @ref MO_HALT_MATCHING if the user instructed us to halt, otherwise * @ref MO_CONTINUE_MATCHING. - */ -char nfaCheckFinalState(const struct NFA *nfa, const char *state, - const char *streamState, u64a offset, + */ +char nfaCheckFinalState(const struct NFA *nfa, const char *state, + const char *streamState, u64a offset, NfaCallback callback, void *context); - -/** - * Indicates if an engine is a zombie. - * - * @param nfa engine to consider - * @param q queue corresponding to the engine - * @param loc current location in the buffer for an engine - */ -enum nfa_zombie_status nfaGetZombieStatus(const struct NFA *nfa, struct mq *q, - s64a loc); -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif + +/** + * Indicates if an engine is a zombie. 
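The end-of-data call above pairs with the nfaAcceptsEod() guard that the dispatcher asserts later in this patch. A caller-side sketch, in which everything other than the two API calls is a hypothetical name:

static
void onStreamEod(const struct NFA *nfa, const char *state,
                 const char *streamState, u64a total_offset,
                 NfaCallback callback, void *context) {
    /* The dispatcher asserts nfaAcceptsEod(nfa), so check before calling. */
    if (nfaAcceptsEod(nfa)) {
        nfaCheckFinalState(nfa, state, streamState, total_offset, callback,
                           context);
    }
}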
+ * + * @param nfa engine to consider + * @param q queue corresponding to the engine + * @param loc current location in the buffer for an engine + */ +enum nfa_zombie_status nfaGetZombieStatus(const struct NFA *nfa, struct mq *q, + s64a loc); +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/contrib/libs/hyperscan/src/nfa/nfa_api_dispatch.c b/contrib/libs/hyperscan/src/nfa/nfa_api_dispatch.c index 04b9f7144d..75cac4b481 100644 --- a/contrib/libs/hyperscan/src/nfa/nfa_api_dispatch.c +++ b/contrib/libs/hyperscan/src/nfa/nfa_api_dispatch.c @@ -1,58 +1,58 @@ -/* +/* * Copyright (c) 2015-2020, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - \brief Dispatches NFA engine API calls to the appropriate engines -*/ -#include "nfa_api.h" - -#include "nfa_api_queue.h" -#include "nfa_internal.h" -#include "ue2common.h" - -// Engine implementations. -#include "castle.h" -#include "gough.h" -#include "lbr.h" -#include "limex.h" -#include "mcclellan.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + \brief Dispatches NFA engine API calls to the appropriate engines +*/ +#include "nfa_api.h" + +#include "nfa_api_queue.h" +#include "nfa_internal.h" +#include "ue2common.h" + +// Engine implementations. +#include "castle.h" +#include "gough.h" +#include "lbr.h" +#include "limex.h" +#include "mcclellan.h" #include "mcsheng.h" -#include "mpv.h" +#include "mpv.h" #include "sheng.h" #include "tamarama.h" - + #define DISPATCH_CASE(dc_ltype, dc_ftype, dc_func_call) \ case dc_ltype: \ return nfaExec##dc_ftype##dc_func_call; \ - break - -// general framework calls - + break + +// general framework calls + #define DISPATCH_BY_NFA_TYPE(dbnt_func) \ switch (nfa->type) { \ DISPATCH_CASE(LIMEX_NFA_32, LimEx32, dbnt_func); \ @@ -82,40 +82,40 @@ DISPATCH_CASE(MCSHENG_64_NFA_16, McSheng64_16, dbnt_func); \ default: \ assert(0); \ - } - -char nfaCheckFinalState(const struct NFA *nfa, const char *state, - const char *streamState, u64a offset, + } + +char nfaCheckFinalState(const struct NFA *nfa, const char *state, + const char *streamState, u64a offset, NfaCallback callback, void *context) { - assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa))); - - // Caller should avoid calling us if we can never produce matches. - assert(nfaAcceptsEod(nfa)); - - DISPATCH_BY_NFA_TYPE(_testEOD(nfa, state, streamState, offset, callback, + assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa))); + + // Caller should avoid calling us if we can never produce matches. 
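+    /* Aside: a sketch of what the DISPATCH_BY_NFA_TYPE use below expands
+     * to, abridged to a single case (the generated switch covers every
+     * engine type listed in the macro above):
+     *
+     *     switch (nfa->type) {
+     *     case LIMEX_NFA_32:
+     *         return nfaExecLimEx32_testEOD(nfa, state, streamState,
+     *                                       offset, callback, context);
+     *     ...
+     *     default:
+     *         assert(0);
+     *     }
+     */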
+ assert(nfaAcceptsEod(nfa)); + + DISPATCH_BY_NFA_TYPE(_testEOD(nfa, state, streamState, offset, callback, context)); - return 0; -} - -char nfaQueueInitState(const struct NFA *nfa, struct mq *q) { - assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa))); - - DISPATCH_BY_NFA_TYPE(_queueInitState(nfa, q)); - return 0; -} - -static really_inline -char nfaQueueExec_i(const struct NFA *nfa, struct mq *q, s64a end) { - DISPATCH_BY_NFA_TYPE(_Q(nfa, q, end)); - return 0; -} - -static really_inline -char nfaQueueExec2_i(const struct NFA *nfa, struct mq *q, s64a end) { - DISPATCH_BY_NFA_TYPE(_Q2(nfa, q, end)); - return 0; -} - + return 0; +} + +char nfaQueueInitState(const struct NFA *nfa, struct mq *q) { + assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa))); + + DISPATCH_BY_NFA_TYPE(_queueInitState(nfa, q)); + return 0; +} + +static really_inline +char nfaQueueExec_i(const struct NFA *nfa, struct mq *q, s64a end) { + DISPATCH_BY_NFA_TYPE(_Q(nfa, q, end)); + return 0; +} + +static really_inline +char nfaQueueExec2_i(const struct NFA *nfa, struct mq *q, s64a end) { + DISPATCH_BY_NFA_TYPE(_Q2(nfa, q, end)); + return 0; +} + char nfaQueueExec_raw(const struct NFA *nfa, struct mq *q, s64a end) { return nfaQueueExec_i(nfa, q, end); } @@ -124,245 +124,245 @@ char nfaQueueExec2_raw(const struct NFA *nfa, struct mq *q, s64a end) { return nfaQueueExec2_i(nfa, q, end); } -static really_inline -char nfaQueueExecRose_i(const struct NFA *nfa, struct mq *q, ReportID report) { - DISPATCH_BY_NFA_TYPE(_QR(nfa, q, report)); - return 0; -} - -/** Returns 0 if this NFA cannot possibly match (due to width constraints etc) - * and the caller should return 0. May also edit the queue. */ -static really_inline -char nfaQueueCanMatch(const struct NFA *nfa, struct mq *q, s64a end, - char *q_trimmed) { - assert(q_trimmed); - assert(q->end - q->cur >= 2); - assert(end >= 0); - - DEBUG_PRINTF("q->offset=%llu, end=%lld\n", q->offset, end); - DEBUG_PRINTF("maxBiAnchoredWidth=%u, maxOffset=%u\n", - nfa->maxBiAnchoredWidth, nfa->maxOffset); - - if (nfa->maxBiAnchoredWidth && - (end + q->offset > nfa->maxBiAnchoredWidth)) { - DEBUG_PRINTF("stream too long: o %llu l %zu max: %hhu\n", q->offset, - q->length, nfa->maxBiAnchoredWidth); - return 0; - } - - if (nfa->maxOffset) { - if (q->offset >= nfa->maxOffset) { - DEBUG_PRINTF("stream is past maxOffset\n"); - return 0; - } - - if (q->offset + end > nfa->maxOffset) { - s64a maxEnd = nfa->maxOffset - q->offset; - DEBUG_PRINTF("me %lld off %llu len = %lld\n", maxEnd, - q->offset, end); - while (q->end > q->cur - && q->items[q->end - 1].location > maxEnd) { - *q_trimmed = 1; - DEBUG_PRINTF("killing item %u %lld %u\n", q->end, - q->items[q->end - 1].location, - q->items[q->end - 1].type); - q->items[q->end - 1].location = maxEnd; - q->items[q->end - 1].type = MQE_END; - if (q->end - q->cur < 2 - ||q->items[q->end - 2].location <= maxEnd) { - break; - } - q->end--; - } - - if (q->end - q->cur < 2) { /* nothing left on q */ - DEBUG_PRINTF("queue empty\n"); - return 0; - } - } - -#ifdef DEBUG - if (*q_trimmed) { - debugQueue(q); - } -#endif - } - - return 1; -} - -char nfaQueueExec(const struct NFA *nfa, struct mq *q, s64a end) { - DEBUG_PRINTF("nfa=%p end=%lld\n", nfa, end); -#ifdef DEBUG - debugQueue(q); -#endif - - assert(q && q->context && q->state); - assert(end >= 0); - assert(q->cur < q->end); - assert(q->end <= MAX_MQE_LEN); - assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa))); - assert(end < q->items[q->end - 1].location - || q->items[q->end - 1].type == MQE_END); - 
- if (q->items[q->cur].location > end) { - return 1; - } - - char q_trimmed = 0; - - assert(end <= (s64a)q->length || !q->hlength); - /* due to reverse accel in block mode some queues may work on a truncated - * buffer */ - if (end > (s64a)q->length) { - end = q->length; - q_trimmed = 1; - } - - if (!nfaQueueCanMatch(nfa, q, end, &q_trimmed)) { - if (q->report_current) { - nfaReportCurrentMatches(nfa, q); - q->report_current = 0; - } - - return 0; - } - - char rv = nfaQueueExec_i(nfa, q, end); - -#ifdef DEBUG - debugQueue(q); -#endif - - assert(!q->report_current); - DEBUG_PRINTF("returned rv=%d, q_trimmed=%d\n", rv, q_trimmed); - return rv && !q_trimmed; -} - -char nfaQueueExecToMatch(const struct NFA *nfa, struct mq *q, s64a end) { - DEBUG_PRINTF("nfa=%p end=%lld\n", nfa, end); -#ifdef DEBUG - debugQueue(q); -#endif - - assert(q); - assert(end >= 0); - assert(q->state); - assert(q->cur < q->end); - assert(q->end <= MAX_MQE_LEN); - assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa))); - assert(end < q->items[q->end - 1].location - || q->items[q->end - 1].type == MQE_END); - - char q_trimmed_ra = 0; - assert(end <= (s64a)q->length || !q->hlength); - /* due to reverse accel in block mode some queues may work on a truncated - * buffer */ - if (q->items[q->cur].location > end) { - return 1; - } - - if (end > (s64a)q->length) { - end = q->length; - q_trimmed_ra = 1; - } - - char q_trimmed = 0; - if (!nfaQueueCanMatch(nfa, q, end, &q_trimmed)) { - if (q->report_current) { - nfaReportCurrentMatches(nfa, q); - q->report_current = 0; - } - - return 0; - } - - char rv = nfaQueueExec2_i(nfa, q, end); - assert(!q->report_current); - DEBUG_PRINTF("returned rv=%d, q_trimmed=%d\n", rv, q_trimmed); - if (rv == MO_MATCHES_PENDING) { - if (q_trimmed) { - // We need to "fix" the queue so that subsequent operations must - // trim it as well. - assert(q->end > 0); - assert(nfa->maxOffset); - q->items[q->end - 1].location = nfa->maxOffset + 1; - } - return rv; - } - return rv && !q_trimmed && !q_trimmed_ra; -} - -char nfaReportCurrentMatches(const struct NFA *nfa, struct mq *q) { - DISPATCH_BY_NFA_TYPE(_reportCurrent(nfa, q)); - return 0; -} - -char nfaInAcceptState(const struct NFA *nfa, ReportID report, struct mq *q) { - DISPATCH_BY_NFA_TYPE(_inAccept(nfa, report, q)); - return 0; -} - +static really_inline +char nfaQueueExecRose_i(const struct NFA *nfa, struct mq *q, ReportID report) { + DISPATCH_BY_NFA_TYPE(_QR(nfa, q, report)); + return 0; +} + +/** Returns 0 if this NFA cannot possibly match (due to width constraints etc) + * and the caller should return 0. May also edit the queue. 
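Putting the pieces together, a one-shot block scan through this queue API looks roughly like the sketch below. It is illustrative only: scratch and state allocation are elided, and the helper name is invented.

#include <string.h>
#include "nfa_api.h"
#include "nfa_api_queue.h"

static
char scanBlock(const struct NFA *nfa, struct mq *q, const u8 *data,
               size_t len, NfaCallback cb, void *context) {
    memset(q, 0, sizeof(*q));
    q->nfa = nfa;
    q->buffer = data;
    q->length = len;
    q->offset = 0;          /* absolute stream offset of data[0] */
    q->cb = cb;
    q->context = context;
    /* q->state, q->streamState and q->scratch must point at real scratch
     * memory; allocation is elided here. */

    pushQueue(q, MQE_START, 0);
    pushQueue(q, MQE_TOP, 0);           /* enable start states */
    pushQueue(q, MQE_END, (s64a)len);

    nfaQueueInitState(nfa, q);
    return nfaQueueExec(nfa, q, (s64a)len);
}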
*/ +static really_inline +char nfaQueueCanMatch(const struct NFA *nfa, struct mq *q, s64a end, + char *q_trimmed) { + assert(q_trimmed); + assert(q->end - q->cur >= 2); + assert(end >= 0); + + DEBUG_PRINTF("q->offset=%llu, end=%lld\n", q->offset, end); + DEBUG_PRINTF("maxBiAnchoredWidth=%u, maxOffset=%u\n", + nfa->maxBiAnchoredWidth, nfa->maxOffset); + + if (nfa->maxBiAnchoredWidth && + (end + q->offset > nfa->maxBiAnchoredWidth)) { + DEBUG_PRINTF("stream too long: o %llu l %zu max: %hhu\n", q->offset, + q->length, nfa->maxBiAnchoredWidth); + return 0; + } + + if (nfa->maxOffset) { + if (q->offset >= nfa->maxOffset) { + DEBUG_PRINTF("stream is past maxOffset\n"); + return 0; + } + + if (q->offset + end > nfa->maxOffset) { + s64a maxEnd = nfa->maxOffset - q->offset; + DEBUG_PRINTF("me %lld off %llu len = %lld\n", maxEnd, + q->offset, end); + while (q->end > q->cur + && q->items[q->end - 1].location > maxEnd) { + *q_trimmed = 1; + DEBUG_PRINTF("killing item %u %lld %u\n", q->end, + q->items[q->end - 1].location, + q->items[q->end - 1].type); + q->items[q->end - 1].location = maxEnd; + q->items[q->end - 1].type = MQE_END; + if (q->end - q->cur < 2 + ||q->items[q->end - 2].location <= maxEnd) { + break; + } + q->end--; + } + + if (q->end - q->cur < 2) { /* nothing left on q */ + DEBUG_PRINTF("queue empty\n"); + return 0; + } + } + +#ifdef DEBUG + if (*q_trimmed) { + debugQueue(q); + } +#endif + } + + return 1; +} + +char nfaQueueExec(const struct NFA *nfa, struct mq *q, s64a end) { + DEBUG_PRINTF("nfa=%p end=%lld\n", nfa, end); +#ifdef DEBUG + debugQueue(q); +#endif + + assert(q && q->context && q->state); + assert(end >= 0); + assert(q->cur < q->end); + assert(q->end <= MAX_MQE_LEN); + assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa))); + assert(end < q->items[q->end - 1].location + || q->items[q->end - 1].type == MQE_END); + + if (q->items[q->cur].location > end) { + return 1; + } + + char q_trimmed = 0; + + assert(end <= (s64a)q->length || !q->hlength); + /* due to reverse accel in block mode some queues may work on a truncated + * buffer */ + if (end > (s64a)q->length) { + end = q->length; + q_trimmed = 1; + } + + if (!nfaQueueCanMatch(nfa, q, end, &q_trimmed)) { + if (q->report_current) { + nfaReportCurrentMatches(nfa, q); + q->report_current = 0; + } + + return 0; + } + + char rv = nfaQueueExec_i(nfa, q, end); + +#ifdef DEBUG + debugQueue(q); +#endif + + assert(!q->report_current); + DEBUG_PRINTF("returned rv=%d, q_trimmed=%d\n", rv, q_trimmed); + return rv && !q_trimmed; +} + +char nfaQueueExecToMatch(const struct NFA *nfa, struct mq *q, s64a end) { + DEBUG_PRINTF("nfa=%p end=%lld\n", nfa, end); +#ifdef DEBUG + debugQueue(q); +#endif + + assert(q); + assert(end >= 0); + assert(q->state); + assert(q->cur < q->end); + assert(q->end <= MAX_MQE_LEN); + assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa))); + assert(end < q->items[q->end - 1].location + || q->items[q->end - 1].type == MQE_END); + + char q_trimmed_ra = 0; + assert(end <= (s64a)q->length || !q->hlength); + /* due to reverse accel in block mode some queues may work on a truncated + * buffer */ + if (q->items[q->cur].location > end) { + return 1; + } + + if (end > (s64a)q->length) { + end = q->length; + q_trimmed_ra = 1; + } + + char q_trimmed = 0; + if (!nfaQueueCanMatch(nfa, q, end, &q_trimmed)) { + if (q->report_current) { + nfaReportCurrentMatches(nfa, q); + q->report_current = 0; + } + + return 0; + } + + char rv = nfaQueueExec2_i(nfa, q, end); + assert(!q->report_current); + DEBUG_PRINTF("returned rv=%d, 
q_trimmed=%d\n", rv, q_trimmed); + if (rv == MO_MATCHES_PENDING) { + if (q_trimmed) { + // We need to "fix" the queue so that subsequent operations must + // trim it as well. + assert(q->end > 0); + assert(nfa->maxOffset); + q->items[q->end - 1].location = nfa->maxOffset + 1; + } + return rv; + } + return rv && !q_trimmed && !q_trimmed_ra; +} + +char nfaReportCurrentMatches(const struct NFA *nfa, struct mq *q) { + DISPATCH_BY_NFA_TYPE(_reportCurrent(nfa, q)); + return 0; +} + +char nfaInAcceptState(const struct NFA *nfa, ReportID report, struct mq *q) { + DISPATCH_BY_NFA_TYPE(_inAccept(nfa, report, q)); + return 0; +} + char nfaInAnyAcceptState(const struct NFA *nfa, struct mq *q) { DISPATCH_BY_NFA_TYPE(_inAnyAccept(nfa, q)); return 0; } -char nfaQueueExecRose(const struct NFA *nfa, struct mq *q, ReportID r) { - DEBUG_PRINTF("nfa=%p\n", nfa); -#ifdef DEBUG - debugQueue(q); -#endif - - assert(q && !q->context && q->state); - assert(q->cur <= q->end); - assert(q->end <= MAX_MQE_LEN); - assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa))); - assert(!q->report_current); - - return nfaQueueExecRose_i(nfa, q, r); -} - -char nfaBlockExecReverse(const struct NFA *nfa, u64a offset, const u8 *buf, - size_t buflen, const u8 *hbuf, size_t hlen, +char nfaQueueExecRose(const struct NFA *nfa, struct mq *q, ReportID r) { + DEBUG_PRINTF("nfa=%p\n", nfa); +#ifdef DEBUG + debugQueue(q); +#endif + + assert(q && !q->context && q->state); + assert(q->cur <= q->end); + assert(q->end <= MAX_MQE_LEN); + assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa))); + assert(!q->report_current); + + return nfaQueueExecRose_i(nfa, q, r); +} + +char nfaBlockExecReverse(const struct NFA *nfa, u64a offset, const u8 *buf, + size_t buflen, const u8 *hbuf, size_t hlen, NfaCallback callback, void *context) { - assert(nfa); - assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa))); - - DISPATCH_BY_NFA_TYPE(_B_Reverse(nfa, offset, buf, buflen, hbuf, hlen, + assert(nfa); + assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa))); + + DISPATCH_BY_NFA_TYPE(_B_Reverse(nfa, offset, buf, buflen, hbuf, hlen, callback, context)); - return 0; -} - -char nfaQueueCompressState(const struct NFA *nfa, const struct mq *q, - s64a loc) { - assert(nfa && q); - assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa))); - - DISPATCH_BY_NFA_TYPE(_queueCompressState(nfa, q, loc)); - return 0; -} - -char nfaExpandState(const struct NFA *nfa, void *dest, const void *src, - u64a offset, u8 key) { - assert(nfa && dest && src); - assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa))); - - DISPATCH_BY_NFA_TYPE(_expandState(nfa, dest, src, offset, key)); - return 0; -} - -char nfaInitCompressedState(const struct NFA *nfa, u64a offset, void *state, - u8 key) { - assert(nfa && state); - assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa))); - - DISPATCH_BY_NFA_TYPE(_initCompressedState(nfa, offset, state, key)); - return 0; -} - -enum nfa_zombie_status nfaGetZombieStatus(const struct NFA *nfa, struct mq *q, - s64a loc) { - DISPATCH_BY_NFA_TYPE(_zombie_status(nfa, q, loc)); - return NFA_ZOMBIE_NO; -} + return 0; +} + +char nfaQueueCompressState(const struct NFA *nfa, const struct mq *q, + s64a loc) { + assert(nfa && q); + assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa))); + + DISPATCH_BY_NFA_TYPE(_queueCompressState(nfa, q, loc)); + return 0; +} + +char nfaExpandState(const struct NFA *nfa, void *dest, const void *src, + u64a offset, u8 key) { + assert(nfa && dest && src); + assert(ISALIGNED_CL(nfa) && 
ISALIGNED_CL(getImplNfa(nfa))); + + DISPATCH_BY_NFA_TYPE(_expandState(nfa, dest, src, offset, key)); + return 0; +} + +char nfaInitCompressedState(const struct NFA *nfa, u64a offset, void *state, + u8 key) { + assert(nfa && state); + assert(ISALIGNED_CL(nfa) && ISALIGNED_CL(getImplNfa(nfa))); + + DISPATCH_BY_NFA_TYPE(_initCompressedState(nfa, offset, state, key)); + return 0; +} + +enum nfa_zombie_status nfaGetZombieStatus(const struct NFA *nfa, struct mq *q, + s64a loc) { + DISPATCH_BY_NFA_TYPE(_zombie_status(nfa, q, loc)); + return NFA_ZOMBIE_NO; +} diff --git a/contrib/libs/hyperscan/src/nfa/nfa_api_queue.h b/contrib/libs/hyperscan/src/nfa/nfa_api_queue.h index 511941f30b..e3579a7ee2 100644 --- a/contrib/libs/hyperscan/src/nfa/nfa_api_queue.h +++ b/contrib/libs/hyperscan/src/nfa/nfa_api_queue.h @@ -1,289 +1,289 @@ -/* +/* * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef NFA_API_QUEUE_H -#define NFA_API_QUEUE_H - -#ifdef __cplusplus -extern "C" -{ -#endif - -#include "ue2common.h" -#include "callback.h" - -/** Size of mq::items, max elements on a queue. */ -#define MAX_MQE_LEN 10 - -/** Queue events */ - -/** Queue event: begin scanning. Note: stateless engines will start from this - * location. */ -#define MQE_START 0U - -/** Queue event: stop scanning. */ -#define MQE_END 1U - -/** Queue event: enable start and start-dot-star. */ -#define MQE_TOP 2U - -/** Queue event: first event corresponding to a numbered TOP. Additional tops - * (in multi-top engines) use the event values from MQE_TOP_FIRST to - * MQE_INVALID - 1. */ -#define MQE_TOP_FIRST 4U - -/** Invalid queue event */ -#define MQE_INVALID (~0U) - -/** Queue item */ -struct mq_item { - u32 type; /**< event type, from MQE_* */ - s64a location; /**< relative to the start of the current buffer */ - u64a som; /**< pattern start-of-match corresponding to a top, only used - * by som engines. */ -}; - -// Forward decl. -struct NFA; - -/** - * Queue of events to control engine execution. 
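The compress/expand pair dispatched above is what parks a live stream and picks it up again later. A hedged sketch, assuming the usual convention that the compact form lives in q->streamState and that key is the byte at the pause location:

static
void pauseStream(const struct NFA *nfa, struct mq *q, s64a loc) {
    /* Pack the expanded scratch state into its compact stream form. */
    nfaQueueCompressState(nfa, q, loc);
}

static
void resumeStream(const struct NFA *nfa, struct mq *q, u64a offset, u8 key) {
    /* Unpack the compact form back into scratch before the next write. */
    nfaExpandState(nfa, q->state, q->streamState, offset, key);
}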
mq::cur is index of first - * valid event, mq::end is one past the index of last valid event. - */ -struct mq { - const struct NFA *nfa; /**< nfa corresponding to the queue */ - u32 cur; /**< index of the first valid item in the queue */ - u32 end; /**< index one past the last valid item in the queue */ - char *state; /**< uncompressed stream state; lives in scratch */ - char *streamState; /**< - * real stream state; used to access structures which - * not duplicated the scratch state (bounded repeats, - * etc) */ - u64a offset; /**< base offset of the buffer */ - const u8 *buffer; /**< buffer to scan */ - size_t length; /**< length of buffer */ - const u8 *history; /**< - * history buffer; (logically) immediately before the - * main buffer */ - size_t hlength; /**< length of the history buffer */ - struct hs_scratch *scratch; /**< global scratch space */ - char report_current; /**< - * report_current matches at starting offset through - * callback. If true, the queue must be located at a - * point where MO_MATCHES_PENDING was returned */ - NfaCallback cb; /**< callback to trigger on matches */ - void *context; /**< context to pass along with a callback */ - struct mq_item items[MAX_MQE_LEN]; /**< queue items */ -}; - - -/** - * Pushes an (event, location, som) item onto a queue. If it is identical to the - * previous item on the queue, it is not added to the queue. - * @param q queue - * @param e event - * @param som som marker - * @param loc event location - */ -static really_inline -void pushQueueSom(struct mq * restrict q, u32 e, s64a loc, u64a som) { - DEBUG_PRINTF("pushing %u@%lld -> %u [som = %llu]\n", e, loc, q->end, som); - assert(q->end < MAX_MQE_LEN); - assert(e < MQE_INVALID); -/* stop gcc getting too smart for its own good */ -/* assert(!q->end || q->items[q->end - 1].location <= loc); */ - assert(q->end || e == MQE_START); - - // Avoid duplicate items on the queue. - if (q->end) { - struct mq_item *item = &q->items[q->end - 1]; - if (item->type == e && item->location == loc) { - DEBUG_PRINTF("dropping duplicate item\n"); - LIMIT_TO_AT_MOST(&item->som, som); /* take lower som */ - return; - } - } - - u32 end = q->end; - struct mq_item *item = &q->items[end]; - item->type = e; - item->location = loc; - item->som = som; - q->end = end + 1; -} - -/** - * Pushes an (event, location) item onto a queue. If it is identical to the - * previous item on the queue, it is not added to the queue. - * @param q queue - * @param e event - * @param loc event location - */ -static really_inline -void pushQueue(struct mq * restrict q, u32 e, s64a loc) { - pushQueueSom(q, e, loc, 0); -} - -/** - * Pushes an (event, location) item onto a queue. - * This version of @ref pushQueue does not check to ensure that the item being - * added is not already on the queue. Used for events other than tops. - */ -static really_inline -void pushQueueNoMerge(struct mq * restrict q, u32 e, s64a loc) { - DEBUG_PRINTF("pushing %u@%lld -> %u\n", e, loc, q->end); - assert(q->end < MAX_MQE_LEN); - assert(e < MQE_INVALID); -/* stop gcc getting too smart for its own good */ -/* assert(!q->end || q->items[q->end - 1].location <= loc); */ - assert(q->end || e == MQE_START); - -#ifndef NDEBUG - // We assert that the event is different from its predecessor. If it's a - // dupe, you should have used the ordinary pushQueue call. 
- if (q->end) { - UNUSED struct mq_item *prev = &q->items[q->end - 1]; - assert(prev->type != e || prev->location != loc); - } -#endif - - u32 end = q->end; - struct mq_item *item = &q->items[end]; - item->type = e; - item->location = loc; - item->som = 0; - q->end = end + 1; -} - -/** \brief Returns the type of the current queue event. */ -static really_inline u32 q_cur_type(const struct mq *q) { - assert(q->cur < q->end); - assert(q->cur < MAX_MQE_LEN); - return q->items[q->cur].type; -} - -/** \brief Returns the location (relative to the beginning of the current data - * buffer) of the current queue event. */ -static really_inline s64a q_cur_loc(const struct mq *q) { - assert(q->cur < q->end); - assert(q->cur < MAX_MQE_LEN); - return q->items[q->cur].location; -} - -/** \brief Returns the type of the last event in the queue. */ -static really_inline u32 q_last_type(const struct mq *q) { - assert(q->cur < q->end); - assert(q->end > 0); - assert(q->end <= MAX_MQE_LEN); - return q->items[q->end - 1].type; -} - -/** \brief Returns the location (relative to the beginning of the current data - * buffer) of the last event in the queue. */ -static really_inline s64a q_last_loc(const struct mq *q) { - assert(q->cur < q->end); - assert(q->end > 0); - assert(q->end <= MAX_MQE_LEN); - return q->items[q->end - 1].location; -} - -/** \brief Returns the absolute stream offset of the current queue event. */ -static really_inline u64a q_cur_offset(const struct mq *q) { - assert(q->cur < q->end); - assert(q->cur < MAX_MQE_LEN); - return q->offset + (u64a)q->items[q->cur].location; -} - -/** - * \brief Removes all events in the queue before the given location. - */ -static really_inline -void q_skip_forward_to(struct mq *q, s64a min_loc) { - assert(q->cur < q->end); - assert(q->cur < MAX_MQE_LEN); - assert(q->items[q->cur].type == MQE_START); - - if (q_cur_loc(q) >= min_loc) { - DEBUG_PRINTF("all events >= loc %lld\n", min_loc); - return; - } - - const u32 start_loc = q->cur; - - do { - DEBUG_PRINTF("remove item with loc=%lld\n", q_cur_loc(q)); - q->cur++; - } while (q->cur < q->end && q_cur_loc(q) < min_loc); - - if (q->cur > start_loc) { - // Move original MQE_START item forward. - q->cur--; - q->items[q->cur] = q->items[start_loc]; - } -} - -#ifdef DEBUG -// Dump the contents of the given queue. -static never_inline UNUSED -void debugQueue(const struct mq *q) { - DEBUG_PRINTF("q=%p, nfa=%p\n", q, q->nfa); - DEBUG_PRINTF("q offset=%llu, buf={%p, len=%zu}, history={%p, len=%zu}\n", - q->offset, q->buffer, q->length, q->history, q->hlength); - DEBUG_PRINTF("q cur=%u, end=%u\n", q->cur, q->end); - for (u32 cur = q->cur; cur < q->end; cur++) { - const char *type = "UNKNOWN"; - u32 e = q->items[cur].type; - switch (e) { - case MQE_START: - type = "MQE_START"; - break; - case MQE_END: - type = "MQE_END"; - break; - case MQE_TOP: - type = "MQE_TOP"; - break; - case MQE_INVALID: - type = "MQE_INVALID"; - break; - default: - assert(e >= MQE_TOP_FIRST && e < MQE_INVALID); - type = "MQE_TOP_N"; - break; - } - DEBUG_PRINTF("\tq[%u] %lld %u:%s\n", cur, q->items[cur].location, - q->items[cur].type, type); - } -} -#endif // DEBUG - -#ifdef __cplusplus -} -#endif - -#endif + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef NFA_API_QUEUE_H +#define NFA_API_QUEUE_H + +#ifdef __cplusplus +extern "C" +{ +#endif + +#include "ue2common.h" +#include "callback.h" + +/** Size of mq::items, max elements on a queue. */ +#define MAX_MQE_LEN 10 + +/** Queue events */ + +/** Queue event: begin scanning. Note: stateless engines will start from this + * location. */ +#define MQE_START 0U + +/** Queue event: stop scanning. */ +#define MQE_END 1U + +/** Queue event: enable start and start-dot-star. */ +#define MQE_TOP 2U + +/** Queue event: first event corresponding to a numbered TOP. Additional tops + * (in multi-top engines) use the event values from MQE_TOP_FIRST to + * MQE_INVALID - 1. */ +#define MQE_TOP_FIRST 4U + +/** Invalid queue event */ +#define MQE_INVALID (~0U) + +/** Queue item */ +struct mq_item { + u32 type; /**< event type, from MQE_* */ + s64a location; /**< relative to the start of the current buffer */ + u64a som; /**< pattern start-of-match corresponding to a top, only used + * by som engines. */ +}; + +// Forward decl. +struct NFA; + +/** + * Queue of events to control engine execution. mq::cur is index of first + * valid event, mq::end is one past the index of last valid event. + */ +struct mq { + const struct NFA *nfa; /**< nfa corresponding to the queue */ + u32 cur; /**< index of the first valid item in the queue */ + u32 end; /**< index one past the last valid item in the queue */ + char *state; /**< uncompressed stream state; lives in scratch */ + char *streamState; /**< + * real stream state; used to access structures which + * not duplicated the scratch state (bounded repeats, + * etc) */ + u64a offset; /**< base offset of the buffer */ + const u8 *buffer; /**< buffer to scan */ + size_t length; /**< length of buffer */ + const u8 *history; /**< + * history buffer; (logically) immediately before the + * main buffer */ + size_t hlength; /**< length of the history buffer */ + struct hs_scratch *scratch; /**< global scratch space */ + char report_current; /**< + * report_current matches at starting offset through + * callback. 
If true, the queue must be located at a + * point where MO_MATCHES_PENDING was returned */ + NfaCallback cb; /**< callback to trigger on matches */ + void *context; /**< context to pass along with a callback */ + struct mq_item items[MAX_MQE_LEN]; /**< queue items */ +}; + + +/** + * Pushes an (event, location, som) item onto a queue. If it is identical to the + * previous item on the queue, it is not added to the queue. + * @param q queue + * @param e event + * @param som som marker + * @param loc event location + */ +static really_inline +void pushQueueSom(struct mq * restrict q, u32 e, s64a loc, u64a som) { + DEBUG_PRINTF("pushing %u@%lld -> %u [som = %llu]\n", e, loc, q->end, som); + assert(q->end < MAX_MQE_LEN); + assert(e < MQE_INVALID); +/* stop gcc getting too smart for its own good */ +/* assert(!q->end || q->items[q->end - 1].location <= loc); */ + assert(q->end || e == MQE_START); + + // Avoid duplicate items on the queue. + if (q->end) { + struct mq_item *item = &q->items[q->end - 1]; + if (item->type == e && item->location == loc) { + DEBUG_PRINTF("dropping duplicate item\n"); + LIMIT_TO_AT_MOST(&item->som, som); /* take lower som */ + return; + } + } + + u32 end = q->end; + struct mq_item *item = &q->items[end]; + item->type = e; + item->location = loc; + item->som = som; + q->end = end + 1; +} + +/** + * Pushes an (event, location) item onto a queue. If it is identical to the + * previous item on the queue, it is not added to the queue. + * @param q queue + * @param e event + * @param loc event location + */ +static really_inline +void pushQueue(struct mq * restrict q, u32 e, s64a loc) { + pushQueueSom(q, e, loc, 0); +} + +/** + * Pushes an (event, location) item onto a queue. + * This version of @ref pushQueue does not check to ensure that the item being + * added is not already on the queue. Used for events other than tops. + */ +static really_inline +void pushQueueNoMerge(struct mq * restrict q, u32 e, s64a loc) { + DEBUG_PRINTF("pushing %u@%lld -> %u\n", e, loc, q->end); + assert(q->end < MAX_MQE_LEN); + assert(e < MQE_INVALID); +/* stop gcc getting too smart for its own good */ +/* assert(!q->end || q->items[q->end - 1].location <= loc); */ + assert(q->end || e == MQE_START); + +#ifndef NDEBUG + // We assert that the event is different from its predecessor. If it's a + // dupe, you should have used the ordinary pushQueue call. + if (q->end) { + UNUSED struct mq_item *prev = &q->items[q->end - 1]; + assert(prev->type != e || prev->location != loc); + } +#endif + + u32 end = q->end; + struct mq_item *item = &q->items[end]; + item->type = e; + item->location = loc; + item->som = 0; + q->end = end + 1; +} + +/** \brief Returns the type of the current queue event. */ +static really_inline u32 q_cur_type(const struct mq *q) { + assert(q->cur < q->end); + assert(q->cur < MAX_MQE_LEN); + return q->items[q->cur].type; +} + +/** \brief Returns the location (relative to the beginning of the current data + * buffer) of the current queue event. */ +static really_inline s64a q_cur_loc(const struct mq *q) { + assert(q->cur < q->end); + assert(q->cur < MAX_MQE_LEN); + return q->items[q->cur].location; +} + +/** \brief Returns the type of the last event in the queue. */ +static really_inline u32 q_last_type(const struct mq *q) { + assert(q->cur < q->end); + assert(q->end > 0); + assert(q->end <= MAX_MQE_LEN); + return q->items[q->end - 1].type; +} + +/** \brief Returns the location (relative to the beginning of the current data + * buffer) of the last event in the queue. 
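A small consumer sketch using the accessors above; engine implementations consume items by advancing q->cur in just this way (the function name is invented):

static
void drainQueue(struct mq *q) {
    while (q->cur < q->end) {
        DEBUG_PRINTF("event %u at loc %lld (abs %llu)\n", q_cur_type(q),
                     q_cur_loc(q), q_cur_offset(q));
        q->cur++;
    }
}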
*/ +static really_inline s64a q_last_loc(const struct mq *q) { + assert(q->cur < q->end); + assert(q->end > 0); + assert(q->end <= MAX_MQE_LEN); + return q->items[q->end - 1].location; +} + +/** \brief Returns the absolute stream offset of the current queue event. */ +static really_inline u64a q_cur_offset(const struct mq *q) { + assert(q->cur < q->end); + assert(q->cur < MAX_MQE_LEN); + return q->offset + (u64a)q->items[q->cur].location; +} + +/** + * \brief Removes all events in the queue before the given location. + */ +static really_inline +void q_skip_forward_to(struct mq *q, s64a min_loc) { + assert(q->cur < q->end); + assert(q->cur < MAX_MQE_LEN); + assert(q->items[q->cur].type == MQE_START); + + if (q_cur_loc(q) >= min_loc) { + DEBUG_PRINTF("all events >= loc %lld\n", min_loc); + return; + } + + const u32 start_loc = q->cur; + + do { + DEBUG_PRINTF("remove item with loc=%lld\n", q_cur_loc(q)); + q->cur++; + } while (q->cur < q->end && q_cur_loc(q) < min_loc); + + if (q->cur > start_loc) { + // Move original MQE_START item forward. + q->cur--; + q->items[q->cur] = q->items[start_loc]; + } +} + +#ifdef DEBUG +// Dump the contents of the given queue. +static never_inline UNUSED +void debugQueue(const struct mq *q) { + DEBUG_PRINTF("q=%p, nfa=%p\n", q, q->nfa); + DEBUG_PRINTF("q offset=%llu, buf={%p, len=%zu}, history={%p, len=%zu}\n", + q->offset, q->buffer, q->length, q->history, q->hlength); + DEBUG_PRINTF("q cur=%u, end=%u\n", q->cur, q->end); + for (u32 cur = q->cur; cur < q->end; cur++) { + const char *type = "UNKNOWN"; + u32 e = q->items[cur].type; + switch (e) { + case MQE_START: + type = "MQE_START"; + break; + case MQE_END: + type = "MQE_END"; + break; + case MQE_TOP: + type = "MQE_TOP"; + break; + case MQE_INVALID: + type = "MQE_INVALID"; + break; + default: + assert(e >= MQE_TOP_FIRST && e < MQE_INVALID); + type = "MQE_TOP_N"; + break; + } + DEBUG_PRINTF("\tq[%u] %lld %u:%s\n", cur, q->items[cur].location, + q->items[cur].type, type); + } +} +#endif // DEBUG + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/contrib/libs/hyperscan/src/nfa/nfa_api_util.h b/contrib/libs/hyperscan/src/nfa/nfa_api_util.h index 7e797e74b1..affc5f38f3 100644 --- a/contrib/libs/hyperscan/src/nfa/nfa_api_util.h +++ b/contrib/libs/hyperscan/src/nfa/nfa_api_util.h @@ -1,82 +1,82 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef NFA_API_UTIL_H -#define NFA_API_UTIL_H - -#include "nfa_api_queue.h" -#include "ue2common.h" - -/* returns the byte prior to the given location, NUL if not available */ -static really_inline -u8 queue_prev_byte(const struct mq *q, s64a loc) { - if (loc <= 0) { - if (1LL - loc > (s64a)q->hlength) { - return 0; /* assume NUL for start of stream write */ - } - // In the history buffer. - assert(q->history); - assert(q->hlength >= (u64a)(loc * -1)); - return q->history[q->hlength - 1 + loc]; - } else { - // In the stream write buffer. - assert(q->buffer); - assert(q->length >= (u64a)loc); - return q->buffer[loc - 1]; - } -} - -/* this is a modified version of pushQueue where we statically know the state of - * the queue. Does not attempt to merge and inserts at the given queue - * position. */ -static really_inline -void pushQueueAt(struct mq * restrict q, u32 pos, u32 e, s64a loc) { - assert(pos == q->end); - DEBUG_PRINTF("pushing %u@%lld -> %u\n", e, loc, q->end); - assert(q->end < MAX_MQE_LEN); - assert(e < MQE_INVALID); -/* stop gcc getting too smart for its own good */ -/* assert(!q->end || q->items[q->end - 1].location <= loc); */ - assert(q->end || e == MQE_START); - -#ifndef NDEBUG - // We assert that the event is different from its predecessor. If it's a - // dupe, you should have used the ordinary pushQueue call. - if (q->end) { - UNUSED struct mq_item *prev = &q->items[q->end - 1]; - assert(prev->type != e || prev->location != loc); - } -#endif - - struct mq_item *item = &q->items[pos]; - item->type = e; - item->location = loc; - item->som = 0; - q->end = pos + 1; -} -#endif +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef NFA_API_UTIL_H +#define NFA_API_UTIL_H + +#include "nfa_api_queue.h" +#include "ue2common.h" + +/* returns the byte prior to the given location, NUL if not available */ +static really_inline +u8 queue_prev_byte(const struct mq *q, s64a loc) { + if (loc <= 0) { + if (1LL - loc > (s64a)q->hlength) { + return 0; /* assume NUL for start of stream write */ + } + // In the history buffer. + assert(q->history); + assert(q->hlength >= (u64a)(loc * -1)); + return q->history[q->hlength - 1 + loc]; + } else { + // In the stream write buffer. + assert(q->buffer); + assert(q->length >= (u64a)loc); + return q->buffer[loc - 1]; + } +} + +/* this is a modified version of pushQueue where we statically know the state of + * the queue. Does not attempt to merge and inserts at the given queue + * position. */ +static really_inline +void pushQueueAt(struct mq * restrict q, u32 pos, u32 e, s64a loc) { + assert(pos == q->end); + DEBUG_PRINTF("pushing %u@%lld -> %u\n", e, loc, q->end); + assert(q->end < MAX_MQE_LEN); + assert(e < MQE_INVALID); +/* stop gcc getting too smart for its own good */ +/* assert(!q->end || q->items[q->end - 1].location <= loc); */ + assert(q->end || e == MQE_START); + +#ifndef NDEBUG + // We assert that the event is different from its predecessor. If it's a + // dupe, you should have used the ordinary pushQueue call. + if (q->end) { + UNUSED struct mq_item *prev = &q->items[q->end - 1]; + assert(prev->type != e || prev->location != loc); + } +#endif + + struct mq_item *item = &q->items[pos]; + item->type = e; + item->location = loc; + item->som = 0; + q->end = pos + 1; +} +#endif diff --git a/contrib/libs/hyperscan/src/nfa/nfa_build_util.cpp b/contrib/libs/hyperscan/src/nfa/nfa_build_util.cpp index bcf7ae1708..47153163e9 100644 --- a/contrib/libs/hyperscan/src/nfa/nfa_build_util.cpp +++ b/contrib/libs/hyperscan/src/nfa/nfa_build_util.cpp @@ -1,96 +1,96 @@ -/* +/* * Copyright (c) 2015-2020, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
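/* Worked example for queue_prev_byte() above, derived from its definition.
 * With hlength == 3 and a main buffer "abc":
 *   loc ==  2  ->  buffer[1]  ('b': the byte before location 2)
 *   loc ==  0  ->  history[2] (last byte of the history buffer)
 *   loc == -2  ->  history[0]
 *   loc == -3  ->  NUL        (before recorded history; start of stream)
 */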
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "nfa_build_util.h" - -#include "limex_internal.h" -#include "mcclellancompile.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "nfa_build_util.h" + +#include "limex_internal.h" +#include "mcclellancompile.h" #include "mcsheng_compile.h" #include "shengcompile.h" -#include "nfa_internal.h" -#include "repeat_internal.h" -#include "ue2common.h" - -#include <algorithm> -#include <cassert> -#include <cstddef> -#include <cstdio> -#include <cstdlib> -#include <cstring> -#include <sstream> - -using namespace std; - -namespace ue2 { - -namespace { - -template<NFAEngineType t> struct NFATraits { }; - -template<template<NFAEngineType t> class sfunc, typename rv_t, typename arg_t, - NFAEngineType lb> -struct DISPATCH_BY_NFA_TYPE_INT { - static rv_t doOp(NFAEngineType i, const arg_t &arg) { - if (i == lb) { - return sfunc<lb>::call(arg); - } else { - return DISPATCH_BY_NFA_TYPE_INT<sfunc, rv_t, arg_t, - (NFAEngineType)(lb + 1)> - ::doOp(i, arg); - } - } -}; - -template<template<NFAEngineType t> class sfunc, typename rv_t, typename arg_t> -struct DISPATCH_BY_NFA_TYPE_INT<sfunc, rv_t, arg_t, INVALID_NFA> { - // dummy - static rv_t doOp(NFAEngineType, const arg_t &) { - assert(0); - throw std::logic_error("Unreachable"); - } -}; - -#define DISPATCH_BY_NFA_TYPE(i, op, arg) \ - DISPATCH_BY_NFA_TYPE_INT<op, decltype(op<(NFAEngineType)0>::call(arg)), \ - decltype(arg), (NFAEngineType)0>::doOp(i, arg) -} - +#include "nfa_internal.h" +#include "repeat_internal.h" +#include "ue2common.h" + +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <sstream> + +using namespace std; + +namespace ue2 { + +namespace { + +template<NFAEngineType t> struct NFATraits { }; + +template<template<NFAEngineType t> class sfunc, typename rv_t, typename arg_t, + NFAEngineType lb> +struct DISPATCH_BY_NFA_TYPE_INT { + static rv_t doOp(NFAEngineType i, const arg_t &arg) { + if (i == lb) { + return sfunc<lb>::call(arg); + } else { + return DISPATCH_BY_NFA_TYPE_INT<sfunc, rv_t, arg_t, + (NFAEngineType)(lb + 1)> + ::doOp(i, arg); + } + } +}; + +template<template<NFAEngineType t> class sfunc, typename rv_t, typename arg_t> +struct DISPATCH_BY_NFA_TYPE_INT<sfunc, rv_t, arg_t, INVALID_NFA> { + // dummy + static rv_t doOp(NFAEngineType, const arg_t &) { + assert(0); + throw std::logic_error("Unreachable"); + } +}; + +#define DISPATCH_BY_NFA_TYPE(i, op, arg) \ + DISPATCH_BY_NFA_TYPE_INT<op, decltype(op<(NFAEngineType)0>::call(arg)), \ + decltype(arg), (NFAEngineType)0>::doOp(i, arg) +} + typedef bool (*nfa_dispatch_fn)(const NFA *nfa); - -template<typename T> -static -bool has_accel_limex(const NFA *nfa) { - const T *limex = (const T *)getImplNfa(nfa); - return limex->accelCount; -} - + template<typename T> -static +static +bool has_accel_limex(const NFA *nfa) { + const T *limex = (const T *)getImplNfa(nfa); + return limex->accelCount; +} + +template<typename T> +static bool has_repeats_limex(const NFA *nfa) { const T *limex = (const T *)getImplNfa(nfa); return limex->repeatCount; @@ -115,261 +115,261 @@ bool has_repeats_other_than_firsts_limex(const NFA *nfa) { } } - return false; -} - + return false; +} + static bool dispatch_false(const NFA *) { return false; } -#ifdef DUMP_SUPPORT -namespace { -template<NFAEngineType t> -struct getName { - static const char *call(void *) { - return NFATraits<t>::name; - } -}; - -// descr helper for LimEx NFAs -template<NFAEngineType t> -static -string getDescriptionLimEx(const NFA *nfa) { - const typename NFATraits<t>::implNFA_t *limex = - (const typename NFATraits<t>::implNFA_t *)getImplNfa(nfa); - ostringstream oss; - oss << 
NFATraits<t>::name << "/" << limex->exceptionCount; - if (limex->repeatCount) { - oss << " +" << limex->repeatCount << "r"; - } - return oss.str(); -} -} - -// generic description: just return the name -namespace { -template<NFAEngineType t> -struct getDescription { - static string call(const void *) { - return string(NFATraits<t>::name); - } -}; -} -#endif - - -/* build-utility Traits */ - -namespace { -enum NFACategory {NFA_LIMEX, NFA_OTHER}; - -// Some of our traits we want around in DUMP_SUPPORT mode only. -#if defined(DUMP_SUPPORT) -#define DO_IF_DUMP_SUPPORT(a) a -#else -#define DO_IF_DUMP_SUPPORT(a) -#endif - +#ifdef DUMP_SUPPORT +namespace { +template<NFAEngineType t> +struct getName { + static const char *call(void *) { + return NFATraits<t>::name; + } +}; + +// descr helper for LimEx NFAs +template<NFAEngineType t> +static +string getDescriptionLimEx(const NFA *nfa) { + const typename NFATraits<t>::implNFA_t *limex = + (const typename NFATraits<t>::implNFA_t *)getImplNfa(nfa); + ostringstream oss; + oss << NFATraits<t>::name << "/" << limex->exceptionCount; + if (limex->repeatCount) { + oss << " +" << limex->repeatCount << "r"; + } + return oss.str(); +} +} + +// generic description: just return the name +namespace { +template<NFAEngineType t> +struct getDescription { + static string call(const void *) { + return string(NFATraits<t>::name); + } +}; +} +#endif + + +/* build-utility Traits */ + +namespace { +enum NFACategory {NFA_LIMEX, NFA_OTHER}; + +// Some of our traits we want around in DUMP_SUPPORT mode only. +#if defined(DUMP_SUPPORT) +#define DO_IF_DUMP_SUPPORT(a) a +#else +#define DO_IF_DUMP_SUPPORT(a) +#endif + #define MAKE_LIMEX_TRAITS(mlt_size, mlt_align) \ template<> struct NFATraits<LIMEX_NFA_##mlt_size> { \ - static UNUSED const char *name; \ - static const NFACategory category = NFA_LIMEX; \ - typedef LimExNFA##mlt_size implNFA_t; \ + static UNUSED const char *name; \ + static const NFACategory category = NFA_LIMEX; \ + typedef LimExNFA##mlt_size implNFA_t; \ static const nfa_dispatch_fn has_accel; \ static const nfa_dispatch_fn has_repeats; \ static const nfa_dispatch_fn has_repeats_other_than_firsts; \ - static const u32 stateAlign = \ + static const u32 stateAlign = \ MAX(mlt_align, alignof(RepeatControl)); \ - }; \ + }; \ const nfa_dispatch_fn NFATraits<LIMEX_NFA_##mlt_size>::has_accel \ - = has_accel_limex<LimExNFA##mlt_size>; \ + = has_accel_limex<LimExNFA##mlt_size>; \ const nfa_dispatch_fn NFATraits<LIMEX_NFA_##mlt_size>::has_repeats \ = has_repeats_limex<LimExNFA##mlt_size>; \ const nfa_dispatch_fn \ NFATraits<LIMEX_NFA_##mlt_size>::has_repeats_other_than_firsts \ = has_repeats_other_than_firsts_limex<LimExNFA##mlt_size>; \ - DO_IF_DUMP_SUPPORT( \ + DO_IF_DUMP_SUPPORT( \ const char *NFATraits<LIMEX_NFA_##mlt_size>::name \ = "LimEx "#mlt_size; \ template<> struct getDescription<LIMEX_NFA_##mlt_size> { \ static string call(const void *p) { \ return getDescriptionLimEx<LIMEX_NFA_##mlt_size>((const NFA *)p); \ } \ - };) - + };) + MAKE_LIMEX_TRAITS(32, alignof(u32)) MAKE_LIMEX_TRAITS(64, alignof(m128)) /* special, 32bit arch uses m128 */ MAKE_LIMEX_TRAITS(128, alignof(m128)) MAKE_LIMEX_TRAITS(256, alignof(m256)) MAKE_LIMEX_TRAITS(384, alignof(m384)) MAKE_LIMEX_TRAITS(512, alignof(m512)) - -template<> struct NFATraits<MCCLELLAN_NFA_8> { - UNUSED static const char *name; - static const NFACategory category = NFA_OTHER; - static const u32 stateAlign = 1; + +template<> struct NFATraits<MCCLELLAN_NFA_8> { + UNUSED static const char *name; + static const NFACategory 
category = NFA_OTHER; + static const u32 stateAlign = 1; static const nfa_dispatch_fn has_accel; static const nfa_dispatch_fn has_repeats; static const nfa_dispatch_fn has_repeats_other_than_firsts; -}; +}; const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_8>::has_accel = has_accel_mcclellan; const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_8>::has_repeats = dispatch_false; const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_8>::has_repeats_other_than_firsts = dispatch_false; -#if defined(DUMP_SUPPORT) -const char *NFATraits<MCCLELLAN_NFA_8>::name = "McClellan 8"; -#endif - -template<> struct NFATraits<MCCLELLAN_NFA_16> { - UNUSED static const char *name; - static const NFACategory category = NFA_OTHER; - static const u32 stateAlign = 2; +#if defined(DUMP_SUPPORT) +const char *NFATraits<MCCLELLAN_NFA_8>::name = "McClellan 8"; +#endif + +template<> struct NFATraits<MCCLELLAN_NFA_16> { + UNUSED static const char *name; + static const NFACategory category = NFA_OTHER; + static const u32 stateAlign = 2; static const nfa_dispatch_fn has_accel; static const nfa_dispatch_fn has_repeats; static const nfa_dispatch_fn has_repeats_other_than_firsts; -}; +}; const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_16>::has_accel = has_accel_mcclellan; const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_16>::has_repeats = dispatch_false; const nfa_dispatch_fn NFATraits<MCCLELLAN_NFA_16>::has_repeats_other_than_firsts = dispatch_false; -#if defined(DUMP_SUPPORT) -const char *NFATraits<MCCLELLAN_NFA_16>::name = "McClellan 16"; -#endif - -template<> struct NFATraits<GOUGH_NFA_8> { - UNUSED static const char *name; - static const NFACategory category = NFA_OTHER; - static const u32 stateAlign = 8; +#if defined(DUMP_SUPPORT) +const char *NFATraits<MCCLELLAN_NFA_16>::name = "McClellan 16"; +#endif + +template<> struct NFATraits<GOUGH_NFA_8> { + UNUSED static const char *name; + static const NFACategory category = NFA_OTHER; + static const u32 stateAlign = 8; static const nfa_dispatch_fn has_accel; static const nfa_dispatch_fn has_repeats; static const nfa_dispatch_fn has_repeats_other_than_firsts; -}; +}; const nfa_dispatch_fn NFATraits<GOUGH_NFA_8>::has_accel = has_accel_mcclellan; const nfa_dispatch_fn NFATraits<GOUGH_NFA_8>::has_repeats = dispatch_false; const nfa_dispatch_fn NFATraits<GOUGH_NFA_8>::has_repeats_other_than_firsts = dispatch_false; -#if defined(DUMP_SUPPORT) -const char *NFATraits<GOUGH_NFA_8>::name = "Goughfish 8"; -#endif - -template<> struct NFATraits<GOUGH_NFA_16> { - UNUSED static const char *name; - static const NFACategory category = NFA_OTHER; - static const u32 stateAlign = 8; +#if defined(DUMP_SUPPORT) +const char *NFATraits<GOUGH_NFA_8>::name = "Goughfish 8"; +#endif + +template<> struct NFATraits<GOUGH_NFA_16> { + UNUSED static const char *name; + static const NFACategory category = NFA_OTHER; + static const u32 stateAlign = 8; static const nfa_dispatch_fn has_accel; static const nfa_dispatch_fn has_repeats; static const nfa_dispatch_fn has_repeats_other_than_firsts; -}; +}; const nfa_dispatch_fn NFATraits<GOUGH_NFA_16>::has_accel = has_accel_mcclellan; const nfa_dispatch_fn NFATraits<GOUGH_NFA_16>::has_repeats = dispatch_false; const nfa_dispatch_fn NFATraits<GOUGH_NFA_16>::has_repeats_other_than_firsts = dispatch_false; -#if defined(DUMP_SUPPORT) -const char *NFATraits<GOUGH_NFA_16>::name = "Goughfish 16"; -#endif - +#if defined(DUMP_SUPPORT) +const char *NFATraits<GOUGH_NFA_16>::name = "Goughfish 16"; +#endif + template<> struct NFATraits<MPV_NFA> { - UNUSED static const char *name; - static const 
NFACategory category = NFA_OTHER; - static const u32 stateAlign = 8; + UNUSED static const char *name; + static const NFACategory category = NFA_OTHER; + static const u32 stateAlign = 8; static const nfa_dispatch_fn has_accel; static const nfa_dispatch_fn has_repeats; static const nfa_dispatch_fn has_repeats_other_than_firsts; -}; +}; const nfa_dispatch_fn NFATraits<MPV_NFA>::has_accel = dispatch_false; const nfa_dispatch_fn NFATraits<MPV_NFA>::has_repeats = dispatch_false; const nfa_dispatch_fn NFATraits<MPV_NFA>::has_repeats_other_than_firsts = dispatch_false; -#if defined(DUMP_SUPPORT) +#if defined(DUMP_SUPPORT) const char *NFATraits<MPV_NFA>::name = "Mega-Puff-Vac"; -#endif - +#endif + template<> struct NFATraits<CASTLE_NFA> { - UNUSED static const char *name; - static const NFACategory category = NFA_OTHER; - static const u32 stateAlign = 8; + UNUSED static const char *name; + static const NFACategory category = NFA_OTHER; + static const u32 stateAlign = 8; static const nfa_dispatch_fn has_accel; static const nfa_dispatch_fn has_repeats; static const nfa_dispatch_fn has_repeats_other_than_firsts; -}; +}; const nfa_dispatch_fn NFATraits<CASTLE_NFA>::has_accel = dispatch_false; const nfa_dispatch_fn NFATraits<CASTLE_NFA>::has_repeats = dispatch_false; const nfa_dispatch_fn NFATraits<CASTLE_NFA>::has_repeats_other_than_firsts = dispatch_false; -#if defined(DUMP_SUPPORT) +#if defined(DUMP_SUPPORT) const char *NFATraits<CASTLE_NFA>::name = "Castle"; -#endif - +#endif + template<> struct NFATraits<LBR_NFA_DOT> { - UNUSED static const char *name; - static const NFACategory category = NFA_OTHER; - static const u32 stateAlign = 8; + UNUSED static const char *name; + static const NFACategory category = NFA_OTHER; + static const u32 stateAlign = 8; static const nfa_dispatch_fn has_accel; static const nfa_dispatch_fn has_repeats; static const nfa_dispatch_fn has_repeats_other_than_firsts; -}; +}; const nfa_dispatch_fn NFATraits<LBR_NFA_DOT>::has_accel = dispatch_false; const nfa_dispatch_fn NFATraits<LBR_NFA_DOT>::has_repeats = dispatch_false; const nfa_dispatch_fn NFATraits<LBR_NFA_DOT>::has_repeats_other_than_firsts = dispatch_false; -#if defined(DUMP_SUPPORT) +#if defined(DUMP_SUPPORT) const char *NFATraits<LBR_NFA_DOT>::name = "Lim Bounded Repeat (D)"; -#endif - +#endif + template<> struct NFATraits<LBR_NFA_VERM> { - UNUSED static const char *name; - static const NFACategory category = NFA_OTHER; - static const u32 stateAlign = 8; + UNUSED static const char *name; + static const NFACategory category = NFA_OTHER; + static const u32 stateAlign = 8; static const nfa_dispatch_fn has_accel; static const nfa_dispatch_fn has_repeats; static const nfa_dispatch_fn has_repeats_other_than_firsts; -}; +}; const nfa_dispatch_fn NFATraits<LBR_NFA_VERM>::has_accel = dispatch_false; const nfa_dispatch_fn NFATraits<LBR_NFA_VERM>::has_repeats = dispatch_false; const nfa_dispatch_fn NFATraits<LBR_NFA_VERM>::has_repeats_other_than_firsts = dispatch_false; -#if defined(DUMP_SUPPORT) +#if defined(DUMP_SUPPORT) const char *NFATraits<LBR_NFA_VERM>::name = "Lim Bounded Repeat (V)"; -#endif - +#endif + template<> struct NFATraits<LBR_NFA_NVERM> { - UNUSED static const char *name; - static const NFACategory category = NFA_OTHER; - static const u32 stateAlign = 8; + UNUSED static const char *name; + static const NFACategory category = NFA_OTHER; + static const u32 stateAlign = 8; static const nfa_dispatch_fn has_accel; static const nfa_dispatch_fn has_repeats; static const nfa_dispatch_fn has_repeats_other_than_firsts; 
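    // All of these NFATraits specializations follow the same shape: a
    // compile-time record (name, category, state alignment, dispatch
    // function pointers) that DISPATCH_BY_NFA_TYPE can select from a
    // runtime NFA::type value. A minimal consumer sketch in the style of
    // the is_limex/getStateAlign functors later in this file (the
    // getCategory functor itself is hypothetical, not part of the
    // original source):
    //
    //     template<NFAEngineType t>
    //     struct getCategory {
    //         static NFACategory call(const void *) {
    //             return NFATraits<t>::category; // resolved per engine type
    //         }
    //     };
    //
    //     // usage:
    //     // NFACategory c = DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type,
    //     //                                      getCategory, &nfa);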
-}; +}; const nfa_dispatch_fn NFATraits<LBR_NFA_NVERM>::has_accel = dispatch_false; const nfa_dispatch_fn NFATraits<LBR_NFA_NVERM>::has_repeats = dispatch_false; const nfa_dispatch_fn NFATraits<LBR_NFA_NVERM>::has_repeats_other_than_firsts = dispatch_false; -#if defined(DUMP_SUPPORT) +#if defined(DUMP_SUPPORT) const char *NFATraits<LBR_NFA_NVERM>::name = "Lim Bounded Repeat (NV)"; -#endif - +#endif + template<> struct NFATraits<LBR_NFA_SHUF> { - UNUSED static const char *name; - static const NFACategory category = NFA_OTHER; - static const u32 stateAlign = 8; + UNUSED static const char *name; + static const NFACategory category = NFA_OTHER; + static const u32 stateAlign = 8; static const nfa_dispatch_fn has_accel; static const nfa_dispatch_fn has_repeats; static const nfa_dispatch_fn has_repeats_other_than_firsts; -}; +}; const nfa_dispatch_fn NFATraits<LBR_NFA_SHUF>::has_accel = dispatch_false; const nfa_dispatch_fn NFATraits<LBR_NFA_SHUF>::has_repeats = dispatch_false; const nfa_dispatch_fn NFATraits<LBR_NFA_SHUF>::has_repeats_other_than_firsts = dispatch_false; -#if defined(DUMP_SUPPORT) +#if defined(DUMP_SUPPORT) const char *NFATraits<LBR_NFA_SHUF>::name = "Lim Bounded Repeat (S)"; -#endif - +#endif + template<> struct NFATraits<LBR_NFA_TRUF> { - UNUSED static const char *name; - static const NFACategory category = NFA_OTHER; - static const u32 stateAlign = 8; + UNUSED static const char *name; + static const NFACategory category = NFA_OTHER; + static const u32 stateAlign = 8; static const nfa_dispatch_fn has_accel; static const nfa_dispatch_fn has_repeats; static const nfa_dispatch_fn has_repeats_other_than_firsts; -}; +}; const nfa_dispatch_fn NFATraits<LBR_NFA_TRUF>::has_accel = dispatch_false; const nfa_dispatch_fn NFATraits<LBR_NFA_TRUF>::has_repeats = dispatch_false; const nfa_dispatch_fn NFATraits<LBR_NFA_TRUF>::has_repeats_other_than_firsts = dispatch_false; -#if defined(DUMP_SUPPORT) +#if defined(DUMP_SUPPORT) const char *NFATraits<LBR_NFA_TRUF>::name = "Lim Bounded Repeat (M)"; -#endif - +#endif + template<> struct NFATraits<SHENG_NFA> { UNUSED static const char *name; static const NFACategory category = NFA_OTHER; @@ -489,87 +489,87 @@ const nfa_dispatch_fn NFATraits<MCSHENG_64_NFA_16>::has_repeats_other_than_first #if defined(DUMP_SUPPORT) const char *NFATraits<MCSHENG_64_NFA_16>::name = "Shengy64 McShengFace 16"; #endif -} // namespace - -#if defined(DUMP_SUPPORT) - -const char *nfa_type_name(NFAEngineType type) { - return DISPATCH_BY_NFA_TYPE(type, getName, nullptr); -} - -string describe(const NFA &nfa) { - return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, getDescription, &nfa); -} - -#endif /* DUMP_SUPPORT */ - -namespace { -template<NFAEngineType t> -struct getStateAlign { - static u32 call(void *) { - return NFATraits<t>::stateAlign; - } -}; -} - -u32 state_alignment(const NFA &nfa) { - return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, getStateAlign, nullptr); -} - -namespace { -template<NFAEngineType t> -struct is_limex { - static bool call(const void *) { - return NFATraits<t>::category == NFA_LIMEX; - } -}; -} - +} // namespace + +#if defined(DUMP_SUPPORT) + +const char *nfa_type_name(NFAEngineType type) { + return DISPATCH_BY_NFA_TYPE(type, getName, nullptr); +} + +string describe(const NFA &nfa) { + return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, getDescription, &nfa); +} + +#endif /* DUMP_SUPPORT */ + +namespace { +template<NFAEngineType t> +struct getStateAlign { + static u32 call(void *) { + return NFATraits<t>::stateAlign; + } +}; +} + +u32 
state_alignment(const NFA &nfa) { + return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, getStateAlign, nullptr); +} + +namespace { +template<NFAEngineType t> +struct is_limex { + static bool call(const void *) { + return NFATraits<t>::category == NFA_LIMEX; + } +}; +} + namespace { template<NFAEngineType t> struct has_repeats_other_than_firsts_dispatch { static nfa_dispatch_fn call(const void *) { return NFATraits<t>::has_repeats_other_than_firsts; - } + } }; } - + bool has_bounded_repeats_other_than_firsts(const NFA &nfa) { return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, has_repeats_other_than_firsts_dispatch, &nfa)(&nfa); } - + namespace { template<NFAEngineType t> struct has_repeats_dispatch { static nfa_dispatch_fn call(const void *) { return NFATraits<t>::has_repeats; - } + } }; -} - -bool has_bounded_repeats(const NFA &nfa) { +} + +bool has_bounded_repeats(const NFA &nfa) { return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, has_repeats_dispatch, &nfa)(&nfa); -} - -namespace { -template<NFAEngineType t> -struct has_accel_dispatch { +} + +namespace { +template<NFAEngineType t> +struct has_accel_dispatch { static nfa_dispatch_fn call(const void *) { - return NFATraits<t>::has_accel; - } -}; -} - -bool has_accel(const NFA &nfa) { - return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, has_accel_dispatch, + return NFATraits<t>::has_accel; + } +}; +} + +bool has_accel(const NFA &nfa) { + return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, has_accel_dispatch, &nfa)(&nfa); -} - -bool requires_decompress_key(const NFA &nfa) { - return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, is_limex, &nfa); -} - -} // namespace ue2 +} + +bool requires_decompress_key(const NFA &nfa) { + return DISPATCH_BY_NFA_TYPE((NFAEngineType)nfa.type, is_limex, &nfa); +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfa/nfa_build_util.h b/contrib/libs/hyperscan/src/nfa/nfa_build_util.h index 9c6ec83ca8..ee7a309494 100644 --- a/contrib/libs/hyperscan/src/nfa/nfa_build_util.h +++ b/contrib/libs/hyperscan/src/nfa/nfa_build_util.h @@ -1,60 +1,60 @@ -/* +/* * Copyright (c) 2015-2020, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef NFA_BUILD_UTIL_H -#define NFA_BUILD_UTIL_H - -#include "ue2common.h" -#include "nfa_internal.h" - -#include <string> - -struct NFA; - -namespace ue2 { - -#ifdef DUMP_SUPPORT -/* provided for debugging functions */ -const char *nfa_type_name(NFAEngineType type); -std::string describe(const NFA &nfa); -#endif - -// For a given NFA, retrieve the alignment required by its uncompressed state. -u32 state_alignment(const NFA &nfa); - -bool has_bounded_repeats_other_than_firsts(const NFA &n); - -bool has_bounded_repeats(const NFA &n); - -bool has_accel(const NFA &n); - -bool requires_decompress_key(const NFA &n); - -} // namespace ue2 - -#endif + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef NFA_BUILD_UTIL_H +#define NFA_BUILD_UTIL_H + +#include "ue2common.h" +#include "nfa_internal.h" + +#include <string> + +struct NFA; + +namespace ue2 { + +#ifdef DUMP_SUPPORT +/* provided for debugging functions */ +const char *nfa_type_name(NFAEngineType type); +std::string describe(const NFA &nfa); +#endif + +// For a given NFA, retrieve the alignment required by its uncompressed state. 
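// (A hedged usage sketch: `aligned_state_alloc` below is a hypothetical
// helper, not an API from this header. Scratch state for an engine is
// expected to honour this alignment, so a caller might do:
//
//     u32 align = state_alignment(*nfa);                      // e.g. 8 for an LBR
//     char *state = aligned_state_alloc(nfa->scratchStateSize, align);
//
// where scratchStateSize comes from struct NFA in nfa_internal.h.)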
+u32 state_alignment(const NFA &nfa); + +bool has_bounded_repeats_other_than_firsts(const NFA &n); + +bool has_bounded_repeats(const NFA &n); + +bool has_accel(const NFA &n); + +bool requires_decompress_key(const NFA &n); + +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/nfa/nfa_internal.h b/contrib/libs/hyperscan/src/nfa/nfa_internal.h index 8a61c04807..ad27e28b14 100644 --- a/contrib/libs/hyperscan/src/nfa/nfa_internal.h +++ b/contrib/libs/hyperscan/src/nfa/nfa_internal.h @@ -1,66 +1,66 @@ -/* +/* * Copyright (c) 2015-2020, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - \brief Declarations for the main NFA engine types and structures. -*/ -#ifndef NFA_INTERNAL_H -#define NFA_INTERNAL_H - -#ifdef __cplusplus -extern "C" -{ -#endif - -#include "ue2common.h" - -// Constants - -#define MO_INVALID_IDX 0xffffffff /**< index meaning value is invalid */ - -// Flags (used in NFA::flags) - -#define NFA_ACCEPTS_EOD 1U /**< can produce matches on EOD. */ -#define NFA_ZOMBIE 2U /**< supports zombies */ - -// Common data structures for NFAs - -enum NFAEngineType { + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + \brief Declarations for the main NFA engine types and structures. +*/ +#ifndef NFA_INTERNAL_H +#define NFA_INTERNAL_H + +#ifdef __cplusplus +extern "C" +{ +#endif + +#include "ue2common.h" + +// Constants + +#define MO_INVALID_IDX 0xffffffff /**< index meaning value is invalid */ + +// Flags (used in NFA::flags) + +#define NFA_ACCEPTS_EOD 1U /**< can produce matches on EOD. */ +#define NFA_ZOMBIE 2U /**< supports zombies */ + +// Common data structures for NFAs + +enum NFAEngineType { LIMEX_NFA_32, LIMEX_NFA_64, LIMEX_NFA_128, LIMEX_NFA_256, LIMEX_NFA_384, LIMEX_NFA_512, - MCCLELLAN_NFA_8, /**< magic pseudo nfa */ - MCCLELLAN_NFA_16, /**< magic pseudo nfa */ - GOUGH_NFA_8, /**< magic pseudo nfa */ - GOUGH_NFA_16, /**< magic pseudo nfa */ + MCCLELLAN_NFA_8, /**< magic pseudo nfa */ + MCCLELLAN_NFA_16, /**< magic pseudo nfa */ + GOUGH_NFA_8, /**< magic pseudo nfa */ + GOUGH_NFA_16, /**< magic pseudo nfa */ MPV_NFA, /**< magic pseudo nfa */ LBR_NFA_DOT, /**< magic pseudo nfa */ LBR_NFA_VERM, /**< magic pseudo nfa */ @@ -76,79 +76,79 @@ enum NFAEngineType { SHENG_NFA_64, /**< magic pseudo nfa */ MCSHENG_64_NFA_8, /**< magic pseudo nfa */ MCSHENG_64_NFA_16, /**< magic pseudo nfa */ - /** \brief bogus NFA - not used */ - INVALID_NFA -}; - -/** \brief header for the NFA implementation. */ -struct ALIGN_CL_DIRECTIVE NFA { - u32 flags; - - /** \brief The size in bytes of the NFA engine. The engine is - * serialized to the extent that copying length bytes back into a - * 16-byte aligned memory location yields a structure that has the same - * behaviour as the original engine. */ - u32 length; - - /** \brief Active implementation used by this NFAEngineType */ - u8 type; - - u8 rAccelType; - u8 rAccelOffset; - u8 maxBiAnchoredWidth; /**< if non zero, max width of the block */ - - union { - u8 c; - u16 dc; - u8 array[2]; - } rAccelData; - - u32 queueIndex; /**< index of the associated queue in scratch */ - - /** \brief The number of valid positions/states for this NFA. Debug only */ - u32 nPositions; - - /** \brief Size of the state required in scratch space. - * - * This state has less strict size requirements (as it doesn't go in stream - * state) and does not persist between stream writes. - */ - u32 scratchStateSize; - - /** \brief Size of the state required in stream state. - * - * This encompasses all state stored by the engine that must persist between - * stream writes. */ - u32 streamStateSize; - - u32 maxWidth; /**< longest possible match in this NFA, 0 if unbounded */ - u32 minWidth; /**< minimum bytes required to match this NFA */ - u32 maxOffset; /**< non zero: maximum offset this pattern can match at */ - - /* Note: implementation (e.g. 
a LimEx) directly follows struct in memory */ -} ; - -// Accessor macro for the implementation NFA: we do things this way to avoid -// type-punning warnings. -#define getImplNfa(nfa) \ - ((const void *)((const char *)(nfa) + sizeof(struct NFA))) - -// Non-const version of the above, used at compile time. -#define getMutableImplNfa(nfa) ((char *)(nfa) + sizeof(struct NFA)) - -static really_inline u32 nfaAcceptsEod(const struct NFA *nfa) { - return nfa->flags & NFA_ACCEPTS_EOD; -} - -static really_inline u32 nfaSupportsZombie(const struct NFA *nfa) { - return nfa->flags & NFA_ZOMBIE; -} - -/** \brief True if the given type (from NFA::type) is a McClellan DFA. */ -static really_inline int isMcClellanType(u8 t) { - return t == MCCLELLAN_NFA_8 || t == MCCLELLAN_NFA_16; -} - + /** \brief bogus NFA - not used */ + INVALID_NFA +}; + +/** \brief header for the NFA implementation. */ +struct ALIGN_CL_DIRECTIVE NFA { + u32 flags; + + /** \brief The size in bytes of the NFA engine. The engine is + * serialized to the extent that copying length bytes back into a + * 16-byte aligned memory location yields a structure that has the same + * behaviour as the original engine. */ + u32 length; + + /** \brief Active implementation used by this NFAEngineType */ + u8 type; + + u8 rAccelType; + u8 rAccelOffset; + u8 maxBiAnchoredWidth; /**< if non zero, max width of the block */ + + union { + u8 c; + u16 dc; + u8 array[2]; + } rAccelData; + + u32 queueIndex; /**< index of the associated queue in scratch */ + + /** \brief The number of valid positions/states for this NFA. Debug only */ + u32 nPositions; + + /** \brief Size of the state required in scratch space. + * + * This state has less strict size requirements (as it doesn't go in stream + * state) and does not persist between stream writes. + */ + u32 scratchStateSize; + + /** \brief Size of the state required in stream state. + * + * This encompasses all state stored by the engine that must persist between + * stream writes. */ + u32 streamStateSize; + + u32 maxWidth; /**< longest possible match in this NFA, 0 if unbounded */ + u32 minWidth; /**< minimum bytes required to match this NFA */ + u32 maxOffset; /**< non zero: maximum offset this pattern can match at */ + + /* Note: implementation (e.g. a LimEx) directly follows struct in memory */ +} ; + +// Accessor macro for the implementation NFA: we do things this way to avoid +// type-punning warnings. +#define getImplNfa(nfa) \ + ((const void *)((const char *)(nfa) + sizeof(struct NFA))) + +// Non-const version of the above, used at compile time. +#define getMutableImplNfa(nfa) ((char *)(nfa) + sizeof(struct NFA)) + +static really_inline u32 nfaAcceptsEod(const struct NFA *nfa) { + return nfa->flags & NFA_ACCEPTS_EOD; +} + +static really_inline u32 nfaSupportsZombie(const struct NFA *nfa) { + return nfa->flags & NFA_ZOMBIE; +} + +/** \brief True if the given type (from NFA::type) is a McClellan DFA. */ +static really_inline int isMcClellanType(u8 t) { + return t == MCCLELLAN_NFA_8 || t == MCCLELLAN_NFA_16; +} + /** \brief True if the given type (from NFA::type) is a Sheng-McClellan hybrid * DFA. */ static really_inline int isShengMcClellanType(u8 t) { @@ -156,11 +156,11 @@ static really_inline int isShengMcClellanType(u8 t) { t == MCSHENG_64_NFA_8 || t == MCSHENG_64_NFA_16; } -/** \brief True if the given type (from NFA::type) is a Gough DFA. 
*/ -static really_inline int isGoughType(u8 t) { - return t == GOUGH_NFA_8 || t == GOUGH_NFA_16; -} - +/** \brief True if the given type (from NFA::type) is a Gough DFA. */ +static really_inline int isGoughType(u8 t) { + return t == GOUGH_NFA_8 || t == GOUGH_NFA_16; +} + /** \brief True if the given type (from NFA::type) is a Sheng DFA. */ static really_inline int isSheng16Type(u8 t) { return t == SHENG_NFA; @@ -185,11 +185,11 @@ static really_inline int isShengType(u8 t) { * \brief True if the given type (from NFA::type) is a McClellan, Gough or * Sheng DFA. */ -static really_inline int isDfaType(u8 t) { +static really_inline int isDfaType(u8 t) { return isMcClellanType(t) || isGoughType(t) || isShengType(t) || isShengMcClellanType(t); -} - +} + static really_inline int isBigDfaType(u8 t) { return t == MCCLELLAN_NFA_16 || t == MCSHENG_NFA_16 || t == GOUGH_NFA_16; } @@ -198,69 +198,69 @@ static really_inline int isSmallDfaType(u8 t) { return isDfaType(t) && !isBigDfaType(t); } -/** \brief True if the given type (from NFA::type) is an NFA. */ -static really_inline int isNfaType(u8 t) { - switch (t) { +/** \brief True if the given type (from NFA::type) is an NFA. */ +static really_inline int isNfaType(u8 t) { + switch (t) { case LIMEX_NFA_32: case LIMEX_NFA_64: case LIMEX_NFA_128: case LIMEX_NFA_256: case LIMEX_NFA_384: case LIMEX_NFA_512: - return 1; - default: - break; - } - return 0; -} - -/** \brief True if the given type (from NFA::type) is an LBR. */ -static really_inline -int isLbrType(u8 t) { + return 1; + default: + break; + } + return 0; +} + +/** \brief True if the given type (from NFA::type) is an LBR. */ +static really_inline +int isLbrType(u8 t) { return t == LBR_NFA_DOT || t == LBR_NFA_VERM || t == LBR_NFA_NVERM || t == LBR_NFA_SHUF || t == LBR_NFA_TRUF; -} - +} + /** \brief True if the given type (from NFA::type) is a container engine. */ -static really_inline +static really_inline int isContainerType(u8 t) { return t == TAMARAMA_NFA; } static really_inline -int isMultiTopType(u8 t) { - return !isDfaType(t) && !isLbrType(t); -} - -/** Macros used in place of unimplemented NFA API functions for a given - * engine. */ -#if !defined(_WIN32) - -/* Use for functions that return an integer. */ -#define NFA_API_NO_IMPL(...) \ - ({ \ +int isMultiTopType(u8 t) { + return !isDfaType(t) && !isLbrType(t); +} + +/** Macros used in place of unimplemented NFA API functions for a given + * engine. */ +#if !defined(_WIN32) + +/* Use for functions that return an integer. */ +#define NFA_API_NO_IMPL(...) \ + ({ \ assert(!"not implemented for this engine!"); \ - 0; /* return value, for places that need it */ \ - }) - -/* Use for _zombie_status functions. */ -#define NFA_API_ZOMBIE_NO_IMPL(...) \ - ({ \ + 0; /* return value, for places that need it */ \ + }) + +/* Use for _zombie_status functions. */ +#define NFA_API_ZOMBIE_NO_IMPL(...) \ + ({ \ assert(!"not implemented for this engine!"); \ - NFA_ZOMBIE_NO; \ - }) - -#else - -/* Simpler implementation for compilers that don't like the GCC extension used - * above. */ -#define NFA_API_NO_IMPL(...) 0 -#define NFA_API_ZOMBIE_NO_IMPL(...) NFA_ZOMBIE_NO - -#endif - -#ifdef __cplusplus -} -#endif - -#endif + NFA_ZOMBIE_NO; \ + }) + +#else + +/* Simpler implementation for compilers that don't like the GCC extension used + * above. */ +#define NFA_API_NO_IMPL(...) 0 +#define NFA_API_ZOMBIE_NO_IMPL(...) 
NFA_ZOMBIE_NO + +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/contrib/libs/hyperscan/src/nfa/nfa_kind.h b/contrib/libs/hyperscan/src/nfa/nfa_kind.h index 2dbc2406a5..f2ac6189b1 100644 --- a/contrib/libs/hyperscan/src/nfa/nfa_kind.h +++ b/contrib/libs/hyperscan/src/nfa/nfa_kind.h @@ -1,60 +1,60 @@ -/* +/* * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + /** * \file * \brief Data structures and helper functions used to describe the purpose of * a particular NFA engine at build time. 
*/ -#ifndef NFA_KIND_H -#define NFA_KIND_H - -#include "ue2common.h" - +#ifndef NFA_KIND_H +#define NFA_KIND_H + +#include "ue2common.h" + #include <string> -namespace ue2 { - -/** \brief Specify the use-case for an nfa engine. */ -enum nfa_kind { - NFA_PREFIX, //!< rose prefix - NFA_INFIX, //!< rose infix - NFA_SUFFIX, //!< rose suffix - NFA_OUTFIX, //!< "outfix" nfa not triggered by external events +namespace ue2 { + +/** \brief Specify the use-case for an nfa engine. */ +enum nfa_kind { + NFA_PREFIX, //!< rose prefix + NFA_INFIX, //!< rose infix + NFA_SUFFIX, //!< rose suffix + NFA_OUTFIX, //!< "outfix" nfa not triggered by external events NFA_OUTFIX_RAW, //!< "outfix", but with unmanaged reports - NFA_REV_PREFIX, //! reverse running prefixes (for som) + NFA_REV_PREFIX, //! reverse running prefixes (for som) NFA_EAGER_PREFIX, //!< rose prefix that is also run up to matches -}; - +}; + /** \brief True if this kind of engine is triggered by a top event. */ inline -bool is_triggered(enum nfa_kind k) { +bool is_triggered(enum nfa_kind k) { switch (k) { case NFA_INFIX: case NFA_SUFFIX: @@ -63,8 +63,8 @@ bool is_triggered(enum nfa_kind k) { default: return false; } -} - +} + /** * \brief True if this kind of engine generates actively checks for accept * states either to halt matching or to raise a callback. Only these engines @@ -72,7 +72,7 @@ bool is_triggered(enum nfa_kind k) { * nfaQueueExecToMatch(). */ inline -bool generates_callbacks(enum nfa_kind k) { +bool generates_callbacks(enum nfa_kind k) { switch (k) { case NFA_SUFFIX: case NFA_OUTFIX: @@ -83,8 +83,8 @@ bool generates_callbacks(enum nfa_kind k) { default: return false; } -} - +} + /** * \brief True if this kind of engine has its state inspected to see if it is in * an accept state. Engines generated with this property will commonly call @@ -143,6 +143,6 @@ std::string to_string(nfa_kind k) { #endif -} // namespace ue2 - -#endif +} // namespace ue2 + +#endif diff --git a/contrib/libs/hyperscan/src/nfa/nfa_rev_api.h b/contrib/libs/hyperscan/src/nfa/nfa_rev_api.h index 335a5440f3..370f96ef62 100644 --- a/contrib/libs/hyperscan/src/nfa/nfa_rev_api.h +++ b/contrib/libs/hyperscan/src/nfa/nfa_rev_api.h @@ -1,157 +1,157 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Reverse-acceleration optimizations for the NFA API block mode scans. - */ - -#ifndef NFA_REV_API_H -#define NFA_REV_API_H - -#include "accel.h" -#include "nfa_internal.h" -#include "vermicelli.h" -#include "util/unaligned.h" - -static really_inline -size_t nfaRevAccel_i(const struct NFA *nfa, const u8 *buffer, size_t length) { - DEBUG_PRINTF("checking rev accel mw %u\n", nfa->minWidth); - assert(nfa->rAccelOffset >= 1); - assert(nfa->rAccelOffset <= nfa->minWidth); - - const u8 *rv; // result for accel engine - - switch (nfa->rAccelType) { - case ACCEL_RVERM: - DEBUG_PRINTF("ACCEL_RVERM\n"); - if (length + 1 - nfa->rAccelOffset < 16) { - break; - } - - rv = rvermicelliExec(nfa->rAccelData.c, 0, buffer, - buffer + length + 1 - nfa->rAccelOffset); - length = (size_t)(rv - buffer + nfa->rAccelOffset); - break; - case ACCEL_RVERM_NOCASE: - DEBUG_PRINTF("ACCEL_RVERM_NOCASE\n"); - if (length + 1 - nfa->rAccelOffset < 16) { - break; - } - - rv = rvermicelliExec(nfa->rAccelData.c, 1, buffer, - buffer + length + 1 - nfa->rAccelOffset); - length = (size_t)(rv - buffer + nfa->rAccelOffset); - break; - case ACCEL_RDVERM: - DEBUG_PRINTF("ACCEL_RDVERM\n"); - if (length + 1 - nfa->rAccelOffset < 17) { - break; - } - - rv = rvermicelliDoubleExec(nfa->rAccelData.array[0], - nfa->rAccelData.array[1], 0, buffer, - buffer + length + 1 - nfa->rAccelOffset); - length = (size_t)(rv - buffer + nfa->rAccelOffset); - break; - case ACCEL_RDVERM_NOCASE: - DEBUG_PRINTF("ACCEL_RVERM_NOCASE\n"); - if (length + 1 - nfa->rAccelOffset < 17) { - break; - } - - rv = rvermicelliDoubleExec(nfa->rAccelData.array[0], - nfa->rAccelData.array[1], 1, buffer, - buffer + length + 1 - nfa->rAccelOffset); - length = (size_t)(rv - buffer + nfa->rAccelOffset); - break; - case ACCEL_REOD: - DEBUG_PRINTF("ACCEL_REOD\n"); - if (buffer[length - nfa->rAccelOffset] != nfa->rAccelData.c) { - return 0; - } - break; - case ACCEL_REOD_NOCASE: - DEBUG_PRINTF("ACCEL_REOD_NOCASE\n"); - if ((buffer[length - nfa->rAccelOffset] & CASE_CLEAR) != - nfa->rAccelData.c) { - return 0; - } - break; - case ACCEL_RDEOD: - DEBUG_PRINTF("ACCEL_RDEOD\n"); - if (unaligned_load_u16(buffer + length - nfa->rAccelOffset) != - nfa->rAccelData.dc) { - return 0; - } - break; - case ACCEL_RDEOD_NOCASE: - DEBUG_PRINTF("ACCEL_RDEOD_NOCASE\n"); - if ((unaligned_load_u16(buffer + length - nfa->rAccelOffset) & - DOUBLE_CASE_CLEAR) != nfa->rAccelData.dc) { - return 0; - } - break; - default: - assert(!"not here"); - } - - if (nfa->minWidth > length) { - DEBUG_PRINTF("post-accel, scan skipped: %zu < min %u bytes\n", length, - nfa->minWidth); - return 0; - } - - return length; -} - -/** \brief Reverse acceleration check. Returns a new length for the block, - * guaranteeing that a match cannot occur beyond that point. 
*/ -static really_inline -size_t nfaRevAccelCheck(const struct NFA *nfa, const u8 *buffer, - size_t length) { - assert(nfa); - - // If this block is not long enough to satisfy the minimum width - // constraint on this NFA, we can avoid the scan altogether. - if (nfa->minWidth > length) { - DEBUG_PRINTF("scan skipped: %zu < min %u bytes\n", length, - nfa->minWidth); - return 0; - } - - if (nfa->rAccelType == ACCEL_NONE) { - DEBUG_PRINTF("no rev accel available\n"); - return length; - } - - size_t rv_length = nfaRevAccel_i(nfa, buffer, length); - assert(rv_length <= length); - return rv_length; -} - -#endif +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Reverse-acceleration optimizations for the NFA API block mode scans. 
+ */ + +#ifndef NFA_REV_API_H +#define NFA_REV_API_H + +#include "accel.h" +#include "nfa_internal.h" +#include "vermicelli.h" +#include "util/unaligned.h" + +static really_inline +size_t nfaRevAccel_i(const struct NFA *nfa, const u8 *buffer, size_t length) { + DEBUG_PRINTF("checking rev accel mw %u\n", nfa->minWidth); + assert(nfa->rAccelOffset >= 1); + assert(nfa->rAccelOffset <= nfa->minWidth); + + const u8 *rv; // result for accel engine + + switch (nfa->rAccelType) { + case ACCEL_RVERM: + DEBUG_PRINTF("ACCEL_RVERM\n"); + if (length + 1 - nfa->rAccelOffset < 16) { + break; + } + + rv = rvermicelliExec(nfa->rAccelData.c, 0, buffer, + buffer + length + 1 - nfa->rAccelOffset); + length = (size_t)(rv - buffer + nfa->rAccelOffset); + break; + case ACCEL_RVERM_NOCASE: + DEBUG_PRINTF("ACCEL_RVERM_NOCASE\n"); + if (length + 1 - nfa->rAccelOffset < 16) { + break; + } + + rv = rvermicelliExec(nfa->rAccelData.c, 1, buffer, + buffer + length + 1 - nfa->rAccelOffset); + length = (size_t)(rv - buffer + nfa->rAccelOffset); + break; + case ACCEL_RDVERM: + DEBUG_PRINTF("ACCEL_RDVERM\n"); + if (length + 1 - nfa->rAccelOffset < 17) { + break; + } + + rv = rvermicelliDoubleExec(nfa->rAccelData.array[0], + nfa->rAccelData.array[1], 0, buffer, + buffer + length + 1 - nfa->rAccelOffset); + length = (size_t)(rv - buffer + nfa->rAccelOffset); + break; + case ACCEL_RDVERM_NOCASE: + DEBUG_PRINTF("ACCEL_RVERM_NOCASE\n"); + if (length + 1 - nfa->rAccelOffset < 17) { + break; + } + + rv = rvermicelliDoubleExec(nfa->rAccelData.array[0], + nfa->rAccelData.array[1], 1, buffer, + buffer + length + 1 - nfa->rAccelOffset); + length = (size_t)(rv - buffer + nfa->rAccelOffset); + break; + case ACCEL_REOD: + DEBUG_PRINTF("ACCEL_REOD\n"); + if (buffer[length - nfa->rAccelOffset] != nfa->rAccelData.c) { + return 0; + } + break; + case ACCEL_REOD_NOCASE: + DEBUG_PRINTF("ACCEL_REOD_NOCASE\n"); + if ((buffer[length - nfa->rAccelOffset] & CASE_CLEAR) != + nfa->rAccelData.c) { + return 0; + } + break; + case ACCEL_RDEOD: + DEBUG_PRINTF("ACCEL_RDEOD\n"); + if (unaligned_load_u16(buffer + length - nfa->rAccelOffset) != + nfa->rAccelData.dc) { + return 0; + } + break; + case ACCEL_RDEOD_NOCASE: + DEBUG_PRINTF("ACCEL_RDEOD_NOCASE\n"); + if ((unaligned_load_u16(buffer + length - nfa->rAccelOffset) & + DOUBLE_CASE_CLEAR) != nfa->rAccelData.dc) { + return 0; + } + break; + default: + assert(!"not here"); + } + + if (nfa->minWidth > length) { + DEBUG_PRINTF("post-accel, scan skipped: %zu < min %u bytes\n", length, + nfa->minWidth); + return 0; + } + + return length; +} + +/** \brief Reverse acceleration check. Returns a new length for the block, + * guaranteeing that a match cannot occur beyond that point. */ +static really_inline +size_t nfaRevAccelCheck(const struct NFA *nfa, const u8 *buffer, + size_t length) { + assert(nfa); + + // If this block is not long enough to satisfy the minimum width + // constraint on this NFA, we can avoid the scan altogether. 
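    // (Caller-side sketch, illustrative only: `runBlockScan` is a stand-in
    // name, not an API from this header. The contract of this function is:
    //
    //     size_t len = nfaRevAccelCheck(nfa, buf, buflen);
    //     if (len) {
    //         runBlockScan(nfa, buf, len); // no match can end beyond len
    //     }
    //
    // and a zero return means the whole block can be skipped.)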
+ if (nfa->minWidth > length) { + DEBUG_PRINTF("scan skipped: %zu < min %u bytes\n", length, + nfa->minWidth); + return 0; + } + + if (nfa->rAccelType == ACCEL_NONE) { + DEBUG_PRINTF("no rev accel available\n"); + return length; + } + + size_t rv_length = nfaRevAccel_i(nfa, buffer, length); + assert(rv_length <= length); + return rv_length; +} + +#endif diff --git a/contrib/libs/hyperscan/src/nfa/rdfa.h b/contrib/libs/hyperscan/src/nfa/rdfa.h index d1f1c3614d..6b994e4f2f 100644 --- a/contrib/libs/hyperscan/src/nfa/rdfa.h +++ b/contrib/libs/hyperscan/src/nfa/rdfa.h @@ -1,91 +1,91 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef RDFA_H -#define RDFA_H - -#include "nfa_kind.h" -#include "ue2common.h" - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef RDFA_H +#define RDFA_H + +#include "nfa_kind.h" +#include "ue2common.h" + #include "util/flat_containers.h" - -#include <array> -#include <vector> - -namespace ue2 { - -typedef u16 dstate_id_t; -typedef u16 symbol_t; - -static constexpr symbol_t TOP = 256; -static constexpr symbol_t ALPHABET_SIZE = 257; -static constexpr symbol_t N_SPECIAL_SYMBOL = 1; -static constexpr dstate_id_t DEAD_STATE = 0; - -/** Structure representing a dfa state during construction. */ -struct dstate { - /** Next state; indexed by remapped sym */ - std::vector<dstate_id_t> next; - - /** Set by ng_mcclellan, refined by mcclellancompile */ - dstate_id_t daddy = 0; - - /** Set by mcclellancompile, implementation state id, excludes edge - * decorations */ - dstate_id_t impl_id = 0; - - /** Reports to fire (at any location). */ - flat_set<ReportID> reports; - - /** Reports to fire (at EOD). */ - flat_set<ReportID> reports_eod; - - explicit dstate(size_t alphabet_size) : next(alphabet_size, 0) {} -}; - -struct raw_dfa { - nfa_kind kind; - std::vector<dstate> states; - dstate_id_t start_anchored = DEAD_STATE; - dstate_id_t start_floating = DEAD_STATE; - u16 alpha_size = 0; /* including special symbols */ - - /* mapping from input symbol --> equiv class id */ - std::array<u16, ALPHABET_SIZE> alpha_remap; - - explicit raw_dfa(nfa_kind k) : kind(k) {} - virtual ~raw_dfa(); - + +#include <array> +#include <vector> + +namespace ue2 { + +typedef u16 dstate_id_t; +typedef u16 symbol_t; + +static constexpr symbol_t TOP = 256; +static constexpr symbol_t ALPHABET_SIZE = 257; +static constexpr symbol_t N_SPECIAL_SYMBOL = 1; +static constexpr dstate_id_t DEAD_STATE = 0; + +/** Structure representing a dfa state during construction. */ +struct dstate { + /** Next state; indexed by remapped sym */ + std::vector<dstate_id_t> next; + + /** Set by ng_mcclellan, refined by mcclellancompile */ + dstate_id_t daddy = 0; + + /** Set by mcclellancompile, implementation state id, excludes edge + * decorations */ + dstate_id_t impl_id = 0; + + /** Reports to fire (at any location). */ + flat_set<ReportID> reports; + + /** Reports to fire (at EOD). 
*/ + flat_set<ReportID> reports_eod; + + explicit dstate(size_t alphabet_size) : next(alphabet_size, 0) {} +}; + +struct raw_dfa { + nfa_kind kind; + std::vector<dstate> states; + dstate_id_t start_anchored = DEAD_STATE; + dstate_id_t start_floating = DEAD_STATE; + u16 alpha_size = 0; /* including special symbols */ + + /* mapping from input symbol --> equiv class id */ + std::array<u16, ALPHABET_SIZE> alpha_remap; + + explicit raw_dfa(nfa_kind k) : kind(k) {} + virtual ~raw_dfa(); + u16 getImplAlphaSize() const { return alpha_size - N_SPECIAL_SYMBOL; } - virtual void stripExtraEodReports(void); - bool hasEodReports(void) const; -}; - -} - -#endif + virtual void stripExtraEodReports(void); + bool hasEodReports(void) const; +}; + +} + +#endif diff --git a/contrib/libs/hyperscan/src/nfa/rdfa_merge.cpp b/contrib/libs/hyperscan/src/nfa/rdfa_merge.cpp index 33f70a6bb2..2ad871234f 100644 --- a/contrib/libs/hyperscan/src/nfa/rdfa_merge.cpp +++ b/contrib/libs/hyperscan/src/nfa/rdfa_merge.cpp @@ -1,399 +1,399 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "rdfa_merge.h" - -#include "grey.h" -#include "dfa_min.h" -#include "mcclellancompile_util.h" -#include "rdfa.h" -#include "ue2common.h" -#include "nfagraph/ng_mcclellan_internal.h" -#include "util/container.h" -#include "util/determinise.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "rdfa_merge.h" + +#include "grey.h" +#include "dfa_min.h" +#include "mcclellancompile_util.h" +#include "rdfa.h" +#include "ue2common.h" +#include "nfagraph/ng_mcclellan_internal.h" +#include "util/container.h" +#include "util/determinise.h" #include "util/flat_containers.h" -#include "util/make_unique.h" -#include "util/report_manager.h" +#include "util/make_unique.h" +#include "util/report_manager.h" #include "util/unordered.h" - + #include <algorithm> -#include <queue> - -using namespace std; - -namespace ue2 { - -#define MAX_DFA_STATES 16383 - -namespace { - -class Automaton_Merge { -public: +#include <queue> + +using namespace std; + +namespace ue2 { + +#define MAX_DFA_STATES 16383 + +namespace { + +class Automaton_Merge { +public: using StateSet = vector<u16>; using StateMap = ue2_unordered_map<StateSet, dstate_id_t>; - - Automaton_Merge(const raw_dfa *rdfa1, const raw_dfa *rdfa2, - const ReportManager *rm_in, const Grey &grey_in) - : rm(rm_in), grey(grey_in), nfas{rdfa1, rdfa2}, dead(2) { - calculateAlphabet(); - populateAsFs(); - prunable = isPrunable(); - } - - Automaton_Merge(const vector<const raw_dfa *> &dfas, - const ReportManager *rm_in, const Grey &grey_in) - : rm(rm_in), grey(grey_in), nfas(dfas), dead(nfas.size()) { - calculateAlphabet(); - populateAsFs(); - prunable = isPrunable(); - } - - void populateAsFs(void) { - bool fs_same = true; - bool fs_dead = true; - - as.resize(nfas.size()); - fs.resize(nfas.size()); - for (size_t i = 0, end = nfas.size(); i < end; i++) { - as[i] = nfas[i]->start_anchored; - fs[i] = nfas[i]->start_floating; - - if (fs[i]) { - fs_dead = false; - } - - if (as[i] != fs[i]) { - fs_same = false; - } - } - - start_anchored = DEAD_STATE + 1; - if (fs_same) { - start_floating = start_anchored; - } else if (fs_dead) { - start_floating = DEAD_STATE; - } else { - start_floating = start_anchored + 1; - } - } - - void calculateAlphabet(void) { - DEBUG_PRINTF("calculating alphabet\n"); - vector<CharReach> esets = {CharReach::dot()}; - - for (const auto &rdfa : nfas) { - DEBUG_PRINTF("...next dfa alphabet\n"); - assert(rdfa); - const auto &alpha_remap = rdfa->alpha_remap; - - for (size_t i = 0; i < esets.size(); i++) { - assert(esets[i].count()); - if (esets[i].count() == 1) { - DEBUG_PRINTF("skipping singleton eq set\n"); - continue; - } - - CharReach t; - u8 leader_s = alpha_remap[esets[i].find_first()]; - - DEBUG_PRINTF("checking eq set, leader %02hhx \n", leader_s); - - for (size_t s = esets[i].find_first(); s != CharReach::npos; - s = esets[i].find_next(s)) { - if (alpha_remap[s] != leader_s) { - t.set(s); - } - } - - if (t.any() && t != esets[i]) { - esets[i] &= ~t; - esets.push_back(t); - } - } - } - + + Automaton_Merge(const raw_dfa *rdfa1, 
const raw_dfa *rdfa2, + const ReportManager *rm_in, const Grey &grey_in) + : rm(rm_in), grey(grey_in), nfas{rdfa1, rdfa2}, dead(2) { + calculateAlphabet(); + populateAsFs(); + prunable = isPrunable(); + } + + Automaton_Merge(const vector<const raw_dfa *> &dfas, + const ReportManager *rm_in, const Grey &grey_in) + : rm(rm_in), grey(grey_in), nfas(dfas), dead(nfas.size()) { + calculateAlphabet(); + populateAsFs(); + prunable = isPrunable(); + } + + void populateAsFs(void) { + bool fs_same = true; + bool fs_dead = true; + + as.resize(nfas.size()); + fs.resize(nfas.size()); + for (size_t i = 0, end = nfas.size(); i < end; i++) { + as[i] = nfas[i]->start_anchored; + fs[i] = nfas[i]->start_floating; + + if (fs[i]) { + fs_dead = false; + } + + if (as[i] != fs[i]) { + fs_same = false; + } + } + + start_anchored = DEAD_STATE + 1; + if (fs_same) { + start_floating = start_anchored; + } else if (fs_dead) { + start_floating = DEAD_STATE; + } else { + start_floating = start_anchored + 1; + } + } + + void calculateAlphabet(void) { + DEBUG_PRINTF("calculating alphabet\n"); + vector<CharReach> esets = {CharReach::dot()}; + + for (const auto &rdfa : nfas) { + DEBUG_PRINTF("...next dfa alphabet\n"); + assert(rdfa); + const auto &alpha_remap = rdfa->alpha_remap; + + for (size_t i = 0; i < esets.size(); i++) { + assert(esets[i].count()); + if (esets[i].count() == 1) { + DEBUG_PRINTF("skipping singleton eq set\n"); + continue; + } + + CharReach t; + u8 leader_s = alpha_remap[esets[i].find_first()]; + + DEBUG_PRINTF("checking eq set, leader %02hhx \n", leader_s); + + for (size_t s = esets[i].find_first(); s != CharReach::npos; + s = esets[i].find_next(s)) { + if (alpha_remap[s] != leader_s) { + t.set(s); + } + } + + if (t.any() && t != esets[i]) { + esets[i] &= ~t; + esets.push_back(t); + } + } + } + // Sort so that our alphabet mapping isn't dependent on the order of // rdfas passed in. sort(esets.begin(), esets.end()); - alphasize = buildAlphabetFromEquivSets(esets, alpha, unalpha); - } - - bool isPrunable() const { - if (!grey.highlanderPruneDFA || !rm) { - DEBUG_PRINTF("disabled, or not managed reports\n"); - return false; - } - - assert(!nfas.empty()); - if (!generates_callbacks(nfas.front()->kind)) { - DEBUG_PRINTF("doesn't generate callbacks\n"); - return false; - } - - // Collect all reports from all merge candidates. - flat_set<ReportID> merge_reports; - for (const auto &rdfa : nfas) { - insert(&merge_reports, all_reports(*rdfa)); - } - - DEBUG_PRINTF("all reports: %s\n", as_string_list(merge_reports).c_str()); - - // Return true if they're all exhaustible with the same exhaustion key. 
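    // Concretely (a sketch with illustrative IDs): if one candidate DFA
    // fires reports {10, 11} and the other fires {12}, pruning is only
    // sound when all three are simple-exhaustible and share one ekey.
    // Once any of them matches, that ekey is set and every report in the
    // merged machine is suppressed, so the machine can safely go dead.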
- u32 ekey = INVALID_EKEY; - for (const auto &report_id : merge_reports) { - const Report &r = rm->getReport(report_id); - if (!isSimpleExhaustible(r)) { - DEBUG_PRINTF("report %u not simple exhaustible\n", report_id); - return false; - } - assert(r.ekey != INVALID_EKEY); - if (ekey == INVALID_EKEY) { - ekey = r.ekey; - } else if (ekey != r.ekey) { - DEBUG_PRINTF("two different ekeys, %u and %u\n", ekey, r.ekey); - return false; - } - } - - DEBUG_PRINTF("is prunable\n"); - return true; - } - - - void transition(const StateSet &in, StateSet *next) { - u16 t[ALPHABET_SIZE]; - - for (u32 i = 0; i < alphasize; i++) { - next[i].resize(nfas.size()); - } - - for (size_t j = 0, j_end = nfas.size(); j < j_end; j++) { - getFullTransitionFromState(*nfas[j], in[j], t); - for (u32 i = 0; i < alphasize; i++) { - next[i][j] = t[unalpha[i]]; - } - } - } - - const vector<StateSet> initial() { - vector<StateSet> rv = {as}; - if (start_floating != DEAD_STATE && start_floating != start_anchored) { - rv.push_back(fs); - } - return rv; - } - -private: - void reports_i(const StateSet &in, flat_set<ReportID> dstate::*r_set, - flat_set<ReportID> &r) const { - for (size_t i = 0, end = nfas.size(); i < end; i++) { - const auto &rs = nfas[i]->states[in[i]].*r_set; - insert(&r, rs); - } - } - -public: - void reports(const StateSet &in, flat_set<ReportID> &rv) const { - reports_i(in, &dstate::reports, rv); - } - void reportsEod(const StateSet &in, flat_set<ReportID> &rv) const { - reports_i(in, &dstate::reports_eod, rv); - } - - bool canPrune(const flat_set<ReportID> &test_reports) const { - if (!grey.highlanderPruneDFA || !prunable) { - return false; - } - - // Must all be external reports. - assert(rm); - for (const auto &report_id : test_reports) { - if (!isExternalReport(rm->getReport(report_id))) { - return false; - } - } - - return true; - } - - /** True if the minimization algorithm should be run after merging. */ - bool shouldMinimize() const { - // We only need to run minimization if our merged DFAs shared a report. - flat_set<ReportID> seen_reports; - for (const auto &rdfa : nfas) { - for (const auto &report_id : all_reports(*rdfa)) { - if (!seen_reports.insert(report_id).second) { - DEBUG_PRINTF("report %u in several dfas\n", report_id); - return true; - } - } - } - - return false; - } - -private: - const ReportManager *rm; - const Grey &grey; - - vector<const raw_dfa *> nfas; - vector<dstate_id_t> as; - vector<dstate_id_t> fs; - - bool prunable = false; - -public: - std::array<u16, ALPHABET_SIZE> alpha; - std::array<u16, ALPHABET_SIZE> unalpha; - u16 alphasize; - StateSet dead; - - u16 start_anchored; - u16 start_floating; -}; - -} // namespace - -unique_ptr<raw_dfa> mergeTwoDfas(const raw_dfa *d1, const raw_dfa *d2, - size_t max_states, const ReportManager *rm, - const Grey &grey) { - assert(d1 && d2); - assert(d1->kind == d2->kind); - assert(max_states <= MAX_DFA_STATES); - - auto rdfa = ue2::make_unique<raw_dfa>(d1->kind); - - Automaton_Merge autom(d1, d2, rm, grey); + alphasize = buildAlphabetFromEquivSets(esets, alpha, unalpha); + } + + bool isPrunable() const { + if (!grey.highlanderPruneDFA || !rm) { + DEBUG_PRINTF("disabled, or not managed reports\n"); + return false; + } + + assert(!nfas.empty()); + if (!generates_callbacks(nfas.front()->kind)) { + DEBUG_PRINTF("doesn't generate callbacks\n"); + return false; + } + + // Collect all reports from all merge candidates. 
+ flat_set<ReportID> merge_reports; + for (const auto &rdfa : nfas) { + insert(&merge_reports, all_reports(*rdfa)); + } + + DEBUG_PRINTF("all reports: %s\n", as_string_list(merge_reports).c_str()); + + // Return true if they're all exhaustible with the same exhaustion key. + u32 ekey = INVALID_EKEY; + for (const auto &report_id : merge_reports) { + const Report &r = rm->getReport(report_id); + if (!isSimpleExhaustible(r)) { + DEBUG_PRINTF("report %u not simple exhaustible\n", report_id); + return false; + } + assert(r.ekey != INVALID_EKEY); + if (ekey == INVALID_EKEY) { + ekey = r.ekey; + } else if (ekey != r.ekey) { + DEBUG_PRINTF("two different ekeys, %u and %u\n", ekey, r.ekey); + return false; + } + } + + DEBUG_PRINTF("is prunable\n"); + return true; + } + + + void transition(const StateSet &in, StateSet *next) { + u16 t[ALPHABET_SIZE]; + + for (u32 i = 0; i < alphasize; i++) { + next[i].resize(nfas.size()); + } + + for (size_t j = 0, j_end = nfas.size(); j < j_end; j++) { + getFullTransitionFromState(*nfas[j], in[j], t); + for (u32 i = 0; i < alphasize; i++) { + next[i][j] = t[unalpha[i]]; + } + } + } + + const vector<StateSet> initial() { + vector<StateSet> rv = {as}; + if (start_floating != DEAD_STATE && start_floating != start_anchored) { + rv.push_back(fs); + } + return rv; + } + +private: + void reports_i(const StateSet &in, flat_set<ReportID> dstate::*r_set, + flat_set<ReportID> &r) const { + for (size_t i = 0, end = nfas.size(); i < end; i++) { + const auto &rs = nfas[i]->states[in[i]].*r_set; + insert(&r, rs); + } + } + +public: + void reports(const StateSet &in, flat_set<ReportID> &rv) const { + reports_i(in, &dstate::reports, rv); + } + void reportsEod(const StateSet &in, flat_set<ReportID> &rv) const { + reports_i(in, &dstate::reports_eod, rv); + } + + bool canPrune(const flat_set<ReportID> &test_reports) const { + if (!grey.highlanderPruneDFA || !prunable) { + return false; + } + + // Must all be external reports. + assert(rm); + for (const auto &report_id : test_reports) { + if (!isExternalReport(rm->getReport(report_id))) { + return false; + } + } + + return true; + } + + /** True if the minimization algorithm should be run after merging. */ + bool shouldMinimize() const { + // We only need to run minimization if our merged DFAs shared a report. 
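    // Intuition (a sketch of the argument): determinise() already reuses
    // identical product states, so new redundancy can only come from a
    // ReportID reachable in more than one input DFA. E.g. if both inputs
    // report ID 7, the product states (accept in A, dead in B) and
    // (dead in A, accept in B) carry the same report set and may be
    // equivalent; minimize_hopcroft() collapses such pairs.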
+ flat_set<ReportID> seen_reports; + for (const auto &rdfa : nfas) { + for (const auto &report_id : all_reports(*rdfa)) { + if (!seen_reports.insert(report_id).second) { + DEBUG_PRINTF("report %u in several dfas\n", report_id); + return true; + } + } + } + + return false; + } + +private: + const ReportManager *rm; + const Grey &grey; + + vector<const raw_dfa *> nfas; + vector<dstate_id_t> as; + vector<dstate_id_t> fs; + + bool prunable = false; + +public: + std::array<u16, ALPHABET_SIZE> alpha; + std::array<u16, ALPHABET_SIZE> unalpha; + u16 alphasize; + StateSet dead; + + u16 start_anchored; + u16 start_floating; +}; + +} // namespace + +unique_ptr<raw_dfa> mergeTwoDfas(const raw_dfa *d1, const raw_dfa *d2, + size_t max_states, const ReportManager *rm, + const Grey &grey) { + assert(d1 && d2); + assert(d1->kind == d2->kind); + assert(max_states <= MAX_DFA_STATES); + + auto rdfa = ue2::make_unique<raw_dfa>(d1->kind); + + Automaton_Merge autom(d1, d2, rm, grey); if (determinise(autom, rdfa->states, max_states)) { - rdfa->start_anchored = autom.start_anchored; - rdfa->start_floating = autom.start_floating; - rdfa->alpha_size = autom.alphasize; - rdfa->alpha_remap = autom.alpha; - DEBUG_PRINTF("merge succeeded, %zu states\n", rdfa->states.size()); - - if (autom.shouldMinimize()) { - minimize_hopcroft(*rdfa, grey); - DEBUG_PRINTF("minimized, %zu states\n", rdfa->states.size()); - } - - return rdfa; - } - - return nullptr; -} - -void mergeDfas(vector<unique_ptr<raw_dfa>> &dfas, size_t max_states, - const ReportManager *rm, const Grey &grey) { - assert(max_states <= MAX_DFA_STATES); - - if (dfas.size() <= 1) { - return; - } - - DEBUG_PRINTF("before merging, we have %zu dfas\n", dfas.size()); - - queue<unique_ptr<raw_dfa>> q; - for (auto &dfa : dfas) { - q.push(move(dfa)); - } - - // All DFAs are now on the queue, so we'll clear the vector and use it for - // output from here. - dfas.clear(); - - while (q.size() > 1) { - // Attempt to merge the two front elements of the queue. - unique_ptr<raw_dfa> d1 = move(q.front()); - q.pop(); - unique_ptr<raw_dfa> d2 = move(q.front()); - q.pop(); - - auto rdfa = mergeTwoDfas(d1.get(), d2.get(), max_states, rm, grey); - if (rdfa) { - q.push(move(rdfa)); - } else { - DEBUG_PRINTF("failed to merge\n"); - // Put the larger of the two DFAs on the output list, retain the - // smaller one on the queue for further merge attempts. - if (d2->states.size() > d1->states.size()) { - dfas.push_back(move(d2)); - q.push(move(d1)); - } else { - dfas.push_back(move(d1)); - q.push(move(d2)); - } - } - } - - while (!q.empty()) { - dfas.push_back(move(q.front())); - q.pop(); - } - - DEBUG_PRINTF("after merging, we have %zu dfas\n", dfas.size()); -} - -unique_ptr<raw_dfa> mergeAllDfas(const vector<const raw_dfa *> &dfas, - size_t max_states, const ReportManager *rm, - const Grey &grey) { - assert(max_states <= MAX_DFA_STATES); - assert(!dfas.empty()); - - // All the DFAs should be of the same kind. 
- const auto kind = dfas.front()->kind; - assert(all_of(begin(dfas), end(dfas), - [&kind](const raw_dfa *rdfa) { return rdfa->kind == kind; })); - - auto rdfa = ue2::make_unique<raw_dfa>(kind); - Automaton_Merge n(dfas, rm, grey); - - DEBUG_PRINTF("merging dfa\n"); - + rdfa->start_anchored = autom.start_anchored; + rdfa->start_floating = autom.start_floating; + rdfa->alpha_size = autom.alphasize; + rdfa->alpha_remap = autom.alpha; + DEBUG_PRINTF("merge succeeded, %zu states\n", rdfa->states.size()); + + if (autom.shouldMinimize()) { + minimize_hopcroft(*rdfa, grey); + DEBUG_PRINTF("minimized, %zu states\n", rdfa->states.size()); + } + + return rdfa; + } + + return nullptr; +} + +void mergeDfas(vector<unique_ptr<raw_dfa>> &dfas, size_t max_states, + const ReportManager *rm, const Grey &grey) { + assert(max_states <= MAX_DFA_STATES); + + if (dfas.size() <= 1) { + return; + } + + DEBUG_PRINTF("before merging, we have %zu dfas\n", dfas.size()); + + queue<unique_ptr<raw_dfa>> q; + for (auto &dfa : dfas) { + q.push(move(dfa)); + } + + // All DFAs are now on the queue, so we'll clear the vector and use it for + // output from here. + dfas.clear(); + + while (q.size() > 1) { + // Attempt to merge the two front elements of the queue. + unique_ptr<raw_dfa> d1 = move(q.front()); + q.pop(); + unique_ptr<raw_dfa> d2 = move(q.front()); + q.pop(); + + auto rdfa = mergeTwoDfas(d1.get(), d2.get(), max_states, rm, grey); + if (rdfa) { + q.push(move(rdfa)); + } else { + DEBUG_PRINTF("failed to merge\n"); + // Put the larger of the two DFAs on the output list, retain the + // smaller one on the queue for further merge attempts. + if (d2->states.size() > d1->states.size()) { + dfas.push_back(move(d2)); + q.push(move(d1)); + } else { + dfas.push_back(move(d1)); + q.push(move(d2)); + } + } + } + + while (!q.empty()) { + dfas.push_back(move(q.front())); + q.pop(); + } + + DEBUG_PRINTF("after merging, we have %zu dfas\n", dfas.size()); +} + +unique_ptr<raw_dfa> mergeAllDfas(const vector<const raw_dfa *> &dfas, + size_t max_states, const ReportManager *rm, + const Grey &grey) { + assert(max_states <= MAX_DFA_STATES); + assert(!dfas.empty()); + + // All the DFAs should be of the same kind. 
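    // (nfa_kind records the role an engine plays in the decomposed
    // pattern: prefix, infix, suffix, outfix, etc. Accept and callback
    // semantics differ by role, so merging engines of different kinds
    // would be meaningless; the assertion below enforces this.)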
+ const auto kind = dfas.front()->kind; + assert(all_of(begin(dfas), end(dfas), + [&kind](const raw_dfa *rdfa) { return rdfa->kind == kind; })); + + auto rdfa = ue2::make_unique<raw_dfa>(kind); + Automaton_Merge n(dfas, rm, grey); + + DEBUG_PRINTF("merging dfa\n"); + if (!determinise(n, rdfa->states, max_states)) { - DEBUG_PRINTF("state limit (%zu) exceeded\n", max_states); - return nullptr; /* over state limit */ - } - - rdfa->start_anchored = n.start_anchored; - rdfa->start_floating = n.start_floating; - rdfa->alpha_size = n.alphasize; - rdfa->alpha_remap = n.alpha; - - DEBUG_PRINTF("merged, building impl dfa (a,f) = (%hu,%hu)\n", - rdfa->start_anchored, rdfa->start_floating); - - if (n.shouldMinimize()) { - minimize_hopcroft(*rdfa, grey); - DEBUG_PRINTF("minimized, %zu states\n", rdfa->states.size()); - } - - return rdfa; -} - -} // namespace ue2 + DEBUG_PRINTF("state limit (%zu) exceeded\n", max_states); + return nullptr; /* over state limit */ + } + + rdfa->start_anchored = n.start_anchored; + rdfa->start_floating = n.start_floating; + rdfa->alpha_size = n.alphasize; + rdfa->alpha_remap = n.alpha; + + DEBUG_PRINTF("merged, building impl dfa (a,f) = (%hu,%hu)\n", + rdfa->start_anchored, rdfa->start_floating); + + if (n.shouldMinimize()) { + minimize_hopcroft(*rdfa, grey); + DEBUG_PRINTF("minimized, %zu states\n", rdfa->states.size()); + } + + return rdfa; +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfa/rdfa_merge.h b/contrib/libs/hyperscan/src/nfa/rdfa_merge.h index 857a32224c..9cfb3843a6 100644 --- a/contrib/libs/hyperscan/src/nfa/rdfa_merge.h +++ b/contrib/libs/hyperscan/src/nfa/rdfa_merge.h @@ -1,62 +1,62 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Merge code for McClellan DFA. - */ - -#ifndef RDFA_MERGE_H -#define RDFA_MERGE_H - -#include <memory> -#include <vector> - -namespace ue2 { - -class ReportManager; -struct raw_dfa; -struct Grey; - -/** \brief Attempts to merge two raw_dfas into one. 
*/ -std::unique_ptr<raw_dfa> mergeTwoDfas(const raw_dfa *d1, const raw_dfa *d2, - size_t max_states, const ReportManager *rm, - const Grey &grey); - -/** \brief Attempts to merge all the given raw_dfas into one. */ -std::unique_ptr<raw_dfa> mergeAllDfas(const std::vector<const raw_dfa *> &dfas, - size_t max_states, - const ReportManager *rm, - const Grey &grey); - -/** \brief Merges the given list of raw_dfas as much as possible in-place. */ -void mergeDfas(std::vector<std::unique_ptr<raw_dfa>> &dfas, size_t max_states, - const ReportManager *rm, const Grey &grey); - -} // namespace ue2 - -#endif // RDFA_MERGE_H +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Merge code for McClellan DFA. + */ + +#ifndef RDFA_MERGE_H +#define RDFA_MERGE_H + +#include <memory> +#include <vector> + +namespace ue2 { + +class ReportManager; +struct raw_dfa; +struct Grey; + +/** \brief Attempts to merge two raw_dfas into one. */ +std::unique_ptr<raw_dfa> mergeTwoDfas(const raw_dfa *d1, const raw_dfa *d2, + size_t max_states, const ReportManager *rm, + const Grey &grey); + +/** \brief Attempts to merge all the given raw_dfas into one. */ +std::unique_ptr<raw_dfa> mergeAllDfas(const std::vector<const raw_dfa *> &dfas, + size_t max_states, + const ReportManager *rm, + const Grey &grey); + +/** \brief Merges the given list of raw_dfas as much as possible in-place. 
*/ +void mergeDfas(std::vector<std::unique_ptr<raw_dfa>> &dfas, size_t max_states, + const ReportManager *rm, const Grey &grey); + +} // namespace ue2 + +#endif // RDFA_MERGE_H diff --git a/contrib/libs/hyperscan/src/nfa/repeat.c b/contrib/libs/hyperscan/src/nfa/repeat.c index 5ef76ac696..5b2e4df4ed 100644 --- a/contrib/libs/hyperscan/src/nfa/repeat.c +++ b/contrib/libs/hyperscan/src/nfa/repeat.c @@ -1,893 +1,893 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief API for handling bounded repeats. - * - * This file provides an internal API for handling bounded repeats of character - * classes. It is used by the Large Bounded Repeat (LBR) engine and by the - * bounded repeat handling in the LimEx NFA engine as well. - */ -#include "repeat.h" -#include "util/bitutils.h" -#include "util/multibit.h" -#include "util/pack_bits.h" -#include "util/partial_store.h" -#include "util/unaligned.h" - -#include <stdint.h> -#include <string.h> - -/** \brief Returns the total capacity of the ring. - * Note that it's currently one greater than repeatMax so that we can handle - * cases where the tug and pos triggers overlap. */ -static -u32 ringCapacity(const struct RepeatInfo *info) { - return info->repeatMax + 1; -} - -/** \brief Returns the number of elements currently in the ring. Note that if - * the first and last indices are equal, the ring is full. */ -static -u32 ringOccupancy(const struct RepeatRingControl *xs, const u32 ringSize) { - if (xs->last > xs->first) { - return xs->last - xs->first; - } else { // wrapped - return ringSize - (xs->first - xs->last); - } -} - -/** \brief Returns the offset of the _last_ top stored in the ring. */ -static -u64a ringLastTop(const struct RepeatRingControl *xs, const u32 ringSize) { - return xs->offset + ringOccupancy(xs, ringSize) - 1; -} - -#if !defined(NDEBUG) || defined(DUMP_SUPPORT) -/** \brief For debugging: returns the total capacity of the range list. 
*/ -static UNUSED -u32 rangeListCapacity(const struct RepeatInfo *info) { - u32 d = info->repeatMax - info->repeatMin; - assert(d > 0); // should be in a RING model! - return 2 * ((info->repeatMax / d) + 1); -} -#endif - -#ifdef DEBUG -static -void dumpRing(const struct RepeatInfo *info, const struct RepeatRingControl *xs, - const u8 *ring) { - const u32 ringSize = ringCapacity(info); - DEBUG_PRINTF("ring (occ %u/%u, %u->%u): ", ringOccupancy(xs, ringSize), - ringSize, xs->first, xs->last); - - u16 i = xs->first, n = 0; - do { - if (mmbit_isset(ring, ringSize, i)) { - u64a ringOffset = xs->offset + n; - printf("%llu ", ringOffset); - } - ++i, ++n; - if (i == ringSize) { - i = 0; - } - } while (i != xs->last); - printf("\n"); -} - -static -void dumpRange(const struct RepeatInfo *info, - const struct RepeatRangeControl *xs, const u16 *ring) { - const u32 ringSize = rangeListCapacity(info); - DEBUG_PRINTF("ring (occ %u/%u): ", xs->num, ringSize); - - if (xs->num) { - for (u32 i = 0; i < xs->num; i++) { - printf("%llu ", xs->offset + unaligned_load_u16(ring + i)); - } - } else { - printf("empty"); - } - printf("\n"); -} - -static -void dumpBitmap(const struct RepeatBitmapControl *xs) { - DEBUG_PRINTF("bitmap (base=%llu): ", xs->offset); - u64a bitmap = xs->bitmap; - while (bitmap) { - printf("%llu ", xs->offset + findAndClearLSB_64(&bitmap)); - } - printf("\n"); -} - -static -void dumpTrailer(const struct RepeatInfo *info, - const struct RepeatTrailerControl *xs) { - const u64a m_width = info->repeatMax - info->repeatMin; - DEBUG_PRINTF("trailer: current extent is [%llu,%llu]", xs->offset, - xs->offset + m_width); - u64a bitmap = xs->bitmap; - if (bitmap) { - printf(", also matches at: "); - while (bitmap) { - u32 idx = findAndClearMSB_64(&bitmap); - printf("%llu ", xs->offset - idx - 1); - } - } else { - printf(", no earlier matches"); - } - printf("\n"); -} - -#endif // DEBUG - -#ifndef NDEBUG -/** \brief For debugging: returns true if the range is ordered with no dupes. */ -static UNUSED -int rangeListIsOrdered(const struct RepeatRangeControl *xs, const u16 *ring) { - for (u32 i = 1; i < xs->num; i++) { - u16 a = unaligned_load_u16(ring + i - 1); - u16 b = unaligned_load_u16(ring + i); - if (a >= b) { - return 0; - } - } - return 1; -} -#endif - -u64a repeatLastTopRing(const struct RepeatInfo *info, - const union RepeatControl *ctrl) { - const u32 ringSize = ringCapacity(info); - return ringLastTop(&ctrl->ring, ringSize); -} - -u64a repeatLastTopRange(const union RepeatControl *ctrl, const void *state) { - const u16 *ring = (const u16 *)state; - const struct RepeatRangeControl *xs = &ctrl->range; - assert(xs->num); - return xs->offset + unaligned_load_u16(ring + xs->num - 1); -} - -u64a repeatLastTopBitmap(const union RepeatControl *ctrl) { - const struct RepeatBitmapControl *xs = &ctrl->bitmap; + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief API for handling bounded repeats. + * + * This file provides an internal API for handling bounded repeats of character + * classes. It is used by the Large Bounded Repeat (LBR) engine and by the + * bounded repeat handling in the LimEx NFA engine as well. + */ +#include "repeat.h" +#include "util/bitutils.h" +#include "util/multibit.h" +#include "util/pack_bits.h" +#include "util/partial_store.h" +#include "util/unaligned.h" + +#include <stdint.h> +#include <string.h> + +/** \brief Returns the total capacity of the ring. + * Note that it's currently one greater than repeatMax so that we can handle + * cases where the tug and pos triggers overlap. */ +static +u32 ringCapacity(const struct RepeatInfo *info) { + return info->repeatMax + 1; +} + +/** \brief Returns the number of elements currently in the ring. Note that if + * the first and last indices are equal, the ring is full. */ +static +u32 ringOccupancy(const struct RepeatRingControl *xs, const u32 ringSize) { + if (xs->last > xs->first) { + return xs->last - xs->first; + } else { // wrapped + return ringSize - (xs->first - xs->last); + } +} + +/** \brief Returns the offset of the _last_ top stored in the ring. */ +static +u64a ringLastTop(const struct RepeatRingControl *xs, const u32 ringSize) { + return xs->offset + ringOccupancy(xs, ringSize) - 1; +} + +#if !defined(NDEBUG) || defined(DUMP_SUPPORT) +/** \brief For debugging: returns the total capacity of the range list. */ +static UNUSED +u32 rangeListCapacity(const struct RepeatInfo *info) { + u32 d = info->repeatMax - info->repeatMin; + assert(d > 0); // should be in a RING model! 
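    /* Worked example (illustrative bounds): for a {10,15} repeat, d = 5
     * and the list holds at most 2 * ((15 / 5) + 1) = 8 offsets. The
     * bound follows from repeatStoreRange(): tops further than repeatMax
     * behind the newest are expired, and a new top landing within d of
     * the penultimate entry overwrites the last entry rather than
     * appending. */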
+ return 2 * ((info->repeatMax / d) + 1); +} +#endif + +#ifdef DEBUG +static +void dumpRing(const struct RepeatInfo *info, const struct RepeatRingControl *xs, + const u8 *ring) { + const u32 ringSize = ringCapacity(info); + DEBUG_PRINTF("ring (occ %u/%u, %u->%u): ", ringOccupancy(xs, ringSize), + ringSize, xs->first, xs->last); + + u16 i = xs->first, n = 0; + do { + if (mmbit_isset(ring, ringSize, i)) { + u64a ringOffset = xs->offset + n; + printf("%llu ", ringOffset); + } + ++i, ++n; + if (i == ringSize) { + i = 0; + } + } while (i != xs->last); + printf("\n"); +} + +static +void dumpRange(const struct RepeatInfo *info, + const struct RepeatRangeControl *xs, const u16 *ring) { + const u32 ringSize = rangeListCapacity(info); + DEBUG_PRINTF("ring (occ %u/%u): ", xs->num, ringSize); + + if (xs->num) { + for (u32 i = 0; i < xs->num; i++) { + printf("%llu ", xs->offset + unaligned_load_u16(ring + i)); + } + } else { + printf("empty"); + } + printf("\n"); +} + +static +void dumpBitmap(const struct RepeatBitmapControl *xs) { + DEBUG_PRINTF("bitmap (base=%llu): ", xs->offset); + u64a bitmap = xs->bitmap; + while (bitmap) { + printf("%llu ", xs->offset + findAndClearLSB_64(&bitmap)); + } + printf("\n"); +} + +static +void dumpTrailer(const struct RepeatInfo *info, + const struct RepeatTrailerControl *xs) { + const u64a m_width = info->repeatMax - info->repeatMin; + DEBUG_PRINTF("trailer: current extent is [%llu,%llu]", xs->offset, + xs->offset + m_width); + u64a bitmap = xs->bitmap; + if (bitmap) { + printf(", also matches at: "); + while (bitmap) { + u32 idx = findAndClearMSB_64(&bitmap); + printf("%llu ", xs->offset - idx - 1); + } + } else { + printf(", no earlier matches"); + } + printf("\n"); +} + +#endif // DEBUG + +#ifndef NDEBUG +/** \brief For debugging: returns true if the range is ordered with no dupes. */ +static UNUSED +int rangeListIsOrdered(const struct RepeatRangeControl *xs, const u16 *ring) { + for (u32 i = 1; i < xs->num; i++) { + u16 a = unaligned_load_u16(ring + i - 1); + u16 b = unaligned_load_u16(ring + i); + if (a >= b) { + return 0; + } + } + return 1; +} +#endif + +u64a repeatLastTopRing(const struct RepeatInfo *info, + const union RepeatControl *ctrl) { + const u32 ringSize = ringCapacity(info); + return ringLastTop(&ctrl->ring, ringSize); +} + +u64a repeatLastTopRange(const union RepeatControl *ctrl, const void *state) { + const u16 *ring = (const u16 *)state; + const struct RepeatRangeControl *xs = &ctrl->range; + assert(xs->num); + return xs->offset + unaligned_load_u16(ring + xs->num - 1); +} + +u64a repeatLastTopBitmap(const union RepeatControl *ctrl) { + const struct RepeatBitmapControl *xs = &ctrl->bitmap; if (!xs->bitmap) { /* last top was too long ago */ return 0; } - return xs->offset + 63 - clz64(xs->bitmap); -} - -u64a repeatLastTopTrailer(const struct RepeatInfo *info, - const union RepeatControl *ctrl) { - const struct RepeatTrailerControl *xs = &ctrl->trailer; - assert(xs->offset >= info->repeatMin); - return xs->offset - info->repeatMin; -} - -u64a repeatNextMatchRing(const struct RepeatInfo *info, - const union RepeatControl *ctrl, const void *state, - u64a offset) { - const struct RepeatRingControl *xs = &ctrl->ring; - const u8 *ring = (const u8 *)state; - const u32 ringSize = ringCapacity(info); - - // We should have at least one top stored. - assert(mmbit_any(ring, ringSize)); - assert(info->repeatMax < REPEAT_INF); - - // Increment offset, as we want the NEXT match. 
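    /* The repeatNextMatch* family answers: given a current offset, what
     * is the earliest offset strictly after it at which this repeat could
     * match? A return of 0 means no further match is possible and the
     * repeat's state can be treated as dead. */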
- offset++; - - const u64a base_offset = xs->offset; - DEBUG_PRINTF("offset=%llu, base_offset=%llu\n", offset, base_offset); - - u64a delta = offset - base_offset; - if (offset < base_offset || delta < info->repeatMin) { - DEBUG_PRINTF("before min repeat\n"); - return base_offset + info->repeatMin; - } - if (offset > ringLastTop(xs, ringSize) + info->repeatMax) { - DEBUG_PRINTF("ring is stale\n"); - return 0; // no more matches - } - - DEBUG_PRINTF("delta=%llu\n", delta); - u64a lower = delta > info->repeatMax ? delta - info->repeatMax : 0; - DEBUG_PRINTF("lower=%llu\n", lower); - - assert(lower < ringSize); - - // First scan, either to xs->last if there's no wrap-around or ringSize - // (end of the underlying multibit) if we are wrapping. - - u32 begin = xs->first + lower; - if (begin >= ringSize) { - // This branch and sub tested a lot faster than using % (integer div). - begin -= ringSize; - } - const u32 end = begin >= xs->last ? ringSize : xs->last; - u32 i = mmbit_iterate_bounded(ring, ringSize, begin, end); - if (i != MMB_INVALID) { - u32 j = i - begin + lower; - return MAX(offset, base_offset + j + info->repeatMin); - } - - // A second scan is necessary if we need to cope with wrap-around in the - // ring buffer. - - if (begin >= xs->last) { - i = mmbit_iterate_bounded(ring, ringSize, 0, xs->last); - if (i != MMB_INVALID) { - u32 j = i + (ringSize - begin) + lower; - return MAX(offset, base_offset + j + info->repeatMin); - } - } - - return 0; -} - -u64a repeatNextMatchRange(const struct RepeatInfo *info, - const union RepeatControl *ctrl, const void *state, - u64a offset) { - const struct RepeatRangeControl *xs = &ctrl->range; - const u16 *ring = (const u16 *)state; - - assert(xs->num > 0); - assert(xs->num <= rangeListCapacity(info)); - assert(rangeListIsOrdered(xs, ring)); - assert(info->repeatMax < REPEAT_INF); - - for (u32 i = 0; i < xs->num; i++) { - u64a base = xs->offset + unaligned_load_u16(ring + i); - u64a first = base + info->repeatMin; - if (offset < first) { - return first; - } - if (offset < base + info->repeatMax) { - return offset + 1; - } - } - - return 0; -} - -u64a repeatNextMatchBitmap(const struct RepeatInfo *info, - const union RepeatControl *ctrl, u64a offset) { - const struct RepeatBitmapControl *xs = &ctrl->bitmap; - const u64a base = xs->offset; - u64a bitmap = xs->bitmap; - - // FIXME: quick exit if there is no match, based on last top in bitmap? - - while (bitmap) { - u64a top = base + findAndClearLSB_64(&bitmap); - if (offset < top + info->repeatMin) { - return top + info->repeatMin; - } - if (offset < top + info->repeatMax) { - return offset + 1; - } - } - - return 0; // No more matches. -} - -u64a repeatNextMatchTrailer(const struct RepeatInfo *info, - const union RepeatControl *ctrl, u64a offset) { - const struct RepeatTrailerControl *xs = &ctrl->trailer; - const u32 m_width = info->repeatMax - info->repeatMin; - - DEBUG_PRINTF("offset=%llu, xs->offset=%llu\n", offset, xs->offset); - DEBUG_PRINTF("{%u,%u} repeat, m_width=%u\n", info->repeatMin, - info->repeatMax, m_width); - - assert(xs->offset >= info->repeatMin); - - if (offset >= xs->offset + m_width) { - DEBUG_PRINTF("no more matches\n"); - return 0; - } - - if (offset >= xs->offset) { - DEBUG_PRINTF("inside most recent match window, next match %llu\n", - offset + 1); - return offset + 1; - } - - // Offset is before the match window, we need to consult the bitmap of - // earlier match offsets. 
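    /* Worked example (illustrative values): with xs->offset = 100 and a
     * query at offset = 95, diff = 5, so the mask (1ULL << 4) - 1 keeps
     * bits 0..3, exactly the recorded matches at offsets 96..99 (bit idx
     * encodes a match at xs->offset - idx - 1). If bits 1 and 3 are set
     * (matches at 98 and 96), the highest surviving bit, idx = 3, gives
     * the earliest next match: 100 - 3 - 1 = 96. */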
- u64a bitmap = xs->bitmap; - - u64a diff = xs->offset - offset; - DEBUG_PRINTF("diff=%llu\n", diff); - if (diff <= 64) { - assert(diff); - bitmap &= (1ULL << (diff - 1)) - 1; - } - DEBUG_PRINTF("bitmap = 0x%llx\n", bitmap); - if (bitmap) { - u32 idx = 63 - clz64(bitmap); - DEBUG_PRINTF("clz=%u, idx = %u -> offset %llu\n", clz64(bitmap), idx, - xs->offset - idx); - DEBUG_PRINTF("next match at %llu\n", xs->offset - idx - 1); - u64a next_match = xs->offset - idx - 1; - assert(next_match > offset); - return next_match; - } - - DEBUG_PRINTF("next match is start of match window, %llu\n", xs->offset); - return xs->offset; -} - -/** \brief Store the first top in the ring buffer. */ -static -void storeInitialRingTop(struct RepeatRingControl *xs, u8 *ring, - u64a offset, const u32 ringSize) { - DEBUG_PRINTF("ring=%p, ringSize=%u\n", ring, ringSize); - xs->offset = offset; - mmbit_clear(ring, ringSize); - mmbit_set(ring, ringSize, 0); - xs->first = 0; - xs->last = 1; -} - -static really_inline -char ringIsStale(const struct RepeatRingControl *xs, const u32 ringSize, - const u64a offset) { - u64a finalMatch = ringLastTop(xs, ringSize); - if (offset - finalMatch >= ringSize) { - DEBUG_PRINTF("all matches in ring are stale\n"); - return 1; - } - - return 0; -} - -void repeatStoreRing(const struct RepeatInfo *info, union RepeatControl *ctrl, - void *state, u64a offset, char is_alive) { - struct RepeatRingControl *xs = &ctrl->ring; - u8 *ring = (u8 *)state; - const u32 ringSize = ringCapacity(info); - assert(ringSize > 0); - - DEBUG_PRINTF("storing top for offset %llu in ring\n", offset); - - if (!is_alive || ringIsStale(xs, ringSize, offset)) { - storeInitialRingTop(xs, ring, offset, ringSize); - } else { - assert(offset > ringLastTop(xs, ringSize)); // Dupe or out of order. - u32 occ = ringOccupancy(xs, ringSize); - u64a diff = offset - xs->offset; - DEBUG_PRINTF("diff=%llu, occ=%u\n", diff, occ); - if (diff >= ringSize) { - u32 push = diff - ringSize + 1; - DEBUG_PRINTF("push ring %u\n", push); - xs->first += push; - if (xs->first >= ringSize) { - xs->first -= ringSize; - } - xs->offset += push; - diff -= push; - occ -= push; - } - - // There's now room in the ring for this top, so we write a run of - // zeroes, then a one. - DEBUG_PRINTF("diff=%llu, occ=%u\n", diff, occ); - assert(diff < ringSize); - assert(diff >= occ); - u32 n = diff - occ; - - u32 i = xs->last + n; - - mmbit_unset_range(ring, ringSize, xs->last, MIN(i, ringSize)); - if (i >= ringSize) { - i -= ringSize; - mmbit_unset_range(ring, ringSize, 0, i); - } - - assert(i != xs->first); - DEBUG_PRINTF("set bit %u\n", i); - mmbit_set(ring, ringSize, i); - xs->last = i + 1; - if (xs->last == ringSize) { - xs->last = 0; - } - } - - // Our ring indices shouldn't have spiraled off into uncharted space. - assert(xs->first < ringSize); - assert(xs->last < ringSize); - -#ifdef DEBUG - DEBUG_PRINTF("post-store ring state\n"); - dumpRing(info, xs, ring); -#endif - - // The final top stored in our ring should be the one we just wrote in. 
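    /* Worked example (illustrative numbers): ringSize = 4 (a {_,3}
     * repeat), xs->offset = 10, tops at 10 and 11 (first = 0, last = 2).
     * A new top at 14 gives diff = 4 >= ringSize, so push = 1: the window
     * slides to xs->offset = 11 and the top at 10 expires. The run of
     * zeroes then marks offsets 12 and 13 as top-free before the bit for
     * 14 is set (wrapping to slot 0), leaving first = last = 1. */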
- assert(ringLastTop(xs, ringSize) == offset); -} - -static really_inline -void storeInitialRangeTop(struct RepeatRangeControl *xs, u16 *ring, - u64a offset) { - xs->offset = offset; - xs->num = 1; - unaligned_store_u16(ring, 0); -} - -void repeatStoreRange(const struct RepeatInfo *info, union RepeatControl *ctrl, - void *state, u64a offset, char is_alive) { - struct RepeatRangeControl *xs = &ctrl->range; - u16 *ring = (u16 *)state; - - if (!is_alive) { - DEBUG_PRINTF("storing initial top at %llu\n", offset); - storeInitialRangeTop(xs, ring, offset); - return; - } - - DEBUG_PRINTF("storing top at %llu, list currently has %u/%u elements\n", - offset, xs->num, rangeListCapacity(info)); - -#ifdef DEBUG - dumpRange(info, xs, ring); -#endif - - // Walk ring from front. Identify the number of stale elements, and shift - // the whole ring to delete them. - u32 i = 0; - for (; i < xs->num; i++) { - u64a this_offset = xs->offset + unaligned_load_u16(ring + i); - DEBUG_PRINTF("this_offset=%llu, diff=%llu\n", this_offset, - offset - this_offset); - if (offset - this_offset <= info->repeatMax) { - break; - } - } - - if (i == xs->num) { - DEBUG_PRINTF("whole ring is stale\n"); - storeInitialRangeTop(xs, ring, offset); - return; - } else if (i > 0) { - DEBUG_PRINTF("expiring %u stale tops\n", i); - u16 first_offset = unaligned_load_u16(ring + i); // first live top - for (u32 j = 0; j < xs->num - i; j++) { - u16 val = unaligned_load_u16(ring + i + j); - assert(val >= first_offset); - unaligned_store_u16(ring + j, val - first_offset); - } - xs->offset += first_offset; - xs->num -= i; - } - -#ifdef DEBUG - DEBUG_PRINTF("post-expire:\n"); - dumpRange(info, xs, ring); -#endif - - if (xs->num == 1) { - goto append; - } - - // Let d = repeatMax - repeatMin - // Examine penultimate entry x[-2]. - // If (offset - x[-2] <= d), then last entry x[-1] can be replaced with - // entry for offset. - assert(xs->num >= 2); - u32 d = info->repeatMax - info->repeatMin; - u64a penultimate_offset = - xs->offset + unaligned_load_u16(ring + xs->num - 2); - if (offset - penultimate_offset <= d) { - assert(offset - xs->offset <= (u16)-1); - unaligned_store_u16(ring + xs->num - 1, offset - xs->offset); - goto done; - } - - // Otherwise, write a new entry for offset and return. - -append: - assert(offset - xs->offset <= (u16)-1); - assert(xs->num < rangeListCapacity(info)); - unaligned_store_u16(ring + xs->num, offset - xs->offset); - xs->num++; - -done: - assert(rangeListIsOrdered(xs, ring)); -} - -void repeatStoreBitmap(const struct RepeatInfo *info, union RepeatControl *ctrl, - u64a offset, char is_alive) { - DEBUG_PRINTF("{%u,%u} repeat, storing top at %llu\n", info->repeatMin, - info->repeatMax, offset); - - struct RepeatBitmapControl *xs = &ctrl->bitmap; - if (!is_alive || !xs->bitmap) { - DEBUG_PRINTF("storing initial top at %llu\n", offset); - xs->offset = offset; - xs->bitmap = 1U; - return; - } - -#ifdef DEBUG - DEBUG_PRINTF("pre-store:\n"); - dumpBitmap(xs); -#endif - - assert(offset >= xs->offset); - - u64a last_top = xs->offset + 63 - clz64(xs->bitmap); - if (offset > last_top + info->repeatMax) { - DEBUG_PRINTF("bitmap stale, storing initial top\n"); - xs->offset = offset; - xs->bitmap = 1U; - return; - } - - u64a diff = offset - xs->offset; - if (diff >= info->repeatMax + 1) { - DEBUG_PRINTF("need expire, diff=%llu\n", diff); - u64a push = diff - info->repeatMax; - xs->offset += push; - xs->bitmap = push >= 64 ? 
0 : xs->bitmap >> push; - DEBUG_PRINTF("pushed xs->offset to %llu\n", xs->offset); - } - - // Write a new entry. - diff = offset - xs->offset; - assert(diff < 64); - xs->bitmap |= (1ULL << diff); - -#ifdef DEBUG - DEBUG_PRINTF("post-store:\n"); - dumpBitmap(xs); -#endif -} - -/** \brief Returns 1 if the ring has a match between (logical) index \a lower - * and \a upper, excluding \a upper. */ -static -int ringHasMatch(const struct RepeatRingControl *xs, const u8 *ring, - const u32 ringSize, u32 lower, u32 upper) { - assert(lower < upper); - assert(lower < ringSize); - assert(upper <= ringSize); - - u32 i = xs->first + lower; - if (i >= ringSize) { - i -= ringSize; - } - - // Performance tweak: if we're looking at a fixed repeat, we can just use - // mmbit_isset. - if (lower + 1 == upper) { - return mmbit_isset(ring, ringSize, i); - } - - u32 end = xs->first + upper; - if (end >= ringSize) { - end -= ringSize; - } - - // First scan, either to end if there's no wrap-around or ringSize (end of - // the underlying multibit) if we are wrapping. - - u32 scan_end = i < end ? end : ringSize; - u32 m = mmbit_iterate_bounded(ring, ringSize, i, scan_end); - if (m != MMB_INVALID) { - return 1; - } - - // A second scan is necessary if we need to cope with wrap-around in the - // ring buffer. - - if (i >= end) { - m = mmbit_iterate_bounded(ring, ringSize, 0, end); - return m != MMB_INVALID; - } - - return 0; -} - -/** Return a mask of ones in bit positions [0..v]. */ -static really_inline -u64a mask_ones_to(u32 v) { - if (v < 63) { - return (1ULL << (v + 1)) - 1; - } else { - return ~(0ULL); - } -} - -void repeatStoreTrailer(const struct RepeatInfo *info, - union RepeatControl *ctrl, u64a offset, char is_alive) { - DEBUG_PRINTF("{%u,%u} repeat, top at %llu\n", info->repeatMin, - info->repeatMax, offset); - - struct RepeatTrailerControl *xs = &ctrl->trailer; - - /* The TRAILER repeat model stores the following data in its control block: - * - * 1. offset, which is the min extent of the most recent match window - * (i.e. corresponding to the most recent top) - * 2. bitmap, which is a bitmap of up to repeatMin matches before - * the min extent offset. - */ - - const u64a next_extent = offset + info->repeatMin; - - if (!is_alive) { - xs->offset = next_extent; - xs->bitmap = 0; - DEBUG_PRINTF("initial top, set extent to %llu\n", next_extent); - return; - } - -#ifdef DEBUG - DEBUG_PRINTF("pre-store:\n"); - dumpTrailer(info, xs); -#endif - - const u32 m_width = info->repeatMax - info->repeatMin; - DEBUG_PRINTF("most recent match window is [%llu,%llu]\n", xs->offset, - xs->offset + m_width); - - assert(next_extent > xs->offset); - u64a diff = next_extent - xs->offset; - DEBUG_PRINTF("diff=%llu, m_width=%u\n", diff, m_width); - - assert(diff); - xs->bitmap = diff < 64 ? xs->bitmap << diff : 0; - - // Switch on bits in the bitmask corresponding to matches in the previous - // match window. - if (diff <= m_width) { - u64a m = mask_ones_to(diff - 1); - xs->bitmap |= m; - } else { - u64a shift = diff - m_width - 1; - if (shift < 64) { - u64a m = mask_ones_to(m_width); - m <<= shift; - xs->bitmap |= m; - } - } - - DEBUG_PRINTF("bitmap=0x%llx\n", xs->bitmap); - - // Update max extent. - xs->offset = next_extent; - - // Trim stale history: we only need repeatMin bytes of history. 
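    /* Worked example: with repeatMin = 5 the mask below is
     * (1ULL << 6) - 1 = 0x3f, keeping bits 0..5, i.e. matches at offsets
     * xs->offset - 1 down to xs->offset - 6; anything older can no longer
     * affect repeatHasMatchTrailer() or repeatNextMatchTrailer(). */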
- if (info->repeatMin < 63) { - u64a mask = (1ULL << (info->repeatMin + 1)) - 1; - xs->bitmap &= mask; - } - -#ifdef DEBUG - DEBUG_PRINTF("post-store:\n"); - dumpTrailer(info, xs); -#endif -} - -enum RepeatMatch repeatHasMatchRing(const struct RepeatInfo *info, - const union RepeatControl *ctrl, - const void *state, u64a offset) { - const struct RepeatRingControl *xs = &ctrl->ring; - const u8 *ring = (const u8 *)state; - const u32 ringSize = ringCapacity(info); - - assert(mmbit_any(ring, ringSize)); - assert(offset >= xs->offset); - - DEBUG_PRINTF("check: offset=%llu, repeat=[%u,%u]\n", offset, - info->repeatMin, info->repeatMax); -#ifdef DEBUG - DEBUG_PRINTF("ring state\n"); - dumpRing(info, xs, ring); -#endif - - if (offset - xs->offset < info->repeatMin) { - DEBUG_PRINTF("haven't even seen repeatMin bytes yet!\n"); - return REPEAT_NOMATCH; - } - - if (offset - ringLastTop(xs, ringSize) >= ringSize) { - DEBUG_PRINTF("ring is stale\n"); - return REPEAT_STALE; - } - - // If we're not stale, delta fits in the range [repeatMin, lastTop + - // repeatMax], which fits in a u32. - assert(offset - xs->offset < UINT32_MAX); - u32 delta = (u32)(offset - xs->offset); - DEBUG_PRINTF("delta=%u\n", delta); - - // Find the bounds on possible matches in the ring buffer. - u32 lower = delta > info->repeatMax ? delta - info->repeatMax : 0; - u32 upper = MIN(delta - info->repeatMin + 1, ringOccupancy(xs, ringSize)); - - if (lower >= upper) { - DEBUG_PRINTF("no matches to check\n"); - return REPEAT_NOMATCH; - } - - DEBUG_PRINTF("possible match indices=[%u,%u]\n", lower, upper); - if (ringHasMatch(xs, ring, ringSize, lower, upper)) { - return REPEAT_MATCH; - } - - return REPEAT_NOMATCH; -} - -enum RepeatMatch repeatHasMatchRange(const struct RepeatInfo *info, - const union RepeatControl *ctrl, - const void *state, u64a offset) { - const struct RepeatRangeControl *xs = &ctrl->range; - const u16 *ring = (const u16 *)state; - - assert(xs->num > 0); - assert(xs->num <= rangeListCapacity(info)); - assert(rangeListIsOrdered(xs, ring)); - - // Walk the ring. For each entry x: - // if (offset - x) falls inside repeat bounds, return success. - - // It may be worth doing tests on first and last elements first to bail - // early if the whole ring is too young or stale. - - DEBUG_PRINTF("check %u (of %u) elements, offset %llu, bounds={%u,%u}\n", - xs->num, rangeListCapacity(info), offset, - info->repeatMin, info->repeatMax); -#ifdef DEBUG - dumpRange(info, xs, ring); -#endif - - // Quick pre-check for minimum. - assert(offset >= xs->offset); - if (offset - xs->offset < info->repeatMin) { - DEBUG_PRINTF("haven't even seen repeatMin bytes yet!\n"); - return REPEAT_NOMATCH; - } - - // We check the most recent offset first, as we can establish staleness. - u64a match = xs->offset + unaligned_load_u16(ring + xs->num - 1); - assert(offset >= match); - u64a diff = offset - match; - if (diff > info->repeatMax) { - DEBUG_PRINTF("range list is stale\n"); - return REPEAT_STALE; - } else if (diff >= info->repeatMin && diff <= info->repeatMax) { - return REPEAT_MATCH; - } - - // Check the other offsets in the list. 
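    /* Only the newest entry can establish staleness: older entries lie
     * even further behind the query offset, so once the newest top is
     * within repeatMax the list as a whole is live. The scan below can
     * therefore only yield REPEAT_MATCH, never REPEAT_STALE. */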
- u32 count = xs->num - 1; - for (u32 i = 0; i < count; i++) { - match = xs->offset + unaligned_load_u16(ring + i); - assert(offset >= match); - diff = offset - match; - if (diff >= info->repeatMin && diff <= info->repeatMax) { - return REPEAT_MATCH; - } - } - - return REPEAT_NOMATCH; -} - -enum RepeatMatch repeatHasMatchBitmap(const struct RepeatInfo *info, - const union RepeatControl *ctrl, - u64a offset) { - const struct RepeatBitmapControl *xs = &ctrl->bitmap; - - DEBUG_PRINTF("checking if offset=%llu is a match\n", offset); - -#ifdef DEBUG - dumpBitmap(xs); -#endif - - u64a bitmap = xs->bitmap; - if (!bitmap) { - DEBUG_PRINTF("no tops; stale\n"); - return REPEAT_STALE; - } - - // Quick pre-check for minimum. - const u64a base = xs->offset; - assert(offset >= base); - if (offset - base < info->repeatMin) { - DEBUG_PRINTF("haven't even seen repeatMin bytes yet!\n"); - return REPEAT_NOMATCH; - } - - // We check the most recent offset first, as we can establish staleness. - u64a match = base + findAndClearMSB_64(&bitmap); - DEBUG_PRINTF("offset=%llu, last_match %llu\n", offset, match); - assert(offset >= match); - u64a diff = offset - match; - if (diff > info->repeatMax) { - DEBUG_PRINTF("stale\n"); - return REPEAT_STALE; - } else if (diff >= info->repeatMin && diff <= info->repeatMax) { - return REPEAT_MATCH; - } - - while (bitmap) { - match = base + findAndClearLSB_64(&bitmap); - DEBUG_PRINTF("offset=%llu, last_match %llu\n", offset, match); - assert(offset >= match); - diff = offset - match; - if (diff >= info->repeatMin && diff <= info->repeatMax) { - return REPEAT_MATCH; - } - } - - return REPEAT_NOMATCH; -} - -enum RepeatMatch repeatHasMatchTrailer(const struct RepeatInfo *info, - const union RepeatControl *ctrl, - u64a offset) { - const struct RepeatTrailerControl *xs = &ctrl->trailer; - const u32 m_width = info->repeatMax - info->repeatMin; - - DEBUG_PRINTF("offset=%llu, xs->offset=%llu, xs->bitmap=0x%llx\n", offset, - xs->offset, xs->bitmap); - - if (offset > xs->offset + m_width) { - DEBUG_PRINTF("stale\n"); - return REPEAT_STALE; - } - - if (offset >= xs->offset) { - DEBUG_PRINTF("in match window\n"); - return REPEAT_MATCH; - } - - if (offset >= xs->offset - info->repeatMin) { - u32 idx = xs->offset - offset - 1; - DEBUG_PRINTF("check bitmap idx %u\n", idx); - assert(idx < 64); - if (xs->bitmap & (1ULL << idx)) { - DEBUG_PRINTF("match in bitmap\n"); - return REPEAT_MATCH; - } - } - - DEBUG_PRINTF("no match\n"); - return REPEAT_NOMATCH; -} - + return xs->offset + 63 - clz64(xs->bitmap); +} + +u64a repeatLastTopTrailer(const struct RepeatInfo *info, + const union RepeatControl *ctrl) { + const struct RepeatTrailerControl *xs = &ctrl->trailer; + assert(xs->offset >= info->repeatMin); + return xs->offset - info->repeatMin; +} + +u64a repeatNextMatchRing(const struct RepeatInfo *info, + const union RepeatControl *ctrl, const void *state, + u64a offset) { + const struct RepeatRingControl *xs = &ctrl->ring; + const u8 *ring = (const u8 *)state; + const u32 ringSize = ringCapacity(info); + + // We should have at least one top stored. + assert(mmbit_any(ring, ringSize)); + assert(info->repeatMax < REPEAT_INF); + + // Increment offset, as we want the NEXT match. 
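    /* Index arithmetic used below (illustrative numbers): a top at
     * logical ring index j sits at base_offset + j and can match only up
     * to repeatMax bytes later, so tops with j < delta - repeatMax are
     * irrelevant. For a {2,4} repeat with delta = 7, only tops at logical
     * indices >= 3 can still produce a match at or after the query. */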
+ offset++; + + const u64a base_offset = xs->offset; + DEBUG_PRINTF("offset=%llu, base_offset=%llu\n", offset, base_offset); + + u64a delta = offset - base_offset; + if (offset < base_offset || delta < info->repeatMin) { + DEBUG_PRINTF("before min repeat\n"); + return base_offset + info->repeatMin; + } + if (offset > ringLastTop(xs, ringSize) + info->repeatMax) { + DEBUG_PRINTF("ring is stale\n"); + return 0; // no more matches + } + + DEBUG_PRINTF("delta=%llu\n", delta); + u64a lower = delta > info->repeatMax ? delta - info->repeatMax : 0; + DEBUG_PRINTF("lower=%llu\n", lower); + + assert(lower < ringSize); + + // First scan, either to xs->last if there's no wrap-around or ringSize + // (end of the underlying multibit) if we are wrapping. + + u32 begin = xs->first + lower; + if (begin >= ringSize) { + // This branch and sub tested a lot faster than using % (integer div). + begin -= ringSize; + } + const u32 end = begin >= xs->last ? ringSize : xs->last; + u32 i = mmbit_iterate_bounded(ring, ringSize, begin, end); + if (i != MMB_INVALID) { + u32 j = i - begin + lower; + return MAX(offset, base_offset + j + info->repeatMin); + } + + // A second scan is necessary if we need to cope with wrap-around in the + // ring buffer. + + if (begin >= xs->last) { + i = mmbit_iterate_bounded(ring, ringSize, 0, xs->last); + if (i != MMB_INVALID) { + u32 j = i + (ringSize - begin) + lower; + return MAX(offset, base_offset + j + info->repeatMin); + } + } + + return 0; +} + +u64a repeatNextMatchRange(const struct RepeatInfo *info, + const union RepeatControl *ctrl, const void *state, + u64a offset) { + const struct RepeatRangeControl *xs = &ctrl->range; + const u16 *ring = (const u16 *)state; + + assert(xs->num > 0); + assert(xs->num <= rangeListCapacity(info)); + assert(rangeListIsOrdered(xs, ring)); + assert(info->repeatMax < REPEAT_INF); + + for (u32 i = 0; i < xs->num; i++) { + u64a base = xs->offset + unaligned_load_u16(ring + i); + u64a first = base + info->repeatMin; + if (offset < first) { + return first; + } + if (offset < base + info->repeatMax) { + return offset + 1; + } + } + + return 0; +} + +u64a repeatNextMatchBitmap(const struct RepeatInfo *info, + const union RepeatControl *ctrl, u64a offset) { + const struct RepeatBitmapControl *xs = &ctrl->bitmap; + const u64a base = xs->offset; + u64a bitmap = xs->bitmap; + + // FIXME: quick exit if there is no match, based on last top in bitmap? + + while (bitmap) { + u64a top = base + findAndClearLSB_64(&bitmap); + if (offset < top + info->repeatMin) { + return top + info->repeatMin; + } + if (offset < top + info->repeatMax) { + return offset + 1; + } + } + + return 0; // No more matches. +} + +u64a repeatNextMatchTrailer(const struct RepeatInfo *info, + const union RepeatControl *ctrl, u64a offset) { + const struct RepeatTrailerControl *xs = &ctrl->trailer; + const u32 m_width = info->repeatMax - info->repeatMin; + + DEBUG_PRINTF("offset=%llu, xs->offset=%llu\n", offset, xs->offset); + DEBUG_PRINTF("{%u,%u} repeat, m_width=%u\n", info->repeatMin, + info->repeatMax, m_width); + + assert(xs->offset >= info->repeatMin); + + if (offset >= xs->offset + m_width) { + DEBUG_PRINTF("no more matches\n"); + return 0; + } + + if (offset >= xs->offset) { + DEBUG_PRINTF("inside most recent match window, next match %llu\n", + offset + 1); + return offset + 1; + } + + // Offset is before the match window, we need to consult the bitmap of + // earlier match offsets. 
+ u64a bitmap = xs->bitmap; + + u64a diff = xs->offset - offset; + DEBUG_PRINTF("diff=%llu\n", diff); + if (diff <= 64) { + assert(diff); + bitmap &= (1ULL << (diff - 1)) - 1; + } + DEBUG_PRINTF("bitmap = 0x%llx\n", bitmap); + if (bitmap) { + u32 idx = 63 - clz64(bitmap); + DEBUG_PRINTF("clz=%u, idx = %u -> offset %llu\n", clz64(bitmap), idx, + xs->offset - idx); + DEBUG_PRINTF("next match at %llu\n", xs->offset - idx - 1); + u64a next_match = xs->offset - idx - 1; + assert(next_match > offset); + return next_match; + } + + DEBUG_PRINTF("next match is start of match window, %llu\n", xs->offset); + return xs->offset; +} + +/** \brief Store the first top in the ring buffer. */ +static +void storeInitialRingTop(struct RepeatRingControl *xs, u8 *ring, + u64a offset, const u32 ringSize) { + DEBUG_PRINTF("ring=%p, ringSize=%u\n", ring, ringSize); + xs->offset = offset; + mmbit_clear(ring, ringSize); + mmbit_set(ring, ringSize, 0); + xs->first = 0; + xs->last = 1; +} + +static really_inline +char ringIsStale(const struct RepeatRingControl *xs, const u32 ringSize, + const u64a offset) { + u64a finalMatch = ringLastTop(xs, ringSize); + if (offset - finalMatch >= ringSize) { + DEBUG_PRINTF("all matches in ring are stale\n"); + return 1; + } + + return 0; +} + +void repeatStoreRing(const struct RepeatInfo *info, union RepeatControl *ctrl, + void *state, u64a offset, char is_alive) { + struct RepeatRingControl *xs = &ctrl->ring; + u8 *ring = (u8 *)state; + const u32 ringSize = ringCapacity(info); + assert(ringSize > 0); + + DEBUG_PRINTF("storing top for offset %llu in ring\n", offset); + + if (!is_alive || ringIsStale(xs, ringSize, offset)) { + storeInitialRingTop(xs, ring, offset, ringSize); + } else { + assert(offset > ringLastTop(xs, ringSize)); // Dupe or out of order. + u32 occ = ringOccupancy(xs, ringSize); + u64a diff = offset - xs->offset; + DEBUG_PRINTF("diff=%llu, occ=%u\n", diff, occ); + if (diff >= ringSize) { + u32 push = diff - ringSize + 1; + DEBUG_PRINTF("push ring %u\n", push); + xs->first += push; + if (xs->first >= ringSize) { + xs->first -= ringSize; + } + xs->offset += push; + diff -= push; + occ -= push; + } + + // There's now room in the ring for this top, so we write a run of + // zeroes, then a one. + DEBUG_PRINTF("diff=%llu, occ=%u\n", diff, occ); + assert(diff < ringSize); + assert(diff >= occ); + u32 n = diff - occ; + + u32 i = xs->last + n; + + mmbit_unset_range(ring, ringSize, xs->last, MIN(i, ringSize)); + if (i >= ringSize) { + i -= ringSize; + mmbit_unset_range(ring, ringSize, 0, i); + } + + assert(i != xs->first); + DEBUG_PRINTF("set bit %u\n", i); + mmbit_set(ring, ringSize, i); + xs->last = i + 1; + if (xs->last == ringSize) { + xs->last = 0; + } + } + + // Our ring indices shouldn't have spiraled off into uncharted space. + assert(xs->first < ringSize); + assert(xs->last < ringSize); + +#ifdef DEBUG + DEBUG_PRINTF("post-store ring state\n"); + dumpRing(info, xs, ring); +#endif + + // The final top stored in our ring should be the one we just wrote in. 
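/*
 * The window-sliding step above, isolated: a top's delta from the ring
 * base must land in [0, ringSize); when it does not, both the base offset
 * and the physical index of logical slot 0 are pushed forward. Stand-alone
 * sketch (illustrative names; the real code also clears the skipped slots
 * with mmbit_unset_range and replaces '%' with a compare-and-subtract,
 * which the source notes benchmarked faster):
 */
#include <stdint.h>

struct mini_ring {
    uint64_t base;  /* offset corresponding to logical slot 0 */
    uint32_t first; /* physical index of logical slot 0 */
};

/* Physical slot for a top at `offset`; tops must arrive in increasing
 * order. */
static uint32_t ring_slot(struct mini_ring *r, uint32_t ring_size,
                          uint64_t offset) {
    uint64_t delta = offset - r->base;
    if (delta >= ring_size) {
        uint64_t push = delta - ring_size + 1; /* slide the window */
        r->first = (uint32_t)((r->first + push) % ring_size);
        r->base += push;
        delta -= push;
    }
    return (uint32_t)((r->first + delta) % ring_size);
}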
+ assert(ringLastTop(xs, ringSize) == offset); +} + +static really_inline +void storeInitialRangeTop(struct RepeatRangeControl *xs, u16 *ring, + u64a offset) { + xs->offset = offset; + xs->num = 1; + unaligned_store_u16(ring, 0); +} + +void repeatStoreRange(const struct RepeatInfo *info, union RepeatControl *ctrl, + void *state, u64a offset, char is_alive) { + struct RepeatRangeControl *xs = &ctrl->range; + u16 *ring = (u16 *)state; + + if (!is_alive) { + DEBUG_PRINTF("storing initial top at %llu\n", offset); + storeInitialRangeTop(xs, ring, offset); + return; + } + + DEBUG_PRINTF("storing top at %llu, list currently has %u/%u elements\n", + offset, xs->num, rangeListCapacity(info)); + +#ifdef DEBUG + dumpRange(info, xs, ring); +#endif + + // Walk ring from front. Identify the number of stale elements, and shift + // the whole ring to delete them. + u32 i = 0; + for (; i < xs->num; i++) { + u64a this_offset = xs->offset + unaligned_load_u16(ring + i); + DEBUG_PRINTF("this_offset=%llu, diff=%llu\n", this_offset, + offset - this_offset); + if (offset - this_offset <= info->repeatMax) { + break; + } + } + + if (i == xs->num) { + DEBUG_PRINTF("whole ring is stale\n"); + storeInitialRangeTop(xs, ring, offset); + return; + } else if (i > 0) { + DEBUG_PRINTF("expiring %u stale tops\n", i); + u16 first_offset = unaligned_load_u16(ring + i); // first live top + for (u32 j = 0; j < xs->num - i; j++) { + u16 val = unaligned_load_u16(ring + i + j); + assert(val >= first_offset); + unaligned_store_u16(ring + j, val - first_offset); + } + xs->offset += first_offset; + xs->num -= i; + } + +#ifdef DEBUG + DEBUG_PRINTF("post-expire:\n"); + dumpRange(info, xs, ring); +#endif + + if (xs->num == 1) { + goto append; + } + + // Let d = repeatMax - repeatMin + // Examine penultimate entry x[-2]. + // If (offset - x[-2] <= d), then last entry x[-1] can be replaced with + // entry for offset. + assert(xs->num >= 2); + u32 d = info->repeatMax - info->repeatMin; + u64a penultimate_offset = + xs->offset + unaligned_load_u16(ring + xs->num - 2); + if (offset - penultimate_offset <= d) { + assert(offset - xs->offset <= (u16)-1); + unaligned_store_u16(ring + xs->num - 1, offset - xs->offset); + goto done; + } + + // Otherwise, write a new entry for offset and return. + +append: + assert(offset - xs->offset <= (u16)-1); + assert(xs->num < rangeListCapacity(info)); + unaligned_store_u16(ring + xs->num, offset - xs->offset); + xs->num++; + +done: + assert(rangeListIsOrdered(xs, ring)); +} + +void repeatStoreBitmap(const struct RepeatInfo *info, union RepeatControl *ctrl, + u64a offset, char is_alive) { + DEBUG_PRINTF("{%u,%u} repeat, storing top at %llu\n", info->repeatMin, + info->repeatMax, offset); + + struct RepeatBitmapControl *xs = &ctrl->bitmap; + if (!is_alive || !xs->bitmap) { + DEBUG_PRINTF("storing initial top at %llu\n", offset); + xs->offset = offset; + xs->bitmap = 1U; + return; + } + +#ifdef DEBUG + DEBUG_PRINTF("pre-store:\n"); + dumpBitmap(xs); +#endif + + assert(offset >= xs->offset); + + u64a last_top = xs->offset + 63 - clz64(xs->bitmap); + if (offset > last_top + info->repeatMax) { + DEBUG_PRINTF("bitmap stale, storing initial top\n"); + xs->offset = offset; + xs->bitmap = 1U; + return; + } + + u64a diff = offset - xs->offset; + if (diff >= info->repeatMax + 1) { + DEBUG_PRINTF("need expire, diff=%llu\n", diff); + u64a push = diff - info->repeatMax; + xs->offset += push; + xs->bitmap = push >= 64 ? 
0 : xs->bitmap >> push; + DEBUG_PRINTF("pushed xs->offset to %llu\n", xs->offset); + } + + // Write a new entry. + diff = offset - xs->offset; + assert(diff < 64); + xs->bitmap |= (1ULL << diff); + +#ifdef DEBUG + DEBUG_PRINTF("post-store:\n"); + dumpBitmap(xs); +#endif +} + +/** \brief Returns 1 if the ring has a match between (logical) index \a lower + * and \a upper, excluding \a upper. */ +static +int ringHasMatch(const struct RepeatRingControl *xs, const u8 *ring, + const u32 ringSize, u32 lower, u32 upper) { + assert(lower < upper); + assert(lower < ringSize); + assert(upper <= ringSize); + + u32 i = xs->first + lower; + if (i >= ringSize) { + i -= ringSize; + } + + // Performance tweak: if we're looking at a fixed repeat, we can just use + // mmbit_isset. + if (lower + 1 == upper) { + return mmbit_isset(ring, ringSize, i); + } + + u32 end = xs->first + upper; + if (end >= ringSize) { + end -= ringSize; + } + + // First scan, either to end if there's no wrap-around or ringSize (end of + // the underlying multibit) if we are wrapping. + + u32 scan_end = i < end ? end : ringSize; + u32 m = mmbit_iterate_bounded(ring, ringSize, i, scan_end); + if (m != MMB_INVALID) { + return 1; + } + + // A second scan is necessary if we need to cope with wrap-around in the + // ring buffer. + + if (i >= end) { + m = mmbit_iterate_bounded(ring, ringSize, 0, end); + return m != MMB_INVALID; + } + + return 0; +} + +/** Return a mask of ones in bit positions [0..v]. */ +static really_inline +u64a mask_ones_to(u32 v) { + if (v < 63) { + return (1ULL << (v + 1)) - 1; + } else { + return ~(0ULL); + } +} + +void repeatStoreTrailer(const struct RepeatInfo *info, + union RepeatControl *ctrl, u64a offset, char is_alive) { + DEBUG_PRINTF("{%u,%u} repeat, top at %llu\n", info->repeatMin, + info->repeatMax, offset); + + struct RepeatTrailerControl *xs = &ctrl->trailer; + + /* The TRAILER repeat model stores the following data in its control block: + * + * 1. offset, which is the min extent of the most recent match window + * (i.e. corresponding to the most recent top) + * 2. bitmap, which is a bitmap of up to repeatMin matches before + * the min extent offset. + */ + + const u64a next_extent = offset + info->repeatMin; + + if (!is_alive) { + xs->offset = next_extent; + xs->bitmap = 0; + DEBUG_PRINTF("initial top, set extent to %llu\n", next_extent); + return; + } + +#ifdef DEBUG + DEBUG_PRINTF("pre-store:\n"); + dumpTrailer(info, xs); +#endif + + const u32 m_width = info->repeatMax - info->repeatMin; + DEBUG_PRINTF("most recent match window is [%llu,%llu]\n", xs->offset, + xs->offset + m_width); + + assert(next_extent > xs->offset); + u64a diff = next_extent - xs->offset; + DEBUG_PRINTF("diff=%llu, m_width=%u\n", diff, m_width); + + assert(diff); + xs->bitmap = diff < 64 ? xs->bitmap << diff : 0; + + // Switch on bits in the bitmask corresponding to matches in the previous + // match window. + if (diff <= m_width) { + u64a m = mask_ones_to(diff - 1); + xs->bitmap |= m; + } else { + u64a shift = diff - m_width - 1; + if (shift < 64) { + u64a m = mask_ones_to(m_width); + m <<= shift; + xs->bitmap |= m; + } + } + + DEBUG_PRINTF("bitmap=0x%llx\n", xs->bitmap); + + // Update max extent. + xs->offset = next_extent; + + // Trim stale history: we only need repeatMin bytes of history. 
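/*
 * Worked example of the trailer update above for a {2,4} repeat
 * (repeatMin=2, repeatMax=4, m_width=2):
 *
 *   top at 10: xs->offset <- 12 (min extent), bitmap <- 0;
 *              match window is [12, 14].
 *   top at 13: next_extent = 15, diff = 3; bitmap <<= 3;
 *              diff > m_width, so shift = diff - m_width - 1 = 0 and
 *              bitmap |= mask_ones_to(m_width) << 0 = 0b111.
 *              Bit i records a match at offset xs->offset - i - 1, so
 *              bits 0..2 now cover offsets 14, 13 and 12 from the old
 *              window; xs->offset <- 15 and the new window is [15, 17].
 *
 * repeatHasMatchTrailer() answers queries that fall below the current
 * window straight out of this bitmap.
 */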
+ if (info->repeatMin < 63) { + u64a mask = (1ULL << (info->repeatMin + 1)) - 1; + xs->bitmap &= mask; + } + +#ifdef DEBUG + DEBUG_PRINTF("post-store:\n"); + dumpTrailer(info, xs); +#endif +} + +enum RepeatMatch repeatHasMatchRing(const struct RepeatInfo *info, + const union RepeatControl *ctrl, + const void *state, u64a offset) { + const struct RepeatRingControl *xs = &ctrl->ring; + const u8 *ring = (const u8 *)state; + const u32 ringSize = ringCapacity(info); + + assert(mmbit_any(ring, ringSize)); + assert(offset >= xs->offset); + + DEBUG_PRINTF("check: offset=%llu, repeat=[%u,%u]\n", offset, + info->repeatMin, info->repeatMax); +#ifdef DEBUG + DEBUG_PRINTF("ring state\n"); + dumpRing(info, xs, ring); +#endif + + if (offset - xs->offset < info->repeatMin) { + DEBUG_PRINTF("haven't even seen repeatMin bytes yet!\n"); + return REPEAT_NOMATCH; + } + + if (offset - ringLastTop(xs, ringSize) >= ringSize) { + DEBUG_PRINTF("ring is stale\n"); + return REPEAT_STALE; + } + + // If we're not stale, delta fits in the range [repeatMin, lastTop + + // repeatMax], which fits in a u32. + assert(offset - xs->offset < UINT32_MAX); + u32 delta = (u32)(offset - xs->offset); + DEBUG_PRINTF("delta=%u\n", delta); + + // Find the bounds on possible matches in the ring buffer. + u32 lower = delta > info->repeatMax ? delta - info->repeatMax : 0; + u32 upper = MIN(delta - info->repeatMin + 1, ringOccupancy(xs, ringSize)); + + if (lower >= upper) { + DEBUG_PRINTF("no matches to check\n"); + return REPEAT_NOMATCH; + } + + DEBUG_PRINTF("possible match indices=[%u,%u]\n", lower, upper); + if (ringHasMatch(xs, ring, ringSize, lower, upper)) { + return REPEAT_MATCH; + } + + return REPEAT_NOMATCH; +} + +enum RepeatMatch repeatHasMatchRange(const struct RepeatInfo *info, + const union RepeatControl *ctrl, + const void *state, u64a offset) { + const struct RepeatRangeControl *xs = &ctrl->range; + const u16 *ring = (const u16 *)state; + + assert(xs->num > 0); + assert(xs->num <= rangeListCapacity(info)); + assert(rangeListIsOrdered(xs, ring)); + + // Walk the ring. For each entry x: + // if (offset - x) falls inside repeat bounds, return success. + + // It may be worth doing tests on first and last elements first to bail + // early if the whole ring is too young or stale. + + DEBUG_PRINTF("check %u (of %u) elements, offset %llu, bounds={%u,%u}\n", + xs->num, rangeListCapacity(info), offset, + info->repeatMin, info->repeatMax); +#ifdef DEBUG + dumpRange(info, xs, ring); +#endif + + // Quick pre-check for minimum. + assert(offset >= xs->offset); + if (offset - xs->offset < info->repeatMin) { + DEBUG_PRINTF("haven't even seen repeatMin bytes yet!\n"); + return REPEAT_NOMATCH; + } + + // We check the most recent offset first, as we can establish staleness. + u64a match = xs->offset + unaligned_load_u16(ring + xs->num - 1); + assert(offset >= match); + u64a diff = offset - match; + if (diff > info->repeatMax) { + DEBUG_PRINTF("range list is stale\n"); + return REPEAT_STALE; + } else if (diff >= info->repeatMin && diff <= info->repeatMax) { + return REPEAT_MATCH; + } + + // Check the other offsets in the list. 
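/*
 * The newest-entry test above is what makes staleness cheap: the most
 * recent top is the one closest to `offset`, so if even it lies more than
 * repeatMax behind, every older top does too, and later offsets only widen
 * the gap. Reference version over a plain ordered array (illustrative; the
 * real state is the packed u16 delta list):
 */
#include <stdint.h>

enum ref_match { REF_NOMATCH, REF_MATCH, REF_STALE };

/* Assumes at least one stored top. */
static enum ref_match has_match_ref(const uint64_t *tops, unsigned num,
                                    uint32_t min, uint32_t max,
                                    uint64_t offset) {
    uint64_t d = offset - tops[num - 1];  /* newest top first */
    if (d > max) {
        return REF_STALE;                 /* no later offset can match */
    }
    if (d >= min) {
        return REF_MATCH;
    }
    for (unsigned i = 0; i + 1 < num; i++) {
        d = offset - tops[i];
        if (d >= min && d <= max) {
            return REF_MATCH;
        }
    }
    return REF_NOMATCH;
}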
+ u32 count = xs->num - 1; + for (u32 i = 0; i < count; i++) { + match = xs->offset + unaligned_load_u16(ring + i); + assert(offset >= match); + diff = offset - match; + if (diff >= info->repeatMin && diff <= info->repeatMax) { + return REPEAT_MATCH; + } + } + + return REPEAT_NOMATCH; +} + +enum RepeatMatch repeatHasMatchBitmap(const struct RepeatInfo *info, + const union RepeatControl *ctrl, + u64a offset) { + const struct RepeatBitmapControl *xs = &ctrl->bitmap; + + DEBUG_PRINTF("checking if offset=%llu is a match\n", offset); + +#ifdef DEBUG + dumpBitmap(xs); +#endif + + u64a bitmap = xs->bitmap; + if (!bitmap) { + DEBUG_PRINTF("no tops; stale\n"); + return REPEAT_STALE; + } + + // Quick pre-check for minimum. + const u64a base = xs->offset; + assert(offset >= base); + if (offset - base < info->repeatMin) { + DEBUG_PRINTF("haven't even seen repeatMin bytes yet!\n"); + return REPEAT_NOMATCH; + } + + // We check the most recent offset first, as we can establish staleness. + u64a match = base + findAndClearMSB_64(&bitmap); + DEBUG_PRINTF("offset=%llu, last_match %llu\n", offset, match); + assert(offset >= match); + u64a diff = offset - match; + if (diff > info->repeatMax) { + DEBUG_PRINTF("stale\n"); + return REPEAT_STALE; + } else if (diff >= info->repeatMin && diff <= info->repeatMax) { + return REPEAT_MATCH; + } + + while (bitmap) { + match = base + findAndClearLSB_64(&bitmap); + DEBUG_PRINTF("offset=%llu, last_match %llu\n", offset, match); + assert(offset >= match); + diff = offset - match; + if (diff >= info->repeatMin && diff <= info->repeatMax) { + return REPEAT_MATCH; + } + } + + return REPEAT_NOMATCH; +} + +enum RepeatMatch repeatHasMatchTrailer(const struct RepeatInfo *info, + const union RepeatControl *ctrl, + u64a offset) { + const struct RepeatTrailerControl *xs = &ctrl->trailer; + const u32 m_width = info->repeatMax - info->repeatMin; + + DEBUG_PRINTF("offset=%llu, xs->offset=%llu, xs->bitmap=0x%llx\n", offset, + xs->offset, xs->bitmap); + + if (offset > xs->offset + m_width) { + DEBUG_PRINTF("stale\n"); + return REPEAT_STALE; + } + + if (offset >= xs->offset) { + DEBUG_PRINTF("in match window\n"); + return REPEAT_MATCH; + } + + if (offset >= xs->offset - info->repeatMin) { + u32 idx = xs->offset - offset - 1; + DEBUG_PRINTF("check bitmap idx %u\n", idx); + assert(idx < 64); + if (xs->bitmap & (1ULL << idx)) { + DEBUG_PRINTF("match in bitmap\n"); + return REPEAT_MATCH; + } + } + + DEBUG_PRINTF("no match\n"); + return REPEAT_NOMATCH; +} + /** \brief True if the given value can be packed into len bytes. */ -static really_inline +static really_inline int fits_in_len_bytes(u64a val, u32 len) { if (len >= 8) { return 1; @@ -896,205 +896,205 @@ int fits_in_len_bytes(u64a val, u32 len) { } static really_inline -void storePackedRelative(char *dest, u64a val, u64a offset, u64a max, u32 len) { - assert(val <= offset); +void storePackedRelative(char *dest, u64a val, u64a offset, u64a max, u32 len) { + assert(val <= offset); assert(fits_in_len_bytes(max, len)); - u64a delta = offset - val; - if (delta >= max) { - delta = max; - } - DEBUG_PRINTF("delta %llu\n", delta); + u64a delta = offset - val; + if (delta >= max) { + delta = max; + } + DEBUG_PRINTF("delta %llu\n", delta); assert(fits_in_len_bytes(delta, len)); - partial_store_u64a(dest, delta, len); -} - -static -void repeatPackRing(char *dest, const struct RepeatInfo *info, - const union RepeatControl *ctrl, u64a offset) { - const struct RepeatRingControl *xs = &ctrl->ring; - const u32 ring_indices_len = info->repeatMax < 254 ? 
2 : 4; - const u32 offset_len = info->packedCtrlSize - ring_indices_len; - - // Write out packed relative base offset. - assert(info->packedCtrlSize > ring_indices_len); - storePackedRelative(dest, xs->offset, offset, info->horizon, offset_len); - - // Write out ring indices. - if (ring_indices_len == 4) { - unaligned_store_u16(dest + offset_len, xs->first); - unaligned_store_u16(dest + offset_len + 2, xs->last); - } else { - assert(xs->first < 256 && xs->last < 256); - u8 *indices = (u8 *)dest + offset_len; - indices[0] = xs->first; - indices[1] = xs->last; - } -} - -static -void repeatPackOffset(char *dest, const struct RepeatInfo *info, - const union RepeatControl *ctrl, u64a offset) { - const struct RepeatOffsetControl *xs = &ctrl->offset; - DEBUG_PRINTF("packing offset %llu [h %u]\n", xs->offset, info->horizon); + partial_store_u64a(dest, delta, len); +} + +static +void repeatPackRing(char *dest, const struct RepeatInfo *info, + const union RepeatControl *ctrl, u64a offset) { + const struct RepeatRingControl *xs = &ctrl->ring; + const u32 ring_indices_len = info->repeatMax < 254 ? 2 : 4; + const u32 offset_len = info->packedCtrlSize - ring_indices_len; + + // Write out packed relative base offset. + assert(info->packedCtrlSize > ring_indices_len); + storePackedRelative(dest, xs->offset, offset, info->horizon, offset_len); + + // Write out ring indices. + if (ring_indices_len == 4) { + unaligned_store_u16(dest + offset_len, xs->first); + unaligned_store_u16(dest + offset_len + 2, xs->last); + } else { + assert(xs->first < 256 && xs->last < 256); + u8 *indices = (u8 *)dest + offset_len; + indices[0] = xs->first; + indices[1] = xs->last; + } +} + +static +void repeatPackOffset(char *dest, const struct RepeatInfo *info, + const union RepeatControl *ctrl, u64a offset) { + const struct RepeatOffsetControl *xs = &ctrl->offset; + DEBUG_PRINTF("packing offset %llu [h %u]\n", xs->offset, info->horizon); if (!info->packedCtrlSize) { assert(info->type == REPEAT_ALWAYS); DEBUG_PRINTF("externally guarded .*\n"); return; } - storePackedRelative(dest, xs->offset, offset, info->horizon, - info->packedCtrlSize); -} - -static -void repeatPackRange(char *dest, const struct RepeatInfo *info, - const union RepeatControl *ctrl, u64a offset) { - const struct RepeatRangeControl *xs = &ctrl->range; - - // Write out packed relative base offset. - assert(info->packedCtrlSize > 1); - storePackedRelative(dest, xs->offset, offset, info->horizon, - info->packedCtrlSize - 1); - - // Write out range number of elements. - dest[info->packedCtrlSize - 1] = xs->num; -} - -static -void repeatPackBitmap(char *dest, const struct RepeatInfo *info, - const union RepeatControl *ctrl, u64a offset) { - const struct RepeatBitmapControl *xs = &ctrl->bitmap; - const u32 bound = info->repeatMax; - - assert(offset >= xs->offset); - u64a new_base = offset > bound ? offset - bound : 0; - - // Shift bitmap to begin at new_base rather than xs->offset. - u64a bitmap = xs->bitmap; - if (new_base >= xs->offset) { - u64a shift = new_base - xs->offset; - bitmap = shift < 64 ? bitmap >> shift : 0; - } else { - u64a shift = xs->offset - new_base; - bitmap = shift < 64 ? bitmap << shift : 0; - } - - DEBUG_PRINTF("packing %llu into %u bytes\n", bitmap, info->packedCtrlSize); - - // Write out packed bitmap. 
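/*
 * The relative-offset packing shared by the repeatPack* and repeatUnpack*
 * helpers, sketched stand-alone: absolute stream offsets are unbounded,
 * but the distance back from the current offset is capped by the repeat's
 * horizon, so the delta is saturated and only `len` bytes are stored.
 * Illustrative version assuming a little-endian host (the real
 * partial_store_u64a/partial_load_u64a hide that detail):
 */
#include <stdint.h>
#include <string.h>

static void store_rel(uint8_t *dst, uint64_t val, uint64_t offset,
                      uint64_t horizon, uint32_t len) {
    uint64_t delta = offset - val;
    if (delta >= horizon) {
        delta = horizon;          /* at/past the horizon: saturate */
    }
    memcpy(dst, &delta, len);     /* low `len` bytes of the delta */
}

static uint64_t load_rel(const uint8_t *src, uint64_t offset, uint32_t len) {
    uint64_t delta = 0;
    memcpy(&delta, src, len);
    return offset - delta;        /* reconstruct the absolute offset */
}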
+ storePackedRelative(dest, xs->offset, offset, info->horizon, + info->packedCtrlSize); +} + +static +void repeatPackRange(char *dest, const struct RepeatInfo *info, + const union RepeatControl *ctrl, u64a offset) { + const struct RepeatRangeControl *xs = &ctrl->range; + + // Write out packed relative base offset. + assert(info->packedCtrlSize > 1); + storePackedRelative(dest, xs->offset, offset, info->horizon, + info->packedCtrlSize - 1); + + // Write out range number of elements. + dest[info->packedCtrlSize - 1] = xs->num; +} + +static +void repeatPackBitmap(char *dest, const struct RepeatInfo *info, + const union RepeatControl *ctrl, u64a offset) { + const struct RepeatBitmapControl *xs = &ctrl->bitmap; + const u32 bound = info->repeatMax; + + assert(offset >= xs->offset); + u64a new_base = offset > bound ? offset - bound : 0; + + // Shift bitmap to begin at new_base rather than xs->offset. + u64a bitmap = xs->bitmap; + if (new_base >= xs->offset) { + u64a shift = new_base - xs->offset; + bitmap = shift < 64 ? bitmap >> shift : 0; + } else { + u64a shift = xs->offset - new_base; + bitmap = shift < 64 ? bitmap << shift : 0; + } + + DEBUG_PRINTF("packing %llu into %u bytes\n", bitmap, info->packedCtrlSize); + + // Write out packed bitmap. assert(fits_in_len_bytes(bitmap, info->packedCtrlSize)); - partial_store_u64a(dest, bitmap, info->packedCtrlSize); -} - -static -void repeatPackSparseOptimalP(char *dest, const struct RepeatInfo *info, - const union RepeatControl *ctrl, u64a offset) { - const struct RepeatRingControl *xs = &ctrl->ring; - // set ring index pointer according to patch count - const u32 ring_indices_len = info->patchCount < 254 ? 2 : 4; - const u32 offset_len = info->packedCtrlSize - ring_indices_len; - - // Write out packed relative base offset. - assert(info->packedCtrlSize > ring_indices_len); - storePackedRelative(dest, xs->offset, offset, info->horizon, offset_len); - - // Write out ring indices. - if (ring_indices_len == 4) { - unaligned_store_u16(dest + offset_len, xs->first); - unaligned_store_u16(dest + offset_len + 2, xs->last); - } else { - assert(xs->first < 256 && xs->last < 256); - u8 *indices = (u8 *)dest + offset_len; - indices[0] = xs->first; - indices[1] = xs->last; - } - -} - -static -void repeatPackTrailer(char *dest, const struct RepeatInfo *info, - const union RepeatControl *ctrl, u64a offset) { - const struct RepeatTrailerControl *xs = &ctrl->trailer; - - DEBUG_PRINTF("saving: offset=%llu, xs->offset=%llu, xs->bitmap=0x%llx\n", - offset, xs->offset, xs->bitmap); - - // XXX: xs->offset may be zero in the NFA path (effectively uninitialized). - u64a top; - if (xs->offset) { - assert(xs->offset >= info->repeatMin); - top = xs->offset - info->repeatMin; - } else { - top = 0; - } - - top = offset - top; // Pack top relative to offset. 
- - u64a v[2]; - v[0] = MIN(top, info->horizon); - v[1] = xs->bitmap; - - pack_bits_64(dest, v, info->packedFieldSizes, 2); -} - -void repeatPack(char *dest, const struct RepeatInfo *info, - const union RepeatControl *ctrl, u64a offset) { - assert(dest && info && ctrl); - - switch ((enum RepeatType)info->type) { - case REPEAT_RING: - repeatPackRing(dest, info, ctrl, offset); - break; - case REPEAT_FIRST: - case REPEAT_LAST: - repeatPackOffset(dest, info, ctrl, offset); - break; - case REPEAT_RANGE: - repeatPackRange(dest, info, ctrl, offset); - break; - case REPEAT_BITMAP: - repeatPackBitmap(dest, info, ctrl, offset); - break; - case REPEAT_SPARSE_OPTIMAL_P: - repeatPackSparseOptimalP(dest, info, ctrl, offset); - break; - case REPEAT_TRAILER: - repeatPackTrailer(dest, info, ctrl, offset); - break; + partial_store_u64a(dest, bitmap, info->packedCtrlSize); +} + +static +void repeatPackSparseOptimalP(char *dest, const struct RepeatInfo *info, + const union RepeatControl *ctrl, u64a offset) { + const struct RepeatRingControl *xs = &ctrl->ring; + // set ring index pointer according to patch count + const u32 ring_indices_len = info->patchCount < 254 ? 2 : 4; + const u32 offset_len = info->packedCtrlSize - ring_indices_len; + + // Write out packed relative base offset. + assert(info->packedCtrlSize > ring_indices_len); + storePackedRelative(dest, xs->offset, offset, info->horizon, offset_len); + + // Write out ring indices. + if (ring_indices_len == 4) { + unaligned_store_u16(dest + offset_len, xs->first); + unaligned_store_u16(dest + offset_len + 2, xs->last); + } else { + assert(xs->first < 256 && xs->last < 256); + u8 *indices = (u8 *)dest + offset_len; + indices[0] = xs->first; + indices[1] = xs->last; + } + +} + +static +void repeatPackTrailer(char *dest, const struct RepeatInfo *info, + const union RepeatControl *ctrl, u64a offset) { + const struct RepeatTrailerControl *xs = &ctrl->trailer; + + DEBUG_PRINTF("saving: offset=%llu, xs->offset=%llu, xs->bitmap=0x%llx\n", + offset, xs->offset, xs->bitmap); + + // XXX: xs->offset may be zero in the NFA path (effectively uninitialized). + u64a top; + if (xs->offset) { + assert(xs->offset >= info->repeatMin); + top = xs->offset - info->repeatMin; + } else { + top = 0; + } + + top = offset - top; // Pack top relative to offset. 
+ + u64a v[2]; + v[0] = MIN(top, info->horizon); + v[1] = xs->bitmap; + + pack_bits_64(dest, v, info->packedFieldSizes, 2); +} + +void repeatPack(char *dest, const struct RepeatInfo *info, + const union RepeatControl *ctrl, u64a offset) { + assert(dest && info && ctrl); + + switch ((enum RepeatType)info->type) { + case REPEAT_RING: + repeatPackRing(dest, info, ctrl, offset); + break; + case REPEAT_FIRST: + case REPEAT_LAST: + repeatPackOffset(dest, info, ctrl, offset); + break; + case REPEAT_RANGE: + repeatPackRange(dest, info, ctrl, offset); + break; + case REPEAT_BITMAP: + repeatPackBitmap(dest, info, ctrl, offset); + break; + case REPEAT_SPARSE_OPTIMAL_P: + repeatPackSparseOptimalP(dest, info, ctrl, offset); + break; + case REPEAT_TRAILER: + repeatPackTrailer(dest, info, ctrl, offset); + break; case REPEAT_ALWAYS: /* nothing to do - no state */ break; - } -} - -static really_inline -u64a loadPackedRelative(const char *src, u64a offset, u32 len) { - u64a delta = partial_load_u64a(src, len); - DEBUG_PRINTF("delta %llu\n", delta); - assert(offset >= delta); - return offset - delta; -} - -static -void repeatUnpackRing(const char *src, const struct RepeatInfo *info, - u64a offset, union RepeatControl *ctrl) { - struct RepeatRingControl *xs = &ctrl->ring; - const u32 ring_indices_len = info->repeatMax < 254 ? 2 : 4; - const u32 offset_len = info->packedCtrlSize - ring_indices_len; - xs->offset = loadPackedRelative(src, offset, offset_len); - if (ring_indices_len == 4) { - xs->first = unaligned_load_u16(src + offset_len); - xs->last = unaligned_load_u16(src + offset_len + 2); - } else { - const u8 *indices = (const u8 *)src + offset_len; - xs->first = indices[0]; - xs->last = indices[1]; - } -} - -static -void repeatUnpackOffset(const char *src, const struct RepeatInfo *info, - u64a offset, union RepeatControl *ctrl) { - struct RepeatOffsetControl *xs = &ctrl->offset; + } +} + +static really_inline +u64a loadPackedRelative(const char *src, u64a offset, u32 len) { + u64a delta = partial_load_u64a(src, len); + DEBUG_PRINTF("delta %llu\n", delta); + assert(offset >= delta); + return offset - delta; +} + +static +void repeatUnpackRing(const char *src, const struct RepeatInfo *info, + u64a offset, union RepeatControl *ctrl) { + struct RepeatRingControl *xs = &ctrl->ring; + const u32 ring_indices_len = info->repeatMax < 254 ? 
2 : 4; + const u32 offset_len = info->packedCtrlSize - ring_indices_len; + xs->offset = loadPackedRelative(src, offset, offset_len); + if (ring_indices_len == 4) { + xs->first = unaligned_load_u16(src + offset_len); + xs->last = unaligned_load_u16(src + offset_len + 2); + } else { + const u8 *indices = (const u8 *)src + offset_len; + xs->first = indices[0]; + xs->last = indices[1]; + } +} + +static +void repeatUnpackOffset(const char *src, const struct RepeatInfo *info, + u64a offset, union RepeatControl *ctrl) { + struct RepeatOffsetControl *xs = &ctrl->offset; if (!info->packedCtrlSize) { assert(info->type == REPEAT_ALWAYS); DEBUG_PRINTF("externally guarded .*\n"); @@ -1102,503 +1102,503 @@ void repeatUnpackOffset(const char *src, const struct RepeatInfo *info, } else { xs->offset = loadPackedRelative(src, offset, info->packedCtrlSize); } - DEBUG_PRINTF("unpacking offset %llu [h%u]\n", xs->offset, - info->horizon); -} - -static -void repeatUnpackRange(const char *src, const struct RepeatInfo *info, - u64a offset, union RepeatControl *ctrl) { - struct RepeatRangeControl *xs = &ctrl->range; - xs->offset = loadPackedRelative(src, offset, info->packedCtrlSize - 1); - xs->num = src[info->packedCtrlSize - 1]; -} - -static -void repeatUnpackBitmap(const char *src, const struct RepeatInfo *info, - u64a offset, union RepeatControl *ctrl) { - struct RepeatBitmapControl *xs = &ctrl->bitmap; - xs->offset = offset > info->repeatMax ? offset - info->repeatMax : 0; - xs->bitmap = partial_load_u64a(src, info->packedCtrlSize); -} - -static -void repeatUnpackSparseOptimalP(const char *src, const struct RepeatInfo *info, - u64a offset, union RepeatControl *ctrl) { - struct RepeatRingControl *xs = &ctrl->ring; - const u32 ring_indices_len = info->patchCount < 254 ? 2 : 4; - const u32 offset_len = info->packedCtrlSize - ring_indices_len; - xs->offset = loadPackedRelative(src, offset, offset_len); - if (ring_indices_len == 4) { - xs->first = unaligned_load_u16(src + offset_len); - xs->last = unaligned_load_u16(src + offset_len + 2); - } else { - const u8 *indices = (const u8 *)src + offset_len; - xs->first = indices[0]; - xs->last = indices[1]; - } -} - -static -void repeatUnpackTrailer(const char *src, const struct RepeatInfo *info, - u64a offset, union RepeatControl *ctrl) { - struct RepeatTrailerControl *xs = &ctrl->trailer; - - u64a v[2]; - unpack_bits_64(v, (const u8 *)src, info->packedFieldSizes, 2); - - xs->offset = offset - v[0] + info->repeatMin; - xs->bitmap = v[1]; - - DEBUG_PRINTF("loaded: xs->offset=%llu, xs->bitmap=0x%llx\n", xs->offset, - xs->bitmap); -} - -void repeatUnpack(const char *src, const struct RepeatInfo *info, u64a offset, - union RepeatControl *ctrl) { - assert(src && info && ctrl); - - switch ((enum RepeatType)info->type) { - case REPEAT_RING: - repeatUnpackRing(src, info, offset, ctrl); - break; - case REPEAT_FIRST: - case REPEAT_LAST: - repeatUnpackOffset(src, info, offset, ctrl); - break; - case REPEAT_RANGE: - repeatUnpackRange(src, info, offset, ctrl); - break; - case REPEAT_BITMAP: - repeatUnpackBitmap(src, info, offset, ctrl); - break; - case REPEAT_SPARSE_OPTIMAL_P: - repeatUnpackSparseOptimalP(src, info, offset, ctrl); - break; - case REPEAT_TRAILER: - repeatUnpackTrailer(src, info, offset, ctrl); - break; + DEBUG_PRINTF("unpacking offset %llu [h%u]\n", xs->offset, + info->horizon); +} + +static +void repeatUnpackRange(const char *src, const struct RepeatInfo *info, + u64a offset, union RepeatControl *ctrl) { + struct RepeatRangeControl *xs = &ctrl->range; + xs->offset = 
loadPackedRelative(src, offset, info->packedCtrlSize - 1); + xs->num = src[info->packedCtrlSize - 1]; +} + +static +void repeatUnpackBitmap(const char *src, const struct RepeatInfo *info, + u64a offset, union RepeatControl *ctrl) { + struct RepeatBitmapControl *xs = &ctrl->bitmap; + xs->offset = offset > info->repeatMax ? offset - info->repeatMax : 0; + xs->bitmap = partial_load_u64a(src, info->packedCtrlSize); +} + +static +void repeatUnpackSparseOptimalP(const char *src, const struct RepeatInfo *info, + u64a offset, union RepeatControl *ctrl) { + struct RepeatRingControl *xs = &ctrl->ring; + const u32 ring_indices_len = info->patchCount < 254 ? 2 : 4; + const u32 offset_len = info->packedCtrlSize - ring_indices_len; + xs->offset = loadPackedRelative(src, offset, offset_len); + if (ring_indices_len == 4) { + xs->first = unaligned_load_u16(src + offset_len); + xs->last = unaligned_load_u16(src + offset_len + 2); + } else { + const u8 *indices = (const u8 *)src + offset_len; + xs->first = indices[0]; + xs->last = indices[1]; + } +} + +static +void repeatUnpackTrailer(const char *src, const struct RepeatInfo *info, + u64a offset, union RepeatControl *ctrl) { + struct RepeatTrailerControl *xs = &ctrl->trailer; + + u64a v[2]; + unpack_bits_64(v, (const u8 *)src, info->packedFieldSizes, 2); + + xs->offset = offset - v[0] + info->repeatMin; + xs->bitmap = v[1]; + + DEBUG_PRINTF("loaded: xs->offset=%llu, xs->bitmap=0x%llx\n", xs->offset, + xs->bitmap); +} + +void repeatUnpack(const char *src, const struct RepeatInfo *info, u64a offset, + union RepeatControl *ctrl) { + assert(src && info && ctrl); + + switch ((enum RepeatType)info->type) { + case REPEAT_RING: + repeatUnpackRing(src, info, offset, ctrl); + break; + case REPEAT_FIRST: + case REPEAT_LAST: + repeatUnpackOffset(src, info, offset, ctrl); + break; + case REPEAT_RANGE: + repeatUnpackRange(src, info, offset, ctrl); + break; + case REPEAT_BITMAP: + repeatUnpackBitmap(src, info, offset, ctrl); + break; + case REPEAT_SPARSE_OPTIMAL_P: + repeatUnpackSparseOptimalP(src, info, offset, ctrl); + break; + case REPEAT_TRAILER: + repeatUnpackTrailer(src, info, offset, ctrl); + break; case REPEAT_ALWAYS: /* nothing to do - no state */ break; - } -} - -static really_inline -const u64a *getImplTable(const struct RepeatInfo *info) { - const u64a *table = ((const u64a *)(ROUNDUP_PTR( - ((const char *)(info) + - sizeof(*info)), - alignof(u64a)))); - return table; -} - -static -void storeInitialRingTopPatch(const struct RepeatInfo *info, - struct RepeatRingControl *xs, - u8 *state, u64a offset) { - DEBUG_PRINTF("set the first patch, offset=%llu\n", offset); - xs->offset = offset; - - u8 *active = state; - u32 patch_count = info->patchCount; - mmbit_clear(active, patch_count); - mmbit_set(active, patch_count, 0); - - u8 *ring = active + info->patchesOffset; - u32 encoding_size = info->encodingSize; - partial_store_u64a(ring, 1ull, encoding_size); - xs->first = 0; - xs->last = 1; -} - -static -u32 getSparseOptimalTargetValue(const struct RepeatInfo *info, - const u32 tval, u64a *val) { - u32 patch_size = info->patchSize; - const u64a *repeatTable = getImplTable(info); - u32 loc = 0; - DEBUG_PRINTF("val:%llu \n", *val); - for (u32 i = 1; i <= patch_size - tval; i++) { - u64a tmp = repeatTable[patch_size - i]; - if (*val >= tmp) { - *val -= tmp; - loc = i; - i += (info->minPeriod - 1); - } - } - - return loc; -} - -static -u64a sparseLastTop(const struct RepeatInfo *info, - const struct RepeatRingControl *xs, const u8 *state) { - DEBUG_PRINTF("looking for last 
top\n"); - u32 patch_size = info->patchSize; - u32 patch_count = info->patchCount; - u32 encoding_size = info->encodingSize; - - u32 occ = ringOccupancy(xs, patch_count); - u32 patch = xs->first + occ - 1; - if (patch >= patch_count) { - patch -= patch_count; - } - - DEBUG_PRINTF("patch%u encoding_size%u occ%u\n", patch, encoding_size, occ); - const u8 *ring = state + info->patchesOffset; - u64a val = partial_load_u64a(ring + encoding_size * patch, encoding_size); - - DEBUG_PRINTF("val:%llu\n", val); - const u64a *repeatTable = getImplTable(info); - for (s32 i = patch_size - 1; i >= 0; i--) { - if (val >= repeatTable[i]) { - DEBUG_PRINTF("xs->offset%llu v%u p%llu\n", - xs->offset, i, repeatTable[i]); - return xs->offset + i + (occ - 1) * patch_size; - } - } - - assert(0); - return 0; -} - -u64a repeatLastTopSparseOptimalP(const struct RepeatInfo *info, - const union RepeatControl *ctrl, - const void *state) { - return sparseLastTop(info, &ctrl->ring, state); -} - -u64a repeatNextMatchSparseOptimalP(const struct RepeatInfo *info, - const union RepeatControl *ctrl, - const void *state, u64a offset) { - const struct RepeatRingControl *xs = &ctrl->ring; - - DEBUG_PRINTF("repeat [%u, %u] looking for match after %llu\n", - info->repeatMin, info->repeatMax, offset); - - assert(offset >= xs->offset); - - u64a nextOffset = offset + 1; - - u32 patch_size = info->patchSize; - u32 patch; - u32 tval; - if (nextOffset <= xs->offset + info->repeatMin) { - patch = xs->first; - tval = 0; - } else if (nextOffset > sparseLastTop(info, xs, state) + info->repeatMax) { - DEBUG_PRINTF("ring is stale\n"); - return 0; - } else { - assert(nextOffset - xs->offset < UINT32_MAX); // ring is not stale - u32 delta = (u32)(nextOffset - xs->offset); - u32 lower = delta > info->repeatMax ? delta - info->repeatMax : 0; - patch = lower / patch_size; - tval = lower - patch * patch_size; - } - - DEBUG_PRINTF("patch %u\n", patch); - u32 patch_count = info->patchCount; - if (patch >= patch_count) { - return 0; - } - - DEBUG_PRINTF("initial test for %u\n", tval); - - u32 begin = xs->first + patch; - if (begin >= patch_count) { - begin -= patch_count; - } - - const u8 *active = (const u8 *)state; - const u8 *ring = active + info->patchesOffset; - u32 encoding_size = info->encodingSize; - const u32 end = begin >= xs->last ? 
patch_count : xs->last; - u32 low = tval; - u64a diff = 0, loc = 0; - DEBUG_PRINTF("begin %u end %u\n", begin, end); - for (u32 p = mmbit_iterate_bounded(active, patch_count, begin, end); - p != MMB_INVALID; p = mmbit_iterate_bounded(active, patch_count, - p + 1, end)) { - if (p != begin) { - low = 0; - } - - u64a val = partial_load_u64a(ring + encoding_size * p, encoding_size); - u32 p1 = 0; - if (p >= xs->first) { - p1 = p - xs->first; - } else { - p1 = p + patch_count - xs->first; - } - - if (val) { - loc = getSparseOptimalTargetValue(info, low, &val); - diff = (p1 + 1) * patch_size - loc; - } - if (loc) { - u64a rv = MAX(nextOffset, xs->offset + info->repeatMin + diff); - DEBUG_PRINTF("offset%llu next match at %llu\n", xs->offset, rv); - return rv; - } - low = 0; - } - - low = 0; - if (begin >= xs->last) { - for (u32 p = mmbit_iterate_bounded(active, patch_count, 0, xs->last); - p != MMB_INVALID; p = mmbit_iterate_bounded(active, patch_count, - p + 1, xs->last)) { - - u64a val = partial_load_u64a(ring + encoding_size * p, - encoding_size); - if (val) { - loc = getSparseOptimalTargetValue(info, low, &val); - diff = (p + 1) * patch_size - loc; - } - if (loc) { - u64a rv = MAX(nextOffset, xs->offset + info->repeatMin + - diff + (end - xs->first) * patch_size); - DEBUG_PRINTF("next match at %llu\n", rv); - return rv; - } - } - } - - DEBUG_PRINTF("next match\n"); - return 0; -} - -void repeatStoreSparseOptimalP(const struct RepeatInfo *info, - union RepeatControl *ctrl, void *state, - u64a offset, char is_alive) { - struct RepeatRingControl *xs = &ctrl->ring; - u8 *active = (u8 *)state; - - DEBUG_PRINTF("offset: %llu encoding_size: %u\n", offset, - info->encodingSize); - - // If (a) this is the first top, or (b) the ring is stale, initialize the - // ring and write this offset in as the first top. - if (!is_alive || - offset > sparseLastTop(info, xs, state) + info->repeatMax) { - storeInitialRingTopPatch(info, xs, active, offset); - return; - } - - // Tops should arrive in order, with no duplicates. - assert(offset > sparseLastTop(info, xs, state)); - - // As the ring is not stale, our delta should fit within a u32. - assert(offset - xs->offset <= UINT32_MAX); - u32 delta = (u32)(offset - xs->offset); - u32 patch_size = info->patchSize; - u32 patch_count = info->patchCount; - u32 encoding_size = info->encodingSize; - u32 patch = delta / patch_size; - - DEBUG_PRINTF("delta=%u, patch_size=%u, patch=%u\n", delta, patch_size, - patch); - - u8 *ring = active + info->patchesOffset; - u32 occ = ringOccupancy(xs, patch_count); - u64a val = 0; - u32 idx; - - DEBUG_PRINTF("patch: %u patch_count: %u occ: %u\n", - patch, patch_count, occ); - if (patch >= patch_count) { - u32 patch_shift_count = patch - patch_count + 1; - assert(patch >= patch_shift_count); - DEBUG_PRINTF("shifting by %u\n", patch_shift_count); - xs->offset += patch_size * patch_shift_count; - xs->first += patch_shift_count; - if (xs->first >= patch_count) { - xs->first -= patch_count; - } - idx = xs->last + patch - occ; - mmbit_unset_range(active, patch_count, xs->last, - MIN(idx, patch_count)); - if (idx >= patch_count) { - idx -= patch_count; - mmbit_unset_range(active, patch_count, 0, idx + 1); - } - xs->last = idx + 1; - if (xs->last == patch_count) { - xs->last = 0; - } - } else if (patch < occ) { - assert(patch == occ - 1); - idx = xs->last == 0 ? 
patch_count - 1 : (u32)xs->last - 1; - val = partial_load_u64a(ring + encoding_size * idx, encoding_size); - } else { - idx = xs->last + patch - occ; - mmbit_unset_range(active, patch_count, xs->last, - MIN(idx, patch_count)); - if (idx >= patch_count) { - idx -= patch_count; - mmbit_unset_range(active, patch_count, 0, idx + 1); - } - xs->last = idx + 1; - if (xs->last == patch_count) { - xs->last = 0; - } - } - - assert((u64a)patch * patch_size <= delta); - u32 diff = delta - patch * patch_size; - const u64a *repeatTable = getImplTable(info); - val += repeatTable[diff]; - - DEBUG_PRINTF("patch=%u, occ=%u\n", patch, occ); - DEBUG_PRINTF("xs->first:%u xs->last:%u patch:%u\n", - xs->first, xs->last, patch); - DEBUG_PRINTF("value:%llu\n", val); + } +} + +static really_inline +const u64a *getImplTable(const struct RepeatInfo *info) { + const u64a *table = ((const u64a *)(ROUNDUP_PTR( + ((const char *)(info) + + sizeof(*info)), + alignof(u64a)))); + return table; +} + +static +void storeInitialRingTopPatch(const struct RepeatInfo *info, + struct RepeatRingControl *xs, + u8 *state, u64a offset) { + DEBUG_PRINTF("set the first patch, offset=%llu\n", offset); + xs->offset = offset; + + u8 *active = state; + u32 patch_count = info->patchCount; + mmbit_clear(active, patch_count); + mmbit_set(active, patch_count, 0); + + u8 *ring = active + info->patchesOffset; + u32 encoding_size = info->encodingSize; + partial_store_u64a(ring, 1ull, encoding_size); + xs->first = 0; + xs->last = 1; +} + +static +u32 getSparseOptimalTargetValue(const struct RepeatInfo *info, + const u32 tval, u64a *val) { + u32 patch_size = info->patchSize; + const u64a *repeatTable = getImplTable(info); + u32 loc = 0; + DEBUG_PRINTF("val:%llu \n", *val); + for (u32 i = 1; i <= patch_size - tval; i++) { + u64a tmp = repeatTable[patch_size - i]; + if (*val >= tmp) { + *val -= tmp; + loc = i; + i += (info->minPeriod - 1); + } + } + + return loc; +} + +static +u64a sparseLastTop(const struct RepeatInfo *info, + const struct RepeatRingControl *xs, const u8 *state) { + DEBUG_PRINTF("looking for last top\n"); + u32 patch_size = info->patchSize; + u32 patch_count = info->patchCount; + u32 encoding_size = info->encodingSize; + + u32 occ = ringOccupancy(xs, patch_count); + u32 patch = xs->first + occ - 1; + if (patch >= patch_count) { + patch -= patch_count; + } + + DEBUG_PRINTF("patch%u encoding_size%u occ%u\n", patch, encoding_size, occ); + const u8 *ring = state + info->patchesOffset; + u64a val = partial_load_u64a(ring + encoding_size * patch, encoding_size); + + DEBUG_PRINTF("val:%llu\n", val); + const u64a *repeatTable = getImplTable(info); + for (s32 i = patch_size - 1; i >= 0; i--) { + if (val >= repeatTable[i]) { + DEBUG_PRINTF("xs->offset%llu v%u p%llu\n", + xs->offset, i, repeatTable[i]); + return xs->offset + i + (occ - 1) * patch_size; + } + } + + assert(0); + return 0; +} + +u64a repeatLastTopSparseOptimalP(const struct RepeatInfo *info, + const union RepeatControl *ctrl, + const void *state) { + return sparseLastTop(info, &ctrl->ring, state); +} + +u64a repeatNextMatchSparseOptimalP(const struct RepeatInfo *info, + const union RepeatControl *ctrl, + const void *state, u64a offset) { + const struct RepeatRingControl *xs = &ctrl->ring; + + DEBUG_PRINTF("repeat [%u, %u] looking for match after %llu\n", + info->repeatMin, info->repeatMax, offset); + + assert(offset >= xs->offset); + + u64a nextOffset = offset + 1; + + u32 patch_size = info->patchSize; + u32 patch; + u32 tval; + if (nextOffset <= xs->offset + info->repeatMin) { + 
patch = xs->first; + tval = 0; + } else if (nextOffset > sparseLastTop(info, xs, state) + info->repeatMax) { + DEBUG_PRINTF("ring is stale\n"); + return 0; + } else { + assert(nextOffset - xs->offset < UINT32_MAX); // ring is not stale + u32 delta = (u32)(nextOffset - xs->offset); + u32 lower = delta > info->repeatMax ? delta - info->repeatMax : 0; + patch = lower / patch_size; + tval = lower - patch * patch_size; + } + + DEBUG_PRINTF("patch %u\n", patch); + u32 patch_count = info->patchCount; + if (patch >= patch_count) { + return 0; + } + + DEBUG_PRINTF("initial test for %u\n", tval); + + u32 begin = xs->first + patch; + if (begin >= patch_count) { + begin -= patch_count; + } + + const u8 *active = (const u8 *)state; + const u8 *ring = active + info->patchesOffset; + u32 encoding_size = info->encodingSize; + const u32 end = begin >= xs->last ? patch_count : xs->last; + u32 low = tval; + u64a diff = 0, loc = 0; + DEBUG_PRINTF("begin %u end %u\n", begin, end); + for (u32 p = mmbit_iterate_bounded(active, patch_count, begin, end); + p != MMB_INVALID; p = mmbit_iterate_bounded(active, patch_count, + p + 1, end)) { + if (p != begin) { + low = 0; + } + + u64a val = partial_load_u64a(ring + encoding_size * p, encoding_size); + u32 p1 = 0; + if (p >= xs->first) { + p1 = p - xs->first; + } else { + p1 = p + patch_count - xs->first; + } + + if (val) { + loc = getSparseOptimalTargetValue(info, low, &val); + diff = (p1 + 1) * patch_size - loc; + } + if (loc) { + u64a rv = MAX(nextOffset, xs->offset + info->repeatMin + diff); + DEBUG_PRINTF("offset%llu next match at %llu\n", xs->offset, rv); + return rv; + } + low = 0; + } + + low = 0; + if (begin >= xs->last) { + for (u32 p = mmbit_iterate_bounded(active, patch_count, 0, xs->last); + p != MMB_INVALID; p = mmbit_iterate_bounded(active, patch_count, + p + 1, xs->last)) { + + u64a val = partial_load_u64a(ring + encoding_size * p, + encoding_size); + if (val) { + loc = getSparseOptimalTargetValue(info, low, &val); + diff = (p + 1) * patch_size - loc; + } + if (loc) { + u64a rv = MAX(nextOffset, xs->offset + info->repeatMin + + diff + (end - xs->first) * patch_size); + DEBUG_PRINTF("next match at %llu\n", rv); + return rv; + } + } + } + + DEBUG_PRINTF("next match\n"); + return 0; +} + +void repeatStoreSparseOptimalP(const struct RepeatInfo *info, + union RepeatControl *ctrl, void *state, + u64a offset, char is_alive) { + struct RepeatRingControl *xs = &ctrl->ring; + u8 *active = (u8 *)state; + + DEBUG_PRINTF("offset: %llu encoding_size: %u\n", offset, + info->encodingSize); + + // If (a) this is the first top, or (b) the ring is stale, initialize the + // ring and write this offset in as the first top. + if (!is_alive || + offset > sparseLastTop(info, xs, state) + info->repeatMax) { + storeInitialRingTopPatch(info, xs, active, offset); + return; + } + + // Tops should arrive in order, with no duplicates. + assert(offset > sparseLastTop(info, xs, state)); + + // As the ring is not stale, our delta should fit within a u32. 
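/*
 * In the sparse-optimal scheme, a top's delta from the ring base splits
 * into a patch index and a position within that patch; the position is
 * then folded into the patch's packed counter through the precomputed
 * repeatTable that getImplTable() reads from just past the RepeatInfo
 * structure. The index arithmetic used below, isolated (illustrative
 * names, not Hyperscan API):
 */
#include <stdint.h>

struct patch_pos {
    uint32_t patch; /* which patch the top falls in */
    uint32_t pos;   /* the top's position inside that patch */
};

static struct patch_pos split_delta(uint32_t delta, uint32_t patch_size) {
    struct patch_pos p;
    p.patch = delta / patch_size;
    p.pos = delta - p.patch * patch_size; /* i.e. delta % patch_size */
    return p;
}

/* The store then accumulates val += repeatTable[pos] and re-encodes val
 * into encodingSize bytes for that patch. */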
+ assert(offset - xs->offset <= UINT32_MAX); + u32 delta = (u32)(offset - xs->offset); + u32 patch_size = info->patchSize; + u32 patch_count = info->patchCount; + u32 encoding_size = info->encodingSize; + u32 patch = delta / patch_size; + + DEBUG_PRINTF("delta=%u, patch_size=%u, patch=%u\n", delta, patch_size, + patch); + + u8 *ring = active + info->patchesOffset; + u32 occ = ringOccupancy(xs, patch_count); + u64a val = 0; + u32 idx; + + DEBUG_PRINTF("patch: %u patch_count: %u occ: %u\n", + patch, patch_count, occ); + if (patch >= patch_count) { + u32 patch_shift_count = patch - patch_count + 1; + assert(patch >= patch_shift_count); + DEBUG_PRINTF("shifting by %u\n", patch_shift_count); + xs->offset += patch_size * patch_shift_count; + xs->first += patch_shift_count; + if (xs->first >= patch_count) { + xs->first -= patch_count; + } + idx = xs->last + patch - occ; + mmbit_unset_range(active, patch_count, xs->last, + MIN(idx, patch_count)); + if (idx >= patch_count) { + idx -= patch_count; + mmbit_unset_range(active, patch_count, 0, idx + 1); + } + xs->last = idx + 1; + if (xs->last == patch_count) { + xs->last = 0; + } + } else if (patch < occ) { + assert(patch == occ - 1); + idx = xs->last == 0 ? patch_count - 1 : (u32)xs->last - 1; + val = partial_load_u64a(ring + encoding_size * idx, encoding_size); + } else { + idx = xs->last + patch - occ; + mmbit_unset_range(active, patch_count, xs->last, + MIN(idx, patch_count)); + if (idx >= patch_count) { + idx -= patch_count; + mmbit_unset_range(active, patch_count, 0, idx + 1); + } + xs->last = idx + 1; + if (xs->last == patch_count) { + xs->last = 0; + } + } + + assert((u64a)patch * patch_size <= delta); + u32 diff = delta - patch * patch_size; + const u64a *repeatTable = getImplTable(info); + val += repeatTable[diff]; + + DEBUG_PRINTF("patch=%u, occ=%u\n", patch, occ); + DEBUG_PRINTF("xs->first:%u xs->last:%u patch:%u\n", + xs->first, xs->last, patch); + DEBUG_PRINTF("value:%llu\n", val); assert(fits_in_len_bytes(val, encoding_size)); - partial_store_u64a(ring + encoding_size * idx, val, encoding_size); - mmbit_set(active, patch_count, idx); -} - -static -char sparseHasMatch(const struct RepeatInfo *info, const u8 *state, - u32 lower, u32 upper) { - u32 patch_size = info->patchSize; - u32 patch_count = info->patchCount; - u32 encoding_size = info->encodingSize; - u32 patch_lower = lower / patch_size; - u32 patch_upper = upper / patch_size; - u32 diff = lower - patch_lower * patch_size; - - DEBUG_PRINTF("lower=%u, upper=%u\n", lower, upper); - const u64a *repeatTable = getImplTable(info); - - const u8 *ring = state + info->patchesOffset; - const u8 *active = state; - u64a val; - // test the first patch - if (mmbit_isset(active, patch_count, patch_lower)) { - val = partial_load_u64a(ring + encoding_size * patch_lower, - encoding_size); - DEBUG_PRINTF("patch_size=%u, diff=%u, table=%llu\n", - patch_size, diff, repeatTable[diff]); - DEBUG_PRINTF("patch_lower=%u, patch_upper=%u\n", - patch_lower, patch_upper); - if (patch_upper == patch_lower) { - u32 limit = upper - patch_lower * patch_size; - getSparseOptimalTargetValue(info, limit + 1, &val); - } - if (val >= repeatTable[diff]) { - return 1; - } - } - - if (patch_lower == patch_upper) { - return 0; - } - - // test the patches between first and last - u32 m = mmbit_iterate_bounded(active, patch_count, - patch_lower + 1, patch_upper); - if (m != MMB_INVALID) { - return 1; - } - - if (patch_upper == patch_count) { - return 0; - } - - // test the last patch - if (!mmbit_isset(active, patch_count, 
patch_upper)) { - return 0; - } - diff = (patch_upper + 1) * patch_size - upper; - DEBUG_PRINTF("diff=%u\n", diff); - val = partial_load_u64a(ring + encoding_size * patch_upper, encoding_size); - getSparseOptimalTargetValue(info, patch_size - diff + 1, &val); - if (val) { - DEBUG_PRINTF("last patch: val=%llu\n", val); - return 1; - } - - return 0; -} - -enum RepeatMatch repeatHasMatchSparseOptimalP(const struct RepeatInfo *info, - const union RepeatControl *ctrl, - const void *state, u64a offset) { - DEBUG_PRINTF("check for match at %llu corresponding to trigger " - "at [%llu, %llu]\n", offset, offset - info->repeatMax, - offset - info->repeatMin); - - const struct RepeatRingControl *xs = &ctrl->ring; - const u8 *ring = (const u8 *)state; - - assert(offset >= xs->offset); - - if (offset < xs->offset + info->repeatMin) { - DEBUG_PRINTF("too soon\n"); - return REPEAT_NOMATCH; - } else if (offset > sparseLastTop(info, xs, state) + info->repeatMax) { - DEBUG_PRINTF("stale\n"); - return REPEAT_STALE; - } - - // Our delta between the base offset of the ring and the current offset - // must fit within the range [repeatMin, lastPossibleTop + repeatMax]. This - // range fits comfortably within a u32. - assert(offset - xs->offset <= UINT32_MAX); - - u32 delta = (u32)(offset - xs->offset); - u32 patch_size = info->patchSize; - u32 patch_count = info->patchCount; - u32 occ = ringOccupancy(xs, patch_count); - - u32 lower = delta > info->repeatMax ? delta - info->repeatMax : 0; - u32 upper = MIN(delta - info->repeatMin, occ * patch_size - 1); - - DEBUG_PRINTF("lower=%u, upper=%u\n", lower, upper); - u32 patch_lower = lower / patch_size; - u32 patch_upper = upper / patch_size; - - if (patch_lower >= occ) { - DEBUG_PRINTF("too late\n"); - return REPEAT_NOMATCH; - } - - u32 remaining_lower = lower - patch_lower * patch_size; - u32 remaining_upper = upper - patch_upper * patch_size; - patch_lower += xs->first; - patch_upper += xs->first; - if (patch_lower >= patch_count) { - patch_lower -= patch_count; - patch_upper -= patch_count; - } else if (patch_upper >= patch_count) { - patch_upper -= patch_count; - } - - DEBUG_PRINTF("xs->first:%u xs->last:%u patch_lower:%u, patch_upper:%u\n", - xs->first, xs->last, patch_lower, patch_upper); - - u32 scan_end; - const char is_not_wrapped = (patch_lower <= patch_upper); - if (is_not_wrapped) { - scan_end = patch_upper * patch_size + remaining_upper; - } else { - scan_end = patch_count * patch_size; - } - - lower = patch_lower * patch_size + remaining_lower; - if (sparseHasMatch(info, ring, lower, scan_end)) { - return REPEAT_MATCH; - } - - if (!is_not_wrapped) { - upper -= (patch_count - xs->first) * patch_size; - if (sparseHasMatch(info, ring, 0, upper)) { - return REPEAT_MATCH; - } - } - - return REPEAT_NOMATCH; -} + partial_store_u64a(ring + encoding_size * idx, val, encoding_size); + mmbit_set(active, patch_count, idx); +} + +static +char sparseHasMatch(const struct RepeatInfo *info, const u8 *state, + u32 lower, u32 upper) { + u32 patch_size = info->patchSize; + u32 patch_count = info->patchCount; + u32 encoding_size = info->encodingSize; + u32 patch_lower = lower / patch_size; + u32 patch_upper = upper / patch_size; + u32 diff = lower - patch_lower * patch_size; + + DEBUG_PRINTF("lower=%u, upper=%u\n", lower, upper); + const u64a *repeatTable = getImplTable(info); + + const u8 *ring = state + info->patchesOffset; + const u8 *active = state; + u64a val; + // test the first patch + if (mmbit_isset(active, patch_count, patch_lower)) { + val = partial_load_u64a(ring 
+ encoding_size * patch_lower, + encoding_size); + DEBUG_PRINTF("patch_size=%u, diff=%u, table=%llu\n", + patch_size, diff, repeatTable[diff]); + DEBUG_PRINTF("patch_lower=%u, patch_upper=%u\n", + patch_lower, patch_upper); + if (patch_upper == patch_lower) { + u32 limit = upper - patch_lower * patch_size; + getSparseOptimalTargetValue(info, limit + 1, &val); + } + if (val >= repeatTable[diff]) { + return 1; + } + } + + if (patch_lower == patch_upper) { + return 0; + } + + // test the patches between first and last + u32 m = mmbit_iterate_bounded(active, patch_count, + patch_lower + 1, patch_upper); + if (m != MMB_INVALID) { + return 1; + } + + if (patch_upper == patch_count) { + return 0; + } + + // test the last patch + if (!mmbit_isset(active, patch_count, patch_upper)) { + return 0; + } + diff = (patch_upper + 1) * patch_size - upper; + DEBUG_PRINTF("diff=%u\n", diff); + val = partial_load_u64a(ring + encoding_size * patch_upper, encoding_size); + getSparseOptimalTargetValue(info, patch_size - diff + 1, &val); + if (val) { + DEBUG_PRINTF("last patch: val=%llu\n", val); + return 1; + } + + return 0; +} + +enum RepeatMatch repeatHasMatchSparseOptimalP(const struct RepeatInfo *info, + const union RepeatControl *ctrl, + const void *state, u64a offset) { + DEBUG_PRINTF("check for match at %llu corresponding to trigger " + "at [%llu, %llu]\n", offset, offset - info->repeatMax, + offset - info->repeatMin); + + const struct RepeatRingControl *xs = &ctrl->ring; + const u8 *ring = (const u8 *)state; + + assert(offset >= xs->offset); + + if (offset < xs->offset + info->repeatMin) { + DEBUG_PRINTF("too soon\n"); + return REPEAT_NOMATCH; + } else if (offset > sparseLastTop(info, xs, state) + info->repeatMax) { + DEBUG_PRINTF("stale\n"); + return REPEAT_STALE; + } + + // Our delta between the base offset of the ring and the current offset + // must fit within the range [repeatMin, lastPossibleTop + repeatMax]. This + // range fits comfortably within a u32. + assert(offset - xs->offset <= UINT32_MAX); + + u32 delta = (u32)(offset - xs->offset); + u32 patch_size = info->patchSize; + u32 patch_count = info->patchCount; + u32 occ = ringOccupancy(xs, patch_count); + + u32 lower = delta > info->repeatMax ? 
delta - info->repeatMax : 0; + u32 upper = MIN(delta - info->repeatMin, occ * patch_size - 1); + + DEBUG_PRINTF("lower=%u, upper=%u\n", lower, upper); + u32 patch_lower = lower / patch_size; + u32 patch_upper = upper / patch_size; + + if (patch_lower >= occ) { + DEBUG_PRINTF("too late\n"); + return REPEAT_NOMATCH; + } + + u32 remaining_lower = lower - patch_lower * patch_size; + u32 remaining_upper = upper - patch_upper * patch_size; + patch_lower += xs->first; + patch_upper += xs->first; + if (patch_lower >= patch_count) { + patch_lower -= patch_count; + patch_upper -= patch_count; + } else if (patch_upper >= patch_count) { + patch_upper -= patch_count; + } + + DEBUG_PRINTF("xs->first:%u xs->last:%u patch_lower:%u, patch_upper:%u\n", + xs->first, xs->last, patch_lower, patch_upper); + + u32 scan_end; + const char is_not_wrapped = (patch_lower <= patch_upper); + if (is_not_wrapped) { + scan_end = patch_upper * patch_size + remaining_upper; + } else { + scan_end = patch_count * patch_size; + } + + lower = patch_lower * patch_size + remaining_lower; + if (sparseHasMatch(info, ring, lower, scan_end)) { + return REPEAT_MATCH; + } + + if (!is_not_wrapped) { + upper -= (patch_count - xs->first) * patch_size; + if (sparseHasMatch(info, ring, 0, upper)) { + return REPEAT_MATCH; + } + } + + return REPEAT_NOMATCH; +} diff --git a/contrib/libs/hyperscan/src/nfa/repeat.h b/contrib/libs/hyperscan/src/nfa/repeat.h index eeb8448ade..d4f84ea0a9 100644 --- a/contrib/libs/hyperscan/src/nfa/repeat.h +++ b/contrib/libs/hyperscan/src/nfa/repeat.h @@ -1,370 +1,370 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief API for handling bounded repeats. - * - * This file provides an internal API for handling bounded repeats of character - * classes. It is used by the Large Bounded Repeat (LBR) engine and by the - * bounded repeat handling in the LimEx NFA engine as well. - * - * The state required by these functions is split into two regions: - * - * 1. Control block. 
This is a small structure (size varies with repeat mode) - * that may be copied around or compressed into stream state. - * 2. Repeat state. This is a larger structure that can be quite big for large - * repeats, often containing a multibit ring or large vector of indices. - * This generally lives in stream state and is not copied. - */ - -#ifndef REPEAT_H -#define REPEAT_H - -#include "ue2common.h" -#include "repeat_internal.h" -#include "util/bitutils.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -/** Returns the offset of the most recent 'top' offset set in the repeat. */ -static really_inline -u64a repeatLastTop(const struct RepeatInfo *info, - const union RepeatControl *ctrl, const void *state); - -/** Returns the offset of the next match after 'offset', or zero if no further - * matches are possible. */ -static really_inline -u64a repeatNextMatch(const struct RepeatInfo *info, - const union RepeatControl *ctrl, const void *state, - u64a offset); - -/** Stores a new top in the repeat. If is_alive is false, the repeat will be - * initialised first and this top will become the first (and only) one. */ -static really_inline -void repeatStore(const struct RepeatInfo *info, union RepeatControl *ctrl, - void *state, u64a offset, char is_alive); - -/** Return type for repeatHasMatch. */ -enum RepeatMatch { - REPEAT_NOMATCH, /**< This offset is not a valid match. */ - REPEAT_MATCH, /**< This offset is a valid match. */ - REPEAT_STALE /**< This offset is not a valid match and no greater - offset will be (unless another top is stored). */ -}; - -/** Query whether the repeat has a match at the given offset. Returns - * ::REPEAT_STALE if it does not have a match at that offset _and_ - * no further matches are possible. */ -static really_inline -enum RepeatMatch repeatHasMatch(const struct RepeatInfo *info, - const union RepeatControl *ctrl, - const void *state, u64a offset); - -/** \brief Serialize a packed version of the repeat control block into stream - * state. */ -void repeatPack(char *dest, const struct RepeatInfo *info, - const union RepeatControl *ctrl, u64a offset); - -/** \brief Deserialize a packed version of the repeat control block. */ -void repeatUnpack(const char *src, const struct RepeatInfo *info, u64a offset, - union RepeatControl *ctrl); - -//// -//// IMPLEMENTATION. 
-//// - -u64a repeatLastTopRing(const struct RepeatInfo *info, - const union RepeatControl *ctrl); - -u64a repeatLastTopRange(const union RepeatControl *ctrl, - const void *state); - -u64a repeatLastTopBitmap(const union RepeatControl *ctrl); - -u64a repeatLastTopTrailer(const struct RepeatInfo *info, - const union RepeatControl *ctrl); - -u64a repeatLastTopSparseOptimalP(const struct RepeatInfo *info, - const union RepeatControl *ctrl, - const void *state); - -static really_inline -u64a repeatLastTop(const struct RepeatInfo *info, - const union RepeatControl *ctrl, const void *state) { - assert(info && ctrl && state); - - switch ((enum RepeatType)info->type) { - case REPEAT_RING: - return repeatLastTopRing(info, ctrl); - case REPEAT_FIRST: - case REPEAT_LAST: - return ctrl->offset.offset; - case REPEAT_RANGE: - return repeatLastTopRange(ctrl, state); - case REPEAT_BITMAP: - return repeatLastTopBitmap(ctrl); - case REPEAT_SPARSE_OPTIMAL_P: - return repeatLastTopSparseOptimalP(info, ctrl, state); - case REPEAT_TRAILER: - return repeatLastTopTrailer(info, ctrl); +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief API for handling bounded repeats. + * + * This file provides an internal API for handling bounded repeats of character + * classes. It is used by the Large Bounded Repeat (LBR) engine and by the + * bounded repeat handling in the LimEx NFA engine as well. + * + * The state required by these functions is split into two regions: + * + * 1. Control block. This is a small structure (size varies with repeat mode) + * that may be copied around or compressed into stream state. + * 2. Repeat state. This is a larger structure that can be quite big for large + * repeats, often containing a multibit ring or large vector of indices. + * This generally lives in stream state and is not copied. 
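To make this split concrete, here is a minimal, hedged sketch of the call pattern these declarations imply for a streaming engine: unpack the packed control block out of stream state, update it together with the in-place repeat state, then pack it back at the stream boundary. Only the function names and signatures come from this header; the surrounding driver (example_stream_step, the buffer arguments, the chosen offsets) is an illustrative assumption, not library code.

/* Sketch only: assumes 'info' points at a valid RepeatInfo, and 'packed'
 * and 'state' point into caller-owned stream state. */
static void example_stream_step(const struct RepeatInfo *info, char *packed,
                                void *state, u64a offset, char is_alive) {
    union RepeatControl ctrl;
    /* Decompress the small control block out of stream state. */
    repeatUnpack(packed, info, offset, &ctrl);
    /* Record a top; the large repeat state is updated in place. */
    repeatStore(info, &ctrl, state, offset, is_alive);
    /* Query for a match once the minimum bound could be satisfied. */
    if (repeatHasMatch(info, &ctrl, state,
                       offset + info->repeatMin) == REPEAT_MATCH) {
        /* ... report the match ... */
    }
    /* Re-compress the control block at the stream boundary. */
    repeatPack(packed, info, &ctrl, offset + info->repeatMin);
}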
+ */ + +#ifndef REPEAT_H +#define REPEAT_H + +#include "ue2common.h" +#include "repeat_internal.h" +#include "util/bitutils.h" + +#ifdef __cplusplus +extern "C" +{ +#endif + +/** Returns the offset of the most recent 'top' offset set in the repeat. */ +static really_inline +u64a repeatLastTop(const struct RepeatInfo *info, + const union RepeatControl *ctrl, const void *state); + +/** Returns the offset of the next match after 'offset', or zero if no further + * matches are possible. */ +static really_inline +u64a repeatNextMatch(const struct RepeatInfo *info, + const union RepeatControl *ctrl, const void *state, + u64a offset); + +/** Stores a new top in the repeat. If is_alive is false, the repeat will be + * initialised first and this top will become the first (and only) one. */ +static really_inline +void repeatStore(const struct RepeatInfo *info, union RepeatControl *ctrl, + void *state, u64a offset, char is_alive); + +/** Return type for repeatHasMatch. */ +enum RepeatMatch { + REPEAT_NOMATCH, /**< This offset is not a valid match. */ + REPEAT_MATCH, /**< This offset is a valid match. */ + REPEAT_STALE /**< This offset is not a valid match and no greater + offset will be (unless another top is stored). */ +}; + +/** Query whether the repeat has a match at the given offset. Returns + * ::REPEAT_STALE if it does not have a match at that offset _and_ + * no further matches are possible. */ +static really_inline +enum RepeatMatch repeatHasMatch(const struct RepeatInfo *info, + const union RepeatControl *ctrl, + const void *state, u64a offset); + +/** \brief Serialize a packed version of the repeat control block into stream + * state. */ +void repeatPack(char *dest, const struct RepeatInfo *info, + const union RepeatControl *ctrl, u64a offset); + +/** \brief Deserialize a packed version of the repeat control block. */ +void repeatUnpack(const char *src, const struct RepeatInfo *info, u64a offset, + union RepeatControl *ctrl); + +//// +//// IMPLEMENTATION. +//// + +u64a repeatLastTopRing(const struct RepeatInfo *info, + const union RepeatControl *ctrl); + +u64a repeatLastTopRange(const union RepeatControl *ctrl, + const void *state); + +u64a repeatLastTopBitmap(const union RepeatControl *ctrl); + +u64a repeatLastTopTrailer(const struct RepeatInfo *info, + const union RepeatControl *ctrl); + +u64a repeatLastTopSparseOptimalP(const struct RepeatInfo *info, + const union RepeatControl *ctrl, + const void *state); + +static really_inline +u64a repeatLastTop(const struct RepeatInfo *info, + const union RepeatControl *ctrl, const void *state) { + assert(info && ctrl && state); + + switch ((enum RepeatType)info->type) { + case REPEAT_RING: + return repeatLastTopRing(info, ctrl); + case REPEAT_FIRST: + case REPEAT_LAST: + return ctrl->offset.offset; + case REPEAT_RANGE: + return repeatLastTopRange(ctrl, state); + case REPEAT_BITMAP: + return repeatLastTopBitmap(ctrl); + case REPEAT_SPARSE_OPTIMAL_P: + return repeatLastTopSparseOptimalP(info, ctrl, state); + case REPEAT_TRAILER: + return repeatLastTopTrailer(info, ctrl); case REPEAT_ALWAYS: return 0; - } - - DEBUG_PRINTF("bad repeat type %u\n", info->type); - assert(0); - return 0; -} - -// Used for both FIRST and LAST models. 
-static really_inline -u64a repeatNextMatchOffset(const struct RepeatInfo *info, - const union RepeatControl *ctrl, u64a offset) { - u64a first = ctrl->offset.offset + info->repeatMin; - if (offset < first) { - return first; - } - - if (info->repeatMax == REPEAT_INF || - offset < ctrl->offset.offset + info->repeatMax) { - return offset + 1; - } - - return 0; // No more matches. -} - -u64a repeatNextMatchRing(const struct RepeatInfo *info, - const union RepeatControl *ctrl, - const void *state, u64a offset); - -u64a repeatNextMatchRange(const struct RepeatInfo *info, - const union RepeatControl *ctrl, - const void *state, u64a offset); - -u64a repeatNextMatchBitmap(const struct RepeatInfo *info, - const union RepeatControl *ctrl, u64a offset); - -u64a repeatNextMatchSparseOptimalP(const struct RepeatInfo *info, - const union RepeatControl *ctrl, - const void *state, u64a offset); - -u64a repeatNextMatchTrailer(const struct RepeatInfo *info, - const union RepeatControl *ctrl, u64a offset); - -static really_inline -u64a repeatNextMatch(const struct RepeatInfo *info, - const union RepeatControl *ctrl, const void *state, - u64a offset) { - assert(info && ctrl && state); - assert(ISALIGNED(info)); - assert(ISALIGNED(ctrl)); - - switch ((enum RepeatType)info->type) { - case REPEAT_RING: - return repeatNextMatchRing(info, ctrl, state, offset); - case REPEAT_FIRST: - // fall through - case REPEAT_LAST: - return repeatNextMatchOffset(info, ctrl, offset); - case REPEAT_RANGE: - return repeatNextMatchRange(info, ctrl, state, offset); - case REPEAT_BITMAP: - return repeatNextMatchBitmap(info, ctrl, offset); - case REPEAT_SPARSE_OPTIMAL_P: - return repeatNextMatchSparseOptimalP(info, ctrl, state, offset); - case REPEAT_TRAILER: - return repeatNextMatchTrailer(info, ctrl, offset); + } + + DEBUG_PRINTF("bad repeat type %u\n", info->type); + assert(0); + return 0; +} + +// Used for both FIRST and LAST models. +static really_inline +u64a repeatNextMatchOffset(const struct RepeatInfo *info, + const union RepeatControl *ctrl, u64a offset) { + u64a first = ctrl->offset.offset + info->repeatMin; + if (offset < first) { + return first; + } + + if (info->repeatMax == REPEAT_INF || + offset < ctrl->offset.offset + info->repeatMax) { + return offset + 1; + } + + return 0; // No more matches. 
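As a quick worked check of the window arithmetic in repeatNextMatchOffset(), consider invented bounds: a top stored at ctrl->offset.offset == 100 with repeatMin == 3 and repeatMax == 10, giving the match window [103, 110]. The numbers are purely illustrative.

/* first = 100 + 3 = 103; the window closes after 100 + 10 = 110. */
u64a a = repeatNextMatchOffset(info, ctrl, 99);  /* -> 103: clamped up to first */
u64a b = repeatNextMatchOffset(info, ctrl, 105); /* -> 106: next offset in window */
u64a c = repeatNextMatchOffset(info, ctrl, 110); /* -> 0: no further matches */
/* With repeatMax == REPEAT_INF the window never closes, so any offset at
 * or beyond 103 yields offset + 1. */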
+} + +u64a repeatNextMatchRing(const struct RepeatInfo *info, + const union RepeatControl *ctrl, + const void *state, u64a offset); + +u64a repeatNextMatchRange(const struct RepeatInfo *info, + const union RepeatControl *ctrl, + const void *state, u64a offset); + +u64a repeatNextMatchBitmap(const struct RepeatInfo *info, + const union RepeatControl *ctrl, u64a offset); + +u64a repeatNextMatchSparseOptimalP(const struct RepeatInfo *info, + const union RepeatControl *ctrl, + const void *state, u64a offset); + +u64a repeatNextMatchTrailer(const struct RepeatInfo *info, + const union RepeatControl *ctrl, u64a offset); + +static really_inline +u64a repeatNextMatch(const struct RepeatInfo *info, + const union RepeatControl *ctrl, const void *state, + u64a offset) { + assert(info && ctrl && state); + assert(ISALIGNED(info)); + assert(ISALIGNED(ctrl)); + + switch ((enum RepeatType)info->type) { + case REPEAT_RING: + return repeatNextMatchRing(info, ctrl, state, offset); + case REPEAT_FIRST: + // fall through + case REPEAT_LAST: + return repeatNextMatchOffset(info, ctrl, offset); + case REPEAT_RANGE: + return repeatNextMatchRange(info, ctrl, state, offset); + case REPEAT_BITMAP: + return repeatNextMatchBitmap(info, ctrl, offset); + case REPEAT_SPARSE_OPTIMAL_P: + return repeatNextMatchSparseOptimalP(info, ctrl, state, offset); + case REPEAT_TRAILER: + return repeatNextMatchTrailer(info, ctrl, offset); case REPEAT_ALWAYS: return offset + 1; - } - - DEBUG_PRINTF("bad repeat type %u\n", info->type); - assert(0); - return 0; -} - -static really_inline -void repeatStoreFirst(union RepeatControl *ctrl, u64a offset, - char is_alive) { - if (is_alive) { - return; - } - ctrl->offset.offset = offset; -} - -static really_inline -void repeatStoreLast(union RepeatControl *ctrl, u64a offset, - UNUSED char is_alive) { - assert(!is_alive || offset >= ctrl->offset.offset); - ctrl->offset.offset = offset; -} - -void repeatStoreRing(const struct RepeatInfo *info, - union RepeatControl *ctrl, void *state, u64a offset, - char is_alive); - -void repeatStoreRange(const struct RepeatInfo *info, - union RepeatControl *ctrl, void *state, u64a offset, - char is_alive); - -void repeatStoreBitmap(const struct RepeatInfo *info, - union RepeatControl *ctrl, u64a offset, - char is_alive); - -void repeatStoreSparseOptimalP(const struct RepeatInfo *info, - union RepeatControl *ctrl, void *state, - u64a offset, char is_alive); - -void repeatStoreTrailer(const struct RepeatInfo *info, - union RepeatControl *ctrl, u64a offset, - char is_alive); - -static really_inline -void repeatStore(const struct RepeatInfo *info, union RepeatControl *ctrl, - void *state, u64a offset, char is_alive) { - assert(info && ctrl && state); - assert(ISALIGNED(info)); - assert(ISALIGNED(ctrl)); - - assert(info->repeatMin <= info->repeatMax); - assert(info->repeatMax <= REPEAT_INF); - - switch ((enum RepeatType)info->type) { - case REPEAT_RING: - repeatStoreRing(info, ctrl, state, offset, is_alive); - break; - case REPEAT_FIRST: - repeatStoreFirst(ctrl, offset, is_alive); - break; - case REPEAT_LAST: - repeatStoreLast(ctrl, offset, is_alive); - break; - case REPEAT_RANGE: - repeatStoreRange(info, ctrl, state, offset, is_alive); - break; - case REPEAT_BITMAP: - repeatStoreBitmap(info, ctrl, offset, is_alive); - break; - case REPEAT_SPARSE_OPTIMAL_P: - repeatStoreSparseOptimalP(info, ctrl, state, offset, is_alive); - break; - case REPEAT_TRAILER: - repeatStoreTrailer(info, ctrl, offset, is_alive); - break; + } + + DEBUG_PRINTF("bad repeat type %u\n", 
info->type); + assert(0); + return 0; +} + +static really_inline +void repeatStoreFirst(union RepeatControl *ctrl, u64a offset, + char is_alive) { + if (is_alive) { + return; + } + ctrl->offset.offset = offset; +} + +static really_inline +void repeatStoreLast(union RepeatControl *ctrl, u64a offset, + UNUSED char is_alive) { + assert(!is_alive || offset >= ctrl->offset.offset); + ctrl->offset.offset = offset; +} + +void repeatStoreRing(const struct RepeatInfo *info, + union RepeatControl *ctrl, void *state, u64a offset, + char is_alive); + +void repeatStoreRange(const struct RepeatInfo *info, + union RepeatControl *ctrl, void *state, u64a offset, + char is_alive); + +void repeatStoreBitmap(const struct RepeatInfo *info, + union RepeatControl *ctrl, u64a offset, + char is_alive); + +void repeatStoreSparseOptimalP(const struct RepeatInfo *info, + union RepeatControl *ctrl, void *state, + u64a offset, char is_alive); + +void repeatStoreTrailer(const struct RepeatInfo *info, + union RepeatControl *ctrl, u64a offset, + char is_alive); + +static really_inline +void repeatStore(const struct RepeatInfo *info, union RepeatControl *ctrl, + void *state, u64a offset, char is_alive) { + assert(info && ctrl && state); + assert(ISALIGNED(info)); + assert(ISALIGNED(ctrl)); + + assert(info->repeatMin <= info->repeatMax); + assert(info->repeatMax <= REPEAT_INF); + + switch ((enum RepeatType)info->type) { + case REPEAT_RING: + repeatStoreRing(info, ctrl, state, offset, is_alive); + break; + case REPEAT_FIRST: + repeatStoreFirst(ctrl, offset, is_alive); + break; + case REPEAT_LAST: + repeatStoreLast(ctrl, offset, is_alive); + break; + case REPEAT_RANGE: + repeatStoreRange(info, ctrl, state, offset, is_alive); + break; + case REPEAT_BITMAP: + repeatStoreBitmap(info, ctrl, offset, is_alive); + break; + case REPEAT_SPARSE_OPTIMAL_P: + repeatStoreSparseOptimalP(info, ctrl, state, offset, is_alive); + break; + case REPEAT_TRAILER: + repeatStoreTrailer(info, ctrl, offset, is_alive); + break; case REPEAT_ALWAYS: /* nothing to do - no state */ break; - } -} - -static really_inline -enum RepeatMatch repeatHasMatchFirst(const struct RepeatInfo *info, - const union RepeatControl *ctrl, - u64a offset) { - if (offset < ctrl->offset.offset + info->repeatMin) { - return REPEAT_NOMATCH; - } - - // FIRST models are {N,} repeats, i.e. they always have inf max depth. 
- assert(info->repeatMax == REPEAT_INF); - return REPEAT_MATCH; -} - -static really_inline -enum RepeatMatch repeatHasMatchLast(const struct RepeatInfo *info, - const union RepeatControl *ctrl, - u64a offset) { - if (offset < ctrl->offset.offset + info->repeatMin) { - return REPEAT_NOMATCH; - } - assert(info->repeatMax < REPEAT_INF); - if (offset <= ctrl->offset.offset + info->repeatMax) { - return REPEAT_MATCH; - } - return REPEAT_STALE; -} - -enum RepeatMatch repeatHasMatchRing(const struct RepeatInfo *info, - const union RepeatControl *ctrl, - const void *state, u64a offset); - -enum RepeatMatch repeatHasMatchRange(const struct RepeatInfo *info, - const union RepeatControl *ctrl, - const void *state, u64a offset); - -enum RepeatMatch repeatHasMatchSparseOptimalP(const struct RepeatInfo *info, - const union RepeatControl *ctrl, - const void *state, u64a offset); - -enum RepeatMatch repeatHasMatchBitmap(const struct RepeatInfo *info, - const union RepeatControl *ctrl, - u64a offset); - -enum RepeatMatch repeatHasMatchTrailer(const struct RepeatInfo *info, - const union RepeatControl *ctrl, - u64a offset); - -static really_inline -enum RepeatMatch repeatHasMatch(const struct RepeatInfo *info, - const union RepeatControl *ctrl, - const void *state, u64a offset) { - assert(info && ctrl && state); - assert(ISALIGNED(info)); - assert(ISALIGNED(ctrl)); - - switch ((enum RepeatType)info->type) { - case REPEAT_RING: - return repeatHasMatchRing(info, ctrl, state, offset); - case REPEAT_FIRST: - return repeatHasMatchFirst(info, ctrl, offset); - case REPEAT_LAST: - return repeatHasMatchLast(info, ctrl, offset); - case REPEAT_RANGE: - return repeatHasMatchRange(info, ctrl, state, offset); - case REPEAT_BITMAP: - return repeatHasMatchBitmap(info, ctrl, offset); - case REPEAT_SPARSE_OPTIMAL_P: - return repeatHasMatchSparseOptimalP(info, ctrl, state, offset); - case REPEAT_TRAILER: - return repeatHasMatchTrailer(info, ctrl, offset); + } +} + +static really_inline +enum RepeatMatch repeatHasMatchFirst(const struct RepeatInfo *info, + const union RepeatControl *ctrl, + u64a offset) { + if (offset < ctrl->offset.offset + info->repeatMin) { + return REPEAT_NOMATCH; + } + + // FIRST models are {N,} repeats, i.e. they always have inf max depth. 
+ assert(info->repeatMax == REPEAT_INF); + return REPEAT_MATCH; +} + +static really_inline +enum RepeatMatch repeatHasMatchLast(const struct RepeatInfo *info, + const union RepeatControl *ctrl, + u64a offset) { + if (offset < ctrl->offset.offset + info->repeatMin) { + return REPEAT_NOMATCH; + } + assert(info->repeatMax < REPEAT_INF); + if (offset <= ctrl->offset.offset + info->repeatMax) { + return REPEAT_MATCH; + } + return REPEAT_STALE; +} + +enum RepeatMatch repeatHasMatchRing(const struct RepeatInfo *info, + const union RepeatControl *ctrl, + const void *state, u64a offset); + +enum RepeatMatch repeatHasMatchRange(const struct RepeatInfo *info, + const union RepeatControl *ctrl, + const void *state, u64a offset); + +enum RepeatMatch repeatHasMatchSparseOptimalP(const struct RepeatInfo *info, + const union RepeatControl *ctrl, + const void *state, u64a offset); + +enum RepeatMatch repeatHasMatchBitmap(const struct RepeatInfo *info, + const union RepeatControl *ctrl, + u64a offset); + +enum RepeatMatch repeatHasMatchTrailer(const struct RepeatInfo *info, + const union RepeatControl *ctrl, + u64a offset); + +static really_inline +enum RepeatMatch repeatHasMatch(const struct RepeatInfo *info, + const union RepeatControl *ctrl, + const void *state, u64a offset) { + assert(info && ctrl && state); + assert(ISALIGNED(info)); + assert(ISALIGNED(ctrl)); + + switch ((enum RepeatType)info->type) { + case REPEAT_RING: + return repeatHasMatchRing(info, ctrl, state, offset); + case REPEAT_FIRST: + return repeatHasMatchFirst(info, ctrl, offset); + case REPEAT_LAST: + return repeatHasMatchLast(info, ctrl, offset); + case REPEAT_RANGE: + return repeatHasMatchRange(info, ctrl, state, offset); + case REPEAT_BITMAP: + return repeatHasMatchBitmap(info, ctrl, offset); + case REPEAT_SPARSE_OPTIMAL_P: + return repeatHasMatchSparseOptimalP(info, ctrl, state, offset); + case REPEAT_TRAILER: + return repeatHasMatchTrailer(info, ctrl, offset); case REPEAT_ALWAYS: return REPEAT_MATCH; - } - - assert(0); - return REPEAT_NOMATCH; -} - -#ifdef __cplusplus -} -#endif - -#endif // REPEAT_H + } + + assert(0); + return REPEAT_NOMATCH; +} + +#ifdef __cplusplus +} +#endif + +#endif // REPEAT_H diff --git a/contrib/libs/hyperscan/src/nfa/repeat_internal.h b/contrib/libs/hyperscan/src/nfa/repeat_internal.h index 399e53c93f..9e3f455c80 100644 --- a/contrib/libs/hyperscan/src/nfa/repeat_internal.h +++ b/contrib/libs/hyperscan/src/nfa/repeat_internal.h @@ -1,87 +1,87 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef REPEAT_INTERNAL_H -#define REPEAT_INTERNAL_H - -#include "ue2common.h" - -/** \file - * \brief Bounded Repeat models. - * - * Used by the NFA, to represent bounded repeats managed via special POS and - * TUG exceptions, and by the LBR (limited bounded repeat) and Castle - * specialist engines. - * - * We currently have a number of different kinds of bounded repeat model, for - * different kinds of {N,M} repeats, described by ::RepeatType. - */ - -/** Different types of bounded repeats. */ -enum RepeatType { - /** General mechanism for tracking {N,M} repeats. Stores the first top as - * an absolute offset, then subsequent tops in the {N,M} range as a ring of - * relative top indices stored in a multibit. */ +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef REPEAT_INTERNAL_H +#define REPEAT_INTERNAL_H + +#include "ue2common.h" + +/** \file + * \brief Bounded Repeat models. + * + * Used by the NFA, to represent bounded repeats managed via special POS and + * TUG exceptions, and by the LBR (limited bounded repeat) and Castle + * specialist engines. + * + * We currently have a number of different kinds of bounded repeat model, for + * different kinds of {N,M} repeats, described by ::RepeatType. + */ + +/** Different types of bounded repeats. */ +enum RepeatType { + /** General mechanism for tracking {N,M} repeats. 
Stores the first top as + * an absolute offset, then subsequent tops in the {N,M} range as a ring of + * relative top indices stored in a multibit. */ REPEAT_RING, - - /** Used to track {N,} repeats. Uses the \ref RepeatOffsetControl structure, - * since only the first top encountered needs to be stored. */ + + /** Used to track {N,} repeats. Uses the \ref RepeatOffsetControl structure, + * since only the first top encountered needs to be stored. */ REPEAT_FIRST, - - /** Used to track {0,N} repeats. Much like ::REPEAT_FIRST, except that we - * store the most recent top encountered. */ + + /** Used to track {0,N} repeats. Much like ::REPEAT_FIRST, except that we - * store the most recent top encountered. */ REPEAT_LAST, - - /** Like ::REPEAT_RING, this is also used for {N,M} repeats, but for cases - * where there is a large difference between N and M, and was developed to - * reduce the state requirements of this case (relative to the RING model). - * Uses a small ordered array of top indices relative to \ref - * RepeatRangeControl::offset. */ + + /** Like ::REPEAT_RING, this is also used for {N,M} repeats, but for cases + * where there is a large difference between N and M, and was developed to + * reduce the state requirements of this case (relative to the RING model). + * Uses a small ordered array of top indices relative to \ref + * RepeatRangeControl::offset. */ REPEAT_RANGE, - - /** Used for {N,M} repeats where 0 < M <= 64. Uses the \ref - * RepeatBitmapControl structure at runtime. */ + + /** Used for {N,M} repeats where 0 < M <= 64. Uses the \ref + * RepeatBitmapControl structure at runtime. */ REPEAT_BITMAP, - - /** Optimal mechanism for tracking {N,M} repeats when there is a bound on - * how frequently they can be retriggered. - * Assume f(repeat, min) represents the number of possible bit patterns - * we can have for repeat size = repeat, minimum period = min. - * We then have the following recurrence relation: - * f(repeat, min) = f(repeat - 1, min) + f(repeat - min, min); - * We use this recurrence to encode bit patterns with 64-bit values by - * referencing a table that stores values from f(0, min) to f(repeat, min). - * e.g. repeat = 5, min = 2: 10001 => f(4,2) + f(0,2) = 9. - * We search the optimal patch size between min and repeat in advance and + + /** Optimal mechanism for tracking {N,M} repeats when there is a bound on + * how frequently they can be retriggered. + * Assume f(repeat, min) represents the number of possible bit patterns + * we can have for repeat size = repeat, minimum period = min. + * We then have the following recurrence relation: + * f(repeat, min) = f(repeat - 1, min) + f(repeat - min, min); + * We use this recurrence to encode bit patterns with 64-bit values by + * referencing a table that stores values from f(0, min) to f(repeat, min). + * e.g. repeat = 5, min = 2: 10001 => f(4,2) + f(0,2) = 9. + * We search the optimal patch size between min and repeat in advance and * use the scheme above to do encoding and decoding to reduce stream state * size. */ REPEAT_SPARSE_OPTIMAL_P, - + /** Used for {N,M} repeats where 0 < N < 64. Uses the * \ref RepeatTrailerControl structure at runtime. */ REPEAT_TRAILER, @@ -89,130 +89,130 @@ enum RepeatType { /** Degenerate repeat that always returns true. Used by castle for pseudo * [^X]* repeats. */ REPEAT_ALWAYS, -}; - -/** - * \brief Value used to represent an unbounded max repeat. - * - * Note that we do not support \ref RepeatInfo::repeatMax values larger than - * this.
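The recurrence in the REPEAT_SPARSE_OPTIMAL_P comment above is easiest to see with the table written out. The following self-contained sketch mirrors the table construction in repeatRecurTable() (repeatcompile.cpp, further down in this diff) and encodes a pattern as the sum of f(i, min) over its set bits; it reproduces the comment's own example, 10001 => 9, for repeat = 5, min = 2. The helper names are invented; this is not the library's encoder.

#include <stdint.h>
#include <stdio.h>

/* f(i) = i + 1 for i <= min, then f(i) = f(i - 1) + f(i - min).
 * For min = 2 this gives 1, 2, 3, 5, 8, 13, ... */
static void build_table(uint64_t *f, unsigned repeat, unsigned min) {
    for (unsigned i = 0; i <= repeat && i <= min; i++) {
        f[i] = i + 1;
    }
    for (unsigned i = min + 1; i <= repeat; i++) {
        f[i] = f[i - 1] + f[i - min];
    }
}

/* Encode a pattern (bit i set = top at position i) as the sum of f(i). */
static uint64_t encode(uint64_t bits, const uint64_t *f) {
    uint64_t v = 0;
    for (unsigned i = 0; i < 64; i++) {
        if (bits & (1ULL << i)) {
            v += f[i];
        }
    }
    return v;
}

int main(void) {
    uint64_t f[6];
    build_table(f, 5, 2);  /* f = 1, 2, 3, 5, 8, 13 */
    /* 10001 in binary has bits 4 and 0 set: f(4,2) + f(0,2) = 8 + 1 = 9 */
    printf("%llu\n", (unsigned long long)encode(0x11, f));
    return 0;
}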
- */ -#define REPEAT_INF 65535 - -/** Max slots used by ::REPEAT_RANGE repeat model. */ -#define REPEAT_RANGE_MAX_SLOTS 16 - -/** Structure describing a bounded repeat in the bytecode */ -struct RepeatInfo { - u8 type; //!< from enum RepeatType. - u32 repeatMin; //!< minimum number of repeats. - u32 repeatMax; //!< maximum number of repeats, or REPEAT_INF if unbounded. - - /** Maximum value that is required to be stored in the control block - * counters. Any value greater than this will be capped at the horizon. - */ - u32 horizon; - - /** Size of the compressed control block in bytes. This is what is written - * out to stream state at stream boundaries. */ - u32 packedCtrlSize; - - /** Size of the repeat state block in bytes. This is where the REPEAT_RANGE - * vector and REPEAT_RING multibit are stored, in stream state, and they - * are manipulated directly (i.e. not copied at stream boundaries). */ - u32 stateSize; - - /** How soon after one trigger we can see the next trigger. - * Used by REPEAT_SPARSE_OPTIMAL_P. */ - u32 minPeriod; - - /** Packed control block field sizes (in bits), used by REPEAT_TRAILER. */ - u32 packedFieldSizes[2]; - - /* Number of patches, used by REPEAT_SPARSE_OPTIMAL_P. */ - u32 patchCount; - - /* Optimal patch length, used by REPEAT_SPARSE_OPTIMAL_P. */ - u32 patchSize; - - /* Encoding patch length in bytes, used by REPEAT_SPARSE_OPTIMAL_P. */ - u32 encodingSize; - - /* RepeatInfo struct length including table size. */ - u32 length; - - /** Offset of patches relative to the start of repeat stream state, - * used by REPEAT_SPARSE_OPTIMAL_P. */ - u32 patchesOffset; -}; - -/** Runtime control block structure for ::REPEAT_RING and - * ::REPEAT_SPARSE_OPTIMAL_P bounded repeats. Note that this struct is packed - * (may not be aligned). */ -struct RepeatRingControl { - u64a offset; //!< index of first top. - u16 first; //!< start index in ring. - u16 last; //!< end index in ring. -}; - -/** Runtime control block structure for ::REPEAT_RANGE bounded repeats. Note - * that this struct is packed (may not be aligned). */ -struct RepeatRangeControl { - u64a offset; //!< index of first top. - u8 num; //!< number of elements in array. -}; - -/** Runtime control block structure for cases where only a single offset is - * needed to track the repeat, both ::REPEAT_FIRST and ::REPEAT_LAST. Note that - * this struct is packed (may not be aligned). */ -struct RepeatOffsetControl { - u64a offset; //!< index of a top. -}; - -/** Runtime control block structure for ::REPEAT_BITMAP bounded repeats. */ -struct RepeatBitmapControl { - u64a offset; //!< index of first top. - u64a bitmap; //!< forward bitmap of tops relative to base offset. -}; - -/** Runtime control block structure for ::REPEAT_TRAILER bounded repeats. */ -struct RepeatTrailerControl { - u64a offset; //!< min extent of most recent match window. - u64a bitmap; //!< trailing bitmap of earlier matches, relative to offset. -}; - -/** \brief Union of control block types, used at runtime. */ -union RepeatControl { - struct RepeatRingControl ring; - struct RepeatRangeControl range; - struct RepeatOffsetControl offset; - struct RepeatBitmapControl bitmap; - struct RepeatTrailerControl trailer; -}; - -/** For debugging, returns the name of a repeat model. 
*/ -static really_inline UNUSED -const char *repeatTypeName(u8 type) { - switch ((enum RepeatType)type) { - case REPEAT_RING: - return "RING"; - case REPEAT_FIRST: - return "FIRST"; - case REPEAT_LAST: - return "LAST"; - case REPEAT_RANGE: - return "RANGE"; - case REPEAT_BITMAP: - return "BITMAP"; - case REPEAT_SPARSE_OPTIMAL_P: - return "SPARSE_OPTIMAL_P"; - case REPEAT_TRAILER: - return "TRAILER"; +}; + +/** + * \brief Value used to represent an unbounded max repeat. + * + * Note that we do not support \ref RepeatInfo::repeatMax values larger than + * this. + */ +#define REPEAT_INF 65535 + +/** Max slots used by ::REPEAT_RANGE repeat model. */ +#define REPEAT_RANGE_MAX_SLOTS 16 + +/** Structure describing a bounded repeat in the bytecode */ +struct RepeatInfo { + u8 type; //!< from enum RepeatType. + u32 repeatMin; //!< minimum number of repeats. + u32 repeatMax; //!< maximum number of repeats, or REPEAT_INF if unbounded. + + /** Maximum value that is required to be stored in the control block + * counters. Any value greater than this will be capped at the horizon. + */ + u32 horizon; + + /** Size of the compressed control block in bytes. This is what is written + * out to stream state at stream boundaries. */ + u32 packedCtrlSize; + + /** Size of the repeat state block in bytes. This is where the REPEAT_RANGE + * vector and REPEAT_RING multibit are stored, in stream state, and they + * are manipulated directly (i.e. not copied at stream boundaries). */ + u32 stateSize; + + /** How soon after one trigger we can see the next trigger. + * Used by REPEAT_SPARSE_OPTIMAL_P. */ + u32 minPeriod; + + /** Packed control block field sizes (in bits), used by REPEAT_TRAILER. */ + u32 packedFieldSizes[2]; + + /* Number of patches, used by REPEAT_SPARSE_OPTIMAL_P. */ + u32 patchCount; + + /* Optimal patch length, used by REPEAT_SPARSE_OPTIMAL_P. */ + u32 patchSize; + + /* Encoding patch length in bytes, used by REPEAT_SPARSE_OPTIMAL_P. */ + u32 encodingSize; + + /* RepeatInfo struct length including table size. */ + u32 length; + + /** Offset of patches relative to the start of repeat stream state, + * used by REPEAT_SPARSE_OPTIMAL_P. */ + u32 patchesOffset; +}; + +/** Runtime control block structure for ::REPEAT_RING and + * ::REPEAT_SPARSE_OPTIMAL_P bounded repeats. Note that this struct is packed + * (may not be aligned). */ +struct RepeatRingControl { + u64a offset; //!< index of first top. + u16 first; //!< start index in ring. + u16 last; //!< end index in ring. +}; + +/** Runtime control block structure for ::REPEAT_RANGE bounded repeats. Note + * that this struct is packed (may not be aligned). */ +struct RepeatRangeControl { + u64a offset; //!< index of first top. + u8 num; //!< number of elements in array. +}; + +/** Runtime control block structure for cases where only a single offset is + * needed to track the repeat, both ::REPEAT_FIRST and ::REPEAT_LAST. Note that + * this struct is packed (may not be aligned). */ +struct RepeatOffsetControl { + u64a offset; //!< index of a top. +}; + +/** Runtime control block structure for ::REPEAT_BITMAP bounded repeats. */ +struct RepeatBitmapControl { + u64a offset; //!< index of first top. + u64a bitmap; //!< forward bitmap of tops relative to base offset. +}; + +/** Runtime control block structure for ::REPEAT_TRAILER bounded repeats. */ +struct RepeatTrailerControl { + u64a offset; //!< min extent of most recent match window. + u64a bitmap; //!< trailing bitmap of earlier matches, relative to offset. 
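Reading these bitmap-based control blocks is straightforward once the bit convention is fixed. The helper below is a hedged sketch for RepeatBitmapControl, assuming (from the field comment above) that bit k of 'bitmap' records a top at absolute offset 'offset' + k; both the function name and that convention are inferences from the comments, not code from the library.

/* Assumed convention: bit k of xs->bitmap == top at xs->offset + k. */
static int bitmap_has_top_at(const struct RepeatBitmapControl *xs, u64a at) {
    if (at < xs->offset || at >= xs->offset + 64) {
        return 0; /* outside the 64-position window the bitmap covers */
    }
    return (xs->bitmap >> (at - xs->offset)) & 1;
}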
+}; + +/** \brief Union of control block types, used at runtime. */ +union RepeatControl { + struct RepeatRingControl ring; + struct RepeatRangeControl range; + struct RepeatOffsetControl offset; + struct RepeatBitmapControl bitmap; + struct RepeatTrailerControl trailer; +}; + +/** For debugging, returns the name of a repeat model. */ +static really_inline UNUSED +const char *repeatTypeName(u8 type) { + switch ((enum RepeatType)type) { + case REPEAT_RING: + return "RING"; + case REPEAT_FIRST: + return "FIRST"; + case REPEAT_LAST: + return "LAST"; + case REPEAT_RANGE: + return "RANGE"; + case REPEAT_BITMAP: + return "BITMAP"; + case REPEAT_SPARSE_OPTIMAL_P: + return "SPARSE_OPTIMAL_P"; + case REPEAT_TRAILER: + return "TRAILER"; case REPEAT_ALWAYS: return "ALWAYS"; - } - assert(0); - return "UNKNOWN"; -} - -#endif // REPEAT_INTERNAL_H + } + assert(0); + return "UNKNOWN"; +} + +#endif // REPEAT_INTERNAL_H diff --git a/contrib/libs/hyperscan/src/nfa/repeatcompile.cpp b/contrib/libs/hyperscan/src/nfa/repeatcompile.cpp index 48d96bfa06..934dd29e6b 100644 --- a/contrib/libs/hyperscan/src/nfa/repeatcompile.cpp +++ b/contrib/libs/hyperscan/src/nfa/repeatcompile.cpp @@ -1,211 +1,211 @@ -/* +/* * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Bounded repeat compile-time code. - */ -#include "repeatcompile.h" -#include "util/bitutils.h" -#include "util/charreach.h" -#include "util/depth.h" -#include "util/dump_charclass.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Bounded repeat compile-time code. + */ +#include "repeatcompile.h" +#include "util/bitutils.h" +#include "util/charreach.h" +#include "util/depth.h" +#include "util/dump_charclass.h" #include "util/multibit_build.h" -#include "util/verify_types.h" - -#include <algorithm> -#include <cstring> // memset -#include <utility> - -using namespace std; - -namespace ue2 { - -/** \brief Calculate the number of slots required to store the given repeat in - * a RANGE model. */ -static -u32 numRangeSlots(u32 repeatMin, u32 repeatMax) { - assert(repeatMax > repeatMin); - - u32 d = repeatMax - repeatMin; - u32 slots = 2 * ((repeatMax / d) + 1); - return slots; -} - -static -u32 calcPackedBits(u64a val) { - assert(val); - if (val <= 1) { - return 1; - } - u32 bits = lg2_64(val - 1) + 1U; /* lg2 rounds down */ - DEBUG_PRINTF("packing %llu into %u bits\n", val, bits); - return bits; -} - -/* returns the min number of bytes required to represent val options */ -u32 calcPackedBytes(u64a val) { - u32 rv = (calcPackedBits(val) + 7U) / 8U; - DEBUG_PRINTF("packing %llu into %u bytes\n", val, rv); - return rv; -} - -static -u32 repeatRecurTable(struct RepeatStateInfo *info, const depth &repeatMax, - const u32 minPeriod) { - u32 repeatTmp = info->patchCount > 2 ? 64 : (u32)repeatMax; - u32 repeat_index = repeatTmp < minPeriod ? 
repeatTmp : minPeriod; - for (u32 i = 0; i <= repeat_index; i++) { - info->table.push_back(i + 1); - } - for (u32 i = minPeriod + 1; i <= repeatTmp; i++) { - info->table.push_back(info->table[i - 1] + info->table[i - minPeriod]); - if (info->table[i] < info->table[i - 1]) { - return i - 1; - } - } - return 0; -} - -static -u32 findOptimalPatchSize(struct RepeatStateInfo *info, const depth &repeatMax, - const u32 minPeriod, u32 rv) { - u32 cnt = 0; - u32 patch_bits = 0; - u32 total_size = 0; - u32 min = ~0U; - u32 patch_len = 0; - - if (!rv) { - rv = repeatMax; - } - - for (u32 i = minPeriod; i <= rv; i++) { - cnt = ((u32)repeatMax + (i - 1)) / i + 1; - - // no bit packing version - patch_bits = calcPackedBits(info->table[i]); - total_size = (patch_bits + 7U) / 8U * cnt; - - if (total_size < min) { - patch_len = i; - min = total_size; - info->patchCount = cnt; - } - } - return patch_len; -} - -RepeatStateInfo::RepeatStateInfo(enum RepeatType type, const depth &repeatMin, - const depth &repeatMax, u32 minPeriod) - : stateSize(0), packedCtrlSize(0), horizon(0), patchCount(0), - patchSize(0), encodingSize(0), patchesOffset(0) { - assert(repeatMin <= repeatMax); - assert(repeatMax.is_reachable()); - assert(minPeriod || type != REPEAT_SPARSE_OPTIMAL_P); - - switch (type) { - case REPEAT_FIRST: - assert(repeatMin.is_finite()); - stateSize = 0; // everything is in the control block. - horizon = repeatMin; - packedCtrlSize = calcPackedBytes(horizon + 1); - break; - case REPEAT_LAST: - assert(repeatMax.is_finite()); - stateSize = 0; // everything is in the control block. - horizon = repeatMax + 1; - packedCtrlSize = calcPackedBytes(horizon + 1); - break; - case REPEAT_RING: - assert(repeatMax.is_finite()); - stateSize = mmbit_size(repeatMax + 1); - horizon = repeatMax * 2 + 1; /* TODO: investigate tightening */ - // Packed offset member, plus two bytes for each ring index, reduced to - // one byte each if they'll fit in eight bits. - { - u32 offset_len = calcPackedBytes(horizon + 1); - u32 ring_indices_len = repeatMax < depth(254) ? 2 : 4; - packedCtrlSize = offset_len + ring_indices_len; - } - break; - case REPEAT_RANGE: - assert(repeatMax.is_finite()); - assert(repeatMin < repeatMax); - stateSize = numRangeSlots(repeatMin, repeatMax) * sizeof(u16); - horizon = repeatMax * 2 + 1; - // Packed offset member, plus one byte for the number of range - // elements. - packedCtrlSize = calcPackedBytes(horizon + 1) + 1; - break; - case REPEAT_BITMAP: - stateSize = 0; // everything is in the control block. - horizon = 0; // unused - packedCtrlSize = ROUNDUP_N(repeatMax + 1, 8) / 8; - break; - case REPEAT_SPARSE_OPTIMAL_P: - assert(minPeriod); - assert(repeatMax.is_finite()); - { - u32 rv = repeatRecurTable(this, repeatMax, minPeriod); - u32 repeatTmp = 0; - if ((u32)repeatMax < minPeriod) { - repeatTmp = repeatMax; - patchCount = 1; - } else { - // find optimal patch size - repeatTmp = - findOptimalPatchSize(this, repeatMax, minPeriod, rv); - assert(patchCount < 65536); - } - DEBUG_PRINTF("repeat[%u %u], period=%u\n", (u32)repeatMin, - (u32)repeatMax, minPeriod); - u64a maxVal = table[repeatTmp]; - encodingSize = calcPackedBytes(maxVal); - patchSize = repeatTmp; - assert(encodingSize <= 64); - - patchesOffset = mmbit_size(patchCount); - stateSize = patchesOffset + encodingSize * patchCount; - horizon = (repeatTmp * patchCount) * 2 + 1; - u32 ring_indices_len = patchCount < depth(254) ? 
2 : 4; - packedCtrlSize = calcPackedBytes(horizon + 1) + ring_indices_len; - } - break; - case REPEAT_TRAILER: - assert(repeatMax.is_finite()); - assert(repeatMin <= depth(64)); - stateSize = 0; // everything is in the control block. - horizon = repeatMax + 1; - packedFieldSizes.resize(2); - packedFieldSizes[0] = calcPackedBits(horizon + 1); - packedFieldSizes[1] = repeatMin; - packedCtrlSize = (packedFieldSizes[0] + packedFieldSizes[1] + 7U) / 8U; - break; +#include "util/verify_types.h" + +#include <algorithm> +#include <cstring> // memset +#include <utility> + +using namespace std; + +namespace ue2 { + +/** \brief Calculate the number of slots required to store the given repeat in + * a RANGE model. */ +static +u32 numRangeSlots(u32 repeatMin, u32 repeatMax) { + assert(repeatMax > repeatMin); + + u32 d = repeatMax - repeatMin; + u32 slots = 2 * ((repeatMax / d) + 1); + return slots; +} + +static +u32 calcPackedBits(u64a val) { + assert(val); + if (val <= 1) { + return 1; + } + u32 bits = lg2_64(val - 1) + 1U; /* lg2 rounds down */ + DEBUG_PRINTF("packing %llu into %u bits\n", val, bits); + return bits; +} + +/* returns the min number of bytes required to represent val options */ +u32 calcPackedBytes(u64a val) { + u32 rv = (calcPackedBits(val) + 7U) / 8U; + DEBUG_PRINTF("packing %llu into %u bytes\n", val, rv); + return rv; +} + +static +u32 repeatRecurTable(struct RepeatStateInfo *info, const depth &repeatMax, + const u32 minPeriod) { + u32 repeatTmp = info->patchCount > 2 ? 64 : (u32)repeatMax; + u32 repeat_index = repeatTmp < minPeriod ? repeatTmp : minPeriod; + for (u32 i = 0; i <= repeat_index; i++) { + info->table.push_back(i + 1); + } + for (u32 i = minPeriod + 1; i <= repeatTmp; i++) { + info->table.push_back(info->table[i - 1] + info->table[i - minPeriod]); + if (info->table[i] < info->table[i - 1]) { + return i - 1; + } + } + return 0; +} + +static +u32 findOptimalPatchSize(struct RepeatStateInfo *info, const depth &repeatMax, + const u32 minPeriod, u32 rv) { + u32 cnt = 0; + u32 patch_bits = 0; + u32 total_size = 0; + u32 min = ~0U; + u32 patch_len = 0; + + if (!rv) { + rv = repeatMax; + } + + for (u32 i = minPeriod; i <= rv; i++) { + cnt = ((u32)repeatMax + (i - 1)) / i + 1; + + // no bit packing version + patch_bits = calcPackedBits(info->table[i]); + total_size = (patch_bits + 7U) / 8U * cnt; + + if (total_size < min) { + patch_len = i; + min = total_size; + info->patchCount = cnt; + } + } + return patch_len; +} + +RepeatStateInfo::RepeatStateInfo(enum RepeatType type, const depth &repeatMin, + const depth &repeatMax, u32 minPeriod) + : stateSize(0), packedCtrlSize(0), horizon(0), patchCount(0), + patchSize(0), encodingSize(0), patchesOffset(0) { + assert(repeatMin <= repeatMax); + assert(repeatMax.is_reachable()); + assert(minPeriod || type != REPEAT_SPARSE_OPTIMAL_P); + + switch (type) { + case REPEAT_FIRST: + assert(repeatMin.is_finite()); + stateSize = 0; // everything is in the control block. + horizon = repeatMin; + packedCtrlSize = calcPackedBytes(horizon + 1); + break; + case REPEAT_LAST: + assert(repeatMax.is_finite()); + stateSize = 0; // everything is in the control block. + horizon = repeatMax + 1; + packedCtrlSize = calcPackedBytes(horizon + 1); + break; + case REPEAT_RING: + assert(repeatMax.is_finite()); + stateSize = mmbit_size(repeatMax + 1); + horizon = repeatMax * 2 + 1; /* TODO: investigate tightening */ + // Packed offset member, plus two bytes for each ring index, reduced to + // one byte each if they'll fit in eight bits. 
+ { + u32 offset_len = calcPackedBytes(horizon + 1); + u32 ring_indices_len = repeatMax < depth(254) ? 2 : 4; + packedCtrlSize = offset_len + ring_indices_len; + } + break; + case REPEAT_RANGE: + assert(repeatMax.is_finite()); + assert(repeatMin < repeatMax); + stateSize = numRangeSlots(repeatMin, repeatMax) * sizeof(u16); + horizon = repeatMax * 2 + 1; + // Packed offset member, plus one byte for the number of range + // elements. + packedCtrlSize = calcPackedBytes(horizon + 1) + 1; + break; + case REPEAT_BITMAP: + stateSize = 0; // everything is in the control block. + horizon = 0; // unused + packedCtrlSize = ROUNDUP_N(repeatMax + 1, 8) / 8; + break; + case REPEAT_SPARSE_OPTIMAL_P: + assert(minPeriod); + assert(repeatMax.is_finite()); + { + u32 rv = repeatRecurTable(this, repeatMax, minPeriod); + u32 repeatTmp = 0; + if ((u32)repeatMax < minPeriod) { + repeatTmp = repeatMax; + patchCount = 1; + } else { + // find optimal patch size + repeatTmp = + findOptimalPatchSize(this, repeatMax, minPeriod, rv); + assert(patchCount < 65536); + } + DEBUG_PRINTF("repeat[%u %u], period=%u\n", (u32)repeatMin, + (u32)repeatMax, minPeriod); + u64a maxVal = table[repeatTmp]; + encodingSize = calcPackedBytes(maxVal); + patchSize = repeatTmp; + assert(encodingSize <= 64); + + patchesOffset = mmbit_size(patchCount); + stateSize = patchesOffset + encodingSize * patchCount; + horizon = (repeatTmp * patchCount) * 2 + 1; + u32 ring_indices_len = patchCount < depth(254) ? 2 : 4; + packedCtrlSize = calcPackedBytes(horizon + 1) + ring_indices_len; + } + break; + case REPEAT_TRAILER: + assert(repeatMax.is_finite()); + assert(repeatMin <= depth(64)); + stateSize = 0; // everything is in the control block. + horizon = repeatMax + 1; + packedFieldSizes.resize(2); + packedFieldSizes[0] = calcPackedBits(horizon + 1); + packedFieldSizes[1] = repeatMin; + packedCtrlSize = (packedFieldSizes[0] + packedFieldSizes[1] + 7U) / 8U; + break; case REPEAT_ALWAYS: assert(repeatMin == 0ULL); assert(repeatMax.is_infinite()); @@ -213,174 +213,174 @@ RepeatStateInfo::RepeatStateInfo(enum RepeatType type, const depth &repeatMin, horizon = 0; packedCtrlSize = 0; break; - } - DEBUG_PRINTF("stateSize=%u, packedCtrlSize=%u, horizon=%u\n", stateSize, - packedCtrlSize, horizon); - - assert(packedCtrlSize <= sizeof(RepeatControl)); -} - -/** \brief Returns the packed control block size in bytes for a given bounded - * repeat. */ -static -u32 packedSize(enum RepeatType type, const depth &repeatMin, - const depth &repeatMax, u32 minPeriod) { - RepeatStateInfo rsi(type, repeatMin, repeatMax, minPeriod); - return rsi.packedCtrlSize; -} - -/** \brief Returns the stream state size in bytes for a given bounded - * repeat. */ -static -u32 streamStateSize(enum RepeatType type, const depth &repeatMin, - const depth &repeatMax, u32 minPeriod) { - RepeatStateInfo rsi(type, repeatMin, repeatMax, minPeriod); - return rsi.stateSize; -} - -enum RepeatType chooseRepeatType(const depth &repeatMin, const depth &repeatMax, + } + DEBUG_PRINTF("stateSize=%u, packedCtrlSize=%u, horizon=%u\n", stateSize, + packedCtrlSize, horizon); + + assert(packedCtrlSize <= sizeof(RepeatControl)); +} + +/** \brief Returns the packed control block size in bytes for a given bounded + * repeat. */ +static +u32 packedSize(enum RepeatType type, const depth &repeatMin, + const depth &repeatMax, u32 minPeriod) { + RepeatStateInfo rsi(type, repeatMin, repeatMax, minPeriod); + return rsi.packedCtrlSize; +} + +/** \brief Returns the stream state size in bytes for a given bounded + * repeat. 
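These size helpers ultimately rest on the bit-packing arithmetic of calcPackedBits()/calcPackedBytes() defined earlier in repeatcompile.cpp: representing val distinct values needs ceil(log2(val)) bits, with a floor of one bit. A hedged standalone restatement in portable C follows, with lg2_64 replaced by a loop and the names invented.

#include <stdint.h>

static uint32_t packed_bits(uint64_t val) {
    if (val <= 1) {
        return 1;
    }
    uint32_t bits = 0;
    for (uint64_t v = val - 1; v; v >>= 1) {
        bits++; /* index of the highest set bit of val - 1, plus one */
    }
    return bits;
}

static uint32_t packed_bytes(uint64_t val) {
    return (packed_bits(val) + 7) / 8;
}

/* e.g. packed_bits(256) == 8 so packed_bytes(256) == 1, while
 * packed_bits(257) == 9 so packed_bytes(257) == 2. */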
*/ +static +u32 streamStateSize(enum RepeatType type, const depth &repeatMin, + const depth &repeatMax, u32 minPeriod) { + RepeatStateInfo rsi(type, repeatMin, repeatMax, minPeriod); + return rsi.stateSize; +} + +enum RepeatType chooseRepeatType(const depth &repeatMin, const depth &repeatMax, u32 minPeriod, bool is_reset, bool has_external_guard) { - if (repeatMax.is_infinite()) { + if (repeatMax.is_infinite()) { if (has_external_guard && !repeatMin) { return REPEAT_ALWAYS; } else { return REPEAT_FIRST; } - } - - if (repeatMin == depth(0) || is_reset) { - return REPEAT_LAST; - } - - // Cases with max < 64 can be handled with either bitmap or trailer. We use - // whichever has smaller packed state. - - if (repeatMax < depth(64)) { - u32 bitmap_len = - packedSize(REPEAT_BITMAP, repeatMin, repeatMax, minPeriod); - u32 trailer_len = - packedSize(REPEAT_TRAILER, repeatMin, repeatMax, minPeriod); - return bitmap_len <= trailer_len ? REPEAT_BITMAP : REPEAT_TRAILER; - } - - if (repeatMin <= depth(64)) { - return REPEAT_TRAILER; - } - - u32 range_len = ~0U; - if (repeatMax > repeatMin && - numRangeSlots(repeatMin, repeatMax) <= REPEAT_RANGE_MAX_SLOTS) { - assert(numRangeSlots(repeatMin, repeatMax) < 256); // stored in u8 - range_len = - streamStateSize(REPEAT_RANGE, repeatMin, repeatMax, minPeriod); - } - - assert(repeatMax.is_finite()); - - u32 sparse_len = ~0U; - if (minPeriod > 6) { - sparse_len = - streamStateSize(REPEAT_SPARSE_OPTIMAL_P, repeatMin, repeatMax, minPeriod); - } - - if (range_len != ~0U || sparse_len != ~0U) { - return range_len < sparse_len ? REPEAT_RANGE : REPEAT_SPARSE_OPTIMAL_P; - } - - return REPEAT_RING; -} - -bool matches(vector<CharReach>::const_iterator a_it, - vector<CharReach>::const_iterator a_ite, - vector<CharReach>::const_iterator b_it, - UNUSED vector<CharReach>::const_iterator b_ite) { - for (; a_it != a_ite; ++a_it, ++b_it) { - assert(b_it != b_ite); - if ((*a_it & *b_it).none()) { - return false; - } - } - assert(b_it == b_ite); - return true; -} - -static -u32 minDistAfterA(const vector<CharReach> &a, const vector<CharReach> &b) { - /* we do not count the case where b can end at the same position as a */ - - for (u32 i = 1; i < b.size(); i++) { - u32 overlap_len = b.size() - i; - if (overlap_len <= a.size()) { - if (matches(a.end() - overlap_len, a.end(), - b.begin(), b.end() - i)) { - return i; - } - } else { - assert(overlap_len > a.size()); - if (matches(a.begin(), a.end(), - b.end() - i - a.size(), b.end() - i)) { - return i; - } - } - } - - return b.size(); -} - -vector<size_t> minResetDistToEnd(const vector<vector<CharReach>> &triggers, - const CharReach &cr) { - /* if a trigger does not reset the repeat, it gets a distance of trigger - length */ - vector<size_t> out; - for (const auto &trig : triggers) { - size_t size = trig.size(); - size_t i = 0; - for (; i < size; i++) { - if ((trig[size - i - 1] & cr).none()) { - break; - } - } - out.push_back(i); - } - - return out; -} - -#if defined(DEBUG) || defined(DUMP_SUPPORT) - -static UNUSED -string dumpTrigger(const vector<CharReach> &trigger) { - string s; - for (const auto &cr : trigger) { - s += describeClass(cr); - } - return s; -} - -#endif - -u32 minPeriod(const vector<vector<CharReach>> &triggers, const CharReach &cr, - bool *can_reset) { - assert(!triggers.empty()); - - u32 rv = ~0U; - *can_reset = true; - vector<size_t> min_reset_dist = minResetDistToEnd(triggers, cr); - - for (const auto &trigger : triggers) { - DEBUG_PRINTF("trigger: %s\n", dumpTrigger(trigger).c_str()); - for (size_t j = 0; j < 
triggers.size(); j++) { - u32 min_ext = minDistAfterA(trigger, triggers[j]); - rv = min(rv, min_ext); - if (min_ext <= min_reset_dist[j]) { - *can_reset = false; - } - } - } - - DEBUG_PRINTF("min period %u\n", rv); - return rv; -} - -} // namespace ue2 + } + + if (repeatMin == depth(0) || is_reset) { + return REPEAT_LAST; + } + + // Cases with max < 64 can be handled with either bitmap or trailer. We use + // whichever has smaller packed state. + + if (repeatMax < depth(64)) { + u32 bitmap_len = + packedSize(REPEAT_BITMAP, repeatMin, repeatMax, minPeriod); + u32 trailer_len = + packedSize(REPEAT_TRAILER, repeatMin, repeatMax, minPeriod); + return bitmap_len <= trailer_len ? REPEAT_BITMAP : REPEAT_TRAILER; + } + + if (repeatMin <= depth(64)) { + return REPEAT_TRAILER; + } + + u32 range_len = ~0U; + if (repeatMax > repeatMin && + numRangeSlots(repeatMin, repeatMax) <= REPEAT_RANGE_MAX_SLOTS) { + assert(numRangeSlots(repeatMin, repeatMax) < 256); // stored in u8 + range_len = + streamStateSize(REPEAT_RANGE, repeatMin, repeatMax, minPeriod); + } + + assert(repeatMax.is_finite()); + + u32 sparse_len = ~0U; + if (minPeriod > 6) { + sparse_len = + streamStateSize(REPEAT_SPARSE_OPTIMAL_P, repeatMin, repeatMax, minPeriod); + } + + if (range_len != ~0U || sparse_len != ~0U) { + return range_len < sparse_len ? REPEAT_RANGE : REPEAT_SPARSE_OPTIMAL_P; + } + + return REPEAT_RING; +} + +bool matches(vector<CharReach>::const_iterator a_it, + vector<CharReach>::const_iterator a_ite, + vector<CharReach>::const_iterator b_it, + UNUSED vector<CharReach>::const_iterator b_ite) { + for (; a_it != a_ite; ++a_it, ++b_it) { + assert(b_it != b_ite); + if ((*a_it & *b_it).none()) { + return false; + } + } + assert(b_it == b_ite); + return true; +} + +static +u32 minDistAfterA(const vector<CharReach> &a, const vector<CharReach> &b) { + /* we do not count the case where b can end at the same position as a */ + + for (u32 i = 1; i < b.size(); i++) { + u32 overlap_len = b.size() - i; + if (overlap_len <= a.size()) { + if (matches(a.end() - overlap_len, a.end(), + b.begin(), b.end() - i)) { + return i; + } + } else { + assert(overlap_len > a.size()); + if (matches(a.begin(), a.end(), + b.end() - i - a.size(), b.end() - i)) { + return i; + } + } + } + + return b.size(); +} + +vector<size_t> minResetDistToEnd(const vector<vector<CharReach>> &triggers, + const CharReach &cr) { + /* if a trigger does not reset the repeat, it gets a distance of trigger + length */ + vector<size_t> out; + for (const auto &trig : triggers) { + size_t size = trig.size(); + size_t i = 0; + for (; i < size; i++) { + if ((trig[size - i - 1] & cr).none()) { + break; + } + } + out.push_back(i); + } + + return out; +} + +#if defined(DEBUG) || defined(DUMP_SUPPORT) + +static UNUSED +string dumpTrigger(const vector<CharReach> &trigger) { + string s; + for (const auto &cr : trigger) { + s += describeClass(cr); + } + return s; +} + +#endif + +u32 minPeriod(const vector<vector<CharReach>> &triggers, const CharReach &cr, + bool *can_reset) { + assert(!triggers.empty()); + + u32 rv = ~0U; + *can_reset = true; + vector<size_t> min_reset_dist = minResetDistToEnd(triggers, cr); + + for (const auto &trigger : triggers) { + DEBUG_PRINTF("trigger: %s\n", dumpTrigger(trigger).c_str()); + for (size_t j = 0; j < triggers.size(); j++) { + u32 min_ext = minDistAfterA(trigger, triggers[j]); + rv = min(rv, min_ext); + if (min_ext <= min_reset_dist[j]) { + *can_reset = false; + } + } + } + + DEBUG_PRINTF("min period %u\n", rv); + return rv; +} + +} // namespace ue2 diff 
--git a/contrib/libs/hyperscan/src/nfa/repeatcompile.h b/contrib/libs/hyperscan/src/nfa/repeatcompile.h index f3d2df92a0..fe9a710623 100644 --- a/contrib/libs/hyperscan/src/nfa/repeatcompile.h +++ b/contrib/libs/hyperscan/src/nfa/repeatcompile.h @@ -1,90 +1,90 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Bounded repeat compile-time code. - */ - -#ifndef REPEATCOMPILE_H -#define REPEATCOMPILE_H - -#include "repeat_internal.h" - -#include <cstdint> -#include <utility> -#include <vector> - -namespace ue2 { - -class CharReach; -class depth; - -/** - * \brief Structure representing the various state requirements for a given - * bounded repeat. - */ -struct RepeatStateInfo { - RepeatStateInfo(enum RepeatType type, const depth &repeatMin, - const depth &repeatMax, u32 minPeriod); - - u32 stateSize; - u32 packedCtrlSize; - u32 horizon; - u32 patchCount; - u32 patchSize; - u32 encodingSize; - u32 patchesOffset; - std::vector<u32> packedFieldSizes; - std::vector<uint64_t> table; // not u64a, for boost/gcc-4.9 -}; - -/** - * \brief Given the parameters of a repeat, choose a repeat implementation - * type. - */ -enum RepeatType chooseRepeatType(const depth &repeatMin, const depth &repeatMax, +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Bounded repeat compile-time code. + */ + +#ifndef REPEATCOMPILE_H +#define REPEATCOMPILE_H + +#include "repeat_internal.h" + +#include <cstdint> +#include <utility> +#include <vector> + +namespace ue2 { + +class CharReach; +class depth; + +/** + * \brief Structure representing the various state requirements for a given + * bounded repeat. + */ +struct RepeatStateInfo { + RepeatStateInfo(enum RepeatType type, const depth &repeatMin, + const depth &repeatMax, u32 minPeriod); + + u32 stateSize; + u32 packedCtrlSize; + u32 horizon; + u32 patchCount; + u32 patchSize; + u32 encodingSize; + u32 patchesOffset; + std::vector<u32> packedFieldSizes; + std::vector<uint64_t> table; // not u64a, for boost/gcc-4.9 +}; + +/** + * \brief Given the parameters of a repeat, choose a repeat implementation + * type. + */ +enum RepeatType chooseRepeatType(const depth &repeatMin, const depth &repeatMax, u32 minPeriod, bool is_reset, bool has_external_guard = false); - -u32 calcPackedBytes(u64a val); - -bool matches(std::vector<CharReach>::const_iterator a_it, - std::vector<CharReach>::const_iterator a_ite, - std::vector<CharReach>::const_iterator b_it, - std::vector<CharReach>::const_iterator b_ite); - -std::vector<size_t> -minResetDistToEnd(const std::vector<std::vector<CharReach>> &triggers, - const CharReach &cr); - -u32 minPeriod(const std::vector<std::vector<CharReach>> &triggers, - const CharReach &cr, bool *can_reset); - -} // namespace ue2 - -#endif // REPEATCOMPILE_H + +u32 calcPackedBytes(u64a val); + +bool matches(std::vector<CharReach>::const_iterator a_it, + std::vector<CharReach>::const_iterator a_ite, + std::vector<CharReach>::const_iterator b_it, + std::vector<CharReach>::const_iterator b_ite); + +std::vector<size_t> +minResetDistToEnd(const std::vector<std::vector<CharReach>> &triggers, + const CharReach &cr); + +u32 minPeriod(const std::vector<std::vector<CharReach>> &triggers, + const CharReach &cr, bool *can_reset); + +} // namespace ue2 + +#endif // REPEATCOMPILE_H diff --git a/contrib/libs/hyperscan/src/nfa/shufti.c b/contrib/libs/hyperscan/src/nfa/shufti.c index f2b3915c2a..09ffc0cf9a 100644 --- a/contrib/libs/hyperscan/src/nfa/shufti.c +++ b/contrib/libs/hyperscan/src/nfa/shufti.c @@ -1,82 +1,82 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
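A note on the repeat-compile code above before the shufti sources begin: chooseRepeatType() is a pure cost ladder. Unbounded repeats become REPEAT_ALWAYS (externally guarded with min 0) or REPEAT_FIRST; a min of 0 or reset semantics gives REPEAT_LAST; a max below 64 picks whichever of REPEAT_BITMAP and REPEAT_TRAILER packs smaller; a min up to 64 still fits REPEAT_TRAILER; after that the range and sparse-optimal stream-state sizes compete (range only when it needs at most REPEAT_RANGE_MAX_SLOTS slots, sparse only when minPeriod exceeds 6), with REPEAT_RING as the fallback. A minimal sketch of that ladder follows, with plain integers standing in for the depth class and caller-precomputed sizes standing in for the RepeatStateInfo/packedSize queries; those stand-ins are editorial assumptions, not the real API.

    #include <limits.h>

    enum rtype { R_ALWAYS, R_FIRST, R_LAST, R_BITMAP, R_TRAILER,
                 R_RANGE, R_SPARSE, R_RING };

    /* sizes are precomputed by the caller; UINT_MAX marks "not viable"
     * (range with too many slots, sparse with minPeriod <= 6), mirroring
     * the ~0U sentinels in chooseRepeatType() above */
    static enum rtype choose(unsigned rmin, unsigned rmax, int max_is_inf,
                             int is_reset, int ext_guard,
                             unsigned bitmap_sz, unsigned trailer_sz,
                             unsigned range_sz, unsigned sparse_sz) {
        if (max_is_inf) {
            return (ext_guard && rmin == 0) ? R_ALWAYS : R_FIRST;
        }
        if (rmin == 0 || is_reset) {
            return R_LAST;
        }
        if (rmax < 64) { /* both fit: take the smaller packed state */
            return bitmap_sz <= trailer_sz ? R_BITMAP : R_TRAILER;
        }
        if (rmin <= 64) {
            return R_TRAILER;
        }
        if (range_sz != UINT_MAX || sparse_sz != UINT_MAX) {
            return range_sz < sparse_sz ? R_RANGE : R_SPARSE;
        }
        return R_RING;
    }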
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Shufti: character class acceleration. - * - * Utilises the SSSE3 pshufb shuffle instruction - */ - -#include "shufti.h" -#include "ue2common.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Shufti: character class acceleration. 
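The whole shufti family in this file reduces to one scalar test, visible in shuftiFwdSlow below: the character class is compiled into two 16-byte tables of bucket bits, one indexed by a byte's low nibble and one by its high nibble, and a byte belongs to the class exactly when the two lookups share a bucket bit. The pshufb-based block() functions evaluate this test for 16 (or, later, 32) bytes at once. A standalone restatement of the scalar predicate, for reference while reading the SIMD code:

    #include <stdint.h>

    /* a byte c matches when its low-nibble and high-nibble bucket sets
     * intersect -- the same test shuftiFwdSlow() applies per byte */
    static int shufti_contains(const uint8_t lo[16], const uint8_t hi[16],
                               uint8_t c) {
        return (lo[c & 0xf] & hi[c >> 4]) != 0;
    }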
+ * + * Utilises the SSSE3 pshufb shuffle instruction + */ + +#include "shufti.h" +#include "ue2common.h" #include "util/arch.h" -#include "util/bitutils.h" -#include "util/simd_utils.h" -#include "util/unaligned.h" - -#ifdef DEBUG -#include <ctype.h> - -#define DUMP_MSK(_t) \ -static UNUSED \ -void dumpMsk##_t(m##_t msk) { \ - u8 * mskAsU8 = (u8 *)&msk; \ - for (unsigned i = 0; i < sizeof(msk); i++) { \ - u8 c = mskAsU8[i]; \ - for (int j = 0; j < 8; j++) { \ - if ((c >> (7-j)) & 0x1) \ - printf("1"); \ - else \ - printf("0"); \ - } \ - printf(" "); \ - } \ -} \ -static UNUSED \ -void dumpMsk##_t##AsChars(m##_t msk) { \ - u8 * mskAsU8 = (u8 *)&msk; \ - for (unsigned i = 0; i < sizeof(msk); i++) { \ - u8 c = mskAsU8[i]; \ - if (isprint(c)) \ - printf("%c",c); \ - else \ - printf("."); \ - } \ -} - -#endif - +#include "util/bitutils.h" +#include "util/simd_utils.h" +#include "util/unaligned.h" + +#ifdef DEBUG +#include <ctype.h> + +#define DUMP_MSK(_t) \ +static UNUSED \ +void dumpMsk##_t(m##_t msk) { \ + u8 * mskAsU8 = (u8 *)&msk; \ + for (unsigned i = 0; i < sizeof(msk); i++) { \ + u8 c = mskAsU8[i]; \ + for (int j = 0; j < 8; j++) { \ + if ((c >> (7-j)) & 0x1) \ + printf("1"); \ + else \ + printf("0"); \ + } \ + printf(" "); \ + } \ +} \ +static UNUSED \ +void dumpMsk##_t##AsChars(m##_t msk) { \ + u8 * mskAsU8 = (u8 *)&msk; \ + for (unsigned i = 0; i < sizeof(msk); i++) { \ + u8 c = mskAsU8[i]; \ + if (isprint(c)) \ + printf("%c",c); \ + else \ + printf("."); \ + } \ +} + +#endif + /** \brief Naive byte-by-byte implementation. */ static really_inline const u8 *shuftiFwdSlow(const u8 *lo, const u8 *hi, const u8 *buf, const u8 *buf_end) { assert(buf < buf_end); - + for (; buf < buf_end; ++buf) { u8 c = *buf; if (lo[c & 0xf] & hi[c >> 4]) { @@ -102,306 +102,306 @@ const u8 *shuftiRevSlow(const u8 *lo, const u8 *hi, const u8 *buf, } #if !defined(HAVE_AVX2) -/* Normal SSSE3 shufti */ - +/* Normal SSSE3 shufti */ + #ifdef DEBUG DUMP_MSK(128) #endif -#define GET_LO_4(chars) and128(chars, low4bits) +#define GET_LO_4(chars) and128(chars, low4bits) #define GET_HI_4(chars) rshift64_m128(andnot128(low4bits, chars), 4) - -static really_inline + +static really_inline u32 block(m128 mask_lo, m128 mask_hi, m128 chars, const m128 low4bits, const m128 compare) { m128 c_lo = pshufb_m128(mask_lo, GET_LO_4(chars)); m128 c_hi = pshufb_m128(mask_hi, GET_HI_4(chars)); m128 t = and128(c_lo, c_hi); -#ifdef DEBUG +#ifdef DEBUG DEBUG_PRINTF(" chars: "); dumpMsk128AsChars(chars); printf("\n"); DEBUG_PRINTF(" char: "); dumpMsk128(chars); printf("\n"); DEBUG_PRINTF(" c_lo: "); dumpMsk128(c_lo); printf("\n"); DEBUG_PRINTF(" c_hi: "); dumpMsk128(c_hi); printf("\n"); DEBUG_PRINTF(" t: "); dumpMsk128(t); printf("\n"); -#endif +#endif return movemask128(eq128(t, compare)); } - + static really_inline const u8 *firstMatch(const u8 *buf, u32 z) { - if (unlikely(z != 0xffff)) { - u32 pos = ctz32(~z & 0xffff); - assert(pos < 16); - return buf + pos; - } else { - return NULL; // no match - } -} - -static really_inline -const u8 *fwdBlock(m128 mask_lo, m128 mask_hi, m128 chars, const u8 *buf, - const m128 low4bits, const m128 zeroes) { + if (unlikely(z != 0xffff)) { + u32 pos = ctz32(~z & 0xffff); + assert(pos < 16); + return buf + pos; + } else { + return NULL; // no match + } +} + +static really_inline +const u8 *fwdBlock(m128 mask_lo, m128 mask_hi, m128 chars, const u8 *buf, + const m128 low4bits, const m128 zeroes) { u32 z = block(mask_lo, mask_hi, chars, low4bits, zeroes); - + return firstMatch(buf, z); -} - -const u8 
*shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, - const u8 *buf_end) { - assert(buf && buf_end); - assert(buf < buf_end); - - // Slow path for small cases. - if (buf_end - buf < 16) { - return shuftiFwdSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi, - buf, buf_end); - } - - const m128 zeroes = zeroes128(); - const m128 low4bits = _mm_set1_epi8(0xf); - const u8 *rv; - - size_t min = (size_t)buf % 16; - assert(buf_end - buf >= 16); - - // Preconditioning: most of the time our buffer won't be aligned. - m128 chars = loadu128(buf); - rv = fwdBlock(mask_lo, mask_hi, chars, buf, low4bits, zeroes); - if (rv) { - return rv; - } - buf += (16 - min); - - // Unrolling was here, but it wasn't doing anything but taking up space. - // Reroll FTW. - - const u8 *last_block = buf_end - 16; - while (buf < last_block) { - m128 lchars = load128(buf); - rv = fwdBlock(mask_lo, mask_hi, lchars, buf, low4bits, zeroes); - if (rv) { - return rv; - } - buf += 16; - } - - // Use an unaligned load to mop up the last 16 bytes and get an accurate - // picture to buf_end. - assert(buf <= buf_end && buf >= buf_end - 16); - chars = loadu128(buf_end - 16); - rv = fwdBlock(mask_lo, mask_hi, chars, buf_end - 16, low4bits, zeroes); - if (rv) { - return rv; - } - - return buf_end; -} - -static really_inline -const u8 *lastMatch(const u8 *buf, m128 t, m128 compare) { -#ifdef DEBUG - DEBUG_PRINTF("confirming match in:"); dumpMsk128(t); printf("\n"); -#endif - - u32 z = movemask128(eq128(t, compare)); - if (unlikely(z != 0xffff)) { - u32 pos = clz32(~z & 0xffff); - DEBUG_PRINTF("buf=%p, pos=%u\n", buf, pos); - assert(pos >= 16 && pos < 32); - return buf + (31 - pos); - } else { - return NULL; // no match - } -} - - -static really_inline -const u8 *revBlock(m128 mask_lo, m128 mask_hi, m128 chars, const u8 *buf, - const m128 low4bits, const m128 zeroes) { +} + +const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, + const u8 *buf_end) { + assert(buf && buf_end); + assert(buf < buf_end); + + // Slow path for small cases. + if (buf_end - buf < 16) { + return shuftiFwdSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi, + buf, buf_end); + } + + const m128 zeroes = zeroes128(); + const m128 low4bits = _mm_set1_epi8(0xf); + const u8 *rv; + + size_t min = (size_t)buf % 16; + assert(buf_end - buf >= 16); + + // Preconditioning: most of the time our buffer won't be aligned. + m128 chars = loadu128(buf); + rv = fwdBlock(mask_lo, mask_hi, chars, buf, low4bits, zeroes); + if (rv) { + return rv; + } + buf += (16 - min); + + // Unrolling was here, but it wasn't doing anything but taking up space. + // Reroll FTW. + + const u8 *last_block = buf_end - 16; + while (buf < last_block) { + m128 lchars = load128(buf); + rv = fwdBlock(mask_lo, mask_hi, lchars, buf, low4bits, zeroes); + if (rv) { + return rv; + } + buf += 16; + } + + // Use an unaligned load to mop up the last 16 bytes and get an accurate + // picture to buf_end. 
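The loop shape in shuftiExec above recurs in every Exec routine in this file: one unaligned load for the ragged head, aligned full-width loads through the middle, and a final unaligned load flush against buf_end that deliberately overlaps the last aligned block so no scalar tail loop is needed. A minimal sketch of the same shape, with a hypothetical probe16() standing in for fwdBlock(); like the real code, it requires buf_end - buf >= 16.

    #include <stddef.h>
    #include <stdint.h>

    /* stand-in for fwdBlock(): first byte equal to x in the 16 bytes at p */
    static const uint8_t *probe16(const uint8_t *p, uint8_t x) {
        for (int i = 0; i < 16; i++) {
            if (p[i] == x) { return p + i; }
        }
        return NULL;
    }

    static const uint8_t *scan(const uint8_t *buf, const uint8_t *buf_end,
                               uint8_t x) {
        const uint8_t *rv;
        if ((rv = probe16(buf, x))) { return rv; }     /* unaligned head */
        buf += 16 - ((uintptr_t)buf % 16);
        while (buf < buf_end - 16) {                   /* aligned body */
            if ((rv = probe16(buf, x))) { return rv; }
            buf += 16;
        }
        rv = probe16(buf_end - 16, x);                 /* overlapping tail */
        return rv ? rv : buf_end;                      /* buf_end: no match */
    }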
+ assert(buf <= buf_end && buf >= buf_end - 16); + chars = loadu128(buf_end - 16); + rv = fwdBlock(mask_lo, mask_hi, chars, buf_end - 16, low4bits, zeroes); + if (rv) { + return rv; + } + + return buf_end; +} + +static really_inline +const u8 *lastMatch(const u8 *buf, m128 t, m128 compare) { +#ifdef DEBUG + DEBUG_PRINTF("confirming match in:"); dumpMsk128(t); printf("\n"); +#endif + + u32 z = movemask128(eq128(t, compare)); + if (unlikely(z != 0xffff)) { + u32 pos = clz32(~z & 0xffff); + DEBUG_PRINTF("buf=%p, pos=%u\n", buf, pos); + assert(pos >= 16 && pos < 32); + return buf + (31 - pos); + } else { + return NULL; // no match + } +} + + +static really_inline +const u8 *revBlock(m128 mask_lo, m128 mask_hi, m128 chars, const u8 *buf, + const m128 low4bits, const m128 zeroes) { m128 c_lo = pshufb_m128(mask_lo, GET_LO_4(chars)); m128 c_hi = pshufb_m128(mask_hi, GET_HI_4(chars)); - m128 t = and128(c_lo, c_hi); - -#ifdef DEBUG - DEBUG_PRINTF(" chars: "); dumpMsk128AsChars(chars); printf("\n"); - DEBUG_PRINTF(" char: "); dumpMsk128(chars); printf("\n"); - DEBUG_PRINTF(" c_lo: "); dumpMsk128(c_lo); printf("\n"); - DEBUG_PRINTF(" c_hi: "); dumpMsk128(c_hi); printf("\n"); - DEBUG_PRINTF(" t: "); dumpMsk128(t); printf("\n"); -#endif - - return lastMatch(buf, t, zeroes); -} - -const u8 *rshuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, - const u8 *buf_end) { - assert(buf && buf_end); - assert(buf < buf_end); - - // Slow path for small cases. - if (buf_end - buf < 16) { - return shuftiRevSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi, - buf, buf_end); - } - - const m128 zeroes = zeroes128(); - const m128 low4bits = _mm_set1_epi8(0xf); - const u8 *rv; - - assert(buf_end - buf >= 16); - - // Preconditioning: most of the time our buffer won't be aligned. - m128 chars = loadu128(buf_end - 16); - rv = revBlock(mask_lo, mask_hi, chars, buf_end - 16, low4bits, zeroes); - if (rv) { - return rv; - } - buf_end = (const u8 *)((size_t)buf_end & ~((size_t)0xf)); - - // Unrolling was here, but it wasn't doing anything but taking up space. - // Reroll FTW. - - const u8 *last_block = buf + 16; - while (buf_end > last_block) { - buf_end -= 16; - m128 lchars = load128(buf_end); - rv = revBlock(mask_lo, mask_hi, lchars, buf_end, low4bits, zeroes); - if (rv) { - return rv; - } - } - - // Use an unaligned load to mop up the last 16 bytes and get an accurate - // picture to buf. - chars = loadu128(buf); - rv = revBlock(mask_lo, mask_hi, chars, buf, low4bits, zeroes); - if (rv) { - return rv; - } - - return buf - 1; -} - -static really_inline -const u8 *fwdBlock2(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo, m128 mask2_hi, - m128 chars, const u8 *buf, const m128 low4bits, - const m128 ones) { - m128 chars_lo = GET_LO_4(chars); - m128 chars_hi = GET_HI_4(chars); + m128 t = and128(c_lo, c_hi); + +#ifdef DEBUG + DEBUG_PRINTF(" chars: "); dumpMsk128AsChars(chars); printf("\n"); + DEBUG_PRINTF(" char: "); dumpMsk128(chars); printf("\n"); + DEBUG_PRINTF(" c_lo: "); dumpMsk128(c_lo); printf("\n"); + DEBUG_PRINTF(" c_hi: "); dumpMsk128(c_hi); printf("\n"); + DEBUG_PRINTF(" t: "); dumpMsk128(t); printf("\n"); +#endif + + return lastMatch(buf, t, zeroes); +} + +const u8 *rshuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, + const u8 *buf_end) { + assert(buf && buf_end); + assert(buf < buf_end); + + // Slow path for small cases. 
+ if (buf_end - buf < 16) { + return shuftiRevSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi, + buf, buf_end); + } + + const m128 zeroes = zeroes128(); + const m128 low4bits = _mm_set1_epi8(0xf); + const u8 *rv; + + assert(buf_end - buf >= 16); + + // Preconditioning: most of the time our buffer won't be aligned. + m128 chars = loadu128(buf_end - 16); + rv = revBlock(mask_lo, mask_hi, chars, buf_end - 16, low4bits, zeroes); + if (rv) { + return rv; + } + buf_end = (const u8 *)((size_t)buf_end & ~((size_t)0xf)); + + // Unrolling was here, but it wasn't doing anything but taking up space. + // Reroll FTW. + + const u8 *last_block = buf + 16; + while (buf_end > last_block) { + buf_end -= 16; + m128 lchars = load128(buf_end); + rv = revBlock(mask_lo, mask_hi, lchars, buf_end, low4bits, zeroes); + if (rv) { + return rv; + } + } + + // Use an unaligned load to mop up the last 16 bytes and get an accurate + // picture to buf. + chars = loadu128(buf); + rv = revBlock(mask_lo, mask_hi, chars, buf, low4bits, zeroes); + if (rv) { + return rv; + } + + return buf - 1; +} + +static really_inline +const u8 *fwdBlock2(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo, m128 mask2_hi, + m128 chars, const u8 *buf, const m128 low4bits, + const m128 ones) { + m128 chars_lo = GET_LO_4(chars); + m128 chars_hi = GET_HI_4(chars); m128 c_lo = pshufb_m128(mask1_lo, chars_lo); m128 c_hi = pshufb_m128(mask1_hi, chars_hi); - m128 t = or128(c_lo, c_hi); - -#ifdef DEBUG - DEBUG_PRINTF(" chars: "); dumpMsk128AsChars(chars); printf("\n"); - DEBUG_PRINTF(" char: "); dumpMsk128(chars); printf("\n"); - DEBUG_PRINTF(" c_lo: "); dumpMsk128(c_lo); printf("\n"); - DEBUG_PRINTF(" c_hi: "); dumpMsk128(c_hi); printf("\n"); - DEBUG_PRINTF(" t: "); dumpMsk128(t); printf("\n"); -#endif - + m128 t = or128(c_lo, c_hi); + +#ifdef DEBUG + DEBUG_PRINTF(" chars: "); dumpMsk128AsChars(chars); printf("\n"); + DEBUG_PRINTF(" char: "); dumpMsk128(chars); printf("\n"); + DEBUG_PRINTF(" c_lo: "); dumpMsk128(c_lo); printf("\n"); + DEBUG_PRINTF(" c_hi: "); dumpMsk128(c_hi); printf("\n"); + DEBUG_PRINTF(" t: "); dumpMsk128(t); printf("\n"); +#endif + m128 c2_lo = pshufb_m128(mask2_lo, chars_lo); m128 c2_hi = pshufb_m128(mask2_hi, chars_hi); m128 t2 = or128(t, rshiftbyte_m128(or128(c2_lo, c2_hi), 1)); - -#ifdef DEBUG - DEBUG_PRINTF(" c2_lo: "); dumpMsk128(c2_lo); printf("\n"); - DEBUG_PRINTF(" c2_hi: "); dumpMsk128(c2_hi); printf("\n"); - DEBUG_PRINTF(" t2: "); dumpMsk128(t2); printf("\n"); -#endif - + +#ifdef DEBUG + DEBUG_PRINTF(" c2_lo: "); dumpMsk128(c2_lo); printf("\n"); + DEBUG_PRINTF(" c2_hi: "); dumpMsk128(c2_hi); printf("\n"); + DEBUG_PRINTF(" t2: "); dumpMsk128(t2); printf("\n"); +#endif + u32 z = movemask128(eq128(t2, ones)); DEBUG_PRINTF(" z: 0x%08x\n", z); return firstMatch(buf, z); -} - -const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi, - m128 mask2_lo, m128 mask2_hi, - const u8 *buf, const u8 *buf_end) { - const m128 ones = ones128(); - const m128 low4bits = _mm_set1_epi8(0xf); - const u8 *rv; - - size_t min = (size_t)buf % 16; - - // Preconditioning: most of the time our buffer won't be aligned. - m128 chars = loadu128(buf); - rv = fwdBlock2(mask1_lo, mask1_hi, mask2_lo, mask2_hi, - chars, buf, low4bits, ones); - if (rv) { - return rv; - } - buf += (16 - min); - - // Unrolling was here, but it wasn't doing anything but taking up space. - // Reroll FTW. 
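The double-shufti pair above (fwdBlock2 / shuftiDoubleExec) extends the test to two-byte sequences, with inverted polarity relative to single shufti: the double masks are built 0xff-filled (see shuftiBuildDoubleMasks later in this diff), the per-nibble lookups are OR'd rather than AND'd, the second-byte result is pulled down one lane by the byte shift so each position sees the verdict on the byte after it, and a position matches when some bucket bit is still clear after all four lookups, i.e. the combined byte is not 0xff. A scalar restatement of that test; this is an editorial reading of the vector code, not an upstream API.

    #include <stdint.h>

    /* match at p when the buckets left clear by p[0] in mask1 and by
     * p[1] in mask2 intersect; all-ones means every bucket was ruled out */
    static int shufti2_match_at(const uint8_t lo1[16], const uint8_t hi1[16],
                                const uint8_t lo2[16], const uint8_t hi2[16],
                                const uint8_t *p) {
        uint8_t t = (uint8_t)((lo1[p[0] & 0xf] | hi1[p[0] >> 4])
                            | (lo2[p[1] & 0xf] | hi2[p[1] >> 4]));
        return t != 0xff;
    }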
- - const u8 *last_block = buf_end - 16; - while (buf < last_block) { - m128 lchars = load128(buf); - rv = fwdBlock2(mask1_lo, mask1_hi, mask2_lo, mask2_hi, - lchars, buf, low4bits, ones); - if (rv) { - return rv; - } - buf += 16; - } - - // Use an unaligned load to mop up the last 16 bytes and get an accurate - // picture to buf_end. - chars = loadu128(buf_end - 16); - rv = fwdBlock2(mask1_lo, mask1_hi, mask2_lo, mask2_hi, - chars, buf_end - 16, low4bits, ones); - if (rv) { - return rv; - } - - return buf_end; -} - +} + +const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi, + m128 mask2_lo, m128 mask2_hi, + const u8 *buf, const u8 *buf_end) { + const m128 ones = ones128(); + const m128 low4bits = _mm_set1_epi8(0xf); + const u8 *rv; + + size_t min = (size_t)buf % 16; + + // Preconditioning: most of the time our buffer won't be aligned. + m128 chars = loadu128(buf); + rv = fwdBlock2(mask1_lo, mask1_hi, mask2_lo, mask2_hi, + chars, buf, low4bits, ones); + if (rv) { + return rv; + } + buf += (16 - min); + + // Unrolling was here, but it wasn't doing anything but taking up space. + // Reroll FTW. + + const u8 *last_block = buf_end - 16; + while (buf < last_block) { + m128 lchars = load128(buf); + rv = fwdBlock2(mask1_lo, mask1_hi, mask2_lo, mask2_hi, + lchars, buf, low4bits, ones); + if (rv) { + return rv; + } + buf += 16; + } + + // Use an unaligned load to mop up the last 16 bytes and get an accurate + // picture to buf_end. + chars = loadu128(buf_end - 16); + rv = fwdBlock2(mask1_lo, mask1_hi, mask2_lo, mask2_hi, + chars, buf_end - 16, low4bits, ones); + if (rv) { + return rv; + } + + return buf_end; +} + #elif !defined(HAVE_AVX512) // AVX2 - 256 wide shuftis - -#ifdef DEBUG -DUMP_MSK(256) -#endif - -#define GET_LO_4(chars) and256(chars, low4bits) + +#ifdef DEBUG +DUMP_MSK(256) +#endif + +#define GET_LO_4(chars) and256(chars, low4bits) #define GET_HI_4(chars) rshift64_m256(andnot256(low4bits, chars), 4) - -static really_inline + +static really_inline u32 block(m256 mask_lo, m256 mask_hi, m256 chars, const m256 low4bits, const m256 compare) { m256 c_lo = pshufb_m256(mask_lo, GET_LO_4(chars)); m256 c_hi = pshufb_m256(mask_hi, GET_HI_4(chars)); m256 t = and256(c_lo, c_hi); -#ifdef DEBUG +#ifdef DEBUG DEBUG_PRINTF(" chars: "); dumpMsk256AsChars(chars); printf("\n"); DEBUG_PRINTF(" char: "); dumpMsk256(chars); printf("\n"); DEBUG_PRINTF(" c_lo: "); dumpMsk256(c_lo); printf("\n"); DEBUG_PRINTF(" c_hi: "); dumpMsk256(c_hi); printf("\n"); DEBUG_PRINTF(" t: "); dumpMsk256(t); printf("\n"); -#endif - +#endif + return movemask256(eq256(t, compare)); } static really_inline const u8 *firstMatch(const u8 *buf, u32 z) { DEBUG_PRINTF("z 0x%08x\n", z); - if (unlikely(z != 0xffffffff)) { - u32 pos = ctz32(~z); - assert(pos < 32); + if (unlikely(z != 0xffffffff)) { + u32 pos = ctz32(~z); + assert(pos < 32); DEBUG_PRINTF("match @ pos %u\n", pos); - return buf + pos; - } else { - return NULL; // no match - } -} - -static really_inline + return buf + pos; + } else { + return NULL; // no match + } +} + +static really_inline const u8 *fwdBlockShort(m256 mask, m128 chars, const u8 *buf, const m256 low4bits) { // do the hi and lo shuffles in the one avx register @@ -435,103 +435,103 @@ const u8 *shuftiFwdShort(m128 mask_lo, m128 mask_hi, const u8 *buf, } static really_inline -const u8 *fwdBlock(m256 mask_lo, m256 mask_hi, m256 chars, const u8 *buf, - const m256 low4bits, const m256 zeroes) { +const u8 *fwdBlock(m256 mask_lo, m256 mask_hi, m256 chars, const u8 *buf, + const m256 low4bits, const m256 zeroes) { u32 
z = block(mask_lo, mask_hi, chars, low4bits, zeroes); - + return firstMatch(buf, z); -} - -/* takes 128 bit masks, but operates on 256 bits of data */ -const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, - const u8 *buf_end) { - assert(buf && buf_end); - assert(buf < buf_end); +} + +/* takes 128 bit masks, but operates on 256 bits of data */ +const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, + const u8 *buf_end) { + assert(buf && buf_end); + assert(buf < buf_end); DEBUG_PRINTF("shufti %p len %zu\n", buf, buf_end - buf); - - // Slow path for small cases. + + // Slow path for small cases. if (buf_end - buf < 16) { - return shuftiFwdSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi, - buf, buf_end); - } - + return shuftiFwdSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi, + buf, buf_end); + } + const m256 low4bits = set32x8(0xf); if (buf_end - buf <= 32) { return shuftiFwdShort(mask_lo, mask_hi, buf, buf_end, low4bits); } - const m256 zeroes = zeroes256(); - const m256 wide_mask_lo = set2x128(mask_lo); - const m256 wide_mask_hi = set2x128(mask_hi); - const u8 *rv; - - size_t min = (size_t)buf % 32; - assert(buf_end - buf >= 32); - - // Preconditioning: most of the time our buffer won't be aligned. - m256 chars = loadu256(buf); - rv = fwdBlock(wide_mask_lo, wide_mask_hi, chars, buf, low4bits, zeroes); - if (rv) { - return rv; - } - buf += (32 - min); - - // Unrolling was here, but it wasn't doing anything but taking up space. - // Reroll FTW. - - const u8 *last_block = buf_end - 32; - while (buf < last_block) { - m256 lchars = load256(buf); - rv = fwdBlock(wide_mask_lo, wide_mask_hi, lchars, buf, low4bits, zeroes); - if (rv) { - return rv; - } - buf += 32; - } - - // Use an unaligned load to mop up the last 32 bytes and get an accurate - // picture to buf_end. - assert(buf <= buf_end && buf >= buf_end - 32); - chars = loadu256(buf_end - 32); - rv = fwdBlock(wide_mask_lo, wide_mask_hi, chars, buf_end - 32, low4bits, zeroes); - if (rv) { - return rv; - } - - return buf_end; -} - -static really_inline + const m256 zeroes = zeroes256(); + const m256 wide_mask_lo = set2x128(mask_lo); + const m256 wide_mask_hi = set2x128(mask_hi); + const u8 *rv; + + size_t min = (size_t)buf % 32; + assert(buf_end - buf >= 32); + + // Preconditioning: most of the time our buffer won't be aligned. + m256 chars = loadu256(buf); + rv = fwdBlock(wide_mask_lo, wide_mask_hi, chars, buf, low4bits, zeroes); + if (rv) { + return rv; + } + buf += (32 - min); + + // Unrolling was here, but it wasn't doing anything but taking up space. + // Reroll FTW. + + const u8 *last_block = buf_end - 32; + while (buf < last_block) { + m256 lchars = load256(buf); + rv = fwdBlock(wide_mask_lo, wide_mask_hi, lchars, buf, low4bits, zeroes); + if (rv) { + return rv; + } + buf += 32; + } + + // Use an unaligned load to mop up the last 32 bytes and get an accurate + // picture to buf_end. 
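The AVX2 shuftiExec above is the same algorithm at 32-byte granularity, with one extra dispatch tier sketched here as a comment (shape only, per the code above):

    /* dispatch tiers in the AVX2 shuftiExec:
     *   len < 16   -> shuftiFwdSlow (scalar, byte by byte)
     *   len <= 32  -> shuftiFwdShort (one block; both nibble shuffles
     *                 packed into a single 256-bit register)
     *   otherwise  -> 32-byte head/body/tail loop, as in the SSE path
     */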
+ assert(buf <= buf_end && buf >= buf_end - 32); + chars = loadu256(buf_end - 32); + rv = fwdBlock(wide_mask_lo, wide_mask_hi, chars, buf_end - 32, low4bits, zeroes); + if (rv) { + return rv; + } + + return buf_end; +} + +static really_inline const u8 *lastMatch(const u8 *buf, u32 z) { - if (unlikely(z != 0xffffffff)) { - u32 pos = clz32(~z); - DEBUG_PRINTF("buf=%p, pos=%u\n", buf, pos); - return buf + (31 - pos); - } else { - return NULL; // no match - } -} - -static really_inline -const u8 *revBlock(m256 mask_lo, m256 mask_hi, m256 chars, const u8 *buf, - const m256 low4bits, const m256 zeroes) { + if (unlikely(z != 0xffffffff)) { + u32 pos = clz32(~z); + DEBUG_PRINTF("buf=%p, pos=%u\n", buf, pos); + return buf + (31 - pos); + } else { + return NULL; // no match + } +} + +static really_inline +const u8 *revBlock(m256 mask_lo, m256 mask_hi, m256 chars, const u8 *buf, + const m256 low4bits, const m256 zeroes) { m256 c_lo = pshufb_m256(mask_lo, GET_LO_4(chars)); m256 c_hi = pshufb_m256(mask_hi, GET_HI_4(chars)); - m256 t = and256(c_lo, c_hi); - -#ifdef DEBUG - DEBUG_PRINTF(" chars: "); dumpMsk256AsChars(chars); printf("\n"); - DEBUG_PRINTF(" char: "); dumpMsk256(chars); printf("\n"); - DEBUG_PRINTF(" c_lo: "); dumpMsk256(c_lo); printf("\n"); - DEBUG_PRINTF(" c_hi: "); dumpMsk256(c_hi); printf("\n"); - DEBUG_PRINTF(" t: "); dumpMsk256(t); printf("\n"); -#endif - + m256 t = and256(c_lo, c_hi); + +#ifdef DEBUG + DEBUG_PRINTF(" chars: "); dumpMsk256AsChars(chars); printf("\n"); + DEBUG_PRINTF(" char: "); dumpMsk256(chars); printf("\n"); + DEBUG_PRINTF(" c_lo: "); dumpMsk256(c_lo); printf("\n"); + DEBUG_PRINTF(" c_hi: "); dumpMsk256(c_hi); printf("\n"); + DEBUG_PRINTF(" t: "); dumpMsk256(t); printf("\n"); +#endif + u32 z = movemask256(eq256(t, zeroes)); return lastMatch(buf, z); -} - +} + static really_inline const u8 *revBlockShort(m256 mask, m128 chars, const u8 *buf, const m256 low4bits) { @@ -567,95 +567,95 @@ const u8 *shuftiRevShort(m128 mask_lo, m128 mask_hi, const u8 *buf, } -/* takes 128 bit masks, but operates on 256 bits of data */ -const u8 *rshuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, - const u8 *buf_end) { - assert(buf && buf_end); - assert(buf < buf_end); - - // Slow path for small cases. +/* takes 128 bit masks, but operates on 256 bits of data */ +const u8 *rshuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, + const u8 *buf_end) { + assert(buf && buf_end); + assert(buf < buf_end); + + // Slow path for small cases. if (buf_end - buf < 16) { - return shuftiRevSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi, - buf, buf_end); - } - + return shuftiRevSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi, + buf, buf_end); + } + const m256 low4bits = set32x8(0xf); if (buf_end - buf <= 32) { return shuftiRevShort(mask_lo, mask_hi, buf, buf_end, low4bits); } - const m256 zeroes = zeroes256(); - const m256 wide_mask_lo = set2x128(mask_lo); - const m256 wide_mask_hi = set2x128(mask_hi); - const u8 *rv; - - assert(buf_end - buf >= 32); - - // Preconditioning: most of the time our buffer won't be aligned. - m256 chars = loadu256(buf_end - 32); - rv = revBlock(wide_mask_lo, wide_mask_hi, chars, buf_end - 32, low4bits, zeroes); - if (rv) { - return rv; - } - buf_end = (const u8 *)((size_t)buf_end & ~((size_t)0x1f)); - - // Unrolling was here, but it wasn't doing anything but taking up space. - // Reroll FTW. 
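Reverse scanning recovers the last matching lane rather than the first: lastMatch above counts the leading ones of the inverted movemask with clz32 and maps the bit position back to a byte offset, and rshuftiExec uses buf - 1 as its "no match" sentinel (shufti.h documents this). The index arithmetic, restated with the GCC/Clang builtin in place of clz32:

    #include <stdint.h>

    /* z has one bit per lane, set where the lane did NOT match */
    static const uint8_t *last_from_mask(const uint8_t *block32, uint32_t z) {
        if (z == 0xffffffff) {
            return NULL;                            /* every lane failed */
        }
        uint32_t pos = (uint32_t)__builtin_clz(~z); /* clz32 in the source */
        return block32 + (31 - pos);
    }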
- const u8 *last_block = buf + 32; - while (buf_end > last_block) { - buf_end -= 32; - m256 lchars = load256(buf_end); - rv = revBlock(wide_mask_lo, wide_mask_hi, lchars, buf_end, low4bits, zeroes); - if (rv) { - return rv; - } - } - - // Use an unaligned load to mop up the last 32 bytes and get an accurate - // picture to buf. - chars = loadu256(buf); - rv = revBlock(wide_mask_lo, wide_mask_hi, chars, buf, low4bits, zeroes); - if (rv) { - return rv; - } - - return buf - 1; -} - -static really_inline -const u8 *fwdBlock2(m256 mask1_lo, m256 mask1_hi, m256 mask2_lo, m256 mask2_hi, - m256 chars, const u8 *buf, const m256 low4bits, - const m256 ones) { - DEBUG_PRINTF("buf %p\n", buf); - m256 chars_lo = GET_LO_4(chars); - m256 chars_hi = GET_HI_4(chars); + const m256 zeroes = zeroes256(); + const m256 wide_mask_lo = set2x128(mask_lo); + const m256 wide_mask_hi = set2x128(mask_hi); + const u8 *rv; + + assert(buf_end - buf >= 32); + + // Preconditioning: most of the time our buffer won't be aligned. + m256 chars = loadu256(buf_end - 32); + rv = revBlock(wide_mask_lo, wide_mask_hi, chars, buf_end - 32, low4bits, zeroes); + if (rv) { + return rv; + } + buf_end = (const u8 *)((size_t)buf_end & ~((size_t)0x1f)); + + // Unrolling was here, but it wasn't doing anything but taking up space. + // Reroll FTW. + const u8 *last_block = buf + 32; + while (buf_end > last_block) { + buf_end -= 32; + m256 lchars = load256(buf_end); + rv = revBlock(wide_mask_lo, wide_mask_hi, lchars, buf_end, low4bits, zeroes); + if (rv) { + return rv; + } + } + + // Use an unaligned load to mop up the last 32 bytes and get an accurate + // picture to buf. + chars = loadu256(buf); + rv = revBlock(wide_mask_lo, wide_mask_hi, chars, buf, low4bits, zeroes); + if (rv) { + return rv; + } + + return buf - 1; +} + +static really_inline +const u8 *fwdBlock2(m256 mask1_lo, m256 mask1_hi, m256 mask2_lo, m256 mask2_hi, + m256 chars, const u8 *buf, const m256 low4bits, + const m256 ones) { + DEBUG_PRINTF("buf %p\n", buf); + m256 chars_lo = GET_LO_4(chars); + m256 chars_hi = GET_HI_4(chars); m256 c_lo = pshufb_m256(mask1_lo, chars_lo); m256 c_hi = pshufb_m256(mask1_hi, chars_hi); - m256 t = or256(c_lo, c_hi); - -#ifdef DEBUG - DEBUG_PRINTF(" chars: "); dumpMsk256AsChars(chars); printf("\n"); - DEBUG_PRINTF(" char: "); dumpMsk256(chars); printf("\n"); - DEBUG_PRINTF(" c_lo: "); dumpMsk256(c_lo); printf("\n"); - DEBUG_PRINTF(" c_hi: "); dumpMsk256(c_hi); printf("\n"); - DEBUG_PRINTF(" t: "); dumpMsk256(t); printf("\n"); -#endif - + m256 t = or256(c_lo, c_hi); + +#ifdef DEBUG + DEBUG_PRINTF(" chars: "); dumpMsk256AsChars(chars); printf("\n"); + DEBUG_PRINTF(" char: "); dumpMsk256(chars); printf("\n"); + DEBUG_PRINTF(" c_lo: "); dumpMsk256(c_lo); printf("\n"); + DEBUG_PRINTF(" c_hi: "); dumpMsk256(c_hi); printf("\n"); + DEBUG_PRINTF(" t: "); dumpMsk256(t); printf("\n"); +#endif + m256 c2_lo = pshufb_m256(mask2_lo, chars_lo); m256 c2_hi = pshufb_m256(mask2_hi, chars_hi); m256 t2 = or256(t, rshift128_m256(or256(c2_lo, c2_hi), 1)); - -#ifdef DEBUG - DEBUG_PRINTF(" c2_lo: "); dumpMsk256(c2_lo); printf("\n"); - DEBUG_PRINTF(" c2_hi: "); dumpMsk256(c2_hi); printf("\n"); - DEBUG_PRINTF(" t2: "); dumpMsk256(t2); printf("\n"); -#endif + +#ifdef DEBUG + DEBUG_PRINTF(" c2_lo: "); dumpMsk256(c2_lo); printf("\n"); + DEBUG_PRINTF(" c2_hi: "); dumpMsk256(c2_hi); printf("\n"); + DEBUG_PRINTF(" t2: "); dumpMsk256(t2); printf("\n"); +#endif u32 z = movemask256(eq256(t2, ones)); - + return firstMatch(buf, z); -} - +} + static really_inline const u8 
*fwdBlockShort2(m256 mask1, m256 mask2, m128 chars, const u8 *buf, const m256 low4bits) { @@ -694,63 +694,63 @@ const u8 *shuftiDoubleShort(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo, return buf_end; } -/* takes 128 bit masks, but operates on 256 bits of data */ -const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi, - m128 mask2_lo, m128 mask2_hi, - const u8 *buf, const u8 *buf_end) { +/* takes 128 bit masks, but operates on 256 bits of data */ +const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi, + m128 mask2_lo, m128 mask2_hi, + const u8 *buf, const u8 *buf_end) { /* we should always have at least 16 bytes */ assert(buf_end - buf >= 16); DEBUG_PRINTF("buf %p len %zu\n", buf, buf_end - buf); - if (buf_end - buf < 32) { + if (buf_end - buf < 32) { return shuftiDoubleShort(mask1_lo, mask1_hi, mask2_lo, mask2_hi, buf, buf_end); - } - - const m256 ones = ones256(); - const m256 low4bits = set32x8(0xf); - const m256 wide_mask1_lo = set2x128(mask1_lo); - const m256 wide_mask1_hi = set2x128(mask1_hi); - const m256 wide_mask2_lo = set2x128(mask2_lo); - const m256 wide_mask2_hi = set2x128(mask2_hi); - const u8 *rv; - - size_t min = (size_t)buf % 32; - - // Preconditioning: most of the time our buffer won't be aligned. - m256 chars = loadu256(buf); - rv = fwdBlock2(wide_mask1_lo, wide_mask1_hi, wide_mask2_lo, wide_mask2_hi, - chars, buf, low4bits, ones); - if (rv) { - return rv; - } - buf += (32 - min); - - // Unrolling was here, but it wasn't doing anything but taking up space. - // Reroll FTW. - const u8 *last_block = buf_end - 32; - while (buf < last_block) { - m256 lchars = load256(buf); - rv = fwdBlock2(wide_mask1_lo, wide_mask1_hi, wide_mask2_lo, wide_mask2_hi, - lchars, buf, low4bits, ones); - if (rv) { - return rv; - } - buf += 32; - } - - // Use an unaligned load to mop up the last 32 bytes and get an accurate - // picture to buf_end. - chars = loadu256(buf_end - 32); - rv = fwdBlock2(wide_mask1_lo, wide_mask1_hi, wide_mask2_lo, wide_mask2_hi, - chars, buf_end - 32, low4bits, ones); - if (rv) { - return rv; - } - - return buf_end; -} - + } + + const m256 ones = ones256(); + const m256 low4bits = set32x8(0xf); + const m256 wide_mask1_lo = set2x128(mask1_lo); + const m256 wide_mask1_hi = set2x128(mask1_hi); + const m256 wide_mask2_lo = set2x128(mask2_lo); + const m256 wide_mask2_hi = set2x128(mask2_hi); + const u8 *rv; + + size_t min = (size_t)buf % 32; + + // Preconditioning: most of the time our buffer won't be aligned. + m256 chars = loadu256(buf); + rv = fwdBlock2(wide_mask1_lo, wide_mask1_hi, wide_mask2_lo, wide_mask2_hi, + chars, buf, low4bits, ones); + if (rv) { + return rv; + } + buf += (32 - min); + + // Unrolling was here, but it wasn't doing anything but taking up space. + // Reroll FTW. + const u8 *last_block = buf_end - 32; + while (buf < last_block) { + m256 lchars = load256(buf); + rv = fwdBlock2(wide_mask1_lo, wide_mask1_hi, wide_mask2_lo, wide_mask2_hi, + lchars, buf, low4bits, ones); + if (rv) { + return rv; + } + buf += 32; + } + + // Use an unaligned load to mop up the last 32 bytes and get an accurate + // picture to buf_end. 
+ chars = loadu256(buf_end - 32); + rv = fwdBlock2(wide_mask1_lo, wide_mask1_hi, wide_mask2_lo, wide_mask2_hi, + chars, buf_end - 32, low4bits, ones); + if (rv) { + return rv; + } + + return buf_end; +} + #else // defined(HAVE_AVX512) #ifdef DEBUG diff --git a/contrib/libs/hyperscan/src/nfa/shufti.h b/contrib/libs/hyperscan/src/nfa/shufti.h index 2663301488..1ebf776cc7 100644 --- a/contrib/libs/hyperscan/src/nfa/shufti.h +++ b/contrib/libs/hyperscan/src/nfa/shufti.h @@ -1,61 +1,61 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Shufti: character class acceleration. - * - * Utilises the SSSE3 pshufb shuffle instruction - */ - -#ifndef SHUFTI_H -#define SHUFTI_H - -#include "ue2common.h" -#include "util/simd_utils.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, - const u8 *buf_end); - -// Returns (buf - 1) if not found. -const u8 *rshuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, - const u8 *buf_end); - -const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi, - m128 mask2_lo, m128 mask2_hi, - const u8 *buf, const u8 *buf_end); - -#ifdef __cplusplus -} -#endif - -#endif +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Shufti: character class acceleration. + * + * Utilises the SSSE3 pshufb shuffle instruction + */ + +#ifndef SHUFTI_H +#define SHUFTI_H + +#include "ue2common.h" +#include "util/simd_utils.h" + +#ifdef __cplusplus +extern "C" +{ +#endif + +const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, + const u8 *buf_end); + +// Returns (buf - 1) if not found. +const u8 *rshuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf, + const u8 *buf_end); + +const u8 *shuftiDoubleExec(m128 mask1_lo, m128 mask1_hi, + m128 mask2_lo, m128 mask2_hi, + const u8 *buf, const u8 *buf_end); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/contrib/libs/hyperscan/src/nfa/shufticompile.cpp b/contrib/libs/hyperscan/src/nfa/shufticompile.cpp index 577a8063f5..f712ef94a4 100644 --- a/contrib/libs/hyperscan/src/nfa/shufticompile.cpp +++ b/contrib/libs/hyperscan/src/nfa/shufticompile.cpp @@ -1,113 +1,113 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Shufti acceleration: compile code. 
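Taken together, the shufti.h interface above is a three-function API: shuftiExec scans forward and returns buf_end when nothing matches, rshuftiExec scans backward and returns buf - 1 when nothing matches, and shuftiDoubleExec handles two-byte sequences. A hedged usage fragment; the masks are assumed to have been produced elsewhere by the compile-side shuftiBuildMasks (that half is C++), and the wrapper names here are illustrative only.

    #include "shufti.h"

    /* true when some byte of the compiled class occurs in [buf, buf_end) */
    static int contains_class_byte(m128 lo, m128 hi,
                                   const u8 *buf, const u8 *buf_end) {
        return shuftiExec(lo, hi, buf, buf_end) != buf_end;
    }

    /* pointer to the last class byte, or NULL if there is none */
    static const u8 *last_class_byte(m128 lo, m128 hi,
                                     const u8 *buf, const u8 *buf_end) {
        const u8 *rv = rshuftiExec(lo, hi, buf, buf_end);
        return rv == buf - 1 ? NULL : rv;
    }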
- */ -#include "shufticompile.h" -#include "ue2common.h" -#include "util/charreach.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Shufti acceleration: compile code. + */ +#include "shufticompile.h" +#include "ue2common.h" +#include "util/charreach.h" #include "util/container.h" #include "util/flat_containers.h" - -#include <array> -#include <cassert> -#include <cstring> -#include <map> - -using namespace std; - -namespace ue2 { - -/** \brief Single-byte variant. - * - * Returns -1 if unable to construct masks, otherwise returns number of bits - * used in the mask. - * - * Note: always able to construct masks for 8 or fewer characters. - */ + +#include <array> +#include <cassert> +#include <cstring> +#include <map> + +using namespace std; + +namespace ue2 { + +/** \brief Single-byte variant. + * + * Returns -1 if unable to construct masks, otherwise returns number of bits + * used in the mask. + * + * Note: always able to construct masks for 8 or fewer characters. + */ int shuftiBuildMasks(const CharReach &c, u8 *lo, u8 *hi) { - /* Things could be packed much more optimally, but this should be able to - * handle any set of characters entirely in the lower half. 
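The strategy in shuftiBuildMasks, restated: group the class's characters by high nibble, then merge high nibbles that need an identical set of low nibbles into a single bucket; with more than 8 buckets it returns -1 and callers fall back to a scheme without the bucket limit (truffle, later in this diff). As a concrete check, [A-Za-z] needs only two buckets: high nibbles 4 and 6 share low nibbles 1-15 (A-O, a-o) and high nibbles 5 and 7 share low nibbles 0-10 (P-Z, p-z). A self-checking sketch of hand-built tables for that class, verified against the membership rule used throughout this file:

    #include <assert.h>
    #include <ctype.h>
    #include <stdint.h>

    int main(void) {
        uint8_t lo[16] = {0}, hi[16] = {0};
        for (int n = 1; n <= 15; n++) { lo[n] |= 1; } /* bucket 0 */
        for (int n = 0; n <= 10; n++) { lo[n] |= 2; } /* bucket 1 */
        hi[4] = hi[6] = 1;  /* A-O, a-o */
        hi[5] = hi[7] = 2;  /* P-Z, p-z */
        for (int c = 0; c < 256; c++) {
            int in = (lo[c & 0xf] & hi[c >> 4]) != 0;
            assert(in == (isalpha(c) != 0));
        }
        return 0;
    }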
*/ - - assert(c.count() < 256); - assert(!c.none()); - - map<u8, CharReach> by_hi; /* hi nibble -> set of matching lo nibbles */ - /* group matching characters by high nibble */ - for (size_t i = c.find_first(); i != CharReach::npos; i = c.find_next(i)) { - u8 it_hi = i >> 4; - u8 it_lo = i & 0xf; - by_hi[it_hi].set(it_lo); - } - - map<CharReach, CharReach> by_lo_set; - /* group all hi nibbles with a common set of lo nibbles together */ - for (map<u8, CharReach>::const_iterator it = by_hi.begin(); - it != by_hi.end(); ++it) { - by_lo_set[it->second].set(it->first); - } - - if (by_lo_set.size() > 8) { - /* too many char classes on the dance floor */ - assert(c.size() > 8); - return -1; - } - - u8 bit_index = 0; - array<u8, 16> lo_a; lo_a.fill(0); - array<u8, 16> hi_a; hi_a.fill(0); - for (map<CharReach, CharReach>::const_iterator it = by_lo_set.begin(); - it != by_lo_set.end(); ++it) { - const CharReach &lo_nibbles = it->first; - const CharReach &hi_nibbles = it->second; - - /* set bits in low mask */ - for (size_t j = lo_nibbles.find_first(); j != CharReach::npos; - j = lo_nibbles.find_next(j)) { - lo_a[j] |= (1 << bit_index); - } - - /* set bits in high mask */ - for (size_t j = hi_nibbles.find_first(); j != CharReach::npos; - j = hi_nibbles.find_next(j)) { - hi_a[j] |= (1 << bit_index); - } - - bit_index++; - } - - memcpy(lo, lo_a.data(), sizeof(m128)); - memcpy(hi, hi_a.data(), sizeof(m128)); - - return bit_index; -} - + /* Things could be packed much more optimally, but this should be able to + * handle any set of characters entirely in the lower half. */ + + assert(c.count() < 256); + assert(!c.none()); + + map<u8, CharReach> by_hi; /* hi nibble -> set of matching lo nibbles */ + /* group matching characters by high nibble */ + for (size_t i = c.find_first(); i != CharReach::npos; i = c.find_next(i)) { + u8 it_hi = i >> 4; + u8 it_lo = i & 0xf; + by_hi[it_hi].set(it_lo); + } + + map<CharReach, CharReach> by_lo_set; + /* group all hi nibbles with a common set of lo nibbles together */ + for (map<u8, CharReach>::const_iterator it = by_hi.begin(); + it != by_hi.end(); ++it) { + by_lo_set[it->second].set(it->first); + } + + if (by_lo_set.size() > 8) { + /* too many char classes on the dance floor */ + assert(c.size() > 8); + return -1; + } + + u8 bit_index = 0; + array<u8, 16> lo_a; lo_a.fill(0); + array<u8, 16> hi_a; hi_a.fill(0); + for (map<CharReach, CharReach>::const_iterator it = by_lo_set.begin(); + it != by_lo_set.end(); ++it) { + const CharReach &lo_nibbles = it->first; + const CharReach &hi_nibbles = it->second; + + /* set bits in low mask */ + for (size_t j = lo_nibbles.find_first(); j != CharReach::npos; + j = lo_nibbles.find_next(j)) { + lo_a[j] |= (1 << bit_index); + } + + /* set bits in high mask */ + for (size_t j = hi_nibbles.find_first(); j != CharReach::npos; + j = hi_nibbles.find_next(j)) { + hi_a[j] |= (1 << bit_index); + } + + bit_index++; + } + + memcpy(lo, lo_a.data(), sizeof(m128)); + memcpy(hi, hi_a.data(), sizeof(m128)); + + return bit_index; +} + static array<u16, 4> or_array(array<u16, 4> a, const array<u16, 4> &b) { a[0] |= b[0]; @@ -133,21 +133,21 @@ void set_buckets_from_mask(u16 nibble_mask, u32 bucket, } bool shuftiBuildDoubleMasks(const CharReach &onechar, - const flat_set<pair<u8, u8>> &twochar, + const flat_set<pair<u8, u8>> &twochar, u8 *lo1, u8 *hi1, u8 *lo2, u8 *hi2) { - DEBUG_PRINTF("unibytes %zu dibytes %zu\n", onechar.size(), - twochar.size()); - array<u8, 16> lo1_a; - array<u8, 16> lo2_a; - array<u8, 16> hi1_a; - array<u8, 16> hi2_a; - - 
lo1_a.fill(0xff); - lo2_a.fill(0xff); - hi1_a.fill(0xff); - hi2_a.fill(0xff); - - // two-byte literals + DEBUG_PRINTF("unibytes %zu dibytes %zu\n", onechar.size(), + twochar.size()); + array<u8, 16> lo1_a; + array<u8, 16> lo2_a; + array<u8, 16> hi1_a; + array<u8, 16> hi2_a; + + lo1_a.fill(0xff); + lo2_a.fill(0xff); + hi1_a.fill(0xff); + hi2_a.fill(0xff); + + // two-byte literals vector<array<u16, 4>> nibble_masks; for (const auto &p : twochar) { DEBUG_PRINTF("%02hhx %02hhx\n", p.first, p.second); @@ -156,10 +156,10 @@ bool shuftiBuildDoubleMasks(const CharReach &onechar, u16 b_lo = 1U << (p.second & 0xf); u16 b_hi = 1U << (p.second >> 4); nibble_masks.push_back({{a_lo, a_hi, b_lo, b_hi}}); - } - - // one-byte literals (second byte is a wildcard) - for (size_t it = onechar.find_first(); it != CharReach::npos; + } + + // one-byte literals (second byte is a wildcard) + for (size_t it = onechar.find_first(); it != CharReach::npos; it = onechar.find_next(it)) { DEBUG_PRINTF("%02hhx\n", (u8)it); u16 a_lo = 1U << (it & 0xf); @@ -167,7 +167,7 @@ bool shuftiBuildDoubleMasks(const CharReach &onechar, u16 wildcard = 0xffff; nibble_masks.push_back({{a_lo, a_hi, wildcard, wildcard}}); } - + // try to merge strings into shared buckets for (u32 i = 0; i < 4; i++) { map<array<u16, 4>, array<u16, 4>> new_masks; @@ -179,13 +179,13 @@ bool shuftiBuildDoubleMasks(const CharReach &onechar, } else { new_masks[key] = or_array(new_masks[key], a); } - } + } nibble_masks.clear(); for (const auto &e : new_masks) { nibble_masks.push_back(e.second); } - } - + } + if (nibble_masks.size() > MAX_BUCKETS) { DEBUG_PRINTF("too many buckets needed (%zu)\n", nibble_masks.size()); return false; @@ -200,26 +200,26 @@ bool shuftiBuildDoubleMasks(const CharReach &onechar, i++; } - memcpy(lo1, lo1_a.data(), sizeof(m128)); - memcpy(lo2, lo2_a.data(), sizeof(m128)); - memcpy(hi1, hi1_a.data(), sizeof(m128)); - memcpy(hi2, hi2_a.data(), sizeof(m128)); - + memcpy(lo1, lo1_a.data(), sizeof(m128)); + memcpy(lo2, lo2_a.data(), sizeof(m128)); + memcpy(hi1, hi1_a.data(), sizeof(m128)); + memcpy(hi2, hi2_a.data(), sizeof(m128)); + return true; -} - -#ifdef DUMP_SUPPORT - +} + +#ifdef DUMP_SUPPORT + CharReach shufti2cr(const u8 *lo, const u8 *hi) { - CharReach cr; - for (u32 i = 0; i < 256; i++) { - if (lo[(u8)i & 0xf] & hi[(u8)i >> 4]) { - cr.set(i); - } - } - return cr; -} - -#endif // DUMP_SUPPORT - -} // namespace ue2 + CharReach cr; + for (u32 i = 0; i < 256; i++) { + if (lo[(u8)i & 0xf] & hi[(u8)i >> 4]) { + cr.set(i); + } + } + return cr; +} + +#endif // DUMP_SUPPORT + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/nfa/shufticompile.h b/contrib/libs/hyperscan/src/nfa/shufticompile.h index fefd59f9b4..59b9c38dff 100644 --- a/contrib/libs/hyperscan/src/nfa/shufticompile.h +++ b/contrib/libs/hyperscan/src/nfa/shufticompile.h @@ -1,73 +1,73 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Shufti acceleration: compile code. - */ - -#ifndef SHUFTI_COMPILE_H -#define SHUFTI_COMPILE_H - -#include "ue2common.h" -#include "util/charreach.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Shufti acceleration: compile code. + */ + +#ifndef SHUFTI_COMPILE_H +#define SHUFTI_COMPILE_H + +#include "ue2common.h" +#include "util/charreach.h" #include "util/flat_containers.h" - -#include <utility> - -namespace ue2 { - -/** \brief Single-byte variant. - * - * Returns -1 if unable to construct masks, otherwise returns number of bits - * used in the mask. - * - * Note: always able to construct masks for 8 or fewer characters. - */ + +#include <utility> + +namespace ue2 { + +/** \brief Single-byte variant. + * + * Returns -1 if unable to construct masks, otherwise returns number of bits + * used in the mask. + * + * Note: always able to construct masks for 8 or fewer characters. 
+ */ int shuftiBuildMasks(const CharReach &chars, u8 *lo, u8 *hi); - + /** \brief Double-byte variant * * Returns false if we are unable to build the masks (too many buckets required) */ bool shuftiBuildDoubleMasks(const CharReach &onechar, - const flat_set<std::pair<u8, u8>> &twochar, + const flat_set<std::pair<u8, u8>> &twochar, u8 *lo1, u8 *hi1, u8 *lo2, u8 *hi2); - -#ifdef DUMP_SUPPORT - -/** - * \brief Dump code: returns a CharReach with the reach that would match this - * shufti. - */ + +#ifdef DUMP_SUPPORT + +/** + * \brief Dump code: returns a CharReach with the reach that would match this + * shufti. + */ CharReach shufti2cr(const u8 *lo, const u8 *hi); - -#endif // DUMP_SUPPORT - -} // namespace ue2 - -#endif // SHUFTI_COMPILE_H + +#endif // DUMP_SUPPORT + +} // namespace ue2 + +#endif // SHUFTI_COMPILE_H diff --git a/contrib/libs/hyperscan/src/nfa/truffle.c b/contrib/libs/hyperscan/src/nfa/truffle.c index 667d8ea13d..be6b312cf2 100644 --- a/contrib/libs/hyperscan/src/nfa/truffle.c +++ b/contrib/libs/hyperscan/src/nfa/truffle.c @@ -1,106 +1,106 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/* - * Matches a byte in a charclass using three shuffles - */ - - -#include "ue2common.h" -#include "truffle.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Matches a byte in a charclass using three shuffles + */ + + +#include "ue2common.h" +#include "truffle.h" #include "util/arch.h" -#include "util/bitutils.h" -#include "util/simd_utils.h" - +#include "util/bitutils.h" +#include "util/simd_utils.h" + #if !defined(HAVE_AVX2) - -static really_inline + +static really_inline const u8 *lastMatch(const u8 *buf, u32 z) { - if (unlikely(z != 0xffff)) { + if (unlikely(z != 0xffff)) { u32 pos = clz32(~z & 0xffff); assert(pos >= 16 && pos < 32); return buf + (31 - pos); - } - - return NULL; // no match -} - -static really_inline + } + + return NULL; // no match +} + +static really_inline const u8 *firstMatch(const u8 *buf, u32 z) { - if (unlikely(z != 0xffff)) { + if (unlikely(z != 0xffff)) { u32 pos = ctz32(~z & 0xffff); assert(pos < 16); return buf + pos; - } - - return NULL; // no match -} - -static really_inline -u32 block(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, m128 v) { - - m128 highconst = _mm_set1_epi8(0x80); - m128 shuf_mask_hi = _mm_set1_epi64x(0x8040201008040201); - - // and now do the real work + } + + return NULL; // no match +} + +static really_inline +u32 block(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, m128 v) { + + m128 highconst = _mm_set1_epi8(0x80); + m128 shuf_mask_hi = _mm_set1_epi64x(0x8040201008040201); + + // and now do the real work m128 shuf1 = pshufb_m128(shuf_mask_lo_highclear, v); - m128 t1 = xor128(v, highconst); + m128 t1 = xor128(v, highconst); m128 shuf2 = pshufb_m128(shuf_mask_lo_highset, t1); m128 t2 = andnot128(highconst, rshift64_m128(v, 4)); m128 shuf3 = pshufb_m128(shuf_mask_hi, t2); - m128 tmp = and128(or128(shuf1, shuf2), shuf3); - m128 tmp2 = eq128(tmp, zeroes128()); - u32 z = movemask128(tmp2); - - return z; -} - -static -const u8 *truffleMini(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end) { - uintptr_t len = buf_end - buf; - assert(len < 16); - - m128 chars = zeroes128(); - memcpy(&chars, buf, len); - - u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars); - // can't be these bytes in z + m128 tmp = and128(or128(shuf1, shuf2), shuf3); + m128 tmp2 = eq128(tmp, zeroes128()); + u32 z = movemask128(tmp2); + + return z; +} + +static +const u8 *truffleMini(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end) { + uintptr_t len = buf_end - buf; + assert(len < 16); + + m128 chars = zeroes128(); + memcpy(&chars, buf, len); + + u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars); + // can't be these bytes in z u32 mask = (0xffff >> (16 - len)) ^ 0xffff; const u8 *rv = firstMatch(buf, z | mask); - - if (rv) { - return rv; - } else { - return buf_end; - } -} - + + if (rv) { + 
return rv; + } else { + return buf_end; + } +} + static really_inline const u8 *fwdBlock(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, m128 v, const u8 *buf) { @@ -115,124 +115,124 @@ const u8 *revBlock(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, return lastMatch(buf, z); } -const u8 *truffleExec(m128 shuf_mask_lo_highclear, +const u8 *truffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, const u8 *buf, const u8 *buf_end) { - DEBUG_PRINTF("len %zu\n", buf_end - buf); - - assert(buf && buf_end); - assert(buf < buf_end); - const u8 *rv; - - if (buf_end - buf < 16) { - return truffleMini(shuf_mask_lo_highclear, shuf_mask_lo_highset, buf, - buf_end); - } - - size_t min = (size_t)buf % 16; - assert(buf_end - buf >= 16); - - // Preconditioning: most of the time our buffer won't be aligned. - m128 chars = loadu128(buf); - rv = fwdBlock(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars, buf); - if (rv) { - return rv; - } - buf += (16 - min); - - const u8 *last_block = buf_end - 16; - while (buf < last_block) { - m128 lchars = load128(buf); - rv = fwdBlock(shuf_mask_lo_highclear, shuf_mask_lo_highset, lchars, - buf); - if (rv) { - return rv; - } - buf += 16; - } - - // Use an unaligned load to mop up the last 16 bytes and get an accurate - // picture to buf_end. - assert(buf <= buf_end && buf >= buf_end - 16); - chars = loadu128(buf_end - 16); - rv = fwdBlock(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars, - buf_end - 16); - if (rv) { - return rv; - } - - return buf_end; -} - -static -const u8 *truffleRevMini(m128 shuf_mask_lo_highclear, + DEBUG_PRINTF("len %zu\n", buf_end - buf); + + assert(buf && buf_end); + assert(buf < buf_end); + const u8 *rv; + + if (buf_end - buf < 16) { + return truffleMini(shuf_mask_lo_highclear, shuf_mask_lo_highset, buf, + buf_end); + } + + size_t min = (size_t)buf % 16; + assert(buf_end - buf >= 16); + + // Preconditioning: most of the time our buffer won't be aligned. + m128 chars = loadu128(buf); + rv = fwdBlock(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars, buf); + if (rv) { + return rv; + } + buf += (16 - min); + + const u8 *last_block = buf_end - 16; + while (buf < last_block) { + m128 lchars = load128(buf); + rv = fwdBlock(shuf_mask_lo_highclear, shuf_mask_lo_highset, lchars, + buf); + if (rv) { + return rv; + } + buf += 16; + } + + // Use an unaligned load to mop up the last 16 bytes and get an accurate + // picture to buf_end. 
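/* Why the overlapping tail load here is safe: loadu128(buf_end - 16) re-reads
 * up to 15 bytes of the last aligned block, but every earlier block already
 * reported no match, and fwdBlock() returns the first match in its window, so
 * scanning those bytes twice cannot change the result. */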
+ assert(buf <= buf_end && buf >= buf_end - 16); + chars = loadu128(buf_end - 16); + rv = fwdBlock(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars, + buf_end - 16); + if (rv) { + return rv; + } + + return buf_end; +} + +static +const u8 *truffleRevMini(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, const u8 *buf, const u8 *buf_end) { - uintptr_t len = buf_end - buf; - assert(len < 16); - - m128 chars = zeroes128(); - memcpy(&chars, buf, len); - + uintptr_t len = buf_end - buf; + assert(len < 16); + + m128 chars = zeroes128(); + memcpy(&chars, buf, len); + u32 mask = (0xffff >> (16 - len)) ^ 0xffff; - u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars); - const u8 *rv = lastMatch(buf, z | mask); - - if (rv) { - return rv; - } - return buf - 1; -} - -const u8 *rtruffleExec(m128 shuf_mask_lo_highclear, - m128 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end) { - assert(buf && buf_end); - assert(buf < buf_end); - const u8 *rv; - - DEBUG_PRINTF("len %zu\n", buf_end - buf); - - if (buf_end - buf < 16) { - return truffleRevMini(shuf_mask_lo_highclear, shuf_mask_lo_highset, buf, - buf_end); - } - - assert(buf_end - buf >= 16); - - // Preconditioning: most of the time our buffer won't be aligned. - m128 chars = loadu128(buf_end - 16); - rv = revBlock(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars, - buf_end - 16); - if (rv) { - return rv; - } - buf_end = (const u8 *)((size_t)buf_end & ~((size_t)0xf)); - - const u8 *last_block = buf + 16; - while (buf_end > last_block) { - buf_end -= 16; - m128 lchars = load128(buf_end); - rv = revBlock(shuf_mask_lo_highclear, shuf_mask_lo_highset, lchars, - buf_end); - if (rv) { - return rv; - } - } - - // Use an unaligned load to mop up the last 16 bytes and get an accurate - // picture to buf_end. - chars = loadu128(buf); - rv = revBlock(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars, buf); - if (rv) { - return rv; - } - - return buf - 1; -} - + u32 z = block(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars); + const u8 *rv = lastMatch(buf, z | mask); + + if (rv) { + return rv; + } + return buf - 1; +} + +const u8 *rtruffleExec(m128 shuf_mask_lo_highclear, + m128 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end) { + assert(buf && buf_end); + assert(buf < buf_end); + const u8 *rv; + + DEBUG_PRINTF("len %zu\n", buf_end - buf); + + if (buf_end - buf < 16) { + return truffleRevMini(shuf_mask_lo_highclear, shuf_mask_lo_highset, buf, + buf_end); + } + + assert(buf_end - buf >= 16); + + // Preconditioning: most of the time our buffer won't be aligned. + m128 chars = loadu128(buf_end - 16); + rv = revBlock(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars, + buf_end - 16); + if (rv) { + return rv; + } + buf_end = (const u8 *)((size_t)buf_end & ~((size_t)0xf)); + + const u8 *last_block = buf + 16; + while (buf_end > last_block) { + buf_end -= 16; + m128 lchars = load128(buf_end); + rv = revBlock(shuf_mask_lo_highclear, shuf_mask_lo_highset, lchars, + buf_end); + if (rv) { + return rv; + } + } + + // Use an unaligned load to mop up the last 16 bytes and get an accurate + // picture to buf_end. 
+ chars = loadu128(buf); + rv = revBlock(shuf_mask_lo_highclear, shuf_mask_lo_highset, chars, buf); + if (rv) { + return rv; + } + + return buf - 1; +} + #elif !defined(HAVE_AVX512) - + // AVX2 static really_inline diff --git a/contrib/libs/hyperscan/src/nfa/truffle.h b/contrib/libs/hyperscan/src/nfa/truffle.h index 327061adcd..f67227ad1e 100644 --- a/contrib/libs/hyperscan/src/nfa/truffle.h +++ b/contrib/libs/hyperscan/src/nfa/truffle.h @@ -1,57 +1,57 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + /** \file * \brief Truffle: fully general character class acceleration. * * Utilises the SSSE3 pshufb or AVX2 vpshufb shuffle instructions */ -#ifndef TRUFFLE_H -#define TRUFFLE_H - -#include "util/simd_types.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - -const u8 *truffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end); - -const u8 *rtruffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, - const u8 *buf, const u8 *buf_end); - -#ifdef __cplusplus -} -#endif - - -#endif /* TRUFFLE_H */ - +#ifndef TRUFFLE_H +#define TRUFFLE_H + +#include "util/simd_types.h" + +#ifdef __cplusplus +extern "C" +{ +#endif + +const u8 *truffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end); + +const u8 *rtruffleExec(m128 shuf_mask_lo_highclear, m128 shuf_mask_lo_highset, + const u8 *buf, const u8 *buf_end); + +#ifdef __cplusplus +} +#endif + + +#endif /* TRUFFLE_H */ + diff --git a/contrib/libs/hyperscan/src/nfa/trufflecompile.cpp b/contrib/libs/hyperscan/src/nfa/trufflecompile.cpp index 6cca946c08..f19de0ee04 100644 --- a/contrib/libs/hyperscan/src/nfa/trufflecompile.cpp +++ b/contrib/libs/hyperscan/src/nfa/trufflecompile.cpp @@ -1,96 +1,96 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Truffle compiler - * - * truffle is always able to represent an entire character class, providing a - * backstop to other acceleration engines. 
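The mask encoding this file builds is compact enough to show stand-alone. A hedged sketch (the byte 'K' and the main() wrapper are illustrative, not from the library):

#include <cstdint>
#include <cstdio>
#include <cstring>

// Layout built by truffleBuildMasks() below: byte v selects the highclear
// mask (bit 7 clear) or the highset mask (bit 7 set); its low nibble picks
// the mask byte, and bits 4..6 pick the bit within that byte.
int main() {
    uint8_t highclear[16], highset[16];
    std::memset(highclear, 0, sizeof(highclear));
    std::memset(highset, 0, sizeof(highset));

    uint8_t v = 'K'; // 0x4b: bit 7 clear, low nibble 0xb, bits 4..6 == 4
    highclear[v & 0xf] |= 1 << ((v & 0x70) >> 4);

    // Membership test, mirroring truffle2cr() below:
    uint8_t m = (v & 0x80) ? highset[v & 0xf] : highclear[v & 0xf];
    std::printf("'K' present: %d\n", (m >> ((v & 0x70) >> 4)) & 1); // prints 1
    return 0;
}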
- */ + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ -#include "trufflecompile.h" +/** \file + * \brief Truffle compiler + * + * truffle is always able to represent an entire character class, providing a + * backstop to other acceleration engines. + */ -#include "ue2common.h" -#include "util/charreach.h" +#include "trufflecompile.h" + +#include "ue2common.h" +#include "util/charreach.h" #include "util/dump_mask.h" -#include "util/simd_types.h" - +#include "util/simd_types.h" + #include <cstring> -using namespace std; - -namespace ue2 { - -/* - * To represent an entire charclass (256 chars), truffle uses two 128 bit - * masks - the first is for chars that do not have the high bit/bit 7 set, - * i.e. chars {0..127}. The second mask is for chars with bit 7 set. - * - * Each char to be represented is split into the low nibble (bits {0..3}) and - * bits {4,5,6} - the low nibble is the offset into the mask and the value of - * bits 456 is the bit that is set at that offset. - */ - +using namespace std; + +namespace ue2 { + +/* + * To represent an entire charclass (256 chars), truffle uses two 128 bit + * masks - the first is for chars that do not have the high bit/bit 7 set, + * i.e. chars {0..127}. The second mask is for chars with bit 7 set. + * + * Each char to be represented is split into the low nibble (bits {0..3}) and + * bits {4,5,6} - the low nibble is the offset into the mask and the value of + * bits 456 is the bit that is set at that offset. + */ + void truffleBuildMasks(const CharReach &cr, u8 *shuf_mask_lo_highclear, u8 *shuf_mask_lo_highset) { memset(shuf_mask_lo_highset, 0, sizeof(m128)); memset(shuf_mask_lo_highclear, 0, sizeof(m128)); - - for (size_t v = cr.find_first(); v != CharReach::npos; - v = cr.find_next(v)) { - DEBUG_PRINTF("adding 0x%02x to %s\n", (u8)v, (v & 0x80) ? "highset" : "highclear"); + + for (size_t v = cr.find_first(); v != CharReach::npos; + v = cr.find_next(v)) { + DEBUG_PRINTF("adding 0x%02x to %s\n", (u8)v, (v & 0x80) ? "highset" : "highclear"); u8 *change_mask = (v & 0x80) ? 
shuf_mask_lo_highset : shuf_mask_lo_highclear;
- u8 low_nibble = v & 0xf;
- u8 bits_456 = (v & 0x70) >> 4;
- change_mask[low_nibble] |= 1 << bits_456;
- }
-}
-
-/*
- * Reconstruct the charclass that the truffle masks represent
- */
+ u8 low_nibble = v & 0xf;
+ u8 bits_456 = (v & 0x70) >> 4;
+ change_mask[low_nibble] |= 1 << bits_456;
+ }
+}
+
+/*
+ * Reconstruct the charclass that the truffle masks represent
+ */
CharReach truffle2cr(const u8 *highclear, const u8 *highset) {
- CharReach cr;
- for (u8 i = 0; i < 16; i++) {
+ CharReach cr;
+ for (u8 i = 0; i < 16; i++) {
 u32 bits_456 = highclear[i];
- while (bits_456) {
- u32 pos = findAndClearLSB_32(&bits_456);
- assert(pos < 8);
- cr.set(pos << 4 | i);
- }
+ while (bits_456) {
+ u32 pos = findAndClearLSB_32(&bits_456);
+ assert(pos < 8);
+ cr.set(pos << 4 | i);
+ }
 bits_456 = highset[i];
- while (bits_456) {
- u32 pos = findAndClearLSB_32(&bits_456);
- assert(pos < 8);
- cr.set(0x80 | pos << 4 | i);
- }
- }
- return cr;
-}
-
-} // namespace ue2
+ while (bits_456) {
+ u32 pos = findAndClearLSB_32(&bits_456);
+ assert(pos < 8);
+ cr.set(0x80 | pos << 4 | i);
+ }
+ }
+ return cr;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/nfa/trufflecompile.h b/contrib/libs/hyperscan/src/nfa/trufflecompile.h
index c58da03f63..14b314f391 100644
--- a/contrib/libs/hyperscan/src/nfa/trufflecompile.h
+++ b/contrib/libs/hyperscan/src/nfa/trufflecompile.h
@@ -1,43 +1,43 @@
-/*
+/*
 * Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef TRUFFLECOMPILE_H +#define TRUFFLECOMPILE_H + +#include "ue2common.h" +#include "util/charreach.h" + +namespace ue2 { + void truffleBuildMasks(const CharReach &cr, u8 *mask1, u8 *mask2); CharReach truffle2cr(const u8 *lo_in, const u8 *hi_in); - -} - -#endif /* TRUFFLECOMPILE_H */ - + +} + +#endif /* TRUFFLECOMPILE_H */ + diff --git a/contrib/libs/hyperscan/src/nfa/vermicelli.h b/contrib/libs/hyperscan/src/nfa/vermicelli.h index 358add7c26..ed797d83f9 100644 --- a/contrib/libs/hyperscan/src/nfa/vermicelli.h +++ b/contrib/libs/hyperscan/src/nfa/vermicelli.h @@ -1,54 +1,54 @@ -/* +/* * Copyright (c) 2015-2020, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Vermicelli: single-byte and double-byte acceleration. 
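A hedged sketch of how the two basic entry points below compose (demo_scan is an illustrative name; both calls require a non-empty range, per the asserts in the implementations):

#include "vermicelli.h"

// Find the first ';' in [buf, buf_end), then skip the run of ';' bytes that
// starts there. Per the code below, vermicelliExec() returns buf_end when the
// character is absent, and nvermicelliExec() returns the first byte that is
// not the given character.
static const u8 *demo_scan(const u8 *buf, const u8 *buf_end) {
    const u8 *semi = vermicelliExec(';', 0, buf, buf_end);
    if (semi == buf_end) {
        return buf_end; // ';' not found
    }
    return nvermicelliExec(';', 0, semi, buf_end);
}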
- */ - -#ifndef VERMICELLI_H -#define VERMICELLI_H - -#include "util/bitutils.h" -#include "util/simd_utils.h" -#include "util/unaligned.h" - -#include "vermicelli_sse.h" - -static really_inline -const u8 *vermicelliExec(char c, char nocase, const u8 *buf, - const u8 *buf_end) { - DEBUG_PRINTF("verm scan %s\\x%02hhx over %zu bytes\n", - nocase ? "nocase " : "", c, (size_t)(buf_end - buf)); - assert(buf < buf_end); - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Vermicelli: single-byte and double-byte acceleration. + */ + +#ifndef VERMICELLI_H +#define VERMICELLI_H + +#include "util/bitutils.h" +#include "util/simd_utils.h" +#include "util/unaligned.h" + +#include "vermicelli_sse.h" + +static really_inline +const u8 *vermicelliExec(char c, char nocase, const u8 *buf, + const u8 *buf_end) { + DEBUG_PRINTF("verm scan %s\\x%02hhx over %zu bytes\n", + nocase ? "nocase " : "", c, (size_t)(buf_end - buf)); + assert(buf < buf_end); + VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */ - // Handle small scans. + // Handle small scans. #ifdef HAVE_AVX512 if (buf_end - buf <= VERM_BOUNDARY) { const u8 *ptr = nocase @@ -60,61 +60,61 @@ const u8 *vermicelliExec(char c, char nocase, const u8 *buf, return buf_end; } #else - if (buf_end - buf < VERM_BOUNDARY) { - for (; buf < buf_end; buf++) { - char cur = (char)*buf; - if (nocase) { - cur &= CASE_CLEAR; - } - if (cur == c) { - break; - } - } - return buf; - } + if (buf_end - buf < VERM_BOUNDARY) { + for (; buf < buf_end; buf++) { + char cur = (char)*buf; + if (nocase) { + cur &= CASE_CLEAR; + } + if (cur == c) { + break; + } + } + return buf; + } #endif - - uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY; - if (min) { - // Input isn't aligned, so we need to run one iteration with an - // unaligned load, then skip buf forward to the next aligned address. - // There's some small overlap here, but we don't mind scanning it twice - // if we can do it quickly, do we? - const u8 *ptr = nocase ? 
vermUnalignNocase(chars, buf, 0) - : vermUnalign(chars, buf, 0); - if (ptr) { - return ptr; - } - - buf += VERM_BOUNDARY - min; + + uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY; + if (min) { + // Input isn't aligned, so we need to run one iteration with an + // unaligned load, then skip buf forward to the next aligned address. + // There's some small overlap here, but we don't mind scanning it twice + // if we can do it quickly, do we? + const u8 *ptr = nocase ? vermUnalignNocase(chars, buf, 0) + : vermUnalign(chars, buf, 0); + if (ptr) { + return ptr; + } + + buf += VERM_BOUNDARY - min; assert(buf < buf_end); - } - - // Aligned loops from here on in - const u8 *ptr = nocase ? vermSearchAlignedNocase(chars, buf, buf_end - 1, 0) - : vermSearchAligned(chars, buf, buf_end - 1, 0); - if (ptr) { - return ptr; - } - - // Tidy up the mess at the end - ptr = nocase ? vermUnalignNocase(chars, buf_end - VERM_BOUNDARY, 0) - : vermUnalign(chars, buf_end - VERM_BOUNDARY, 0); - return ptr ? ptr : buf_end; -} - -/* like vermicelliExec except returns the address of the first character which - * is not c */ -static really_inline -const u8 *nvermicelliExec(char c, char nocase, const u8 *buf, - const u8 *buf_end) { - DEBUG_PRINTF("nverm scan %s\\x%02hhx over %zu bytes\n", - nocase ? "nocase " : "", c, (size_t)(buf_end - buf)); - assert(buf < buf_end); - + } + + // Aligned loops from here on in + const u8 *ptr = nocase ? vermSearchAlignedNocase(chars, buf, buf_end - 1, 0) + : vermSearchAligned(chars, buf, buf_end - 1, 0); + if (ptr) { + return ptr; + } + + // Tidy up the mess at the end + ptr = nocase ? vermUnalignNocase(chars, buf_end - VERM_BOUNDARY, 0) + : vermUnalign(chars, buf_end - VERM_BOUNDARY, 0); + return ptr ? ptr : buf_end; +} + +/* like vermicelliExec except returns the address of the first character which + * is not c */ +static really_inline +const u8 *nvermicelliExec(char c, char nocase, const u8 *buf, + const u8 *buf_end) { + DEBUG_PRINTF("nverm scan %s\\x%02hhx over %zu bytes\n", + nocase ? "nocase " : "", c, (size_t)(buf_end - buf)); + assert(buf < buf_end); + VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */ - // Handle small scans. + // Handle small scans. #ifdef HAVE_AVX512 if (buf_end - buf <= VERM_BOUNDARY) { const u8 *ptr = nocase @@ -126,59 +126,59 @@ const u8 *nvermicelliExec(char c, char nocase, const u8 *buf, return buf_end; } #else - if (buf_end - buf < VERM_BOUNDARY) { - for (; buf < buf_end; buf++) { - char cur = (char)*buf; - if (nocase) { - cur &= CASE_CLEAR; - } - if (cur != c) { - break; - } - } - return buf; - } + if (buf_end - buf < VERM_BOUNDARY) { + for (; buf < buf_end; buf++) { + char cur = (char)*buf; + if (nocase) { + cur &= CASE_CLEAR; + } + if (cur != c) { + break; + } + } + return buf; + } #endif - - size_t min = (size_t)buf % VERM_BOUNDARY; - if (min) { - // Input isn't aligned, so we need to run one iteration with an - // unaligned load, then skip buf forward to the next aligned address. - // There's some small overlap here, but we don't mind scanning it twice - // if we can do it quickly, do we? - const u8 *ptr = nocase ? vermUnalignNocase(chars, buf, 1) - : vermUnalign(chars, buf, 1); - if (ptr) { - return ptr; - } - - buf += VERM_BOUNDARY - min; + + size_t min = (size_t)buf % VERM_BOUNDARY; + if (min) { + // Input isn't aligned, so we need to run one iteration with an + // unaligned load, then skip buf forward to the next aligned address. 
+ // There's some small overlap here, but we don't mind scanning it twice + // if we can do it quickly, do we? + const u8 *ptr = nocase ? vermUnalignNocase(chars, buf, 1) + : vermUnalign(chars, buf, 1); + if (ptr) { + return ptr; + } + + buf += VERM_BOUNDARY - min; assert(buf < buf_end); - } - - // Aligned loops from here on in - const u8 *ptr = nocase ? vermSearchAlignedNocase(chars, buf, buf_end - 1, 1) - : vermSearchAligned(chars, buf, buf_end - 1, 1); - if (ptr) { - return ptr; - } - - // Tidy up the mess at the end - ptr = nocase ? vermUnalignNocase(chars, buf_end - VERM_BOUNDARY, 1) - : vermUnalign(chars, buf_end - VERM_BOUNDARY, 1); - return ptr ? ptr : buf_end; -} - -static really_inline -const u8 *vermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf, - const u8 *buf_end) { - DEBUG_PRINTF("double verm scan %s\\x%02hhx%02hhx over %zu bytes\n", - nocase ? "nocase " : "", c1, c2, (size_t)(buf_end - buf)); - assert(buf < buf_end); - - VERM_TYPE chars1 = VERM_SET_FN(c1); /* nocase already uppercase */ - VERM_TYPE chars2 = VERM_SET_FN(c2); /* nocase already uppercase */ - + } + + // Aligned loops from here on in + const u8 *ptr = nocase ? vermSearchAlignedNocase(chars, buf, buf_end - 1, 1) + : vermSearchAligned(chars, buf, buf_end - 1, 1); + if (ptr) { + return ptr; + } + + // Tidy up the mess at the end + ptr = nocase ? vermUnalignNocase(chars, buf_end - VERM_BOUNDARY, 1) + : vermUnalign(chars, buf_end - VERM_BOUNDARY, 1); + return ptr ? ptr : buf_end; +} + +static really_inline +const u8 *vermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf, + const u8 *buf_end) { + DEBUG_PRINTF("double verm scan %s\\x%02hhx%02hhx over %zu bytes\n", + nocase ? "nocase " : "", c1, c2, (size_t)(buf_end - buf)); + assert(buf < buf_end); + + VERM_TYPE chars1 = VERM_SET_FN(c1); /* nocase already uppercase */ + VERM_TYPE chars2 = VERM_SET_FN(c2); /* nocase already uppercase */ + #ifdef HAVE_AVX512 if (buf_end - buf <= VERM_BOUNDARY) { const u8 *ptr = nocase @@ -201,19 +201,19 @@ const u8 *vermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf, assert((buf_end - buf) >= VERM_BOUNDARY); uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY; - if (min) { - // Input isn't aligned, so we need to run one iteration with an - // unaligned load, then skip buf forward to the next aligned address. - // There's some small overlap here, but we don't mind scanning it twice - // if we can do it quickly, do we? - const u8 *ptr = nocase - ? dvermPreconditionNocase(chars1, chars2, buf) - : dvermPrecondition(chars1, chars2, buf); - if (ptr) { - return ptr; - } - - buf += VERM_BOUNDARY - min; + if (min) { + // Input isn't aligned, so we need to run one iteration with an + // unaligned load, then skip buf forward to the next aligned address. + // There's some small overlap here, but we don't mind scanning it twice + // if we can do it quickly, do we? + const u8 *ptr = nocase + ? 
dvermPreconditionNocase(chars1, chars2, buf) + : dvermPrecondition(chars1, chars2, buf); + if (ptr) { + return ptr; + } + + buf += VERM_BOUNDARY - min; assert(buf < buf_end); } @@ -285,18 +285,18 @@ const u8 *vermicelliDoubleMaskedExec(char c1, char c2, char m1, char m2, const u8 *p = dvermPreconditionMasked(chars1, chars2, mask1, mask2, buf); if (p) { return p; - } + } buf += VERM_BOUNDARY - min; assert(buf < buf_end); - } - - // Aligned loops from here on in + } + + // Aligned loops from here on in const u8 *ptr = dvermSearchAlignedMasked(chars1, chars2, mask1, mask2, c1, c2, m1, m2, buf, buf_end); if (ptr) { return ptr; - } + } // Tidy up the mess at the end ptr = dvermPreconditionMasked(chars1, chars2, mask1, mask2, @@ -313,20 +313,20 @@ const u8 *vermicelliDoubleMaskedExec(char c1, char c2, char m1, char m2, } return buf_end; -} - -// Reverse vermicelli scan. Provides exact semantics and returns (buf - 1) if -// character not found. -static really_inline -const u8 *rvermicelliExec(char c, char nocase, const u8 *buf, - const u8 *buf_end) { - DEBUG_PRINTF("rev verm scan %s\\x%02hhx over %zu bytes\n", - nocase ? "nocase " : "", c, (size_t)(buf_end - buf)); - assert(buf < buf_end); - +} + +// Reverse vermicelli scan. Provides exact semantics and returns (buf - 1) if +// character not found. +static really_inline +const u8 *rvermicelliExec(char c, char nocase, const u8 *buf, + const u8 *buf_end) { + DEBUG_PRINTF("rev verm scan %s\\x%02hhx over %zu bytes\n", + nocase ? "nocase " : "", c, (size_t)(buf_end - buf)); + assert(buf < buf_end); + VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */ - // Handle small scans. + // Handle small scans. #ifdef HAVE_AVX512 if (buf_end - buf <= VERM_BOUNDARY) { const u8 *ptr = nocase @@ -338,26 +338,26 @@ const u8 *rvermicelliExec(char c, char nocase, const u8 *buf, return buf - 1; } #else - if (buf_end - buf < VERM_BOUNDARY) { - for (buf_end--; buf_end >= buf; buf_end--) { - char cur = (char)*buf_end; - if (nocase) { - cur &= CASE_CLEAR; - } - if (cur == c) { - break; - } - } - return buf_end; - } + if (buf_end - buf < VERM_BOUNDARY) { + for (buf_end--; buf_end >= buf; buf_end--) { + char cur = (char)*buf_end; + if (nocase) { + cur &= CASE_CLEAR; + } + if (cur == c) { + break; + } + } + return buf_end; + } #endif - - size_t min = (size_t)buf_end % VERM_BOUNDARY; - if (min) { - // Input isn't aligned, so we need to run one iteration with an - // unaligned load, then skip buf backward to the next aligned address. - // There's some small overlap here, but we don't mind scanning it twice - // if we can do it quickly, do we? + + size_t min = (size_t)buf_end % VERM_BOUNDARY; + if (min) { + // Input isn't aligned, so we need to run one iteration with an + // unaligned load, then skip buf backward to the next aligned address. + // There's some small overlap here, but we don't mind scanning it twice + // if we can do it quickly, do we? const u8 *ptr = nocase ? rvermUnalignNocase(chars, buf_end - VERM_BOUNDARY, 0) @@ -366,39 +366,39 @@ const u8 *rvermicelliExec(char c, char nocase, const u8 *buf, if (ptr) { return ptr; - } - - buf_end -= min; - if (buf >= buf_end) { - return buf_end; - } - } - - // Aligned loops from here on in. - const u8 *ptr = nocase ? rvermSearchAlignedNocase(chars, buf, buf_end, 0) - : rvermSearchAligned(chars, buf, buf_end, 0); - if (ptr) { - return ptr; - } - - // Tidy up the mess at the end, return buf - 1 if not found. - ptr = nocase ? rvermUnalignNocase(chars, buf, 0) - : rvermUnalign(chars, buf, 0); - return ptr ? 
ptr : buf - 1; -} - -/* like rvermicelliExec except returns the address of the last character which - * is not c */ -static really_inline -const u8 *rnvermicelliExec(char c, char nocase, const u8 *buf, - const u8 *buf_end) { - DEBUG_PRINTF("rev verm scan %s\\x%02hhx over %zu bytes\n", - nocase ? "nocase " : "", c, (size_t)(buf_end - buf)); - assert(buf < buf_end); - + } + + buf_end -= min; + if (buf >= buf_end) { + return buf_end; + } + } + + // Aligned loops from here on in. + const u8 *ptr = nocase ? rvermSearchAlignedNocase(chars, buf, buf_end, 0) + : rvermSearchAligned(chars, buf, buf_end, 0); + if (ptr) { + return ptr; + } + + // Tidy up the mess at the end, return buf - 1 if not found. + ptr = nocase ? rvermUnalignNocase(chars, buf, 0) + : rvermUnalign(chars, buf, 0); + return ptr ? ptr : buf - 1; +} + +/* like rvermicelliExec except returns the address of the last character which + * is not c */ +static really_inline +const u8 *rnvermicelliExec(char c, char nocase, const u8 *buf, + const u8 *buf_end) { + DEBUG_PRINTF("rev verm scan %s\\x%02hhx over %zu bytes\n", + nocase ? "nocase " : "", c, (size_t)(buf_end - buf)); + assert(buf < buf_end); + VERM_TYPE chars = VERM_SET_FN(c); /* nocase already uppercase */ - // Handle small scans. + // Handle small scans. #ifdef HAVE_AVX512 if (buf_end - buf <= VERM_BOUNDARY) { const u8 *ptr = nocase @@ -410,26 +410,26 @@ const u8 *rnvermicelliExec(char c, char nocase, const u8 *buf, return buf - 1; } #else - if (buf_end - buf < VERM_BOUNDARY) { - for (buf_end--; buf_end >= buf; buf_end--) { - char cur = (char)*buf_end; - if (nocase) { - cur &= CASE_CLEAR; - } - if (cur != c) { - break; - } - } - return buf_end; - } + if (buf_end - buf < VERM_BOUNDARY) { + for (buf_end--; buf_end >= buf; buf_end--) { + char cur = (char)*buf_end; + if (nocase) { + cur &= CASE_CLEAR; + } + if (cur != c) { + break; + } + } + return buf_end; + } #endif - - size_t min = (size_t)buf_end % VERM_BOUNDARY; - if (min) { - // Input isn't aligned, so we need to run one iteration with an - // unaligned load, then skip buf backward to the next aligned address. - // There's some small overlap here, but we don't mind scanning it twice - // if we can do it quickly, do we? + + size_t min = (size_t)buf_end % VERM_BOUNDARY; + if (min) { + // Input isn't aligned, so we need to run one iteration with an + // unaligned load, then skip buf backward to the next aligned address. + // There's some small overlap here, but we don't mind scanning it twice + // if we can do it quickly, do we? const u8 *ptr = nocase ? rvermUnalignNocase(chars, buf_end - VERM_BOUNDARY, 1) @@ -438,38 +438,38 @@ const u8 *rnvermicelliExec(char c, char nocase, const u8 *buf, if (ptr) { return ptr; - } - - buf_end -= min; - if (buf >= buf_end) { - return buf_end; - } - } - - // Aligned loops from here on in. - const u8 *ptr = nocase ? rvermSearchAlignedNocase(chars, buf, buf_end, 1) - : rvermSearchAligned(chars, buf, buf_end, 1); - if (ptr) { - return ptr; - } - - // Tidy up the mess at the end, return buf - 1 if not found. - ptr = nocase ? rvermUnalignNocase(chars, buf, 1) - : rvermUnalign(chars, buf, 1); - return ptr ? ptr : buf - 1; -} - -/* returns highest offset of c2 (NOTE: not c1) */ -static really_inline -const u8 *rvermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf, - const u8 *buf_end) { - DEBUG_PRINTF("rev double verm scan %s\\x%02hhx%02hhx over %zu bytes\n", - nocase ? 
"nocase " : "", c1, c2, (size_t)(buf_end - buf)); - assert(buf < buf_end); - - VERM_TYPE chars1 = VERM_SET_FN(c1); /* nocase already uppercase */ - VERM_TYPE chars2 = VERM_SET_FN(c2); /* nocase already uppercase */ - + } + + buf_end -= min; + if (buf >= buf_end) { + return buf_end; + } + } + + // Aligned loops from here on in. + const u8 *ptr = nocase ? rvermSearchAlignedNocase(chars, buf, buf_end, 1) + : rvermSearchAligned(chars, buf, buf_end, 1); + if (ptr) { + return ptr; + } + + // Tidy up the mess at the end, return buf - 1 if not found. + ptr = nocase ? rvermUnalignNocase(chars, buf, 1) + : rvermUnalign(chars, buf, 1); + return ptr ? ptr : buf - 1; +} + +/* returns highest offset of c2 (NOTE: not c1) */ +static really_inline +const u8 *rvermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf, + const u8 *buf_end) { + DEBUG_PRINTF("rev double verm scan %s\\x%02hhx%02hhx over %zu bytes\n", + nocase ? "nocase " : "", c1, c2, (size_t)(buf_end - buf)); + assert(buf < buf_end); + + VERM_TYPE chars1 = VERM_SET_FN(c1); /* nocase already uppercase */ + VERM_TYPE chars2 = VERM_SET_FN(c2); /* nocase already uppercase */ + #ifdef HAVE_AVX512 if (buf_end - buf <= VERM_BOUNDARY) { const u8 *ptr = nocase @@ -487,32 +487,32 @@ const u8 *rvermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf, assert((buf_end - buf) >= VERM_BOUNDARY); size_t min = (size_t)buf_end % VERM_BOUNDARY; - if (min) { - // input not aligned, so we need to run one iteration with an unaligned - // load, then skip buf forward to the next aligned address. There's - // some small overlap here, but we don't mind scanning it twice if we - // can do it quickly, do we? + if (min) { + // input not aligned, so we need to run one iteration with an unaligned + // load, then skip buf forward to the next aligned address. There's + // some small overlap here, but we don't mind scanning it twice if we + // can do it quickly, do we? const u8 *ptr = nocase ? rdvermPreconditionNocase(chars1, chars2, buf_end - VERM_BOUNDARY) : rdvermPrecondition(chars1, chars2, buf_end - VERM_BOUNDARY); - - if (ptr) { - return ptr; - } - - buf_end -= min; - if (buf >= buf_end) { - return buf_end; - } - } - - // Aligned loops from here on in - if (nocase) { - return rdvermSearchAlignedNocase(chars1, chars2, c1, c2, buf, buf_end); - } else { - return rdvermSearchAligned(chars1, chars2, c1, c2, buf, buf_end); - } -} - -#endif /* VERMICELLI_H */ + + if (ptr) { + return ptr; + } + + buf_end -= min; + if (buf >= buf_end) { + return buf_end; + } + } + + // Aligned loops from here on in + if (nocase) { + return rdvermSearchAlignedNocase(chars1, chars2, c1, c2, buf, buf_end); + } else { + return rdvermSearchAligned(chars1, chars2, c1, c2, buf, buf_end); + } +} + +#endif /* VERMICELLI_H */ diff --git a/contrib/libs/hyperscan/src/nfa/vermicelli_run.h b/contrib/libs/hyperscan/src/nfa/vermicelli_run.h index 4459461ebf..d6fe7ec78f 100644 --- a/contrib/libs/hyperscan/src/nfa/vermicelli_run.h +++ b/contrib/libs/hyperscan/src/nfa/vermicelli_run.h @@ -1,90 +1,90 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "vermicelli.h" - -static really_inline -const u8 *find_xverm_run(char c, char nocase, u32 repeat, UNUSED const u8 *buf, - const u8 *buf_start, const u8 *buf_end, char negate) { - DEBUG_PRINTF("looking for 0x%hhx{%u} in %p [%zd, %zd)\n", c, repeat, buf, - buf_start - buf, buf_end - buf); - - /* TODO optimise on where it is easy to get a dense bitfield of character - * matches */ - if (repeat == 1) { - return negate ? nvermicelliExec(c, nocase, buf_start, buf_end) - : vermicelliExec(c, nocase, buf_start, buf_end); - } - - while (1) { - const u8 *s; - if (negate) { - s = nvermicelliExec(c, nocase, buf_start, buf_end); - } else if (buf_end - buf_start >= VERM_BOUNDARY && !nocase) { - s = vermicelliDoubleExec(c, c, nocase, buf_start, buf_end); - - if (s != buf_end && *s != c) { /* double verm is not certain to be - * precise */ - s = vermicelliExec(c, nocase, s, buf_end); - } - } else { - s = vermicelliExec(c, nocase, buf_start, buf_end); - } - if (s == buf_end) { - return s; - } - - DEBUG_PRINTF("cand %zd\n", s - buf); - - const u8 *test_e = MIN(s + repeat, buf_end); - - const u8 *rv = negate ? vermicelliExec(c, nocase, s, test_e) - : nvermicelliExec(c, nocase, s, test_e); - - assert(rv > buf_start); - assert(rv <= buf_end); - - if (rv == test_e) { - return s; - } - - buf_start = rv; - } -} - -static really_inline -const u8 *find_verm_run(char c, char nocase, u32 repeat, const u8 *buf, - const u8 *buf_start, const u8 *buf_end) { - return find_xverm_run(c, nocase, repeat, buf, buf_start, buf_end, 0); -} - -static really_inline -const u8 *find_nverm_run(char c, char nocase, u32 repeat, const u8 *buf, - const u8 *buf_start, const u8 *buf_end) { - return find_xverm_run(c, nocase, repeat, buf, buf_start, buf_end, 1); -} +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "vermicelli.h" + +static really_inline +const u8 *find_xverm_run(char c, char nocase, u32 repeat, UNUSED const u8 *buf, + const u8 *buf_start, const u8 *buf_end, char negate) { + DEBUG_PRINTF("looking for 0x%hhx{%u} in %p [%zd, %zd)\n", c, repeat, buf, + buf_start - buf, buf_end - buf); + + /* TODO optimise on where it is easy to get a dense bitfield of character + * matches */ + if (repeat == 1) { + return negate ? nvermicelliExec(c, nocase, buf_start, buf_end) + : vermicelliExec(c, nocase, buf_start, buf_end); + } + + while (1) { + const u8 *s; + if (negate) { + s = nvermicelliExec(c, nocase, buf_start, buf_end); + } else if (buf_end - buf_start >= VERM_BOUNDARY && !nocase) { + s = vermicelliDoubleExec(c, c, nocase, buf_start, buf_end); + + if (s != buf_end && *s != c) { /* double verm is not certain to be + * precise */ + s = vermicelliExec(c, nocase, s, buf_end); + } + } else { + s = vermicelliExec(c, nocase, buf_start, buf_end); + } + if (s == buf_end) { + return s; + } + + DEBUG_PRINTF("cand %zd\n", s - buf); + + const u8 *test_e = MIN(s + repeat, buf_end); + + const u8 *rv = negate ? vermicelliExec(c, nocase, s, test_e) + : nvermicelliExec(c, nocase, s, test_e); + + assert(rv > buf_start); + assert(rv <= buf_end); + + if (rv == test_e) { + return s; + } + + buf_start = rv; + } +} + +static really_inline +const u8 *find_verm_run(char c, char nocase, u32 repeat, const u8 *buf, + const u8 *buf_start, const u8 *buf_end) { + return find_xverm_run(c, nocase, repeat, buf, buf_start, buf_end, 0); +} + +static really_inline +const u8 *find_nverm_run(char c, char nocase, u32 repeat, const u8 *buf, + const u8 *buf_start, const u8 *buf_end) { + return find_xverm_run(c, nocase, repeat, buf, buf_start, buf_end, 1); +} diff --git a/contrib/libs/hyperscan/src/nfa/vermicelli_sse.h b/contrib/libs/hyperscan/src/nfa/vermicelli_sse.h index e58023f586..3307486cff 100644 --- a/contrib/libs/hyperscan/src/nfa/vermicelli_sse.h +++ b/contrib/libs/hyperscan/src/nfa/vermicelli_sse.h @@ -1,181 +1,181 @@ -/* +/* * Copyright (c) 2015-2020, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Vermicelli: Intel SSE implementation. - * - * (users should include vermicelli.h) - */ - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Vermicelli: Intel SSE implementation. 
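Every kernel in this file reduces to one idiom: broadcast the target byte, compare 16 lanes at once, and turn the lane compare into a bitmask. A self-contained sketch with raw SSE2 intrinsics (a simplification: the real code goes through the simd_utils wrappers such as set16x8(), eq128() and movemask128(), and __builtin_ctz assumes a GCC/Clang-style compiler):

#include <emmintrin.h> // SSE2
#include <cstdio>

int main() {
    const char buf[17] = "hello, world!!!!";
    __m128i chars = _mm_set1_epi8('w'); // roughly what VERM_SET_FN/set16x8 does
    __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(buf));
    unsigned z = _mm_movemask_epi8(_mm_cmpeq_epi8(chars, data));
    if (z) {
        std::printf("first 'w' at offset %d\n", __builtin_ctz(z)); // offset 7
    }
    return 0;
}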
+ * + * (users should include vermicelli.h) + */ + #if !defined(HAVE_AVX512) -#define VERM_BOUNDARY 16 -#define VERM_TYPE m128 -#define VERM_SET_FN set16x8 - -static really_inline -const u8 *vermSearchAligned(m128 chars, const u8 *buf, const u8 *buf_end, - char negate) { - assert((size_t)buf % 16 == 0); - for (; buf + 31 < buf_end; buf += 32) { - m128 data = load128(buf); - u32 z1 = movemask128(eq128(chars, data)); - m128 data2 = load128(buf + 16); - u32 z2 = movemask128(eq128(chars, data2)); - u32 z = z1 | (z2 << 16); - if (negate) { - z = ~z; - } - if (unlikely(z)) { - u32 pos = ctz32(z); - return buf + pos; - } - } - for (; buf + 15 < buf_end; buf += 16) { - m128 data = load128(buf); - u32 z = movemask128(eq128(chars, data)); - if (negate) { - z = ~z & 0xffff; - } - if (unlikely(z)) { - u32 pos = ctz32(z); - return buf + pos; - } - } - return NULL; -} - -static really_inline -const u8 *vermSearchAlignedNocase(m128 chars, const u8 *buf, - const u8 *buf_end, char negate) { - assert((size_t)buf % 16 == 0); - m128 casemask = set16x8(CASE_CLEAR); - - for (; buf + 31 < buf_end; buf += 32) { - m128 data = load128(buf); - u32 z1 = movemask128(eq128(chars, and128(casemask, data))); - m128 data2 = load128(buf + 16); - u32 z2 = movemask128(eq128(chars, and128(casemask, data2))); - u32 z = z1 | (z2 << 16); - if (negate) { - z = ~z; - } - if (unlikely(z)) { - u32 pos = ctz32(z); - return buf + pos; - } - } - - for (; buf + 15 < buf_end; buf += 16) { - m128 data = load128(buf); - u32 z = movemask128(eq128(chars, and128(casemask, data))); - if (negate) { - z = ~z & 0xffff; - } - if (unlikely(z)) { - u32 pos = ctz32(z); - return buf + pos; - } - } - return NULL; -} - -// returns NULL if not found -static really_inline -const u8 *vermUnalign(m128 chars, const u8 *buf, char negate) { - m128 data = loadu128(buf); // unaligned - u32 z = movemask128(eq128(chars, data)); - if (negate) { - z = ~z & 0xffff; - } - if (unlikely(z)) { - return buf + ctz32(z); - } - return NULL; -} - -// returns NULL if not found -static really_inline -const u8 *vermUnalignNocase(m128 chars, const u8 *buf, char negate) { - m128 casemask = set16x8(CASE_CLEAR); - m128 data = loadu128(buf); // unaligned - u32 z = movemask128(eq128(chars, and128(casemask, data))); - if (negate) { - z = ~z & 0xffff; - } - if (unlikely(z)) { - return buf + ctz32(z); - } - return NULL; -} - -static really_inline -const u8 *dvermSearchAligned(m128 chars1, m128 chars2, u8 c1, u8 c2, - const u8 *buf, const u8 *buf_end) { - for (; buf + 16 < buf_end; buf += 16) { - m128 data = load128(buf); - u32 z = movemask128(and128(eq128(chars1, data), +#define VERM_BOUNDARY 16 +#define VERM_TYPE m128 +#define VERM_SET_FN set16x8 + +static really_inline +const u8 *vermSearchAligned(m128 chars, const u8 *buf, const u8 *buf_end, + char negate) { + assert((size_t)buf % 16 == 0); + for (; buf + 31 < buf_end; buf += 32) { + m128 data = load128(buf); + u32 z1 = movemask128(eq128(chars, data)); + m128 data2 = load128(buf + 16); + u32 z2 = movemask128(eq128(chars, data2)); + u32 z = z1 | (z2 << 16); + if (negate) { + z = ~z; + } + if (unlikely(z)) { + u32 pos = ctz32(z); + return buf + pos; + } + } + for (; buf + 15 < buf_end; buf += 16) { + m128 data = load128(buf); + u32 z = movemask128(eq128(chars, data)); + if (negate) { + z = ~z & 0xffff; + } + if (unlikely(z)) { + u32 pos = ctz32(z); + return buf + pos; + } + } + return NULL; +} + +static really_inline +const u8 *vermSearchAlignedNocase(m128 chars, const u8 *buf, + const u8 *buf_end, char negate) { + assert((size_t)buf % 
16 == 0); + m128 casemask = set16x8(CASE_CLEAR); + + for (; buf + 31 < buf_end; buf += 32) { + m128 data = load128(buf); + u32 z1 = movemask128(eq128(chars, and128(casemask, data))); + m128 data2 = load128(buf + 16); + u32 z2 = movemask128(eq128(chars, and128(casemask, data2))); + u32 z = z1 | (z2 << 16); + if (negate) { + z = ~z; + } + if (unlikely(z)) { + u32 pos = ctz32(z); + return buf + pos; + } + } + + for (; buf + 15 < buf_end; buf += 16) { + m128 data = load128(buf); + u32 z = movemask128(eq128(chars, and128(casemask, data))); + if (negate) { + z = ~z & 0xffff; + } + if (unlikely(z)) { + u32 pos = ctz32(z); + return buf + pos; + } + } + return NULL; +} + +// returns NULL if not found +static really_inline +const u8 *vermUnalign(m128 chars, const u8 *buf, char negate) { + m128 data = loadu128(buf); // unaligned + u32 z = movemask128(eq128(chars, data)); + if (negate) { + z = ~z & 0xffff; + } + if (unlikely(z)) { + return buf + ctz32(z); + } + return NULL; +} + +// returns NULL if not found +static really_inline +const u8 *vermUnalignNocase(m128 chars, const u8 *buf, char negate) { + m128 casemask = set16x8(CASE_CLEAR); + m128 data = loadu128(buf); // unaligned + u32 z = movemask128(eq128(chars, and128(casemask, data))); + if (negate) { + z = ~z & 0xffff; + } + if (unlikely(z)) { + return buf + ctz32(z); + } + return NULL; +} + +static really_inline +const u8 *dvermSearchAligned(m128 chars1, m128 chars2, u8 c1, u8 c2, + const u8 *buf, const u8 *buf_end) { + for (; buf + 16 < buf_end; buf += 16) { + m128 data = load128(buf); + u32 z = movemask128(and128(eq128(chars1, data), rshiftbyte_m128(eq128(chars2, data), 1))); - if (buf[15] == c1 && buf[16] == c2) { - z |= (1 << 15); - } - if (unlikely(z)) { - u32 pos = ctz32(z); - return buf + pos; - } - } - - return NULL; -} - -static really_inline -const u8 *dvermSearchAlignedNocase(m128 chars1, m128 chars2, u8 c1, u8 c2, - const u8 *buf, const u8 *buf_end) { - assert((size_t)buf % 16 == 0); - m128 casemask = set16x8(CASE_CLEAR); - - for (; buf + 16 < buf_end; buf += 16) { - m128 data = load128(buf); - m128 v = and128(casemask, data); - u32 z = movemask128(and128(eq128(chars1, v), + if (buf[15] == c1 && buf[16] == c2) { + z |= (1 << 15); + } + if (unlikely(z)) { + u32 pos = ctz32(z); + return buf + pos; + } + } + + return NULL; +} + +static really_inline +const u8 *dvermSearchAlignedNocase(m128 chars1, m128 chars2, u8 c1, u8 c2, + const u8 *buf, const u8 *buf_end) { + assert((size_t)buf % 16 == 0); + m128 casemask = set16x8(CASE_CLEAR); + + for (; buf + 16 < buf_end; buf += 16) { + m128 data = load128(buf); + m128 v = and128(casemask, data); + u32 z = movemask128(and128(eq128(chars1, v), rshiftbyte_m128(eq128(chars2, v), 1))); - if ((buf[15] & CASE_CLEAR) == c1 && (buf[16] & CASE_CLEAR) == c2) { - z |= (1 << 15); - } - if (unlikely(z)) { - u32 pos = ctz32(z); - return buf + pos; - } - } + if ((buf[15] & CASE_CLEAR) == c1 && (buf[16] & CASE_CLEAR) == c2) { + z |= (1 << 15); + } + if (unlikely(z)) { + u32 pos = ctz32(z); + return buf + pos; + } + } return NULL; -} - +} + static really_inline const u8 *dvermSearchAlignedMasked(m128 chars1, m128 chars2, m128 mask1, m128 mask2, u8 c1, u8 c2, u8 m1, @@ -200,41 +200,41 @@ const u8 *dvermSearchAlignedMasked(m128 chars1, m128 chars2, return NULL; } -// returns NULL if not found -static really_inline -const u8 *dvermPrecondition(m128 chars1, m128 chars2, const u8 *buf) { - m128 data = loadu128(buf); // unaligned - u32 z = movemask128(and128(eq128(chars1, data), +// returns NULL if not found +static 
really_inline +const u8 *dvermPrecondition(m128 chars1, m128 chars2, const u8 *buf) { + m128 data = loadu128(buf); // unaligned + u32 z = movemask128(and128(eq128(chars1, data), rshiftbyte_m128(eq128(chars2, data), 1))); - - /* no fixup of the boundary required - the aligned run will pick it up */ - if (unlikely(z)) { - u32 pos = ctz32(z); - return buf + pos; - } - return NULL; -} - -// returns NULL if not found -static really_inline -const u8 *dvermPreconditionNocase(m128 chars1, m128 chars2, const u8 *buf) { - /* due to laziness, nonalphas and nocase having interesting behaviour */ - m128 casemask = set16x8(CASE_CLEAR); - m128 data = loadu128(buf); // unaligned - m128 v = and128(casemask, data); - u32 z = movemask128(and128(eq128(chars1, v), + + /* no fixup of the boundary required - the aligned run will pick it up */ + if (unlikely(z)) { + u32 pos = ctz32(z); + return buf + pos; + } + return NULL; +} + +// returns NULL if not found +static really_inline +const u8 *dvermPreconditionNocase(m128 chars1, m128 chars2, const u8 *buf) { + /* due to laziness, nonalphas and nocase having interesting behaviour */ + m128 casemask = set16x8(CASE_CLEAR); + m128 data = loadu128(buf); // unaligned + m128 v = and128(casemask, data); + u32 z = movemask128(and128(eq128(chars1, v), rshiftbyte_m128(eq128(chars2, v), 1))); - - /* no fixup of the boundary required - the aligned run will pick it up */ - if (unlikely(z)) { - u32 pos = ctz32(z); - return buf + pos; - } - return NULL; -} - + + /* no fixup of the boundary required - the aligned run will pick it up */ + if (unlikely(z)) { + u32 pos = ctz32(z); + return buf + pos; + } + return NULL; +} + // returns NULL if not found -static really_inline +static really_inline const u8 *dvermPreconditionMasked(m128 chars1, m128 chars2, m128 mask1, m128 mask2, const u8 *buf) { m128 data = loadu128(buf); // unaligned @@ -251,148 +251,148 @@ const u8 *dvermPreconditionMasked(m128 chars1, m128 chars2, } static really_inline -const u8 *lastMatchOffset(const u8 *buf_end, u32 z) { - assert(z); - return buf_end - 16 + 31 - clz32(z); -} - -static really_inline -const u8 *rvermSearchAligned(m128 chars, const u8 *buf, const u8 *buf_end, - char negate) { - assert((size_t)buf_end % 16 == 0); - for (; buf + 15 < buf_end; buf_end -= 16) { - m128 data = load128(buf_end - 16); - u32 z = movemask128(eq128(chars, data)); - if (negate) { - z = ~z & 0xffff; - } - if (unlikely(z)) { - return lastMatchOffset(buf_end, z); - } - } - return NULL; -} - -static really_inline -const u8 *rvermSearchAlignedNocase(m128 chars, const u8 *buf, - const u8 *buf_end, char negate) { - assert((size_t)buf_end % 16 == 0); - m128 casemask = set16x8(CASE_CLEAR); - - for (; buf + 15 < buf_end; buf_end -= 16) { - m128 data = load128(buf_end - 16); - u32 z = movemask128(eq128(chars, and128(casemask, data))); - if (negate) { - z = ~z & 0xffff; - } - if (unlikely(z)) { - return lastMatchOffset(buf_end, z); - } - } - return NULL; -} - -// returns NULL if not found -static really_inline -const u8 *rvermUnalign(m128 chars, const u8 *buf, char negate) { - m128 data = loadu128(buf); // unaligned - u32 z = movemask128(eq128(chars, data)); - if (negate) { - z = ~z & 0xffff; - } - if (unlikely(z)) { - return lastMatchOffset(buf + 16, z); - } - return NULL; -} - -// returns NULL if not found -static really_inline -const u8 *rvermUnalignNocase(m128 chars, const u8 *buf, char negate) { - m128 casemask = set16x8(CASE_CLEAR); - m128 data = loadu128(buf); // unaligned - u32 z = movemask128(eq128(chars, and128(casemask, data))); 
- if (negate) { - z = ~z & 0xffff; - } - if (unlikely(z)) { - return lastMatchOffset(buf + 16, z); - } - return NULL; -} - -static really_inline -const u8 *rdvermSearchAligned(m128 chars1, m128 chars2, u8 c1, u8 c2, - const u8 *buf, const u8 *buf_end) { - assert((size_t)buf_end % 16 == 0); - - for (; buf + 16 < buf_end; buf_end -= 16) { - m128 data = load128(buf_end - 16); - u32 z = movemask128(and128(eq128(chars2, data), +const u8 *lastMatchOffset(const u8 *buf_end, u32 z) { + assert(z); + return buf_end - 16 + 31 - clz32(z); +} + +static really_inline +const u8 *rvermSearchAligned(m128 chars, const u8 *buf, const u8 *buf_end, + char negate) { + assert((size_t)buf_end % 16 == 0); + for (; buf + 15 < buf_end; buf_end -= 16) { + m128 data = load128(buf_end - 16); + u32 z = movemask128(eq128(chars, data)); + if (negate) { + z = ~z & 0xffff; + } + if (unlikely(z)) { + return lastMatchOffset(buf_end, z); + } + } + return NULL; +} + +static really_inline +const u8 *rvermSearchAlignedNocase(m128 chars, const u8 *buf, + const u8 *buf_end, char negate) { + assert((size_t)buf_end % 16 == 0); + m128 casemask = set16x8(CASE_CLEAR); + + for (; buf + 15 < buf_end; buf_end -= 16) { + m128 data = load128(buf_end - 16); + u32 z = movemask128(eq128(chars, and128(casemask, data))); + if (negate) { + z = ~z & 0xffff; + } + if (unlikely(z)) { + return lastMatchOffset(buf_end, z); + } + } + return NULL; +} + +// returns NULL if not found +static really_inline +const u8 *rvermUnalign(m128 chars, const u8 *buf, char negate) { + m128 data = loadu128(buf); // unaligned + u32 z = movemask128(eq128(chars, data)); + if (negate) { + z = ~z & 0xffff; + } + if (unlikely(z)) { + return lastMatchOffset(buf + 16, z); + } + return NULL; +} + +// returns NULL if not found +static really_inline +const u8 *rvermUnalignNocase(m128 chars, const u8 *buf, char negate) { + m128 casemask = set16x8(CASE_CLEAR); + m128 data = loadu128(buf); // unaligned + u32 z = movemask128(eq128(chars, and128(casemask, data))); + if (negate) { + z = ~z & 0xffff; + } + if (unlikely(z)) { + return lastMatchOffset(buf + 16, z); + } + return NULL; +} + +static really_inline +const u8 *rdvermSearchAligned(m128 chars1, m128 chars2, u8 c1, u8 c2, + const u8 *buf, const u8 *buf_end) { + assert((size_t)buf_end % 16 == 0); + + for (; buf + 16 < buf_end; buf_end -= 16) { + m128 data = load128(buf_end - 16); + u32 z = movemask128(and128(eq128(chars2, data), lshiftbyte_m128(eq128(chars1, data), 1))); - if (buf_end[-17] == c1 && buf_end[-16] == c2) { - z |= 1; - } - if (unlikely(z)) { - return lastMatchOffset(buf_end, z); - } - } - return buf_end; -} - -static really_inline -const u8 *rdvermSearchAlignedNocase(m128 chars1, m128 chars2, u8 c1, u8 c2, - const u8 *buf, const u8 *buf_end) { - assert((size_t)buf_end % 16 == 0); - m128 casemask = set16x8(CASE_CLEAR); - - for (; buf + 16 < buf_end; buf_end -= 16) { - m128 data = load128(buf_end - 16); - m128 v = and128(casemask, data); - u32 z = movemask128(and128(eq128(chars2, v), + if (buf_end[-17] == c1 && buf_end[-16] == c2) { + z |= 1; + } + if (unlikely(z)) { + return lastMatchOffset(buf_end, z); + } + } + return buf_end; +} + +static really_inline +const u8 *rdvermSearchAlignedNocase(m128 chars1, m128 chars2, u8 c1, u8 c2, + const u8 *buf, const u8 *buf_end) { + assert((size_t)buf_end % 16 == 0); + m128 casemask = set16x8(CASE_CLEAR); + + for (; buf + 16 < buf_end; buf_end -= 16) { + m128 data = load128(buf_end - 16); + m128 v = and128(casemask, data); + u32 z = movemask128(and128(eq128(chars2, v), 
                               lshiftbyte_m128(eq128(chars1, v), 1)));
-        if ((buf_end[-17] & CASE_CLEAR) == c1
-            && (buf_end[-16] & CASE_CLEAR) == c2) {
-            z |= 1;
-        }
-        if (unlikely(z)) {
-            return lastMatchOffset(buf_end, z);
-        }
-    }
-    return buf_end;
-}
-
-// returns NULL if not found
-static really_inline
-const u8 *rdvermPrecondition(m128 chars1, m128 chars2, const u8 *buf) {
-    m128 data = loadu128(buf);
-    u32 z = movemask128(and128(eq128(chars2, data),
+        if ((buf_end[-17] & CASE_CLEAR) == c1
+            && (buf_end[-16] & CASE_CLEAR) == c2) {
+            z |= 1;
+        }
+        if (unlikely(z)) {
+            return lastMatchOffset(buf_end, z);
+        }
+    }
+    return buf_end;
+}
+
+// returns NULL if not found
+static really_inline
+const u8 *rdvermPrecondition(m128 chars1, m128 chars2, const u8 *buf) {
+    m128 data = loadu128(buf);
+    u32 z = movemask128(and128(eq128(chars2, data),
                               lshiftbyte_m128(eq128(chars1, data), 1)));
-
-    /* no fixup of the boundary required - the aligned run will pick it up */
-    if (unlikely(z)) {
-        return lastMatchOffset(buf + 16, z);
-    }
-
-    return NULL;
-}
-
-// returns NULL if not found
-static really_inline
-const u8 *rdvermPreconditionNocase(m128 chars1, m128 chars2, const u8 *buf) {
-    /* due to laziness, nonalphas and nocase having interesting behaviour */
-    m128 casemask = set16x8(CASE_CLEAR);
-    m128 data = loadu128(buf);
-    m128 v = and128(casemask, data);
-    u32 z = movemask128(and128(eq128(chars2, v),
+
+    /* no fixup of the boundary required - the aligned run will pick it up */
+    if (unlikely(z)) {
+        return lastMatchOffset(buf + 16, z);
+    }
+
+    return NULL;
+}
+
+// returns NULL if not found
+static really_inline
+const u8 *rdvermPreconditionNocase(m128 chars1, m128 chars2, const u8 *buf) {
+    /* due to laziness, nonalphas and nocase having interesting behaviour */
+    m128 casemask = set16x8(CASE_CLEAR);
+    m128 data = loadu128(buf);
+    m128 v = and128(casemask, data);
+    u32 z = movemask128(and128(eq128(chars2, v),
                               lshiftbyte_m128(eq128(chars1, v), 1)));
-    /* no fixup of the boundary required - the aligned run will pick it up */
-    if (unlikely(z)) {
-        return lastMatchOffset(buf + 16, z);
-    }
-
-    return NULL;
-}
 #else // HAVE_AVX512
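The find_xverm_run hunk earlier in this diff is a candidate/verify loop: one vermicelli scan proposes the first occurrence of the character, and a scan for the complementary class then checks whether the run reaches `repeat` bytes (or the end of the buffer). The following scalar reference restates that structure without SIMD; it is a sketch for cross-checking only, and find_run_ref is an invented name, not a hyperscan function.

#include <stddef.h>

/* Scalar reference for the candidate/verify structure of find_xverm_run:
 * return the start of a run of at least `repeat` copies of c, or of a run
 * of c that extends to buf_end; return buf_end if there is none.
 * Assumes repeat >= 1 and handles only the non-negated, cased variant. */
static const unsigned char *find_run_ref(unsigned char c, unsigned repeat,
                                         const unsigned char *buf_start,
                                         const unsigned char *buf_end) {
    while (buf_start < buf_end) {
        /* candidate: first occurrence of c (vermicelliExec in the real code) */
        while (buf_start < buf_end && *buf_start != c) {
            buf_start++;
        }
        if (buf_start == buf_end) {
            return buf_end; /* no run found */
        }
        /* verify: look for a non-c byte (nvermicelliExec in the real code) */
        const unsigned char *s = buf_start;
        while (buf_start < buf_end && *buf_start == c &&
               (size_t)(buf_start - s) < (size_t)repeat) {
            buf_start++;
        }
        if ((size_t)(buf_start - s) == (size_t)repeat ||
            buf_start == buf_end) {
            return s; /* run long enough, or it runs off the buffer end */
        }
        /* run too short: resume the search after the mismatching byte,
         * exactly like "buf_start = rv" in the original loop */
    }
    return buf_end;
}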
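Every forward single-character scanner in the vermicelli_sse.h hunks (vermSearchAligned, vermUnalign and their nocase variants) reduces to the same three-step SSE idiom: a byte-wise compare, a movemask down to a 16-bit integer, and a count-trailing-zeros to locate the first hit. Below is a minimal standalone sketch of that idiom using raw SSE2 intrinsics in place of hyperscan's m128/load128/eq128/movemask128 wrappers; verm_sketch is an invented name, __builtin_ctz assumes GCC/Clang, and the buf_end-on-miss convention mirrors the exec-level API rather than the NULL-returning internal helpers.

#include <emmintrin.h>

static const unsigned char *verm_sketch(unsigned char c,
                                        const unsigned char *buf,
                                        const unsigned char *buf_end) {
    __m128i chars = _mm_set1_epi8((char)c); /* c broadcast to all 16 lanes */
    while (buf + 16 <= buf_end) {
        __m128i data = _mm_loadu_si128((const __m128i *)buf);
        /* one bit per byte lane: bit i is set iff buf[i] == c */
        unsigned z = (unsigned)_mm_movemask_epi8(_mm_cmpeq_epi8(chars, data));
        if (z) {
            return buf + __builtin_ctz(z); /* lowest set bit = first match */
        }
        buf += 16;
    }
    for (; buf < buf_end; buf++) { /* scalar tail for the last < 16 bytes */
        if (*buf == c) {
            return buf;
        }
    }
    return buf_end; /* no match */
}

The nocase variants in the diff are the same loop with both haystack and needle masked by CASE_CLEAR before the compare, which folds ASCII case together.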
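The double-character (dverm) hunks locate a two-byte pair in one pass: the match mask for c2 is shifted down one byte and AND-ed with the mask for c1, so lane i survives only where buf[i] == c1 and buf[i+1] == c2. The shift feeds a zero into lane 15, which is why the aligned loops patch bit 15 from buf[15]/buf[16] explicitly. A per-block sketch of just that mask computation, under the same assumptions as above (raw SSE2 intrinsics, dverm_block_mask is an invented name):

#include <emmintrin.h>

/* Match mask for the pair (c1, c2) over one 16-byte block. Bit i is set
 * iff block16[i] == c1 and block16[i + 1] == c2. Bit 15 is always clear,
 * because _mm_srli_si128 shifts a zero into the top lane, so a pair that
 * straddles the block boundary must be fixed up by the caller. */
static unsigned dverm_block_mask(unsigned char c1, unsigned char c2,
                                 const unsigned char *block16) {
    __m128i data = _mm_loadu_si128((const __m128i *)block16);
    __m128i eq1  = _mm_cmpeq_epi8(_mm_set1_epi8((char)c1), data);
    __m128i eq2  = _mm_cmpeq_epi8(_mm_set1_epi8((char)c2), data);
    /* lane i of the shifted mask holds "block16[i + 1] == c2" */
    __m128i pair = _mm_and_si128(eq1, _mm_srli_si128(eq2, 1));
    return (unsigned)_mm_movemask_epi8(pair);
}

The reverse (rdverm) hunks mirror this with a left byte shift, lshiftbyte_m128, and patch bit 0 from buf_end[-17]/buf_end[-16] instead.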
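All of the reverse scanners funnel through lastMatchOffset, which converts a 16-bit match mask over the block ending at buf_end into a pointer to the last match: the highest set bit is found with a count-leading-zeros, the mirror image of the ctz32 used on the forward paths. A standalone restatement under the same assumptions (last_match_sketch is an invented name; as the assert in the original notes, z must be non-zero, since __builtin_clz(0) is undefined):

/* Pointer to the last match in [buf_end - 16, buf_end) given a non-zero
 * 16-bit match mask z; 31 - clz(z) is the index of the highest set bit. */
static const unsigned char *last_match_sketch(const unsigned char *buf_end,
                                              unsigned z) {
    return buf_end - 16 + (31 - __builtin_clz(z));
}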