author | Ivan Blinkov <ivan@blinkov.ru> | 2022-02-10 16:47:10 +0300
---|---|---
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:47:10 +0300
commit | 1aeb9a455974457866f78722ad98114bafc84e8a (patch) |
tree | e4340eaf1668684d83a0a58c36947c5def5350ad | /contrib/libs/hyperscan/src/hwlm
parent | bd5ef432f5cfb1e18851381329d94665a4c22470 (diff) |
download | ydb-1aeb9a455974457866f78722ad98114bafc84e8a.tar.gz |
Restoring authorship annotation for Ivan Blinkov <ivan@blinkov.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/hyperscan/src/hwlm')
-rw-r--r-- | contrib/libs/hyperscan/src/hwlm/hwlm.c | 80
-rw-r--r-- | contrib/libs/hyperscan/src/hwlm/hwlm.h | 42
-rw-r--r-- | contrib/libs/hyperscan/src/hwlm/hwlm_build.cpp | 156
-rw-r--r-- | contrib/libs/hyperscan/src/hwlm/hwlm_build.h | 114
-rw-r--r-- | contrib/libs/hyperscan/src/hwlm/hwlm_literal.cpp | 24
-rw-r--r-- | contrib/libs/hyperscan/src/hwlm/hwlm_literal.h | 68
-rw-r--r-- | contrib/libs/hyperscan/src/hwlm/noodle_build.cpp | 184
-rw-r--r-- | contrib/libs/hyperscan/src/hwlm/noodle_build.h | 16
-rw-r--r-- | contrib/libs/hyperscan/src/hwlm/noodle_engine.c | 394
-rw-r--r-- | contrib/libs/hyperscan/src/hwlm/noodle_engine.h | 10
-rw-r--r-- | contrib/libs/hyperscan/src/hwlm/noodle_engine_avx2.c | 58
-rw-r--r-- | contrib/libs/hyperscan/src/hwlm/noodle_engine_avx512.c | 382
-rw-r--r-- | contrib/libs/hyperscan/src/hwlm/noodle_engine_sse.c | 56
-rw-r--r-- | contrib/libs/hyperscan/src/hwlm/noodle_internal.h | 24
14 files changed, 804 insertions, 804 deletions
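The hunks below are whitespace-only re-annotations of the HWLM ("Hamster Wheel Literal Matcher") sources; among them, hwlm.h documents the public entry points hwlmExec / hwlmExecStreaming and the HWLMCallback type. For orientation only, here is a minimal, hypothetical sketch of how a caller might drive the block-mode scanner as that header describes it; the HWLM table and hs_scratch construction are elided and assumed to come from the library's own compile and runtime code, and nothing in this sketch is part of the commit.

```c
#include <stdio.h>

#include "hwlm.h"   /* include path assumed; the header lives in src/hwlm/ */

/* Callback shape from hwlm.h: receives the end-of-match offset, the literal
 * id assigned at build time, and the scratch pointer passed into hwlmExec. */
static hwlmcb_rv_t on_match(size_t end, u32 id, struct hs_scratch *scratch) {
    (void)scratch;
    printf("literal %u matched, end offset %zu\n", id, end);
    /* Keep all groups enabled to continue scanning; returning
     * HWLM_TERMINATE_MATCHING would stop the scan, per the header docs. */
    return HWLM_ALL_GROUPS;
}

/* 'table' and 'scratch' are assumed to have been produced elsewhere
 * (hwlmBuild and the scratch allocator); this only shows the call shape
 * of the block-mode entry point. */
static void scan_block(const struct HWLM *table, struct hs_scratch *scratch,
                       const u8 *buf, size_t len) {
    hwlm_error_t rc = hwlmExec(table, buf, len, /*start=*/0, on_match,
                               scratch, HWLM_ALL_GROUPS);
    if (rc == HWLM_TERMINATED) {
        /* the callback requested termination */
    }
}
```

The streaming variant, hwlmExecStreaming, follows the same callback contract but, as the header comments in the diff note, reads the history buffer, history length, and main buffer out of the scratch structure rather than taking them as arguments.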
diff --git a/contrib/libs/hyperscan/src/hwlm/hwlm.c b/contrib/libs/hyperscan/src/hwlm/hwlm.c index 8cf585a98c..666bb80803 100644 --- a/contrib/libs/hyperscan/src/hwlm/hwlm.c +++ b/contrib/libs/hyperscan/src/hwlm/hwlm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -37,7 +37,7 @@ #include "fdr/fdr.h" #include "nfa/accel.h" #include "nfa/shufti.h" -#include "nfa/truffle.h" +#include "nfa/truffle.h" #include "nfa/vermicelli.h" #include <string.h> @@ -65,13 +65,13 @@ const u8 *run_hwlm_accel(const union AccelAux *aux, const u8 *ptr, case ACCEL_SHUFTI: DEBUG_PRINTF("single shufti\n"); return shuftiExec(aux->shufti.lo, aux->shufti.hi, ptr, end); - case ACCEL_TRUFFLE: - DEBUG_PRINTF("truffle\n"); - return truffleExec(aux->truffle.mask1, aux->truffle.mask2, ptr, end); + case ACCEL_TRUFFLE: + DEBUG_PRINTF("truffle\n"); + return truffleExec(aux->truffle.mask1, aux->truffle.mask2, ptr, end); default: /* no acceleration, fall through and return current ptr */ - DEBUG_PRINTF("no accel; %u\n", (int)aux->accel_type); - assert(aux->accel_type == ACCEL_NONE); + DEBUG_PRINTF("no accel; %u\n", (int)aux->accel_type); + assert(aux->accel_type == ACCEL_NONE); return ptr; } } @@ -170,10 +170,10 @@ void do_accel_streaming(const union AccelAux *aux, const u8 *hbuf, size_t hlen, } hwlm_error_t hwlmExec(const struct HWLM *t, const u8 *buf, size_t len, - size_t start, HWLMCallback cb, struct hs_scratch *scratch, + size_t start, HWLMCallback cb, struct hs_scratch *scratch, hwlm_group_t groups) { - assert(t); - + assert(t); + DEBUG_PRINTF("buf len=%zu, start=%zu, groups=%llx\n", len, start, groups); if (!groups) { DEBUG_PRINTF("groups all off\n"); @@ -184,26 +184,26 @@ hwlm_error_t hwlmExec(const struct HWLM *t, const u8 *buf, size_t len, if (t->type == HWLM_ENGINE_NOOD) { DEBUG_PRINTF("calling noodExec\n"); - return noodExec(HWLM_C_DATA(t), buf, len, start, cb, scratch); - } - - assert(t->type == HWLM_ENGINE_FDR); - const union AccelAux *aa = &t->accel0; - if ((groups & ~t->accel1_groups) == 0) { - DEBUG_PRINTF("using hq accel %hhu\n", t->accel1.accel_type); - aa = &t->accel1; + return noodExec(HWLM_C_DATA(t), buf, len, start, cb, scratch); } - do_accel_block(aa, buf, len, &start); - DEBUG_PRINTF("calling frankie (groups=%08llx, start=%zu)\n", groups, start); - return fdrExec(HWLM_C_DATA(t), buf, len, start, cb, scratch, groups); + + assert(t->type == HWLM_ENGINE_FDR); + const union AccelAux *aa = &t->accel0; + if ((groups & ~t->accel1_groups) == 0) { + DEBUG_PRINTF("using hq accel %hhu\n", t->accel1.accel_type); + aa = &t->accel1; + } + do_accel_block(aa, buf, len, &start); + DEBUG_PRINTF("calling frankie (groups=%08llx, start=%zu)\n", groups, start); + return fdrExec(HWLM_C_DATA(t), buf, len, start, cb, scratch, groups); } -hwlm_error_t hwlmExecStreaming(const struct HWLM *t, size_t len, size_t start, - HWLMCallback cb, struct hs_scratch *scratch, - hwlm_group_t groups) { - assert(t); - assert(scratch); - +hwlm_error_t hwlmExecStreaming(const struct HWLM *t, size_t len, size_t start, + HWLMCallback cb, struct hs_scratch *scratch, + hwlm_group_t groups) { + assert(t); + assert(scratch); + const u8 *hbuf = scratch->core_info.hbuf; const size_t hlen = scratch->core_info.hlen; const u8 *buf = scratch->core_info.buf; @@ -222,21 +222,21 @@ hwlm_error_t hwlmExecStreaming(const struct HWLM *t, size_t len, 
size_t start, // If we've been handed a start offset, we can use a block mode scan at // that offset. if (start) { - return noodExec(HWLM_C_DATA(t), buf, len, start, cb, scratch); + return noodExec(HWLM_C_DATA(t), buf, len, start, cb, scratch); } else { return noodExecStreaming(HWLM_C_DATA(t), hbuf, hlen, buf, len, cb, - scratch); + scratch); } } - - assert(t->type == HWLM_ENGINE_FDR); - const union AccelAux *aa = &t->accel0; - if ((groups & ~t->accel1_groups) == 0) { - DEBUG_PRINTF("using hq accel %hhu\n", t->accel1.accel_type); - aa = &t->accel1; - } - do_accel_streaming(aa, hbuf, hlen, buf, len, &start); - DEBUG_PRINTF("calling frankie (groups=%08llx, start=%zu)\n", groups, start); - return fdrExecStreaming(HWLM_C_DATA(t), hbuf, hlen, buf, len, start, cb, - scratch, groups); + + assert(t->type == HWLM_ENGINE_FDR); + const union AccelAux *aa = &t->accel0; + if ((groups & ~t->accel1_groups) == 0) { + DEBUG_PRINTF("using hq accel %hhu\n", t->accel1.accel_type); + aa = &t->accel1; + } + do_accel_streaming(aa, hbuf, hlen, buf, len, &start); + DEBUG_PRINTF("calling frankie (groups=%08llx, start=%zu)\n", groups, start); + return fdrExecStreaming(HWLM_C_DATA(t), hbuf, hlen, buf, len, start, cb, + scratch, groups); } diff --git a/contrib/libs/hyperscan/src/hwlm/hwlm.h b/contrib/libs/hyperscan/src/hwlm/hwlm.h index 224ecf6bf9..4f21ccf038 100644 --- a/contrib/libs/hyperscan/src/hwlm/hwlm.h +++ b/contrib/libs/hyperscan/src/hwlm/hwlm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -71,17 +71,17 @@ typedef hwlm_group_t hwlmcb_rv_t; * designed for a different architecture). */ #define HWLM_ERROR_UNKNOWN 2 -/** \brief Max length of the literal passed to HWLM. */ -#define HWLM_LITERAL_MAX_LEN 8 - +/** \brief Max length of the literal passed to HWLM. */ +#define HWLM_LITERAL_MAX_LEN 8 + struct hs_scratch; struct HWLM; /** \brief The type for an HWLM callback. * - * This callback receives an end-of-match offset, the ID of the match and - * the context pointer that was passed into \ref hwlmExec or - * \ref hwlmExecStreaming. + * This callback receives an end-of-match offset, the ID of the match and + * the context pointer that was passed into \ref hwlmExec or + * \ref hwlmExecStreaming. * * A callback return of \ref HWLM_TERMINATE_MATCHING will stop matching. * @@ -95,8 +95,8 @@ struct HWLM; * belonging to the literal which was active at the when the end match location * was first reached. */ -typedef hwlmcb_rv_t (*HWLMCallback)(size_t end, u32 id, - struct hs_scratch *scratch); +typedef hwlmcb_rv_t (*HWLMCallback)(size_t end, u32 id, + struct hs_scratch *scratch); /** \brief Match strings in table. * @@ -107,36 +107,36 @@ typedef hwlmcb_rv_t (*HWLMCallback)(size_t end, u32 id, * Returns \ref HWLM_TERMINATED if scanning is cancelled due to the callback * returning \ref HWLM_TERMINATE_MATCHING. * - * \p start is the first offset at which a match may start. Note: match - * starts may include masks overhanging the main literal. + * \p start is the first offset at which a match may start. Note: match + * starts may include masks overhanging the main literal. * * The underlying engine may choose not to report any match which starts before * the first possible match of a literal which is in the initial group mask. 
*/ hwlm_error_t hwlmExec(const struct HWLM *tab, const u8 *buf, size_t len, - size_t start, HWLMCallback callback, - struct hs_scratch *scratch, hwlm_group_t groups); + size_t start, HWLMCallback callback, + struct hs_scratch *scratch, hwlm_group_t groups); /** \brief As for \ref hwlmExec, but a streaming case across two buffers. * * \p len is the length of the main buffer to be scanned. * * \p start is an advisory hint representing the first offset at which a match - * may start. Some underlying literal matches may not respect it. Note: match - * starts may include masks overhanging the main literal. - * - * \p scratch is used to access the history buffer, history length and - * the main buffer. + * may start. Some underlying literal matches may not respect it. Note: match + * starts may include masks overhanging the main literal. * + * \p scratch is used to access the history buffer, history length and + * the main buffer. + * * Two buffers/lengths are provided. Matches that occur entirely within * the history buffer will not be reported by this function. The offsets * reported for the main buffer are relative to the start of that buffer (a * match at byte 10 of the main buffer is reported as 10). Matches that start * in the history buffer will have starts reported with 'negative' values. */ -hwlm_error_t hwlmExecStreaming(const struct HWLM *tab, size_t len, size_t start, - HWLMCallback callback, - struct hs_scratch *scratch, hwlm_group_t groups); +hwlm_error_t hwlmExecStreaming(const struct HWLM *tab, size_t len, size_t start, + HWLMCallback callback, + struct hs_scratch *scratch, hwlm_group_t groups); #ifdef __cplusplus } /* extern "C" */ diff --git a/contrib/libs/hyperscan/src/hwlm/hwlm_build.cpp b/contrib/libs/hyperscan/src/hwlm/hwlm_build.cpp index 1b33281529..2a9b9d79f6 100644 --- a/contrib/libs/hyperscan/src/hwlm/hwlm_build.cpp +++ b/contrib/libs/hyperscan/src/hwlm/hwlm_build.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -29,24 +29,24 @@ /** \file * \brief Hamster Wheel Literal Matcher: build code. 
*/ - -#include "hwlm_build.h" - + +#include "hwlm_build.h" + #include "grey.h" #include "hwlm.h" #include "hwlm_internal.h" -#include "hwlm_literal.h" +#include "hwlm_literal.h" #include "noodle_engine.h" #include "noodle_build.h" -#include "scratch.h" +#include "scratch.h" #include "ue2common.h" #include "fdr/fdr_compile.h" -#include "fdr/fdr_compile_internal.h" -#include "fdr/fdr_engine_description.h" -#include "fdr/teddy_engine_description.h" +#include "fdr/fdr_compile_internal.h" +#include "fdr/fdr_engine_description.h" +#include "fdr/teddy_engine_description.h" #include "util/compile_context.h" #include "util/compile_error.h" -#include "util/make_unique.h" +#include "util/make_unique.h" #include "util/ue2string.h" #include <cassert> @@ -57,27 +57,27 @@ using namespace std; namespace ue2 { -HWLMProto::HWLMProto(u8 engType_in, vector<hwlmLiteral> lits_in) - : engType(engType_in), lits(move(lits_in)) {} +HWLMProto::HWLMProto(u8 engType_in, vector<hwlmLiteral> lits_in) + : engType(engType_in), lits(move(lits_in)) {} -HWLMProto::HWLMProto(u8 engType_in, - unique_ptr<FDREngineDescription> eng_in, - vector<hwlmLiteral> lits_in, - map<u32, vector<u32>> bucketToLits_in, - bool make_small_in) - : engType(engType_in), fdrEng(move(eng_in)), lits(move(lits_in)), - bucketToLits(move(bucketToLits_in)), make_small(make_small_in) {} +HWLMProto::HWLMProto(u8 engType_in, + unique_ptr<FDREngineDescription> eng_in, + vector<hwlmLiteral> lits_in, + map<u32, vector<u32>> bucketToLits_in, + bool make_small_in) + : engType(engType_in), fdrEng(move(eng_in)), lits(move(lits_in)), + bucketToLits(move(bucketToLits_in)), make_small(make_small_in) {} -HWLMProto::HWLMProto(u8 engType_in, - unique_ptr<TeddyEngineDescription> eng_in, - vector<hwlmLiteral> lits_in, - map<u32, vector<u32>> bucketToLits_in, - bool make_small_in) - : engType(engType_in), teddyEng(move(eng_in)), - lits(move(lits_in)), - bucketToLits(move(bucketToLits_in)), make_small(make_small_in) {} +HWLMProto::HWLMProto(u8 engType_in, + unique_ptr<TeddyEngineDescription> eng_in, + vector<hwlmLiteral> lits_in, + map<u32, vector<u32>> bucketToLits_in, + bool make_small_in) + : engType(engType_in), teddyEng(move(eng_in)), + lits(move(lits_in)), + bucketToLits(move(bucketToLits_in)), make_small(make_small_in) {} -HWLMProto::~HWLMProto() {} +HWLMProto::~HWLMProto() {} static void dumpLits(UNUSED const vector<hwlmLiteral> &lits) { @@ -115,55 +115,55 @@ bool isNoodleable(const vector<hwlmLiteral> &lits, return false; } - return true; -} - -bytecode_ptr<HWLM> hwlmBuild(const HWLMProto &proto, const CompileContext &cc, - UNUSED hwlm_group_t expected_groups) { - size_t engSize = 0; - shared_ptr<void> eng; - - const auto &lits = proto.lits; - DEBUG_PRINTF("building table with %zu strings\n", lits.size()); - - if (proto.engType == HWLM_ENGINE_NOOD) { - DEBUG_PRINTF("build noodle table\n"); - const hwlmLiteral &lit = lits.front(); - auto noodle = noodBuildTable(lit); - if (noodle) { - engSize = noodle.size(); + return true; +} + +bytecode_ptr<HWLM> hwlmBuild(const HWLMProto &proto, const CompileContext &cc, + UNUSED hwlm_group_t expected_groups) { + size_t engSize = 0; + shared_ptr<void> eng; + + const auto &lits = proto.lits; + DEBUG_PRINTF("building table with %zu strings\n", lits.size()); + + if (proto.engType == HWLM_ENGINE_NOOD) { + DEBUG_PRINTF("build noodle table\n"); + const hwlmLiteral &lit = lits.front(); + auto noodle = noodBuildTable(lit); + if (noodle) { + engSize = noodle.size(); } - eng = move(noodle); - } else { - DEBUG_PRINTF("building a new 
deal\n"); - auto fdr = fdrBuildTable(proto, cc.grey); - if (fdr) { - engSize = fdr.size(); - } - eng = move(fdr); + eng = move(noodle); + } else { + DEBUG_PRINTF("building a new deal\n"); + auto fdr = fdrBuildTable(proto, cc.grey); + if (fdr) { + engSize = fdr.size(); + } + eng = move(fdr); } - if (!eng) { - return nullptr; + if (!eng) { + return nullptr; } - assert(engSize); - if (engSize > cc.grey.limitLiteralMatcherSize) { - throw ResourceLimitError(); - } - - const size_t hwlm_len = ROUNDUP_CL(sizeof(HWLM)) + engSize; - auto h = make_zeroed_bytecode_ptr<HWLM>(hwlm_len, 64); - - h->type = proto.engType; - memcpy(HWLM_DATA(h.get()), eng.get(), engSize); - - return h; + assert(engSize); + if (engSize > cc.grey.limitLiteralMatcherSize) { + throw ResourceLimitError(); + } + + const size_t hwlm_len = ROUNDUP_CL(sizeof(HWLM)) + engSize; + auto h = make_zeroed_bytecode_ptr<HWLM>(hwlm_len, 64); + + h->type = proto.engType; + memcpy(HWLM_DATA(h.get()), eng.get(), engSize); + + return h; } -unique_ptr<HWLMProto> -hwlmBuildProto(vector<hwlmLiteral> &lits, bool make_small, - const CompileContext &cc) { +unique_ptr<HWLMProto> +hwlmBuildProto(vector<hwlmLiteral> &lits, bool make_small, + const CompileContext &cc) { assert(!lits.empty()); dumpLits(lits); @@ -193,25 +193,25 @@ hwlmBuildProto(vector<hwlmLiteral> &lits, bool make_small, } } - unique_ptr<HWLMProto> proto; + unique_ptr<HWLMProto> proto; DEBUG_PRINTF("building table with %zu strings\n", lits.size()); assert(everyoneHasGroups(lits)); - if (isNoodleable(lits, cc)) { + if (isNoodleable(lits, cc)) { DEBUG_PRINTF("build noodle table\n"); - proto = ue2::make_unique<HWLMProto>(HWLM_ENGINE_NOOD, lits); + proto = ue2::make_unique<HWLMProto>(HWLM_ENGINE_NOOD, lits); } else { DEBUG_PRINTF("building a new deal\n"); - proto = fdrBuildProto(HWLM_ENGINE_FDR, lits, make_small, - cc.target_info, cc.grey); - if (!proto) { - return nullptr; + proto = fdrBuildProto(HWLM_ENGINE_FDR, lits, make_small, + cc.target_info, cc.grey); + if (!proto) { + return nullptr; } } - return proto; + return proto; } size_t hwlmSize(const HWLM *h) { diff --git a/contrib/libs/hyperscan/src/hwlm/hwlm_build.h b/contrib/libs/hyperscan/src/hwlm/hwlm_build.h index 91f227dce4..6b61cc1f0d 100644 --- a/contrib/libs/hyperscan/src/hwlm/hwlm_build.h +++ b/contrib/libs/hyperscan/src/hwlm/hwlm_build.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,9 +36,9 @@ #include "hwlm.h" #include "hwlm_literal.h" #include "ue2common.h" -#include "util/bytecode_ptr.h" +#include "util/bytecode_ptr.h" -#include <map> +#include <map> #include <memory> #include <vector> @@ -46,62 +46,62 @@ struct HWLM; namespace ue2 { -class FDREngineDescription; -class TeddyEngineDescription; +class FDREngineDescription; +class TeddyEngineDescription; struct CompileContext; struct Grey; -/** \brief Class representing a literal matcher prototype. */ -struct HWLMProto { - /** - * \brief Engine type to distinguish noodle from FDR and Teddy. - */ - u8 engType; - - /** - * \brief FDR engine description. - */ - std::unique_ptr<FDREngineDescription> fdrEng; - - /** - * \brief Teddy engine description. - */ - std::unique_ptr<TeddyEngineDescription> teddyEng; - - /** - * \brief HWLM literals passed from Rose. 
- */ - std::vector<hwlmLiteral> lits; - - /** - * \brief Bucket assignment info in FDR and Teddy - */ - std::map<u32, std::vector<u32>> bucketToLits; - - /** - * \brief Flag to optimise matcher for small size from Rose. - */ - bool make_small = false; - - HWLMProto(u8 engType_in, std::vector<hwlmLiteral> lits_in); - - HWLMProto(u8 engType_in, std::unique_ptr<FDREngineDescription> eng_in, - std::vector<hwlmLiteral> lits_in, - std::map<u32, std::vector<u32>> bucketToLits_in, - bool make_small_in); - - HWLMProto(u8 engType_in, std::unique_ptr<TeddyEngineDescription> eng_in, - std::vector<hwlmLiteral> lits_in, - std::map<u32, std::vector<u32>> bucketToLits_in, - bool make_small_in); - - ~HWLMProto(); +/** \brief Class representing a literal matcher prototype. */ +struct HWLMProto { + /** + * \brief Engine type to distinguish noodle from FDR and Teddy. + */ + u8 engType; + + /** + * \brief FDR engine description. + */ + std::unique_ptr<FDREngineDescription> fdrEng; + + /** + * \brief Teddy engine description. + */ + std::unique_ptr<TeddyEngineDescription> teddyEng; + + /** + * \brief HWLM literals passed from Rose. + */ + std::vector<hwlmLiteral> lits; + + /** + * \brief Bucket assignment info in FDR and Teddy + */ + std::map<u32, std::vector<u32>> bucketToLits; + + /** + * \brief Flag to optimise matcher for small size from Rose. + */ + bool make_small = false; + + HWLMProto(u8 engType_in, std::vector<hwlmLiteral> lits_in); + + HWLMProto(u8 engType_in, std::unique_ptr<FDREngineDescription> eng_in, + std::vector<hwlmLiteral> lits_in, + std::map<u32, std::vector<u32>> bucketToLits_in, + bool make_small_in); + + HWLMProto(u8 engType_in, std::unique_ptr<TeddyEngineDescription> eng_in, + std::vector<hwlmLiteral> lits_in, + std::map<u32, std::vector<u32>> bucketToLits_in, + bool make_small_in); + + ~HWLMProto(); }; /** \brief Build an \ref HWLM literal matcher runtime structure for a group of * literals. * - * \param proto Literal matcher prototype. + * \param proto Literal matcher prototype. * \param cc Compile context. * \param expected_groups FIXME: document me! * @@ -109,13 +109,13 @@ struct HWLMProto { * may result in a nullptr return value, or a std::bad_alloc exception being * thrown. 
*/ -bytecode_ptr<HWLM> hwlmBuild(const HWLMProto &proto, const CompileContext &cc, - hwlm_group_t expected_groups = HWLM_ALL_GROUPS); - -std::unique_ptr<HWLMProto> -hwlmBuildProto(std::vector<hwlmLiteral> &lits, bool make_small, - const CompileContext &cc); +bytecode_ptr<HWLM> hwlmBuild(const HWLMProto &proto, const CompileContext &cc, + hwlm_group_t expected_groups = HWLM_ALL_GROUPS); +std::unique_ptr<HWLMProto> +hwlmBuildProto(std::vector<hwlmLiteral> &lits, bool make_small, + const CompileContext &cc); + /** * Returns an estimate of the number of repeated characters on the end of a * literal that will make a literal set of size \a numLiterals suffer diff --git a/contrib/libs/hyperscan/src/hwlm/hwlm_literal.cpp b/contrib/libs/hyperscan/src/hwlm/hwlm_literal.cpp index 692f7c6c0e..baf774d35d 100644 --- a/contrib/libs/hyperscan/src/hwlm/hwlm_literal.cpp +++ b/contrib/libs/hyperscan/src/hwlm/hwlm_literal.cpp @@ -34,7 +34,7 @@ #include "util/compare.h" // for ourisalpha #include "util/ue2string.h" // for escapeString -#include <algorithm> +#include <algorithm> #include <iomanip> #include <sstream> @@ -86,21 +86,21 @@ hwlmLiteral::hwlmLiteral(const std::string &s_in, bool nocase_in, const vector<u8> &msk_in, const vector<u8> &cmp_in) : s(s_in), id(id_in), nocase(nocase_in), noruns(noruns_in), groups(groups_in), msk(msk_in), cmp(cmp_in) { - assert(s.size() <= HWLM_LITERAL_MAX_LEN); + assert(s.size() <= HWLM_LITERAL_MAX_LEN); assert(msk.size() <= HWLM_MASKLEN); assert(msk.size() == cmp.size()); - // If we've been handled a nocase literal, all letter characters must be - // upper-case. - if (nocase) { - upperString(s); - } - - DEBUG_PRINTF("literal '%s'%s, msk=%s, cmp=%s\n", escapeString(s).c_str(), - nocase ? " (nocase)" : "", dumpMask(msk).c_str(), + // If we've been handled a nocase literal, all letter characters must be + // upper-case. + if (nocase) { + upperString(s); + } + + DEBUG_PRINTF("literal '%s'%s, msk=%s, cmp=%s\n", escapeString(s).c_str(), + nocase ? " (nocase)" : "", dumpMask(msk).c_str(), dumpMask(cmp).c_str()); - + // Mask and compare vectors MUST be the same size. assert(msk.size() == cmp.size()); @@ -108,7 +108,7 @@ hwlmLiteral::hwlmLiteral(const std::string &s_in, bool nocase_in, assert(maskIsConsistent(s, nocase, msk, cmp)); // In the name of good hygiene, zap msk/cmp if msk is all zeroes. - if (all_of(begin(msk), end(msk), [](u8 val) { return val == 0; })) { + if (all_of(begin(msk), end(msk), [](u8 val) { return val == 0; })) { msk.clear(); cmp.clear(); } diff --git a/contrib/libs/hyperscan/src/hwlm/hwlm_literal.h b/contrib/libs/hyperscan/src/hwlm/hwlm_literal.h index 598de81471..6d709157fa 100644 --- a/contrib/libs/hyperscan/src/hwlm/hwlm_literal.h +++ b/contrib/libs/hyperscan/src/hwlm/hwlm_literal.h @@ -37,7 +37,7 @@ #include "ue2common.h" #include <string> -#include <tuple> +#include <tuple> #include <vector> namespace ue2 { @@ -45,8 +45,8 @@ namespace ue2 { /** \brief Max length of the hwlmLiteral::msk and hwlmLiteral::cmp vectors. */ #define HWLM_MASKLEN 8 -#define INVALID_LIT_ID ~0U - +#define INVALID_LIT_ID ~0U + /** \brief Class representing a literal, fed to \ref hwlmBuild. */ struct hwlmLiteral { std::string s; //!< \brief The literal itself. @@ -66,21 +66,21 @@ struct hwlmLiteral { * can be quashed by the literal matcher. */ bool noruns; - /** \brief included literal id. */ - u32 included_id = INVALID_LIT_ID; - - /** \brief Squash mask for FDR's confirm mask for included literals. 
- * - * In FDR confirm, if we have included literal in another bucket, - * we can use this mask to squash the bit for the bucket in FDR confirm - * mask and then run programs of included literal directly and avoid - * confirm work. - * - * This value is calculated in FDR compile code once bucket assignment is - * completed - */ - u8 squash = 0; - + /** \brief included literal id. */ + u32 included_id = INVALID_LIT_ID; + + /** \brief Squash mask for FDR's confirm mask for included literals. + * + * In FDR confirm, if we have included literal in another bucket, + * we can use this mask to squash the bit for the bucket in FDR confirm + * mask and then run programs of included literal directly and avoid + * confirm work. + * + * This value is calculated in FDR compile code once bucket assignment is + * completed + */ + u8 squash = 0; + /** \brief Set of groups that literal belongs to. * * Use \ref HWLM_ALL_GROUPS for a literal that could match regardless of @@ -120,27 +120,27 @@ struct hwlmLiteral { hwlmLiteral(const std::string &s_in, bool nocase_in, bool noruns_in, u32 id_in, hwlm_group_t groups_in, const std::vector<u8> &msk_in, const std::vector<u8> &cmp_in); - + /** \brief Simple constructor: no group information, no msk/cmp. * * This constructor is only used in internal unit test. */ - hwlmLiteral(const std::string &s_in, bool nocase_in, u32 id_in) - : hwlmLiteral(s_in, nocase_in, false, id_in, HWLM_ALL_GROUPS, {}, {}) {} + hwlmLiteral(const std::string &s_in, bool nocase_in, u32 id_in) + : hwlmLiteral(s_in, nocase_in, false, id_in, HWLM_ALL_GROUPS, {}, {}) {} }; -inline -bool operator<(const hwlmLiteral &a, const hwlmLiteral &b) { - return std::tie(a.id, a.s, a.nocase, a.noruns, a.groups, a.msk, a.cmp) < - std::tie(b.id, b.s, b.nocase, b.noruns, b.groups, b.msk, b.cmp); -} - -inline -bool operator==(const hwlmLiteral &a, const hwlmLiteral &b) { - return a.id == b.id && a.s == b.s && a.nocase == b.nocase && - a.noruns == b.noruns && a.groups == b.groups && a.msk == b.msk && - a.cmp == b.cmp; -} - +inline +bool operator<(const hwlmLiteral &a, const hwlmLiteral &b) { + return std::tie(a.id, a.s, a.nocase, a.noruns, a.groups, a.msk, a.cmp) < + std::tie(b.id, b.s, b.nocase, b.noruns, b.groups, b.msk, b.cmp); +} + +inline +bool operator==(const hwlmLiteral &a, const hwlmLiteral &b) { + return a.id == b.id && a.s == b.s && a.nocase == b.nocase && + a.noruns == b.noruns && a.groups == b.groups && a.msk == b.msk && + a.cmp == b.cmp; +} + /** * Consistency test; returns false if the given msk/cmp test can never match * the literal string s. diff --git a/contrib/libs/hyperscan/src/hwlm/noodle_build.cpp b/contrib/libs/hyperscan/src/hwlm/noodle_build.cpp index a0128d0ad7..4bd3af0103 100644 --- a/contrib/libs/hyperscan/src/hwlm/noodle_build.cpp +++ b/contrib/libs/hyperscan/src/hwlm/noodle_build.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,53 +26,53 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** - * \file +/** + * \file * \brief Noodle literal matcher: build code. 
*/ #include "noodle_build.h" - -#include "hwlm_literal.h" + +#include "hwlm_literal.h" #include "noodle_internal.h" -#include "util/bitutils.h" +#include "util/bitutils.h" #include "util/compare.h" #include "util/verify_types.h" -#include "ue2common.h" - -#include <cstring> // for memcpy -#include <vector> - -using std::vector; +#include "ue2common.h" +#include <cstring> // for memcpy +#include <vector> + +using std::vector; + namespace ue2 { static -u64a make_u64a_mask(const vector<u8> &v) { - assert(v.size() <= sizeof(u64a)); - if (v.size() > sizeof(u64a)) { - throw std::exception(); - } - - u64a mask = 0; - size_t len = v.size(); - unsigned char *m = (unsigned char *)&mask; - DEBUG_PRINTF("making mask len %zu\n", len); - memcpy(m, &v[0], len); - return mask; -} - -static -size_t findNoodFragOffset(const hwlmLiteral &lit) { - const auto &s = lit.s; - const size_t len = lit.s.length(); - +u64a make_u64a_mask(const vector<u8> &v) { + assert(v.size() <= sizeof(u64a)); + if (v.size() > sizeof(u64a)) { + throw std::exception(); + } + + u64a mask = 0; + size_t len = v.size(); + unsigned char *m = (unsigned char *)&mask; + DEBUG_PRINTF("making mask len %zu\n", len); + memcpy(m, &v[0], len); + return mask; +} + +static +size_t findNoodFragOffset(const hwlmLiteral &lit) { + const auto &s = lit.s; + const size_t len = lit.s.length(); + size_t offset = 0; for (size_t i = 0; i + 1 < len; i++) { int diff = 0; - const char c = s[i]; - const char d = s[i + 1]; - if (lit.nocase && ourisalpha(c)) { + const char c = s[i]; + const char d = s[i + 1]; + if (lit.nocase && ourisalpha(c)) { diff = (mytoupper(c) != mytoupper(d)); } else { diff = (c != d); @@ -85,60 +85,60 @@ size_t findNoodFragOffset(const hwlmLiteral &lit) { return offset; } -bytecode_ptr<noodTable> noodBuildTable(const hwlmLiteral &lit) { - const auto &s = lit.s; - - size_t mask_len = std::max(s.length(), lit.msk.size()); - DEBUG_PRINTF("mask is %zu bytes\n", lit.msk.size()); - assert(mask_len <= 8); - assert(lit.msk.size() == lit.cmp.size()); - - vector<u8> n_msk(mask_len); - vector<u8> n_cmp(mask_len); - - for (unsigned i = mask_len - lit.msk.size(), j = 0; i < mask_len; - i++, j++) { - DEBUG_PRINTF("m[%u] %hhx c[%u] %hhx\n", i, lit.msk[j], i, lit.cmp[j]); - n_msk[i] = lit.msk[j]; - n_cmp[i] = lit.cmp[j]; - } - - size_t s_off = mask_len - s.length(); - for (unsigned i = s_off; i < mask_len; i++) { - u8 c = s[i - s_off]; - u8 si_msk = lit.nocase && ourisalpha(c) ? (u8)CASE_CLEAR : (u8)0xff; - n_msk[i] |= si_msk; - n_cmp[i] |= c & si_msk; - assert((n_cmp[i] & si_msk) == c); - DEBUG_PRINTF("m[%u] %hhx c[%u] %hhx '%c'\n", i, n_msk[i], i, n_cmp[i], - ourisprint(c) ? (char)c : '.'); - } - - auto n = make_zeroed_bytecode_ptr<noodTable>(sizeof(noodTable)); +bytecode_ptr<noodTable> noodBuildTable(const hwlmLiteral &lit) { + const auto &s = lit.s; + + size_t mask_len = std::max(s.length(), lit.msk.size()); + DEBUG_PRINTF("mask is %zu bytes\n", lit.msk.size()); + assert(mask_len <= 8); + assert(lit.msk.size() == lit.cmp.size()); + + vector<u8> n_msk(mask_len); + vector<u8> n_cmp(mask_len); + + for (unsigned i = mask_len - lit.msk.size(), j = 0; i < mask_len; + i++, j++) { + DEBUG_PRINTF("m[%u] %hhx c[%u] %hhx\n", i, lit.msk[j], i, lit.cmp[j]); + n_msk[i] = lit.msk[j]; + n_cmp[i] = lit.cmp[j]; + } + + size_t s_off = mask_len - s.length(); + for (unsigned i = s_off; i < mask_len; i++) { + u8 c = s[i - s_off]; + u8 si_msk = lit.nocase && ourisalpha(c) ? 
(u8)CASE_CLEAR : (u8)0xff; + n_msk[i] |= si_msk; + n_cmp[i] |= c & si_msk; + assert((n_cmp[i] & si_msk) == c); + DEBUG_PRINTF("m[%u] %hhx c[%u] %hhx '%c'\n", i, n_msk[i], i, n_cmp[i], + ourisprint(c) ? (char)c : '.'); + } + + auto n = make_zeroed_bytecode_ptr<noodTable>(sizeof(noodTable)); assert(n); - DEBUG_PRINTF("size of nood %zu\n", sizeof(noodTable)); - - size_t key_offset = findNoodFragOffset(lit); - - n->id = lit.id; - n->single = s.length() == 1 ? 1 : 0; - n->key_offset = verify_u8(s.length() - key_offset); - n->nocase = lit.nocase ? 1 : 0; - n->key0 = s[key_offset]; - if (n->single) { - n->key1 = 0; - } else { - n->key1 = s[key_offset + 1]; - } - n->msk = make_u64a_mask(n_msk); - n->cmp = make_u64a_mask(n_cmp); - n->msk_len = mask_len; + DEBUG_PRINTF("size of nood %zu\n", sizeof(noodTable)); + + size_t key_offset = findNoodFragOffset(lit); + + n->id = lit.id; + n->single = s.length() == 1 ? 1 : 0; + n->key_offset = verify_u8(s.length() - key_offset); + n->nocase = lit.nocase ? 1 : 0; + n->key0 = s[key_offset]; + if (n->single) { + n->key1 = 0; + } else { + n->key1 = s[key_offset + 1]; + } + n->msk = make_u64a_mask(n_msk); + n->cmp = make_u64a_mask(n_cmp); + n->msk_len = mask_len; return n; } -size_t noodSize(const noodTable *) { - return sizeof(noodTable); +size_t noodSize(const noodTable *) { + return sizeof(noodTable); } } // namespace ue2 @@ -150,17 +150,17 @@ namespace ue2 { void noodPrintStats(const noodTable *n, FILE *f) { fprintf(f, "Noodle table\n"); - fprintf(f, "Key Offset: %u\n", n->key_offset); - fprintf(f, "Msk: %llx Cmp: %llx MskLen %u\n", - n->msk >> 8 * (8 - n->msk_len), n->cmp >> 8 * (8 - n->msk_len), - n->msk_len); + fprintf(f, "Key Offset: %u\n", n->key_offset); + fprintf(f, "Msk: %llx Cmp: %llx MskLen %u\n", + n->msk >> 8 * (8 - n->msk_len), n->cmp >> 8 * (8 - n->msk_len), + n->msk_len); fprintf(f, "String: "); - for (u32 i = 0; i < n->msk_len; i++) { - const u8 *m = (const u8 *)&n->cmp; - if (isgraph(m[i]) && m[i] != '\\') { - fprintf(f, "%c", m[i]); + for (u32 i = 0; i < n->msk_len; i++) { + const u8 *m = (const u8 *)&n->cmp; + if (isgraph(m[i]) && m[i] != '\\') { + fprintf(f, "%c", m[i]); } else { - fprintf(f, "\\x%02hhx", m[i]); + fprintf(f, "\\x%02hhx", m[i]); } } fprintf(f, "\n"); diff --git a/contrib/libs/hyperscan/src/hwlm/noodle_build.h b/contrib/libs/hyperscan/src/hwlm/noodle_build.h index b5725f0827..c721e7485f 100644 --- a/contrib/libs/hyperscan/src/hwlm/noodle_build.h +++ b/contrib/libs/hyperscan/src/hwlm/noodle_build.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,20 +30,20 @@ * \brief Noodle literal matcher: build code. */ -#ifndef NOODLE_BUILD_H -#define NOODLE_BUILD_H +#ifndef NOODLE_BUILD_H +#define NOODLE_BUILD_H #include "ue2common.h" -#include "util/bytecode_ptr.h" +#include "util/bytecode_ptr.h" struct noodTable; namespace ue2 { -struct hwlmLiteral; - +struct hwlmLiteral; + /** \brief Construct a Noodle matcher for the given literal. 
*/ -bytecode_ptr<noodTable> noodBuildTable(const hwlmLiteral &lit); +bytecode_ptr<noodTable> noodBuildTable(const hwlmLiteral &lit); size_t noodSize(const noodTable *n); @@ -61,5 +61,5 @@ void noodPrintStats(const noodTable *n, FILE *f); #endif // DUMP_SUPPORT -#endif /* NOODLE_BUILD_H */ +#endif /* NOODLE_BUILD_H */ diff --git a/contrib/libs/hyperscan/src/hwlm/noodle_engine.c b/contrib/libs/hyperscan/src/hwlm/noodle_engine.c index d4f6902a2d..5ecbee679a 100644 --- a/contrib/libs/hyperscan/src/hwlm/noodle_engine.c +++ b/contrib/libs/hyperscan/src/hwlm/noodle_engine.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -32,15 +32,15 @@ #include "hwlm.h" #include "noodle_engine.h" #include "noodle_internal.h" -#include "scratch.h" +#include "scratch.h" #include "ue2common.h" -#include "util/arch.h" +#include "util/arch.h" #include "util/bitutils.h" #include "util/compare.h" -#include "util/intrinsics.h" -#include "util/join.h" +#include "util/intrinsics.h" +#include "util/join.h" #include "util/masked_move.h" -#include "util/partial_store.h" +#include "util/partial_store.h" #include "util/simd_utils.h" #include <ctype.h> @@ -51,28 +51,28 @@ struct cb_info { HWLMCallback cb; //!< callback function called on match u32 id; //!< ID to pass to callback on match - struct hs_scratch *scratch; //!< scratch to pass to callback + struct hs_scratch *scratch; //!< scratch to pass to callback size_t offsetAdj; //!< used in streaming mode }; -#if defined(HAVE_AVX512) -#define CHUNKSIZE 64 -#define MASK_TYPE m512 -#define Z_BITS 64 -#define Z_TYPE u64a -#elif defined(HAVE_AVX2) -#define CHUNKSIZE 32 -#define MASK_TYPE m256 -#define Z_BITS 32 -#define Z_TYPE u32 -#else -#define CHUNKSIZE 16 -#define MASK_TYPE m128 -#define Z_BITS 32 -#define Z_TYPE u32 -#endif - - +#if defined(HAVE_AVX512) +#define CHUNKSIZE 64 +#define MASK_TYPE m512 +#define Z_BITS 64 +#define Z_TYPE u64a +#elif defined(HAVE_AVX2) +#define CHUNKSIZE 32 +#define MASK_TYPE m256 +#define Z_BITS 32 +#define Z_TYPE u32 +#else +#define CHUNKSIZE 16 +#define MASK_TYPE m128 +#define Z_BITS 32 +#define Z_TYPE u32 +#endif + + #define RETURN_IF_TERMINATED(x) \ { \ if ((x) == HWLM_TERMINATED) { \ @@ -83,10 +83,10 @@ struct cb_info { #define SINGLE_ZSCAN() \ do { \ while (unlikely(z)) { \ - Z_TYPE pos = JOIN(findAndClearLSB_, Z_BITS)(&z); \ + Z_TYPE pos = JOIN(findAndClearLSB_, Z_BITS)(&z); \ size_t matchPos = d - buf + pos; \ - DEBUG_PRINTF("match pos %zu\n", matchPos); \ - hwlmcb_rv_t rv = final(n, buf, len, 1, cbi, matchPos); \ + DEBUG_PRINTF("match pos %zu\n", matchPos); \ + hwlmcb_rv_t rv = final(n, buf, len, 1, cbi, matchPos); \ RETURN_IF_TERMINATED(rv); \ } \ } while (0) @@ -94,10 +94,10 @@ struct cb_info { #define DOUBLE_ZSCAN() \ do { \ while (unlikely(z)) { \ - Z_TYPE pos = JOIN(findAndClearLSB_, Z_BITS)(&z); \ + Z_TYPE pos = JOIN(findAndClearLSB_, Z_BITS)(&z); \ size_t matchPos = d - buf + pos - 1; \ - DEBUG_PRINTF("match pos %zu\n", matchPos); \ - hwlmcb_rv_t rv = final(n, buf, len, 0, cbi, matchPos); \ + DEBUG_PRINTF("match pos %zu\n", matchPos); \ + hwlmcb_rv_t rv = final(n, buf, len, 0, cbi, matchPos); \ RETURN_IF_TERMINATED(rv); \ } \ } while (0) @@ -111,37 +111,37 @@ u8 caseClear8(u8 x, bool noCase) { // is used only for single chars with case insensitivity used correctly, // so it can go straight to the callback if we get 
this far. static really_inline -hwlm_error_t final(const struct noodTable *n, const u8 *buf, UNUSED size_t len, - char single, const struct cb_info *cbi, size_t pos) { - if (single) { - if (n->msk_len == 1) { - goto match; +hwlm_error_t final(const struct noodTable *n, const u8 *buf, UNUSED size_t len, + char single, const struct cb_info *cbi, size_t pos) { + if (single) { + if (n->msk_len == 1) { + goto match; } } - assert(len >= n->msk_len); - u64a v = - partial_load_u64a(buf + pos + n->key_offset - n->msk_len, n->msk_len); - DEBUG_PRINTF("v %016llx msk %016llx cmp %016llx\n", v, n->msk, n->cmp); - if ((v & n->msk) != n->cmp) { - /* mask didn't match */ - return HWLM_SUCCESS; - } - -match: - pos -= cbi->offsetAdj; - DEBUG_PRINTF("match @ %zu\n", pos + n->key_offset); - hwlmcb_rv_t rv = cbi->cb(pos + n->key_offset - 1, cbi->id, cbi->scratch); + assert(len >= n->msk_len); + u64a v = + partial_load_u64a(buf + pos + n->key_offset - n->msk_len, n->msk_len); + DEBUG_PRINTF("v %016llx msk %016llx cmp %016llx\n", v, n->msk, n->cmp); + if ((v & n->msk) != n->cmp) { + /* mask didn't match */ + return HWLM_SUCCESS; + } + +match: + pos -= cbi->offsetAdj; + DEBUG_PRINTF("match @ %zu\n", pos + n->key_offset); + hwlmcb_rv_t rv = cbi->cb(pos + n->key_offset - 1, cbi->id, cbi->scratch); if (rv == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATED; } return HWLM_SUCCESS; } -#if defined(HAVE_AVX512) -#define CHUNKSIZE 64 -#define MASK_TYPE m512 -#include "noodle_engine_avx512.c" -#elif defined(HAVE_AVX2) +#if defined(HAVE_AVX512) +#define CHUNKSIZE 64 +#define MASK_TYPE m512 +#include "noodle_engine_avx512.c" +#elif defined(HAVE_AVX2) #define CHUNKSIZE 32 #define MASK_TYPE m256 #include "noodle_engine_avx2.c" @@ -152,43 +152,43 @@ match: #endif static really_inline -hwlm_error_t scanSingleMain(const struct noodTable *n, const u8 *buf, - size_t len, size_t start, bool noCase, - const struct cb_info *cbi) { +hwlm_error_t scanSingleMain(const struct noodTable *n, const u8 *buf, + size_t len, size_t start, bool noCase, + const struct cb_info *cbi) { - const MASK_TYPE mask1 = getMask(n->key0, noCase); + const MASK_TYPE mask1 = getMask(n->key0, noCase); const MASK_TYPE caseMask = getCaseMask(); - size_t offset = start + n->msk_len - 1; - size_t end = len; - assert(offset < end); - -#if !defined(HAVE_AVX512) - hwlm_error_t rv; - - if (end - offset < CHUNKSIZE) { - rv = scanSingleShort(n, buf, len, noCase, caseMask, mask1, cbi, offset, - end); + size_t offset = start + n->msk_len - 1; + size_t end = len; + assert(offset < end); + +#if !defined(HAVE_AVX512) + hwlm_error_t rv; + + if (end - offset < CHUNKSIZE) { + rv = scanSingleShort(n, buf, len, noCase, caseMask, mask1, cbi, offset, + end); return rv; } - if (end - offset == CHUNKSIZE) { - rv = scanSingleUnaligned(n, buf, len, offset, noCase, caseMask, mask1, - cbi, offset, end); + if (end - offset == CHUNKSIZE) { + rv = scanSingleUnaligned(n, buf, len, offset, noCase, caseMask, mask1, + cbi, offset, end); return rv; } uintptr_t data = (uintptr_t)buf; - uintptr_t s2Start = ROUNDUP_N(data + offset, CHUNKSIZE) - data; + uintptr_t s2Start = ROUNDUP_N(data + offset, CHUNKSIZE) - data; uintptr_t last = data + end; uintptr_t s2End = ROUNDDOWN_N(last, CHUNKSIZE) - data; - uintptr_t s3Start = end - CHUNKSIZE; + uintptr_t s3Start = end - CHUNKSIZE; - if (offset != s2Start) { + if (offset != s2Start) { // first scan out to the fast scan starting point DEBUG_PRINTF("stage 1: -> %zu\n", s2Start); - rv = scanSingleUnaligned(n, buf, len, offset, noCase, caseMask, mask1, - cbi, 
offset, s2Start); + rv = scanSingleUnaligned(n, buf, len, offset, noCase, caseMask, mask1, + cbi, offset, s2Start); RETURN_IF_TERMINATED(rv); } @@ -196,70 +196,70 @@ hwlm_error_t scanSingleMain(const struct noodTable *n, const u8 *buf, // scan as far as we can, bounded by the last point this key can // possibly match DEBUG_PRINTF("fast: ~ %zu -> %zu\n", s2Start, s2End); - rv = scanSingleFast(n, buf, len, noCase, caseMask, mask1, cbi, s2Start, - s2End); + rv = scanSingleFast(n, buf, len, noCase, caseMask, mask1, cbi, s2Start, + s2End); RETURN_IF_TERMINATED(rv); } // if we are done bail out - if (s2End == len) { + if (s2End == len) { return HWLM_SUCCESS; } - DEBUG_PRINTF("stage 3: %zu -> %zu\n", s2End, len); - rv = scanSingleUnaligned(n, buf, len, s3Start, noCase, caseMask, mask1, cbi, - s2End, len); + DEBUG_PRINTF("stage 3: %zu -> %zu\n", s2End, len); + rv = scanSingleUnaligned(n, buf, len, s3Start, noCase, caseMask, mask1, cbi, + s2End, len); return rv; -#else // HAVE_AVX512 - return scanSingle512(n, buf, len, noCase, caseMask, mask1, cbi, offset, - end); -#endif +#else // HAVE_AVX512 + return scanSingle512(n, buf, len, noCase, caseMask, mask1, cbi, offset, + end); +#endif } static really_inline -hwlm_error_t scanDoubleMain(const struct noodTable *n, const u8 *buf, - size_t len, size_t start, bool noCase, +hwlm_error_t scanDoubleMain(const struct noodTable *n, const u8 *buf, + size_t len, size_t start, bool noCase, const struct cb_info *cbi) { // we stop scanning for the key-fragment when the rest of the key can't // possibly fit in the remaining buffer - size_t end = len - n->key_offset + 2; - - // the first place the key can match - size_t offset = start + n->msk_len - n->key_offset; + size_t end = len - n->key_offset + 2; + // the first place the key can match + size_t offset = start + n->msk_len - n->key_offset; + const MASK_TYPE caseMask = getCaseMask(); - const MASK_TYPE mask1 = getMask(n->key0, noCase); - const MASK_TYPE mask2 = getMask(n->key1, noCase); - -#if !defined(HAVE_AVX512) - hwlm_error_t rv; - - if (end - offset < CHUNKSIZE) { - rv = scanDoubleShort(n, buf, len, noCase, caseMask, mask1, mask2, cbi, - offset, end); + const MASK_TYPE mask1 = getMask(n->key0, noCase); + const MASK_TYPE mask2 = getMask(n->key1, noCase); + +#if !defined(HAVE_AVX512) + hwlm_error_t rv; + + if (end - offset < CHUNKSIZE) { + rv = scanDoubleShort(n, buf, len, noCase, caseMask, mask1, mask2, cbi, + offset, end); return rv; } - if (end - offset == CHUNKSIZE) { - rv = scanDoubleUnaligned(n, buf, len, offset, noCase, caseMask, mask1, - mask2, cbi, offset, end); + if (end - offset == CHUNKSIZE) { + rv = scanDoubleUnaligned(n, buf, len, offset, noCase, caseMask, mask1, + mask2, cbi, offset, end); return rv; } uintptr_t data = (uintptr_t)buf; - uintptr_t s2Start = ROUNDUP_N(data + offset, CHUNKSIZE) - data; + uintptr_t s2Start = ROUNDUP_N(data + offset, CHUNKSIZE) - data; uintptr_t s1End = s2Start + 1; uintptr_t last = data + end; uintptr_t s2End = ROUNDDOWN_N(last, CHUNKSIZE) - data; uintptr_t s3Start = end - CHUNKSIZE; - uintptr_t off = offset; + uintptr_t off = offset; - if (s2Start != off) { + if (s2Start != off) { // first scan out to the fast scan starting point plus one char past to // catch the key on the overlap - DEBUG_PRINTF("stage 1: %zu -> %zu\n", off, s2Start); - rv = scanDoubleUnaligned(n, buf, len, offset, noCase, caseMask, mask1, - mask2, cbi, off, s1End); + DEBUG_PRINTF("stage 1: %zu -> %zu\n", off, s2Start); + rv = scanDoubleUnaligned(n, buf, len, offset, noCase, caseMask, mask1, + 
mask2, cbi, off, s1End); RETURN_IF_TERMINATED(rv); } off = s1End; @@ -273,8 +273,8 @@ hwlm_error_t scanDoubleMain(const struct noodTable *n, const u8 *buf, // scan as far as we can, bounded by the last point this key can // possibly match DEBUG_PRINTF("fast: ~ %zu -> %zu\n", s2Start, s3Start); - rv = scanDoubleFast(n, buf, len, noCase, caseMask, mask1, mask2, cbi, - s2Start, s2End); + rv = scanDoubleFast(n, buf, len, noCase, caseMask, mask1, mask2, cbi, + s2Start, s2End); RETURN_IF_TERMINATED(rv); off = s2End; } @@ -285,158 +285,158 @@ hwlm_error_t scanDoubleMain(const struct noodTable *n, const u8 *buf, } DEBUG_PRINTF("stage 3: %zu -> %zu\n", s3Start, end); - rv = scanDoubleUnaligned(n, buf, len, s3Start, noCase, caseMask, mask1, - mask2, cbi, off, end); + rv = scanDoubleUnaligned(n, buf, len, s3Start, noCase, caseMask, mask1, + mask2, cbi, off, end); return rv; -#else // AVX512 - return scanDouble512(n, buf, len, noCase, caseMask, mask1, mask2, cbi, - offset, end); -#endif // AVX512 +#else // AVX512 + return scanDouble512(n, buf, len, noCase, caseMask, mask1, mask2, cbi, + offset, end); +#endif // AVX512 } static really_inline -hwlm_error_t scanSingleNoCase(const struct noodTable *n, const u8 *buf, - size_t len, size_t start, +hwlm_error_t scanSingleNoCase(const struct noodTable *n, const u8 *buf, + size_t len, size_t start, const struct cb_info *cbi) { - return scanSingleMain(n, buf, len, start, 1, cbi); + return scanSingleMain(n, buf, len, start, 1, cbi); } static really_inline -hwlm_error_t scanSingleCase(const struct noodTable *n, const u8 *buf, - size_t len, size_t start, +hwlm_error_t scanSingleCase(const struct noodTable *n, const u8 *buf, + size_t len, size_t start, const struct cb_info *cbi) { - return scanSingleMain(n, buf, len, start, 0, cbi); + return scanSingleMain(n, buf, len, start, 0, cbi); } // Single-character specialisation, used when keyLen = 1 static really_inline -hwlm_error_t scanSingle(const struct noodTable *n, const u8 *buf, size_t len, - size_t start, bool noCase, const struct cb_info *cbi) { - if (!ourisalpha(n->key0)) { +hwlm_error_t scanSingle(const struct noodTable *n, const u8 *buf, size_t len, + size_t start, bool noCase, const struct cb_info *cbi) { + if (!ourisalpha(n->key0)) { noCase = 0; // force noCase off if we don't have an alphabetic char } // kinda ugly, but this forces constant propagation if (noCase) { - return scanSingleNoCase(n, buf, len, start, cbi); + return scanSingleNoCase(n, buf, len, start, cbi); } else { - return scanSingleCase(n, buf, len, start, cbi); + return scanSingleCase(n, buf, len, start, cbi); } } static really_inline -hwlm_error_t scanDoubleNoCase(const struct noodTable *n, const u8 *buf, - size_t len, size_t start, +hwlm_error_t scanDoubleNoCase(const struct noodTable *n, const u8 *buf, + size_t len, size_t start, const struct cb_info *cbi) { - return scanDoubleMain(n, buf, len, start, 1, cbi); + return scanDoubleMain(n, buf, len, start, 1, cbi); } static really_inline -hwlm_error_t scanDoubleCase(const struct noodTable *n, const u8 *buf, - size_t len, size_t start, +hwlm_error_t scanDoubleCase(const struct noodTable *n, const u8 *buf, + size_t len, size_t start, const struct cb_info *cbi) { - return scanDoubleMain(n, buf, len, start, 0, cbi); + return scanDoubleMain(n, buf, len, start, 0, cbi); } static really_inline -hwlm_error_t scanDouble(const struct noodTable *n, const u8 *buf, size_t len, - size_t start, bool noCase, const struct cb_info *cbi) { +hwlm_error_t scanDouble(const struct noodTable *n, const u8 *buf, size_t 
len, + size_t start, bool noCase, const struct cb_info *cbi) { // kinda ugly, but this forces constant propagation if (noCase) { - return scanDoubleNoCase(n, buf, len, start, cbi); + return scanDoubleNoCase(n, buf, len, start, cbi); } else { - return scanDoubleCase(n, buf, len, start, cbi); + return scanDoubleCase(n, buf, len, start, cbi); } } // main entry point for the scan code static really_inline -hwlm_error_t scan(const struct noodTable *n, const u8 *buf, size_t len, - size_t start, char single, bool noCase, - const struct cb_info *cbi) { - if (len - start < n->msk_len) { +hwlm_error_t scan(const struct noodTable *n, const u8 *buf, size_t len, + size_t start, char single, bool noCase, + const struct cb_info *cbi) { + if (len - start < n->msk_len) { // can't find string of length keyLen in a shorter buffer return HWLM_SUCCESS; } - if (single) { - return scanSingle(n, buf, len, start, noCase, cbi); + if (single) { + return scanSingle(n, buf, len, start, noCase, cbi); } else { - return scanDouble(n, buf, len, start, noCase, cbi); + return scanDouble(n, buf, len, start, noCase, cbi); } } /** \brief Block-mode scanner. */ hwlm_error_t noodExec(const struct noodTable *n, const u8 *buf, size_t len, - size_t start, HWLMCallback cb, - struct hs_scratch *scratch) { + size_t start, HWLMCallback cb, + struct hs_scratch *scratch) { assert(n && buf); - struct cb_info cbi = {cb, n->id, scratch, 0}; - DEBUG_PRINTF("nood scan of %zu bytes for %*s @ %p\n", len, n->msk_len, - (const char *)&n->cmp, buf); - - return scan(n, buf, len, start, n->single, n->nocase, &cbi); + struct cb_info cbi = {cb, n->id, scratch, 0}; + DEBUG_PRINTF("nood scan of %zu bytes for %*s @ %p\n", len, n->msk_len, + (const char *)&n->cmp, buf); + + return scan(n, buf, len, start, n->single, n->nocase, &cbi); } /** \brief Streaming-mode scanner. */ hwlm_error_t noodExecStreaming(const struct noodTable *n, const u8 *hbuf, size_t hlen, const u8 *buf, size_t len, - HWLMCallback cb, struct hs_scratch *scratch) { + HWLMCallback cb, struct hs_scratch *scratch) { assert(n); - if (len + hlen < n->msk_len) { - DEBUG_PRINTF("not enough bytes for a match\n"); - return HWLM_SUCCESS; - } - - struct cb_info cbi = {cb, n->id, scratch, 0}; - DEBUG_PRINTF("nood scan of %zu bytes (%zu hlen) for %*s @ %p\n", len, hlen, - n->msk_len, (const char *)&n->cmp, buf); - - if (hlen && n->msk_len > 1) { - /* - * we have history, so build up a buffer from enough of the history - * buffer plus what we've been given to scan. Since this is relatively - * short, just check against msk+cmp per byte offset for matches. - */ + if (len + hlen < n->msk_len) { + DEBUG_PRINTF("not enough bytes for a match\n"); + return HWLM_SUCCESS; + } + + struct cb_info cbi = {cb, n->id, scratch, 0}; + DEBUG_PRINTF("nood scan of %zu bytes (%zu hlen) for %*s @ %p\n", len, hlen, + n->msk_len, (const char *)&n->cmp, buf); + + if (hlen && n->msk_len > 1) { + /* + * we have history, so build up a buffer from enough of the history + * buffer plus what we've been given to scan. Since this is relatively + * short, just check against msk+cmp per byte offset for matches. 
+ */ assert(hbuf); - u8 ALIGN_DIRECTIVE temp_buf[HWLM_LITERAL_MAX_LEN * 2]; - memset(temp_buf, 0, sizeof(temp_buf)); - - assert(n->msk_len); - size_t tl1 = MIN((size_t)n->msk_len - 1, hlen); - size_t tl2 = MIN((size_t)n->msk_len - 1, len); - - assert(tl1 + tl2 <= sizeof(temp_buf)); - assert(tl1 + tl2 >= n->msk_len); - assert(tl1 <= sizeof(u64a)); - assert(tl2 <= sizeof(u64a)); - DEBUG_PRINTF("using %zu bytes of hist and %zu bytes of buf\n", tl1, tl2); - - unaligned_store_u64a(temp_buf, - partial_load_u64a(hbuf + hlen - tl1, tl1)); - unaligned_store_u64a(temp_buf + tl1, partial_load_u64a(buf, tl2)); - - for (size_t i = 0; i <= tl1 + tl2 - n->msk_len; i++) { - u64a v = unaligned_load_u64a(temp_buf + i); - if ((v & n->msk) == n->cmp) { - size_t m_end = -tl1 + i + n->msk_len - 1; - DEBUG_PRINTF("match @ %zu (i %zu)\n", m_end, i); - hwlmcb_rv_t rv = cb(m_end, n->id, scratch); - if (rv == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATED; - } - } + u8 ALIGN_DIRECTIVE temp_buf[HWLM_LITERAL_MAX_LEN * 2]; + memset(temp_buf, 0, sizeof(temp_buf)); + + assert(n->msk_len); + size_t tl1 = MIN((size_t)n->msk_len - 1, hlen); + size_t tl2 = MIN((size_t)n->msk_len - 1, len); + + assert(tl1 + tl2 <= sizeof(temp_buf)); + assert(tl1 + tl2 >= n->msk_len); + assert(tl1 <= sizeof(u64a)); + assert(tl2 <= sizeof(u64a)); + DEBUG_PRINTF("using %zu bytes of hist and %zu bytes of buf\n", tl1, tl2); + + unaligned_store_u64a(temp_buf, + partial_load_u64a(hbuf + hlen - tl1, tl1)); + unaligned_store_u64a(temp_buf + tl1, partial_load_u64a(buf, tl2)); + + for (size_t i = 0; i <= tl1 + tl2 - n->msk_len; i++) { + u64a v = unaligned_load_u64a(temp_buf + i); + if ((v & n->msk) == n->cmp) { + size_t m_end = -tl1 + i + n->msk_len - 1; + DEBUG_PRINTF("match @ %zu (i %zu)\n", m_end, i); + hwlmcb_rv_t rv = cb(m_end, n->id, scratch); + if (rv == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATED; + } + } } } assert(buf); cbi.offsetAdj = 0; - return scan(n, buf, len, 0, n->single, n->nocase, &cbi); + return scan(n, buf, len, 0, n->single, n->nocase, &cbi); } diff --git a/contrib/libs/hyperscan/src/hwlm/noodle_engine.h b/contrib/libs/hyperscan/src/hwlm/noodle_engine.h index 64422c41f0..be02286e8f 100644 --- a/contrib/libs/hyperscan/src/hwlm/noodle_engine.h +++ b/contrib/libs/hyperscan/src/hwlm/noodle_engine.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -41,17 +41,17 @@ extern "C" #endif struct noodTable; -struct hs_scratch; +struct hs_scratch; /** \brief Block-mode scanner. */ hwlm_error_t noodExec(const struct noodTable *n, const u8 *buf, size_t len, - size_t start, HWLMCallback cb, - struct hs_scratch *scratch); + size_t start, HWLMCallback cb, + struct hs_scratch *scratch); /** \brief Streaming-mode scanner. 
*/ hwlm_error_t noodExecStreaming(const struct noodTable *n, const u8 *hbuf, size_t hlen, const u8 *buf, size_t len, - HWLMCallback cb, struct hs_scratch *scratch); + HWLMCallback cb, struct hs_scratch *scratch); #ifdef __cplusplus } /* extern "C" */ diff --git a/contrib/libs/hyperscan/src/hwlm/noodle_engine_avx2.c b/contrib/libs/hyperscan/src/hwlm/noodle_engine_avx2.c index 5edc646af1..dbac7fd90e 100644 --- a/contrib/libs/hyperscan/src/hwlm/noodle_engine_avx2.c +++ b/contrib/libs/hyperscan/src/hwlm/noodle_engine_avx2.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -38,11 +38,11 @@ static really_inline m256 getCaseMask(void) { } static really_inline -hwlm_error_t scanSingleUnaligned(const struct noodTable *n, const u8 *buf, - size_t len, size_t offset, bool noCase, - m256 caseMask, m256 mask1, - const struct cb_info *cbi, size_t start, - size_t end) { +hwlm_error_t scanSingleUnaligned(const struct noodTable *n, const u8 *buf, + size_t len, size_t offset, bool noCase, + m256 caseMask, m256 mask1, + const struct cb_info *cbi, size_t start, + size_t end) { const u8 *d = buf + offset; DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset); const size_t l = end - start; @@ -67,11 +67,11 @@ hwlm_error_t scanSingleUnaligned(const struct noodTable *n, const u8 *buf, } static really_inline -hwlm_error_t scanDoubleUnaligned(const struct noodTable *n, const u8 *buf, - size_t len, size_t offset, bool noCase, - m256 caseMask, m256 mask1, m256 mask2, - const struct cb_info *cbi, size_t start, - size_t end) { +hwlm_error_t scanDoubleUnaligned(const struct noodTable *n, const u8 *buf, + size_t len, size_t offset, bool noCase, + m256 caseMask, m256 mask1, m256 mask2, + const struct cb_info *cbi, size_t start, + size_t end) { const u8 *d = buf + offset; DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset); size_t l = end - start; @@ -101,8 +101,8 @@ hwlm_error_t scanDoubleUnaligned(const struct noodTable *n, const u8 *buf, // alignment boundary if needed and to finish off data that the aligned scan // function can't handle (due to small/unaligned chunk at end) static really_inline -hwlm_error_t scanSingleShort(const struct noodTable *n, const u8 *buf, - size_t len, bool noCase, m256 caseMask, m256 mask1, +hwlm_error_t scanSingleShort(const struct noodTable *n, const u8 *buf, + size_t len, bool noCase, m256 caseMask, m256 mask1, const struct cb_info *cbi, size_t start, size_t end) { const u8 *d = buf + start; @@ -118,9 +118,9 @@ hwlm_error_t scanSingleShort(const struct noodTable *n, const u8 *buf, if (l < 4) { u8 *vp = (u8*)&v; switch (l) { - case 3: vp[2] = d[2]; // fallthrough - case 2: vp[1] = d[1]; // fallthrough - case 1: vp[0] = d[0]; // fallthrough + case 3: vp[2] = d[2]; // fallthrough + case 2: vp[1] = d[1]; // fallthrough + case 1: vp[0] = d[0]; // fallthrough } } else { v = masked_move256_len(d, l); @@ -141,10 +141,10 @@ hwlm_error_t scanSingleShort(const struct noodTable *n, const u8 *buf, } static really_inline -hwlm_error_t scanDoubleShort(const struct noodTable *n, const u8 *buf, - size_t len, bool noCase, m256 caseMask, m256 mask1, - m256 mask2, const struct cb_info *cbi, - size_t start, size_t end) { +hwlm_error_t scanDoubleShort(const struct noodTable *n, const u8 *buf, + size_t len, bool noCase, m256 caseMask, m256 mask1, + m256 mask2, const struct 
cb_info *cbi, + size_t start, size_t end) { const u8 *d = buf + start; size_t l = end - start; if (!l) { @@ -157,9 +157,9 @@ hwlm_error_t scanDoubleShort(const struct noodTable *n, const u8 *buf, if (l < 4) { u8 *vp = (u8*)&v; switch (l) { - case 3: vp[2] = d[2]; // fallthrough - case 2: vp[1] = d[1]; // fallthrough - case 1: vp[0] = d[0]; // fallthrough + case 3: vp[2] = d[2]; // fallthrough + case 2: vp[1] = d[1]; // fallthrough + case 1: vp[0] = d[0]; // fallthrough } } else { v = masked_move256_len(d, l); @@ -182,8 +182,8 @@ hwlm_error_t scanDoubleShort(const struct noodTable *n, const u8 *buf, } static really_inline -hwlm_error_t scanSingleFast(const struct noodTable *n, const u8 *buf, - size_t len, bool noCase, m256 caseMask, m256 mask1, +hwlm_error_t scanSingleFast(const struct noodTable *n, const u8 *buf, + size_t len, bool noCase, m256 caseMask, m256 mask1, const struct cb_info *cbi, size_t start, size_t end) { const u8 *d = buf + start, *e = buf + end; @@ -203,9 +203,9 @@ hwlm_error_t scanSingleFast(const struct noodTable *n, const u8 *buf, } static really_inline -hwlm_error_t scanDoubleFast(const struct noodTable *n, const u8 *buf, - size_t len, bool noCase, m256 caseMask, m256 mask1, - m256 mask2, const struct cb_info *cbi, size_t start, +hwlm_error_t scanDoubleFast(const struct noodTable *n, const u8 *buf, + size_t len, bool noCase, m256 caseMask, m256 mask1, + m256 mask2, const struct cb_info *cbi, size_t start, size_t end) { const u8 *d = buf + start, *e = buf + end; DEBUG_PRINTF("start %zu end %zu \n", start, end); @@ -220,7 +220,7 @@ hwlm_error_t scanDoubleFast(const struct noodTable *n, const u8 *buf, u32 z0 = movemask256(eq256(mask1, v)); u32 z1 = movemask256(eq256(mask2, v)); u32 z = (lastz0 | (z0 << 1)) & z1; - lastz0 = z0 >> 31; + lastz0 = z0 >> 31; // On large packet buffers, this prefetch appears to get us about 2%. __builtin_prefetch(d + 128); diff --git a/contrib/libs/hyperscan/src/hwlm/noodle_engine_avx512.c b/contrib/libs/hyperscan/src/hwlm/noodle_engine_avx512.c index 8cac1b15c2..9bf445821a 100644 --- a/contrib/libs/hyperscan/src/hwlm/noodle_engine_avx512.c +++ b/contrib/libs/hyperscan/src/hwlm/noodle_engine_avx512.c @@ -1,191 +1,191 @@ -/* - * Copyright (c) 2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/* noodle scan parts for AVX512 */ - -static really_inline -m512 getMask(u8 c, bool noCase) { - u8 k = caseClear8(c, noCase); - return set64x8(k); -} - -static really_inline -m512 getCaseMask(void) { - return set64x8(CASE_CLEAR); -} - -// The short scan routine. It is used both to scan data up to an -// alignment boundary if needed and to finish off data that the aligned scan -// function can't handle (due to small/unaligned chunk at end) -static really_inline -hwlm_error_t scanSingleShort(const struct noodTable *n, const u8 *buf, - size_t len, bool noCase, m512 caseMask, m512 mask1, - const struct cb_info *cbi, size_t start, - size_t end) { - const u8 *d = buf + start; - ptrdiff_t scan_len = end - start; - DEBUG_PRINTF("scan_len %zu\n", scan_len); - assert(scan_len <= 64); - if (!scan_len) { - return HWLM_SUCCESS; - } - - __mmask64 k = (~0ULL) >> (64 - scan_len); - DEBUG_PRINTF("load mask 0x%016llx\n", k); - - m512 v = loadu_maskz_m512(k, d); - - if (noCase) { - v = and512(v, caseMask); - } - - // reuse the load mask to indicate valid bytes - u64a z = masked_eq512mask(k, mask1, v); - - SINGLE_ZSCAN(); - - return HWLM_SUCCESS; -} - -static really_inline -hwlm_error_t scanSingle512(const struct noodTable *n, const u8 *buf, size_t len, - bool noCase, m512 caseMask, m512 mask1, - const struct cb_info *cbi, size_t start, - size_t end) { - const u8 *d = buf + start; - const u8 *e = buf + end; - DEBUG_PRINTF("start %p end %p \n", d, e); - assert(d < e); - if (d + 64 >= e) { - goto tail; - } - - // peel off first part to cacheline boundary - const u8 *d1 = ROUNDUP_PTR(d, 64); - if (scanSingleShort(n, buf, len, noCase, caseMask, mask1, cbi, start, - d1 - buf) == HWLM_TERMINATED) { - return HWLM_TERMINATED; - } - d = d1; - - for (; d + 64 < e; d += 64) { - DEBUG_PRINTF("d %p e %p \n", d, e); - m512 v = noCase ? 
and512(load512(d), caseMask) : load512(d); - - u64a z = eq512mask(mask1, v); - __builtin_prefetch(d + 128); - - SINGLE_ZSCAN(); - } - -tail: - DEBUG_PRINTF("d %p e %p \n", d, e); - // finish off tail - - return scanSingleShort(n, buf, len, noCase, caseMask, mask1, cbi, d - buf, - e - buf); -} - -static really_inline -hwlm_error_t scanDoubleShort(const struct noodTable *n, const u8 *buf, - size_t len, bool noCase, m512 caseMask, m512 mask1, - m512 mask2, const struct cb_info *cbi, - u64a *lastz0, size_t start, size_t end) { - DEBUG_PRINTF("start %zu end %zu last 0x%016llx\n", start, end, *lastz0); - const u8 *d = buf + start; - ptrdiff_t scan_len = end - start; - if (!scan_len) { - return HWLM_SUCCESS; - } - assert(scan_len <= 64); - __mmask64 k = (~0ULL) >> (64 - scan_len); - DEBUG_PRINTF("load mask 0x%016llx scan_len %zu\n", k, scan_len); - - m512 v = loadu_maskz_m512(k, d); - if (noCase) { - v = and512(v, caseMask); - } - - u64a z0 = masked_eq512mask(k, mask1, v); - u64a z1 = masked_eq512mask(k, mask2, v); - u64a z = (*lastz0 | (z0 << 1)) & z1; - DEBUG_PRINTF("z 0x%016llx\n", z); - - DOUBLE_ZSCAN(); - *lastz0 = z0 >> (scan_len - 1); - return HWLM_SUCCESS; -} - -static really_inline -hwlm_error_t scanDouble512(const struct noodTable *n, const u8 *buf, size_t len, - bool noCase, m512 caseMask, m512 mask1, m512 mask2, - const struct cb_info *cbi, size_t start, - size_t end) { - const u8 *d = buf + start; - const u8 *e = buf + end; - u64a lastz0 = 0; - DEBUG_PRINTF("start %zu end %zu \n", start, end); - assert(d < e); - if (d + 64 >= e) { - goto tail; - } - - // peel off first part to cacheline boundary - const u8 *d1 = ROUNDUP_PTR(d, 64); - if (scanDoubleShort(n, buf, len, noCase, caseMask, mask1, mask2, cbi, - &lastz0, start, d1 - buf) == HWLM_TERMINATED) { - return HWLM_TERMINATED; - } - d = d1; - - for (; d + 64 < e; d += 64) { - DEBUG_PRINTF("d %p e %p 0x%016llx\n", d, e, lastz0); - m512 v = noCase ? and512(load512(d), caseMask) : load512(d); - - /* we have to pull the masks out of the AVX registers because we can't - byte shift between the lanes */ - u64a z0 = eq512mask(mask1, v); - u64a z1 = eq512mask(mask2, v); - u64a z = (lastz0 | (z0 << 1)) & z1; - lastz0 = z0 >> 63; - - // On large packet buffers, this prefetch appears to get us about 2%. - __builtin_prefetch(d + 256); - - DEBUG_PRINTF("z 0x%016llx\n", z); - - DOUBLE_ZSCAN(); - } - -tail: - DEBUG_PRINTF("d %p e %p off %zu \n", d, e, d - buf); - // finish off tail - - return scanDoubleShort(n, buf, len, noCase, caseMask, mask1, mask2, cbi, - &lastz0, d - buf, end); -} +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* noodle scan parts for AVX512 */ + +static really_inline +m512 getMask(u8 c, bool noCase) { + u8 k = caseClear8(c, noCase); + return set64x8(k); +} + +static really_inline +m512 getCaseMask(void) { + return set64x8(CASE_CLEAR); +} + +// The short scan routine. It is used both to scan data up to an +// alignment boundary if needed and to finish off data that the aligned scan +// function can't handle (due to small/unaligned chunk at end) +static really_inline +hwlm_error_t scanSingleShort(const struct noodTable *n, const u8 *buf, + size_t len, bool noCase, m512 caseMask, m512 mask1, + const struct cb_info *cbi, size_t start, + size_t end) { + const u8 *d = buf + start; + ptrdiff_t scan_len = end - start; + DEBUG_PRINTF("scan_len %zu\n", scan_len); + assert(scan_len <= 64); + if (!scan_len) { + return HWLM_SUCCESS; + } + + __mmask64 k = (~0ULL) >> (64 - scan_len); + DEBUG_PRINTF("load mask 0x%016llx\n", k); + + m512 v = loadu_maskz_m512(k, d); + + if (noCase) { + v = and512(v, caseMask); + } + + // reuse the load mask to indicate valid bytes + u64a z = masked_eq512mask(k, mask1, v); + + SINGLE_ZSCAN(); + + return HWLM_SUCCESS; +} + +static really_inline +hwlm_error_t scanSingle512(const struct noodTable *n, const u8 *buf, size_t len, + bool noCase, m512 caseMask, m512 mask1, + const struct cb_info *cbi, size_t start, + size_t end) { + const u8 *d = buf + start; + const u8 *e = buf + end; + DEBUG_PRINTF("start %p end %p \n", d, e); + assert(d < e); + if (d + 64 >= e) { + goto tail; + } + + // peel off first part to cacheline boundary + const u8 *d1 = ROUNDUP_PTR(d, 64); + if (scanSingleShort(n, buf, len, noCase, caseMask, mask1, cbi, start, + d1 - buf) == HWLM_TERMINATED) { + return HWLM_TERMINATED; + } + d = d1; + + for (; d + 64 < e; d += 64) { + DEBUG_PRINTF("d %p e %p \n", d, e); + m512 v = noCase ? 
and512(load512(d), caseMask) : load512(d); + + u64a z = eq512mask(mask1, v); + __builtin_prefetch(d + 128); + + SINGLE_ZSCAN(); + } + +tail: + DEBUG_PRINTF("d %p e %p \n", d, e); + // finish off tail + + return scanSingleShort(n, buf, len, noCase, caseMask, mask1, cbi, d - buf, + e - buf); +} + +static really_inline +hwlm_error_t scanDoubleShort(const struct noodTable *n, const u8 *buf, + size_t len, bool noCase, m512 caseMask, m512 mask1, + m512 mask2, const struct cb_info *cbi, + u64a *lastz0, size_t start, size_t end) { + DEBUG_PRINTF("start %zu end %zu last 0x%016llx\n", start, end, *lastz0); + const u8 *d = buf + start; + ptrdiff_t scan_len = end - start; + if (!scan_len) { + return HWLM_SUCCESS; + } + assert(scan_len <= 64); + __mmask64 k = (~0ULL) >> (64 - scan_len); + DEBUG_PRINTF("load mask 0x%016llx scan_len %zu\n", k, scan_len); + + m512 v = loadu_maskz_m512(k, d); + if (noCase) { + v = and512(v, caseMask); + } + + u64a z0 = masked_eq512mask(k, mask1, v); + u64a z1 = masked_eq512mask(k, mask2, v); + u64a z = (*lastz0 | (z0 << 1)) & z1; + DEBUG_PRINTF("z 0x%016llx\n", z); + + DOUBLE_ZSCAN(); + *lastz0 = z0 >> (scan_len - 1); + return HWLM_SUCCESS; +} + +static really_inline +hwlm_error_t scanDouble512(const struct noodTable *n, const u8 *buf, size_t len, + bool noCase, m512 caseMask, m512 mask1, m512 mask2, + const struct cb_info *cbi, size_t start, + size_t end) { + const u8 *d = buf + start; + const u8 *e = buf + end; + u64a lastz0 = 0; + DEBUG_PRINTF("start %zu end %zu \n", start, end); + assert(d < e); + if (d + 64 >= e) { + goto tail; + } + + // peel off first part to cacheline boundary + const u8 *d1 = ROUNDUP_PTR(d, 64); + if (scanDoubleShort(n, buf, len, noCase, caseMask, mask1, mask2, cbi, + &lastz0, start, d1 - buf) == HWLM_TERMINATED) { + return HWLM_TERMINATED; + } + d = d1; + + for (; d + 64 < e; d += 64) { + DEBUG_PRINTF("d %p e %p 0x%016llx\n", d, e, lastz0); + m512 v = noCase ? and512(load512(d), caseMask) : load512(d); + + /* we have to pull the masks out of the AVX registers because we can't + byte shift between the lanes */ + u64a z0 = eq512mask(mask1, v); + u64a z1 = eq512mask(mask2, v); + u64a z = (lastz0 | (z0 << 1)) & z1; + lastz0 = z0 >> 63; + + // On large packet buffers, this prefetch appears to get us about 2%. 
+ __builtin_prefetch(d + 256); + + DEBUG_PRINTF("z 0x%016llx\n", z); + + DOUBLE_ZSCAN(); + } + +tail: + DEBUG_PRINTF("d %p e %p off %zu \n", d, e, d - buf); + // finish off tail + + return scanDoubleShort(n, buf, len, noCase, caseMask, mask1, mask2, cbi, + &lastz0, d - buf, end); +} diff --git a/contrib/libs/hyperscan/src/hwlm/noodle_engine_sse.c b/contrib/libs/hyperscan/src/hwlm/noodle_engine_sse.c index 7cd53d7ced..0fc33bc342 100644 --- a/contrib/libs/hyperscan/src/hwlm/noodle_engine_sse.c +++ b/contrib/libs/hyperscan/src/hwlm/noodle_engine_sse.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -38,8 +38,8 @@ static really_inline m128 getCaseMask(void) { } static really_inline -hwlm_error_t scanSingleShort(const struct noodTable *n, const u8 *buf, - size_t len, bool noCase, m128 caseMask, m128 mask1, +hwlm_error_t scanSingleShort(const struct noodTable *n, const u8 *buf, + size_t len, bool noCase, m128 caseMask, m128 mask1, const struct cb_info *cbi, size_t start, size_t end) { const u8 *d = buf + start; @@ -67,11 +67,11 @@ hwlm_error_t scanSingleShort(const struct noodTable *n, const u8 *buf, } static really_inline -hwlm_error_t scanSingleUnaligned(const struct noodTable *n, const u8 *buf, - size_t len, size_t offset, bool noCase, - m128 caseMask, m128 mask1, - const struct cb_info *cbi, size_t start, - size_t end) { +hwlm_error_t scanSingleUnaligned(const struct noodTable *n, const u8 *buf, + size_t len, size_t offset, bool noCase, + m128 caseMask, m128 mask1, + const struct cb_info *cbi, size_t start, + size_t end) { const u8 *d = buf + offset; DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset); const size_t l = end - start; @@ -97,10 +97,10 @@ hwlm_error_t scanSingleUnaligned(const struct noodTable *n, const u8 *buf, } static really_inline -hwlm_error_t scanDoubleShort(const struct noodTable *n, const u8 *buf, - size_t len, bool noCase, m128 caseMask, m128 mask1, - m128 mask2, const struct cb_info *cbi, - size_t start, size_t end) { +hwlm_error_t scanDoubleShort(const struct noodTable *n, const u8 *buf, + size_t len, bool noCase, m128 caseMask, m128 mask1, + m128 mask2, const struct cb_info *cbi, + size_t start, size_t end) { const u8 *d = buf + start; size_t l = end - start; if (!l) { @@ -115,8 +115,8 @@ hwlm_error_t scanDoubleShort(const struct noodTable *n, const u8 *buf, v = and128(v, caseMask); } - u32 z = movemask128(and128(lshiftbyte_m128(eq128(mask1, v), 1), - eq128(mask2, v))); + u32 z = movemask128(and128(lshiftbyte_m128(eq128(mask1, v), 1), + eq128(mask2, v))); // mask out where we can't match u32 mask = (0xFFFF >> (16 - l)); @@ -128,11 +128,11 @@ hwlm_error_t scanDoubleShort(const struct noodTable *n, const u8 *buf, } static really_inline -hwlm_error_t scanDoubleUnaligned(const struct noodTable *n, const u8 *buf, - size_t len, size_t offset, bool noCase, - m128 caseMask, m128 mask1, m128 mask2, - const struct cb_info *cbi, size_t start, - size_t end) { +hwlm_error_t scanDoubleUnaligned(const struct noodTable *n, const u8 *buf, + size_t len, size_t offset, bool noCase, + m128 caseMask, m128 mask1, m128 mask2, + const struct cb_info *cbi, size_t start, + size_t end) { const u8 *d = buf + offset; DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset); size_t l = end - start; @@ -143,8 +143,8 @@ hwlm_error_t scanDoubleUnaligned(const 
struct noodTable *n, const u8 *buf, v = and128(v, caseMask); } - u32 z = movemask128(and128(lshiftbyte_m128(eq128(mask1, v), 1), - eq128(mask2, v))); + u32 z = movemask128(and128(lshiftbyte_m128(eq128(mask1, v), 1), + eq128(mask2, v))); // mask out where we can't match u32 buf_off = start - offset; @@ -158,8 +158,8 @@ hwlm_error_t scanDoubleUnaligned(const struct noodTable *n, const u8 *buf, } static really_inline -hwlm_error_t scanSingleFast(const struct noodTable *n, const u8 *buf, - size_t len, bool noCase, m128 caseMask, m128 mask1, +hwlm_error_t scanSingleFast(const struct noodTable *n, const u8 *buf, + size_t len, bool noCase, m128 caseMask, m128 mask1, const struct cb_info *cbi, size_t start, size_t end) { const u8 *d = buf + start, *e = buf + end; @@ -179,9 +179,9 @@ hwlm_error_t scanSingleFast(const struct noodTable *n, const u8 *buf, } static really_inline -hwlm_error_t scanDoubleFast(const struct noodTable *n, const u8 *buf, - size_t len, bool noCase, m128 caseMask, m128 mask1, - m128 mask2, const struct cb_info *cbi, size_t start, +hwlm_error_t scanDoubleFast(const struct noodTable *n, const u8 *buf, + size_t len, bool noCase, m128 caseMask, m128 mask1, + m128 mask2, const struct cb_info *cbi, size_t start, size_t end) { const u8 *d = buf + start, *e = buf + end; assert(d < e); @@ -191,8 +191,8 @@ hwlm_error_t scanDoubleFast(const struct noodTable *n, const u8 *buf, m128 v = noCase ? and128(load128(d), caseMask) : load128(d); m128 z1 = eq128(mask1, v); m128 z2 = eq128(mask2, v); - u32 z = movemask128(and128(palignr(z1, lastz1, 15), z2)); - lastz1 = z1; + u32 z = movemask128(and128(palignr(z1, lastz1, 15), z2)); + lastz1 = z1; // On large packet buffers, this prefetch appears to get us about 2%. __builtin_prefetch(d + 128); diff --git a/contrib/libs/hyperscan/src/hwlm/noodle_internal.h b/contrib/libs/hyperscan/src/hwlm/noodle_internal.h index 8f76f177e1..6a4b65e936 100644 --- a/contrib/libs/hyperscan/src/hwlm/noodle_internal.h +++ b/contrib/libs/hyperscan/src/hwlm/noodle_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,22 +30,22 @@ * \brief Data structures for Noodle literal matcher engine. */ -#ifndef NOODLE_INTERNAL_H -#define NOODLE_INTERNAL_H +#ifndef NOODLE_INTERNAL_H +#define NOODLE_INTERNAL_H #include "ue2common.h" struct noodTable { u32 id; - u64a msk; - u64a cmp; - u8 msk_len; - u8 key_offset; - u8 nocase; - u8 single; - u8 key0; - u8 key1; + u64a msk; + u64a cmp; + u8 msk_len; + u8 key_offset; + u8 nocase; + u8 single; + u8 key0; + u8 key1; }; -#endif /* NOODLE_INTERNAL_H */ +#endif /* NOODLE_INTERNAL_H */ |
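
The scanDouble* hunks above all pair the two key bytes the same way: build one per-byte equality bitmask for each key byte, shift the first mask left by one, AND it with the second, and carry the first mask's top bit into the next block (the AVX2/AVX-512 paths do this with `lastz0 = z0 >> 31` / `>> 63`; the SSE path gets the same effect with `palignr` before `movemask128`). As a minimal, hypothetical scalar sketch of that logic — assuming 8-byte blocks in place of the 128/256/512-bit SIMD registers, with `scan_pairs`, `key0` and `key1` as illustrative names standing in for the noodTable `key0`/`key1` fields, and a plain reporting loop where the real code invokes its DOUBLE_ZSCAN() step — not the Hyperscan implementation itself:

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Scalar stand-in for the scanDouble* kernels: bit i of z0/z1 records
 * whether byte i of the current block equals key0/key1; a pair match
 * ends at position i when bit i-1 is set in z0 and bit i is set in z1. */
static void scan_pairs(const uint8_t *buf, size_t len, uint8_t key0,
                       uint8_t key1) {
    uint64_t lastz0 = 0; /* top bit of the previous block's z0 */
    for (size_t base = 0; base < len; base += 8) {
        size_t block = len - base < 8 ? len - base : 8;
        uint64_t z0 = 0, z1 = 0;
        for (size_t i = 0; i < block; i++) {
            if (buf[base + i] == key0) z0 |= 1ULL << i;
            if (buf[base + i] == key1) z1 |= 1ULL << i;
        }
        /* same combine step as the SIMD code: z = (lastz0 | (z0 << 1)) & z1 */
        uint64_t z = (lastz0 | (z0 << 1)) & z1;
        while (z) {
            size_t i = (size_t)__builtin_ctzll(z);
            printf("pair match ending at offset %zu\n", base + i);
            z &= z - 1; /* clear the lowest set bit and keep scanning */
        }
        /* carry a key0 hit on the block's last byte into the next block */
        lastz0 = z0 >> (block - 1);
    }
}

int main(void) {
    /* 'a' is the last byte of the first 8-byte block and 'b' the first
     * byte of the second, so the match is only found via the lastz0 carry. */
    const char *text = "xxxxxxxab";
    scan_pairs((const uint8_t *)text, strlen(text), 'a', 'b');
    return 0;
}
```

The AVX-512 hunks also show how partial blocks are bounded: the load mask `(~0ULL) >> (64 - scan_len)` restricts both the masked load and the compares to the valid bytes, which plays the same role as the `block` limit in the sketch above.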