diff options
author | bnagaev <bnagaev@yandex-team.ru> | 2022-02-10 16:47:04 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:47:04 +0300 |
commit | d6449ba66291ff0c0d352c82e6eb3efb4c8a7e8d (patch) | |
tree | d5dca6d44593f5e52556a1cc7b1ab0386e096ebe /contrib/libs/hyperscan/src/hwlm/hwlm_build.cpp | |
parent | 1861d4c1402bb2c67a3e6b43b51706081b74508a (diff) | |
download | ydb-d6449ba66291ff0c0d352c82e6eb3efb4c8a7e8d.tar.gz |
Restoring authorship annotation for <bnagaev@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/hyperscan/src/hwlm/hwlm_build.cpp')
-rw-r--r-- | contrib/libs/hyperscan/src/hwlm/hwlm_build.cpp | 382 |
1 files changed, 191 insertions, 191 deletions
diff --git a/contrib/libs/hyperscan/src/hwlm/hwlm_build.cpp b/contrib/libs/hyperscan/src/hwlm/hwlm_build.cpp index 1b33281529..bbd49d340d 100644 --- a/contrib/libs/hyperscan/src/hwlm/hwlm_build.cpp +++ b/contrib/libs/hyperscan/src/hwlm/hwlm_build.cpp @@ -1,65 +1,65 @@ -/* +/* * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Hamster Wheel Literal Matcher: build code. - */ + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Hamster Wheel Literal Matcher: build code. + */ #include "hwlm_build.h" -#include "grey.h" -#include "hwlm.h" -#include "hwlm_internal.h" +#include "grey.h" +#include "hwlm.h" +#include "hwlm_internal.h" #include "hwlm_literal.h" -#include "noodle_engine.h" -#include "noodle_build.h" +#include "noodle_engine.h" +#include "noodle_build.h" #include "scratch.h" -#include "ue2common.h" -#include "fdr/fdr_compile.h" +#include "ue2common.h" +#include "fdr/fdr_compile.h" #include "fdr/fdr_compile_internal.h" #include "fdr/fdr_engine_description.h" #include "fdr/teddy_engine_description.h" -#include "util/compile_context.h" -#include "util/compile_error.h" +#include "util/compile_context.h" +#include "util/compile_error.h" #include "util/make_unique.h" -#include "util/ue2string.h" - -#include <cassert> -#include <cstring> -#include <vector> - -using namespace std; - -namespace ue2 { - +#include "util/ue2string.h" + +#include <cassert> +#include <cstring> +#include <vector> + +using namespace std; + +namespace ue2 { + HWLMProto::HWLMProto(u8 engType_in, vector<hwlmLiteral> lits_in) : engType(engType_in), lits(move(lits_in)) {} - + HWLMProto::HWLMProto(u8 engType_in, unique_ptr<FDREngineDescription> eng_in, vector<hwlmLiteral> lits_in, @@ -67,7 +67,7 @@ HWLMProto::HWLMProto(u8 engType_in, bool make_small_in) : engType(engType_in), fdrEng(move(eng_in)), lits(move(lits_in)), bucketToLits(move(bucketToLits_in)), make_small(make_small_in) {} - + HWLMProto::HWLMProto(u8 engType_in, unique_ptr<TeddyEngineDescription> eng_in, vector<hwlmLiteral> lits_in, @@ -76,45 +76,45 @@ HWLMProto::HWLMProto(u8 engType_in, : engType(engType_in), teddyEng(move(eng_in)), lits(move(lits_in)), bucketToLits(move(bucketToLits_in)), make_small(make_small_in) {} - + HWLMProto::~HWLMProto() {} - -static -void dumpLits(UNUSED const vector<hwlmLiteral> &lits) { -#ifdef DEBUG - DEBUG_PRINTF("building lit table for:\n"); - for (const auto &lit : lits) { - printf("\t%u:%016llx %s%s\n", lit.id, lit.groups, - escapeString(lit.s).c_str(), lit.nocase ? " (nc)" : ""); - } -#endif -} - -#ifndef NDEBUG -// Called by an assertion. -static -bool everyoneHasGroups(const vector<hwlmLiteral> &lits) { - for (const auto &lit : lits) { - if (!lit.groups) { - return false; - } - } - return true; -} -#endif - -static -bool isNoodleable(const vector<hwlmLiteral> &lits, - const CompileContext &cc) { - if (!cc.grey.allowNoodle) { - return false; - } - - if (lits.size() != 1) { - DEBUG_PRINTF("too many literals for noodle\n"); - return false; - } - + +static +void dumpLits(UNUSED const vector<hwlmLiteral> &lits) { +#ifdef DEBUG + DEBUG_PRINTF("building lit table for:\n"); + for (const auto &lit : lits) { + printf("\t%u:%016llx %s%s\n", lit.id, lit.groups, + escapeString(lit.s).c_str(), lit.nocase ? " (nc)" : ""); + } +#endif +} + +#ifndef NDEBUG +// Called by an assertion. +static +bool everyoneHasGroups(const vector<hwlmLiteral> &lits) { + for (const auto &lit : lits) { + if (!lit.groups) { + return false; + } + } + return true; +} +#endif + +static +bool isNoodleable(const vector<hwlmLiteral> &lits, + const CompileContext &cc) { + if (!cc.grey.allowNoodle) { + return false; + } + + if (lits.size() != 1) { + DEBUG_PRINTF("too many literals for noodle\n"); + return false; + } + return true; } @@ -132,7 +132,7 @@ bytecode_ptr<HWLM> hwlmBuild(const HWLMProto &proto, const CompileContext &cc, auto noodle = noodBuildTable(lit); if (noodle) { engSize = noodle.size(); - } + } eng = move(noodle); } else { DEBUG_PRINTF("building a new deal\n"); @@ -141,12 +141,12 @@ bytecode_ptr<HWLM> hwlmBuild(const HWLMProto &proto, const CompileContext &cc, engSize = fdr.size(); } eng = move(fdr); - } - + } + if (!eng) { return nullptr; - } - + } + assert(engSize); if (engSize > cc.grey.limitLiteralMatcherSize) { throw ResourceLimitError(); @@ -159,111 +159,111 @@ bytecode_ptr<HWLM> hwlmBuild(const HWLMProto &proto, const CompileContext &cc, memcpy(HWLM_DATA(h.get()), eng.get(), engSize); return h; -} - +} + unique_ptr<HWLMProto> hwlmBuildProto(vector<hwlmLiteral> &lits, bool make_small, const CompileContext &cc) { - assert(!lits.empty()); - dumpLits(lits); - - // Check that we haven't exceeded the maximum number of literals. - if (lits.size() > cc.grey.limitLiteralCount) { - throw ResourceLimitError(); - } - - // Safety and resource limit checks. - u64a total_chars = 0; - for (const auto &lit : lits) { - assert(!lit.s.empty()); - - if (lit.s.length() > cc.grey.limitLiteralLength) { - throw ResourceLimitError(); - } - total_chars += lit.s.length(); - if (total_chars > cc.grey.limitLiteralMatcherChars) { - throw ResourceLimitError(); - } - - // We do not allow the all-ones ID, as we reserve that for internal use - // within literal matchers. - if (lit.id == 0xffffffffu) { - assert(!"reserved id 0xffffffff used"); - throw CompileError("Internal error."); - } - } - + assert(!lits.empty()); + dumpLits(lits); + + // Check that we haven't exceeded the maximum number of literals. + if (lits.size() > cc.grey.limitLiteralCount) { + throw ResourceLimitError(); + } + + // Safety and resource limit checks. + u64a total_chars = 0; + for (const auto &lit : lits) { + assert(!lit.s.empty()); + + if (lit.s.length() > cc.grey.limitLiteralLength) { + throw ResourceLimitError(); + } + total_chars += lit.s.length(); + if (total_chars > cc.grey.limitLiteralMatcherChars) { + throw ResourceLimitError(); + } + + // We do not allow the all-ones ID, as we reserve that for internal use + // within literal matchers. + if (lit.id == 0xffffffffu) { + assert(!"reserved id 0xffffffff used"); + throw CompileError("Internal error."); + } + } + unique_ptr<HWLMProto> proto; - - DEBUG_PRINTF("building table with %zu strings\n", lits.size()); - - assert(everyoneHasGroups(lits)); - + + DEBUG_PRINTF("building table with %zu strings\n", lits.size()); + + assert(everyoneHasGroups(lits)); + if (isNoodleable(lits, cc)) { - DEBUG_PRINTF("build noodle table\n"); + DEBUG_PRINTF("build noodle table\n"); proto = ue2::make_unique<HWLMProto>(HWLM_ENGINE_NOOD, lits); - } else { - DEBUG_PRINTF("building a new deal\n"); + } else { + DEBUG_PRINTF("building a new deal\n"); proto = fdrBuildProto(HWLM_ENGINE_FDR, lits, make_small, cc.target_info, cc.grey); if (!proto) { return nullptr; - } - } - + } + } + return proto; -} - -size_t hwlmSize(const HWLM *h) { - size_t engSize = 0; - - switch (h->type) { - case HWLM_ENGINE_NOOD: - engSize = noodSize((const noodTable *)HWLM_C_DATA(h)); - break; - case HWLM_ENGINE_FDR: - engSize = fdrSize((const FDR *)HWLM_C_DATA(h)); - break; - } - - if (!engSize) { - return 0; - } - - return engSize + ROUNDUP_CL(sizeof(*h)); -} - -size_t hwlmFloodProneSuffixLen(size_t numLiterals, const CompileContext &cc) { - const size_t NO_LIMIT = ~(size_t)0; - - // NOTE: this function contains a number of magic numbers which are - // conservative estimates of flood-proneness based on internal details of - // the various literal engines that fall under the HWLM aegis. If you - // change those engines, you might need to change this function too. - - DEBUG_PRINTF("%zu literals\n", numLiterals); - - if (cc.grey.allowNoodle && numLiterals <= 1) { - DEBUG_PRINTF("noodle\n"); - return NO_LIMIT; - } - - if (cc.grey.fdrAllowTeddy) { - if (numLiterals <= 48) { - DEBUG_PRINTF("teddy\n"); - return 3; - } - if (cc.target_info.has_avx2() && numLiterals <= 96) { - DEBUG_PRINTF("avx2 teddy\n"); - return 3; - } - } - - // TODO: we had thought we could push this value up to 9, but it seems that - // hurts performance on floods in some FDR models. Super-conservative for - // now. - DEBUG_PRINTF("fdr\n"); - return 3; -} - -} // namespace ue2 +} + +size_t hwlmSize(const HWLM *h) { + size_t engSize = 0; + + switch (h->type) { + case HWLM_ENGINE_NOOD: + engSize = noodSize((const noodTable *)HWLM_C_DATA(h)); + break; + case HWLM_ENGINE_FDR: + engSize = fdrSize((const FDR *)HWLM_C_DATA(h)); + break; + } + + if (!engSize) { + return 0; + } + + return engSize + ROUNDUP_CL(sizeof(*h)); +} + +size_t hwlmFloodProneSuffixLen(size_t numLiterals, const CompileContext &cc) { + const size_t NO_LIMIT = ~(size_t)0; + + // NOTE: this function contains a number of magic numbers which are + // conservative estimates of flood-proneness based on internal details of + // the various literal engines that fall under the HWLM aegis. If you + // change those engines, you might need to change this function too. + + DEBUG_PRINTF("%zu literals\n", numLiterals); + + if (cc.grey.allowNoodle && numLiterals <= 1) { + DEBUG_PRINTF("noodle\n"); + return NO_LIMIT; + } + + if (cc.grey.fdrAllowTeddy) { + if (numLiterals <= 48) { + DEBUG_PRINTF("teddy\n"); + return 3; + } + if (cc.target_info.has_avx2() && numLiterals <= 96) { + DEBUG_PRINTF("avx2 teddy\n"); + return 3; + } + } + + // TODO: we had thought we could push this value up to 9, but it seems that + // hurts performance on floods in some FDR models. Super-conservative for + // now. + DEBUG_PRINTF("fdr\n"); + return 3; +} + +} // namespace ue2 |