diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /contrib/libs/hyperscan/src/nfagraph/ng_lbr.cpp | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'contrib/libs/hyperscan/src/nfagraph/ng_lbr.cpp')
-rw-r--r-- | contrib/libs/hyperscan/src/nfagraph/ng_lbr.cpp | 349 |
1 files changed, 349 insertions, 0 deletions
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_lbr.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_lbr.cpp new file mode 100644 index 0000000000..d8ba503ce6 --- /dev/null +++ b/contrib/libs/hyperscan/src/nfagraph/ng_lbr.cpp @@ -0,0 +1,349 @@ +/* + * Copyright (c) 2015-2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * \brief Large Bounded Repeat (LBR) engine build code. + */ + +#include "ng_lbr.h" + +#include "grey.h" +#include "ng_holder.h" +#include "ng_repeat.h" +#include "ng_reports.h" +#include "nfa/castlecompile.h" +#include "nfa/lbr_internal.h" +#include "nfa/nfa_internal.h" +#include "nfa/repeatcompile.h" +#include "nfa/shufticompile.h" +#include "nfa/trufflecompile.h" +#include "util/alloc.h" +#include "util/bitutils.h" // for lg2 +#include "util/compile_context.h" +#include "util/container.h" +#include "util/depth.h" +#include "util/dump_charclass.h" +#include "util/report_manager.h" +#include "util/verify_types.h" + +using namespace std; + +namespace ue2 { + +static +u32 depth_to_u32(const depth &d) { + assert(d.is_reachable()); + if (d.is_infinite()) { + return REPEAT_INF; + } + + u32 d_val = d; + assert(d_val < REPEAT_INF); + return d_val; +} + +template<class LbrStruct> static +u64a* getTable(NFA *nfa) { + char *ptr = (char *)nfa + sizeof(struct NFA) + sizeof(LbrStruct) + + sizeof(RepeatInfo); + ptr = ROUNDUP_PTR(ptr, alignof(u64a)); + return (u64a *)ptr; +} + +template <class LbrStruct> static +void fillNfa(NFA *nfa, lbr_common *c, ReportID report, const depth &repeatMin, + const depth &repeatMax, u32 minPeriod, enum RepeatType rtype) { + assert(nfa); + + RepeatStateInfo rsi(rtype, repeatMin, repeatMax, minPeriod); + + DEBUG_PRINTF("selected %s model for {%s,%s} repeat\n", + repeatTypeName(rtype), repeatMin.str().c_str(), + repeatMax.str().c_str()); + + // Fill the lbr_common structure first. Note that the RepeatInfo structure + // directly follows the LbrStruct. + const u32 info_offset = sizeof(LbrStruct); + c->repeatInfoOffset = info_offset; + c->report = report; + + RepeatInfo *info = (RepeatInfo *)((char *)c + info_offset); + info->type = verify_u8(rtype); + info->repeatMin = depth_to_u32(repeatMin); + info->repeatMax = depth_to_u32(repeatMax); + info->stateSize = rsi.stateSize; + info->packedCtrlSize = rsi.packedCtrlSize; + info->horizon = rsi.horizon; + info->minPeriod = minPeriod; + copy_bytes(&info->packedFieldSizes, rsi.packedFieldSizes); + info->patchCount = rsi.patchCount; + info->patchSize = rsi.patchSize; + info->encodingSize = rsi.encodingSize; + info->patchesOffset = rsi.patchesOffset; + + // Fill the NFA structure. + nfa->nPositions = repeatMin; + nfa->streamStateSize = verify_u32(rsi.packedCtrlSize + rsi.stateSize); + nfa->scratchStateSize = (u32)sizeof(lbr_state); + nfa->minWidth = verify_u32(repeatMin); + nfa->maxWidth = repeatMax.is_finite() ? verify_u32(repeatMax) : 0; + + // Fill the lbr table for sparse lbr model. + if (rtype == REPEAT_SPARSE_OPTIMAL_P) { + u64a *table = getTable<LbrStruct>(nfa); + // Adjust table length according to the optimal patch length. + size_t len = nfa->length; + assert((u32)repeatMax >= rsi.patchSize); + len -= sizeof(u64a) * ((u32)repeatMax - rsi.patchSize); + nfa->length = verify_u32(len); + info->length = verify_u32(sizeof(RepeatInfo) + + sizeof(u64a) * (rsi.patchSize + 1)); + copy_bytes(table, rsi.table); + } +} + +template <class LbrStruct> static +bytecode_ptr<NFA> makeLbrNfa(NFAEngineType nfa_type, enum RepeatType rtype, + const depth &repeatMax) { + size_t tableLen = 0; + if (rtype == REPEAT_SPARSE_OPTIMAL_P) { + tableLen = sizeof(u64a) * (repeatMax + 1); + } + size_t len = sizeof(NFA) + sizeof(LbrStruct) + sizeof(RepeatInfo) + + tableLen + sizeof(u64a); + auto nfa = make_zeroed_bytecode_ptr<NFA>(len); + nfa->type = verify_u8(nfa_type); + nfa->length = verify_u32(len); + return nfa; +} + +static +bytecode_ptr<NFA> buildLbrDot(const CharReach &cr, const depth &repeatMin, + const depth &repeatMax, u32 minPeriod, + bool is_reset, ReportID report) { + if (!cr.all()) { + return nullptr; + } + + enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod, + is_reset); + auto nfa = makeLbrNfa<lbr_dot>(LBR_NFA_DOT, rtype, repeatMax); + struct lbr_dot *ld = (struct lbr_dot *)getMutableImplNfa(nfa.get()); + + fillNfa<lbr_dot>(nfa.get(), &ld->common, report, repeatMin, repeatMax, + minPeriod, rtype); + + DEBUG_PRINTF("built dot lbr\n"); + return nfa; +} + +static +bytecode_ptr<NFA> buildLbrVerm(const CharReach &cr, const depth &repeatMin, + const depth &repeatMax, u32 minPeriod, + bool is_reset, ReportID report) { + const CharReach escapes(~cr); + + if (escapes.count() != 1) { + return nullptr; + } + + enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod, + is_reset); + auto nfa = makeLbrNfa<lbr_verm>(LBR_NFA_VERM, rtype, repeatMax); + struct lbr_verm *lv = (struct lbr_verm *)getMutableImplNfa(nfa.get()); + lv->c = escapes.find_first(); + + fillNfa<lbr_verm>(nfa.get(), &lv->common, report, repeatMin, repeatMax, + minPeriod, rtype); + + DEBUG_PRINTF("built verm lbr\n"); + return nfa; +} + +static +bytecode_ptr<NFA> buildLbrNVerm(const CharReach &cr, const depth &repeatMin, + const depth &repeatMax, u32 minPeriod, + bool is_reset, ReportID report) { + const CharReach escapes(cr); + + if (escapes.count() != 1) { + return nullptr; + } + + enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod, + is_reset); + auto nfa = makeLbrNfa<lbr_verm>(LBR_NFA_NVERM, rtype, repeatMax); + struct lbr_verm *lv = (struct lbr_verm *)getMutableImplNfa(nfa.get()); + lv->c = escapes.find_first(); + + fillNfa<lbr_verm>(nfa.get(), &lv->common, report, repeatMin, repeatMax, + minPeriod, rtype); + + DEBUG_PRINTF("built negated verm lbr\n"); + return nfa; +} + +static +bytecode_ptr<NFA> buildLbrShuf(const CharReach &cr, const depth &repeatMin, + const depth &repeatMax, u32 minPeriod, + bool is_reset, ReportID report) { + enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod, + is_reset); + auto nfa = makeLbrNfa<lbr_shuf>(LBR_NFA_SHUF, rtype, repeatMax); + struct lbr_shuf *ls = (struct lbr_shuf *)getMutableImplNfa(nfa.get()); + + fillNfa<lbr_shuf>(nfa.get(), &ls->common, report, repeatMin, repeatMax, + minPeriod, rtype); + + if (shuftiBuildMasks(~cr, (u8 *)&ls->mask_lo, (u8 *)&ls->mask_hi) == -1) { + return nullptr; + } + + DEBUG_PRINTF("built shuf lbr\n"); + return nfa; +} + +static +bytecode_ptr<NFA> buildLbrTruf(const CharReach &cr, const depth &repeatMin, + const depth &repeatMax, u32 minPeriod, + bool is_reset, ReportID report) { + enum RepeatType rtype = chooseRepeatType(repeatMin, repeatMax, minPeriod, + is_reset); + auto nfa = makeLbrNfa<lbr_truf>(LBR_NFA_TRUF, rtype, repeatMax); + struct lbr_truf *lc = (struct lbr_truf *)getMutableImplNfa(nfa.get()); + + fillNfa<lbr_truf>(nfa.get(), &lc->common, report, repeatMin, repeatMax, + minPeriod, rtype); + + truffleBuildMasks(~cr, (u8 *)&lc->mask1, (u8 *)&lc->mask2); + + DEBUG_PRINTF("built truffle lbr\n"); + return nfa; +} + +static +bytecode_ptr<NFA> constructLBR(const CharReach &cr, const depth &repeatMin, + const depth &repeatMax, u32 minPeriod, + bool is_reset, ReportID report) { + DEBUG_PRINTF("bounds={%s,%s}, cr=%s (count %zu), report=%u\n", + repeatMin.str().c_str(), repeatMax.str().c_str(), + describeClass(cr, 20, CC_OUT_TEXT).c_str(), cr.count(), + report); + assert(repeatMin <= repeatMax); + assert(repeatMax.is_reachable()); + + auto nfa = + buildLbrDot(cr, repeatMin, repeatMax, minPeriod, is_reset, report); + + if (!nfa) { + nfa = buildLbrVerm(cr, repeatMin, repeatMax, minPeriod, is_reset, + report); + } + if (!nfa) { + nfa = buildLbrNVerm(cr, repeatMin, repeatMax, minPeriod, is_reset, + report); + } + if (!nfa) { + nfa = buildLbrShuf(cr, repeatMin, repeatMax, minPeriod, is_reset, + report); + } + if (!nfa) { + nfa = buildLbrTruf(cr, repeatMin, repeatMax, minPeriod, is_reset, + report); + } + + if (!nfa) { + assert(0); + return nullptr; + } + + return nfa; +} + +bytecode_ptr<NFA> constructLBR(const CastleProto &proto, + const vector<vector<CharReach>> &triggers, + const CompileContext &cc, + const ReportManager &rm) { + if (!cc.grey.allowLbr) { + return nullptr; + } + + if (proto.repeats.size() != 1) { + return nullptr; + } + + const PureRepeat &repeat = proto.repeats.begin()->second; + assert(!repeat.reach.none()); + + if (repeat.reports.size() != 1) { + DEBUG_PRINTF("too many reports\n"); + return nullptr; + } + + bool is_reset; + u32 min_period = minPeriod(triggers, repeat.reach, &is_reset); + + if (depth(min_period) > repeat.bounds.max) { + DEBUG_PRINTF("trigger is longer than repeat; only need one offset\n"); + is_reset = true; + } + + ReportID report = *repeat.reports.begin(); + if (has_managed_reports(proto.kind)) { + report = rm.getProgramOffset(report); + } + + DEBUG_PRINTF("building LBR %s\n", repeat.bounds.str().c_str()); + return constructLBR(repeat.reach, repeat.bounds.min, repeat.bounds.max, + min_period, is_reset, report); +} + +/** \brief Construct an LBR engine from the given graph \p g. */ +bytecode_ptr<NFA> constructLBR(const NGHolder &g, + const vector<vector<CharReach>> &triggers, + const CompileContext &cc, + const ReportManager &rm) { + if (!cc.grey.allowLbr) { + return nullptr; + } + + PureRepeat repeat; + if (!isPureRepeat(g, repeat)) { + return nullptr; + } + if (repeat.reports.size() != 1) { + DEBUG_PRINTF("too many reports\n"); + return nullptr; + } + + CastleProto proto(g.kind, repeat); + return constructLBR(proto, triggers, cc, rm); +} + +} // namespace ue2 |