diff options
author | bnagaev <bnagaev@yandex-team.ru> | 2022-02-10 16:47:04 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:47:04 +0300 |
commit | c74559fb88da8adac0d9186cfa55a6b13c47695f (patch) | |
tree | b83306b6e37edeea782e9eed673d89286c4fef35 /contrib/libs/hyperscan/src/rose/rose_build_impl.h | |
parent | d6449ba66291ff0c0d352c82e6eb3efb4c8a7e8d (diff) | |
download | ydb-c74559fb88da8adac0d9186cfa55a6b13c47695f.tar.gz |
Restoring authorship annotation for <bnagaev@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/hyperscan/src/rose/rose_build_impl.h')
-rw-r--r-- | contrib/libs/hyperscan/src/rose/rose_build_impl.h | 948 |
1 files changed, 474 insertions, 474 deletions
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_impl.h b/contrib/libs/hyperscan/src/rose/rose_build_impl.h index 9c601f1e5f..7780848b1b 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_impl.h +++ b/contrib/libs/hyperscan/src/rose/rose_build_impl.h @@ -1,64 +1,64 @@ -/* +/* * Copyright (c) 2015-2019, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + #ifndef ROSE_BUILD_IMPL_H #define ROSE_BUILD_IMPL_H - -#include "rose_build.h" -#include "rose_build_util.h" + +#include "rose_build.h" +#include "rose_build_util.h" #include "rose_common.h" -#include "rose_graph.h" -#include "nfa/mpvcompile.h" -#include "nfa/goughcompile.h" -#include "nfa/nfa_internal.h" -#include "nfagraph/ng_holder.h" -#include "nfagraph/ng_revacc.h" +#include "rose_graph.h" +#include "nfa/mpvcompile.h" +#include "nfa/goughcompile.h" +#include "nfa/nfa_internal.h" +#include "nfagraph/ng_holder.h" +#include "nfagraph/ng_revacc.h" #include "util/bytecode_ptr.h" #include "util/flat_containers.h" #include "util/hash.h" -#include "util/order_check.h" -#include "util/queue_index_factory.h" +#include "util/order_check.h" +#include "util/queue_index_factory.h" #include "util/ue2string.h" #include "util/unordered.h" #include "util/verify_types.h" - -#include <deque> -#include <map> -#include <string> -#include <vector> + +#include <deque> +#include <map> +#include <string> +#include <vector> #include <boost/variant.hpp> - -struct RoseEngine; - -namespace ue2 { - -#define ROSE_GROUPS_MAX 64 - + +struct RoseEngine; + +namespace ue2 { + +#define ROSE_GROUPS_MAX 64 + #define ROSE_LONG_LITERAL_THRESHOLD_MIN 33 /** @@ -72,66 +72,66 @@ namespace ue2 { */ #define ROSE_SHORT_LITERAL_LEN_MAX 8 -struct BoundaryReports; -struct CastleProto; -struct CompileContext; -class ReportManager; +struct BoundaryReports; +struct CastleProto; +struct CompileContext; +class ReportManager; class SmallWriteBuild; -class SomSlotManager; - -struct suffix_id { - suffix_id(const RoseSuffixInfo &in) - : g(in.graph.get()), c(in.castle.get()), d(in.rdfa.get()), +class SomSlotManager; + +struct suffix_id { + suffix_id(const RoseSuffixInfo &in) + : g(in.graph.get()), c(in.castle.get()), d(in.rdfa.get()), h(in.haig.get()), t(in.tamarama.get()), dfa_min_width(in.dfa_min_width), - dfa_max_width(in.dfa_max_width) { - assert(!g || g->kind == NFA_SUFFIX); - } - bool operator==(const suffix_id &b) const { + dfa_max_width(in.dfa_max_width) { + assert(!g || g->kind == NFA_SUFFIX); + } + bool operator==(const suffix_id &b) const { bool rv = g == b.g && c == b.c && h == b.h && d == b.d && t == b.t; - assert(!rv || dfa_min_width == b.dfa_min_width); - assert(!rv || dfa_max_width == b.dfa_max_width); - return rv; - } - bool operator!=(const suffix_id &b) const { return !(*this == b); } - bool operator<(const suffix_id &b) const { - const suffix_id &a = *this; - ORDER_CHECK(g); - ORDER_CHECK(c); - ORDER_CHECK(d); - ORDER_CHECK(h); + assert(!rv || dfa_min_width == b.dfa_min_width); + assert(!rv || dfa_max_width == b.dfa_max_width); + return rv; + } + bool operator!=(const suffix_id &b) const { return !(*this == b); } + bool operator<(const suffix_id &b) const { + const suffix_id &a = *this; + ORDER_CHECK(g); + ORDER_CHECK(c); + ORDER_CHECK(d); + ORDER_CHECK(h); ORDER_CHECK(t); - return false; - } - - NGHolder *graph() { - if (!d && !h) { - assert(dfa_min_width == depth(0)); - assert(dfa_max_width == depth::infinity()); - } - return g; - } - const NGHolder *graph() const { - if (!d && !h) { - assert(dfa_min_width == depth(0)); - assert(dfa_max_width == depth::infinity()); - } - return g; - } - CastleProto *castle() { - if (!d && !h) { - assert(dfa_min_width == depth(0)); - assert(dfa_max_width == depth::infinity()); - } - return c; - } - const CastleProto *castle() const { - if (!d && !h) { - assert(dfa_min_width == depth(0)); - assert(dfa_max_width == depth::infinity()); - } - return c; - } + return false; + } + + NGHolder *graph() { + if (!d && !h) { + assert(dfa_min_width == depth(0)); + assert(dfa_max_width == depth::infinity()); + } + return g; + } + const NGHolder *graph() const { + if (!d && !h) { + assert(dfa_min_width == depth(0)); + assert(dfa_max_width == depth::infinity()); + } + return g; + } + CastleProto *castle() { + if (!d && !h) { + assert(dfa_min_width == depth(0)); + assert(dfa_max_width == depth::infinity()); + } + return c; + } + const CastleProto *castle() const { + if (!d && !h) { + assert(dfa_min_width == depth(0)); + assert(dfa_max_width == depth::infinity()); + } + return c; + } TamaProto *tamarama() { if (!d && !h) { assert(dfa_min_width == depth(0)); @@ -148,148 +148,148 @@ struct suffix_id { } - raw_som_dfa *haig() { return h; } - const raw_som_dfa *haig() const { return h; } - raw_dfa *dfa() { return d; } - const raw_dfa *dfa() const { return d; } - - size_t hash() const; - -private: - NGHolder *g; - CastleProto *c; - raw_dfa *d; - raw_som_dfa *h; + raw_som_dfa *haig() { return h; } + const raw_som_dfa *haig() const { return h; } + raw_dfa *dfa() { return d; } + const raw_dfa *dfa() const { return d; } + + size_t hash() const; + +private: + NGHolder *g; + CastleProto *c; + raw_dfa *d; + raw_som_dfa *h; TamaProto *t; - depth dfa_min_width; - depth dfa_max_width; - - friend depth findMinWidth(const suffix_id &s); - friend depth findMaxWidth(const suffix_id &s); - friend depth findMinWidth(const suffix_id &s, u32 top); - friend depth findMaxWidth(const suffix_id &s, u32 top); -}; - -std::set<ReportID> all_reports(const suffix_id &s); -std::set<u32> all_tops(const suffix_id &s); -bool has_eod_accepts(const suffix_id &s); -bool has_non_eod_accepts(const suffix_id &s); -depth findMinWidth(const suffix_id &s); -depth findMaxWidth(const suffix_id &s); -depth findMinWidth(const suffix_id &s, u32 top); -depth findMaxWidth(const suffix_id &s, u32 top); - -/** \brief represents an engine to the left of a rose role */ -struct left_id { - left_id(const LeftEngInfo &in) - : g(in.graph.get()), c(in.castle.get()), d(in.dfa.get()), - h(in.haig.get()), dfa_min_width(in.dfa_min_width), - dfa_max_width(in.dfa_max_width) { + depth dfa_min_width; + depth dfa_max_width; + + friend depth findMinWidth(const suffix_id &s); + friend depth findMaxWidth(const suffix_id &s); + friend depth findMinWidth(const suffix_id &s, u32 top); + friend depth findMaxWidth(const suffix_id &s, u32 top); +}; + +std::set<ReportID> all_reports(const suffix_id &s); +std::set<u32> all_tops(const suffix_id &s); +bool has_eod_accepts(const suffix_id &s); +bool has_non_eod_accepts(const suffix_id &s); +depth findMinWidth(const suffix_id &s); +depth findMaxWidth(const suffix_id &s); +depth findMinWidth(const suffix_id &s, u32 top); +depth findMaxWidth(const suffix_id &s, u32 top); + +/** \brief represents an engine to the left of a rose role */ +struct left_id { + left_id(const LeftEngInfo &in) + : g(in.graph.get()), c(in.castle.get()), d(in.dfa.get()), + h(in.haig.get()), dfa_min_width(in.dfa_min_width), + dfa_max_width(in.dfa_max_width) { assert(!g || !has_managed_reports(*g)); - } - bool operator==(const left_id &b) const { - bool rv = g == b.g && c == b.c && h == b.h && d == b.d; - assert(!rv || dfa_min_width == b.dfa_min_width); - assert(!rv || dfa_max_width == b.dfa_max_width); - return rv; - } - bool operator!=(const left_id &b) const { return !(*this == b); } - bool operator<(const left_id &b) const { - const left_id &a = *this; - ORDER_CHECK(g); - ORDER_CHECK(c); - ORDER_CHECK(d); - ORDER_CHECK(h); - return false; - } - - NGHolder *graph() { - if (!d && !h) { - assert(dfa_min_width == depth(0)); - assert(dfa_max_width == depth::infinity()); - } - return g; - } - const NGHolder *graph() const { - if (!d && !h) { - assert(dfa_min_width == depth(0)); - assert(dfa_max_width == depth::infinity()); - } - return g; - } - CastleProto *castle() { - if (!d && !h) { - assert(dfa_min_width == depth(0)); - assert(dfa_max_width == depth::infinity()); - } - - return c; - } - const CastleProto *castle() const { - if (!d && !h) { - assert(dfa_min_width == depth(0)); - assert(dfa_max_width == depth::infinity()); - } - - return c; - } - raw_som_dfa *haig() { return h; } - const raw_som_dfa *haig() const { return h; } - raw_dfa *dfa() { return d; } - const raw_dfa *dfa() const { return d; } - - size_t hash() const; - -private: - NGHolder *g; - CastleProto *c; - raw_dfa *d; - raw_som_dfa *h; - depth dfa_min_width; - depth dfa_max_width; - - friend bool isAnchored(const left_id &r); - friend depth findMinWidth(const left_id &r); - friend depth findMaxWidth(const left_id &r); -}; - -std::set<u32> all_tops(const left_id &r); + } + bool operator==(const left_id &b) const { + bool rv = g == b.g && c == b.c && h == b.h && d == b.d; + assert(!rv || dfa_min_width == b.dfa_min_width); + assert(!rv || dfa_max_width == b.dfa_max_width); + return rv; + } + bool operator!=(const left_id &b) const { return !(*this == b); } + bool operator<(const left_id &b) const { + const left_id &a = *this; + ORDER_CHECK(g); + ORDER_CHECK(c); + ORDER_CHECK(d); + ORDER_CHECK(h); + return false; + } + + NGHolder *graph() { + if (!d && !h) { + assert(dfa_min_width == depth(0)); + assert(dfa_max_width == depth::infinity()); + } + return g; + } + const NGHolder *graph() const { + if (!d && !h) { + assert(dfa_min_width == depth(0)); + assert(dfa_max_width == depth::infinity()); + } + return g; + } + CastleProto *castle() { + if (!d && !h) { + assert(dfa_min_width == depth(0)); + assert(dfa_max_width == depth::infinity()); + } + + return c; + } + const CastleProto *castle() const { + if (!d && !h) { + assert(dfa_min_width == depth(0)); + assert(dfa_max_width == depth::infinity()); + } + + return c; + } + raw_som_dfa *haig() { return h; } + const raw_som_dfa *haig() const { return h; } + raw_dfa *dfa() { return d; } + const raw_dfa *dfa() const { return d; } + + size_t hash() const; + +private: + NGHolder *g; + CastleProto *c; + raw_dfa *d; + raw_som_dfa *h; + depth dfa_min_width; + depth dfa_max_width; + + friend bool isAnchored(const left_id &r); + friend depth findMinWidth(const left_id &r); + friend depth findMaxWidth(const left_id &r); +}; + +std::set<u32> all_tops(const left_id &r); std::set<ReportID> all_reports(const left_id &left); -bool isAnchored(const left_id &r); -depth findMinWidth(const left_id &r); -depth findMaxWidth(const left_id &r); -u32 num_tops(const left_id &r); - -struct rose_literal_info { +bool isAnchored(const left_id &r); +depth findMinWidth(const left_id &r); +depth findMaxWidth(const left_id &r); +u32 num_tops(const left_id &r); + +struct rose_literal_info { flat_set<u32> delayed_ids; flat_set<RoseVertex> vertices; - rose_group group_mask = 0; - u32 undelayed_id = MO_INVALID_IDX; - bool squash_group = false; - bool requires_benefits = false; -}; - -/** - * \brief Main literal struct used at Rose build time. Numeric literal IDs - * used at build time point at these (via the RoseBuildImpl::literals map). - */ -struct rose_literal_id { - rose_literal_id(const ue2_literal &s_in, rose_literal_table table_in, - u32 delay_in) - : s(s_in), table(table_in), delay(delay_in), distinctiveness(0) {} - - rose_literal_id(const ue2_literal &s_in, const std::vector<u8> &msk_in, - const std::vector<u8> &cmp_in, rose_literal_table table_in, - u32 delay_in); - - ue2_literal s; - std::vector<u8> msk; - std::vector<u8> cmp; - rose_literal_table table; - u32 delay; - u32 distinctiveness; - - size_t elength(void) const { return s.length() + delay; } + rose_group group_mask = 0; + u32 undelayed_id = MO_INVALID_IDX; + bool squash_group = false; + bool requires_benefits = false; +}; + +/** + * \brief Main literal struct used at Rose build time. Numeric literal IDs + * used at build time point at these (via the RoseBuildImpl::literals map). + */ +struct rose_literal_id { + rose_literal_id(const ue2_literal &s_in, rose_literal_table table_in, + u32 delay_in) + : s(s_in), table(table_in), delay(delay_in), distinctiveness(0) {} + + rose_literal_id(const ue2_literal &s_in, const std::vector<u8> &msk_in, + const std::vector<u8> &cmp_in, rose_literal_table table_in, + u32 delay_in); + + ue2_literal s; + std::vector<u8> msk; + std::vector<u8> cmp; + rose_literal_table table; + u32 delay; + u32 distinctiveness; + + size_t elength(void) const { return s.length() + delay; } size_t elength_including_mask(void) const { size_t mask_len = msk.size(); for (u8 c : msk) { @@ -310,19 +310,19 @@ struct rose_literal_id { size_t hash() const { return hash_all(s, msk, cmp, table, delay, distinctiveness); } -}; - -static inline -bool operator<(const rose_literal_id &a, const rose_literal_id &b) { - ORDER_CHECK(distinctiveness); - ORDER_CHECK(table); - ORDER_CHECK(s); - ORDER_CHECK(delay); - ORDER_CHECK(msk); - ORDER_CHECK(cmp); - return 0; -} - +}; + +static inline +bool operator<(const rose_literal_id &a, const rose_literal_id &b) { + ORDER_CHECK(distinctiveness); + ORDER_CHECK(table); + ORDER_CHECK(s); + ORDER_CHECK(delay); + ORDER_CHECK(msk); + ORDER_CHECK(cmp); + return 0; +} + class RoseLiteralMap { /** * \brief Main storage for literals. @@ -332,7 +332,7 @@ class RoseLiteralMap { * the loop. */ std::deque<rose_literal_id> lits; - + /** \brief Quick-lookup index from literal -> index in lits. */ ue2_unordered_map<rose_literal_id, u32> lits_index; @@ -372,68 +372,68 @@ public: } }; -struct simple_anchored_info { - simple_anchored_info(u32 min_b, u32 max_b, const ue2_literal &lit) - : min_bound(min_b), max_bound(max_b), literal(lit) {} - u32 min_bound; /**< min number of characters required before literal can - * start matching */ - u32 max_bound; /**< max number of characters allowed before literal can - * start matching */ - ue2_literal literal; -}; - -static really_inline -bool operator<(const simple_anchored_info &a, const simple_anchored_info &b) { - ORDER_CHECK(min_bound); - ORDER_CHECK(max_bound); - ORDER_CHECK(literal); - return 0; -} - +struct simple_anchored_info { + simple_anchored_info(u32 min_b, u32 max_b, const ue2_literal &lit) + : min_bound(min_b), max_bound(max_b), literal(lit) {} + u32 min_bound; /**< min number of characters required before literal can + * start matching */ + u32 max_bound; /**< max number of characters allowed before literal can + * start matching */ + ue2_literal literal; +}; + +static really_inline +bool operator<(const simple_anchored_info &a, const simple_anchored_info &b) { + ORDER_CHECK(min_bound); + ORDER_CHECK(max_bound); + ORDER_CHECK(literal); + return 0; +} + struct MpvProto { bool empty() const { return puffettes.empty() && triggered_puffettes.empty(); - } + } void reset() { puffettes.clear(); triggered_puffettes.clear(); - } + } std::vector<raw_puff> puffettes; std::vector<raw_puff> triggered_puffettes; }; - + struct OutfixInfo { template<class T> explicit OutfixInfo(std::unique_ptr<T> x) : proto(std::move(x)) {} explicit OutfixInfo(MpvProto mpv_in) : proto(std::move(mpv_in)) {} - u32 get_queue(QueueIndexFactory &qif); - + u32 get_queue(QueueIndexFactory &qif); + u32 get_queue() const { assert(queue != ~0U); return queue; } - bool is_nonempty_mpv() const { + bool is_nonempty_mpv() const { auto *m = boost::get<MpvProto>(&proto); return m && !m->empty(); - } - - bool is_dead() const { + } + + bool is_dead() const { auto *m = boost::get<MpvProto>(&proto); if (m) { return m->empty(); } return boost::get<boost::blank>(&proto) != nullptr; - } - - void clear() { + } + + void clear() { proto = boost::blank(); - } - + } + // Convenience accessor functions. - + NGHolder *holder() { auto *up = boost::get<std::unique_ptr<NGHolder>>(&proto); return up ? up->get() : nullptr; @@ -449,7 +449,7 @@ struct OutfixInfo { MpvProto *mpv() { return boost::get<MpvProto>(&proto); } - + // Convenience const accessor functions. const NGHolder *holder() const { @@ -479,214 +479,214 @@ struct OutfixInfo { std::unique_ptr<raw_som_dfa>, MpvProto> proto = boost::blank(); - RevAccInfo rev_info; - u32 maxBAWidth = 0; //!< max bi-anchored width + RevAccInfo rev_info; + u32 maxBAWidth = 0; //!< max bi-anchored width depth minWidth{depth::infinity()}; depth maxWidth{0}; - u64a maxOffset = 0; - bool in_sbmatcher = false; //!< handled by small-block matcher. - -private: - u32 queue = ~0U; -}; - -std::set<ReportID> all_reports(const OutfixInfo &outfix); - -// Concrete impl class -class RoseBuildImpl : public RoseBuild { -public: + u64a maxOffset = 0; + bool in_sbmatcher = false; //!< handled by small-block matcher. + +private: + u32 queue = ~0U; +}; + +std::set<ReportID> all_reports(const OutfixInfo &outfix); + +// Concrete impl class +class RoseBuildImpl : public RoseBuild { +public: RoseBuildImpl(ReportManager &rm, SomSlotManager &ssm, SmallWriteBuild &smwr, - const CompileContext &cc, const BoundaryReports &boundary); - - ~RoseBuildImpl() override; - - // Adds a single literal. - void add(bool anchored, bool eod, const ue2_literal &lit, + const CompileContext &cc, const BoundaryReports &boundary); + + ~RoseBuildImpl() override; + + // Adds a single literal. + void add(bool anchored, bool eod, const ue2_literal &lit, const flat_set<ReportID> &ids) override; - + bool addRose(const RoseInGraph &ig, bool prefilter) override; - bool addSombeRose(const RoseInGraph &ig) override; - - bool addOutfix(const NGHolder &h) override; - bool addOutfix(const NGHolder &h, const raw_som_dfa &haig) override; - bool addOutfix(const raw_puff &rp) override; - - bool addChainTail(const raw_puff &rp, u32 *queue_out, u32 *event_out) override; - - // Returns true if we were able to add it as a mask - bool add(bool anchored, const std::vector<CharReach> &mask, + bool addSombeRose(const RoseInGraph &ig) override; + + bool addOutfix(const NGHolder &h) override; + bool addOutfix(const NGHolder &h, const raw_som_dfa &haig) override; + bool addOutfix(const raw_puff &rp) override; + + bool addChainTail(const raw_puff &rp, u32 *queue_out, u32 *event_out) override; + + // Returns true if we were able to add it as a mask + bool add(bool anchored, const std::vector<CharReach> &mask, const flat_set<ReportID> &reports) override; - - bool addAnchoredAcyclic(const NGHolder &graph) override; - - bool validateMask(const std::vector<CharReach> &mask, + + bool addAnchoredAcyclic(const NGHolder &graph) override; + + bool validateMask(const std::vector<CharReach> &mask, const flat_set<ReportID> &reports, bool anchored, - bool eod) const override; - void addMask(const std::vector<CharReach> &mask, + bool eod) const override; + void addMask(const std::vector<CharReach> &mask, const flat_set<ReportID> &reports, bool anchored, - bool eod) override; - - // Construct a runtime implementation. + bool eod) override; + + // Construct a runtime implementation. bytecode_ptr<RoseEngine> buildRose(u32 minWidth) override; bytecode_ptr<RoseEngine> buildFinalEngine(u32 minWidth); - - void setSom() override { hasSom = true; } - - std::unique_ptr<RoseDedupeAux> generateDedupeAux() const override; - - // Find the maximum bound on the edges to this vertex's successors. - u32 calcSuccMaxBound(RoseVertex u) const; - - /* Returns the ID of the given literal in the literal map, adding it if - * necessary. */ - u32 getLiteralId(const ue2_literal &s, u32 delay, rose_literal_table table); - - // Variant with msk/cmp. - u32 getLiteralId(const ue2_literal &s, const std::vector<u8> &msk, - const std::vector<u8> &cmp, u32 delay, - rose_literal_table table); - - u32 getNewLiteralId(void); - - void removeVertices(const std::vector<RoseVertex> &dead); - - // Is the Rose anchored? - bool hasNoFloatingRoots() const; - - u32 calcHistoryRequired() const; - - rose_group getInitialGroups() const; - rose_group getSuccGroups(RoseVertex start) const; - rose_group getGroups(RoseVertex v) const; - - bool hasDelayedLiteral(RoseVertex v) const; - bool hasDelayPred(RoseVertex v) const; - bool hasLiteralInTable(RoseVertex v, enum rose_literal_table t) const; - bool hasAnchoredTablePred(RoseVertex v) const; - - // Is the given vertex a successor of either root or anchored_root? - bool isRootSuccessor(const RoseVertex &v) const; - /* Is the given vertex a successor of something other than root or - * anchored_root? */ - bool isNonRootSuccessor(const RoseVertex &v) const; - - bool isDirectReport(u32 id) const; - bool isDelayed(u32 id) const; - - bool isAnchored(RoseVertex v) const; /* true iff has literal in anchored - * table */ - bool isFloating(RoseVertex v) const; /* true iff has literal in floating - * table */ - bool isInETable(RoseVertex v) const; /* true iff has literal in eod - * table */ - - size_t maxLiteralLen(RoseVertex v) const; - size_t minLiteralLen(RoseVertex v) const; - - // max overlap considered for every pair (ulit, vlit). - size_t maxLiteralOverlap(RoseVertex u, RoseVertex v) const; - - bool isPseudoStar(const RoseEdge &e) const; - bool isPseudoStarOrFirstOnly(const RoseEdge &e) const; - bool hasOnlyPseudoStarInEdges(RoseVertex v) const; - - bool isAnyStart(const RoseVertex &v) const { - return v == root || v == anchored_root; - } - - bool isVirtualVertex(const RoseVertex &v) const { - return g[v].eod_accept || isAnyStart(v); - } - - void handleMixedSensitivity(void); - - void findTransientLeftfixes(void); - - const CompileContext &cc; - RoseGraph g; - const RoseVertex root; - const RoseVertex anchored_root; - RoseLiteralMap literals; - std::map<RoseVertex, RoseVertex> ghost; - ReportID getNewNfaReport() override { - return next_nfa_report++; - } - std::deque<rose_literal_info> literal_info; - bool hasSom; //!< at least one pattern requires SOM. - std::map<size_t, std::vector<std::unique_ptr<raw_dfa>>> anchored_nfas; - std::map<simple_anchored_info, std::set<u32>> anchored_simple; - std::map<u32, std::set<u32> > group_to_literal; - u32 group_end; - - u32 ematcher_region_size; /**< number of bytes the eod table runs over */ - - /** \brief Mapping from anchored literal ID to the original literal suffix - * present when the literal was added to the literal matcher. Used for - * overlap calculation in history assignment. */ - std::map<u32, rose_literal_id> anchoredLitSuffix; - + + void setSom() override { hasSom = true; } + + std::unique_ptr<RoseDedupeAux> generateDedupeAux() const override; + + // Find the maximum bound on the edges to this vertex's successors. + u32 calcSuccMaxBound(RoseVertex u) const; + + /* Returns the ID of the given literal in the literal map, adding it if + * necessary. */ + u32 getLiteralId(const ue2_literal &s, u32 delay, rose_literal_table table); + + // Variant with msk/cmp. + u32 getLiteralId(const ue2_literal &s, const std::vector<u8> &msk, + const std::vector<u8> &cmp, u32 delay, + rose_literal_table table); + + u32 getNewLiteralId(void); + + void removeVertices(const std::vector<RoseVertex> &dead); + + // Is the Rose anchored? + bool hasNoFloatingRoots() const; + + u32 calcHistoryRequired() const; + + rose_group getInitialGroups() const; + rose_group getSuccGroups(RoseVertex start) const; + rose_group getGroups(RoseVertex v) const; + + bool hasDelayedLiteral(RoseVertex v) const; + bool hasDelayPred(RoseVertex v) const; + bool hasLiteralInTable(RoseVertex v, enum rose_literal_table t) const; + bool hasAnchoredTablePred(RoseVertex v) const; + + // Is the given vertex a successor of either root or anchored_root? + bool isRootSuccessor(const RoseVertex &v) const; + /* Is the given vertex a successor of something other than root or + * anchored_root? */ + bool isNonRootSuccessor(const RoseVertex &v) const; + + bool isDirectReport(u32 id) const; + bool isDelayed(u32 id) const; + + bool isAnchored(RoseVertex v) const; /* true iff has literal in anchored + * table */ + bool isFloating(RoseVertex v) const; /* true iff has literal in floating + * table */ + bool isInETable(RoseVertex v) const; /* true iff has literal in eod + * table */ + + size_t maxLiteralLen(RoseVertex v) const; + size_t minLiteralLen(RoseVertex v) const; + + // max overlap considered for every pair (ulit, vlit). + size_t maxLiteralOverlap(RoseVertex u, RoseVertex v) const; + + bool isPseudoStar(const RoseEdge &e) const; + bool isPseudoStarOrFirstOnly(const RoseEdge &e) const; + bool hasOnlyPseudoStarInEdges(RoseVertex v) const; + + bool isAnyStart(const RoseVertex &v) const { + return v == root || v == anchored_root; + } + + bool isVirtualVertex(const RoseVertex &v) const { + return g[v].eod_accept || isAnyStart(v); + } + + void handleMixedSensitivity(void); + + void findTransientLeftfixes(void); + + const CompileContext &cc; + RoseGraph g; + const RoseVertex root; + const RoseVertex anchored_root; + RoseLiteralMap literals; + std::map<RoseVertex, RoseVertex> ghost; + ReportID getNewNfaReport() override { + return next_nfa_report++; + } + std::deque<rose_literal_info> literal_info; + bool hasSom; //!< at least one pattern requires SOM. + std::map<size_t, std::vector<std::unique_ptr<raw_dfa>>> anchored_nfas; + std::map<simple_anchored_info, std::set<u32>> anchored_simple; + std::map<u32, std::set<u32> > group_to_literal; + u32 group_end; + + u32 ematcher_region_size; /**< number of bytes the eod table runs over */ + + /** \brief Mapping from anchored literal ID to the original literal suffix + * present when the literal was added to the literal matcher. Used for + * overlap calculation in history assignment. */ + std::map<u32, rose_literal_id> anchoredLitSuffix; + ue2_unordered_set<left_id> transient; ue2_unordered_map<left_id, rose_group> rose_squash_masks; - - std::vector<OutfixInfo> outfixes; - - /** \brief MPV outfix entry. Null if not used, and moved into the outfixes - * list before we start building the bytecode (at which point it is set to - * null again). */ - std::unique_ptr<OutfixInfo> mpv_outfix = nullptr; - - u32 eod_event_literal_id; // ID of EOD event literal, or MO_INVALID_IDX. - - u32 max_rose_anchored_floating_overlap; - + + std::vector<OutfixInfo> outfixes; + + /** \brief MPV outfix entry. Null if not used, and moved into the outfixes + * list before we start building the bytecode (at which point it is set to + * null again). */ + std::unique_ptr<OutfixInfo> mpv_outfix = nullptr; + + u32 eod_event_literal_id; // ID of EOD event literal, or MO_INVALID_IDX. + + u32 max_rose_anchored_floating_overlap; + rose_group boundary_group_mask = 0; - - QueueIndexFactory qif; - ReportManager &rm; - SomSlotManager &ssm; + + QueueIndexFactory qif; + ReportManager &rm; + SomSlotManager &ssm; SmallWriteBuild &smwr; - const BoundaryReports &boundary; - -private: - ReportID next_nfa_report; -}; - + const BoundaryReports &boundary; + +private: + ReportID next_nfa_report; +}; + size_t calcLongLitThreshold(const RoseBuildImpl &build, const size_t historyRequired); -// Free functions, in rose_build_misc.cpp - -bool hasAnchHistorySucc(const RoseGraph &g, RoseVertex v); -bool hasLastByteHistorySucc(const RoseGraph &g, RoseVertex v); - -size_t maxOverlap(const rose_literal_id &a, const rose_literal_id &b); +// Free functions, in rose_build_misc.cpp + +bool hasAnchHistorySucc(const RoseGraph &g, RoseVertex v); +bool hasLastByteHistorySucc(const RoseGraph &g, RoseVertex v); + +size_t maxOverlap(const rose_literal_id &a, const rose_literal_id &b); ue2_literal findNonOverlappingTail(const std::set<ue2_literal> &lits, const ue2_literal &s); - -#ifndef NDEBUG + +#ifndef NDEBUG bool roseHasTops(const RoseBuildImpl &build, RoseVertex v); bool hasOrphanedTops(const RoseBuildImpl &build); -#endif - -u64a findMaxOffset(const std::set<ReportID> &reports, const ReportManager &rm); - -// Function that operates on a msk/cmp pair and a literal, as used in -// hwlmLiteral, and zeroes msk elements that don't add any power to the -// literal. -void normaliseLiteralMask(const ue2_literal &s, std::vector<u8> &msk, - std::vector<u8> &cmp); - +#endif + +u64a findMaxOffset(const std::set<ReportID> &reports, const ReportManager &rm); + +// Function that operates on a msk/cmp pair and a literal, as used in +// hwlmLiteral, and zeroes msk elements that don't add any power to the +// literal. +void normaliseLiteralMask(const ue2_literal &s, std::vector<u8> &msk, + std::vector<u8> &cmp); + u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id); u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id); - + bool canEagerlyReportAtEod(const RoseBuildImpl &build, const RoseEdge &e); - -#ifndef NDEBUG -bool canImplementGraphs(const RoseBuildImpl &tbi); -#endif - -} // namespace ue2 - + +#ifndef NDEBUG +bool canImplementGraphs(const RoseBuildImpl &tbi); +#endif + +} // namespace ue2 + namespace std { template<> |