aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/hyperscan/src/nfa/castlecompile.h
blob: cd830eb3a0447440f68843e8dd3a0f8a409cdb43 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
/*
 * Copyright (c) 2015-2017, Intel Corporation 
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/** 
 * \file 
 * \brief Castle: multi-tenant repeat engine, compiler code.
 */

#ifndef NFA_CASTLECOMPILE_H
#define NFA_CASTLECOMPILE_H

#include "nfa_kind.h"
#include "ue2common.h"
#include "nfagraph/ng_repeat.h"
#include "util/bytecode_ptr.h" 
#include "util/depth.h"
#include "util/flat_containers.h" 

#include <map>
#include <memory>
#include <set>
#include <unordered_map> 
#include <vector>

// Forward declaration (global namespace): this header only names NFA as the
// template argument of the bytecode_ptr<NFA> returned by buildCastle().
struct NFA;

namespace ue2 {

// Forward declarations: within this header these types appear only in
// declarations (by reference, or as template arguments of parameter/return
// types), so the names alone are declared here.
class CharReach;
class NGHolder;
class ReportManager; 
struct CompileContext;

/**
 * \brief Prototype for a Castle engine: contains at least one repeat, each
 * stored as a PureRepeat keyed by its top (see \ref repeats).
 *
 * Currently, all repeats in a Castle must have the same character
 * reachability.
 *
 * A CastleProto is converted into a single NFA, with each top triggering a
 * unique repeat. A CastleProto can contain at most CastleProto::max_occupancy
 * elements.
 */
struct CastleProto {
    /** \brief Upper bound on the number of repeats a single prototype holds. */
    static constexpr size_t max_occupancy = 65536; // arbitrary limit

    /**
     * \brief Construct a prototype of engine kind \p k from the repeat \p pr.
     *
     * NOTE(review): presumably \p pr becomes the first entry of \ref repeats;
     * the definition is not in this header -- confirm there.
     */
    CastleProto(nfa_kind k, const PureRepeat &pr); 

    /**
     * \brief Character reachability of this castle.
     *
     * NOTE(review): presumably the reach shared by every repeat (see the
     * struct-level note on identical reachability) -- confirm against the
     * definition.
     */
    const CharReach &reach() const;

    /**
     * \brief Add a new repeat.
     *
     * NOTE(review): the u32 returned is presumably the top assigned to \p pr
     * -- confirm against the definition.
     */
    u32 add(const PureRepeat &pr);

    /**
     * \brief Remove a repeat.
     *
     * \param top Top id identifying the repeat to remove.
     */
    void erase(u32 top);

    /**
     * \brief Merge in the given repeat, returning the top used.
     *
     * If the repeat already exists in this castle, we will re-use (and return)
     * the old top. If it doesn't, it will be added and assigned a new top.
     * Returns \ref max_occupancy if capacity would be exceeded.
     */
    u32 merge(const PureRepeat &pr);

    /** \brief Mapping from unique top id to repeat. */
    std::map<u32, PureRepeat> repeats;

    /** \brief Mapping from report to associated tops. */
    std::unordered_map<ReportID, flat_set<u32>> report_map; 

    /**
     * \brief Next top id to use. Repeats may be removed without top remapping,
     * so we track this explicitly instead of using repeats.size().
     */
    u32 next_top = 1;
 
    /** \brief Kind for this engine. */ 
    nfa_kind kind; 
};

/**
 * \brief Collect report ids for the given prototype into a set.
 *
 * NOTE(review): presumably every report raised by any repeat in \p proto; the
 * definition is not in this header -- confirm there.
 */
std::set<ReportID> all_reports(const CastleProto &proto);
/*
 * Width queries on a prototype. The one-argument overloads take the whole
 * prototype; the two-argument overloads additionally take a top id.
 *
 * NOTE(review): presumably the one-argument forms consider all repeats and
 * the two-argument forms only the repeat registered on `top`; behaviour for
 * an unknown top is not visible from this header -- confirm against the
 * definitions.
 */
depth findMinWidth(const CastleProto &proto);
depth findMaxWidth(const CastleProto &proto);
depth findMinWidth(const CastleProto &proto, u32 top);
depth findMaxWidth(const CastleProto &proto, u32 top);

/**
 * \brief Remap tops to be contiguous.
 *
 * Remap the tops in the given CastleProto so that they're contiguous in the
 * range [0 .. N-1].
 *
 * \param proto   Prototype whose tops are rewritten in place.
 * \param top_map Output mapping between tops. NOTE(review): presumably keyed
 *                by old top with the new top as value -- confirm against the
 *                definition.
 */
void remapCastleTops(CastleProto &proto, std::map<u32, u32> &top_map);

/**
 * \brief Construct an NFA from a CastleProto.
 *
 * NOTE: Tops must be contiguous, i.e. \ref remapCastleTops must have been run
 * first.
 *
 * \param proto    Prototype to compile.
 * \param triggers Per-key lists of CharReach sequences. NOTE(review): the u32
 *                 key is presumably the top id -- confirm with callers.
 * \param cc       Compile context.
 * \param rm       Report manager.
 * \return The built engine, owned by the returned bytecode_ptr.
 *         NOTE(review): whether a null result is possible on failure is not
 *         visible from this header.
 */
bytecode_ptr<NFA> 
buildCastle(const CastleProto &proto,
            const std::map<u32, std::vector<std::vector<CharReach>>> &triggers,
            const CompileContext &cc, const ReportManager &rm); 

/**
 * \brief Merge two CastleProto prototypes together, if possible. If a
 * particular repeat from c2 is already in c1, then it will be reused rather
 * than adding a duplicate repeat.
 *
 * Returns true if merge of all repeats in c2 into c1 succeeds, and fills
 * \p top_map with the repeat indices.
 *
 * \param c1      Destination prototype; modified by the merge.
 * \param c2      Source prototype; not modified.
 * \param top_map Output mapping of repeat indices. NOTE(review): presumably
 *                keyed by the top in c2 with the corresponding top in c1 as
 *                value -- confirm against the definition.
 */
bool mergeCastle(CastleProto &c1, const CastleProto &c2,
                 std::map<u32, u32> &top_map);

/**
 * \brief True if the two castles are identical with respect to the reports
 * given; i.e. the same tops lead to the same repeats, just with report1 in c1
 * and report2 in c2.
 *
 * Repeats leading to other reports are ignored.
 *
 * \param c1      First castle.
 * \param report1 Report considered within \p c1.
 * \param c2      Second castle.
 * \param report2 Report considered within \p c2.
 */
bool is_equal(const CastleProto &c1, ReportID report1, const CastleProto &c2,
              ReportID report2);

/**
 * \brief True if the two castles given are identical.
 *
 * Unlike the four-argument overload, this form takes no report ids to
 * restrict the comparison.
 */
bool is_equal(const CastleProto &c1, const CastleProto &c2);

/**
 * \brief True if the given castle contains more than a single instance of any
 * of the reports in the given set.
 *
 * \param proto   Castle prototype to inspect.
 * \param reports Set of report ids to check for.
 */
bool requiresDedupe(const CastleProto &proto,
                    const flat_set<ReportID> &reports); 

/**
 * \brief Build an NGHolder from a CastleProto.
 *
 * \param castle Prototype to convert.
 * \param cc     Compile context.
 * \return The holder, owned by the returned unique_ptr. NOTE(review): whether
 *         a null result is possible (e.g. for castles that cannot be
 *         represented) is not visible from this header -- confirm against the
 *         definition.
 */
std::unique_ptr<NGHolder> makeHolder(const CastleProto &castle, 
                                     const CompileContext &cc);

} // namespace ue2

#endif // NFA_CASTLECOMPILE_H