aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/hyperscan/src/scratch.h
blob: 1256f7aba82e587b663ff694ae5f3ac599b312d8 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
/*
 * Copyright (c) 2015-2019, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/** \file
 * \brief Scratch and associated data structures.
 *
 * This header gets pulled into many places (many deep, slow to compile
 * places). Try to keep the included headers under control.
 */

#ifndef SCRATCH_H_DA6D4FC06FF410
#define SCRATCH_H_DA6D4FC06FF410

#include "hs_common.h"
#include "ue2common.h"
#include "rose/rose_types.h"

#ifdef __cplusplus
extern "C"
{
#endif

UNUSED static const u32 SCRATCH_MAGIC = 0x544F4259;

struct fatbit;
struct hs_scratch;
struct RoseEngine;
struct mq;

struct queue_match {
    /** \brief used to store the current location of an (suf|out)fix match in
     * the current buffer.
     *
     * As (suf|out)fixes always run in the main buffer and never in history
     * this number will always be positive (matches at 0 belong to previous
     * write). Hence we can get away with a size_t rather than the usual s64a
     * for a location. */
    size_t loc;

    u32 queue; /**< queue index. */
};

struct catchup_pq {
    struct queue_match *qm;
    u32 qm_size; /**< current size of the priority queue */
};

/** \brief Status flag: user requested termination. */
#define STATUS_TERMINATED   (1U << 0)

/** \brief Status flag: it has been determined that it is not possible for this
 * stream to raise any more matches.
 *
 * This may be because all its exhaustion keys are on or for other reasons
 * (anchored sections not matching). */
#define STATUS_EXHAUSTED    (1U << 1)

/** \brief Status flag: Rose requires rebuild as delay literal matched in
 * history. */
#define STATUS_DELAY_DIRTY  (1U << 2)

/** \brief Status flag: Unexpected Rose program error. */
#define STATUS_ERROR        (1U << 3)

/** \brief Core information about the current scan, used everywhere. */
struct core_info {
    void *userContext; /**< user-supplied context */

    /** \brief user-supplied match callback */
    int (HS_CDECL *userCallback)(unsigned int id, unsigned long long from,
                                 unsigned long long to, unsigned int flags,
                                 void *ctx);

    const struct RoseEngine *rose;
    char *state; /**< full stream state */
    char *exhaustionVector; /**< pointer to evec for this stream */
    char *logicalVector; /**< pointer to lvec for this stream */
    char *combVector; /**< pointer to cvec for this stream */
    const u8 *buf; /**< main scan buffer */
    size_t len; /**< length of main scan buffer in bytes */
    const u8 *hbuf; /**< history buffer */
    size_t hlen; /**< length of history buffer in bytes. */
    u64a buf_offset; /**< stream offset, for the base of the buffer */
    u8 status; /**< stream status bitmask, using STATUS_ flags above */
};

/** \brief Rose state information. */
struct RoseContext {
    u8 mpv_inactive;
    u64a groups;
    u64a lit_offset_adjust; /**< offset to add to matches coming from hwlm */
    u64a delayLastEndOffset; /**< end of the last match from FDR used by delay
                              * code */
    u64a lastEndOffset; /**< end of the last match from FDR/anchored DFAs used
                         * by history code. anchored DFA matches update this
                         * when they are inserted into the literal match
                         * stream */
    u64a lastMatchOffset; /**< last match offset report up out of rose;
                           * used _only_ for debugging, asserts */
    u64a lastCombMatchOffset; /**< last match offset of active combinations */
    u64a minMatchOffset; /**< the earliest offset that we are still allowed to
                          * report */
    u64a minNonMpvMatchOffset; /**< the earliest offset that non-mpv engines are
                                * still allowed to report */
    u64a next_mpv_offset; /**< earliest offset that the MPV can next report a
                           * match, cleared if top events arrive */
    u32 filledDelayedSlots;
    u32 curr_qi;    /**< currently executing main queue index during
                     * \ref nfaQueueExec */

    /**
     * \brief Buffer for caseful long literal support, used in streaming mode
     * only.
     *
     * If a long literal prefix was at the end of the buffer at the end of a
     * stream write, then the long lit table hashes it and stores the result in
     * stream state. At the start of the next write, this value is used to set
     * this buffer to the matching prefix string (stored in the bytecode.
     */
    const u8 *ll_buf;

    /** \brief Length in bytes of the string pointed to by ll_buf. */
    size_t ll_len;

    /** \brief Caseless version of ll_buf. */
    const u8 *ll_buf_nocase;

    /** \brief Length in bytes of the string pointed to by ll_buf_nocase. */
    size_t ll_len_nocase;
};

struct match_deduper {
    struct fatbit *log[2]; /**< even, odd logs */
    struct fatbit *som_log[2]; /**< even, odd fatbit logs for som */
    u64a *som_start_log[2]; /**< even, odd start offset logs for som */
    u32 dkey_count;
    u32 log_size;
    u64a current_report_offset;
    u8 som_log_dirty;
};

/** \brief Hyperscan scratch region header.
 *
 * NOTE: there is no requirement that scratch is 16-byte aligned, as it is
 * allocated by a malloc equivalent, possibly supplied by the user.
 */
struct ALIGN_CL_DIRECTIVE hs_scratch {
    u32 magic;
    u8 in_use; /**< non-zero when being used by an API call. */
    u32 queueCount;
    u32 activeQueueArraySize; /**< size of active queue array fatbit in bytes */
    u32 bStateSize; /**< sizeof block mode states */
    u32 tStateSize; /**< sizeof transient rose states */
    u32 fullStateSize; /**< size of uncompressed nfa state */
    struct RoseContext tctxt;
    char *bstate; /**< block mode states */
    char *tstate; /**< state for transient roses */
    char *fullState; /**< uncompressed NFA state */
    struct mq *queues;
    struct fatbit *aqa; /**< active queue array; fatbit of queues that are valid
                         * & active */
    struct fatbit **delay_slots;
    struct fatbit **al_log;
    u64a al_log_sum;
    struct catchup_pq catchup_pq;
    struct core_info core_info;
    struct match_deduper deduper;
    u32 anchored_literal_region_len;
    u32 anchored_literal_fatbit_size; /**< size of each anch fatbit in bytes */
    struct fatbit *handled_roles; /**< fatbit of ROLES (not states) already
                                   * handled by this literal */
    u64a *som_store; /**< array of som locations */
    u64a *som_attempted_store; /**< array of som locations for fail stores */
    struct fatbit *som_set_now; /**< fatbit, true if the som location was set
                                 * based on a match at the current offset */
    struct fatbit *som_attempted_set; /**< fatbit, true if the som location
                            * would have been set at the current offset if the
                            * location had been writable */
    u64a som_set_now_offset; /**< offset at which som_set_now represents */
    u32 som_store_count;
    u32 som_fatbit_size; /**< size of som location fatbit structures in bytes */
    u32 handledKeyFatbitSize; /**< size of handled_keys fatbit in bytes */
    u32 delay_fatbit_size; /**< size of each delay fatbit in bytes */
    u32 scratchSize;
    char *scratch_alloc; /* user allocated scratch object */
    u64a *fdr_conf; /**< FDR confirm value */
    u8 fdr_conf_offset; /**< offset where FDR/Teddy front end matches
                         * in buffer */
};

/* array of fatbit ptr; TODO: why not an array of fatbits? */
static really_inline
struct fatbit **getAnchoredLiteralLog(struct hs_scratch *scratch) {
    return scratch->al_log;
}

static really_inline
struct fatbit **getDelaySlots(struct hs_scratch *scratch) {
    return scratch->delay_slots;
}

static really_inline
char told_to_stop_matching(const struct hs_scratch *scratch) {
    return scratch->core_info.status & STATUS_TERMINATED;
}

static really_inline
char can_stop_matching(const struct hs_scratch *scratch) {
    return scratch->core_info.status &
           (STATUS_TERMINATED | STATUS_EXHAUSTED | STATUS_ERROR);
}

static really_inline
char internal_matching_error(const struct hs_scratch *scratch) {
    return scratch->core_info.status & STATUS_ERROR;
}

/**
 * \brief Mark scratch as in use.
 *
 * Returns non-zero if it was already in use, zero otherwise.
 */
static really_inline
char markScratchInUse(struct hs_scratch *scratch) {
    DEBUG_PRINTF("marking scratch as in use\n");
    assert(scratch && scratch->magic == SCRATCH_MAGIC);
    if (scratch->in_use) {
        DEBUG_PRINTF("scratch already in use!\n");
        return 1;
    }
    scratch->in_use = 1;
    return 0;
}

/**
 * \brief Mark scratch as no longer in use.
 */
static really_inline
void unmarkScratchInUse(struct hs_scratch *scratch) {
    DEBUG_PRINTF("marking scratch as not in use\n");
    assert(scratch && scratch->magic == SCRATCH_MAGIC);
    assert(scratch->in_use == 1);
    scratch->in_use = 0;
}

#ifdef __cplusplus
} /* extern "C" */
#endif

#endif /* SCRATCH_H_DA6D4FC06FF410 */