aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/hyperscan/src/rose/rose_build_impl.h
blob: 7780848b1b6de1ea9b902837ad4036c023e194d2 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
/*
 * Copyright (c) 2015-2019, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  * Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of Intel Corporation nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef ROSE_BUILD_IMPL_H
#define ROSE_BUILD_IMPL_H

#include "rose_build.h"
#include "rose_build_util.h"
#include "rose_common.h"
#include "rose_graph.h"
#include "nfa/mpvcompile.h"
#include "nfa/goughcompile.h"
#include "nfa/nfa_internal.h"
#include "nfagraph/ng_holder.h"
#include "nfagraph/ng_revacc.h"
#include "util/bytecode_ptr.h"
#include "util/flat_containers.h"
#include "util/hash.h"
#include "util/order_check.h"
#include "util/queue_index_factory.h"
#include "util/ue2string.h"
#include "util/unordered.h"
#include "util/verify_types.h"

#include <deque>
#include <map>
#include <string>
#include <vector>
#include <boost/variant.hpp>

struct RoseEngine;

namespace ue2 {

#define ROSE_GROUPS_MAX 64

#define ROSE_LONG_LITERAL_THRESHOLD_MIN 33

/**
 * \brief The largest allowable "short" literal fragment which can be given to
 * a literal matcher directly.
 *
 * Literals longer than this will be truncated to their suffix and confirmed in
 * the Rose interpreter, either as "medium length" literals which can be
 * confirmed from history, or "long literals" which make use of the streaming
 * table support.
 */
#define ROSE_SHORT_LITERAL_LEN_MAX 8

struct BoundaryReports;
struct CastleProto;
struct CompileContext;
class ReportManager;
class SmallWriteBuild;
class SomSlotManager;

struct suffix_id {
    suffix_id(const RoseSuffixInfo &in)
        : g(in.graph.get()), c(in.castle.get()), d(in.rdfa.get()),
          h(in.haig.get()), t(in.tamarama.get()),
          dfa_min_width(in.dfa_min_width),
          dfa_max_width(in.dfa_max_width) {
            assert(!g || g->kind == NFA_SUFFIX);
    }
    bool operator==(const suffix_id &b) const {
        bool rv = g == b.g && c == b.c && h == b.h && d == b.d && t == b.t;
        assert(!rv || dfa_min_width == b.dfa_min_width);
        assert(!rv || dfa_max_width == b.dfa_max_width);
        return rv;
    }
    bool operator!=(const suffix_id &b) const { return !(*this == b); }
    bool operator<(const suffix_id &b) const {
        const suffix_id &a = *this;
        ORDER_CHECK(g);
        ORDER_CHECK(c);
        ORDER_CHECK(d);
        ORDER_CHECK(h);
        ORDER_CHECK(t);
        return false;
    }

    NGHolder *graph() {
        if (!d && !h) {
            assert(dfa_min_width == depth(0));
            assert(dfa_max_width == depth::infinity());
        }
        return g;
    }
    const NGHolder *graph() const {
        if (!d && !h) {
            assert(dfa_min_width == depth(0));
            assert(dfa_max_width == depth::infinity());
        }
        return g;
    }
    CastleProto *castle() {
        if (!d && !h) {
            assert(dfa_min_width == depth(0));
            assert(dfa_max_width == depth::infinity());
        }
        return c;
    }
    const CastleProto *castle() const {
        if (!d && !h) {
            assert(dfa_min_width == depth(0));
            assert(dfa_max_width == depth::infinity());
        }
        return c;
    }
    TamaProto *tamarama() {
        if (!d && !h) {
            assert(dfa_min_width == depth(0));
            assert(dfa_max_width == depth::infinity());
        }
        return t;
    }
    const TamaProto *tamarama() const {
        if (!d && !h) {
            assert(dfa_min_width == depth(0));
            assert(dfa_max_width == depth::infinity());
        }
        return t;
    }


    raw_som_dfa *haig() { return h; }
    const raw_som_dfa *haig() const { return h; }
    raw_dfa *dfa() { return d; }
    const raw_dfa *dfa() const { return d; }

    size_t hash() const;

private:
    NGHolder *g;
    CastleProto *c;
    raw_dfa *d;
    raw_som_dfa *h;
    TamaProto *t;
    depth dfa_min_width;
    depth dfa_max_width;

    friend depth findMinWidth(const suffix_id &s);
    friend depth findMaxWidth(const suffix_id &s);
    friend depth findMinWidth(const suffix_id &s, u32 top);
    friend depth findMaxWidth(const suffix_id &s, u32 top);
};

std::set<ReportID> all_reports(const suffix_id &s);
std::set<u32> all_tops(const suffix_id &s);
bool has_eod_accepts(const suffix_id &s);
bool has_non_eod_accepts(const suffix_id &s);
depth findMinWidth(const suffix_id &s);
depth findMaxWidth(const suffix_id &s);
depth findMinWidth(const suffix_id &s, u32 top);
depth findMaxWidth(const suffix_id &s, u32 top);

/** \brief represents an engine to the left of a rose role */
struct left_id {
    left_id(const LeftEngInfo &in)
        : g(in.graph.get()), c(in.castle.get()), d(in.dfa.get()),
          h(in.haig.get()), dfa_min_width(in.dfa_min_width),
          dfa_max_width(in.dfa_max_width) {
        assert(!g || !has_managed_reports(*g));
    }
    bool operator==(const left_id &b) const {
        bool rv = g == b.g && c == b.c && h == b.h && d == b.d;
        assert(!rv || dfa_min_width == b.dfa_min_width);
        assert(!rv || dfa_max_width == b.dfa_max_width);
        return rv;
    }
    bool operator!=(const left_id &b) const { return !(*this == b); }
    bool operator<(const left_id &b) const {
        const left_id &a = *this;
        ORDER_CHECK(g);
        ORDER_CHECK(c);
        ORDER_CHECK(d);
        ORDER_CHECK(h);
        return false;
    }

    NGHolder *graph() {
        if (!d && !h) {
            assert(dfa_min_width == depth(0));
            assert(dfa_max_width == depth::infinity());
        }
        return g;
    }
    const NGHolder *graph() const {
        if (!d && !h) {
            assert(dfa_min_width == depth(0));
            assert(dfa_max_width == depth::infinity());
        }
        return g;
    }
    CastleProto *castle() {
        if (!d && !h) {
            assert(dfa_min_width == depth(0));
            assert(dfa_max_width == depth::infinity());
        }

        return c;
    }
    const CastleProto *castle() const {
        if (!d && !h) {
            assert(dfa_min_width == depth(0));
            assert(dfa_max_width == depth::infinity());
        }

        return c;
    }
    raw_som_dfa *haig() { return h; }
    const raw_som_dfa *haig() const { return h; }
    raw_dfa *dfa() { return d; }
    const raw_dfa *dfa() const { return d; }

    size_t hash() const;

private:
    NGHolder *g;
    CastleProto *c;
    raw_dfa *d;
    raw_som_dfa *h;
    depth dfa_min_width;
    depth dfa_max_width;

    friend bool isAnchored(const left_id &r);
    friend depth findMinWidth(const left_id &r);
    friend depth findMaxWidth(const left_id &r);
};

std::set<u32> all_tops(const left_id &r);
std::set<ReportID> all_reports(const left_id &left);
bool isAnchored(const left_id &r);
depth findMinWidth(const left_id &r);
depth findMaxWidth(const left_id &r);
u32 num_tops(const left_id &r);

struct rose_literal_info {
    flat_set<u32> delayed_ids;
    flat_set<RoseVertex> vertices;
    rose_group group_mask = 0;
    u32 undelayed_id = MO_INVALID_IDX;
    bool squash_group = false;
    bool requires_benefits = false;
};

/**
 * \brief Main literal struct used at Rose build time. Numeric literal IDs
 * used at build time point at these (via the RoseBuildImpl::literals map).
 */
struct rose_literal_id {
    rose_literal_id(const ue2_literal &s_in, rose_literal_table table_in,
                    u32 delay_in)
        : s(s_in), table(table_in), delay(delay_in), distinctiveness(0) {}

    rose_literal_id(const ue2_literal &s_in, const std::vector<u8> &msk_in,
                    const std::vector<u8> &cmp_in, rose_literal_table table_in,
                    u32 delay_in);

    ue2_literal s;
    std::vector<u8> msk;
    std::vector<u8> cmp;
    rose_literal_table table;
    u32 delay;
    u32 distinctiveness;

    size_t elength(void) const { return s.length() + delay; }
    size_t elength_including_mask(void) const {
        size_t mask_len = msk.size();
        for (u8 c : msk) {
            if (!c) {
                mask_len--;
            } else {
                break;
            }
        }
        return MAX(mask_len, s.length()) + delay;
    }

    bool operator==(const rose_literal_id &b) const {
        return s == b.s && msk == b.msk && cmp == b.cmp && table == b.table &&
               delay == b.delay && distinctiveness == b.distinctiveness;
    }

    size_t hash() const {
        return hash_all(s, msk, cmp, table, delay, distinctiveness);
    }
};

static inline
bool operator<(const rose_literal_id &a, const rose_literal_id &b) {
    ORDER_CHECK(distinctiveness);
    ORDER_CHECK(table);
    ORDER_CHECK(s);
    ORDER_CHECK(delay);
    ORDER_CHECK(msk);
    ORDER_CHECK(cmp);
    return 0;
}

class RoseLiteralMap {
    /**
     * \brief Main storage for literals.
     *
     * Note that this cannot be a vector, as the present code relies on
     * iterator stability when iterating over this list and adding to it inside
     * the loop.
     */
    std::deque<rose_literal_id> lits;

    /** \brief Quick-lookup index from literal -> index in lits. */
    ue2_unordered_map<rose_literal_id, u32> lits_index;

public:
    std::pair<u32, bool> insert(const rose_literal_id &lit) {
        auto it = lits_index.find(lit);
        if (it != lits_index.end()) {
            return {it->second, false};
        }
        u32 id = verify_u32(lits.size());
        lits.push_back(lit);
        lits_index.emplace(lit, id);
        return {id, true};
    }

    // Erase the last num elements.
    void erase_back(size_t num) {
        assert(num <= lits.size());
        for (size_t i = 0; i < num; i++) {
            lits_index.erase(lits.back());
            lits.pop_back();
        }
        assert(lits.size() == lits_index.size());
    }

    const rose_literal_id &at(u32 id) const {
        assert(id < lits.size());
        return lits.at(id);
    }

    using const_iterator = decltype(lits)::const_iterator;
    const_iterator begin() const { return lits.begin(); }
    const_iterator end() const { return lits.end(); }

    size_t size() const {
        return lits.size();
    }
};

struct simple_anchored_info {
    simple_anchored_info(u32 min_b, u32 max_b, const ue2_literal &lit)
    : min_bound(min_b), max_bound(max_b), literal(lit) {}
    u32 min_bound; /**< min number of characters required before literal can
                    * start matching */
    u32 max_bound; /**< max number of characters allowed before literal can
                    * start matching */
    ue2_literal literal;
};

static really_inline
bool operator<(const simple_anchored_info &a, const simple_anchored_info &b) {
    ORDER_CHECK(min_bound);
    ORDER_CHECK(max_bound);
    ORDER_CHECK(literal);
    return 0;
}

struct MpvProto {
    bool empty() const {
        return puffettes.empty() && triggered_puffettes.empty();
    }
    void reset() {
        puffettes.clear();
        triggered_puffettes.clear();
    }
    std::vector<raw_puff> puffettes;
    std::vector<raw_puff> triggered_puffettes;
};

struct OutfixInfo {
    template<class T>
    explicit OutfixInfo(std::unique_ptr<T> x) : proto(std::move(x)) {}

    explicit OutfixInfo(MpvProto mpv_in) : proto(std::move(mpv_in)) {}

    u32 get_queue(QueueIndexFactory &qif);

    u32 get_queue() const {
        assert(queue != ~0U);
        return queue;
    }

    bool is_nonempty_mpv() const {
        auto *m = boost::get<MpvProto>(&proto);
        return m && !m->empty();
    }

    bool is_dead() const {
        auto *m = boost::get<MpvProto>(&proto);
        if (m) {
            return m->empty();
        }
        return boost::get<boost::blank>(&proto) != nullptr;
    }

    void clear() {
        proto = boost::blank();
    }

    // Convenience accessor functions.

    NGHolder *holder() {
        auto *up = boost::get<std::unique_ptr<NGHolder>>(&proto);
        return up ? up->get() : nullptr;
    }
    raw_dfa *rdfa() {
        auto *up = boost::get<std::unique_ptr<raw_dfa>>(&proto);
        return up ? up->get() : nullptr;
    }
    raw_som_dfa *haig() {
        auto *up = boost::get<std::unique_ptr<raw_som_dfa>>(&proto);
        return up ? up->get() : nullptr;
    }
    MpvProto *mpv() {
        return boost::get<MpvProto>(&proto);
    }

    // Convenience const accessor functions.

    const NGHolder *holder() const {
        auto *up = boost::get<std::unique_ptr<NGHolder>>(&proto);
        return up ? up->get() : nullptr;
    }
    const raw_dfa *rdfa() const {
        auto *up = boost::get<std::unique_ptr<raw_dfa>>(&proto);
        return up ? up->get() : nullptr;
    }
    const raw_som_dfa *haig() const {
        auto *up = boost::get<std::unique_ptr<raw_som_dfa>>(&proto);
        return up ? up->get() : nullptr;
    }
    const MpvProto *mpv() const {
        return boost::get<MpvProto>(&proto);
    }

    /**
     * \brief Variant wrapping the various engine types. If this is
     * boost::blank, it means that this outfix is unused (dead).
     */
    boost::variant<
        boost::blank,
        std::unique_ptr<NGHolder>,
        std::unique_ptr<raw_dfa>,
        std::unique_ptr<raw_som_dfa>,
        MpvProto> proto = boost::blank();

    RevAccInfo rev_info;
    u32 maxBAWidth = 0; //!< max bi-anchored width
    depth minWidth{depth::infinity()};
    depth maxWidth{0};
    u64a maxOffset = 0;
    bool in_sbmatcher = false; //!< handled by small-block matcher.

private:
    u32 queue = ~0U;
};

std::set<ReportID> all_reports(const OutfixInfo &outfix);

// Concrete impl class
class RoseBuildImpl : public RoseBuild {
public:
    RoseBuildImpl(ReportManager &rm, SomSlotManager &ssm, SmallWriteBuild &smwr,
                  const CompileContext &cc, const BoundaryReports &boundary);

    ~RoseBuildImpl() override;

    // Adds a single literal.
    void add(bool anchored, bool eod, const ue2_literal &lit,
             const flat_set<ReportID> &ids) override;

    bool addRose(const RoseInGraph &ig, bool prefilter) override;
    bool addSombeRose(const RoseInGraph &ig) override;

    bool addOutfix(const NGHolder &h) override;
    bool addOutfix(const NGHolder &h, const raw_som_dfa &haig) override;
    bool addOutfix(const raw_puff &rp) override;

    bool addChainTail(const raw_puff &rp, u32 *queue_out, u32 *event_out) override;

    // Returns true if we were able to add it as a mask
    bool add(bool anchored, const std::vector<CharReach> &mask,
             const flat_set<ReportID> &reports) override;

    bool addAnchoredAcyclic(const NGHolder &graph) override;

    bool validateMask(const std::vector<CharReach> &mask,
                      const flat_set<ReportID> &reports, bool anchored,
                      bool eod) const override;
    void addMask(const std::vector<CharReach> &mask,
                 const flat_set<ReportID> &reports, bool anchored,
                 bool eod) override;

    // Construct a runtime implementation.
    bytecode_ptr<RoseEngine> buildRose(u32 minWidth) override;
    bytecode_ptr<RoseEngine> buildFinalEngine(u32 minWidth);

    void setSom() override { hasSom = true; }

    std::unique_ptr<RoseDedupeAux> generateDedupeAux() const override;

    // Find the maximum bound on the edges to this vertex's successors.
    u32 calcSuccMaxBound(RoseVertex u) const;

    /* Returns the ID of the given literal in the literal map, adding it if
     * necessary. */
    u32 getLiteralId(const ue2_literal &s, u32 delay, rose_literal_table table);

    // Variant with msk/cmp.
    u32 getLiteralId(const ue2_literal &s, const std::vector<u8> &msk,
                     const std::vector<u8> &cmp, u32 delay,
                     rose_literal_table table);

    u32 getNewLiteralId(void);

    void removeVertices(const std::vector<RoseVertex> &dead);

    // Is the Rose anchored?
    bool hasNoFloatingRoots() const;

    u32 calcHistoryRequired() const;

    rose_group getInitialGroups() const;
    rose_group getSuccGroups(RoseVertex start) const;
    rose_group getGroups(RoseVertex v) const;

    bool hasDelayedLiteral(RoseVertex v) const;
    bool hasDelayPred(RoseVertex v) const;
    bool hasLiteralInTable(RoseVertex v, enum rose_literal_table t) const;
    bool hasAnchoredTablePred(RoseVertex v) const;

    // Is the given vertex a successor of either root or anchored_root?
    bool isRootSuccessor(const RoseVertex &v) const;
    /* Is the given vertex a successor of something other than root or
     * anchored_root? */
    bool isNonRootSuccessor(const RoseVertex &v) const;

    bool isDirectReport(u32 id) const;
    bool isDelayed(u32 id) const;

    bool isAnchored(RoseVertex v) const; /* true iff has literal in anchored
                                          * table */
    bool isFloating(RoseVertex v) const; /* true iff has literal in floating
                                          * table */
    bool isInETable(RoseVertex v) const; /* true iff has literal in eod
                                          * table */

    size_t maxLiteralLen(RoseVertex v) const;
    size_t minLiteralLen(RoseVertex v) const;

    // max overlap considered for every pair (ulit, vlit).
    size_t maxLiteralOverlap(RoseVertex u, RoseVertex v) const;

    bool isPseudoStar(const RoseEdge &e) const;
    bool isPseudoStarOrFirstOnly(const RoseEdge &e) const;
    bool hasOnlyPseudoStarInEdges(RoseVertex v) const;

    bool isAnyStart(const RoseVertex &v) const {
        return v == root || v == anchored_root;
    }

    bool isVirtualVertex(const RoseVertex &v) const {
        return g[v].eod_accept || isAnyStart(v);
    }

    void handleMixedSensitivity(void);

    void findTransientLeftfixes(void);

    const CompileContext &cc;
    RoseGraph g;
    const RoseVertex root;
    const RoseVertex anchored_root;
    RoseLiteralMap literals;
    std::map<RoseVertex, RoseVertex> ghost;
    ReportID getNewNfaReport() override {
        return next_nfa_report++;
    }
    std::deque<rose_literal_info> literal_info;
    bool hasSom; //!< at least one pattern requires SOM.
    std::map<size_t, std::vector<std::unique_ptr<raw_dfa>>> anchored_nfas;
    std::map<simple_anchored_info, std::set<u32>> anchored_simple;
    std::map<u32, std::set<u32> > group_to_literal;
    u32 group_end;

    u32 ematcher_region_size; /**< number of bytes the eod table runs over */

    /** \brief Mapping from anchored literal ID to the original literal suffix
     * present when the literal was added to the literal matcher. Used for
     * overlap calculation in history assignment. */
    std::map<u32, rose_literal_id> anchoredLitSuffix;

    ue2_unordered_set<left_id> transient;
    ue2_unordered_map<left_id, rose_group> rose_squash_masks;

    std::vector<OutfixInfo> outfixes;

    /** \brief MPV outfix entry. Null if not used, and moved into the outfixes
     * list before we start building the bytecode (at which point it is set to
     * null again). */
    std::unique_ptr<OutfixInfo> mpv_outfix = nullptr;

    u32 eod_event_literal_id; // ID of EOD event literal, or MO_INVALID_IDX.

    u32 max_rose_anchored_floating_overlap;

    rose_group boundary_group_mask = 0;

    QueueIndexFactory qif;
    ReportManager &rm;
    SomSlotManager &ssm;
    SmallWriteBuild &smwr;
    const BoundaryReports &boundary;

private:
    ReportID next_nfa_report;
};

size_t calcLongLitThreshold(const RoseBuildImpl &build,
                            const size_t historyRequired);

// Free functions, in rose_build_misc.cpp

bool hasAnchHistorySucc(const RoseGraph &g, RoseVertex v);
bool hasLastByteHistorySucc(const RoseGraph &g, RoseVertex v);

size_t maxOverlap(const rose_literal_id &a, const rose_literal_id &b);
ue2_literal findNonOverlappingTail(const std::set<ue2_literal> &lits,
                                   const ue2_literal &s);

#ifndef NDEBUG
bool roseHasTops(const RoseBuildImpl &build, RoseVertex v);
bool hasOrphanedTops(const RoseBuildImpl &build);
#endif

u64a findMaxOffset(const std::set<ReportID> &reports, const ReportManager &rm);

// Function that operates on a msk/cmp pair and a literal, as used in
// hwlmLiteral, and zeroes msk elements that don't add any power to the
// literal.
void normaliseLiteralMask(const ue2_literal &s, std::vector<u8> &msk,
                          std::vector<u8> &cmp);

u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id);
u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id);

bool canEagerlyReportAtEod(const RoseBuildImpl &build, const RoseEdge &e);

#ifndef NDEBUG
bool canImplementGraphs(const RoseBuildImpl &tbi);
#endif

} // namespace ue2

namespace std {

template<>
struct hash<ue2::left_id> {
    size_t operator()(const ue2::left_id &l) const {
        return l.hash();
    }
};

template<>
struct hash<ue2::suffix_id> {
    size_t operator()(const ue2::suffix_id &s) const {
        return s.hash();
    }
};

} // namespace std

#endif /* ROSE_BUILD_IMPL_H */