author    | Ivan Blinkov <ivan@blinkov.ru>               | 2022-02-10 16:47:11 +0300
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:47:11 +0300
commit    | 5b283123c882433dafbaf6b338adeea16c1a0ea0 (patch)
tree      | 339adc63bce23800021202ae4a8328a843dc447a /contrib/libs/hyperscan/src/rose
parent    | 1aeb9a455974457866f78722ad98114bafc84e8a (diff)
download  | ydb-5b283123c882433dafbaf6b338adeea16c1a0ea0.tar.gz
Restoring authorship annotation for Ivan Blinkov <ivan@blinkov.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/hyperscan/src/rose')
71 files changed, 19598 insertions, 19598 deletions
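Note on reading the hunks below: every removed line is immediately re-added with what appears to be identical text, which matches the symmetric diffstat (19598 insertions and 19598 deletions). The difference is presumably whitespace only (for example, trailing spaces introduced by the earlier authorship pass and stripped again here); that is an inference from this page, not something stated in the commit. A minimal, self-contained Python sketch of that check follows — the helper name and the sample line pair are illustrative assumptions, not part of this repository.

def whitespace_only_change(removed: str, added: str) -> bool:
    """Return True when two diff lines differ only in whitespace."""
    return removed.split() == added.split() and removed != added

# Pair modelled on the first block.c hunk: the '-' form is assumed to carry a
# trailing space that the '+' form drops (assumption, not visible on this page).
old_line = '#include "program_runtime.h" '
new_line = '#include "program_runtime.h"'
print(whitespace_only_change(old_line, new_line))  # -> True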
diff --git a/contrib/libs/hyperscan/src/rose/block.c b/contrib/libs/hyperscan/src/rose/block.c index 9148ecb8ff..b3f424cb73 100644 --- a/contrib/libs/hyperscan/src/rose/block.c +++ b/contrib/libs/hyperscan/src/rose/block.c @@ -29,9 +29,9 @@ #include "catchup.h" #include "init.h" #include "match.h" -#include "program_runtime.h" -#include "rose.h" -#include "rose_common.h" +#include "program_runtime.h" +#include "rose.h" +#include "rose_common.h" #include "nfa/nfa_api.h" #include "nfa/nfa_internal.h" #include "nfa/nfa_rev_api.h" @@ -62,11 +62,11 @@ void runAnchoredTableBlock(const struct RoseEngine *t, const void *atable, if (nfa->type == MCCLELLAN_NFA_8) { nfaExecMcClellan8_B(nfa, curr->anchoredMinDistance, local_buffer, local_alen, - roseAnchoredCallback, scratch); + roseAnchoredCallback, scratch); } else { nfaExecMcClellan16_B(nfa, curr->anchoredMinDistance, local_buffer, local_alen, - roseAnchoredCallback, scratch); + roseAnchoredCallback, scratch); } } @@ -79,12 +79,12 @@ void runAnchoredTableBlock(const struct RoseEngine *t, const void *atable, } static really_inline -void init_state_for_block(const struct RoseEngine *t, char *state) { +void init_state_for_block(const struct RoseEngine *t, char *state) { assert(t); assert(state); - DEBUG_PRINTF("init for Rose %p with %u state indices\n", t, - t->rolesWithStateCount); + DEBUG_PRINTF("init for Rose %p with %u state indices\n", t, + t->rolesWithStateCount); // Rose is guaranteed 8-aligned state assert(ISALIGNED_N(state, 8)); @@ -94,7 +94,7 @@ void init_state_for_block(const struct RoseEngine *t, char *state) { static really_inline void init_outfixes_for_block(const struct RoseEngine *t, - struct hs_scratch *scratch, char *state, + struct hs_scratch *scratch, char *state, char is_small_block) { /* active leaf array has been cleared by the init scatter */ @@ -114,7 +114,7 @@ void init_outfixes_for_block(const struct RoseEngine *t, fatbit_set(scratch->aqa, qCount, 0); struct mq *q = scratch->queues; - initQueue(q, 0, t, scratch); + initQueue(q, 0, t, scratch); q->length = len; /* adjust for rev_accel */ nfaQueueInitState(nfa, q); pushQueueAt(q, 0, MQE_START, 0); @@ -134,7 +134,7 @@ void init_outfixes_for_block(const struct RoseEngine *t, static really_inline void init_for_block(const struct RoseEngine *t, struct hs_scratch *scratch, - char *state, char is_small_block) { + char *state, char is_small_block) { init_state_for_block(t, state); struct RoseContext *tctxt = &scratch->tctxt; @@ -159,211 +159,211 @@ void init_for_block(const struct RoseEngine *t, struct hs_scratch *scratch, init_outfixes_for_block(t, scratch, state, is_small_block); } -static rose_inline -void roseBlockEodExec(const struct RoseEngine *t, u64a offset, - struct hs_scratch *scratch) { - assert(t->requiresEodCheck); - assert(t->maxBiAnchoredWidth == ROSE_BOUND_INF - || offset <= t->maxBiAnchoredWidth); - - assert(!can_stop_matching(scratch)); - assert(t->eodProgramOffset); - - // Ensure that history is correct before we look for EOD matches. - roseFlushLastByteHistory(t, scratch, offset); - scratch->tctxt.lastEndOffset = offset; - - DEBUG_PRINTF("running eod program at %u\n", t->eodProgramOffset); - - // There should be no pending delayed literals. - assert(!scratch->tctxt.filledDelayedSlots); - - const u64a som = 0; - const u8 flags = ROSE_PROG_FLAG_SKIP_MPV_CATCHUP; - - // Note: we ignore the result, as this is the last thing to ever happen on - // a scan. 
- roseRunProgram(t, scratch, t->eodProgramOffset, som, offset, flags); -} - -/** - * \brief Run the anchored matcher, if any. Returns non-zero if matching should - * halt. - */ -static rose_inline -int roseBlockAnchored(const struct RoseEngine *t, struct hs_scratch *scratch) { - const void *atable = getALiteralMatcher(t); - if (!atable) { - DEBUG_PRINTF("no anchored table\n"); - return 0; - } - - const size_t length = scratch->core_info.len; - - if (t->amatcherMaxBiAnchoredWidth != ROSE_BOUND_INF && - length > t->amatcherMaxBiAnchoredWidth) { - return 0; - } - - if (length < t->amatcherMinWidth) { - return 0; - } - - runAnchoredTableBlock(t, atable, scratch); - - return can_stop_matching(scratch); -} - -/** - * \brief Run the floating matcher, if any. Returns non-zero if matching should - * halt. - */ -static rose_inline -int roseBlockFloating(const struct RoseEngine *t, struct hs_scratch *scratch) { - const struct HWLM *ftable = getFLiteralMatcher(t); - if (!ftable) { - return 0; - } - - const size_t length = scratch->core_info.len; - char *state = scratch->core_info.state; - struct RoseContext *tctxt = &scratch->tctxt; - - DEBUG_PRINTF("ftable fd=%u fmd %u\n", t->floatingDistance, - t->floatingMinDistance); - if (t->noFloatingRoots && !roseHasInFlightMatches(t, state, scratch)) { - DEBUG_PRINTF("skip FLOATING: no inflight matches\n"); - return 0; - } - - if (t->fmatcherMaxBiAnchoredWidth != ROSE_BOUND_INF && - length > t->fmatcherMaxBiAnchoredWidth) { - return 0; - } - - if (length < t->fmatcherMinWidth) { - return 0; - } - - const u8 *buffer = scratch->core_info.buf; - size_t flen = length; - if (t->floatingDistance != ROSE_BOUND_INF) { - flen = MIN(t->floatingDistance, length); - } - if (flen <= t->floatingMinDistance) { - return 0; - } - - DEBUG_PRINTF("BEGIN FLOATING (over %zu/%zu)\n", flen, length); - DEBUG_PRINTF("-- %016llx\n", tctxt->groups); - hwlmExec(ftable, buffer, flen, t->floatingMinDistance, roseFloatingCallback, - scratch, tctxt->groups & t->floating_group_mask); - - return can_stop_matching(scratch); -} - -static rose_inline -void runEagerPrefixesBlock(const struct RoseEngine *t, - struct hs_scratch *scratch) { - if (!t->eagerIterOffset) { - return; - } - - char *state = scratch->core_info.state; - u8 *ara = getActiveLeftArray(t, state); /* indexed by offsets into - * left_table */ - const u32 arCount = t->activeLeftCount; - const u32 qCount = t->queueCount; - const struct LeftNfaInfo *left_table = getLeftTable(t); - const struct mmbit_sparse_iter *it = getByOffset(t, t->eagerIterOffset); - - struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES]; - - u32 idx = 0; - u32 ri = mmbit_sparse_iter_begin(ara, arCount, &idx, it, si_state); - for (; ri != MMB_INVALID; - ri = mmbit_sparse_iter_next(ara, arCount, ri, &idx, it, si_state)) { - const struct LeftNfaInfo *left = left_table + ri; - u32 qi = ri + t->leftfixBeginQueue; - DEBUG_PRINTF("leftfix %u/%u, maxLag=%u\n", ri, arCount, left->maxLag); - - assert(!fatbit_isset(scratch->aqa, qCount, qi)); - assert(left->eager); - assert(!left->infix); - - struct mq *q = scratch->queues + qi; - const struct NFA *nfa = getNfaByQueue(t, qi); - - if (scratch->core_info.len < nfa->minWidth) { - /* we know that there is not enough data for this to ever match, so - * we can immediately squash/ */ - mmbit_unset(ara, arCount, ri); - scratch->tctxt.groups &= left->squash_mask; - } - - s64a loc = MIN(scratch->core_info.len, EAGER_STOP_OFFSET); - - fatbit_set(scratch->aqa, qCount, qi); - initRoseQueue(t, qi, left, scratch); - - pushQueueAt(q, 
0, MQE_START, 0); - pushQueueAt(q, 1, MQE_TOP, 0); - pushQueueAt(q, 2, MQE_END, loc); - nfaQueueInitState(nfa, q); - - char alive = nfaQueueExecToMatch(q->nfa, q, loc); - - if (!alive) { - DEBUG_PRINTF("queue %u dead, squashing\n", qi); - mmbit_unset(ara, arCount, ri); - fatbit_unset(scratch->aqa, qCount, qi); - scratch->tctxt.groups &= left->squash_mask; - } else if (q->cur == q->end) { - assert(alive != MO_MATCHES_PENDING); - if (loc == (s64a)scratch->core_info.len) { - /* We know that the prefix does not match in the block so we - * can squash the groups anyway even though it did not die */ - /* TODO: if we knew the minimum lag the leftfix is checked at we - * could make this check tighter */ - DEBUG_PRINTF("queue %u has no match in block, squashing\n", qi); - mmbit_unset(ara, arCount, ri); - fatbit_unset(scratch->aqa, qCount, qi); - scratch->tctxt.groups &= left->squash_mask; - } else { - DEBUG_PRINTF("queue %u finished, nfa lives\n", qi); - q->cur = q->end = 0; - pushQueueAt(q, 0, MQE_START, loc); - } - } else { - assert(alive == MO_MATCHES_PENDING); - DEBUG_PRINTF("queue %u unfinished, nfa lives\n", qi); - q->end--; /* remove end item */ - } - } -} - -void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch) { +static rose_inline +void roseBlockEodExec(const struct RoseEngine *t, u64a offset, + struct hs_scratch *scratch) { + assert(t->requiresEodCheck); + assert(t->maxBiAnchoredWidth == ROSE_BOUND_INF + || offset <= t->maxBiAnchoredWidth); + + assert(!can_stop_matching(scratch)); + assert(t->eodProgramOffset); + + // Ensure that history is correct before we look for EOD matches. + roseFlushLastByteHistory(t, scratch, offset); + scratch->tctxt.lastEndOffset = offset; + + DEBUG_PRINTF("running eod program at %u\n", t->eodProgramOffset); + + // There should be no pending delayed literals. + assert(!scratch->tctxt.filledDelayedSlots); + + const u64a som = 0; + const u8 flags = ROSE_PROG_FLAG_SKIP_MPV_CATCHUP; + + // Note: we ignore the result, as this is the last thing to ever happen on + // a scan. + roseRunProgram(t, scratch, t->eodProgramOffset, som, offset, flags); +} + +/** + * \brief Run the anchored matcher, if any. Returns non-zero if matching should + * halt. + */ +static rose_inline +int roseBlockAnchored(const struct RoseEngine *t, struct hs_scratch *scratch) { + const void *atable = getALiteralMatcher(t); + if (!atable) { + DEBUG_PRINTF("no anchored table\n"); + return 0; + } + + const size_t length = scratch->core_info.len; + + if (t->amatcherMaxBiAnchoredWidth != ROSE_BOUND_INF && + length > t->amatcherMaxBiAnchoredWidth) { + return 0; + } + + if (length < t->amatcherMinWidth) { + return 0; + } + + runAnchoredTableBlock(t, atable, scratch); + + return can_stop_matching(scratch); +} + +/** + * \brief Run the floating matcher, if any. Returns non-zero if matching should + * halt. 
+ */ +static rose_inline +int roseBlockFloating(const struct RoseEngine *t, struct hs_scratch *scratch) { + const struct HWLM *ftable = getFLiteralMatcher(t); + if (!ftable) { + return 0; + } + + const size_t length = scratch->core_info.len; + char *state = scratch->core_info.state; + struct RoseContext *tctxt = &scratch->tctxt; + + DEBUG_PRINTF("ftable fd=%u fmd %u\n", t->floatingDistance, + t->floatingMinDistance); + if (t->noFloatingRoots && !roseHasInFlightMatches(t, state, scratch)) { + DEBUG_PRINTF("skip FLOATING: no inflight matches\n"); + return 0; + } + + if (t->fmatcherMaxBiAnchoredWidth != ROSE_BOUND_INF && + length > t->fmatcherMaxBiAnchoredWidth) { + return 0; + } + + if (length < t->fmatcherMinWidth) { + return 0; + } + + const u8 *buffer = scratch->core_info.buf; + size_t flen = length; + if (t->floatingDistance != ROSE_BOUND_INF) { + flen = MIN(t->floatingDistance, length); + } + if (flen <= t->floatingMinDistance) { + return 0; + } + + DEBUG_PRINTF("BEGIN FLOATING (over %zu/%zu)\n", flen, length); + DEBUG_PRINTF("-- %016llx\n", tctxt->groups); + hwlmExec(ftable, buffer, flen, t->floatingMinDistance, roseFloatingCallback, + scratch, tctxt->groups & t->floating_group_mask); + + return can_stop_matching(scratch); +} + +static rose_inline +void runEagerPrefixesBlock(const struct RoseEngine *t, + struct hs_scratch *scratch) { + if (!t->eagerIterOffset) { + return; + } + + char *state = scratch->core_info.state; + u8 *ara = getActiveLeftArray(t, state); /* indexed by offsets into + * left_table */ + const u32 arCount = t->activeLeftCount; + const u32 qCount = t->queueCount; + const struct LeftNfaInfo *left_table = getLeftTable(t); + const struct mmbit_sparse_iter *it = getByOffset(t, t->eagerIterOffset); + + struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES]; + + u32 idx = 0; + u32 ri = mmbit_sparse_iter_begin(ara, arCount, &idx, it, si_state); + for (; ri != MMB_INVALID; + ri = mmbit_sparse_iter_next(ara, arCount, ri, &idx, it, si_state)) { + const struct LeftNfaInfo *left = left_table + ri; + u32 qi = ri + t->leftfixBeginQueue; + DEBUG_PRINTF("leftfix %u/%u, maxLag=%u\n", ri, arCount, left->maxLag); + + assert(!fatbit_isset(scratch->aqa, qCount, qi)); + assert(left->eager); + assert(!left->infix); + + struct mq *q = scratch->queues + qi; + const struct NFA *nfa = getNfaByQueue(t, qi); + + if (scratch->core_info.len < nfa->minWidth) { + /* we know that there is not enough data for this to ever match, so + * we can immediately squash/ */ + mmbit_unset(ara, arCount, ri); + scratch->tctxt.groups &= left->squash_mask; + } + + s64a loc = MIN(scratch->core_info.len, EAGER_STOP_OFFSET); + + fatbit_set(scratch->aqa, qCount, qi); + initRoseQueue(t, qi, left, scratch); + + pushQueueAt(q, 0, MQE_START, 0); + pushQueueAt(q, 1, MQE_TOP, 0); + pushQueueAt(q, 2, MQE_END, loc); + nfaQueueInitState(nfa, q); + + char alive = nfaQueueExecToMatch(q->nfa, q, loc); + + if (!alive) { + DEBUG_PRINTF("queue %u dead, squashing\n", qi); + mmbit_unset(ara, arCount, ri); + fatbit_unset(scratch->aqa, qCount, qi); + scratch->tctxt.groups &= left->squash_mask; + } else if (q->cur == q->end) { + assert(alive != MO_MATCHES_PENDING); + if (loc == (s64a)scratch->core_info.len) { + /* We know that the prefix does not match in the block so we + * can squash the groups anyway even though it did not die */ + /* TODO: if we knew the minimum lag the leftfix is checked at we + * could make this check tighter */ + DEBUG_PRINTF("queue %u has no match in block, squashing\n", qi); + mmbit_unset(ara, arCount, ri); + 
fatbit_unset(scratch->aqa, qCount, qi); + scratch->tctxt.groups &= left->squash_mask; + } else { + DEBUG_PRINTF("queue %u finished, nfa lives\n", qi); + q->cur = q->end = 0; + pushQueueAt(q, 0, MQE_START, loc); + } + } else { + assert(alive == MO_MATCHES_PENDING); + DEBUG_PRINTF("queue %u unfinished, nfa lives\n", qi); + q->end--; /* remove end item */ + } + } +} + +void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch) { assert(t); assert(scratch); assert(scratch->core_info.buf); assert(mmbit_sparse_iter_state_size(t->rolesWithStateCount) < MAX_SPARSE_ITER_STATES); - // We should not have been called if we've already been told to terminate - // matching. - assert(!told_to_stop_matching(scratch)); - - // If this block is shorter than our minimum width, then no pattern in this - // RoseEngine could match. - /* minWidth checks should have already been performed by the caller */ - assert(scratch->core_info.len >= t->minWidth); - - // Similarly, we may have a maximum width (for engines constructed entirely - // of bi-anchored patterns). - /* This check is now handled by the interpreter */ - assert(t->maxBiAnchoredWidth == ROSE_BOUND_INF - || scratch->core_info.len <= t->maxBiAnchoredWidth); - + // We should not have been called if we've already been told to terminate + // matching. + assert(!told_to_stop_matching(scratch)); + + // If this block is shorter than our minimum width, then no pattern in this + // RoseEngine could match. + /* minWidth checks should have already been performed by the caller */ + assert(scratch->core_info.len >= t->minWidth); + + // Similarly, we may have a maximum width (for engines constructed entirely + // of bi-anchored patterns). + /* This check is now handled by the interpreter */ + assert(t->maxBiAnchoredWidth == ROSE_BOUND_INF + || scratch->core_info.len <= t->maxBiAnchoredWidth); + const size_t length = scratch->core_info.len; // We have optimizations for small block scans: we run a single coalesced @@ -373,9 +373,9 @@ void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch) { const char is_small_block = (length < ROSE_SMALL_BLOCK_LEN && t->sbmatcherOffset); - char *state = scratch->core_info.state; + char *state = scratch->core_info.state; - init_for_block(t, scratch, state, is_small_block); + init_for_block(t, scratch, state, is_small_block); struct RoseContext *tctxt = &scratch->tctxt; @@ -388,35 +388,35 @@ void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch) { DEBUG_PRINTF("BEGIN SMALL BLOCK (over %zu/%zu)\n", sblen, length); DEBUG_PRINTF("-- %016llx\n", tctxt->groups); hwlmExec(sbtable, scratch->core_info.buf, sblen, 0, roseCallback, - scratch, tctxt->groups); - } else { - runEagerPrefixesBlock(t, scratch); + scratch, tctxt->groups); + } else { + runEagerPrefixesBlock(t, scratch); - if (roseBlockAnchored(t, scratch)) { - return; + if (roseBlockAnchored(t, scratch)) { + return; } - if (roseBlockFloating(t, scratch)) { - return; + if (roseBlockFloating(t, scratch)) { + return; } - } + } - if (cleanUpDelayed(t, scratch, length, 0) == HWLM_TERMINATE_MATCHING) { - return; + if (cleanUpDelayed(t, scratch, length, 0) == HWLM_TERMINATE_MATCHING) { + return; } - assert(!can_stop_matching(scratch)); + assert(!can_stop_matching(scratch)); - roseCatchUpTo(t, scratch, length); + roseCatchUpTo(t, scratch, length); - if (!t->requiresEodCheck || !t->eodProgramOffset) { - DEBUG_PRINTF("no eod check required\n"); - return; + if (!t->requiresEodCheck || !t->eodProgramOffset) { + DEBUG_PRINTF("no eod check 
required\n"); + return; } - if (can_stop_matching(scratch)) { - DEBUG_PRINTF("bailing, already halted\n"); + if (can_stop_matching(scratch)) { + DEBUG_PRINTF("bailing, already halted\n"); return; } - roseBlockEodExec(t, length, scratch); + roseBlockEodExec(t, length, scratch); } diff --git a/contrib/libs/hyperscan/src/rose/catchup.c b/contrib/libs/hyperscan/src/rose/catchup.c index 98ebd2f83b..7a6648da98 100644 --- a/contrib/libs/hyperscan/src/rose/catchup.c +++ b/contrib/libs/hyperscan/src/rose/catchup.c @@ -26,20 +26,20 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** - * \file - * \brief Rose runtime: code for catching up output-exposed engines. - */ - +/** + * \file + * \brief Rose runtime: code for catching up output-exposed engines. + */ + #include "catchup.h" #include "match.h" -#include "program_runtime.h" +#include "program_runtime.h" #include "rose.h" #include "nfa/nfa_rev_api.h" #include "nfa/mpv.h" #include "som/som_runtime.h" #include "util/fatbit.h" -#include "report.h" +#include "report.h" typedef struct queue_match PQ_T; #define PQ_COMP(pqc_items, a, b) ((pqc_items)[a].loc < (pqc_items)[b].loc) @@ -48,57 +48,57 @@ typedef struct queue_match PQ_T; #include "util/pqueue.h" static really_inline -int roseNfaRunProgram(const struct RoseEngine *rose, struct hs_scratch *scratch, - u64a som, u64a offset, ReportID id, const char from_mpv) { - const u32 program = id; - u8 flags = ROSE_PROG_FLAG_IN_CATCHUP; - if (from_mpv) { - flags |= ROSE_PROG_FLAG_FROM_MPV; +int roseNfaRunProgram(const struct RoseEngine *rose, struct hs_scratch *scratch, + u64a som, u64a offset, ReportID id, const char from_mpv) { + const u32 program = id; + u8 flags = ROSE_PROG_FLAG_IN_CATCHUP; + if (from_mpv) { + flags |= ROSE_PROG_FLAG_FROM_MPV; } - roseRunProgram(rose, scratch, program, som, offset, flags); - - return can_stop_matching(scratch) ? MO_HALT_MATCHING : MO_CONTINUE_MATCHING; + roseRunProgram(rose, scratch, program, som, offset, flags); + + return can_stop_matching(scratch) ? 
MO_HALT_MATCHING : MO_CONTINUE_MATCHING; } -static rose_inline -char roseSuffixInfoIsExhausted(const struct RoseEngine *rose, - const struct NfaInfo *info, - const char *exhausted) { - if (!info->ekeyListOffset) { +static rose_inline +char roseSuffixInfoIsExhausted(const struct RoseEngine *rose, + const struct NfaInfo *info, + const char *exhausted) { + if (!info->ekeyListOffset) { return 0; } - DEBUG_PRINTF("check exhaustion -> start at %u\n", info->ekeyListOffset); + DEBUG_PRINTF("check exhaustion -> start at %u\n", info->ekeyListOffset); - /* INVALID_EKEY terminated list */ - const u32 *ekeys = getByOffset(rose, info->ekeyListOffset); - while (*ekeys != INVALID_EKEY) { - DEBUG_PRINTF("check %u\n", *ekeys); - if (!isExhausted(rose, exhausted, *ekeys)) { - DEBUG_PRINTF("not exhausted -> alive\n"); - return 0; - } - ++ekeys; + /* INVALID_EKEY terminated list */ + const u32 *ekeys = getByOffset(rose, info->ekeyListOffset); + while (*ekeys != INVALID_EKEY) { + DEBUG_PRINTF("check %u\n", *ekeys); + if (!isExhausted(rose, exhausted, *ekeys)) { + DEBUG_PRINTF("not exhausted -> alive\n"); + return 0; + } + ++ekeys; } - DEBUG_PRINTF("all ekeys exhausted -> dead\n"); - return 1; + DEBUG_PRINTF("all ekeys exhausted -> dead\n"); + return 1; } -static really_inline -char roseSuffixIsExhausted(const struct RoseEngine *rose, u32 qi, - const char *exhausted) { - DEBUG_PRINTF("check queue %u\n", qi); - const struct NfaInfo *info = getNfaInfoByQueue(rose, qi); - return roseSuffixInfoIsExhausted(rose, info, exhausted); +static really_inline +char roseSuffixIsExhausted(const struct RoseEngine *rose, u32 qi, + const char *exhausted) { + DEBUG_PRINTF("check queue %u\n", qi); + const struct NfaInfo *info = getNfaInfoByQueue(rose, qi); + return roseSuffixInfoIsExhausted(rose, info, exhausted); } static really_inline -void deactivateQueue(const struct RoseEngine *t, u8 *aa, u32 qi, - struct hs_scratch *scratch) { - u32 aaCount = t->activeArrayCount; - u32 qCount = t->queueCount; +void deactivateQueue(const struct RoseEngine *t, u8 *aa, u32 qi, + struct hs_scratch *scratch) { + u32 aaCount = t->activeArrayCount; + u32 qCount = t->queueCount; /* this is sailing close to the wind with regards to invalidating an * iteration. 
We are saved by the fact that unsetting does not clear the @@ -114,7 +114,7 @@ void ensureQueueActive(const struct RoseEngine *t, u32 qi, u32 qCount, struct mq *q, struct hs_scratch *scratch) { if (!fatbit_set(scratch->aqa, qCount, qi)) { DEBUG_PRINTF("initing %u\n", qi); - initQueue(q, qi, t, scratch); + initQueue(q, qi, t, scratch); loadStreamState(q->nfa, q, 0); pushQueueAt(q, 0, MQE_START, 0); } @@ -165,8 +165,8 @@ s64a pq_top_loc(struct catchup_pq *pq) { /* requires that we are the top item on the pq */ static really_inline -hwlmcb_rv_t runExistingNfaToNextMatch(const struct RoseEngine *t, u32 qi, - struct mq *q, s64a loc, +hwlmcb_rv_t runExistingNfaToNextMatch(const struct RoseEngine *t, u32 qi, + struct mq *q, s64a loc, struct hs_scratch *scratch, u8 *aa, char report_curr) { assert(pq_top(scratch->catchup_pq.qm)->queue == qi); @@ -197,7 +197,7 @@ hwlmcb_rv_t runExistingNfaToNextMatch(const struct RoseEngine *t, u32 qi, return HWLM_TERMINATE_MATCHING; } - deactivateQueue(t, aa, qi, scratch); + deactivateQueue(t, aa, qi, scratch); } else if (q->cur == q->end) { DEBUG_PRINTF("queue %u finished, nfa lives\n", qi); q->cur = q->end = 0; @@ -222,8 +222,8 @@ hwlmcb_rv_t runExistingNfaToNextMatch(const struct RoseEngine *t, u32 qi, } static really_inline -hwlmcb_rv_t runNewNfaToNextMatch(const struct RoseEngine *t, u32 qi, - struct mq *q, s64a loc, +hwlmcb_rv_t runNewNfaToNextMatch(const struct RoseEngine *t, u32 qi, + struct mq *q, s64a loc, struct hs_scratch *scratch, u8 *aa, s64a report_ok_loc) { assert(!q->report_current); @@ -256,7 +256,7 @@ restart: return HWLM_TERMINATE_MATCHING; } - deactivateQueue(t, aa, qi, scratch); + deactivateQueue(t, aa, qi, scratch); } else if (q->cur == q->end) { DEBUG_PRINTF("queue %u finished, nfa lives\n", qi); q->cur = q->end = 0; @@ -279,23 +279,23 @@ restart: } /* for use by mpv (chained) only */ -static -int roseNfaFinalBlastAdaptor(u64a start, u64a end, ReportID id, void *context) { - struct hs_scratch *scratch = context; - assert(scratch && scratch->magic == SCRATCH_MAGIC); - const struct RoseEngine *t = scratch->core_info.rose; +static +int roseNfaFinalBlastAdaptor(u64a start, u64a end, ReportID id, void *context) { + struct hs_scratch *scratch = context; + assert(scratch && scratch->magic == SCRATCH_MAGIC); + const struct RoseEngine *t = scratch->core_info.rose; - DEBUG_PRINTF("id=%u matched at [%llu,%llu]\n", id, start, end); + DEBUG_PRINTF("id=%u matched at [%llu,%llu]\n", id, start, end); - int cb_rv = roseNfaRunProgram(t, scratch, start, end, id, 1); + int cb_rv = roseNfaRunProgram(t, scratch, start, end, id, 1); if (cb_rv == MO_HALT_MATCHING) { return MO_HALT_MATCHING; } else if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { return MO_CONTINUE_MATCHING; } else { assert(cb_rv == MO_CONTINUE_MATCHING); - return !roseSuffixIsExhausted(t, 0, - scratch->core_info.exhaustionVector); + return !roseSuffixIsExhausted(t, 0, + scratch->core_info.exhaustionVector); } } @@ -318,7 +318,7 @@ hwlmcb_rv_t add_to_queue(const struct RoseEngine *t, struct mq *queues, if (roseSuffixInfoIsExhausted(t, info, scratch->core_info.exhaustionVector)) { - deactivateQueue(t, aa, qi, scratch); + deactivateQueue(t, aa, qi, scratch); return HWLM_CONTINUE_MATCHING; } @@ -331,7 +331,7 @@ hwlmcb_rv_t add_to_queue(const struct RoseEngine *t, struct mq *queues, ensureEnd(q, qi, loc); - return runNewNfaToNextMatch(t, qi, q, loc, scratch, aa, report_ok_loc); + return runNewNfaToNextMatch(t, qi, q, loc, scratch, aa, report_ok_loc); } static really_inline @@ -352,9 +352,9 @@ s64a 
findSecondPlace(struct catchup_pq *pq, s64a loc_limit) { } } -hwlmcb_rv_t roseCatchUpMPV_i(const struct RoseEngine *t, s64a loc, +hwlmcb_rv_t roseCatchUpMPV_i(const struct RoseEngine *t, s64a loc, struct hs_scratch *scratch) { - char *state = scratch->core_info.state; + char *state = scratch->core_info.state; struct mq *queues = scratch->queues; u8 *aa = getActiveLeafArray(t, state); UNUSED u32 aaCount = t->activeArrayCount; @@ -377,7 +377,7 @@ hwlmcb_rv_t roseCatchUpMPV_i(const struct RoseEngine *t, s64a loc, if (roseSuffixInfoIsExhausted(t, info, scratch->core_info.exhaustionVector)) { - deactivateQueue(t, aa, qi, scratch); + deactivateQueue(t, aa, qi, scratch); goto done; } @@ -392,7 +392,7 @@ hwlmcb_rv_t roseCatchUpMPV_i(const struct RoseEngine *t, s64a loc, assert(!q->report_current); - q->cb = roseNfaFinalBlastAdaptor; + q->cb = roseNfaFinalBlastAdaptor; DEBUG_PRINTF("queue %u blasting, %u/%u [%lld/%lld]\n", qi, q->cur, q->end, q->items[q->cur].location, loc); @@ -400,13 +400,13 @@ hwlmcb_rv_t roseCatchUpMPV_i(const struct RoseEngine *t, s64a loc, scratch->tctxt.mpv_inactive = 0; /* we know it is going to be an mpv, skip the indirection */ - next_pos_match_loc = nfaExecMpv_QueueExecRaw(q->nfa, q, loc); + next_pos_match_loc = nfaExecMpv_QueueExecRaw(q->nfa, q, loc); assert(!q->report_current); if (!next_pos_match_loc) { /* 0 means dead */ DEBUG_PRINTF("mpv is pining for the fjords\n"); if (can_stop_matching(scratch)) { - deactivateQueue(t, aa, qi, scratch); + deactivateQueue(t, aa, qi, scratch); return HWLM_TERMINATE_MATCHING; } @@ -441,59 +441,59 @@ done: : HWLM_CONTINUE_MATCHING; } -static really_inline -char in_mpv(const struct RoseEngine *rose, const struct hs_scratch *scratch) { - const struct RoseContext *tctxt = &scratch->tctxt; - assert(tctxt->curr_qi < rose->queueCount); - if (tctxt->curr_qi < rose->outfixBeginQueue) { - assert(getNfaByQueue(rose, tctxt->curr_qi)->type == MPV_NFA); - return 1; - } - return 0; +static really_inline +char in_mpv(const struct RoseEngine *rose, const struct hs_scratch *scratch) { + const struct RoseContext *tctxt = &scratch->tctxt; + assert(tctxt->curr_qi < rose->queueCount); + if (tctxt->curr_qi < rose->outfixBeginQueue) { + assert(getNfaByQueue(rose, tctxt->curr_qi)->type == MPV_NFA); + return 1; + } + return 0; } -static -int roseNfaBlastAdaptor(u64a start, u64a end, ReportID id, void *context) { - struct hs_scratch *scratch = context; - assert(scratch && scratch->magic == SCRATCH_MAGIC); - const struct RoseEngine *t = scratch->core_info.rose; +static +int roseNfaBlastAdaptor(u64a start, u64a end, ReportID id, void *context) { + struct hs_scratch *scratch = context; + assert(scratch && scratch->magic == SCRATCH_MAGIC); + const struct RoseEngine *t = scratch->core_info.rose; - DEBUG_PRINTF("id=%u matched at [%llu,%llu]\n", id, start, end); + DEBUG_PRINTF("id=%u matched at [%llu,%llu]\n", id, start, end); - const char from_mpv = in_mpv(t, scratch); - int cb_rv = roseNfaRunProgram(t, scratch, start, end, id, from_mpv); + const char from_mpv = in_mpv(t, scratch); + int cb_rv = roseNfaRunProgram(t, scratch, start, end, id, from_mpv); if (cb_rv == MO_HALT_MATCHING) { return MO_HALT_MATCHING; } else if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { return MO_CONTINUE_MATCHING; } else { assert(cb_rv == MO_CONTINUE_MATCHING); - return !roseSuffixIsExhausted(t, scratch->tctxt.curr_qi, - scratch->core_info.exhaustionVector); + return !roseSuffixIsExhausted(t, scratch->tctxt.curr_qi, + scratch->core_info.exhaustionVector); } } -int roseNfaAdaptor(u64a 
start, u64a end, ReportID id, void *context) { - struct hs_scratch *scratch = context; - assert(scratch && scratch->magic == SCRATCH_MAGIC); +int roseNfaAdaptor(u64a start, u64a end, ReportID id, void *context) { + struct hs_scratch *scratch = context; + assert(scratch && scratch->magic == SCRATCH_MAGIC); - DEBUG_PRINTF("id=%u matched at [%llu,%llu]\n", id, start, end); + DEBUG_PRINTF("id=%u matched at [%llu,%llu]\n", id, start, end); /* must be a external report as haig cannot directly participate in chain */ - return roseNfaRunProgram(scratch->core_info.rose, scratch, start, end, id, - 0); + return roseNfaRunProgram(scratch->core_info.rose, scratch, start, end, id, + 0); } static really_inline -char blast_queue(struct hs_scratch *scratch, struct mq *q, u32 qi, s64a to_loc, - char report_current) { - scratch->tctxt.curr_qi = qi; - q->cb = roseNfaBlastAdaptor; +char blast_queue(struct hs_scratch *scratch, struct mq *q, u32 qi, s64a to_loc, + char report_current) { + scratch->tctxt.curr_qi = qi; + q->cb = roseNfaBlastAdaptor; q->report_current = report_current; DEBUG_PRINTF("queue %u blasting, %u/%u [%lld/%lld]\n", qi, q->cur, q->end, q_cur_loc(q), to_loc); char alive = nfaQueueExec(q->nfa, q, to_loc); - q->cb = roseNfaAdaptor; + q->cb = roseNfaAdaptor; assert(!q->report_current); return alive; @@ -510,7 +510,7 @@ hwlmcb_rv_t buildSufPQ_final(const struct RoseEngine *t, s64a report_ok_loc, if (roseSuffixInfoIsExhausted(t, info, scratch->core_info.exhaustionVector)) { - deactivateQueue(t, aa, a_qi, scratch); + deactivateQueue(t, aa, a_qi, scratch); return HWLM_CONTINUE_MATCHING; } @@ -523,9 +523,9 @@ hwlmcb_rv_t buildSufPQ_final(const struct RoseEngine *t, s64a report_ok_loc, ensureEnd(q, a_qi, final_loc); - char alive = blast_queue(scratch, q, a_qi, second_place_loc, 0); + char alive = blast_queue(scratch, q, a_qi, second_place_loc, 0); - /* We have three possible outcomes: + /* We have three possible outcomes: * (1) the nfa died * (2) we completed the queue (implies that second_place_loc == final_loc) * (3) the queue ran to second_place_loc and stopped. 
In this case we need @@ -538,7 +538,7 @@ hwlmcb_rv_t buildSufPQ_final(const struct RoseEngine *t, s64a report_ok_loc, return HWLM_TERMINATE_MATCHING; } - deactivateQueue(t, aa, a_qi, scratch); + deactivateQueue(t, aa, a_qi, scratch); } else if (q->cur == q->end) { DEBUG_PRINTF("queue %u finished, nfa lives [%lld]\n", a_qi, final_loc); @@ -554,8 +554,8 @@ hwlmcb_rv_t buildSufPQ_final(const struct RoseEngine *t, s64a report_ok_loc, assert(second_place_loc < final_loc); assert(q_cur_loc(q) >= second_place_loc); - if (runNewNfaToNextMatch(t, a_qi, q, final_loc, scratch, aa, - report_ok_loc) == HWLM_TERMINATE_MATCHING) { + if (runNewNfaToNextMatch(t, a_qi, q, final_loc, scratch, aa, + report_ok_loc) == HWLM_TERMINATE_MATCHING) { DEBUG_PRINTF("roseCatchUpNfas done\n"); return HWLM_TERMINATE_MATCHING; } @@ -564,7 +564,7 @@ hwlmcb_rv_t buildSufPQ_final(const struct RoseEngine *t, s64a report_ok_loc, return HWLM_CONTINUE_MATCHING; } -void streamInitSufPQ(const struct RoseEngine *t, char *state, +void streamInitSufPQ(const struct RoseEngine *t, char *state, struct hs_scratch *scratch) { assert(scratch->catchup_pq.qm_size == 0); assert(t->outfixBeginQueue != t->outfixEndQueue); @@ -595,7 +595,7 @@ void streamInitSufPQ(const struct RoseEngine *t, char *state, pq_insert_with(&scratch->catchup_pq, scratch, qi, qcl); } else if (!alive) { - deactivateQueue(t, aa, qi, scratch); + deactivateQueue(t, aa, qi, scratch); } else { assert(q->cur == q->end); /* TODO: can this be simplified? the nfa will never produce any @@ -609,7 +609,7 @@ void streamInitSufPQ(const struct RoseEngine *t, char *state, } } -void blockInitSufPQ(const struct RoseEngine *t, char *state, +void blockInitSufPQ(const struct RoseEngine *t, char *state, struct hs_scratch *scratch, char is_small_block) { DEBUG_PRINTF("initSufPQ: outfixes [%u,%u)\n", t->outfixBeginQueue, t->outfixEndQueue); @@ -642,7 +642,7 @@ void blockInitSufPQ(const struct RoseEngine *t, char *state, mmbit_set(aa, aaCount, qi); fatbit_set(aqa, qCount, qi); struct mq *q = queues + qi; - initQueue(q, qi, t, scratch); + initQueue(q, qi, t, scratch); q->length = len; /* adjust for rev_accel */ nfaQueueInitState(nfa, q); pushQueueAt(q, 0, MQE_START, 0); @@ -659,7 +659,7 @@ void blockInitSufPQ(const struct RoseEngine *t, char *state, pq_insert_with(&scratch->catchup_pq, scratch, qi, qcl); } else if (!alive) { - deactivateQueue(t, aa, qi, scratch); + deactivateQueue(t, aa, qi, scratch); } else { assert(q->cur == q->end); /* TODO: can this be simplified? the nfa will never produce any @@ -675,7 +675,7 @@ void blockInitSufPQ(const struct RoseEngine *t, char *state, * safe_loc is ??? */ static rose_inline -hwlmcb_rv_t buildSufPQ(const struct RoseEngine *t, char *state, s64a safe_loc, +hwlmcb_rv_t buildSufPQ(const struct RoseEngine *t, char *state, s64a safe_loc, s64a final_loc, struct hs_scratch *scratch) { assert(scratch->catchup_pq.qm_size <= t->outfixEndQueue); @@ -714,9 +714,9 @@ hwlmcb_rv_t buildSufPQ(const struct RoseEngine *t, char *state, s64a safe_loc, s64a report_ok_loc = tctxt->minNonMpvMatchOffset + 1 - scratch->core_info.buf_offset; - hwlmcb_rv_t rv = roseCatchUpMPV(t, report_ok_loc, scratch); + hwlmcb_rv_t rv = roseCatchUpMPV(t, report_ok_loc, scratch); if (rv != HWLM_CONTINUE_MATCHING) { - DEBUG_PRINTF("terminating...\n"); + DEBUG_PRINTF("terminating...\n"); return rv; } @@ -728,7 +728,7 @@ hwlmcb_rv_t buildSufPQ(const struct RoseEngine *t, char *state, s64a safe_loc, = scratch->catchup_pq.qm_size ? 
pq_top_loc(&scratch->catchup_pq) : safe_loc; second_place_loc = MIN(second_place_loc, safe_loc); - if (n_qi == MMB_INVALID && report_ok_loc <= second_place_loc) { + if (n_qi == MMB_INVALID && report_ok_loc <= second_place_loc) { if (buildSufPQ_final(t, report_ok_loc, second_place_loc, final_loc, scratch, aa, a_qi) == HWLM_TERMINATE_MATCHING) { @@ -752,19 +752,19 @@ hwlmcb_rv_t buildSufPQ(const struct RoseEngine *t, char *state, s64a safe_loc, } static never_inline -hwlmcb_rv_t roseCatchUpNfas(const struct RoseEngine *t, s64a loc, +hwlmcb_rv_t roseCatchUpNfas(const struct RoseEngine *t, s64a loc, s64a final_loc, struct hs_scratch *scratch) { assert(t->activeArrayCount); - DEBUG_PRINTF("roseCatchUpNfas offset=%llu + %lld/%lld\n", - scratch->core_info.buf_offset, loc, final_loc); + DEBUG_PRINTF("roseCatchUpNfas offset=%llu + %lld/%lld\n", + scratch->core_info.buf_offset, loc, final_loc); DEBUG_PRINTF("min non mpv match offset %llu\n", scratch->tctxt.minNonMpvMatchOffset); - struct RoseContext *tctxt = &scratch->tctxt; - assert(scratch->core_info.buf_offset + loc >= tctxt->minNonMpvMatchOffset); - - char *state = scratch->core_info.state; + struct RoseContext *tctxt = &scratch->tctxt; + assert(scratch->core_info.buf_offset + loc >= tctxt->minNonMpvMatchOffset); + + char *state = scratch->core_info.state; struct mq *queues = scratch->queues; u8 *aa = getActiveLeafArray(t, state); @@ -785,7 +785,7 @@ hwlmcb_rv_t roseCatchUpNfas(const struct RoseEngine *t, s64a loc, } /* catch up char matches to this point */ - if (roseCatchUpMPV(t, match_loc, scratch) + if (roseCatchUpMPV(t, match_loc, scratch) == HWLM_TERMINATE_MATCHING) { DEBUG_PRINTF("roseCatchUpNfas done\n"); return HWLM_TERMINATE_MATCHING; @@ -812,14 +812,14 @@ hwlmcb_rv_t roseCatchUpNfas(const struct RoseEngine *t, s64a loc, DEBUG_PRINTF("second place %lld loc %lld\n", second_place_loc, loc); if (second_place_loc == q_cur_loc(q)) { - if (runExistingNfaToNextMatch(t, qi, q, q_final_loc, scratch, aa, 1) + if (runExistingNfaToNextMatch(t, qi, q, q_final_loc, scratch, aa, 1) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } continue; } - char alive = blast_queue(scratch, q, qi, second_place_loc, 1); + char alive = blast_queue(scratch, q, qi, second_place_loc, 1); if (!alive) { if (can_stop_matching(scratch)) { @@ -827,7 +827,7 @@ hwlmcb_rv_t roseCatchUpNfas(const struct RoseEngine *t, s64a loc, return HWLM_TERMINATE_MATCHING; } - deactivateQueue(t, aa, qi, scratch); + deactivateQueue(t, aa, qi, scratch); pq_pop_nice(&scratch->catchup_pq); } else if (q->cur == q->end) { DEBUG_PRINTF("queue %u finished, nfa lives [%lld]\n", qi, loc); @@ -841,7 +841,7 @@ hwlmcb_rv_t roseCatchUpNfas(const struct RoseEngine *t, s64a loc, } else { DEBUG_PRINTF("queue %u not finished, %u/%u [%lld/%lld]\n", qi, q->cur, q->end, q->items[q->cur].location, loc); - runExistingNfaToNextMatch(t, qi, q, q_final_loc, scratch, aa, 0); + runExistingNfaToNextMatch(t, qi, q, q_final_loc, scratch, aa, 0); } } exit:; @@ -858,23 +858,23 @@ hwlmcb_rv_t roseCatchUpAll(s64a loc, struct hs_scratch *scratch) { assert(scratch->core_info.buf_offset + loc > scratch->tctxt.minNonMpvMatchOffset); - const struct RoseEngine *t = scratch->core_info.rose; - char *state = scratch->core_info.state; - - hwlmcb_rv_t rv = buildSufPQ(t, state, loc, loc, scratch); + const struct RoseEngine *t = scratch->core_info.rose; + char *state = scratch->core_info.state; + + hwlmcb_rv_t rv = buildSufPQ(t, state, loc, loc, scratch); if (rv != HWLM_CONTINUE_MATCHING) { return rv; } - rv = 
roseCatchUpNfas(t, loc, loc, scratch); + rv = roseCatchUpNfas(t, loc, loc, scratch); if (rv != HWLM_CONTINUE_MATCHING) { return rv; } - rv = roseCatchUpMPV(t, loc, scratch); + rv = roseCatchUpMPV(t, loc, scratch); assert(rv != HWLM_CONTINUE_MATCHING - || scratch->catchup_pq.qm_size <= t->outfixEndQueue); - assert(!can_stop_matching(scratch) || rv == HWLM_TERMINATE_MATCHING); + || scratch->catchup_pq.qm_size <= t->outfixEndQueue); + assert(!can_stop_matching(scratch) || rv == HWLM_TERMINATE_MATCHING); return rv; } @@ -884,17 +884,17 @@ hwlmcb_rv_t roseCatchUpSuf(s64a loc, struct hs_scratch *scratch) { assert(scratch->core_info.buf_offset + loc > scratch->tctxt.minNonMpvMatchOffset); - const struct RoseEngine *t = scratch->core_info.rose; - char *state = scratch->core_info.state; - - hwlmcb_rv_t rv = buildSufPQ(t, state, loc, loc, scratch); + const struct RoseEngine *t = scratch->core_info.rose; + char *state = scratch->core_info.state; + + hwlmcb_rv_t rv = buildSufPQ(t, state, loc, loc, scratch); if (rv != HWLM_CONTINUE_MATCHING) { return rv; } - rv = roseCatchUpNfas(t, loc, loc, scratch); - assert(rv != HWLM_CONTINUE_MATCHING || - scratch->catchup_pq.qm_size <= t->outfixEndQueue); + rv = roseCatchUpNfas(t, loc, loc, scratch); + assert(rv != HWLM_CONTINUE_MATCHING || + scratch->catchup_pq.qm_size <= t->outfixEndQueue); return rv; } diff --git a/contrib/libs/hyperscan/src/rose/catchup.h b/contrib/libs/hyperscan/src/rose/catchup.h index 978a8c7007..8188d5af01 100644 --- a/contrib/libs/hyperscan/src/rose/catchup.h +++ b/contrib/libs/hyperscan/src/rose/catchup.h @@ -26,25 +26,25 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** - * \file - * \brief Rose runtime: code for catching up output-exposed engines. - * - * Rose has several components which run behind the main (floating table) clock - * and need to be caught up before we report matches. - * - * Currently we have to deal with: - * 1. Suffix/Outfix NFAs - * 2. A single MPV NFA (chained), which may also be triggered by (1). - * - * The approach is to: - * - (A) build a priority queue of the suffix/outfixes based on their first - * match location; - * - (B) process the matches from the priority queue in order; - * - (C) As we report matches from (B) we interleave matches from the MPV if it - * exists. - */ - +/** + * \file + * \brief Rose runtime: code for catching up output-exposed engines. + * + * Rose has several components which run behind the main (floating table) clock + * and need to be caught up before we report matches. + * + * Currently we have to deal with: + * 1. Suffix/Outfix NFAs + * 2. A single MPV NFA (chained), which may also be triggered by (1). + * + * The approach is to: + * - (A) build a priority queue of the suffix/outfixes based on their first + * match location; + * - (B) process the matches from the priority queue in order; + * - (C) As we report matches from (B) we interleave matches from the MPV if it + * exists. 
+ */ + #ifndef ROSE_CATCHUP_H #define ROSE_CATCHUP_H @@ -59,74 +59,74 @@ hwlmcb_rv_t roseCatchUpAll(s64a loc, struct hs_scratch *scratch); -/* will only catch mpv up to last reported external match */ +/* will only catch mpv up to last reported external match */ hwlmcb_rv_t roseCatchUpSuf(s64a loc, struct hs_scratch *scratch); -hwlmcb_rv_t roseCatchUpMPV_i(const struct RoseEngine *t, s64a loc, +hwlmcb_rv_t roseCatchUpMPV_i(const struct RoseEngine *t, s64a loc, struct hs_scratch *scratch); -void blockInitSufPQ(const struct RoseEngine *t, char *state, +void blockInitSufPQ(const struct RoseEngine *t, char *state, struct hs_scratch *scratch, char is_small_block); -void streamInitSufPQ(const struct RoseEngine *t, char *state, +void streamInitSufPQ(const struct RoseEngine *t, char *state, struct hs_scratch *scratch); static really_inline -int canSkipCatchUpMPV(const struct RoseEngine *t, struct hs_scratch *scratch, - u64a cur_offset) { +int canSkipCatchUpMPV(const struct RoseEngine *t, struct hs_scratch *scratch, + u64a cur_offset) { if (!has_chained_nfas(t)) { - return 1; + return 1; } /* note: we may have to run at less than tctxt.minMatchOffset as we may * have a full queue of postponed events that we need to flush */ if (cur_offset < scratch->tctxt.next_mpv_offset) { - DEBUG_PRINTF("skipping cur_offset %llu min %llu, mpv %llu\n", + DEBUG_PRINTF("skipping cur_offset %llu min %llu, mpv %llu\n", cur_offset, scratch->tctxt.minMatchOffset, scratch->tctxt.next_mpv_offset); - return 1; + return 1; } assert(t->activeArrayCount); - DEBUG_PRINTF("cur offset offset: %llu\n", cur_offset); + DEBUG_PRINTF("cur offset offset: %llu\n", cur_offset); DEBUG_PRINTF("min match offset %llu\n", scratch->tctxt.minMatchOffset); assert(t->outfixBeginQueue == 1); /* if it exists mpv is queue 0 */ - const u8 *aa = getActiveLeafArray(t, scratch->core_info.state); - return !mmbit_isset(aa, t->activeArrayCount, 0); -} - -/** \brief Catches up the MPV. */ -static really_inline -hwlmcb_rv_t roseCatchUpMPV(const struct RoseEngine *t, s64a loc, - struct hs_scratch *scratch) { - u64a cur_offset = loc + scratch->core_info.buf_offset; - assert(cur_offset >= scratch->tctxt.minMatchOffset); - assert(!can_stop_matching(scratch)); - - if (canSkipCatchUpMPV(t, scratch, cur_offset)) { + const u8 *aa = getActiveLeafArray(t, scratch->core_info.state); + return !mmbit_isset(aa, t->activeArrayCount, 0); +} + +/** \brief Catches up the MPV. */ +static really_inline +hwlmcb_rv_t roseCatchUpMPV(const struct RoseEngine *t, s64a loc, + struct hs_scratch *scratch) { + u64a cur_offset = loc + scratch->core_info.buf_offset; + assert(cur_offset >= scratch->tctxt.minMatchOffset); + assert(!can_stop_matching(scratch)); + + if (canSkipCatchUpMPV(t, scratch, cur_offset)) { if (t->flushCombProgramOffset) { if (roseRunFlushCombProgram(t, scratch, cur_offset) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } } - updateMinMatchOffsetFromMpv(&scratch->tctxt, cur_offset); - return HWLM_CONTINUE_MATCHING; + updateMinMatchOffsetFromMpv(&scratch->tctxt, cur_offset); + return HWLM_CONTINUE_MATCHING; } /* Note: chained tails MUST not participate in the priority queue as * they may have events pushed on during this process which may be before * the catch up point */ - return roseCatchUpMPV_i(t, loc, scratch); + return roseCatchUpMPV_i(t, loc, scratch); } -/** \brief Catches up NFAs and the MPV. */ +/** \brief Catches up NFAs and the MPV. 
*/ static rose_inline -hwlmcb_rv_t roseCatchUpTo(const struct RoseEngine *t, - struct hs_scratch *scratch, u64a end) { +hwlmcb_rv_t roseCatchUpTo(const struct RoseEngine *t, + struct hs_scratch *scratch, u64a end) { /* no need to catch up if we are at the same offset as last time */ if (end <= scratch->tctxt.minMatchOffset) { /* we must already be up to date */ @@ -134,48 +134,48 @@ hwlmcb_rv_t roseCatchUpTo(const struct RoseEngine *t, return HWLM_CONTINUE_MATCHING; } - char *state = scratch->core_info.state; + char *state = scratch->core_info.state; s64a loc = end - scratch->core_info.buf_offset; if (end <= scratch->tctxt.minNonMpvMatchOffset) { /* only need to catch up the mpv */ - return roseCatchUpMPV(t, loc, scratch); + return roseCatchUpMPV(t, loc, scratch); } assert(scratch->tctxt.minMatchOffset >= scratch->core_info.buf_offset); hwlmcb_rv_t rv; - if (!t->activeArrayCount - || !mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)) { + if (!t->activeArrayCount + || !mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)) { if (t->flushCombProgramOffset) { if (roseRunFlushCombProgram(t, scratch, end) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } } - updateMinMatchOffset(&scratch->tctxt, end); - rv = HWLM_CONTINUE_MATCHING; + updateMinMatchOffset(&scratch->tctxt, end); + rv = HWLM_CONTINUE_MATCHING; } else { - rv = roseCatchUpAll(loc, scratch); + rv = roseCatchUpAll(loc, scratch); } assert(rv != HWLM_CONTINUE_MATCHING || scratch->tctxt.minMatchOffset == end); assert(rv != HWLM_CONTINUE_MATCHING || scratch->tctxt.minNonMpvMatchOffset == end); - assert(!can_stop_matching(scratch) || rv == HWLM_TERMINATE_MATCHING); + assert(!can_stop_matching(scratch) || rv == HWLM_TERMINATE_MATCHING); return rv; } -/** - * \brief Catches up anything which may add triggers on the MPV (suffixes and - * outfixes). - * - * The MPV will be run only to intersperse matches in the output match stream - * if external matches are raised. - */ +/** + * \brief Catches up anything which may add triggers on the MPV (suffixes and + * outfixes). + * + * The MPV will be run only to intersperse matches in the output match stream + * if external matches are raised. + */ static rose_inline -hwlmcb_rv_t roseCatchUpMpvFeeders(const struct RoseEngine *t, - struct hs_scratch *scratch, u64a end) { +hwlmcb_rv_t roseCatchUpMpvFeeders(const struct RoseEngine *t, + struct hs_scratch *scratch, u64a end) { /* no need to catch up if we are at the same offset as last time */ if (end <= scratch->tctxt.minNonMpvMatchOffset) { /* we must already be up to date */ @@ -188,20 +188,20 @@ hwlmcb_rv_t roseCatchUpMpvFeeders(const struct RoseEngine *t, assert(t->activeArrayCount); /* mpv is in active array */ assert(scratch->tctxt.minMatchOffset >= scratch->core_info.buf_offset); - if (!t->mpvTriggeredByLeaf) { - /* no need to check as they never put triggers onto the mpv */ - return HWLM_CONTINUE_MATCHING; - } - - /* sadly, this branch rarely gets taken as the mpv itself is usually - * alive. */ - char *state = scratch->core_info.state; - if (!mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)) { - scratch->tctxt.minNonMpvMatchOffset = end; - return HWLM_CONTINUE_MATCHING; + if (!t->mpvTriggeredByLeaf) { + /* no need to check as they never put triggers onto the mpv */ + return HWLM_CONTINUE_MATCHING; + } + + /* sadly, this branch rarely gets taken as the mpv itself is usually + * alive. 
*/ + char *state = scratch->core_info.state; + if (!mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)) { + scratch->tctxt.minNonMpvMatchOffset = end; + return HWLM_CONTINUE_MATCHING; } - - return roseCatchUpSuf(loc, scratch); + + return roseCatchUpSuf(loc, scratch); } #endif diff --git a/contrib/libs/hyperscan/src/rose/counting_miracle.h b/contrib/libs/hyperscan/src/rose/counting_miracle.h index c68404925b..976208b738 100644 --- a/contrib/libs/hyperscan/src/rose/counting_miracle.h +++ b/contrib/libs/hyperscan/src/rose/counting_miracle.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -82,7 +82,7 @@ char roseCountingMiracleScan(u8 c, const u8 *d, const u8 *d_end, } #define GET_LO_4(chars) and128(chars, low4bits) -#define GET_HI_4(chars) rshift64_m128(andnot128(low4bits, chars), 4) +#define GET_HI_4(chars) rshift64_m128(andnot128(low4bits, chars), 4) static really_inline u32 roseCountingMiracleScanShufti(m128 mask_lo, m128 mask_hi, u8 poison, @@ -98,8 +98,8 @@ u32 roseCountingMiracleScanShufti(m128 mask_lo, m128 mask_hi, u8 poison, for (; d + 16 <= d_end; d_end -= 16) { m128 data = loadu128(d_end - 16); - m128 c_lo = pshufb_m128(mask_lo, GET_LO_4(data)); - m128 c_hi = pshufb_m128(mask_hi, GET_HI_4(data)); + m128 c_lo = pshufb_m128(mask_lo, GET_LO_4(data)); + m128 c_hi = pshufb_m128(mask_hi, GET_HI_4(data)); m128 t = and128(c_lo, c_hi); u32 z1 = movemask128(eq128(t, zeroes)); count += popcount32(z1 ^ 0xffff); @@ -117,8 +117,8 @@ u32 roseCountingMiracleScanShufti(m128 mask_lo, m128 mask_hi, u8 poison, memset(temp, poison, sizeof(temp)); memcpy(temp, d, d_end - d); m128 data = loadu128(temp); - m128 c_lo = pshufb_m128(mask_lo, GET_LO_4(data)); - m128 c_hi = pshufb_m128(mask_hi, GET_HI_4(data)); + m128 c_lo = pshufb_m128(mask_lo, GET_LO_4(data)); + m128 c_hi = pshufb_m128(mask_hi, GET_HI_4(data)); m128 t = and128(c_lo, c_hi); u32 z1 = movemask128(eq128(t, zeroes)); count += popcount32(z1 ^ 0xffff); diff --git a/contrib/libs/hyperscan/src/rose/infix.h b/contrib/libs/hyperscan/src/rose/infix.h index cfae16abd9..9cf9c0ad74 100644 --- a/contrib/libs/hyperscan/src/rose/infix.h +++ b/contrib/libs/hyperscan/src/rose/infix.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -32,7 +32,7 @@ #include "ue2common.h" #include "nfa/nfa_api.h" #include "nfa/nfa_api_queue.h" -#include "nfa/nfa_internal.h" +#include "nfa/nfa_internal.h" static really_inline int infixTooOld(struct mq *q, s64a curr_loc) { @@ -45,27 +45,27 @@ int infixTooOld(struct mq *q, s64a curr_loc) { return q_last_loc(q) + maxAge < curr_loc; } -static really_inline -int canReduceQueue(const struct mq *q, s64a curr_loc, u32 maxTops, u32 maxAge) { - u32 qlen = q->end - q->cur; /* includes MQE_START */ - - if (maxAge && q->items[q->cur].location + maxAge < curr_loc) { - return 1; - } - - if (qlen - 1 > maxTops) { - return 1; - } - - if (qlen - 1 == maxTops - && q->items[q->cur].location != q->items[q->cur + 1].location) { - /* we can advance start to the first top location */ - return 1; - } - - return 0; -} - +static really_inline +int canReduceQueue(const struct mq *q, s64a curr_loc, u32 maxTops, u32 maxAge) { 
+ u32 qlen = q->end - q->cur; /* includes MQE_START */ + + if (maxAge && q->items[q->cur].location + maxAge < curr_loc) { + return 1; + } + + if (qlen - 1 > maxTops) { + return 1; + } + + if (qlen - 1 == maxTops + && q->items[q->cur].location != q->items[q->cur + 1].location) { + /* we can advance start to the first top location */ + return 1; + } + + return 0; +} + /** * Removes tops which are known not to affect the final state from the queue. * May also reinitialise the engine state if it is unneeded. @@ -84,14 +84,14 @@ int canReduceQueue(const struct mq *q, s64a curr_loc, u32 maxTops, u32 maxAge) { * engine. */ static really_inline -void reduceInfixQueue(struct mq *q, s64a curr_loc, u32 maxTops, u32 maxAge) { +void reduceInfixQueue(struct mq *q, s64a curr_loc, u32 maxTops, u32 maxAge) { assert(q->end > q->cur); assert(maxTops); u32 qlen = q->end - q->cur; /* includes MQE_START */ DEBUG_PRINTF("q=%p, len=%u, maxTops=%u maxAge=%u\n", q, qlen, maxTops, maxAge); - if (!canReduceQueue(q, curr_loc, maxTops, maxAge)) { + if (!canReduceQueue(q, curr_loc, maxTops, maxAge)) { DEBUG_PRINTF("nothing to do\n"); return; } diff --git a/contrib/libs/hyperscan/src/rose/init.c b/contrib/libs/hyperscan/src/rose/init.c index 0c6a1cfcde..025ecca0d6 100644 --- a/contrib/libs/hyperscan/src/rose/init.c +++ b/contrib/libs/hyperscan/src/rose/init.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -42,14 +42,14 @@ #include <string.h> static really_inline -void init_rstate(const struct RoseEngine *t, char *state) { - // Set runtime state: we take our initial groups from the RoseEngine. +void init_rstate(const struct RoseEngine *t, char *state) { + // Set runtime state: we take our initial groups from the RoseEngine. 
DEBUG_PRINTF("setting initial groups to 0x%016llx\n", t->initialGroups); storeGroups(t, state, t->initialGroups); } static really_inline -void init_outfixes(const struct RoseEngine *t, char *state) { +void init_outfixes(const struct RoseEngine *t, char *state) { /* The active leaf array has been init'ed by the scatter with outfix * bits set on */ @@ -71,12 +71,12 @@ void init_outfixes(const struct RoseEngine *t, char *state) { } } -void roseInitState(const struct RoseEngine *t, char *state) { +void roseInitState(const struct RoseEngine *t, char *state) { assert(t); assert(state); - DEBUG_PRINTF("init for Rose %p with %u state indices)\n", t, - t->rolesWithStateCount); + DEBUG_PRINTF("init for Rose %p with %u state indices)\n", t, + t->rolesWithStateCount); // Rose is guaranteed 8-aligned state assert(ISALIGNED_N(state, 8)); diff --git a/contrib/libs/hyperscan/src/rose/init.h b/contrib/libs/hyperscan/src/rose/init.h index bb622e6e0b..b37053b261 100644 --- a/contrib/libs/hyperscan/src/rose/init.h +++ b/contrib/libs/hyperscan/src/rose/init.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -39,7 +39,7 @@ */ static really_inline -void init_state(const struct RoseEngine *t, char *state) { +void init_state(const struct RoseEngine *t, char *state) { scatter(state, t, &t->state_init); } diff --git a/contrib/libs/hyperscan/src/rose/match.c b/contrib/libs/hyperscan/src/rose/match.c index 7fdacd22a3..84d3b1fdc2 100644 --- a/contrib/libs/hyperscan/src/rose/match.c +++ b/contrib/libs/hyperscan/src/rose/match.c @@ -28,7 +28,7 @@ #include "catchup.h" #include "match.h" -#include "program_runtime.h" +#include "program_runtime.h" #include "rose.h" #include "util/bitutils.h" #include "util/fatbit.h" @@ -66,32 +66,32 @@ void printMatch(const struct core_info *ci, u64a start, u64a end) { } #endif -hwlmcb_rv_t roseDelayRebuildCallback(size_t end, u32 id, - struct hs_scratch *scratch) { +hwlmcb_rv_t roseDelayRebuildCallback(size_t end, u32 id, + struct hs_scratch *scratch) { struct RoseContext *tctx = &scratch->tctxt; struct core_info *ci = &scratch->core_info; - const struct RoseEngine *t = ci->rose; + const struct RoseEngine *t = ci->rose; size_t rb_len = MIN(ci->hlen, t->delayRebuildLength); u64a real_end = ci->buf_offset - rb_len + end + 1; // index after last byte #ifdef DEBUG - DEBUG_PRINTF("REBUILD MATCH id=%u end offset@%llu]: ", id, real_end); - u64a start = real_end < 8 ? 1 : real_end - 7; - printMatch(ci, start, real_end); + DEBUG_PRINTF("REBUILD MATCH id=%u end offset@%llu]: ", id, real_end); + u64a start = real_end < 8 ? 
1 : real_end - 7; + printMatch(ci, start, real_end); printf("\n"); #endif - DEBUG_PRINTF("STATE groups=0x%016llx\n", tctx->groups); + DEBUG_PRINTF("STATE groups=0x%016llx\n", tctx->groups); - assert(id && id < t->size); // id is a program offset - const u64a som = 0; - const u8 flags = 0; - UNUSED hwlmcb_rv_t rv = - roseRunProgram(t, scratch, id, som, real_end, flags); - assert(rv != HWLM_TERMINATE_MATCHING); + assert(id && id < t->size); // id is a program offset + const u64a som = 0; + const u8 flags = 0; + UNUSED hwlmcb_rv_t rv = + roseRunProgram(t, scratch, id, som, real_end, flags); + assert(rv != HWLM_TERMINATE_MATCHING); - /* we are just repopulating the delay queue, groups should be + /* we are just repopulating the delay queue, groups should be * already set from the original scan. */ return tctx->groups; @@ -100,23 +100,23 @@ hwlmcb_rv_t roseDelayRebuildCallback(size_t end, u32 id, static really_inline hwlmcb_rv_t ensureMpvQueueFlushed(const struct RoseEngine *t, struct hs_scratch *scratch, u32 qi, s64a loc, - char in_chained) { - return ensureQueueFlushed_i(t, scratch, qi, loc, 1, in_chained); + char in_chained) { + return ensureQueueFlushed_i(t, scratch, qi, loc, 1, in_chained); } -hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, - struct hs_scratch *scratch, u32 event, - u64a top_squash_distance, u64a end, - char in_catchup) { - assert(event == MQE_TOP || event >= MQE_TOP_FIRST); +hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, + struct hs_scratch *scratch, u32 event, + u64a top_squash_distance, u64a end, + char in_catchup) { + assert(event == MQE_TOP || event >= MQE_TOP_FIRST); struct core_info *ci = &scratch->core_info; - u8 *aa = getActiveLeafArray(t, scratch->core_info.state); + u8 *aa = getActiveLeafArray(t, scratch->core_info.state); u32 aaCount = t->activeArrayCount; struct fatbit *activeQueues = scratch->aqa; u32 qCount = t->queueCount; - const u32 qi = 0; /* MPV is always queue 0 if it exists */ + const u32 qi = 0; /* MPV is always queue 0 if it exists */ struct mq *q = &scratch->queues[qi]; const struct NfaInfo *info = getNfaInfoByQueue(t, qi); @@ -124,7 +124,7 @@ hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, assert(loc <= (s64a)ci->len && loc >= -(s64a)ci->hlen); if (!mmbit_set(aa, aaCount, qi)) { - initQueue(q, qi, t, scratch); + initQueue(q, qi, t, scratch); nfaQueueInitState(q->nfa, q); pushQueueAt(q, 0, MQE_START, loc); fatbit_set(activeQueues, qCount, qi); @@ -133,25 +133,25 @@ hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, /* nfa only needs one top; we can go home now */ return HWLM_CONTINUE_MATCHING; } else if (!fatbit_set(activeQueues, qCount, qi)) { - initQueue(q, qi, t, scratch); + initQueue(q, qi, t, scratch); loadStreamState(q->nfa, q, 0); pushQueueAt(q, 0, MQE_START, 0); } else if (isQueueFull(q)) { DEBUG_PRINTF("queue %u full -> catching up nfas\n", qi); /* we know it is a chained nfa and the suffixes/outfixes must already * be known to be consistent */ - if (ensureMpvQueueFlushed(t, scratch, qi, loc, in_catchup) + if (ensureMpvQueueFlushed(t, scratch, qi, loc, in_catchup) == HWLM_TERMINATE_MATCHING) { - DEBUG_PRINTF("terminating...\n"); + DEBUG_PRINTF("terminating...\n"); return HWLM_TERMINATE_MATCHING; } } - if (top_squash_distance) { - assert(q->cur < q->end); + if (top_squash_distance) { + assert(q->cur < q->end); struct mq_item *last = &q->items[q->end - 1]; if (last->type == event - && last->location >= loc - (s64a)top_squash_distance) { + && last->location >= loc - (s64a)top_squash_distance) 
{ last->location = loc; goto event_enqueued; } @@ -166,7 +166,7 @@ event_enqueued: pushQueueNoMerge(q, MQE_END, loc); char alive = nfaQueueExec(q->nfa, q, loc); if (alive) { - scratch->tctxt.mpv_inactive = 0; + scratch->tctxt.mpv_inactive = 0; q->cur = q->end = 0; pushQueueAt(q, 0, MQE_START, loc); } else { @@ -176,24 +176,24 @@ event_enqueued: } DEBUG_PRINTF("added mpv event at %lld\n", loc); - scratch->tctxt.next_mpv_offset = 0; /* the top event may result in matches - * earlier than expected */ + scratch->tctxt.next_mpv_offset = 0; /* the top event may result in matches + * earlier than expected */ return HWLM_CONTINUE_MATCHING; } -int roseAnchoredCallback(u64a start, u64a end, u32 id, void *ctx) { - struct hs_scratch *scratch = ctx; - assert(scratch && scratch->magic == SCRATCH_MAGIC); - struct RoseContext *tctxt = &scratch->tctxt; +int roseAnchoredCallback(u64a start, u64a end, u32 id, void *ctx) { + struct hs_scratch *scratch = ctx; + assert(scratch && scratch->magic == SCRATCH_MAGIC); + struct RoseContext *tctxt = &scratch->tctxt; struct core_info *ci = &scratch->core_info; - const struct RoseEngine *t = ci->rose; + const struct RoseEngine *t = ci->rose; u64a real_end = ci->buf_offset + end; // index after last byte DEBUG_PRINTF("MATCH id=%u offsets=[???,%llu]\n", id, real_end); - DEBUG_PRINTF("STATE groups=0x%016llx\n", tctxt->groups); + DEBUG_PRINTF("STATE groups=0x%016llx\n", tctxt->groups); - if (can_stop_matching(scratch)) { + if (can_stop_matching(scratch)) { DEBUG_PRINTF("received a match when we're already dead!\n"); return MO_HALT_MATCHING; } @@ -206,38 +206,38 @@ int roseAnchoredCallback(u64a start, u64a end, u32 id, void *ctx) { * boundary */ if (real_end <= t->floatingMinLiteralMatchOffset) { - roseFlushLastByteHistory(t, scratch, real_end); + roseFlushLastByteHistory(t, scratch, real_end); tctxt->lastEndOffset = real_end; } - // Note that the "id" we have been handed is the program offset. - const u8 flags = ROSE_PROG_FLAG_IN_ANCHORED; - if (roseRunProgram(t, scratch, id, start, real_end, flags) - == HWLM_TERMINATE_MATCHING) { - assert(can_stop_matching(scratch)); + // Note that the "id" we have been handed is the program offset. + const u8 flags = ROSE_PROG_FLAG_IN_ANCHORED; + if (roseRunProgram(t, scratch, id, start, real_end, flags) + == HWLM_TERMINATE_MATCHING) { + assert(can_stop_matching(scratch)); DEBUG_PRINTF("caller requested termination\n"); return MO_HALT_MATCHING; } - DEBUG_PRINTF("DONE groups=0x%016llx\n", tctxt->groups); + DEBUG_PRINTF("DONE groups=0x%016llx\n", tctxt->groups); return MO_CONTINUE_MATCHING; } -/** - * \brief Run the program for the given literal ID, with the interpreter - * inlined into this call. - * - * Assumes not in_anchored. - */ +/** + * \brief Run the program for the given literal ID, with the interpreter + * inlined into this call. + * + * Assumes not in_anchored. 
+ */ static really_inline -hwlmcb_rv_t roseProcessMatchInline(const struct RoseEngine *t, - struct hs_scratch *scratch, u64a end, - u32 id) { +hwlmcb_rv_t roseProcessMatchInline(const struct RoseEngine *t, + struct hs_scratch *scratch, u64a end, + u32 id) { DEBUG_PRINTF("id=%u\n", id); - assert(id && id < t->size); // id is an offset into bytecode - const u64a som = 0; - const u8 flags = 0; + assert(id && id < t->size); // id is an offset into bytecode + const u64a som = 0; + const u8 flags = 0; if (t->pureLiteral) { return roseRunProgram_l(t, scratch, id, som, end, flags); } else { @@ -246,38 +246,38 @@ hwlmcb_rv_t roseProcessMatchInline(const struct RoseEngine *t, } static rose_inline -hwlmcb_rv_t playDelaySlot(const struct RoseEngine *t, - struct hs_scratch *scratch, - struct fatbit **delaySlots, u32 vicIndex, - u64a offset) { +hwlmcb_rv_t playDelaySlot(const struct RoseEngine *t, + struct hs_scratch *scratch, + struct fatbit **delaySlots, u32 vicIndex, + u64a offset) { /* assert(!tctxt->in_anchored); */ assert(vicIndex < DELAY_SLOT_COUNT); - const struct fatbit *vicSlot = delaySlots[vicIndex]; - u32 delay_count = t->delay_count; + const struct fatbit *vicSlot = delaySlots[vicIndex]; + u32 delay_count = t->delay_count; - if (offset < t->floatingMinLiteralMatchOffset) { + if (offset < t->floatingMinLiteralMatchOffset) { DEBUG_PRINTF("too soon\n"); return HWLM_CONTINUE_MATCHING; } - struct RoseContext *tctxt = &scratch->tctxt; - roseFlushLastByteHistory(t, scratch, offset); + struct RoseContext *tctxt = &scratch->tctxt; + roseFlushLastByteHistory(t, scratch, offset); tctxt->lastEndOffset = offset; - const u32 *programs = getByOffset(t, t->delayProgramOffset); + const u32 *programs = getByOffset(t, t->delayProgramOffset); - for (u32 it = fatbit_iterate(vicSlot, delay_count, MMB_INVALID); - it != MMB_INVALID; it = fatbit_iterate(vicSlot, delay_count, it)) { + for (u32 it = fatbit_iterate(vicSlot, delay_count, MMB_INVALID); + it != MMB_INVALID; it = fatbit_iterate(vicSlot, delay_count, it)) { UNUSED rose_group old_groups = tctxt->groups; - DEBUG_PRINTF("DELAYED MATCH id=%u offset=%llu\n", it, offset); - const u64a som = 0; - const u8 flags = 0; - hwlmcb_rv_t rv = roseRunProgram(t, scratch, programs[it], som, offset, - flags); - DEBUG_PRINTF("DONE groups=0x%016llx\n", tctxt->groups); + DEBUG_PRINTF("DELAYED MATCH id=%u offset=%llu\n", it, offset); + const u64a som = 0; + const u8 flags = 0; + hwlmcb_rv_t rv = roseRunProgram(t, scratch, programs[it], som, offset, + flags); + DEBUG_PRINTF("DONE groups=0x%016llx\n", tctxt->groups); - /* delayed literals can't safely set groups. + /* delayed literals can't safely set groups. 
* However we may be setting groups that successors already have * worked out that we don't need to match the group */ DEBUG_PRINTF("groups in %016llx out %016llx\n", old_groups, @@ -292,30 +292,30 @@ hwlmcb_rv_t playDelaySlot(const struct RoseEngine *t, } static really_inline -hwlmcb_rv_t flushAnchoredLiteralAtLoc(const struct RoseEngine *t, - struct hs_scratch *scratch, - u32 curr_loc) { - struct RoseContext *tctxt = &scratch->tctxt; - struct fatbit *curr_row = getAnchoredLiteralLog(scratch)[curr_loc - 1]; - u32 region_width = t->anchored_count; - - const u32 *programs = getByOffset(t, t->anchoredProgramOffset); - +hwlmcb_rv_t flushAnchoredLiteralAtLoc(const struct RoseEngine *t, + struct hs_scratch *scratch, + u32 curr_loc) { + struct RoseContext *tctxt = &scratch->tctxt; + struct fatbit *curr_row = getAnchoredLiteralLog(scratch)[curr_loc - 1]; + u32 region_width = t->anchored_count; + + const u32 *programs = getByOffset(t, t->anchoredProgramOffset); + DEBUG_PRINTF("report matches at curr loc\n"); - for (u32 it = fatbit_iterate(curr_row, region_width, MMB_INVALID); - it != MMB_INVALID; it = fatbit_iterate(curr_row, region_width, it)) { + for (u32 it = fatbit_iterate(curr_row, region_width, MMB_INVALID); + it != MMB_INVALID; it = fatbit_iterate(curr_row, region_width, it)) { DEBUG_PRINTF("it = %u/%u\n", it, region_width); rose_group old_groups = tctxt->groups; - DEBUG_PRINTF("ANCH REPLAY MATCH id=%u offset=%u\n", it, curr_loc); - const u64a som = 0; - const u8 flags = 0; - hwlmcb_rv_t rv = roseRunProgram(t, scratch, programs[it], som, curr_loc, - flags); - DEBUG_PRINTF("DONE groups=0x%016llx\n", tctxt->groups); - - /* anchored literals can't safely set groups. - * However we may be setting groups that successors already + DEBUG_PRINTF("ANCH REPLAY MATCH id=%u offset=%u\n", it, curr_loc); + const u64a som = 0; + const u8 flags = 0; + hwlmcb_rv_t rv = roseRunProgram(t, scratch, programs[it], som, curr_loc, + flags); + DEBUG_PRINTF("DONE groups=0x%016llx\n", tctxt->groups); + + /* anchored literals can't safely set groups. 
+ * However we may be setting groups that successors already * have worked out that we don't need to match the group */ DEBUG_PRINTF("groups in %016llx out %016llx\n", old_groups, tctxt->groups); @@ -333,22 +333,22 @@ hwlmcb_rv_t flushAnchoredLiteralAtLoc(const struct RoseEngine *t, } static really_inline -u32 anchored_it_begin(struct hs_scratch *scratch) { - struct RoseContext *tctxt = &scratch->tctxt; +u32 anchored_it_begin(struct hs_scratch *scratch) { + struct RoseContext *tctxt = &scratch->tctxt; if (tctxt->lastEndOffset >= scratch->anchored_literal_region_len) { return MMB_INVALID; } u32 begin = tctxt->lastEndOffset; begin--; - return bf64_iterate(scratch->al_log_sum, begin); + return bf64_iterate(scratch->al_log_sum, begin); } static really_inline -hwlmcb_rv_t flushAnchoredLiterals(const struct RoseEngine *t, - struct hs_scratch *scratch, +hwlmcb_rv_t flushAnchoredLiterals(const struct RoseEngine *t, + struct hs_scratch *scratch, u32 *anchored_it_param, u64a to_off) { - struct RoseContext *tctxt = &scratch->tctxt; + struct RoseContext *tctxt = &scratch->tctxt; u32 anchored_it = *anchored_it_param; /* catch up any remaining anchored matches */ for (; anchored_it != MMB_INVALID && anchored_it < to_off; @@ -356,10 +356,10 @@ hwlmcb_rv_t flushAnchoredLiterals(const struct RoseEngine *t, assert(anchored_it < scratch->anchored_literal_region_len); DEBUG_PRINTF("loc_it = %u\n", anchored_it); u32 curr_off = anchored_it + 1; - roseFlushLastByteHistory(t, scratch, curr_off); + roseFlushLastByteHistory(t, scratch, curr_off); tctxt->lastEndOffset = curr_off; - if (flushAnchoredLiteralAtLoc(t, scratch, curr_off) + if (flushAnchoredLiteralAtLoc(t, scratch, curr_off) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } @@ -370,21 +370,21 @@ hwlmcb_rv_t flushAnchoredLiterals(const struct RoseEngine *t, } static really_inline -hwlmcb_rv_t playVictims(const struct RoseEngine *t, struct hs_scratch *scratch, - u32 *anchored_it, u64a lastEnd, u64a victimDelaySlots, - struct fatbit **delaySlots) { +hwlmcb_rv_t playVictims(const struct RoseEngine *t, struct hs_scratch *scratch, + u32 *anchored_it, u64a lastEnd, u64a victimDelaySlots, + struct fatbit **delaySlots) { while (victimDelaySlots) { u32 vic = findAndClearLSB_64(&victimDelaySlots); DEBUG_PRINTF("vic = %u\n", vic); u64a vicOffset = vic + (lastEnd & ~(u64a)DELAY_MASK); - if (flushAnchoredLiterals(t, scratch, anchored_it, vicOffset) + if (flushAnchoredLiterals(t, scratch, anchored_it, vicOffset) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } - if (playDelaySlot(t, scratch, delaySlots, vic % DELAY_SLOT_COUNT, - vicOffset) == HWLM_TERMINATE_MATCHING) { + if (playDelaySlot(t, scratch, delaySlots, vic % DELAY_SLOT_COUNT, + vicOffset) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } } @@ -393,16 +393,16 @@ hwlmcb_rv_t playVictims(const struct RoseEngine *t, struct hs_scratch *scratch, } /* call flushQueuedLiterals instead */ -hwlmcb_rv_t flushQueuedLiterals_i(const struct RoseEngine *t, - struct hs_scratch *scratch, u64a currEnd) { - struct RoseContext *tctxt = &scratch->tctxt; +hwlmcb_rv_t flushQueuedLiterals_i(const struct RoseEngine *t, + struct hs_scratch *scratch, u64a currEnd) { + struct RoseContext *tctxt = &scratch->tctxt; u64a lastEnd = tctxt->delayLastEndOffset; DEBUG_PRINTF("flushing backed up matches @%llu up from %llu\n", currEnd, lastEnd); assert(currEnd != lastEnd); /* checked in main entry point */ - u32 anchored_it = anchored_it_begin(scratch); + u32 anchored_it = anchored_it_begin(scratch); if 
(!tctxt->filledDelayedSlots) { DEBUG_PRINTF("no delayed, no flush\n"); @@ -410,7 +410,7 @@ hwlmcb_rv_t flushQueuedLiterals_i(const struct RoseEngine *t, } { - struct fatbit **delaySlots = getDelaySlots(scratch); + struct fatbit **delaySlots = getDelaySlots(scratch); u32 lastIndex = lastEnd & DELAY_MASK; u32 currIndex = currEnd & DELAY_MASK; @@ -463,45 +463,45 @@ hwlmcb_rv_t flushQueuedLiterals_i(const struct RoseEngine *t, second_half, victimDelaySlots, lastIndex); } - if (playVictims(t, scratch, &anchored_it, lastEnd, victimDelaySlots, - delaySlots) == HWLM_TERMINATE_MATCHING) { + if (playVictims(t, scratch, &anchored_it, lastEnd, victimDelaySlots, + delaySlots) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } } anchored_leftovers:; - hwlmcb_rv_t rv = flushAnchoredLiterals(t, scratch, &anchored_it, currEnd); + hwlmcb_rv_t rv = flushAnchoredLiterals(t, scratch, &anchored_it, currEnd); tctxt->delayLastEndOffset = currEnd; return rv; } -static really_inline -hwlmcb_rv_t roseCallback_i(size_t end, u32 id, struct hs_scratch *scratch) { - struct RoseContext *tctx = &scratch->tctxt; - const struct RoseEngine *t = scratch->core_info.rose; - +static really_inline +hwlmcb_rv_t roseCallback_i(size_t end, u32 id, struct hs_scratch *scratch) { + struct RoseContext *tctx = &scratch->tctxt; + const struct RoseEngine *t = scratch->core_info.rose; + u64a real_end = end + tctx->lit_offset_adjust; #if defined(DEBUG) - DEBUG_PRINTF("MATCH id=%u end offset@%llu: ", id, real_end); - u64a start = real_end < 8 ? 1 : real_end - 7; - printMatch(&scratch->core_info, start, real_end); + DEBUG_PRINTF("MATCH id=%u end offset@%llu: ", id, real_end); + u64a start = real_end < 8 ? 1 : real_end - 7; + printMatch(&scratch->core_info, start, real_end); printf("\n"); #endif DEBUG_PRINTF("last end %llu\n", tctx->lastEndOffset); - DEBUG_PRINTF("STATE groups=0x%016llx\n", tctx->groups); + DEBUG_PRINTF("STATE groups=0x%016llx\n", tctx->groups); - if (can_stop_matching(scratch)) { + if (can_stop_matching(scratch)) { DEBUG_PRINTF("received a match when we're already dead!\n"); return HWLM_TERMINATE_MATCHING; } - hwlmcb_rv_t rv = flushQueuedLiterals(t, scratch, real_end); + hwlmcb_rv_t rv = flushQueuedLiterals(t, scratch, real_end); /* flushDelayed may have advanced tctx->lastEndOffset */ - if (real_end >= t->floatingMinLiteralMatchOffset) { - roseFlushLastByteHistory(t, scratch, real_end); + if (real_end >= t->floatingMinLiteralMatchOffset) { + roseFlushLastByteHistory(t, scratch, real_end); tctx->lastEndOffset = real_end; } @@ -509,72 +509,72 @@ hwlmcb_rv_t roseCallback_i(size_t end, u32 id, struct hs_scratch *scratch) { return HWLM_TERMINATE_MATCHING; } - rv = roseProcessMatchInline(t, scratch, real_end, id); + rv = roseProcessMatchInline(t, scratch, real_end, id); - DEBUG_PRINTF("DONE groups=0x%016llx\n", tctx->groups); + DEBUG_PRINTF("DONE groups=0x%016llx\n", tctx->groups); if (rv != HWLM_TERMINATE_MATCHING) { return tctx->groups; } - assert(can_stop_matching(scratch)); + assert(can_stop_matching(scratch)); DEBUG_PRINTF("user requested halt\n"); return HWLM_TERMINATE_MATCHING; } -hwlmcb_rv_t roseCallback(size_t end, u32 id, struct hs_scratch *scratch) { - return roseCallback_i(end, id, scratch); -} - -hwlmcb_rv_t roseFloatingCallback(size_t end, u32 id, - struct hs_scratch *scratch) { - const struct RoseEngine *t = scratch->core_info.rose; - - return roseCallback_i(end, id, scratch) & t->floating_group_mask; -} - -/** - * \brief Execute a boundary report program. 
- * - * Returns MO_HALT_MATCHING if the stream is exhausted or the user has - * instructed us to halt, or MO_CONTINUE_MATCHING otherwise. - */ -int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program, - u64a stream_offset, struct hs_scratch *scratch) { - DEBUG_PRINTF("running boundary program at offset %u\n", program); - - if (can_stop_matching(scratch)) { - DEBUG_PRINTF("can stop matching\n"); - return MO_HALT_MATCHING; - } - - if (rose->hasSom && scratch->deduper.current_report_offset == ~0ULL) { - /* we cannot delay the initialization of the som deduper logs any longer - * as we are reporting matches. This is done explicitly as we are - * shortcutting the som handling in the vacuous repeats as we know they - * all come from non-som patterns. */ - fatbit_clear(scratch->deduper.som_log[0]); - fatbit_clear(scratch->deduper.som_log[1]); - scratch->deduper.som_log_dirty = 0; - } - - // Keep assertions in program report path happy. At offset zero, there can - // have been no earlier reports. At EOD, all earlier reports should have - // been handled and we will have been caught up to the stream offset by the - // time we are running boundary report programs. - scratch->tctxt.minMatchOffset = stream_offset; - - const u64a som = 0; - const u8 flags = 0; - hwlmcb_rv_t rv = roseRunProgram(rose, scratch, program, som, stream_offset, - flags); - if (rv == HWLM_TERMINATE_MATCHING) { - return MO_HALT_MATCHING; - } - - return MO_CONTINUE_MATCHING; +hwlmcb_rv_t roseCallback(size_t end, u32 id, struct hs_scratch *scratch) { + return roseCallback_i(end, id, scratch); +} + +hwlmcb_rv_t roseFloatingCallback(size_t end, u32 id, + struct hs_scratch *scratch) { + const struct RoseEngine *t = scratch->core_info.rose; + + return roseCallback_i(end, id, scratch) & t->floating_group_mask; } - + +/** + * \brief Execute a boundary report program. + * + * Returns MO_HALT_MATCHING if the stream is exhausted or the user has + * instructed us to halt, or MO_CONTINUE_MATCHING otherwise. + */ +int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program, + u64a stream_offset, struct hs_scratch *scratch) { + DEBUG_PRINTF("running boundary program at offset %u\n", program); + + if (can_stop_matching(scratch)) { + DEBUG_PRINTF("can stop matching\n"); + return MO_HALT_MATCHING; + } + + if (rose->hasSom && scratch->deduper.current_report_offset == ~0ULL) { + /* we cannot delay the initialization of the som deduper logs any longer + * as we are reporting matches. This is done explicitly as we are + * shortcutting the som handling in the vacuous repeats as we know they + * all come from non-som patterns. */ + fatbit_clear(scratch->deduper.som_log[0]); + fatbit_clear(scratch->deduper.som_log[1]); + scratch->deduper.som_log_dirty = 0; + } + + // Keep assertions in program report path happy. At offset zero, there can + // have been no earlier reports. At EOD, all earlier reports should have + // been handled and we will have been caught up to the stream offset by the + // time we are running boundary report programs. + scratch->tctxt.minMatchOffset = stream_offset; + + const u64a som = 0; + const u8 flags = 0; + hwlmcb_rv_t rv = roseRunProgram(rose, scratch, program, som, stream_offset, + flags); + if (rv == HWLM_TERMINATE_MATCHING) { + return MO_HALT_MATCHING; + } + + return MO_CONTINUE_MATCHING; +} + /** * \brief Execute a flush combination program. 
* @@ -608,26 +608,26 @@ int roseRunLastFlushCombProgram(const struct RoseEngine *rose, return MO_CONTINUE_MATCHING; } -int roseReportAdaptor(u64a start, u64a end, ReportID id, void *context) { - struct hs_scratch *scratch = context; - assert(scratch && scratch->magic == SCRATCH_MAGIC); - - DEBUG_PRINTF("id=%u matched at [%llu,%llu]\n", id, start, end); - - const struct RoseEngine *rose = scratch->core_info.rose; - - // Our match ID is the program offset. - const u32 program = id; - const u8 flags = ROSE_PROG_FLAG_SKIP_MPV_CATCHUP; +int roseReportAdaptor(u64a start, u64a end, ReportID id, void *context) { + struct hs_scratch *scratch = context; + assert(scratch && scratch->magic == SCRATCH_MAGIC); + + DEBUG_PRINTF("id=%u matched at [%llu,%llu]\n", id, start, end); + + const struct RoseEngine *rose = scratch->core_info.rose; + + // Our match ID is the program offset. + const u32 program = id; + const u8 flags = ROSE_PROG_FLAG_SKIP_MPV_CATCHUP; hwlmcb_rv_t rv; if (rose->pureLiteral) { rv = roseRunProgram_l(rose, scratch, program, start, end, flags); } else { rv = roseRunProgram(rose, scratch, program, start, end, flags); } - if (rv == HWLM_TERMINATE_MATCHING) { - return MO_HALT_MATCHING; - } - - return can_stop_matching(scratch) ? MO_HALT_MATCHING : MO_CONTINUE_MATCHING; -} + if (rv == HWLM_TERMINATE_MATCHING) { + return MO_HALT_MATCHING; + } + + return can_stop_matching(scratch) ? MO_HALT_MATCHING : MO_CONTINUE_MATCHING; +} diff --git a/contrib/libs/hyperscan/src/rose/match.h b/contrib/libs/hyperscan/src/rose/match.h index 28d21391ea..c03b1ebbae 100644 --- a/contrib/libs/hyperscan/src/rose/match.h +++ b/contrib/libs/hyperscan/src/rose/match.h @@ -29,61 +29,61 @@ #ifndef ROSE_MATCH_H #define ROSE_MATCH_H -#include "catchup.h" +#include "catchup.h" #include "runtime.h" #include "scratch.h" -#include "report.h" +#include "report.h" #include "rose_common.h" #include "rose_internal.h" #include "ue2common.h" -#include "hwlm/hwlm.h" +#include "hwlm/hwlm.h" #include "nfa/nfa_api.h" #include "nfa/nfa_api_queue.h" #include "nfa/nfa_api_util.h" #include "som/som_runtime.h" #include "util/bitutils.h" -#include "util/exhaust.h" -#include "util/fatbit.h" +#include "util/exhaust.h" +#include "util/fatbit.h" #include "util/multibit.h" /* Callbacks, defined in catchup.c */ -int roseNfaAdaptor(u64a start, u64a end, ReportID id, void *context); +int roseNfaAdaptor(u64a start, u64a end, ReportID id, void *context); /* Callbacks, defined in match.c */ -hwlmcb_rv_t roseCallback(size_t end, u32 id, struct hs_scratch *scratch); -hwlmcb_rv_t roseFloatingCallback(size_t end, u32 id, - struct hs_scratch *scratch); -hwlmcb_rv_t roseDelayRebuildCallback(size_t end, u32 id, - struct hs_scratch *scratch); -int roseAnchoredCallback(u64a start, u64a end, u32 id, void *ctx); +hwlmcb_rv_t roseCallback(size_t end, u32 id, struct hs_scratch *scratch); +hwlmcb_rv_t roseFloatingCallback(size_t end, u32 id, + struct hs_scratch *scratch); +hwlmcb_rv_t roseDelayRebuildCallback(size_t end, u32 id, + struct hs_scratch *scratch); +int roseAnchoredCallback(u64a start, u64a end, u32 id, void *ctx); /* Common code, used all over Rose runtime */ -hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, - struct hs_scratch *scratch, u32 event, - u64a top_squash_distance, u64a end, - char in_catchup); +hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, + struct hs_scratch *scratch, u32 event, + u64a top_squash_distance, u64a end, + char in_catchup); /** \brief Initialize the queue for a suffix/outfix engine. 
*/ static really_inline void initQueue(struct mq *q, u32 qi, const struct RoseEngine *t, - struct hs_scratch *scratch) { + struct hs_scratch *scratch) { const struct NfaInfo *info = getNfaInfoByQueue(t, qi); assert(scratch->fullState); q->nfa = getNfaByInfo(t, info); q->end = 0; q->cur = 0; q->state = scratch->fullState + info->fullStateOffset; - q->streamState = scratch->core_info.state + info->stateOffset; + q->streamState = scratch->core_info.state + info->stateOffset; q->offset = scratch->core_info.buf_offset; q->buffer = scratch->core_info.buf; q->length = scratch->core_info.len; q->history = scratch->core_info.hbuf; q->hlength = scratch->core_info.hlen; - q->cb = roseNfaAdaptor; - q->context = scratch; + q->cb = roseNfaAdaptor; + q->context = scratch; q->report_current = 0; DEBUG_PRINTF("qi=%u, offset=%llu, fullState=%u, streamState=%u, " @@ -95,7 +95,7 @@ void initQueue(struct mq *q, u32 qi, const struct RoseEngine *t, static really_inline void initRoseQueue(const struct RoseEngine *t, u32 qi, const struct LeftNfaInfo *left, - struct hs_scratch *scratch) { + struct hs_scratch *scratch) { struct mq *q = scratch->queues + qi; const struct NfaInfo *info = getNfaInfoByQueue(t, qi); q->nfa = getNfaByInfo(t, info); @@ -110,7 +110,7 @@ void initRoseQueue(const struct RoseEngine *t, u32 qi, if (left->transient) { q->streamState = (char *)scratch->tstate + info->stateOffset; } else { - q->streamState = scratch->core_info.state + info->stateOffset; + q->streamState = scratch->core_info.state + info->stateOffset; } q->offset = scratch->core_info.buf_offset; @@ -142,7 +142,7 @@ void loadStreamState(const struct NFA *nfa, struct mq *q, s64a loc) { } static really_inline -void storeRoseDelay(const struct RoseEngine *t, char *state, +void storeRoseDelay(const struct RoseEngine *t, char *state, const struct LeftNfaInfo *left, u32 loc) { u32 di = left->lagIndex; if (di == ROSE_OFFSET_INVALID) { @@ -157,7 +157,7 @@ void storeRoseDelay(const struct RoseEngine *t, char *state, } static really_inline -void setAsZombie(const struct RoseEngine *t, char *state, +void setAsZombie(const struct RoseEngine *t, char *state, const struct LeftNfaInfo *left) { u32 di = left->lagIndex; assert(di != ROSE_OFFSET_INVALID); @@ -172,7 +172,7 @@ void setAsZombie(const struct RoseEngine *t, char *state, /* loadRoseDelay MUST NOT be called on the first stream write as it is only * initialized for running nfas on stream boundaries */ static really_inline -u32 loadRoseDelay(const struct RoseEngine *t, const char *state, +u32 loadRoseDelay(const struct RoseEngine *t, const char *state, const struct LeftNfaInfo *left) { u32 di = left->lagIndex; if (di == ROSE_OFFSET_INVALID) { @@ -186,7 +186,7 @@ u32 loadRoseDelay(const struct RoseEngine *t, const char *state, } static really_inline -char isZombie(const struct RoseEngine *t, const char *state, +char isZombie(const struct RoseEngine *t, const char *state, const struct LeftNfaInfo *left) { u32 di = left->lagIndex; assert(di != ROSE_OFFSET_INVALID); @@ -199,46 +199,46 @@ char isZombie(const struct RoseEngine *t, const char *state, return leftfixDelay[di] == OWB_ZOMBIE_ALWAYS_YES; } -hwlmcb_rv_t flushQueuedLiterals_i(const struct RoseEngine *t, - struct hs_scratch *scratch, u64a end); +hwlmcb_rv_t flushQueuedLiterals_i(const struct RoseEngine *t, + struct hs_scratch *scratch, u64a end); static really_inline -hwlmcb_rv_t flushQueuedLiterals(const struct RoseEngine *t, - struct hs_scratch *scratch, u64a end) { - struct RoseContext *tctxt = &scratch->tctxt; - +hwlmcb_rv_t 
flushQueuedLiterals(const struct RoseEngine *t, + struct hs_scratch *scratch, u64a end) { + struct RoseContext *tctxt = &scratch->tctxt; + if (tctxt->delayLastEndOffset == end) { DEBUG_PRINTF("no progress, no flush\n"); return HWLM_CONTINUE_MATCHING; } - if (!tctxt->filledDelayedSlots && !scratch->al_log_sum) { + if (!tctxt->filledDelayedSlots && !scratch->al_log_sum) { tctxt->delayLastEndOffset = end; return HWLM_CONTINUE_MATCHING; } - return flushQueuedLiterals_i(t, scratch, end); + return flushQueuedLiterals_i(t, scratch, end); } static really_inline -hwlmcb_rv_t cleanUpDelayed(const struct RoseEngine *t, - struct hs_scratch *scratch, size_t length, - u64a offset) { - if (can_stop_matching(scratch)) { +hwlmcb_rv_t cleanUpDelayed(const struct RoseEngine *t, + struct hs_scratch *scratch, size_t length, + u64a offset) { + if (can_stop_matching(scratch)) { return HWLM_TERMINATE_MATCHING; } - if (flushQueuedLiterals(t, scratch, length + offset) + if (flushQueuedLiterals(t, scratch, length + offset) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } - struct RoseContext *tctxt = &scratch->tctxt; + struct RoseContext *tctxt = &scratch->tctxt; if (tctxt->filledDelayedSlots) { DEBUG_PRINTF("dirty\n"); - scratch->core_info.status |= STATUS_DELAY_DIRTY; + scratch->core_info.status |= STATUS_DELAY_DIRTY; } else { - scratch->core_info.status &= ~STATUS_DELAY_DIRTY; + scratch->core_info.status &= ~STATUS_DELAY_DIRTY; } tctxt->filledDelayedSlots = 0; @@ -248,13 +248,13 @@ hwlmcb_rv_t cleanUpDelayed(const struct RoseEngine *t, } static rose_inline -void roseFlushLastByteHistory(const struct RoseEngine *t, - struct hs_scratch *scratch, u64a currEnd) { +void roseFlushLastByteHistory(const struct RoseEngine *t, + struct hs_scratch *scratch, u64a currEnd) { if (!t->lastByteHistoryIterOffset) { return; } - struct RoseContext *tctxt = &scratch->tctxt; + struct RoseContext *tctxt = &scratch->tctxt; struct core_info *ci = &scratch->core_info; /* currEnd is last byte of string + 1 */ @@ -266,118 +266,118 @@ void roseFlushLastByteHistory(const struct RoseEngine *t, DEBUG_PRINTF("flushing\n"); - const struct mmbit_sparse_iter *it = - getByOffset(t, t->lastByteHistoryIterOffset); - assert(ISALIGNED(it)); - + const struct mmbit_sparse_iter *it = + getByOffset(t, t->lastByteHistoryIterOffset); + assert(ISALIGNED(it)); + const u32 numStates = t->rolesWithStateCount; - void *role_state = getRoleState(scratch->core_info.state); + void *role_state = getRoleState(scratch->core_info.state); + + struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES]; + + mmbit_sparse_iter_unset(role_state, numStates, it, si_state); +} + +static rose_inline +int roseHasInFlightMatches(const struct RoseEngine *t, char *state, + const struct hs_scratch *scratch) { + if (scratch->al_log_sum) { + DEBUG_PRINTF("anchored literals in log\n"); + return 1; + } - struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES]; - - mmbit_sparse_iter_unset(role_state, numStates, it, si_state); + if (scratch->tctxt.filledDelayedSlots) { + DEBUG_PRINTF("delayed literal\n"); + return 1; + } + + if (mmbit_any(getRoleState(state), t->rolesWithStateCount)) { + DEBUG_PRINTF("role state is set\n"); + return 1; + } + + return 0; +} + +static rose_inline +hwlmcb_rv_t roseHaltIfExhausted(const struct RoseEngine *t, + struct hs_scratch *scratch) { + struct core_info *ci = &scratch->core_info; + if (isAllExhausted(t, ci->exhaustionVector)) { + ci->status |= STATUS_EXHAUSTED; + scratch->tctxt.groups = 0; + DEBUG_PRINTF("all exhausted, termination 
requested\n"); + return HWLM_TERMINATE_MATCHING; + } + + return HWLM_CONTINUE_MATCHING; +} + +static really_inline +hwlmcb_rv_t ensureQueueFlushed_i(const struct RoseEngine *t, + struct hs_scratch *scratch, u32 qi, s64a loc, + char is_mpv, char in_catchup) { + struct RoseContext *tctxt = &scratch->tctxt; + u8 *aa = getActiveLeafArray(t, scratch->core_info.state); + struct fatbit *activeQueues = scratch->aqa; + u32 aaCount = t->activeArrayCount; + u32 qCount = t->queueCount; + + struct mq *q = &scratch->queues[qi]; + DEBUG_PRINTF("qcl %lld, loc: %lld, min (non mpv) match offset: %llu\n", + q_cur_loc(q), loc, tctxt->minNonMpvMatchOffset); + if (q_cur_loc(q) == loc) { + /* too many tops enqueued at the one spot; need to flatten this queue. + * We can use the full catchups as it will short circuit as we are + * already at this location. It also saves waking everybody up */ + pushQueueNoMerge(q, MQE_END, loc); + nfaQueueExec(q->nfa, q, loc); + q->cur = q->end = 0; + pushQueueAt(q, 0, MQE_START, loc); + } else if (!in_catchup) { + if (is_mpv) { + tctxt->next_mpv_offset = 0; /* force us to catch the mpv */ + if (loc + scratch->core_info.buf_offset + <= tctxt->minNonMpvMatchOffset) { + DEBUG_PRINTF("flushing chained\n"); + if (roseCatchUpMPV(t, loc, scratch) == + HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + goto done_queue_empty; + } + } + + if (roseCatchUpTo(t, scratch, loc + scratch->core_info.buf_offset) == + HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } else { + /* we must be a chained nfa */ + assert(is_mpv); + DEBUG_PRINTF("flushing chained\n"); + tctxt->next_mpv_offset = 0; /* force us to catch the mpv */ + if (roseCatchUpMPV(t, loc, scratch) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } +done_queue_empty: + if (!mmbit_set(aa, aaCount, qi)) { + initQueue(q, qi, t, scratch); + nfaQueueInitState(q->nfa, q); + pushQueueAt(q, 0, MQE_START, loc); + fatbit_set(activeQueues, qCount, qi); + } + + assert(!isQueueFull(q)); + + return roseHaltIfExhausted(t, scratch); +} + +static rose_inline +hwlmcb_rv_t ensureQueueFlushed(const struct RoseEngine *t, + struct hs_scratch *scratch, u32 qi, s64a loc) { + return ensureQueueFlushed_i(t, scratch, qi, loc, 0, 0); } -static rose_inline -int roseHasInFlightMatches(const struct RoseEngine *t, char *state, - const struct hs_scratch *scratch) { - if (scratch->al_log_sum) { - DEBUG_PRINTF("anchored literals in log\n"); - return 1; - } - - if (scratch->tctxt.filledDelayedSlots) { - DEBUG_PRINTF("delayed literal\n"); - return 1; - } - - if (mmbit_any(getRoleState(state), t->rolesWithStateCount)) { - DEBUG_PRINTF("role state is set\n"); - return 1; - } - - return 0; -} - -static rose_inline -hwlmcb_rv_t roseHaltIfExhausted(const struct RoseEngine *t, - struct hs_scratch *scratch) { - struct core_info *ci = &scratch->core_info; - if (isAllExhausted(t, ci->exhaustionVector)) { - ci->status |= STATUS_EXHAUSTED; - scratch->tctxt.groups = 0; - DEBUG_PRINTF("all exhausted, termination requested\n"); - return HWLM_TERMINATE_MATCHING; - } - - return HWLM_CONTINUE_MATCHING; -} - -static really_inline -hwlmcb_rv_t ensureQueueFlushed_i(const struct RoseEngine *t, - struct hs_scratch *scratch, u32 qi, s64a loc, - char is_mpv, char in_catchup) { - struct RoseContext *tctxt = &scratch->tctxt; - u8 *aa = getActiveLeafArray(t, scratch->core_info.state); - struct fatbit *activeQueues = scratch->aqa; - u32 aaCount = t->activeArrayCount; - u32 qCount = t->queueCount; - - struct mq *q = &scratch->queues[qi]; - 
DEBUG_PRINTF("qcl %lld, loc: %lld, min (non mpv) match offset: %llu\n", - q_cur_loc(q), loc, tctxt->minNonMpvMatchOffset); - if (q_cur_loc(q) == loc) { - /* too many tops enqueued at the one spot; need to flatten this queue. - * We can use the full catchups as it will short circuit as we are - * already at this location. It also saves waking everybody up */ - pushQueueNoMerge(q, MQE_END, loc); - nfaQueueExec(q->nfa, q, loc); - q->cur = q->end = 0; - pushQueueAt(q, 0, MQE_START, loc); - } else if (!in_catchup) { - if (is_mpv) { - tctxt->next_mpv_offset = 0; /* force us to catch the mpv */ - if (loc + scratch->core_info.buf_offset - <= tctxt->minNonMpvMatchOffset) { - DEBUG_PRINTF("flushing chained\n"); - if (roseCatchUpMPV(t, loc, scratch) == - HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - goto done_queue_empty; - } - } - - if (roseCatchUpTo(t, scratch, loc + scratch->core_info.buf_offset) == - HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - } else { - /* we must be a chained nfa */ - assert(is_mpv); - DEBUG_PRINTF("flushing chained\n"); - tctxt->next_mpv_offset = 0; /* force us to catch the mpv */ - if (roseCatchUpMPV(t, loc, scratch) == HWLM_TERMINATE_MATCHING) { - return HWLM_TERMINATE_MATCHING; - } - } -done_queue_empty: - if (!mmbit_set(aa, aaCount, qi)) { - initQueue(q, qi, t, scratch); - nfaQueueInitState(q->nfa, q); - pushQueueAt(q, 0, MQE_START, loc); - fatbit_set(activeQueues, qCount, qi); - } - - assert(!isQueueFull(q)); - - return roseHaltIfExhausted(t, scratch); -} - -static rose_inline -hwlmcb_rv_t ensureQueueFlushed(const struct RoseEngine *t, - struct hs_scratch *scratch, u32 qi, s64a loc) { - return ensureQueueFlushed_i(t, scratch, qi, loc, 0, 0); -} - #endif diff --git a/contrib/libs/hyperscan/src/rose/program_runtime.c b/contrib/libs/hyperscan/src/rose/program_runtime.c index d8aa8b2f8c..ff5a5099c9 100644 --- a/contrib/libs/hyperscan/src/rose/program_runtime.c +++ b/contrib/libs/hyperscan/src/rose/program_runtime.c @@ -1,38 +1,38 @@ -/* +/* * Copyright (c) 2015-2020, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** - * \file - * \brief Rose runtime: program interpreter. - */ - -#include "program_runtime.h" - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * \brief Rose runtime: program interpreter. 
+ */ + +#include "program_runtime.h" + #include "catchup.h" #include "counting_miracle.h" #include "infix.h" @@ -1689,14 +1689,14 @@ int roseCheckMultipathShufti64(const struct hs_scratch *scratch, } static rose_inline -int roseNfaEarliestSom(u64a start, UNUSED u64a end, UNUSED ReportID id, - void *context) { - assert(context); - u64a *som = context; - *som = MIN(*som, start); - return MO_CONTINUE_MATCHING; -} - +int roseNfaEarliestSom(u64a start, UNUSED u64a end, UNUSED ReportID id, + void *context) { + assert(context); + u64a *som = context; + *som = MIN(*som, start); + return MO_CONTINUE_MATCHING; +} + static rose_inline u64a roseGetHaigSom(const struct RoseEngine *t, struct hs_scratch *scratch, const u32 qi, UNUSED const u32 leftfixLag) { @@ -2148,9 +2148,9 @@ hwlmcb_rv_t checkPurelyNegatives(const struct RoseEngine *t, #define PROGRAM_NEXT_INSTRUCTION_JUMP continue; #endif -hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, - struct hs_scratch *scratch, u32 programOffset, - u64a som, u64a end, u8 prog_flags) { +hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, + struct hs_scratch *scratch, u32 programOffset, + u64a som, u64a end, u8 prog_flags) { DEBUG_PRINTF("program=%u, offsets [%llu,%llu], flags=%u\n", programOffset, som, end, prog_flags); @@ -3082,7 +3082,7 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, assert(0); // unreachable return HWLM_CONTINUE_MATCHING; -} +} #define L_PROGRAM_CASE(name) \ case ROSE_INSTR_##name: { \ diff --git a/contrib/libs/hyperscan/src/rose/program_runtime.h b/contrib/libs/hyperscan/src/rose/program_runtime.h index aad2bb3666..50bf202c6f 100644 --- a/contrib/libs/hyperscan/src/rose/program_runtime.h +++ b/contrib/libs/hyperscan/src/rose/program_runtime.h @@ -1,61 +1,61 @@ -/* +/* * Copyright (c) 2015-2019, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** - * \file - * \brief Rose runtime: program interpreter. 
- */ - -#ifndef PROGRAM_RUNTIME_H -#define PROGRAM_RUNTIME_H - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * \brief Rose runtime: program interpreter. + */ + +#ifndef PROGRAM_RUNTIME_H +#define PROGRAM_RUNTIME_H + #include "hwlm/hwlm.h" // for hwlmcb_rv_t -#include "rose.h" -#include "scratch.h" -#include "ue2common.h" - -/* - * Program context flags, which control the behaviour of some instructions at - * based on runtime contexts (whether the program is triggered by the anchored - * matcher, engine catchup, etc). - */ - -#define ROSE_PROG_FLAG_IN_ANCHORED 1 -#define ROSE_PROG_FLAG_IN_CATCHUP 2 -#define ROSE_PROG_FLAG_FROM_MPV 4 -#define ROSE_PROG_FLAG_SKIP_MPV_CATCHUP 8 - -hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, - struct hs_scratch *scratch, u32 programOffset, - u64a som, u64a end, u8 prog_flags); - +#include "rose.h" +#include "scratch.h" +#include "ue2common.h" + +/* + * Program context flags, which control the behaviour of some instructions at + * based on runtime contexts (whether the program is triggered by the anchored + * matcher, engine catchup, etc). + */ + +#define ROSE_PROG_FLAG_IN_ANCHORED 1 +#define ROSE_PROG_FLAG_IN_CATCHUP 2 +#define ROSE_PROG_FLAG_FROM_MPV 4 +#define ROSE_PROG_FLAG_SKIP_MPV_CATCHUP 8 + +hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, + struct hs_scratch *scratch, u32 programOffset, + u64a som, u64a end, u8 prog_flags); + hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t, - struct hs_scratch *scratch, u32 programOffset, + struct hs_scratch *scratch, u32 programOffset, u64a som, u64a end, u8 prog_flags); - -#endif // PROGRAM_RUNTIME_H + +#endif // PROGRAM_RUNTIME_H diff --git a/contrib/libs/hyperscan/src/rose/rose.h b/contrib/libs/hyperscan/src/rose/rose.h index 4519b09de2..409b70028f 100644 --- a/contrib/libs/hyperscan/src/rose/rose.h +++ b/contrib/libs/hyperscan/src/rose/rose.h @@ -31,27 +31,27 @@ #include "ue2common.h" -struct RoseEngine; -struct hs_scratch; - +struct RoseEngine; +struct hs_scratch; + // Initialise state space for engine use. 
-void roseInitState(const struct RoseEngine *t, char *state); +void roseInitState(const struct RoseEngine *t, char *state); -/* assumes core_info in scratch has been init to point to data */ -void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch); +/* assumes core_info in scratch has been init to point to data */ +void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch); /* assumes core_info in scratch has been init to point to data */ -void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch); +void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch); -void roseStreamEodExec(const struct RoseEngine *t, u64a offset, - struct hs_scratch *scratch); +void roseStreamEodExec(const struct RoseEngine *t, u64a offset, + struct hs_scratch *scratch); -hwlmcb_rv_t roseCallback(size_t end, u32 id, struct hs_scratch *scratch); +hwlmcb_rv_t roseCallback(size_t end, u32 id, struct hs_scratch *scratch); -int roseReportAdaptor(u64a start, u64a end, ReportID id, void *context); +int roseReportAdaptor(u64a start, u64a end, ReportID id, void *context); -int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program, - u64a stream_offset, struct hs_scratch *scratch); +int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program, + u64a stream_offset, struct hs_scratch *scratch); int roseRunFlushCombProgram(const struct RoseEngine *rose, struct hs_scratch *scratch, u64a end); diff --git a/contrib/libs/hyperscan/src/rose/rose_build.h b/contrib/libs/hyperscan/src/rose/rose_build.h index 1640a660bd..ca3ba3696e 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build.h +++ b/contrib/libs/hyperscan/src/rose/rose_build.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -40,10 +40,10 @@ #include "ue2common.h" #include "rose_common.h" #include "rose_in_graph.h" -#include "util/bytecode_ptr.h" +#include "util/bytecode_ptr.h" #include "util/charreach.h" -#include "util/flat_containers.h" -#include "util/noncopyable.h" +#include "util/flat_containers.h" +#include "util/noncopyable.h" #include "util/ue2string.h" #include <memory> @@ -64,7 +64,7 @@ struct raw_som_dfa; class CharReach; class NGHolder; class ReportManager; -class SmallWriteBuild; +class SmallWriteBuild; class SomSlotManager; class RoseDedupeAux { @@ -73,21 +73,21 @@ public: /** \brief True if we can not establish that at most a single callback will * be generated at a given offset from this set of reports. */ - virtual bool requiresDedupeSupport(const flat_set<ReportID> &reports) + virtual bool requiresDedupeSupport(const flat_set<ReportID> &reports) const = 0; }; /** \brief Abstract interface intended for callers from elsewhere in the tree, * real underlying implementation is RoseBuildImpl in rose_build_impl.h. */ -class RoseBuild : noncopyable { +class RoseBuild : noncopyable { public: virtual ~RoseBuild(); /** \brief Adds a single literal. 
*/ virtual void add(bool anchored, bool eod, const ue2_literal &lit, - const flat_set<ReportID> &ids) = 0; + const flat_set<ReportID> &ids) = 0; - virtual bool addRose(const RoseInGraph &ig, bool prefilter) = 0; + virtual bool addRose(const RoseInGraph &ig, bool prefilter) = 0; virtual bool addSombeRose(const RoseInGraph &ig) = 0; virtual bool addOutfix(const NGHolder &h) = 0; @@ -99,21 +99,21 @@ public: /** \brief Returns true if we were able to add it as a mask. */ virtual bool add(bool anchored, const std::vector<CharReach> &mask, - const flat_set<ReportID> &reports) = 0; + const flat_set<ReportID> &reports) = 0; /** \brief Attempts to add the graph to the anchored acyclic table. Returns * true on success. */ virtual bool addAnchoredAcyclic(const NGHolder &graph) = 0; virtual bool validateMask(const std::vector<CharReach> &mask, - const flat_set<ReportID> &reports, + const flat_set<ReportID> &reports, bool anchored, bool eod) const = 0; virtual void addMask(const std::vector<CharReach> &mask, - const flat_set<ReportID> &reports, bool anchored, + const flat_set<ReportID> &reports, bool anchored, bool eod) = 0; /** \brief Construct a runtime implementation. */ - virtual bytecode_ptr<RoseEngine> buildRose(u32 minWidth) = 0; + virtual bytecode_ptr<RoseEngine> buildRose(u32 minWidth) = 0; virtual std::unique_ptr<RoseDedupeAux> generateDedupeAux() const = 0; @@ -127,7 +127,7 @@ public: // Construct a usable Rose builder. std::unique_ptr<RoseBuild> makeRoseBuilder(ReportManager &rm, SomSlotManager &ssm, - SmallWriteBuild &smwr, + SmallWriteBuild &smwr, const CompileContext &cc, const BoundaryReports &boundary); diff --git a/contrib/libs/hyperscan/src/rose/rose_build_add.cpp b/contrib/libs/hyperscan/src/rose/rose_build_add.cpp index b3fe94b597..4929c95fce 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_add.cpp +++ b/contrib/libs/hyperscan/src/rose/rose_build_add.cpp @@ -55,9 +55,9 @@ #include "util/container.h" #include "util/dump_charclass.h" #include "util/graph_range.h" -#include "util/insertion_ordered.h" +#include "util/insertion_ordered.h" #include "util/make_unique.h" -#include "util/noncopyable.h" +#include "util/noncopyable.h" #include "util/order_check.h" #include "util/report_manager.h" #include "util/ue2string.h" @@ -77,7 +77,7 @@ namespace ue2 { /** * \brief Data used by most of the construction code in this file. */ -struct RoseBuildData : noncopyable { +struct RoseBuildData : noncopyable { RoseBuildData(const RoseInGraph &ig_in, bool som_in) : ig(ig_in), som(som_in) {} @@ -86,7 +86,7 @@ struct RoseBuildData : noncopyable { /** Edges we've transformed (in \ref transformAnchoredLiteralOverlap) which * require ANCH history to prevent overlap. */ - unordered_set<RoseInEdge> anch_history_edges; + unordered_set<RoseInEdge> anch_history_edges; /** True if we're tracking Start of Match. 
*/ bool som; @@ -111,7 +111,7 @@ RoseVertex createVertex(RoseBuildImpl *build, u32 literalId, u32 min_offset, g[v].min_offset = min_offset; g[v].max_offset = max_offset; - DEBUG_PRINTF("insert vertex %zu into literal %u's vertex set\n", g[v].index, + DEBUG_PRINTF("insert vertex %zu into literal %u's vertex set\n", g[v].index, literalId); g[v].literals.insert(literalId); build->literal_info[literalId].vertices.insert(v); @@ -122,7 +122,7 @@ RoseVertex createVertex(RoseBuildImpl *build, u32 literalId, u32 min_offset, RoseVertex createVertex(RoseBuildImpl *build, const RoseVertex parent, u32 minBound, u32 maxBound, u32 literalId, size_t literalLength, - const flat_set<ReportID> &reports) { + const flat_set<ReportID> &reports) { assert(parent != RoseGraph::null_vertex()); RoseGraph &g = build->g; @@ -132,7 +132,7 @@ RoseVertex createVertex(RoseBuildImpl *build, const RoseVertex parent, /* fill in report information */ g[v].reports.insert(reports.begin(), reports.end()); - RoseEdge e = add_edge(parent, v, g); + RoseEdge e = add_edge(parent, v, g); DEBUG_PRINTF("adding edge (%u, %u) to parent\n", minBound, maxBound); g[e].minBound = minBound; @@ -159,10 +159,10 @@ RoseVertex createAnchoredVertex(RoseBuildImpl *build, u32 literalId, RoseGraph &g = build->g; RoseVertex v = createVertex(build, literalId, min_offset, max_offset); - DEBUG_PRINTF("created anchored vertex %zu with lit id %u\n", g[v].index, + DEBUG_PRINTF("created anchored vertex %zu with lit id %u\n", g[v].index, literalId); - RoseEdge e = add_edge(build->anchored_root, v, g); + RoseEdge e = add_edge(build->anchored_root, v, g); g[e].minBound = min_offset; g[e].maxBound = max_offset; @@ -173,7 +173,7 @@ static RoseVertex duplicate(RoseBuildImpl *build, RoseVertex v) { RoseGraph &g = build->g; RoseVertex w = add_vertex(g[v], g); - DEBUG_PRINTF("added vertex %zu\n", g[w].index); + DEBUG_PRINTF("added vertex %zu\n", g[w].index); for (auto lit_id : g[w].literals) { build->literal_info[lit_id].vertices.insert(w); @@ -182,7 +182,7 @@ RoseVertex duplicate(RoseBuildImpl *build, RoseVertex v) { for (const auto &e : in_edges_range(v, g)) { RoseVertex s = source(e, g); add_edge(s, w, g[e], g); - DEBUG_PRINTF("added edge (%zu,%zu)\n", g[s].index, g[w].index); + DEBUG_PRINTF("added edge (%zu,%zu)\n", g[s].index, g[w].index); } return w; @@ -191,7 +191,7 @@ RoseVertex duplicate(RoseBuildImpl *build, RoseVertex v) { namespace { struct created_key { explicit created_key(const RoseInEdgeProps &trep) - : prefix(trep.graph.get()), lag(trep.graph_lag) { + : prefix(trep.graph.get()), lag(trep.graph_lag) { } bool operator<(const created_key &b) const { const created_key &a = *this; @@ -218,7 +218,7 @@ RoseRoleHistory selectHistory(const RoseBuildImpl &tbi, const RoseBuildData &bd, const bool has_bounds = g[e].minBound || (g[e].maxBound != ROSE_BOUND_INF); DEBUG_PRINTF("edge %zu->%zu, bounds=[%u,%u], fixed_u=%d, prefix=%d\n", - g[u].index, g[v].index, g[e].minBound, g[e].maxBound, + g[u].index, g[v].index, g[e].minBound, g[e].maxBound, (int)g[u].fixedOffset(), (int)g[v].left); if (g[v].left) { @@ -277,8 +277,8 @@ void createVertices(RoseBuildImpl *tbi, if (prefix_graph) { g[w].left.graph = prefix_graph; - if (edge_props.dfa) { - g[w].left.dfa = edge_props.dfa; + if (edge_props.dfa) { + g[w].left.dfa = edge_props.dfa; } g[w].left.haig = edge_props.haig; g[w].left.lag = prefix_lag; @@ -296,19 +296,19 @@ void createVertices(RoseBuildImpl *tbi, if (bd.som && !g[w].left.haig) { /* no prefix - som based on literal start */ assert(!prefix_graph); - g[w].som_adjust 
= tbi->literals.at(literalId).elength(); + g[w].som_adjust = tbi->literals.at(literalId).elength(); DEBUG_PRINTF("set som_adjust to %u\n", g[w].som_adjust); } - DEBUG_PRINTF(" adding new vertex index=%zu\n", tbi->g[w].index); + DEBUG_PRINTF(" adding new vertex index=%zu\n", tbi->g[w].index); vertex_map[iv].push_back(w); } else { w = created[key]; } - RoseVertex p = pv.first; + RoseVertex p = pv.first; - RoseEdge e = add_edge(p, w, g); + RoseEdge e = add_edge(p, w, g); DEBUG_PRINTF("adding edge (%u,%u) to parent\n", edge_props.minBound, edge_props.maxBound); g[e].minBound = edge_props.minBound; @@ -334,7 +334,7 @@ void createVertices(RoseBuildImpl *tbi, u32 ghostId = tbi->literal_info[literalId].undelayed_id; DEBUG_PRINTF("creating delay ghost vertex, id=%u\n", ghostId); assert(ghostId != literalId); - assert(tbi->literals.at(ghostId).delay == 0); + assert(tbi->literals.at(ghostId).delay == 0); // Adjust offsets, removing delay. u32 ghost_min = min_offset, ghost_max = max_offset; @@ -346,7 +346,7 @@ void createVertices(RoseBuildImpl *tbi, for (const auto &pv : parents) { const RoseInEdgeProps &edge_props = bd.ig[pv.second]; - RoseEdge e = add_edge(pv.first, g_v, tbi->g); + RoseEdge e = add_edge(pv.first, g_v, tbi->g); g[e].minBound = edge_props.minBound; g[e].maxBound = edge_props.maxBound; g[e].history = selectHistory(*tbi, bd, pv.second, e); @@ -363,7 +363,7 @@ void createVertices(RoseBuildImpl *tbi, /* ensure the holder does not accept any paths which do not end with lit */ static void removeFalsePaths(NGHolder &g, const ue2_literal &lit) { - DEBUG_PRINTF("strip '%s'\n", dumpString(lit).c_str()); + DEBUG_PRINTF("strip '%s'\n", dumpString(lit).c_str()); set<NFAVertex> curr, next; curr.insert(g.accept); curr.insert(g.acceptEod); @@ -371,7 +371,7 @@ void removeFalsePaths(NGHolder &g, const ue2_literal &lit) { for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) { next.clear(); for (auto curr_v : curr) { - DEBUG_PRINTF("handling %zu\n", g[curr_v].index); + DEBUG_PRINTF("handling %zu\n", g[curr_v].index); vector<NFAVertex> next_cand; insert(&next_cand, next_cand.end(), inv_adjacent_vertices(curr_v, g)); @@ -389,7 +389,7 @@ void removeFalsePaths(NGHolder &g, const ue2_literal &lit) { const CharReach &cr = g[v].char_reach; if (!overlaps(*it, cr)) { - DEBUG_PRINTF("false edge %zu\n", g[v].index); + DEBUG_PRINTF("false edge %zu\n", g[v].index); continue; } @@ -397,7 +397,7 @@ void removeFalsePaths(NGHolder &g, const ue2_literal &lit) { clone_in_edges(g, v, v2); add_edge(v2, curr_v, g); g[v2].char_reach &= *it; - DEBUG_PRINTF("next <- %zu\n", g[v2].index); + DEBUG_PRINTF("next <- %zu\n", g[v2].index); next.insert(v2); } } @@ -406,7 +406,7 @@ void removeFalsePaths(NGHolder &g, const ue2_literal &lit) { } pruneUseless(g); - clearReports(g); + clearReports(g); assert(in_degree(g.accept, g) || in_degree(g.acceptEod, g) > 1); assert(allMatchStatesHaveReports(g)); @@ -545,7 +545,7 @@ void findRoseLiteralMask(const NGHolder &h, const u32 lag, vector<u8> &msk, next.clear(); CharReach cr; for (auto v : curr) { - DEBUG_PRINTF("vertex %zu, reach %s\n", h[v].index, + DEBUG_PRINTF("vertex %zu, reach %s\n", h[v].index, describeClass(h[v].char_reach).c_str()); cr |= h[v].char_reach; insert(&next, inv_adjacent_vertices(v, h)); @@ -640,96 +640,96 @@ floating: } static -unique_ptr<NGHolder> makeRoseEodPrefix(const NGHolder &h, RoseBuildImpl &build, - map<flat_set<ReportID>, ReportID> &remap) { +unique_ptr<NGHolder> makeRoseEodPrefix(const NGHolder &h, RoseBuildImpl &build, + map<flat_set<ReportID>, 
ReportID> &remap) { assert(generates_callbacks(h)); - assert(!in_degree(h.accept, h)); - auto gg = cloneHolder(h); - NGHolder &g = *gg; - g.kind = is_triggered(h) ? NFA_INFIX : NFA_PREFIX; + assert(!in_degree(h.accept, h)); + auto gg = cloneHolder(h); + NGHolder &g = *gg; + g.kind = is_triggered(h) ? NFA_INFIX : NFA_PREFIX; // Move acceptEod edges over to accept. vector<NFAEdge> dead; - for (const auto &e : in_edges_range(g.acceptEod, g)) { - NFAVertex u = source(e, g); - if (u == g.accept) { + for (const auto &e : in_edges_range(g.acceptEod, g)) { + NFAVertex u = source(e, g); + if (u == g.accept) { continue; } - add_edge_if_not_present(u, g.accept, g); + add_edge_if_not_present(u, g.accept, g); dead.push_back(e); - - if (!contains(remap, g[u].reports)) { - remap[g[u].reports] = build.getNewNfaReport(); - } - - g[u].reports = { remap[g[u].reports] }; + + if (!contains(remap, g[u].reports)) { + remap[g[u].reports] = build.getNewNfaReport(); + } + + g[u].reports = { remap[g[u].reports] }; } - remove_edges(dead, g); - return gg; + remove_edges(dead, g); + return gg; +} + +static +u32 getEodEventID(RoseBuildImpl &build) { + // Allocate the EOD event if it hasn't been already. + if (build.eod_event_literal_id == MO_INVALID_IDX) { + build.eod_event_literal_id = build.getLiteralId({}, 0, ROSE_EVENT); + } + + return build.eod_event_literal_id; +} + +static +void makeEodEventLeftfix(RoseBuildImpl &build, RoseVertex u, + const NGHolder &h) { + assert(!build.isInETable(u)); + + RoseGraph &g = build.g; + map<flat_set<ReportID>, ReportID> report_remap; + shared_ptr<NGHolder> eod_leftfix + = makeRoseEodPrefix(h, build, report_remap); + + u32 eod_event = getEodEventID(build); + + for (const auto &report_mapping : report_remap) { + RoseVertex v = add_vertex(g); + g[v].literals.insert(eod_event); + build.literal_info[eod_event].vertices.insert(v); + + g[v].left.graph = eod_leftfix; + g[v].left.leftfix_report = report_mapping.second; + g[v].left.lag = 0; + RoseEdge e1 = add_edge(u, v, g); + g[e1].minBound = 0; + g[e1].maxBound = ROSE_BOUND_INF; + g[v].min_offset = add_rose_depth(g[u].min_offset, + findMinWidth(*g[v].left.graph)); + g[v].max_offset = ROSE_BOUND_INF; + + depth max_width = findMaxWidth(*g[v].left.graph); + if (u != build.root && max_width.is_finite() + && (!build.isAnyStart(u) || isPureAnchored(*g[v].left.graph))) { + g[e1].maxBound = max_width; + g[v].max_offset = add_rose_depth(g[u].max_offset, max_width); + } + + g[e1].history = ROSE_ROLE_HISTORY_NONE; // handled by prefix + RoseVertex w = add_vertex(g); + g[w].eod_accept = true; + g[w].reports = report_mapping.first; + g[w].min_offset = g[v].min_offset; + g[w].max_offset = g[v].max_offset; + RoseEdge e = add_edge(v, w, g); + g[e].minBound = 0; + g[e].maxBound = 0; + /* No need to set history as the event is only delivered at the last + * byte anyway - no need to invalidate stale entries. */ + g[e].history = ROSE_ROLE_HISTORY_NONE; + DEBUG_PRINTF("accept eod vertex (index=%zu)\n", g[w].index); + } } static -u32 getEodEventID(RoseBuildImpl &build) { - // Allocate the EOD event if it hasn't been already. 
- if (build.eod_event_literal_id == MO_INVALID_IDX) { - build.eod_event_literal_id = build.getLiteralId({}, 0, ROSE_EVENT); - } - - return build.eod_event_literal_id; -} - -static -void makeEodEventLeftfix(RoseBuildImpl &build, RoseVertex u, - const NGHolder &h) { - assert(!build.isInETable(u)); - - RoseGraph &g = build.g; - map<flat_set<ReportID>, ReportID> report_remap; - shared_ptr<NGHolder> eod_leftfix - = makeRoseEodPrefix(h, build, report_remap); - - u32 eod_event = getEodEventID(build); - - for (const auto &report_mapping : report_remap) { - RoseVertex v = add_vertex(g); - g[v].literals.insert(eod_event); - build.literal_info[eod_event].vertices.insert(v); - - g[v].left.graph = eod_leftfix; - g[v].left.leftfix_report = report_mapping.second; - g[v].left.lag = 0; - RoseEdge e1 = add_edge(u, v, g); - g[e1].minBound = 0; - g[e1].maxBound = ROSE_BOUND_INF; - g[v].min_offset = add_rose_depth(g[u].min_offset, - findMinWidth(*g[v].left.graph)); - g[v].max_offset = ROSE_BOUND_INF; - - depth max_width = findMaxWidth(*g[v].left.graph); - if (u != build.root && max_width.is_finite() - && (!build.isAnyStart(u) || isPureAnchored(*g[v].left.graph))) { - g[e1].maxBound = max_width; - g[v].max_offset = add_rose_depth(g[u].max_offset, max_width); - } - - g[e1].history = ROSE_ROLE_HISTORY_NONE; // handled by prefix - RoseVertex w = add_vertex(g); - g[w].eod_accept = true; - g[w].reports = report_mapping.first; - g[w].min_offset = g[v].min_offset; - g[w].max_offset = g[v].max_offset; - RoseEdge e = add_edge(v, w, g); - g[e].minBound = 0; - g[e].maxBound = 0; - /* No need to set history as the event is only delivered at the last - * byte anyway - no need to invalidate stale entries. */ - g[e].history = ROSE_ROLE_HISTORY_NONE; - DEBUG_PRINTF("accept eod vertex (index=%zu)\n", g[w].index); - } -} - -static void doRoseAcceptVertex(RoseBuildImpl *tbi, const vector<pair<RoseVertex, RoseInEdge> > &parents, RoseInVertex iv, const RoseBuildData &bd) { @@ -742,22 +742,22 @@ void doRoseAcceptVertex(RoseBuildImpl *tbi, RoseVertex u = pv.first; const RoseInEdgeProps &edge_props = bd.ig[pv.second]; - /* We need to duplicate the parent vertices if: - * - * 1) It already has a suffix, etc as we are going to add the specified - * suffix, etc to the parents and we do not want to overwrite the - * existing information. - * - * 2) We are making the an EOD accept and the vertex already has other - * out-edges - The LAST_BYTE history used for EOD accepts is - * incompatible with normal successors. As accepts are processed last we - * do not need to worry about other normal successors being added later. - */ + /* We need to duplicate the parent vertices if: + * + * 1) It already has a suffix, etc as we are going to add the specified + * suffix, etc to the parents and we do not want to overwrite the + * existing information. + * + * 2) We are making the an EOD accept and the vertex already has other + * out-edges - The LAST_BYTE history used for EOD accepts is + * incompatible with normal successors. As accepts are processed last we + * do not need to worry about other normal successors being added later. 
+ */ if (g[u].suffix || !g[u].reports.empty() - || (ig[iv].type == RIV_ACCEPT_EOD && out_degree(u, g) - && !edge_props.graph) + || (ig[iv].type == RIV_ACCEPT_EOD && out_degree(u, g) + && !edge_props.graph) || (!isLeafNode(u, g) && !tbi->isAnyStart(u))) { - DEBUG_PRINTF("duplicating for parent %zu\n", g[u].index); + DEBUG_PRINTF("duplicating for parent %zu\n", g[u].index); assert(!tbi->isAnyStart(u)); u = duplicate(tbi, u); g[u].suffix.reset(); @@ -767,56 +767,56 @@ void doRoseAcceptVertex(RoseBuildImpl *tbi, assert(!g[u].suffix); if (ig[iv].type == RIV_ACCEPT) { assert(!tbi->isAnyStart(u)); - if (edge_props.dfa) { - DEBUG_PRINTF("adding early dfa suffix to i%zu\n", g[u].index); - g[u].suffix.rdfa = edge_props.dfa; + if (edge_props.dfa) { + DEBUG_PRINTF("adding early dfa suffix to i%zu\n", g[u].index); + g[u].suffix.rdfa = edge_props.dfa; g[u].suffix.dfa_min_width = findMinWidth(*edge_props.graph); g[u].suffix.dfa_max_width = findMaxWidth(*edge_props.graph); } else if (edge_props.graph) { - DEBUG_PRINTF("adding suffix to i%zu\n", g[u].index); + DEBUG_PRINTF("adding suffix to i%zu\n", g[u].index); g[u].suffix.graph = edge_props.graph; assert(g[u].suffix.graph->kind == NFA_SUFFIX); /* TODO: set dfa_(min|max)_width */ } else if (edge_props.haig) { - DEBUG_PRINTF("adding suffaig to i%zu\n", g[u].index); + DEBUG_PRINTF("adding suffaig to i%zu\n", g[u].index); g[u].suffix.haig = edge_props.haig; } else { - DEBUG_PRINTF("adding boring accept to i%zu\n", g[u].index); + DEBUG_PRINTF("adding boring accept to i%zu\n", g[u].index); assert(!g[u].eod_accept); g[u].reports = ig[iv].reports; } } else { assert(ig[iv].type == RIV_ACCEPT_EOD); - assert(!edge_props.haig); - - if (!edge_props.graph) { - RoseVertex w = add_vertex(g); - g[w].eod_accept = true; - g[w].reports = ig[iv].reports; - g[w].min_offset = g[u].min_offset; - g[w].max_offset = g[u].max_offset; - RoseEdge e = add_edge(u, w, g); - g[e].minBound = 0; - g[e].maxBound = 0; - g[e].history = ROSE_ROLE_HISTORY_LAST_BYTE; - DEBUG_PRINTF("accept eod vertex (index=%zu)\n", g[w].index); - continue; - } - - const NGHolder &h = *edge_props.graph; - assert(!in_degree(h.accept, h)); - assert(generates_callbacks(h)); - - if (tbi->isInETable(u)) { - assert(h.kind == NFA_SUFFIX); + assert(!edge_props.haig); + + if (!edge_props.graph) { + RoseVertex w = add_vertex(g); + g[w].eod_accept = true; + g[w].reports = ig[iv].reports; + g[w].min_offset = g[u].min_offset; + g[w].max_offset = g[u].max_offset; + RoseEdge e = add_edge(u, w, g); + g[e].minBound = 0; + g[e].maxBound = 0; + g[e].history = ROSE_ROLE_HISTORY_LAST_BYTE; + DEBUG_PRINTF("accept eod vertex (index=%zu)\n", g[w].index); + continue; + } + + const NGHolder &h = *edge_props.graph; + assert(!in_degree(h.accept, h)); + assert(generates_callbacks(h)); + + if (tbi->isInETable(u)) { + assert(h.kind == NFA_SUFFIX); assert(!tbi->isAnyStart(u)); /* etable can't/shouldn't use eod event */ - DEBUG_PRINTF("adding suffix to i%zu\n", g[u].index); + DEBUG_PRINTF("adding suffix to i%zu\n", g[u].index); g[u].suffix.graph = edge_props.graph; continue; } - makeEodEventLeftfix(*tbi, u, h); + makeEodEventLeftfix(*tbi, u, h); } } } @@ -917,8 +917,8 @@ bool suitableForEod(const RoseInGraph &ig, vector<RoseInVertex> topo, ENSURE_AT_LEAST(&v_depth, (u32)max_width); } - if (v_depth == ROSE_BOUND_INF - || v_depth > cc.grey.maxHistoryAvailable) { + if (v_depth == ROSE_BOUND_INF + || v_depth > cc.grey.maxHistoryAvailable) { DEBUG_PRINTF("not suitable for eod table %u\n", v_depth); return false; } @@ -932,13 +932,13 @@ bool 
suitableForEod(const RoseInGraph &ig, vector<RoseInVertex> topo, } static -void shift_accepts_to_end(const RoseInGraph &ig, - vector<RoseInVertex> &topo_order) { - stable_partition(begin(topo_order), end(topo_order), - [&](RoseInVertex v){ return !is_any_accept(v, ig); }); -} - -static +void shift_accepts_to_end(const RoseInGraph &ig, + vector<RoseInVertex> &topo_order) { + stable_partition(begin(topo_order), end(topo_order), + [&](RoseInVertex v){ return !is_any_accept(v, ig); }); +} + +static void populateRoseGraph(RoseBuildImpl *tbi, RoseBuildData &bd) { const RoseInGraph &ig = bd.ig; @@ -950,7 +950,7 @@ void populateRoseGraph(RoseBuildImpl *tbi, RoseBuildData &bd) { map<RoseInVertex, vector<RoseVertex> > vertex_map; vector<RoseInVertex> v_order = topo_order(ig); - shift_accepts_to_end(ig, v_order); + shift_accepts_to_end(ig, v_order); u32 eod_space_required; bool use_eod_table = suitableForEod(ig, v_order, &eod_space_required, @@ -963,7 +963,7 @@ void populateRoseGraph(RoseBuildImpl *tbi, RoseBuildData &bd) { || ig[v_order.front()].type == RIV_ANCHORED_START); for (RoseInVertex iv : v_order) { - DEBUG_PRINTF("vertex %zu\n", ig[iv].index); + DEBUG_PRINTF("vertex %zu\n", ig[iv].index); if (ig[iv].type == RIV_START) { DEBUG_PRINTF("is root\n"); @@ -982,7 +982,7 @@ void populateRoseGraph(RoseBuildImpl *tbi, RoseBuildData &bd) { const vector<RoseVertex> &images = vertex_map[u]; // We should have no dupes. - assert(set<RoseVertex>(images.begin(), images.end()).size() + assert(set<RoseVertex>(images.begin(), images.end()).size() == images.size()); for (auto v_image : images) { @@ -1032,8 +1032,8 @@ bool empty(const GraphT &g) { } static -bool canImplementGraph(NGHolder &h, bool prefilter, const ReportManager &rm, - const CompileContext &cc) { +bool canImplementGraph(NGHolder &h, bool prefilter, const ReportManager &rm, + const CompileContext &cc) { if (isImplementableNFA(h, &rm, cc)) { return true; } @@ -1106,7 +1106,7 @@ u32 maxAvailableDelay(const ue2_literal &pred_key, const ue2_literal &lit_key) { } static -u32 findMaxSafeDelay(const RoseInGraph &ig, RoseInVertex u, RoseInVertex v) { +u32 findMaxSafeDelay(const RoseInGraph &ig, RoseInVertex u, RoseInVertex v) { // First, check the overlap constraints on (u,v). size_t max_delay; if (ig[v].type == RIV_LITERAL) { @@ -1504,10 +1504,10 @@ bool validateKinds(const RoseInGraph &g) { } #endif -bool RoseBuildImpl::addRose(const RoseInGraph &ig, bool prefilter) { +bool RoseBuildImpl::addRose(const RoseInGraph &ig, bool prefilter) { DEBUG_PRINTF("trying to rose\n"); assert(validateKinds(ig)); - assert(hasCorrectlyNumberedVertices(ig)); + assert(hasCorrectlyNumberedVertices(ig)); if (::ue2::empty(ig)) { assert(0); @@ -1523,38 +1523,38 @@ bool RoseBuildImpl::addRose(const RoseInGraph &ig, bool prefilter) { transformAnchoredLiteralOverlap(in, bd, cc); transformSuffixDelay(in, cc); - renumber_vertices(in); - assert(validateKinds(in)); + renumber_vertices(in); + assert(validateKinds(in)); - insertion_ordered_map<NGHolder *, vector<RoseInEdge>> graphs; + insertion_ordered_map<NGHolder *, vector<RoseInEdge>> graphs; for (const auto &e : edges_range(in)) { if (!in[e].graph) { - assert(!in[e].dfa); - assert(!in[e].haig); + assert(!in[e].dfa); + assert(!in[e].haig); continue; // no graph } - if (in[e].haig || in[e].dfa) { - /* Early DFAs/Haigs are always implementable (we've already built - * the raw DFA). */ + if (in[e].haig || in[e].dfa) { + /* Early DFAs/Haigs are always implementable (we've already built + * the raw DFA). 
*/ continue; } NGHolder *h = in[e].graph.get(); - - assert(isCorrectlyTopped(*h)); + + assert(isCorrectlyTopped(*h)); graphs[h].push_back(e); } vector<RoseInEdge> graph_edges; - for (const auto &m : graphs) { - NGHolder *h = m.first; - if (!canImplementGraph(*h, prefilter, rm, cc)) { + for (const auto &m : graphs) { + NGHolder *h = m.first; + if (!canImplementGraph(*h, prefilter, rm, cc)) { return false; } - insert(&graph_edges, graph_edges.end(), m.second); + insert(&graph_edges, graph_edges.end(), m.second); } /* we are now past the point of no return. We can start making irreversible @@ -1569,7 +1569,7 @@ bool RoseBuildImpl::addRose(const RoseInGraph &ig, bool prefilter) { if (!generates_callbacks(whatRoseIsThis(in, e)) && in[target(e, in)].type != RIV_ACCEPT_EOD) { - set_report(h, getNewNfaReport()); + set_report(h, getNewNfaReport()); } } @@ -1612,7 +1612,7 @@ bool roseCheckRose(const RoseInGraph &ig, bool prefilter, return false; } - vector<NGHolder *> graphs; + vector<NGHolder *> graphs; for (const auto &e : edges_range(ig)) { if (!ig[e].graph) { @@ -1624,11 +1624,11 @@ bool roseCheckRose(const RoseInGraph &ig, bool prefilter, continue; } - graphs.push_back(ig[e].graph.get()); + graphs.push_back(ig[e].graph.get()); } - for (const auto &g : graphs) { - if (!canImplementGraph(*g, prefilter, rm, cc)) { + for (const auto &g : graphs) { + if (!canImplementGraph(*g, prefilter, rm, cc)) { return false; } } @@ -1637,7 +1637,7 @@ bool roseCheckRose(const RoseInGraph &ig, bool prefilter, } void RoseBuildImpl::add(bool anchored, bool eod, const ue2_literal &lit, - const flat_set<ReportID> &reports) { + const flat_set<ReportID> &reports) { assert(!reports.empty()); if (cc.grey.floodAsPuffette && !anchored && !eod && is_flood(lit) && @@ -1672,7 +1672,7 @@ static u32 findMaxBAWidth(const NGHolder &h) { // Must be bi-anchored: no out-edges from startDs (other than its // self-loop), no in-edges to accept. 
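    // (If startDs has any successor besides its own self-loop, the pattern can
    // also begin matching later in the buffer, i.e. it is not anchored to the
    // start; if accept (rather than acceptEod) has in-edges, it can report
    // before end of data. In either case there is no finite bi-anchored width,
    // so ROSE_BOUND_INF is returned below.)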
- if (out_degree(h.startDs, h) > 1 || in_degree(h.accept, h)) { + if (out_degree(h.startDs, h) > 1 || in_degree(h.accept, h)) { return ROSE_BOUND_INF; } depth d = findMaxWidth(h); @@ -1694,70 +1694,70 @@ void populateOutfixInfo(OutfixInfo &outfix, const NGHolder &h, populateReverseAccelerationInfo(outfix.rev_info, h); } -static -bool addEodOutfix(RoseBuildImpl &build, const NGHolder &h) { - map<flat_set<ReportID>, ReportID> report_remap; - shared_ptr<NGHolder> eod_leftfix - = makeRoseEodPrefix(h, build, report_remap); - - bool nfa_ok = isImplementableNFA(h, &build.rm, build.cc); - - /* TODO: check if early dfa is possible */ - - if (!nfa_ok) { - DEBUG_PRINTF("could not build as NFA\n"); - return false; - } - - u32 eod_event = getEodEventID(build); - - auto &g = build.g; - for (const auto &report_mapping : report_remap) { - RoseVertex v = add_vertex(g); - g[v].literals.insert(eod_event); - build.literal_info[eod_event].vertices.insert(v); - - g[v].left.graph = eod_leftfix; - g[v].left.leftfix_report = report_mapping.second; - g[v].left.lag = 0; - RoseEdge e1 = add_edge(build.anchored_root, v, g); - g[e1].minBound = 0; - g[e1].maxBound = ROSE_BOUND_INF; - g[v].min_offset = findMinWidth(*eod_leftfix); - g[v].max_offset = ROSE_BOUND_INF; - - depth max_width = findMaxWidth(*g[v].left.graph); - if (max_width.is_finite() && isPureAnchored(*eod_leftfix)) { - g[e1].maxBound = max_width; - g[v].max_offset = max_width; - } - - g[e1].history = ROSE_ROLE_HISTORY_NONE; // handled by prefix - RoseVertex w = add_vertex(g); - g[w].eod_accept = true; - g[w].reports = report_mapping.first; - g[w].min_offset = g[v].min_offset; - g[w].max_offset = g[v].max_offset; - RoseEdge e = add_edge(v, w, g); - g[e].minBound = 0; - g[e].maxBound = 0; - g[e].history = ROSE_ROLE_HISTORY_NONE; - DEBUG_PRINTF("accept eod vertex (index=%zu)\n", g[w].index); - } - - return true; -} - +static +bool addEodOutfix(RoseBuildImpl &build, const NGHolder &h) { + map<flat_set<ReportID>, ReportID> report_remap; + shared_ptr<NGHolder> eod_leftfix + = makeRoseEodPrefix(h, build, report_remap); + + bool nfa_ok = isImplementableNFA(h, &build.rm, build.cc); + + /* TODO: check if early dfa is possible */ + + if (!nfa_ok) { + DEBUG_PRINTF("could not build as NFA\n"); + return false; + } + + u32 eod_event = getEodEventID(build); + + auto &g = build.g; + for (const auto &report_mapping : report_remap) { + RoseVertex v = add_vertex(g); + g[v].literals.insert(eod_event); + build.literal_info[eod_event].vertices.insert(v); + + g[v].left.graph = eod_leftfix; + g[v].left.leftfix_report = report_mapping.second; + g[v].left.lag = 0; + RoseEdge e1 = add_edge(build.anchored_root, v, g); + g[e1].minBound = 0; + g[e1].maxBound = ROSE_BOUND_INF; + g[v].min_offset = findMinWidth(*eod_leftfix); + g[v].max_offset = ROSE_BOUND_INF; + + depth max_width = findMaxWidth(*g[v].left.graph); + if (max_width.is_finite() && isPureAnchored(*eod_leftfix)) { + g[e1].maxBound = max_width; + g[v].max_offset = max_width; + } + + g[e1].history = ROSE_ROLE_HISTORY_NONE; // handled by prefix + RoseVertex w = add_vertex(g); + g[w].eod_accept = true; + g[w].reports = report_mapping.first; + g[w].min_offset = g[v].min_offset; + g[w].max_offset = g[v].max_offset; + RoseEdge e = add_edge(v, w, g); + g[e].minBound = 0; + g[e].maxBound = 0; + g[e].history = ROSE_ROLE_HISTORY_NONE; + DEBUG_PRINTF("accept eod vertex (index=%zu)\n", g[w].index); + } + + return true; +} + bool RoseBuildImpl::addOutfix(const NGHolder &h) { DEBUG_PRINTF("%zu vertices, %zu edges\n", num_vertices(h), 
num_edges(h)); - /* TODO: handle more than one report */ - if (!in_degree(h.accept, h) - && all_reports(h).size() == 1 - && addEodOutfix(*this, h)) { - return true; - } - + /* TODO: handle more than one report */ + if (!in_degree(h.accept, h) + && all_reports(h).size() == 1 + && addEodOutfix(*this, h)) { + return true; + } + const u32 nfa_states = isImplementableNFA(h, &rm, cc); if (nfa_states) { DEBUG_PRINTF("implementable as an NFA in %u states\n", nfa_states); @@ -1802,12 +1802,12 @@ bool RoseBuildImpl::addOutfix(const NGHolder &h, const raw_som_dfa &haig) { bool RoseBuildImpl::addOutfix(const raw_puff &rp) { if (!mpv_outfix) { - mpv_outfix = std::make_unique<OutfixInfo>(MpvProto()); + mpv_outfix = std::make_unique<OutfixInfo>(MpvProto()); } - auto *mpv = mpv_outfix->mpv(); - assert(mpv); - mpv->puffettes.push_back(rp); + auto *mpv = mpv_outfix->mpv(); + assert(mpv); + mpv->puffettes.push_back(rp); mpv_outfix->maxBAWidth = ROSE_BOUND_INF; /* not ba */ mpv_outfix->minWidth = min(mpv_outfix->minWidth, depth(rp.repeats)); @@ -1827,12 +1827,12 @@ bool RoseBuildImpl::addOutfix(const raw_puff &rp) { bool RoseBuildImpl::addChainTail(const raw_puff &rp, u32 *queue_out, u32 *event_out) { if (!mpv_outfix) { - mpv_outfix = std::make_unique<OutfixInfo>(MpvProto()); + mpv_outfix = std::make_unique<OutfixInfo>(MpvProto()); } - auto *mpv = mpv_outfix->mpv(); - assert(mpv); - mpv->triggered_puffettes.push_back(rp); + auto *mpv = mpv_outfix->mpv(); + assert(mpv); + mpv->triggered_puffettes.push_back(rp); mpv_outfix->maxBAWidth = ROSE_BOUND_INF; /* not ba */ mpv_outfix->minWidth = min(mpv_outfix->minWidth, depth(rp.repeats)); @@ -1844,7 +1844,7 @@ bool RoseBuildImpl::addChainTail(const raw_puff &rp, u32 *queue_out, * the caller */ *queue_out = mpv_outfix->get_queue(qif); - *event_out = MQE_TOP_FIRST + mpv->triggered_puffettes.size() - 1; + *event_out = MQE_TOP_FIRST + mpv->triggered_puffettes.size() - 1; return true; /* failure is not yet an option */ } @@ -1858,9 +1858,9 @@ bool prepAcceptForAddAnchoredNFA(RoseBuildImpl &tbi, const NGHolder &w, map<ReportID, u32> &allocated_reports, flat_set<u32> &added_lit_ids) { const depth max_anchored_depth(tbi.cc.grey.maxAnchoredRegion); - const size_t index = w[u].index; - assert(index < vertexDepths.size()); - const DepthMinMax &d = vertexDepths.at(index); + const size_t index = w[u].index; + assert(index < vertexDepths.size()); + const DepthMinMax &d = vertexDepths.at(index); for (const auto &int_report : w[u].reports) { assert(int_report != MO_INVALID_IDX); @@ -1902,20 +1902,20 @@ void removeAddedLiterals(RoseBuildImpl &tbi, const flat_set<u32> &lit_ids) { return; } - DEBUG_PRINTF("remove last %zu literals\n", lit_ids.size()); - + DEBUG_PRINTF("remove last %zu literals\n", lit_ids.size()); + // lit_ids should be a contiguous range. 
assert(lit_ids.size() == *lit_ids.rbegin() - *lit_ids.begin() + 1); - assert(*lit_ids.rbegin() == tbi.literals.size() - 1); + assert(*lit_ids.rbegin() == tbi.literals.size() - 1); - assert(all_of_in(lit_ids, [&](u32 lit_id) { - return lit_id < tbi.literal_info.size() && - tbi.literals.at(lit_id).table == ROSE_ANCHORED && - tbi.literal_info[lit_id].vertices.empty(); - })); + assert(all_of_in(lit_ids, [&](u32 lit_id) { + return lit_id < tbi.literal_info.size() && + tbi.literals.at(lit_id).table == ROSE_ANCHORED && + tbi.literal_info[lit_id].vertices.empty(); + })); - tbi.literals.erase_back(lit_ids.size()); - assert(tbi.literals.size() == *lit_ids.begin()); + tbi.literals.erase_back(lit_ids.size()); + assert(tbi.literals.size() == *lit_ids.begin()); // lit_ids should be at the end of tbi.literal_info. assert(tbi.literal_info.size() == *lit_ids.rbegin() + 1); @@ -1923,7 +1923,7 @@ void removeAddedLiterals(RoseBuildImpl &tbi, const flat_set<u32> &lit_ids) { } bool RoseBuildImpl::addAnchoredAcyclic(const NGHolder &h) { - auto vertexDepths = calcDepthsFrom(h, h.start); + auto vertexDepths = calcDepthsFrom(h, h.start); map<NFAVertex, set<u32> > reportMap; /* NFAVertex -> literal ids */ map<u32, DepthMinMax> depthMap; /* literal id -> min/max depth */ diff --git a/contrib/libs/hyperscan/src/rose/rose_build_add_internal.h b/contrib/libs/hyperscan/src/rose/rose_build_add_internal.h index b119f3bc71..143f1dfa58 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_add_internal.h +++ b/contrib/libs/hyperscan/src/rose/rose_build_add_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -31,7 +31,7 @@ #include "rose_graph.h" #include "ue2common.h" -#include "util/flat_containers.h" +#include "util/flat_containers.h" namespace ue2 { @@ -40,8 +40,8 @@ class RoseBuildImpl; RoseVertex createVertex(RoseBuildImpl *build, const RoseVertex parent, u32 minBound, u32 maxBound, u32 literalId, size_t literalLength, - const flat_set<ReportID> &reports); + const flat_set<ReportID> &reports); } // namespace ue2 -#endif // ROSE_BUILD_ADD_INTERNAL_H +#endif // ROSE_BUILD_ADD_INTERNAL_H diff --git a/contrib/libs/hyperscan/src/rose/rose_build_add_mask.cpp b/contrib/libs/hyperscan/src/rose/rose_build_add_mask.cpp index 067b847dff..0a7e44c370 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_add_mask.cpp +++ b/contrib/libs/hyperscan/src/rose/rose_build_add_mask.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -144,7 +144,7 @@ void findMaskLiteral(const vector<CharReach> &mask, bool streaming, } static -bool initFmlCandidates(const CharReach &cr, vector<ue2_literal> &cand) { +bool initFmlCandidates(const CharReach &cr, vector<ue2_literal> &cand) { for (size_t i = cr.find_first(); i != cr.npos; i = cr.find_next(i)) { char c = (char)i; bool nocase = myisupper(c) && cr.test(mytolower(c)); @@ -152,25 +152,25 @@ bool initFmlCandidates(const CharReach &cr, vector<ue2_literal> &cand) { continue; } - if (cand.size() >= MAX_MASK_LITS) { + if (cand.size() >= MAX_MASK_LITS) { DEBUG_PRINTF("hit lit limit of %u\n", MAX_MASK_LITS); return false; } - cand.emplace_back(c, nocase); + 
cand.emplace_back(c, nocase); } - assert(cand.size() <= MAX_MASK_LITS); - return !cand.empty(); + assert(cand.size() <= MAX_MASK_LITS); + return !cand.empty(); } static -bool expandFmlCandidates(const CharReach &cr, vector<ue2_literal> &curr, - vector<ue2_literal> &cand) { +bool expandFmlCandidates(const CharReach &cr, vector<ue2_literal> &curr, + vector<ue2_literal> &cand) { DEBUG_PRINTF("expanding string with cr of %zu\n", cr.count()); - DEBUG_PRINTF(" current cand list size %zu\n", cand.size()); + DEBUG_PRINTF(" current cand list size %zu\n", cand.size()); - curr.clear(); + curr.clear(); for (size_t i = cr.find_first(); i != cr.npos; i = cr.find_next(i)) { char c = (char)i; @@ -179,14 +179,14 @@ bool expandFmlCandidates(const CharReach &cr, vector<ue2_literal> &curr, continue; } - for (const auto &lit : cand) { + for (const auto &lit : cand) { if (curr.size() >= MAX_MASK_LITS) { DEBUG_PRINTF("hit lit limit of %u\n", MAX_MASK_LITS); return false; } - curr.push_back(lit); - curr.back().push_back(c, nocase); + curr.push_back(lit); + curr.back().push_back(c, nocase); } } @@ -197,7 +197,7 @@ bool expandFmlCandidates(const CharReach &cr, vector<ue2_literal> &curr, } assert(curr.size() <= MAX_MASK_LITS); - cand.swap(curr); + cand.swap(curr); return true; } @@ -214,7 +214,7 @@ u32 scoreFmlCandidates(const vector<ue2_literal> &cand) { u32 min_period = len; for (const auto &lit : cand) { - DEBUG_PRINTF("candidate: %s\n", dumpString(lit).c_str()); + DEBUG_PRINTF("candidate: %s\n", dumpString(lit).c_str()); u32 period = lit.length() - maxStringSelfOverlap(lit); min_period = min(min_period, period); } @@ -240,37 +240,37 @@ bool findMaskLiterals(const vector<CharReach> &mask, vector<ue2_literal> *lit, *minBound = 0; *length = 0; - vector<ue2_literal> candidates, best_candidates, curr_candidates; + vector<ue2_literal> candidates, best_candidates, curr_candidates; u32 best_score = 0; u32 best_minOffset = 0; - - for (auto it = mask.begin(); it != mask.end(); ++it) { + + for (auto it = mask.begin(); it != mask.end(); ++it) { candidates.clear(); - if (!initFmlCandidates(*it, candidates)) { + if (!initFmlCandidates(*it, candidates)) { DEBUG_PRINTF("failed to init\n"); continue; } DEBUG_PRINTF("++\n"); - auto jt = it; - while (jt != mask.begin()) { + auto jt = it; + while (jt != mask.begin()) { --jt; DEBUG_PRINTF("--\n"); - if (!expandFmlCandidates(*jt, curr_candidates, candidates)) { + if (!expandFmlCandidates(*jt, curr_candidates, candidates)) { DEBUG_PRINTF("expansion stopped\n"); break; } } - - // Candidates have been expanded in reverse order. - for (auto &cand : candidates) { - cand = reverse_literal(cand); - } - + + // Candidates have been expanded in reverse order. 
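    // (The expansion loop above walks the mask backwards from the chosen end
    // position, appending one character class per step, so each candidate
    // string is currently back-to-front; restore forward order before the
    // candidates are scored and before best_minOffset is derived from their
    // length.)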
+ for (auto &cand : candidates) { + cand = reverse_literal(cand); + } + u32 score = scoreFmlCandidates(candidates); DEBUG_PRINTF("scored %u for literal set of size %zu\n", score, candidates.size()); if (!candidates.empty() && score >= best_score) { - best_minOffset = it - mask.begin() - candidates.back().length() + 1; + best_minOffset = it - mask.begin() - candidates.back().length() + 1; best_candidates.swap(candidates); best_score = score; } @@ -286,11 +286,11 @@ bool findMaskLiterals(const vector<CharReach> &mask, vector<ue2_literal> *lit, DEBUG_PRINTF("best minbound %u length %u\n", *minBound, *length); - assert(all_of_in(best_candidates, [&](const ue2_literal &s) { - return s.length() == *length; - })); - - *lit = std::move(best_candidates); + assert(all_of_in(best_candidates, [&](const ue2_literal &s) { + return s.length() == *length; + })); + + *lit = std::move(best_candidates); return true; } @@ -345,8 +345,8 @@ void buildLiteralMask(const vector<CharReach> &mask, vector<u8> &msk, } static -bool validateTransientMask(const vector<CharReach> &mask, bool anchored, - bool eod, const Grey &grey) { +bool validateTransientMask(const vector<CharReach> &mask, bool anchored, + bool eod, const Grey &grey) { assert(!mask.empty()); // An EOD anchored mask requires that everything fit into history, while an @@ -358,12 +358,12 @@ bool validateTransientMask(const vector<CharReach> &mask, bool anchored, return false; } - /* although anchored masks cannot be transient, short masks may be placed - * into the atable. */ - if (anchored && mask.size() > grey.maxAnchoredRegion) { - return false; - } - + /* although anchored masks cannot be transient, short masks may be placed + * into the atable. */ + if (anchored && mask.size() > grey.maxAnchoredRegion) { + return false; + } + vector<ue2_literal> lits; u32 lit_minBound; /* minBound of each literal in lit */ u32 lit_length; /* length of each literal in lit */ @@ -423,8 +423,8 @@ bool validateTransientMask(const vector<CharReach> &mask, bool anchored, static bool maskIsNeeded(const ue2_literal &lit, const NGHolder &g) { - flat_set<NFAVertex> curr = {g.accept}; - flat_set<NFAVertex> next; + flat_set<NFAVertex> curr = {g.accept}; + flat_set<NFAVertex> next; for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) { const CharReach &cr = *it; @@ -460,7 +460,7 @@ bool maskIsNeeded(const ue2_literal &lit, const NGHolder &g) { static void addTransientMask(RoseBuildImpl &build, const vector<CharReach> &mask, - const flat_set<ReportID> &reports, bool anchored, + const flat_set<ReportID> &reports, bool anchored, bool eod) { vector<ue2_literal> lits; u32 lit_minBound; /* minBound of each literal in lit */ @@ -489,7 +489,7 @@ void addTransientMask(RoseBuildImpl &build, const vector<CharReach> &mask, // Everyone gets the same report ID. ReportID mask_report = build.getNewNfaReport(); - set_report(*mask_graph, mask_report); + set_report(*mask_graph, mask_report); // Build the HWLM literal mask. vector<u8> msk, cmp; @@ -525,7 +525,7 @@ void addTransientMask(RoseBuildImpl &build, const vector<CharReach> &mask, ENSURE_AT_LEAST(&build.ematcher_region_size, mask.size()); } - const flat_set<ReportID> no_reports; + const flat_set<ReportID> no_reports; for (const auto &lit : lits) { u32 lit_id = build.getLiteralId(lit, msk, cmp, delay, table); @@ -541,7 +541,7 @@ void addTransientMask(RoseBuildImpl &build, const vector<CharReach> &mask, g[v].left.leftfix_report = mask_report; } else { // Make sure our edge bounds are correct. 
- RoseEdge e = edge(parent, v, g); + RoseEdge e = edge(parent, v, g); g[e].minBound = 0; g[e].maxBound = anchored ? 0 : ROSE_BOUND_INF; g[e].history = anchored ? ROSE_ROLE_HISTORY_ANCH @@ -553,7 +553,7 @@ void addTransientMask(RoseBuildImpl &build, const vector<CharReach> &mask, g[v].max_offset = v_max_offset; if (eod) { - RoseEdge e = add_edge(v, eod_v, g); + RoseEdge e = add_edge(v, eod_v, g); g[e].minBound = 0; g[e].maxBound = 0; g[e].history = ROSE_ROLE_HISTORY_LAST_BYTE; @@ -562,7 +562,7 @@ void addTransientMask(RoseBuildImpl &build, const vector<CharReach> &mask, } static -unique_ptr<NGHolder> buildMaskRhs(const flat_set<ReportID> &reports, +unique_ptr<NGHolder> buildMaskRhs(const flat_set<ReportID> &reports, const vector<CharReach> &mask, u32 suffix_len) { assert(suffix_len); @@ -583,16 +583,16 @@ unique_ptr<NGHolder> buildMaskRhs(const flat_set<ReportID> &reports, succ = u; } - NFAEdge e = add_edge(h.start, succ, h); - h[e].tops.insert(DEFAULT_TOP); + NFAEdge e = add_edge(h.start, succ, h); + h[e].tops.insert(DEFAULT_TOP); return rhs; } static -void doAddMask(RoseBuildImpl &tbi, bool anchored, const vector<CharReach> &mask, - const ue2_literal &lit, u32 prefix_len, u32 suffix_len, - const flat_set<ReportID> &reports) { +void doAddMask(RoseBuildImpl &tbi, bool anchored, const vector<CharReach> &mask, + const ue2_literal &lit, u32 prefix_len, u32 suffix_len, + const flat_set<ReportID> &reports) { /* Note: bounds are relative to literal start */ RoseInGraph ig; RoseInVertex s = add_vertex(RoseInVertexProps::makeStart(anchored), ig); @@ -641,7 +641,7 @@ void doAddMask(RoseBuildImpl &tbi, bool anchored, const vector<CharReach> &mask, = buildMaskLhs(true, minBound - prefix2_len + overlap, mask3); mhs->kind = NFA_INFIX; - setTops(*mhs); + setTops(*mhs); add_edge(u, v, RoseInEdgeProps(mhs, delay), ig); DEBUG_PRINTF("add anch literal too!\n"); @@ -719,8 +719,8 @@ bool checkAllowMask(const vector<CharReach> &mask, ue2_literal *lit, } bool RoseBuildImpl::add(bool anchored, const vector<CharReach> &mask, - const flat_set<ReportID> &reports) { - if (validateTransientMask(mask, anchored, false, cc.grey)) { + const flat_set<ReportID> &reports) { + if (validateTransientMask(mask, anchored, false, cc.grey)) { bool eod = false; addTransientMask(*this, mask, reports, anchored, eod); return true; @@ -742,14 +742,14 @@ bool RoseBuildImpl::add(bool anchored, const vector<CharReach> &mask, } bool RoseBuildImpl::validateMask(const vector<CharReach> &mask, - UNUSED const flat_set<ReportID> &reports, - bool anchored, bool eod) const { - return validateTransientMask(mask, anchored, eod, cc.grey); + UNUSED const flat_set<ReportID> &reports, + bool anchored, bool eod) const { + return validateTransientMask(mask, anchored, eod, cc.grey); } static unique_ptr<NGHolder> makeAnchoredGraph(const vector<CharReach> &mask, - const flat_set<ReportID> &reports, + const flat_set<ReportID> &reports, bool eod) { auto gp = ue2::make_unique<NGHolder>(); NGHolder &g = *gp; @@ -771,7 +771,7 @@ unique_ptr<NGHolder> makeAnchoredGraph(const vector<CharReach> &mask, static bool addAnchoredMask(RoseBuildImpl &build, const vector<CharReach> &mask, - const flat_set<ReportID> &reports, bool eod) { + const flat_set<ReportID> &reports, bool eod) { if (!build.cc.grey.allowAnchoredAcyclic) { return false; } @@ -783,8 +783,8 @@ bool addAnchoredMask(RoseBuildImpl &build, const vector<CharReach> &mask, } void RoseBuildImpl::addMask(const vector<CharReach> &mask, - const flat_set<ReportID> &reports, bool anchored, - bool eod) { + const 
flat_set<ReportID> &reports, bool anchored, + bool eod) { if (anchored && addAnchoredMask(*this, mask, reports, eod)) { DEBUG_PRINTF("added mask as anchored acyclic graph\n"); return; diff --git a/contrib/libs/hyperscan/src/rose/rose_build_anchored.cpp b/contrib/libs/hyperscan/src/rose/rose_build_anchored.cpp index d247b33c3a..23688b8d22 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_anchored.cpp +++ b/contrib/libs/hyperscan/src/rose/rose_build_anchored.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,7 +30,7 @@ #include "grey.h" #include "rose_build_impl.h" -#include "rose_build_matchers.h" +#include "rose_build_matchers.h" #include "rose_internal.h" #include "ue2common.h" #include "nfa/dfa_min.h" @@ -49,12 +49,12 @@ #include "util/compile_error.h" #include "util/container.h" #include "util/determinise.h" -#include "util/flat_containers.h" +#include "util/flat_containers.h" #include "util/graph_range.h" #include "util/make_unique.h" #include "util/order_check.h" #include "util/ue2string.h" -#include "util/unordered.h" +#include "util/unordered.h" #include "util/verify_types.h" #include <map> @@ -73,8 +73,8 @@ namespace ue2 { #define INIT_STATE (DEAD_STATE + 1) -#define NO_FRAG_ID (~0U) - +#define NO_FRAG_ID (~0U) + // Adds a vertex with the given reach. static NFAVertex add_vertex(NGHolder &h, const CharReach &cr) { @@ -177,90 +177,90 @@ void mergeAnchoredDfas(vector<unique_ptr<raw_dfa>> &dfas, } static -void remapAnchoredReports(raw_dfa &rdfa, const vector<u32> &frag_map) { - for (dstate &ds : rdfa.states) { - assert(ds.reports_eod.empty()); // Not used in anchored matcher. - if (ds.reports.empty()) { - continue; - } - - flat_set<ReportID> new_reports; - for (auto id : ds.reports) { - assert(id < frag_map.size()); - new_reports.insert(frag_map[id]); - } - ds.reports = std::move(new_reports); +void remapAnchoredReports(raw_dfa &rdfa, const vector<u32> &frag_map) { + for (dstate &ds : rdfa.states) { + assert(ds.reports_eod.empty()); // Not used in anchored matcher. + if (ds.reports.empty()) { + continue; + } + + flat_set<ReportID> new_reports; + for (auto id : ds.reports) { + assert(id < frag_map.size()); + new_reports.insert(frag_map[id]); + } + ds.reports = std::move(new_reports); } } -/** - * \brief Replaces the report ids currently in the dfas (rose graph literal - * ids) with the fragment id for each literal. - */ +/** + * \brief Replaces the report ids currently in the dfas (rose graph literal + * ids) with the fragment id for each literal. + */ static -void remapAnchoredReports(RoseBuildImpl &build, const vector<u32> &frag_map) { - for (auto &m : build.anchored_nfas) { - for (auto &rdfa : m.second) { - assert(rdfa); - remapAnchoredReports(*rdfa, frag_map); - } +void remapAnchoredReports(RoseBuildImpl &build, const vector<u32> &frag_map) { + for (auto &m : build.anchored_nfas) { + for (auto &rdfa : m.second) { + assert(rdfa); + remapAnchoredReports(*rdfa, frag_map); + } } } -/** - * Returns mapping from literal ids to fragment ids. - */ +/** + * Returns mapping from literal ids to fragment ids. 
+ */ static -vector<u32> reverseFragMap(const RoseBuildImpl &build, - const vector<LitFragment> &fragments) { - vector<u32> rev(build.literal_info.size(), NO_FRAG_ID); - for (const auto &f : fragments) { - for (u32 lit_id : f.lit_ids) { - assert(lit_id < rev.size()); - rev[lit_id] = f.fragment_id; +vector<u32> reverseFragMap(const RoseBuildImpl &build, + const vector<LitFragment> &fragments) { + vector<u32> rev(build.literal_info.size(), NO_FRAG_ID); + for (const auto &f : fragments) { + for (u32 lit_id : f.lit_ids) { + assert(lit_id < rev.size()); + rev[lit_id] = f.fragment_id; } } - return rev; + return rev; } -/** - * \brief Replace the reports (which are literal final_ids) in the given - * raw_dfa with program offsets. - */ +/** + * \brief Replace the reports (which are literal final_ids) in the given + * raw_dfa with program offsets. + */ static -void remapIdsToPrograms(const vector<LitFragment> &fragments, raw_dfa &rdfa) { - for (dstate &ds : rdfa.states) { - assert(ds.reports_eod.empty()); // Not used in anchored matcher. - if (ds.reports.empty()) { - continue; - } - - flat_set<ReportID> new_reports; - for (auto fragment_id : ds.reports) { - const auto &frag = fragments.at(fragment_id); - new_reports.insert(frag.lit_program_offset); - } - ds.reports = std::move(new_reports); - } -} - -static -unique_ptr<NGHolder> populate_holder(const simple_anchored_info &sai, - const flat_set<u32> &exit_ids) { +void remapIdsToPrograms(const vector<LitFragment> &fragments, raw_dfa &rdfa) { + for (dstate &ds : rdfa.states) { + assert(ds.reports_eod.empty()); // Not used in anchored matcher. + if (ds.reports.empty()) { + continue; + } + + flat_set<ReportID> new_reports; + for (auto fragment_id : ds.reports) { + const auto &frag = fragments.at(fragment_id); + new_reports.insert(frag.lit_program_offset); + } + ds.reports = std::move(new_reports); + } +} + +static +unique_ptr<NGHolder> populate_holder(const simple_anchored_info &sai, + const flat_set<u32> &exit_ids) { DEBUG_PRINTF("populating holder for ^.{%u,%u}%s\n", sai.min_bound, sai.max_bound, dumpString(sai.literal).c_str()); - auto h_ptr = std::make_unique<NGHolder>(); - NGHolder &h = *h_ptr; - auto ends = addDotsToGraph(h, h.start, sai.min_bound, sai.max_bound, - CharReach::dot()); + auto h_ptr = std::make_unique<NGHolder>(); + NGHolder &h = *h_ptr; + auto ends = addDotsToGraph(h, h.start, sai.min_bound, sai.max_bound, + CharReach::dot()); NFAVertex v = addToGraph(h, ends, sai.literal); add_edge(v, h.accept, h); h[v].reports.insert(exit_ids.begin(), exit_ids.end()); - return h_ptr; + return h_ptr; } -u32 anchoredStateSize(const anchored_matcher_info &atable) { - const struct anchored_matcher_info *curr = &atable; +u32 anchoredStateSize(const anchored_matcher_info &atable) { + const struct anchored_matcher_info *curr = &atable; // Walk the list until we find the last element; total state size will be // that engine's state offset plus its state requirement. 
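As the comment above notes, the anchored matcher is laid out as a chain of engine headers in one allocation, and the total stream state comes from the last engine in that chain. A minimal, self-contained sketch of that kind of walk is below; the struct and field names are invented for illustration and are not the real anchored_matcher_info layout.

#include <cstdint>

// Hypothetical header for one engine in a packed chain of engines.
struct EngineHeader {
    std::uint32_t next_offset;  // byte offset to the next header, 0 for the last
    std::uint32_t state_offset; // where this engine's stream state begins
    std::uint32_t state_size;   // bytes of stream state this engine requires
};

// Walk to the last engine; total state is its offset plus its own size.
inline std::uint32_t totalAnchoredStateSize(const EngineHeader *curr) {
    while (curr->next_offset) {
        curr = reinterpret_cast<const EngineHeader *>(
            reinterpret_cast<const char *>(curr) + curr->next_offset);
    }
    return curr->state_offset + curr->state_size;
}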
@@ -270,12 +270,12 @@ u32 anchoredStateSize(const anchored_matcher_info &atable) { } const NFA *nfa = (const NFA *)((const char *)curr + sizeof(*curr)); - return curr->state_offset + nfa->streamStateSize; + return curr->state_offset + nfa->streamStateSize; } namespace { -using nfa_state_set = bitfield<ANCHORED_NFA_STATE_LIMIT>; +using nfa_state_set = bitfield<ANCHORED_NFA_STATE_LIMIT>; struct Holder_StateSet { Holder_StateSet() : wdelay(0) {} @@ -286,16 +286,16 @@ struct Holder_StateSet { bool operator==(const Holder_StateSet &b) const { return wdelay == b.wdelay && wrap_state == b.wrap_state; } - - size_t hash() const { - return hash_all(wrap_state, wdelay); - } + + size_t hash() const { + return hash_all(wrap_state, wdelay); + } }; class Automaton_Holder { public: - using StateSet = Holder_StateSet; - using StateMap = ue2_unordered_map<StateSet, dstate_id_t>; + using StateSet = Holder_StateSet; + using StateMap = ue2_unordered_map<StateSet, dstate_id_t>; explicit Automaton_Holder(const NGHolder &g_in) : g(g_in) { for (auto v : vertices_range(g)) { @@ -414,7 +414,7 @@ public: private: const NGHolder &g; - unordered_map<NFAVertex, u32> vertexToIndex; + unordered_map<NFAVertex, u32> vertexToIndex; vector<NFAVertex> indexToVertex; vector<CharReach> cr_by_index; StateSet init; @@ -486,7 +486,7 @@ bool check_dupe(const raw_dfa &rdfa, } static -bool check_dupe_simple(const RoseBuildImpl &build, u32 min_bound, u32 max_bound, +bool check_dupe_simple(const RoseBuildImpl &build, u32 min_bound, u32 max_bound, const ue2_literal &lit, ReportID *remap) { if (!remap) { DEBUG_PRINTF("no remap\n"); @@ -494,8 +494,8 @@ bool check_dupe_simple(const RoseBuildImpl &build, u32 min_bound, u32 max_bound, } simple_anchored_info sai(min_bound, max_bound, lit); - if (contains(build.anchored_simple, sai)) { - *remap = *build.anchored_simple.at(sai).begin(); + if (contains(build.anchored_simple, sai)) { + *remap = *build.anchored_simple.at(sai).begin(); return true; } @@ -515,7 +515,7 @@ NFAVertex extractLiteral(const NGHolder &h, ue2_literal *lit) { } if (lit_verts.empty()) { - return NGHolder::null_vertex(); + return NGHolder::null_vertex(); } bool nocase = false; @@ -527,7 +527,7 @@ NFAVertex extractLiteral(const NGHolder &h, ue2_literal *lit) { if (cr.isAlpha()) { bool cr_nocase = cr.count() != 1; if (case_set && cr_nocase != nocase) { - return NGHolder::null_vertex(); + return NGHolder::null_vertex(); } case_set = true; @@ -550,7 +550,7 @@ bool isSimple(const NGHolder &h, u32 *min_bound, u32 *max_bound, DEBUG_PRINTF("looking for simple case\n"); NFAVertex lit_head = extractLiteral(h, lit); - if (lit_head == NGHolder::null_vertex()) { + if (lit_head == NGHolder::null_vertex()) { DEBUG_PRINTF("no literal found\n"); return false; } @@ -568,7 +568,7 @@ bool isSimple(const NGHolder &h, u32 *min_bound, u32 *max_bound, /* lit should only be connected to dot vertices */ for (auto u : inv_adjacent_vertices_range(lit_head, h)) { - DEBUG_PRINTF("checking %zu\n", h[u].index); + DEBUG_PRINTF("checking %zu\n", h[u].index); if (!h[u].char_reach.all()) { return false; } @@ -659,7 +659,7 @@ bool isSimple(const NGHolder &h, u32 *min_bound, u32 *max_bound, } static -int finalise_out(RoseBuildImpl &build, const NGHolder &h, +int finalise_out(RoseBuildImpl &build, const NGHolder &h, const Automaton_Holder &autom, unique_ptr<raw_dfa> out_dfa, ReportID *remap) { u32 min_bound = ~0U; @@ -668,12 +668,12 @@ int finalise_out(RoseBuildImpl &build, const NGHolder &h, u32 simple_report = MO_INVALID_IDX; if (isSimple(h, &min_bound, 
&max_bound, &lit, &simple_report)) { assert(simple_report != MO_INVALID_IDX); - if (check_dupe_simple(build, min_bound, max_bound, lit, remap)) { + if (check_dupe_simple(build, min_bound, max_bound, lit, remap)) { DEBUG_PRINTF("found duplicate remapping to %u\n", *remap); return ANCHORED_REMAP; } DEBUG_PRINTF("add with report %u\n", simple_report); - build.anchored_simple[simple_anchored_info(min_bound, max_bound, lit)] + build.anchored_simple[simple_anchored_info(min_bound, max_bound, lit)] .insert(simple_report); return ANCHORED_SUCCESS; } @@ -683,15 +683,15 @@ int finalise_out(RoseBuildImpl &build, const NGHolder &h, out_dfa->alpha_size = autom.alphasize; out_dfa->alpha_remap = autom.alpha; auto hash = hash_dfa_no_reports(*out_dfa); - if (check_dupe(*out_dfa, build.anchored_nfas[hash], remap)) { + if (check_dupe(*out_dfa, build.anchored_nfas[hash], remap)) { return ANCHORED_REMAP; } - build.anchored_nfas[hash].push_back(move(out_dfa)); + build.anchored_nfas[hash].push_back(move(out_dfa)); return ANCHORED_SUCCESS; } static -int addAutomaton(RoseBuildImpl &build, const NGHolder &h, ReportID *remap) { +int addAutomaton(RoseBuildImpl &build, const NGHolder &h, ReportID *remap) { if (num_vertices(h) > ANCHORED_NFA_STATE_LIMIT) { DEBUG_PRINTF("autom bad!\n"); return ANCHORED_FAIL; @@ -699,9 +699,9 @@ int addAutomaton(RoseBuildImpl &build, const NGHolder &h, ReportID *remap) { Automaton_Holder autom(h); - auto out_dfa = ue2::make_unique<raw_dfa>(NFA_OUTFIX_RAW); - if (determinise(autom, out_dfa->states, MAX_DFA_STATES)) { - return finalise_out(build, h, autom, move(out_dfa), remap); + auto out_dfa = ue2::make_unique<raw_dfa>(NFA_OUTFIX_RAW); + if (determinise(autom, out_dfa->states, MAX_DFA_STATES)) { + return finalise_out(build, h, autom, move(out_dfa), remap); } DEBUG_PRINTF("determinise failed\n"); @@ -710,7 +710,7 @@ int addAutomaton(RoseBuildImpl &build, const NGHolder &h, ReportID *remap) { static void setReports(NGHolder &h, const map<NFAVertex, set<u32>> &reportMap, - const unordered_map<NFAVertex, NFAVertex> &orig_to_copy) { + const unordered_map<NFAVertex, NFAVertex> &orig_to_copy) { for (const auto &m : reportMap) { NFAVertex t = orig_to_copy.at(m.first); assert(!m.second.empty()); @@ -719,10 +719,10 @@ void setReports(NGHolder &h, const map<NFAVertex, set<u32>> &reportMap, } } -int addAnchoredNFA(RoseBuildImpl &build, const NGHolder &wrapper, +int addAnchoredNFA(RoseBuildImpl &build, const NGHolder &wrapper, const map<NFAVertex, set<u32>> &reportMap) { NGHolder h; - unordered_map<NFAVertex, NFAVertex> orig_to_copy; + unordered_map<NFAVertex, NFAVertex> orig_to_copy; cloneHolder(h, wrapper, &orig_to_copy); clear_in_edges(h.accept, h); clear_in_edges(h.acceptEod, h); @@ -730,10 +730,10 @@ int addAnchoredNFA(RoseBuildImpl &build, const NGHolder &wrapper, clearReports(h); setReports(h, reportMap, orig_to_copy); - return addAutomaton(build, h, nullptr); + return addAutomaton(build, h, nullptr); } -int addToAnchoredMatcher(RoseBuildImpl &build, const NGHolder &anchored, +int addToAnchoredMatcher(RoseBuildImpl &build, const NGHolder &anchored, u32 exit_id, ReportID *remap) { NGHolder h; cloneHolder(h, anchored); @@ -744,26 +744,26 @@ int addToAnchoredMatcher(RoseBuildImpl &build, const NGHolder &anchored, h[v].reports.insert(exit_id); } - return addAutomaton(build, h, remap); + return addAutomaton(build, h, remap); } static -void buildSimpleDfas(const RoseBuildImpl &build, const vector<u32> &frag_map, +void buildSimpleDfas(const RoseBuildImpl &build, const vector<u32> &frag_map, 
vector<unique_ptr<raw_dfa>> *anchored_dfas) { /* we should have determinised all of these before so there should be no * chance of failure. */ - flat_set<u32> exit_ids; - for (const auto &simple : build.anchored_simple) { - exit_ids.clear(); + flat_set<u32> exit_ids; + for (const auto &simple : build.anchored_simple) { + exit_ids.clear(); for (auto lit_id : simple.second) { - assert(lit_id < frag_map.size()); - exit_ids.insert(frag_map[lit_id]); + assert(lit_id < frag_map.size()); + exit_ids.insert(frag_map[lit_id]); } - auto h = populate_holder(simple.first, exit_ids); - Automaton_Holder autom(*h); - auto rdfa = ue2::make_unique<raw_dfa>(NFA_OUTFIX_RAW); - UNUSED bool rv = determinise(autom, rdfa->states, MAX_DFA_STATES); - assert(rv); + auto h = populate_holder(simple.first, exit_ids); + Automaton_Holder autom(*h); + auto rdfa = ue2::make_unique<raw_dfa>(NFA_OUTFIX_RAW); + UNUSED bool rv = determinise(autom, rdfa->states, MAX_DFA_STATES); + assert(rv); rdfa->start_anchored = INIT_STATE; rdfa->start_floating = DEAD_STATE; rdfa->alpha_size = autom.alphasize; @@ -778,25 +778,25 @@ void buildSimpleDfas(const RoseBuildImpl &build, const vector<u32> &frag_map, * from RoseBuildImpl. */ static -vector<unique_ptr<raw_dfa>> getAnchoredDfas(RoseBuildImpl &build, - const vector<u32> &frag_map) { - vector<unique_ptr<raw_dfa>> dfas; - +vector<unique_ptr<raw_dfa>> getAnchoredDfas(RoseBuildImpl &build, + const vector<u32> &frag_map) { + vector<unique_ptr<raw_dfa>> dfas; + // DFAs that already exist as raw_dfas. - for (auto &anch_dfas : build.anchored_nfas) { + for (auto &anch_dfas : build.anchored_nfas) { for (auto &rdfa : anch_dfas.second) { - dfas.push_back(move(rdfa)); + dfas.push_back(move(rdfa)); } } - build.anchored_nfas.clear(); + build.anchored_nfas.clear(); // DFAs we currently have as simple literals. - if (!build.anchored_simple.empty()) { - buildSimpleDfas(build, frag_map, &dfas); - build.anchored_simple.clear(); + if (!build.anchored_simple.empty()) { + buildSimpleDfas(build, frag_map, &dfas); + build.anchored_simple.clear(); } - - return dfas; + + return dfas; } /** @@ -810,10 +810,10 @@ vector<unique_ptr<raw_dfa>> getAnchoredDfas(RoseBuildImpl &build, * \return Total bytes required for the complete anchored matcher. 
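 * (From the code below: each raw_dfa first has its leading dots stripped,
 * with the count recorded in start_offset and later used as that engine's
 * anchoredMinDistance, then is Hopcroft-minimised and compiled into a
 * McClellan engine.)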
*/ static -size_t buildNfas(vector<raw_dfa> &anchored_dfas, - vector<bytecode_ptr<NFA>> *nfas, - vector<u32> *start_offset, const CompileContext &cc, - const ReportManager &rm) { +size_t buildNfas(vector<raw_dfa> &anchored_dfas, + vector<bytecode_ptr<NFA>> *nfas, + vector<u32> *start_offset, const CompileContext &cc, + const ReportManager &rm) { const size_t num_dfas = anchored_dfas.size(); nfas->reserve(num_dfas); @@ -822,12 +822,12 @@ size_t buildNfas(vector<raw_dfa> &anchored_dfas, size_t total_size = 0; for (auto &rdfa : anchored_dfas) { - u32 removed_dots = remove_leading_dots(rdfa); + u32 removed_dots = remove_leading_dots(rdfa); start_offset->push_back(removed_dots); - minimize_hopcroft(rdfa, cc.grey); + minimize_hopcroft(rdfa, cc.grey); - auto nfa = mcclellanCompile(rdfa, cc, rm, false); + auto nfa = mcclellanCompile(rdfa, cc, rm, false); if (!nfa) { assert(0); throw std::bad_alloc(); @@ -844,53 +844,53 @@ size_t buildNfas(vector<raw_dfa> &anchored_dfas, return total_size; } -vector<raw_dfa> buildAnchoredDfas(RoseBuildImpl &build, - const vector<LitFragment> &fragments) { - vector<raw_dfa> dfas; +vector<raw_dfa> buildAnchoredDfas(RoseBuildImpl &build, + const vector<LitFragment> &fragments) { + vector<raw_dfa> dfas; - if (build.anchored_nfas.empty() && build.anchored_simple.empty()) { + if (build.anchored_nfas.empty() && build.anchored_simple.empty()) { + DEBUG_PRINTF("empty\n"); + return dfas; + } + + const auto frag_map = reverseFragMap(build, fragments); + remapAnchoredReports(build, frag_map); + + auto anch_dfas = getAnchoredDfas(build, frag_map); + mergeAnchoredDfas(anch_dfas, build); + + dfas.reserve(anch_dfas.size()); + for (auto &rdfa : anch_dfas) { + assert(rdfa); + dfas.push_back(move(*rdfa)); + } + return dfas; +} + +bytecode_ptr<anchored_matcher_info> +buildAnchoredMatcher(RoseBuildImpl &build, const vector<LitFragment> &fragments, + vector<raw_dfa> &dfas) { + const CompileContext &cc = build.cc; + + if (dfas.empty()) { DEBUG_PRINTF("empty\n"); - return dfas; - } - - const auto frag_map = reverseFragMap(build, fragments); - remapAnchoredReports(build, frag_map); - - auto anch_dfas = getAnchoredDfas(build, frag_map); - mergeAnchoredDfas(anch_dfas, build); - - dfas.reserve(anch_dfas.size()); - for (auto &rdfa : anch_dfas) { - assert(rdfa); - dfas.push_back(move(*rdfa)); - } - return dfas; -} - -bytecode_ptr<anchored_matcher_info> -buildAnchoredMatcher(RoseBuildImpl &build, const vector<LitFragment> &fragments, - vector<raw_dfa> &dfas) { - const CompileContext &cc = build.cc; - - if (dfas.empty()) { - DEBUG_PRINTF("empty\n"); return nullptr; } - for (auto &rdfa : dfas) { - remapIdsToPrograms(fragments, rdfa); - } + for (auto &rdfa : dfas) { + remapIdsToPrograms(fragments, rdfa); + } - vector<bytecode_ptr<NFA>> nfas; + vector<bytecode_ptr<NFA>> nfas; vector<u32> start_offset; // start offset for each dfa (dots removed) - size_t total_size = buildNfas(dfas, &nfas, &start_offset, cc, build.rm); + size_t total_size = buildNfas(dfas, &nfas, &start_offset, cc, build.rm); if (total_size > cc.grey.limitRoseAnchoredSize) { throw ResourceLimitError(); } - auto atable = - make_zeroed_bytecode_ptr<anchored_matcher_info>(total_size, 64); + auto atable = + make_zeroed_bytecode_ptr<anchored_matcher_info>(total_size, 64); char *curr = (char *)atable.get(); u32 state_offset = 0; @@ -912,11 +912,11 @@ buildAnchoredMatcher(RoseBuildImpl &build, const vector<LitFragment> &fragments, } ami->state_offset = state_offset; - state_offset += nfa->streamStateSize; + state_offset += 
nfa->streamStateSize; ami->anchoredMinDistance = start_offset[i]; } - DEBUG_PRINTF("success %zu\n", atable.size()); + DEBUG_PRINTF("success %zu\n", atable.size()); return atable; } diff --git a/contrib/libs/hyperscan/src/rose/rose_build_anchored.h b/contrib/libs/hyperscan/src/rose/rose_build_anchored.h index ef9d575e31..37d268ac5a 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_anchored.h +++ b/contrib/libs/hyperscan/src/rose/rose_build_anchored.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,50 +30,50 @@ #define ROSE_BUILD_ANCHORED #include "ue2common.h" -#include "rose_build_impl.h" +#include "rose_build_impl.h" #include "nfagraph/ng_holder.h" -#include "util/bytecode_ptr.h" +#include "util/bytecode_ptr.h" #include <map> #include <vector> #include <set> -struct anchored_matcher_info; +struct anchored_matcher_info; namespace ue2 { class RoseBuildImpl; -struct raw_dfa; -struct LitFragment; +struct raw_dfa; +struct LitFragment; -/** - * \brief Construct a set of anchored DFAs from our anchored literals/engines. - */ -std::vector<raw_dfa> buildAnchoredDfas(RoseBuildImpl &build, - const std::vector<LitFragment> &fragments); +/** + * \brief Construct a set of anchored DFAs from our anchored literals/engines. + */ +std::vector<raw_dfa> buildAnchoredDfas(RoseBuildImpl &build, + const std::vector<LitFragment> &fragments); + +/** + * \brief Construct an anchored_matcher_info runtime structure from the given + * set of DFAs. + * + * Remap the literal final_ids used for raw_dfa reports to the program offsets + * given in litPrograms. + */ +bytecode_ptr<anchored_matcher_info> +buildAnchoredMatcher(RoseBuildImpl &build, + const std::vector<LitFragment> &fragments, + std::vector<raw_dfa> &dfas); + +u32 anchoredStateSize(const anchored_matcher_info &atable); -/** - * \brief Construct an anchored_matcher_info runtime structure from the given - * set of DFAs. - * - * Remap the literal final_ids used for raw_dfa reports to the program offsets - * given in litPrograms. 
- */ -bytecode_ptr<anchored_matcher_info> -buildAnchoredMatcher(RoseBuildImpl &build, - const std::vector<LitFragment> &fragments, - std::vector<raw_dfa> &dfas); - -u32 anchoredStateSize(const anchored_matcher_info &atable); - #define ANCHORED_FAIL 0 #define ANCHORED_SUCCESS 1 #define ANCHORED_REMAP 2 -int addAnchoredNFA(RoseBuildImpl &build, const NGHolder &wrapper, +int addAnchoredNFA(RoseBuildImpl &build, const NGHolder &wrapper, const std::map<NFAVertex, std::set<u32>> &reportMap); -int addToAnchoredMatcher(RoseBuildImpl &build, const NGHolder &anchored, +int addToAnchoredMatcher(RoseBuildImpl &build, const NGHolder &anchored, u32 exit_id, ReportID *remap); } // namespace ue2 diff --git a/contrib/libs/hyperscan/src/rose/rose_build_bytecode.cpp b/contrib/libs/hyperscan/src/rose/rose_build_bytecode.cpp index 6327e53735..df464c2800 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_bytecode.cpp +++ b/contrib/libs/hyperscan/src/rose/rose_build_bytecode.cpp @@ -33,38 +33,38 @@ #include "hs_compile.h" // for HS_MODE_* #include "rose_build_add_internal.h" #include "rose_build_anchored.h" -#include "rose_build_dump.h" -#include "rose_build_engine_blob.h" -#include "rose_build_exclusive.h" -#include "rose_build_groups.h" +#include "rose_build_dump.h" +#include "rose_build_engine_blob.h" +#include "rose_build_exclusive.h" +#include "rose_build_groups.h" #include "rose_build_infix.h" -#include "rose_build_long_lit.h" +#include "rose_build_long_lit.h" #include "rose_build_lookaround.h" -#include "rose_build_matchers.h" -#include "rose_build_misc.h" -#include "rose_build_program.h" -#include "rose_build_resources.h" +#include "rose_build_matchers.h" +#include "rose_build_misc.h" +#include "rose_build_program.h" +#include "rose_build_resources.h" #include "rose_build_scatter.h" #include "rose_build_util.h" #include "rose_build_width.h" -#include "rose_internal.h" -#include "rose_program.h" +#include "rose_internal.h" +#include "rose_program.h" #include "hwlm/hwlm.h" /* engine types */ #include "hwlm/hwlm_build.h" -#include "hwlm/hwlm_literal.h" +#include "hwlm/hwlm_literal.h" #include "nfa/castlecompile.h" #include "nfa/goughcompile.h" #include "nfa/mcclellancompile.h" -#include "nfa/mcclellancompile_util.h" -#include "nfa/mcsheng_compile.h" +#include "nfa/mcclellancompile_util.h" +#include "nfa/mcsheng_compile.h" #include "nfa/nfa_api_queue.h" #include "nfa/nfa_build_util.h" #include "nfa/nfa_internal.h" -#include "nfa/shengcompile.h" +#include "nfa/shengcompile.h" #include "nfa/shufticompile.h" -#include "nfa/tamaramacompile.h" -#include "nfa/tamarama_internal.h" -#include "nfagraph/ng_execute.h" +#include "nfa/tamaramacompile.h" +#include "nfa/tamarama_internal.h" +#include "nfagraph/ng_execute.h" #include "nfagraph/ng_holder.h" #include "nfagraph/ng_lbr.h" #include "nfagraph/ng_limex.h" @@ -75,7 +75,7 @@ #include "nfagraph/ng_stop.h" #include "nfagraph/ng_util.h" #include "nfagraph/ng_width.h" -#include "smallwrite/smallwrite_build.h" +#include "smallwrite/smallwrite_build.h" #include "som/slot_manager.h" #include "util/bitutils.h" #include "util/boundary_reports.h" @@ -84,21 +84,21 @@ #include "util/compile_context.h" #include "util/compile_error.h" #include "util/container.h" -#include "util/fatbit_build.h" +#include "util/fatbit_build.h" #include "util/graph_range.h" -#include "util/insertion_ordered.h" -#include "util/make_unique.h" +#include "util/insertion_ordered.h" +#include "util/make_unique.h" #include "util/multibit_build.h" -#include "util/noncopyable.h" +#include 
"util/noncopyable.h" #include "util/order_check.h" -#include "util/popcount.h" +#include "util/popcount.h" #include "util/queue_index_factory.h" #include "util/report_manager.h" #include "util/ue2string.h" #include "util/verify_types.h" #include <algorithm> -#include <array> +#include <array> #include <map> #include <queue> #include <set> @@ -135,73 +135,73 @@ namespace ue2 { namespace /* anon */ { -struct build_context : noncopyable { - /** \brief information about engines to the left of a vertex */ - map<RoseVertex, left_build_info> leftfix_info; - - /** \brief mapping from suffix to queue index. */ - map<suffix_id, u32> suffixes; - - /** \brief engine info by queue. */ - map<u32, engine_info> engine_info_by_queue; - - /** \brief Simple cache of programs written to engine blob, used for - * deduplication. */ - unordered_map<RoseProgram, u32, RoseProgramHash, - RoseProgramEquivalence> program_cache; - - /** \brief State indices, for those roles that have them. - * Each vertex present has a unique state index in the range - * [0, roleStateIndices.size()). */ - unordered_map<RoseVertex, u32> roleStateIndices; - - /** \brief Mapping from queue index to bytecode offset for built engines - * that have already been pushed into the engine_blob. */ - unordered_map<u32, u32> engineOffsets; - - /** \brief List of long literals (ones with CHECK_LONG_LIT instructions) - * that need hash table support. */ - vector<ue2_case_string> longLiterals; - - /** \brief Contents of the Rose bytecode immediately following the - * RoseEngine. */ - RoseEngineBlob engine_blob; - - /** \brief True if this Rose engine has an MPV engine. */ - bool needs_mpv_catchup = false; - - /** \brief Resources in use (tracked as programs are added). */ - RoseResources resources; -}; - -/** \brief subengine info including built engine and -* corresponding triggering rose vertices */ -struct ExclusiveSubengine { - bytecode_ptr<NFA> nfa; - vector<RoseVertex> vertices; -}; - -/** \brief exclusive info to build tamarama */ -struct ExclusiveInfo : noncopyable { - // subengine info - vector<ExclusiveSubengine> subengines; - // all the report in tamarama - set<ReportID> reports; - // assigned queue id - u32 queue; +struct build_context : noncopyable { + /** \brief information about engines to the left of a vertex */ + map<RoseVertex, left_build_info> leftfix_info; + + /** \brief mapping from suffix to queue index. */ + map<suffix_id, u32> suffixes; + + /** \brief engine info by queue. */ + map<u32, engine_info> engine_info_by_queue; + + /** \brief Simple cache of programs written to engine blob, used for + * deduplication. */ + unordered_map<RoseProgram, u32, RoseProgramHash, + RoseProgramEquivalence> program_cache; + + /** \brief State indices, for those roles that have them. + * Each vertex present has a unique state index in the range + * [0, roleStateIndices.size()). */ + unordered_map<RoseVertex, u32> roleStateIndices; + + /** \brief Mapping from queue index to bytecode offset for built engines + * that have already been pushed into the engine_blob. */ + unordered_map<u32, u32> engineOffsets; + + /** \brief List of long literals (ones with CHECK_LONG_LIT instructions) + * that need hash table support. */ + vector<ue2_case_string> longLiterals; + + /** \brief Contents of the Rose bytecode immediately following the + * RoseEngine. */ + RoseEngineBlob engine_blob; + + /** \brief True if this Rose engine has an MPV engine. */ + bool needs_mpv_catchup = false; + + /** \brief Resources in use (tracked as programs are added). 
*/ + RoseResources resources; +}; + +/** \brief subengine info including built engine and +* corresponding triggering rose vertices */ +struct ExclusiveSubengine { + bytecode_ptr<NFA> nfa; + vector<RoseVertex> vertices; +}; + +/** \brief exclusive info to build tamarama */ +struct ExclusiveInfo : noncopyable { + // subengine info + vector<ExclusiveSubengine> subengines; + // all the report in tamarama + set<ReportID> reports; + // assigned queue id + u32 queue; }; } static -void add_nfa_to_blob(build_context &bc, NFA &nfa) { - u32 qi = nfa.queueIndex; - u32 nfa_offset = bc.engine_blob.add(nfa, nfa.length); - DEBUG_PRINTF("added nfa qi=%u, type=%u, length=%u at offset=%u\n", qi, - nfa.type, nfa.length, nfa_offset); - - assert(!contains(bc.engineOffsets, qi)); - bc.engineOffsets.emplace(qi, nfa_offset); +void add_nfa_to_blob(build_context &bc, NFA &nfa) { + u32 qi = nfa.queueIndex; + u32 nfa_offset = bc.engine_blob.add(nfa, nfa.length); + DEBUG_PRINTF("added nfa qi=%u, type=%u, length=%u at offset=%u\n", qi, + nfa.type, nfa.length, nfa_offset); + + assert(!contains(bc.engineOffsets, qi)); + bc.engineOffsets.emplace(qi, nfa_offset); } static @@ -215,96 +215,96 @@ u32 countRosePrefixes(const vector<LeftNfaInfo> &roses) { return num; } -/** - * \brief True if this Rose engine needs to run a catch up whenever a literal - * report is generated. - * - * Catch up is necessary if there are output-exposed engines (suffixes, - * outfixes). - */ -static -bool needsCatchup(const RoseBuildImpl &build) { - /* Note: we could be more selective about when we need to generate catch up - * instructions rather than just a boolean yes/no - for instance, if we know - * that a role can only match before the point that an outfix/suffix could - * match, we do not strictly need a catchup instruction. - * - * However, this would add a certain amount of complexity to the - * catchup logic and would likely have limited applicability - how many - * reporting roles have a fixed max offset and how much time is spent on - * catchup for these cases? - */ - - if (!build.outfixes.empty()) { - /* TODO: check that they have non-eod reports */ +/** + * \brief True if this Rose engine needs to run a catch up whenever a literal + * report is generated. + * + * Catch up is necessary if there are output-exposed engines (suffixes, + * outfixes). + */ +static +bool needsCatchup(const RoseBuildImpl &build) { + /* Note: we could be more selective about when we need to generate catch up + * instructions rather than just a boolean yes/no - for instance, if we know + * that a role can only match before the point that an outfix/suffix could + * match, we do not strictly need a catchup instruction. + * + * However, this would add a certain amount of complexity to the + * catchup logic and would likely have limited applicability - how many + * reporting roles have a fixed max offset and how much time is spent on + * catchup for these cases? 
+ */ + + if (!build.outfixes.empty()) { + /* TODO: check that they have non-eod reports */ DEBUG_PRINTF("has outfixes\n"); - return true; - } - - const RoseGraph &g = build.g; - - for (auto v : vertices_range(g)) { - if (g[v].suffix) { - /* TODO: check that they have non-eod reports */ - DEBUG_PRINTF("vertex %zu has suffix\n", g[v].index); - return true; - } - } - - DEBUG_PRINTF("no need for catch-up on report\n"); - return false; -} - -static -bool isPureFloating(const RoseResources &resources, const CompileContext &cc) { - if (!resources.has_floating) { - DEBUG_PRINTF("no floating table\n"); + return true; + } + + const RoseGraph &g = build.g; + + for (auto v : vertices_range(g)) { + if (g[v].suffix) { + /* TODO: check that they have non-eod reports */ + DEBUG_PRINTF("vertex %zu has suffix\n", g[v].index); + return true; + } + } + + DEBUG_PRINTF("no need for catch-up on report\n"); + return false; +} + +static +bool isPureFloating(const RoseResources &resources, const CompileContext &cc) { + if (!resources.has_floating) { + DEBUG_PRINTF("no floating table\n"); return false; } - if (resources.has_outfixes || resources.has_suffixes || - resources.has_leftfixes) { - DEBUG_PRINTF("has engines\n"); - return false; - } + if (resources.has_outfixes || resources.has_suffixes || + resources.has_leftfixes) { + DEBUG_PRINTF("has engines\n"); + return false; + } - if (resources.has_anchored) { - DEBUG_PRINTF("has anchored matcher\n"); + if (resources.has_anchored) { + DEBUG_PRINTF("has anchored matcher\n"); return false; } - if (resources.has_eod) { - DEBUG_PRINTF("has eod work to do\n"); - return false; - } + if (resources.has_eod) { + DEBUG_PRINTF("has eod work to do\n"); + return false; + } + + if (resources.has_states) { + DEBUG_PRINTF("has states\n"); + return false; + } - if (resources.has_states) { - DEBUG_PRINTF("has states\n"); - return false; - } + if (resources.has_lit_delay) { + DEBUG_PRINTF("has delayed literals\n"); + return false; + } - if (resources.has_lit_delay) { - DEBUG_PRINTF("has delayed literals\n"); - return false; - } + if (cc.streaming && resources.has_lit_check) { + DEBUG_PRINTF("has long literals in streaming mode, which needs long " + "literal table support\n"); + return false; + } - if (cc.streaming && resources.has_lit_check) { - DEBUG_PRINTF("has long literals in streaming mode, which needs long " - "literal table support\n"); - return false; + if (resources.checks_groups) { + DEBUG_PRINTF("has group checks\n"); + return false; } - if (resources.checks_groups) { - DEBUG_PRINTF("has group checks\n"); - return false; - } - DEBUG_PRINTF("pure floating literals\n"); return true; } static -bool isSingleOutfix(const RoseBuildImpl &tbi) { +bool isSingleOutfix(const RoseBuildImpl &tbi) { for (auto v : vertices_range(tbi.g)) { if (tbi.isAnyStart(v)) { continue; @@ -324,86 +324,86 @@ bool isSingleOutfix(const RoseBuildImpl &tbi) { return false; /* streaming runtime makes liberal use of broken flag */ } - return tbi.outfixes.size() == 1; + return tbi.outfixes.size() == 1; } static -u8 pickRuntimeImpl(const RoseBuildImpl &build, const RoseResources &resources, - UNUSED u32 outfixEndQueue) { - DEBUG_PRINTF("has_outfixes=%d\n", resources.has_outfixes); - DEBUG_PRINTF("has_suffixes=%d\n", resources.has_suffixes); - DEBUG_PRINTF("has_leftfixes=%d\n", resources.has_leftfixes); - DEBUG_PRINTF("has_literals=%d\n", resources.has_literals); - DEBUG_PRINTF("has_states=%d\n", resources.has_states); - DEBUG_PRINTF("checks_groups=%d\n", resources.checks_groups); - 
DEBUG_PRINTF("has_lit_delay=%d\n", resources.has_lit_delay); - DEBUG_PRINTF("has_lit_check=%d\n", resources.has_lit_check); - DEBUG_PRINTF("has_anchored=%d\n", resources.has_anchored); - DEBUG_PRINTF("has_floating=%d\n", resources.has_floating); - DEBUG_PRINTF("has_eod=%d\n", resources.has_eod); - - if (isPureFloating(resources, build.cc)) { +u8 pickRuntimeImpl(const RoseBuildImpl &build, const RoseResources &resources, + UNUSED u32 outfixEndQueue) { + DEBUG_PRINTF("has_outfixes=%d\n", resources.has_outfixes); + DEBUG_PRINTF("has_suffixes=%d\n", resources.has_suffixes); + DEBUG_PRINTF("has_leftfixes=%d\n", resources.has_leftfixes); + DEBUG_PRINTF("has_literals=%d\n", resources.has_literals); + DEBUG_PRINTF("has_states=%d\n", resources.has_states); + DEBUG_PRINTF("checks_groups=%d\n", resources.checks_groups); + DEBUG_PRINTF("has_lit_delay=%d\n", resources.has_lit_delay); + DEBUG_PRINTF("has_lit_check=%d\n", resources.has_lit_check); + DEBUG_PRINTF("has_anchored=%d\n", resources.has_anchored); + DEBUG_PRINTF("has_floating=%d\n", resources.has_floating); + DEBUG_PRINTF("has_eod=%d\n", resources.has_eod); + + if (isPureFloating(resources, build.cc)) { return ROSE_RUNTIME_PURE_LITERAL; } - if (isSingleOutfix(build)) { + if (isSingleOutfix(build)) { return ROSE_RUNTIME_SINGLE_OUTFIX; } return ROSE_RUNTIME_FULL_ROSE; } -/** - * \brief True if this Rose engine needs to run MPV catch up in front of - * non-MPV reports. - */ -static -bool needsMpvCatchup(const RoseBuildImpl &build) { - const auto &outfixes = build.outfixes; - bool has_mpv = - any_of(begin(outfixes), end(outfixes), [](const OutfixInfo &outfix) { - return outfix.is_nonempty_mpv(); - }); - - if (!has_mpv) { - DEBUG_PRINTF("no mpv\n"); - return false; - } - - if (isSingleOutfix(build)) { - DEBUG_PRINTF("single outfix\n"); - return false; - } - - return true; -} - -static -void fillStateOffsets(const RoseBuildImpl &build, u32 rolesWithStateCount, - u32 anchorStateSize, u32 activeArrayCount, - u32 activeLeftCount, u32 laggedRoseCount, - u32 longLitStreamStateRequired, u32 historyRequired, - RoseStateOffsets *so) { - u32 curr_offset = 0; - - // First, runtime status (stores per-stream state, like whether we need a - // delay rebuild or have been told to halt matching.) - curr_offset += sizeof(u8); - - // Role state storage. - curr_offset += mmbit_size(rolesWithStateCount); - +/** + * \brief True if this Rose engine needs to run MPV catch up in front of + * non-MPV reports. + */ +static +bool needsMpvCatchup(const RoseBuildImpl &build) { + const auto &outfixes = build.outfixes; + bool has_mpv = + any_of(begin(outfixes), end(outfixes), [](const OutfixInfo &outfix) { + return outfix.is_nonempty_mpv(); + }); + + if (!has_mpv) { + DEBUG_PRINTF("no mpv\n"); + return false; + } + + if (isSingleOutfix(build)) { + DEBUG_PRINTF("single outfix\n"); + return false; + } + + return true; +} + +static +void fillStateOffsets(const RoseBuildImpl &build, u32 rolesWithStateCount, + u32 anchorStateSize, u32 activeArrayCount, + u32 activeLeftCount, u32 laggedRoseCount, + u32 longLitStreamStateRequired, u32 historyRequired, + RoseStateOffsets *so) { + u32 curr_offset = 0; + + // First, runtime status (stores per-stream state, like whether we need a + // delay rebuild or have been told to halt matching.) + curr_offset += sizeof(u8); + + // Role state storage. 
+ curr_offset += mmbit_size(rolesWithStateCount); + so->activeLeafArray = curr_offset; /* TODO: limit size of array */ curr_offset += mmbit_size(activeArrayCount); - so->activeLeafArray_size = mmbit_size(activeArrayCount); + so->activeLeafArray_size = mmbit_size(activeArrayCount); so->activeLeftArray = curr_offset; /* TODO: limit size of array */ - curr_offset += mmbit_size(activeLeftCount); + curr_offset += mmbit_size(activeLeftCount); so->activeLeftArray_size = mmbit_size(activeLeftCount); - so->longLitState = curr_offset; - curr_offset += longLitStreamStateRequired; - so->longLitState_size = longLitStreamStateRequired; + so->longLitState = curr_offset; + curr_offset += longLitStreamStateRequired; + so->longLitState_size = longLitStreamStateRequired; // ONE WHOLE BYTE for each active leftfix with lag. so->leftfixLagTable = curr_offset; @@ -413,7 +413,7 @@ void fillStateOffsets(const RoseBuildImpl &build, u32 rolesWithStateCount, curr_offset += anchorStateSize; so->groups = curr_offset; - so->groups_size = (build.group_end + 7) / 8; + so->groups_size = (build.group_end + 7) / 8; assert(so->groups_size <= sizeof(u64a)); curr_offset += so->groups_size; @@ -421,10 +421,10 @@ void fillStateOffsets(const RoseBuildImpl &build, u32 rolesWithStateCount, so->history = curr_offset; curr_offset += historyRequired; - // Exhaustion multibit. + // Exhaustion multibit. so->exhausted = curr_offset; - curr_offset += mmbit_size(build.rm.numEkeys()); - so->exhausted_size = mmbit_size(build.rm.numEkeys()); + curr_offset += mmbit_size(build.rm.numEkeys()); + so->exhausted_size = mmbit_size(build.rm.numEkeys()); // Logical multibit. so->logicalVec = curr_offset; @@ -438,20 +438,20 @@ void fillStateOffsets(const RoseBuildImpl &build, u32 rolesWithStateCount, curr_offset += so->combVec_size; // SOM locations and valid/writeable multibit structures. - if (build.ssm.numSomSlots()) { - const u32 somWidth = build.ssm.somPrecision(); + if (build.ssm.numSomSlots()) { + const u32 somWidth = build.ssm.somPrecision(); if (somWidth) { // somWidth is zero in block mode. curr_offset = ROUNDUP_N(curr_offset, somWidth); so->somLocation = curr_offset; - curr_offset += build.ssm.numSomSlots() * somWidth; + curr_offset += build.ssm.numSomSlots() * somWidth; } else { so->somLocation = 0; } so->somValid = curr_offset; - curr_offset += mmbit_size(build.ssm.numSomSlots()); + curr_offset += mmbit_size(build.ssm.numSomSlots()); so->somWritable = curr_offset; - curr_offset += mmbit_size(build.ssm.numSomSlots()); - so->somMultibit_size = mmbit_size(build.ssm.numSomSlots()); + curr_offset += mmbit_size(build.ssm.numSomSlots()); + so->somMultibit_size = mmbit_size(build.ssm.numSomSlots()); } else { // No SOM handling, avoid growing the stream state any further. so->somLocation = 0; @@ -460,16 +460,16 @@ void fillStateOffsets(const RoseBuildImpl &build, u32 rolesWithStateCount, } // note: state space for mask nfas is allocated later - so->nfaStateBegin = curr_offset; + so->nfaStateBegin = curr_offset; so->end = curr_offset; } // Get the mask of initial vertices due to root and anchored_root. 
rose_group RoseBuildImpl::getInitialGroups() const { - rose_group groups = getSuccGroups(root) - | getSuccGroups(anchored_root) - | boundary_group_mask; - + rose_group groups = getSuccGroups(root) + | getSuccGroups(anchored_root) + | boundary_group_mask; + DEBUG_PRINTF("initial groups = %016llx\n", groups); return groups; } @@ -486,7 +486,7 @@ bool nfaStuckOn(const NGHolder &g) { set<u32> done_tops; for (const auto &e : out_edges_range(g.start, g)) { - insert(&tops, g[e].tops); + insert(&tops, g[e].tops); if (!g[target(e, g)].char_reach.all()) { continue; } @@ -495,7 +495,7 @@ bool nfaStuckOn(const NGHolder &g) { insert(&asucc, adjacent_vertices(target(e, g), g)); if (asucc == succ) { - insert(&done_tops, g[e].tops); + insert(&done_tops, g[e].tops); } } @@ -553,26 +553,26 @@ void findFixedDepthTops(const RoseGraph &g, const set<PredTopPair> &triggers, * engine. */ static -bytecode_ptr<NFA> pickImpl(bytecode_ptr<NFA> dfa_impl, +bytecode_ptr<NFA> pickImpl(bytecode_ptr<NFA> dfa_impl, bytecode_ptr<NFA> nfa_impl, bool fast_nfa) { assert(nfa_impl); assert(dfa_impl); - assert(isDfaType(dfa_impl->type)); + assert(isDfaType(dfa_impl->type)); // If our NFA is an LBR, it always wins. if (isLbrType(nfa_impl->type)) { return nfa_impl; } - // if our DFA is an accelerated Sheng, it always wins. - if (isShengType(dfa_impl->type) && has_accel(*dfa_impl)) { - return dfa_impl; - } - + // if our DFA is an accelerated Sheng, it always wins. + if (isShengType(dfa_impl->type) && has_accel(*dfa_impl)) { + return dfa_impl; + } + bool d_accel = has_accel(*dfa_impl); bool n_accel = has_accel(*nfa_impl); - bool d_big = isBigDfaType(dfa_impl->type); + bool d_big = isBigDfaType(dfa_impl->type); bool n_vsmall = nfa_impl->nPositions <= 32; bool n_br = has_bounded_repeats(*nfa_impl); DEBUG_PRINTF("da %d na %d db %d nvs %d nbr %d\n", (int)d_accel, @@ -607,33 +607,33 @@ bytecode_ptr<NFA> pickImpl(bytecode_ptr<NFA> dfa_impl, * otherwise a Castle. */ static -bytecode_ptr<NFA> +bytecode_ptr<NFA> buildRepeatEngine(const CastleProto &proto, const map<u32, vector<vector<CharReach>>> &triggers, - const CompileContext &cc, const ReportManager &rm) { + const CompileContext &cc, const ReportManager &rm) { // If we only have one repeat, the LBR should always be the best possible // implementation. if (proto.repeats.size() == 1 && cc.grey.allowLbr) { - return constructLBR(proto, triggers.at(0), cc, rm); + return constructLBR(proto, triggers.at(0), cc, rm); } - auto castle_nfa = buildCastle(proto, triggers, cc, rm); + auto castle_nfa = buildCastle(proto, triggers, cc, rm); assert(castle_nfa); // Should always be constructible. return castle_nfa; } -static -bytecode_ptr<NFA> getDfa(raw_dfa &rdfa, bool is_transient, - const CompileContext &cc, const ReportManager &rm) { - // Unleash the Sheng!! - auto dfa = shengCompile(rdfa, cc, rm, false); - if (!dfa && !is_transient) { - // Sheng wasn't successful, so unleash McClellan! - /* We don't try the hybrid for transient prefixes due to the extra - * bytecode and that they are usually run on small blocks */ - dfa = mcshengCompile(rdfa, cc, rm); - } - if (!dfa) { +static +bytecode_ptr<NFA> getDfa(raw_dfa &rdfa, bool is_transient, + const CompileContext &cc, const ReportManager &rm) { + // Unleash the Sheng!! + auto dfa = shengCompile(rdfa, cc, rm, false); + if (!dfa && !is_transient) { + // Sheng wasn't successful, so unleash McClellan! 
+ /* We don't try the hybrid for transient prefixes due to the extra + * bytecode and that they are usually run on small blocks */ + dfa = mcshengCompile(rdfa, cc, rm); + } + if (!dfa) { dfa = sheng32Compile(rdfa, cc, rm, false); } if (!dfa) { @@ -643,33 +643,33 @@ bytecode_ptr<NFA> getDfa(raw_dfa &rdfa, bool is_transient, dfa = mcshengCompile64(rdfa, cc, rm); } if (!dfa) { - // Sheng wasn't successful, so unleash McClellan! - dfa = mcclellanCompile(rdfa, cc, rm, false); - } - return dfa; -} - + // Sheng wasn't successful, so unleash McClellan! + dfa = mcclellanCompile(rdfa, cc, rm, false); + } + return dfa; +} + /* builds suffix nfas */ static -bytecode_ptr<NFA> +bytecode_ptr<NFA> buildSuffix(const ReportManager &rm, const SomSlotManager &ssm, const map<u32, u32> &fixed_depth_tops, const map<u32, vector<vector<CharReach>>> &triggers, suffix_id suff, const CompileContext &cc) { if (suff.castle()) { - auto n = buildRepeatEngine(*suff.castle(), triggers, cc, rm); + auto n = buildRepeatEngine(*suff.castle(), triggers, cc, rm); assert(n); return n; } if (suff.haig()) { - auto n = goughCompile(*suff.haig(), ssm.somPrecision(), cc, rm); + auto n = goughCompile(*suff.haig(), ssm.somPrecision(), cc, rm); assert(n); return n; } if (suff.dfa()) { - auto d = getDfa(*suff.dfa(), false, cc, rm); + auto d = getDfa(*suff.dfa(), false, cc, rm); assert(d); return d; } @@ -682,7 +682,7 @@ buildSuffix(const ReportManager &rm, const SomSlotManager &ssm, // Take a shot at the LBR engine. if (oneTop) { - auto lbr = constructLBR(holder, triggers.at(0), cc, rm); + auto lbr = constructLBR(holder, triggers.at(0), cc, rm); if (lbr) { return lbr; } @@ -699,7 +699,7 @@ buildSuffix(const ReportManager &rm, const SomSlotManager &ssm, auto rdfa = buildMcClellan(holder, &rm, false, triggers.at(0), cc.grey); if (rdfa) { - auto d = getDfa(*rdfa, false, cc, rm); + auto d = getDfa(*rdfa, false, cc, rm); assert(d); if (cc.grey.roseMcClellanSuffix != 2) { n = pickImpl(move(d), move(n), fast_nfa); @@ -774,29 +774,29 @@ void findTriggerSequences(const RoseBuildImpl &tbi, const u32 top = e.first; const set<u32> &lit_ids = e.second; - for (u32 id : lit_ids) { - const rose_literal_id &lit = tbi.literals.at(id); + for (u32 id : lit_ids) { + const rose_literal_id &lit = tbi.literals.at(id); (*trigger_lits)[top].push_back(as_cr_seq(lit)); } } } -static -bytecode_ptr<NFA> makeLeftNfa(const RoseBuildImpl &tbi, left_id &left, - const bool is_prefix, const bool is_transient, - const map<left_id, set<PredTopPair>> &infixTriggers, - const CompileContext &cc) { - const ReportManager &rm = tbi.rm; +static +bytecode_ptr<NFA> makeLeftNfa(const RoseBuildImpl &tbi, left_id &left, + const bool is_prefix, const bool is_transient, + const map<left_id, set<PredTopPair>> &infixTriggers, + const CompileContext &cc) { + const ReportManager &rm = tbi.rm; + + bytecode_ptr<NFA> n; - bytecode_ptr<NFA> n; - // Should compress state if this rose is non-transient and we're in // streaming mode. const bool compress_state = !is_transient; - assert(is_prefix || !left.graph() || left.graph()->kind == NFA_INFIX); - assert(!is_prefix || !left.graph() || left.graph()->kind == NFA_PREFIX - || left.graph()->kind == NFA_EAGER_PREFIX); + assert(is_prefix || !left.graph() || left.graph()->kind == NFA_INFIX); + assert(!is_prefix || !left.graph() || left.graph()->kind == NFA_PREFIX + || left.graph()->kind == NFA_EAGER_PREFIX); // Holder should be implementable as an NFA at the very least. 
if (!left.dfa() && left.graph()) { @@ -813,19 +813,19 @@ bytecode_ptr<NFA> makeLeftNfa(const RoseBuildImpl &tbi, left_id &left, assert(!is_prefix); map<u32, vector<vector<CharReach> > > triggers; findTriggerSequences(tbi, infixTriggers.at(left), &triggers); - n = buildRepeatEngine(*left.castle(), triggers, cc, rm); + n = buildRepeatEngine(*left.castle(), triggers, cc, rm); assert(n); return n; // Castles/LBRs are always best! } if (left.dfa()) { - n = getDfa(*left.dfa(), is_transient, cc, rm); + n = getDfa(*left.dfa(), is_transient, cc, rm); } else if (left.graph() && cc.grey.roseMcClellanPrefix == 2 && is_prefix && !is_transient) { auto rdfa = buildMcClellan(*left.graph(), nullptr, cc.grey); if (rdfa) { - n = getDfa(*rdfa, is_transient, cc, rm); - assert(n); + n = getDfa(*rdfa, is_transient, cc, rm); + assert(n); } } @@ -833,16 +833,16 @@ bytecode_ptr<NFA> makeLeftNfa(const RoseBuildImpl &tbi, left_id &left, if (!n && !is_prefix && left.graph() && onlyOneTop(*left.graph())) { map<u32, vector<vector<CharReach> > > triggers; findTriggerSequences(tbi, infixTriggers.at(left), &triggers); - assert(triggers.size() == 1); // single top - n = constructLBR(*left.graph(), triggers.begin()->second, cc, rm); + assert(triggers.size() == 1); // single top + n = constructLBR(*left.graph(), triggers.begin()->second, cc, rm); } bool fast_nfa = false; if (!n && left.graph()) { map<u32, vector<vector<CharReach>>> triggers; - if (left.graph()->kind == NFA_INFIX) { - findTriggerSequences(tbi, infixTriggers.at(left), &triggers); - } + if (left.graph()->kind == NFA_INFIX) { + findTriggerSequences(tbi, infixTriggers.at(left), &triggers); + } n = constructNFA(*left.graph(), nullptr, fixed_depth_tops, triggers, compress_state, fast_nfa, cc); } @@ -852,7 +852,7 @@ bytecode_ptr<NFA> makeLeftNfa(const RoseBuildImpl &tbi, left_id &left, && (!n || !has_bounded_repeats_other_than_firsts(*n) || !fast_nfa)) { auto rdfa = buildMcClellan(*left.graph(), nullptr, cc.grey); if (rdfa) { - auto d = getDfa(*rdfa, is_transient, cc, rm); + auto d = getDfa(*rdfa, is_transient, cc, rm); assert(d); n = pickImpl(move(d), move(n), fast_nfa); } @@ -879,709 +879,709 @@ void setLeftNfaProperties(NFA &n, const left_id &left) { } static -void appendTailToHolder(NGHolder &h, const flat_set<ReportID> &reports, - const vector<NFAVertex> &starts, - const vector<CharReach> &tail) { - assert(!tail.empty()); - NFAVertex curr = add_vertex(h); - for (NFAVertex v : starts) { - assert(!edge(v, h.acceptEod, h).second); - assert(h[v].reports == reports); - h[v].reports.clear(); - remove_edge(v, h.accept, h); - add_edge(v, curr, h); - } - auto it = tail.begin(); - h[curr].char_reach = *it; - ++it; - while (it != tail.end()) { - NFAVertex old = curr; - curr = add_vertex(h); - add_edge(old, curr, h); - assert(!it->none()); - h[curr].char_reach = *it; - ++it; - } - - h[curr].reports = reports; - add_edge(curr, h.accept, h); -} - -static -void appendTailToHolder(NGHolder &h, const vector<CharReach> &tail) { - assert(in_degree(h.acceptEod, h) == 1); - assert(!tail.empty()); - - map<flat_set<ReportID>, vector<NFAVertex> > reporters; - for (auto v : inv_adjacent_vertices_range(h.accept, h)) { - reporters[h[v].reports].push_back(v); - } - - for (const auto &e : reporters) { - appendTailToHolder(h, e.first, e.second, tail); - } - - renumber_edges(h); -} - -static -u32 decreaseLag(const RoseBuildImpl &build, NGHolder &h, - const vector<RoseVertex> &succs) { - const RoseGraph &rg = build.g; - static const size_t MAX_RESTORE_LEN = 5; - - vector<CharReach> 
restored(MAX_RESTORE_LEN); - for (RoseVertex v : succs) { - u32 lag = rg[v].left.lag; - for (u32 lit_id : rg[v].literals) { - u32 delay = build.literals.at(lit_id).delay; - const ue2_literal &literal = build.literals.at(lit_id).s; - assert(lag <= literal.length() + delay); - size_t base = literal.length() + delay - lag; - if (base >= literal.length()) { - return 0; - } - size_t len = literal.length() - base; - len = MIN(len, restored.size()); - restored.resize(len); - auto lit_it = literal.begin() + base; - for (u32 i = 0; i < len; i++) { - assert(lit_it != literal.end()); - restored[i] |= *lit_it; - ++lit_it; - } - } - } - - assert(!restored.empty()); - - appendTailToHolder(h, restored); - - return restored.size(); -} - -#define EAGER_DIE_BEFORE_LIMIT 10 - -struct eager_info { - shared_ptr<NGHolder> new_graph; - u32 lag_adjust = 0; -}; - -static -bool checkSuitableForEager(bool is_prefix, const left_id &left, - const RoseBuildImpl &build, - const vector<RoseVertex> &succs, - rose_group squash_mask, rose_group initial_groups, - eager_info &ei, const CompileContext &cc) { - DEBUG_PRINTF("checking prefix --> %016llx...\n", squash_mask); - - const RoseGraph &rg = build.g; - - if (!is_prefix) { - DEBUG_PRINTF("not prefix\n"); - return false; /* only prefixes (for now...) */ - } - - if ((initial_groups & squash_mask) == initial_groups) { - DEBUG_PRINTF("no squash -- useless\n"); - return false; - } - - for (RoseVertex s : succs) { - if (build.isInETable(s) - || contains(rg[s].literals, build.eod_event_literal_id)) { - return false; /* Ignore EOD related prefixes */ - } - } - - if (left.dfa()) { - const raw_dfa &dfa = *left.dfa(); - if (dfa.start_floating != DEAD_STATE) { - return false; /* not purely anchored */ - } - if (!dfa.states[dfa.start_anchored].reports.empty()) { - return false; /* vacuous (todo: handle?) */ - } - - if (!can_die_early(dfa, EAGER_DIE_BEFORE_LIMIT)) { - return false; - } - ei.new_graph = rg[succs[0]].left.graph; - } else if (left.graph()) { - const NGHolder &g = *left.graph(); - if (proper_out_degree(g.startDs, g)) { - return false; /* not purely anchored */ - } - - ei.new_graph = cloneHolder(*left.graph()); - auto gg = ei.new_graph; - gg->kind = NFA_EAGER_PREFIX; - - ei.lag_adjust = decreaseLag(build, *gg, succs); - - if (is_match_vertex(gg->start, *gg)) { - return false; /* should not still be vacuous as lag decreased */ - } - - if (!can_die_early(*gg, EAGER_DIE_BEFORE_LIMIT)) { - DEBUG_PRINTF("not eager as stuck alive\n"); - return false; - } - - /* We need to ensure that adding in the literals does not cause us to no - * longer be able to build an nfa. 
*/ - bool ok = isImplementableNFA(*gg, nullptr, cc); - if (!ok) { - return false; - } - } else { - DEBUG_PRINTF("unable to determine if good for eager running\n"); - return false; - } - - DEBUG_PRINTF("eager prefix\n"); - return true; -} - -static -left_id updateLeftfixWithEager(RoseGraph &g, const eager_info &ei, - const vector<RoseVertex> &succs) { - u32 lag_adjust = ei.lag_adjust; - auto gg = ei.new_graph; - for (RoseVertex v : succs) { - g[v].left.graph = gg; - assert(g[v].left.lag >= lag_adjust); - g[v].left.lag -= lag_adjust; - DEBUG_PRINTF("added %u literal chars back, new lag %u\n", lag_adjust, - g[v].left.lag); - } - left_id leftfix = g[succs[0]].left; - - if (leftfix.graph()) { - assert(leftfix.graph()->kind == NFA_PREFIX - || leftfix.graph()->kind == NFA_EAGER_PREFIX); - leftfix.graph()->kind = NFA_EAGER_PREFIX; - } - if (leftfix.dfa()) { - assert(leftfix.dfa()->kind == NFA_PREFIX); - leftfix.dfa()->kind = NFA_EAGER_PREFIX; - } - - return leftfix; -} - -static -void enforceEngineSizeLimit(const NFA *n, const Grey &grey) { - const size_t nfa_size = n->length; - // Global limit. - if (nfa_size > grey.limitEngineSize) { - throw ResourceLimitError(); - } - - // Type-specific limit checks follow. - - if (isDfaType(n->type)) { - if (nfa_size > grey.limitDFASize) { - throw ResourceLimitError(); - } - } else if (isNfaType(n->type)) { - if (nfa_size > grey.limitNFASize) { - throw ResourceLimitError(); - } - } else if (isLbrType(n->type)) { - if (nfa_size > grey.limitLBRSize) { - throw ResourceLimitError(); - } - } -} - -static -bool buildLeftfix(RoseBuildImpl &build, build_context &bc, bool prefix, u32 qi, - const map<left_id, set<PredTopPair> > &infixTriggers, - set<u32> *no_retrigger_queues, set<u32> *eager_queues, - const map<left_id, eager_info> &eager, - const vector<RoseVertex> &succs, left_id leftfix) { - RoseGraph &g = build.g; - const CompileContext &cc = build.cc; - const ReportManager &rm = build.rm; - - bool is_transient = contains(build.transient, leftfix); - rose_group squash_mask = build.rose_squash_masks.at(leftfix); - - DEBUG_PRINTF("making %sleftfix\n", is_transient ? "transient " : ""); - - if (contains(eager, leftfix)) { - eager_queues->insert(qi); - leftfix = updateLeftfixWithEager(g, eager.at(leftfix), succs); - } - - bytecode_ptr<NFA> nfa; - // Need to build NFA, which is either predestined to be a Haig (in SOM mode) - // or could be all manner of things. - if (leftfix.haig()) { - nfa = goughCompile(*leftfix.haig(), build.ssm.somPrecision(), cc, rm); - } else { - nfa = makeLeftNfa(build, leftfix, prefix, is_transient, infixTriggers, - cc); - } - - if (!nfa) { - assert(!"failed to build leftfix"); - return false; - } - - setLeftNfaProperties(*nfa, leftfix); - - nfa->queueIndex = qi; - enforceEngineSizeLimit(nfa.get(), cc.grey); - bc.engine_info_by_queue.emplace(nfa->queueIndex, - engine_info(nfa.get(), is_transient)); - - if (!prefix && !leftfix.haig() && leftfix.graph() - && nfaStuckOn(*leftfix.graph())) { - DEBUG_PRINTF("%u sticks on\n", qi); - no_retrigger_queues->insert(qi); - } - - DEBUG_PRINTF("built leftfix, qi=%u\n", qi); - add_nfa_to_blob(bc, *nfa); - - // Leftfixes can have stop alphabets. - vector<u8> stop(N_CHARS, 0); - /* haigs track som information - need more care */ - som_type som = leftfix.haig() ? 
SOM_LEFT : SOM_NONE; - if (leftfix.graph()) { - stop = findLeftOffsetStopAlphabet(*leftfix.graph(), som); - } else if (leftfix.castle()) { - stop = findLeftOffsetStopAlphabet(*leftfix.castle(), som); - } - - // Infix NFAs can have bounds on their queue lengths. - u32 max_queuelen = UINT32_MAX; - if (!prefix) { - set<ue2_literal> lits; - for (RoseVertex v : succs) { - for (auto u : inv_adjacent_vertices_range(v, g)) { - for (u32 lit_id : g[u].literals) { - lits.insert(build.literals.at(lit_id).s); - } +void appendTailToHolder(NGHolder &h, const flat_set<ReportID> &reports, + const vector<NFAVertex> &starts, + const vector<CharReach> &tail) { + assert(!tail.empty()); + NFAVertex curr = add_vertex(h); + for (NFAVertex v : starts) { + assert(!edge(v, h.acceptEod, h).second); + assert(h[v].reports == reports); + h[v].reports.clear(); + remove_edge(v, h.accept, h); + add_edge(v, curr, h); + } + auto it = tail.begin(); + h[curr].char_reach = *it; + ++it; + while (it != tail.end()) { + NFAVertex old = curr; + curr = add_vertex(h); + add_edge(old, curr, h); + assert(!it->none()); + h[curr].char_reach = *it; + ++it; + } + + h[curr].reports = reports; + add_edge(curr, h.accept, h); +} + +static +void appendTailToHolder(NGHolder &h, const vector<CharReach> &tail) { + assert(in_degree(h.acceptEod, h) == 1); + assert(!tail.empty()); + + map<flat_set<ReportID>, vector<NFAVertex> > reporters; + for (auto v : inv_adjacent_vertices_range(h.accept, h)) { + reporters[h[v].reports].push_back(v); + } + + for (const auto &e : reporters) { + appendTailToHolder(h, e.first, e.second, tail); + } + + renumber_edges(h); +} + +static +u32 decreaseLag(const RoseBuildImpl &build, NGHolder &h, + const vector<RoseVertex> &succs) { + const RoseGraph &rg = build.g; + static const size_t MAX_RESTORE_LEN = 5; + + vector<CharReach> restored(MAX_RESTORE_LEN); + for (RoseVertex v : succs) { + u32 lag = rg[v].left.lag; + for (u32 lit_id : rg[v].literals) { + u32 delay = build.literals.at(lit_id).delay; + const ue2_literal &literal = build.literals.at(lit_id).s; + assert(lag <= literal.length() + delay); + size_t base = literal.length() + delay - lag; + if (base >= literal.length()) { + return 0; + } + size_t len = literal.length() - base; + len = MIN(len, restored.size()); + restored.resize(len); + auto lit_it = literal.begin() + base; + for (u32 i = 0; i < len; i++) { + assert(lit_it != literal.end()); + restored[i] |= *lit_it; + ++lit_it; + } + } + } + + assert(!restored.empty()); + + appendTailToHolder(h, restored); + + return restored.size(); +} + +#define EAGER_DIE_BEFORE_LIMIT 10 + +struct eager_info { + shared_ptr<NGHolder> new_graph; + u32 lag_adjust = 0; +}; + +static +bool checkSuitableForEager(bool is_prefix, const left_id &left, + const RoseBuildImpl &build, + const vector<RoseVertex> &succs, + rose_group squash_mask, rose_group initial_groups, + eager_info &ei, const CompileContext &cc) { + DEBUG_PRINTF("checking prefix --> %016llx...\n", squash_mask); + + const RoseGraph &rg = build.g; + + if (!is_prefix) { + DEBUG_PRINTF("not prefix\n"); + return false; /* only prefixes (for now...) 
*/ + } + + if ((initial_groups & squash_mask) == initial_groups) { + DEBUG_PRINTF("no squash -- useless\n"); + return false; + } + + for (RoseVertex s : succs) { + if (build.isInETable(s) + || contains(rg[s].literals, build.eod_event_literal_id)) { + return false; /* Ignore EOD related prefixes */ + } + } + + if (left.dfa()) { + const raw_dfa &dfa = *left.dfa(); + if (dfa.start_floating != DEAD_STATE) { + return false; /* not purely anchored */ + } + if (!dfa.states[dfa.start_anchored].reports.empty()) { + return false; /* vacuous (todo: handle?) */ + } + + if (!can_die_early(dfa, EAGER_DIE_BEFORE_LIMIT)) { + return false; + } + ei.new_graph = rg[succs[0]].left.graph; + } else if (left.graph()) { + const NGHolder &g = *left.graph(); + if (proper_out_degree(g.startDs, g)) { + return false; /* not purely anchored */ + } + + ei.new_graph = cloneHolder(*left.graph()); + auto gg = ei.new_graph; + gg->kind = NFA_EAGER_PREFIX; + + ei.lag_adjust = decreaseLag(build, *gg, succs); + + if (is_match_vertex(gg->start, *gg)) { + return false; /* should not still be vacuous as lag decreased */ + } + + if (!can_die_early(*gg, EAGER_DIE_BEFORE_LIMIT)) { + DEBUG_PRINTF("not eager as stuck alive\n"); + return false; + } + + /* We need to ensure that adding in the literals does not cause us to no + * longer be able to build an nfa. */ + bool ok = isImplementableNFA(*gg, nullptr, cc); + if (!ok) { + return false; + } + } else { + DEBUG_PRINTF("unable to determine if good for eager running\n"); + return false; + } + + DEBUG_PRINTF("eager prefix\n"); + return true; +} + +static +left_id updateLeftfixWithEager(RoseGraph &g, const eager_info &ei, + const vector<RoseVertex> &succs) { + u32 lag_adjust = ei.lag_adjust; + auto gg = ei.new_graph; + for (RoseVertex v : succs) { + g[v].left.graph = gg; + assert(g[v].left.lag >= lag_adjust); + g[v].left.lag -= lag_adjust; + DEBUG_PRINTF("added %u literal chars back, new lag %u\n", lag_adjust, + g[v].left.lag); + } + left_id leftfix = g[succs[0]].left; + + if (leftfix.graph()) { + assert(leftfix.graph()->kind == NFA_PREFIX + || leftfix.graph()->kind == NFA_EAGER_PREFIX); + leftfix.graph()->kind = NFA_EAGER_PREFIX; + } + if (leftfix.dfa()) { + assert(leftfix.dfa()->kind == NFA_PREFIX); + leftfix.dfa()->kind = NFA_EAGER_PREFIX; + } + + return leftfix; +} + +static +void enforceEngineSizeLimit(const NFA *n, const Grey &grey) { + const size_t nfa_size = n->length; + // Global limit. + if (nfa_size > grey.limitEngineSize) { + throw ResourceLimitError(); + } + + // Type-specific limit checks follow. + + if (isDfaType(n->type)) { + if (nfa_size > grey.limitDFASize) { + throw ResourceLimitError(); + } + } else if (isNfaType(n->type)) { + if (nfa_size > grey.limitNFASize) { + throw ResourceLimitError(); + } + } else if (isLbrType(n->type)) { + if (nfa_size > grey.limitLBRSize) { + throw ResourceLimitError(); + } + } +} + +static +bool buildLeftfix(RoseBuildImpl &build, build_context &bc, bool prefix, u32 qi, + const map<left_id, set<PredTopPair> > &infixTriggers, + set<u32> *no_retrigger_queues, set<u32> *eager_queues, + const map<left_id, eager_info> &eager, + const vector<RoseVertex> &succs, left_id leftfix) { + RoseGraph &g = build.g; + const CompileContext &cc = build.cc; + const ReportManager &rm = build.rm; + + bool is_transient = contains(build.transient, leftfix); + rose_group squash_mask = build.rose_squash_masks.at(leftfix); + + DEBUG_PRINTF("making %sleftfix\n", is_transient ? 
"transient " : ""); + + if (contains(eager, leftfix)) { + eager_queues->insert(qi); + leftfix = updateLeftfixWithEager(g, eager.at(leftfix), succs); + } + + bytecode_ptr<NFA> nfa; + // Need to build NFA, which is either predestined to be a Haig (in SOM mode) + // or could be all manner of things. + if (leftfix.haig()) { + nfa = goughCompile(*leftfix.haig(), build.ssm.somPrecision(), cc, rm); + } else { + nfa = makeLeftNfa(build, leftfix, prefix, is_transient, infixTriggers, + cc); + } + + if (!nfa) { + assert(!"failed to build leftfix"); + return false; + } + + setLeftNfaProperties(*nfa, leftfix); + + nfa->queueIndex = qi; + enforceEngineSizeLimit(nfa.get(), cc.grey); + bc.engine_info_by_queue.emplace(nfa->queueIndex, + engine_info(nfa.get(), is_transient)); + + if (!prefix && !leftfix.haig() && leftfix.graph() + && nfaStuckOn(*leftfix.graph())) { + DEBUG_PRINTF("%u sticks on\n", qi); + no_retrigger_queues->insert(qi); + } + + DEBUG_PRINTF("built leftfix, qi=%u\n", qi); + add_nfa_to_blob(bc, *nfa); + + // Leftfixes can have stop alphabets. + vector<u8> stop(N_CHARS, 0); + /* haigs track som information - need more care */ + som_type som = leftfix.haig() ? SOM_LEFT : SOM_NONE; + if (leftfix.graph()) { + stop = findLeftOffsetStopAlphabet(*leftfix.graph(), som); + } else if (leftfix.castle()) { + stop = findLeftOffsetStopAlphabet(*leftfix.castle(), som); + } + + // Infix NFAs can have bounds on their queue lengths. + u32 max_queuelen = UINT32_MAX; + if (!prefix) { + set<ue2_literal> lits; + for (RoseVertex v : succs) { + for (auto u : inv_adjacent_vertices_range(v, g)) { + for (u32 lit_id : g[u].literals) { + lits.insert(build.literals.at(lit_id).s); + } } } - DEBUG_PRINTF("%zu literals\n", lits.size()); - max_queuelen = findMaxInfixMatches(leftfix, lits); - if (max_queuelen < UINT32_MAX) { - max_queuelen++; - } - } - - u32 max_width; - if (is_transient) { - depth d = findMaxWidth(leftfix); - assert(d.is_finite()); - max_width = d; - } else { - max_width = 0; - } - - u8 cm_count = 0; - CharReach cm_cr; - if (cc.grey.allowCountingMiracles) { - findCountingMiracleInfo(leftfix, stop, &cm_count, &cm_cr); - } - - for (RoseVertex v : succs) { - bc.leftfix_info.emplace(v, left_build_info(qi, g[v].left.lag, max_width, - squash_mask, stop, - max_queuelen, cm_count, - cm_cr)); - } - - return true; -} - -static -unique_ptr<TamaInfo> constructTamaInfo(const RoseGraph &g, - const vector<ExclusiveSubengine> &subengines, - const bool is_suffix) { - unique_ptr<TamaInfo> tamaInfo = ue2::make_unique<TamaInfo>(); - for (const auto &sub : subengines) { - const auto &rose_vertices = sub.vertices; - NFA *nfa = sub.nfa.get(); - set<u32> tops; - for (const auto &v : rose_vertices) { - if (is_suffix) { - tops.insert(g[v].suffix.top); - } else { - for (const auto &e : in_edges_range(v, g)) { - tops.insert(g[e].rose_top); - } + DEBUG_PRINTF("%zu literals\n", lits.size()); + max_queuelen = findMaxInfixMatches(leftfix, lits); + if (max_queuelen < UINT32_MAX) { + max_queuelen++; + } + } + + u32 max_width; + if (is_transient) { + depth d = findMaxWidth(leftfix); + assert(d.is_finite()); + max_width = d; + } else { + max_width = 0; + } + + u8 cm_count = 0; + CharReach cm_cr; + if (cc.grey.allowCountingMiracles) { + findCountingMiracleInfo(leftfix, stop, &cm_count, &cm_cr); + } + + for (RoseVertex v : succs) { + bc.leftfix_info.emplace(v, left_build_info(qi, g[v].left.lag, max_width, + squash_mask, stop, + max_queuelen, cm_count, + cm_cr)); + } + + return true; +} + +static +unique_ptr<TamaInfo> constructTamaInfo(const 
RoseGraph &g, + const vector<ExclusiveSubengine> &subengines, + const bool is_suffix) { + unique_ptr<TamaInfo> tamaInfo = ue2::make_unique<TamaInfo>(); + for (const auto &sub : subengines) { + const auto &rose_vertices = sub.vertices; + NFA *nfa = sub.nfa.get(); + set<u32> tops; + for (const auto &v : rose_vertices) { + if (is_suffix) { + tops.insert(g[v].suffix.top); + } else { + for (const auto &e : in_edges_range(v, g)) { + tops.insert(g[e].rose_top); + } + } + } + tamaInfo->add(nfa, tops); + } + + return tamaInfo; +} + +static +void updateTops(const RoseGraph &g, const TamaInfo &tamaInfo, + TamaProto &tamaProto, + const vector<ExclusiveSubengine> &subengines, + const map<pair<const NFA *, u32>, u32> &out_top_remap, + const bool is_suffix) { + u32 i = 0; + for (const auto &n : tamaInfo.subengines) { + for (const auto &v : subengines[i].vertices) { + if (is_suffix) { + tamaProto.add(n, g[v].index, g[v].suffix.top, out_top_remap); + } else { + for (const auto &e : in_edges_range(v, g)) { + tamaProto.add(n, g[v].index, g[e].rose_top, out_top_remap); + } } - } - tamaInfo->add(nfa, tops); - } - - return tamaInfo; -} - -static -void updateTops(const RoseGraph &g, const TamaInfo &tamaInfo, - TamaProto &tamaProto, - const vector<ExclusiveSubengine> &subengines, - const map<pair<const NFA *, u32>, u32> &out_top_remap, - const bool is_suffix) { - u32 i = 0; - for (const auto &n : tamaInfo.subengines) { - for (const auto &v : subengines[i].vertices) { - if (is_suffix) { - tamaProto.add(n, g[v].index, g[v].suffix.top, out_top_remap); - } else { - for (const auto &e : in_edges_range(v, g)) { - tamaProto.add(n, g[v].index, g[e].rose_top, out_top_remap); - } + } + i++; + } +} + +static +shared_ptr<TamaProto> constructContainerEngine(const RoseGraph &g, + build_context &bc, + const ExclusiveInfo &info, + const u32 queue, + const bool is_suffix, + const Grey &grey) { + const auto &subengines = info.subengines; + auto tamaInfo = constructTamaInfo(g, subengines, is_suffix); + + map<pair<const NFA *, u32>, u32> out_top_remap; + auto n = buildTamarama(*tamaInfo, queue, out_top_remap); + enforceEngineSizeLimit(n.get(), grey); + bc.engine_info_by_queue.emplace(n->queueIndex, engine_info(n.get(), false)); + add_nfa_to_blob(bc, *n); + + DEBUG_PRINTF("queue id:%u\n", queue); + shared_ptr<TamaProto> tamaProto = make_shared<TamaProto>(); + tamaProto->reports = info.reports; + updateTops(g, *tamaInfo, *tamaProto, subengines, out_top_remap, is_suffix); + return tamaProto; +} + +static +void buildInfixContainer(RoseGraph &g, build_context &bc, + const vector<ExclusiveInfo> &exclusive_info, + const Grey &grey) { + // Build tamarama engine + for (const auto &info : exclusive_info) { + const u32 queue = info.queue; + const auto &subengines = info.subengines; + auto tamaProto = + constructContainerEngine(g, bc, info, queue, false, grey); + + for (const auto &sub : subengines) { + const auto &verts = sub.vertices; + for (const auto &v : verts) { + DEBUG_PRINTF("vert id:%zu\n", g[v].index); + g[v].left.tamarama = tamaProto; } - } - i++; - } -} - -static -shared_ptr<TamaProto> constructContainerEngine(const RoseGraph &g, - build_context &bc, - const ExclusiveInfo &info, - const u32 queue, - const bool is_suffix, - const Grey &grey) { - const auto &subengines = info.subengines; - auto tamaInfo = constructTamaInfo(g, subengines, is_suffix); - - map<pair<const NFA *, u32>, u32> out_top_remap; - auto n = buildTamarama(*tamaInfo, queue, out_top_remap); - enforceEngineSizeLimit(n.get(), grey); - 
bc.engine_info_by_queue.emplace(n->queueIndex, engine_info(n.get(), false)); - add_nfa_to_blob(bc, *n); - - DEBUG_PRINTF("queue id:%u\n", queue); - shared_ptr<TamaProto> tamaProto = make_shared<TamaProto>(); - tamaProto->reports = info.reports; - updateTops(g, *tamaInfo, *tamaProto, subengines, out_top_remap, is_suffix); - return tamaProto; -} - -static -void buildInfixContainer(RoseGraph &g, build_context &bc, - const vector<ExclusiveInfo> &exclusive_info, - const Grey &grey) { - // Build tamarama engine - for (const auto &info : exclusive_info) { - const u32 queue = info.queue; - const auto &subengines = info.subengines; - auto tamaProto = - constructContainerEngine(g, bc, info, queue, false, grey); - - for (const auto &sub : subengines) { - const auto &verts = sub.vertices; - for (const auto &v : verts) { - DEBUG_PRINTF("vert id:%zu\n", g[v].index); - g[v].left.tamarama = tamaProto; + } + } +} + +static +void buildSuffixContainer(RoseGraph &g, build_context &bc, + const vector<ExclusiveInfo> &exclusive_info, + const Grey &grey) { + // Build tamarama engine + for (const auto &info : exclusive_info) { + const u32 queue = info.queue; + const auto &subengines = info.subengines; + auto tamaProto = constructContainerEngine(g, bc, info, queue, true, + grey); + for (const auto &sub : subengines) { + const auto &verts = sub.vertices; + for (const auto &v : verts) { + DEBUG_PRINTF("vert id:%zu\n", g[v].index); + g[v].suffix.tamarama = tamaProto; } - } - } -} - -static -void buildSuffixContainer(RoseGraph &g, build_context &bc, - const vector<ExclusiveInfo> &exclusive_info, - const Grey &grey) { - // Build tamarama engine - for (const auto &info : exclusive_info) { - const u32 queue = info.queue; - const auto &subengines = info.subengines; - auto tamaProto = constructContainerEngine(g, bc, info, queue, true, - grey); - for (const auto &sub : subengines) { - const auto &verts = sub.vertices; - for (const auto &v : verts) { - DEBUG_PRINTF("vert id:%zu\n", g[v].index); - g[v].suffix.tamarama = tamaProto; - } - const auto &v = verts[0]; - suffix_id newSuffix(g[v].suffix); - bc.suffixes.emplace(newSuffix, queue); - } - } -} - -static -void updateExclusiveInfixProperties(const RoseBuildImpl &build, - const vector<ExclusiveInfo> &exclusive_info, - map<RoseVertex, left_build_info> &leftfix_info, - set<u32> *no_retrigger_queues) { - const RoseGraph &g = build.g; - for (const auto &info : exclusive_info) { - // Set leftfix optimisations, disabled for tamarama subengines - rose_group squash_mask = ~rose_group{0}; + const auto &v = verts[0]; + suffix_id newSuffix(g[v].suffix); + bc.suffixes.emplace(newSuffix, queue); + } + } +} + +static +void updateExclusiveInfixProperties(const RoseBuildImpl &build, + const vector<ExclusiveInfo> &exclusive_info, + map<RoseVertex, left_build_info> &leftfix_info, + set<u32> *no_retrigger_queues) { + const RoseGraph &g = build.g; + for (const auto &info : exclusive_info) { + // Set leftfix optimisations, disabled for tamarama subengines + rose_group squash_mask = ~rose_group{0}; // Leftfixes can have stop alphabets. vector<u8> stop(N_CHARS, 0); - // Infix NFAs can have bounds on their queue lengths. 
- u32 max_queuelen = 0; - u32 max_width = 0; - u8 cm_count = 0; - CharReach cm_cr; - - const auto &qi = info.queue; - const auto &subengines = info.subengines; - bool no_retrigger = true; - for (const auto &sub : subengines) { - const auto &verts = sub.vertices; - const auto &v_first = verts[0]; - left_id leftfix(g[v_first].left); - if (leftfix.haig() || !leftfix.graph() || - !nfaStuckOn(*leftfix.graph())) { - no_retrigger = false; - } - - for (const auto &v : verts) { - set<ue2_literal> lits; - for (auto u : inv_adjacent_vertices_range(v, build.g)) { - for (u32 lit_id : build.g[u].literals) { - lits.insert(build.literals.at(lit_id).s); - } + // Infix NFAs can have bounds on their queue lengths. + u32 max_queuelen = 0; + u32 max_width = 0; + u8 cm_count = 0; + CharReach cm_cr; + + const auto &qi = info.queue; + const auto &subengines = info.subengines; + bool no_retrigger = true; + for (const auto &sub : subengines) { + const auto &verts = sub.vertices; + const auto &v_first = verts[0]; + left_id leftfix(g[v_first].left); + if (leftfix.haig() || !leftfix.graph() || + !nfaStuckOn(*leftfix.graph())) { + no_retrigger = false; + } + + for (const auto &v : verts) { + set<ue2_literal> lits; + for (auto u : inv_adjacent_vertices_range(v, build.g)) { + for (u32 lit_id : build.g[u].literals) { + lits.insert(build.literals.at(lit_id).s); + } + } + DEBUG_PRINTF("%zu literals\n", lits.size()); + + u32 queuelen = findMaxInfixMatches(leftfix, lits); + if (queuelen < UINT32_MAX) { + queuelen++; } - DEBUG_PRINTF("%zu literals\n", lits.size()); - - u32 queuelen = findMaxInfixMatches(leftfix, lits); - if (queuelen < UINT32_MAX) { - queuelen++; - } - max_queuelen = max(max_queuelen, queuelen); + max_queuelen = max(max_queuelen, queuelen); + } + } + + if (no_retrigger) { + no_retrigger_queues->insert(qi); + } + + for (const auto &sub : subengines) { + const auto &verts = sub.vertices; + for (const auto &v : verts) { + u32 lag = g[v].left.lag; + leftfix_info.emplace(v, left_build_info(qi, lag, max_width, + squash_mask, stop, + max_queuelen, cm_count, + cm_cr)); + } + } + } +} + +static +void updateExclusiveSuffixProperties(const RoseBuildImpl &build, + const vector<ExclusiveInfo> &exclusive_info, + set<u32> *no_retrigger_queues) { + const RoseGraph &g = build.g; + for (auto &info : exclusive_info) { + const auto &qi = info.queue; + const auto &subengines = info.subengines; + bool no_retrigger = true; + for (const auto &sub : subengines) { + const auto &v_first = sub.vertices[0]; + suffix_id suffix(g[v_first].suffix); + if (!suffix.graph() || !nfaStuckOn(*suffix.graph())) { + no_retrigger = false; + break; } - } - - if (no_retrigger) { - no_retrigger_queues->insert(qi); - } - - for (const auto &sub : subengines) { - const auto &verts = sub.vertices; - for (const auto &v : verts) { - u32 lag = g[v].left.lag; - leftfix_info.emplace(v, left_build_info(qi, lag, max_width, - squash_mask, stop, - max_queuelen, cm_count, - cm_cr)); + } + + if (no_retrigger) { + no_retrigger_queues->insert(qi); + } + } +} + +static +void buildExclusiveInfixes(RoseBuildImpl &build, build_context &bc, + QueueIndexFactory &qif, + const map<left_id, set<PredTopPair>> &infixTriggers, + const map<u32, vector<RoseVertex>> &vertex_map, + const vector<vector<u32>> &groups, + set<u32> *no_retrigger_queues) { + RoseGraph &g = build.g; + const CompileContext &cc = build.cc; + + vector<ExclusiveInfo> exclusive_info; + for (const auto &gp : groups) { + ExclusiveInfo info; + for (const auto &id : gp) { + const auto &verts = vertex_map.at(id); + 
left_id leftfix(g[verts[0]].left); + + bool is_transient = false; + auto n = makeLeftNfa(build, leftfix, false, is_transient, + infixTriggers, cc); + assert(n); + + setLeftNfaProperties(*n, leftfix); + + ExclusiveSubengine engine; + engine.nfa = move(n); + engine.vertices = verts; + info.subengines.push_back(move(engine)); + } + info.queue = qif.get_queue(); + exclusive_info.push_back(move(info)); + } + updateExclusiveInfixProperties(build, exclusive_info, bc.leftfix_info, + no_retrigger_queues); + buildInfixContainer(g, bc, exclusive_info, build.cc.grey); +} + +static +void findExclusiveInfixes(RoseBuildImpl &build, build_context &bc, + QueueIndexFactory &qif, + const map<left_id, set<PredTopPair>> &infixTriggers, + set<u32> *no_retrigger_queues) { + const RoseGraph &g = build.g; + + set<RoleInfo<left_id>> roleInfoSet; + map<u32, vector<RoseVertex>> vertex_map; + + u32 role_id = 0; + map<left_id, u32> leftfixes; + for (auto v : vertices_range(g)) { + if (!g[v].left || build.isRootSuccessor(v)) { + continue; + } + + left_id leftfix(g[v].left); + + // Sanity check: our NFA should contain each of the tops mentioned on + // our in-edges. + assert(roseHasTops(build, v)); + + if (contains(leftfixes, leftfix)) { + // NFA already built. + u32 id = leftfixes[leftfix]; + if (contains(vertex_map, id)) { + vertex_map[id].push_back(v); + } + DEBUG_PRINTF("sharing leftfix, id=%u\n", id); + continue; + } + + if (leftfix.haig()) { + continue; + } + + if (leftfix.graph() || leftfix.castle()) { + leftfixes.emplace(leftfix, role_id); + vertex_map[role_id].push_back(v); + + map<u32, vector<vector<CharReach>>> triggers; + findTriggerSequences(build, infixTriggers.at(leftfix), &triggers); + RoleInfo<left_id> info(leftfix, role_id); + if (setTriggerLiteralsInfix(info, triggers)) { + roleInfoSet.insert(info); + } + role_id++; + } + } + + if (leftfixes.size() > 1) { + DEBUG_PRINTF("leftfix size:%zu\n", leftfixes.size()); + vector<vector<u32>> groups; + exclusiveAnalysisInfix(build, vertex_map, roleInfoSet, groups); + buildExclusiveInfixes(build, bc, qif, infixTriggers, vertex_map, + groups, no_retrigger_queues); + } +} + +static +bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc, + QueueIndexFactory &qif, set<u32> *no_retrigger_queues, + set<u32> *eager_queues, bool do_prefix) { + RoseGraph &g = tbi.g; + const CompileContext &cc = tbi.cc; + + map<left_id, set<PredTopPair>> infixTriggers; + findInfixTriggers(tbi, &infixTriggers); + + insertion_ordered_map<left_id, vector<RoseVertex>> succs; + + if (cc.grey.allowTamarama && cc.streaming && !do_prefix) { + findExclusiveInfixes(tbi, bc, qif, infixTriggers, no_retrigger_queues); + } + + for (auto v : vertices_range(g)) { + if (!g[v].left || g[v].left.tamarama) { + continue; + } + + assert(tbi.isNonRootSuccessor(v) != tbi.isRootSuccessor(v)); + bool is_prefix = tbi.isRootSuccessor(v); + + if (do_prefix != is_prefix) { + /* we require prefixes and then infixes */ + continue; + } + + left_id leftfix(g[v].left); + + // Sanity check: our NFA should contain each of the tops mentioned on + // our in-edges. + assert(roseHasTops(tbi, v)); + + bool is_transient = contains(tbi.transient, leftfix); + + // Transient leftfixes can sometimes be implemented solely with + // lookarounds, in which case we don't need to build an engine. + // TODO: Handle SOM-tracking cases as well. 
+ if (cc.grey.roseLookaroundMasks && is_transient && + !g[v].left.tracksSom()) { + vector<vector<LookEntry>> lookaround; + if (makeLeftfixLookaround(tbi, v, lookaround)) { + DEBUG_PRINTF("implementing as lookaround!\n"); + bc.leftfix_info.emplace(v, left_build_info(lookaround)); + continue; } } - } -} - -static -void updateExclusiveSuffixProperties(const RoseBuildImpl &build, - const vector<ExclusiveInfo> &exclusive_info, - set<u32> *no_retrigger_queues) { - const RoseGraph &g = build.g; - for (auto &info : exclusive_info) { - const auto &qi = info.queue; - const auto &subengines = info.subengines; - bool no_retrigger = true; - for (const auto &sub : subengines) { - const auto &v_first = sub.vertices[0]; - suffix_id suffix(g[v_first].suffix); - if (!suffix.graph() || !nfaStuckOn(*suffix.graph())) { - no_retrigger = false; - break; - } - } - - if (no_retrigger) { - no_retrigger_queues->insert(qi); - } - } -} - -static -void buildExclusiveInfixes(RoseBuildImpl &build, build_context &bc, - QueueIndexFactory &qif, - const map<left_id, set<PredTopPair>> &infixTriggers, - const map<u32, vector<RoseVertex>> &vertex_map, - const vector<vector<u32>> &groups, - set<u32> *no_retrigger_queues) { - RoseGraph &g = build.g; - const CompileContext &cc = build.cc; - - vector<ExclusiveInfo> exclusive_info; - for (const auto &gp : groups) { - ExclusiveInfo info; - for (const auto &id : gp) { - const auto &verts = vertex_map.at(id); - left_id leftfix(g[verts[0]].left); - - bool is_transient = false; - auto n = makeLeftNfa(build, leftfix, false, is_transient, - infixTriggers, cc); - assert(n); - - setLeftNfaProperties(*n, leftfix); - - ExclusiveSubengine engine; - engine.nfa = move(n); - engine.vertices = verts; - info.subengines.push_back(move(engine)); - } - info.queue = qif.get_queue(); - exclusive_info.push_back(move(info)); - } - updateExclusiveInfixProperties(build, exclusive_info, bc.leftfix_info, - no_retrigger_queues); - buildInfixContainer(g, bc, exclusive_info, build.cc.grey); -} - -static -void findExclusiveInfixes(RoseBuildImpl &build, build_context &bc, - QueueIndexFactory &qif, - const map<left_id, set<PredTopPair>> &infixTriggers, - set<u32> *no_retrigger_queues) { - const RoseGraph &g = build.g; - - set<RoleInfo<left_id>> roleInfoSet; - map<u32, vector<RoseVertex>> vertex_map; - - u32 role_id = 0; - map<left_id, u32> leftfixes; - for (auto v : vertices_range(g)) { - if (!g[v].left || build.isRootSuccessor(v)) { - continue; - } - - left_id leftfix(g[v].left); - - // Sanity check: our NFA should contain each of the tops mentioned on - // our in-edges. - assert(roseHasTops(build, v)); - - if (contains(leftfixes, leftfix)) { - // NFA already built. 
- u32 id = leftfixes[leftfix]; - if (contains(vertex_map, id)) { - vertex_map[id].push_back(v); - } - DEBUG_PRINTF("sharing leftfix, id=%u\n", id); - continue; - } - - if (leftfix.haig()) { - continue; - } - - if (leftfix.graph() || leftfix.castle()) { - leftfixes.emplace(leftfix, role_id); - vertex_map[role_id].push_back(v); - - map<u32, vector<vector<CharReach>>> triggers; - findTriggerSequences(build, infixTriggers.at(leftfix), &triggers); - RoleInfo<left_id> info(leftfix, role_id); - if (setTriggerLiteralsInfix(info, triggers)) { - roleInfoSet.insert(info); - } - role_id++; - } - } - - if (leftfixes.size() > 1) { - DEBUG_PRINTF("leftfix size:%zu\n", leftfixes.size()); - vector<vector<u32>> groups; - exclusiveAnalysisInfix(build, vertex_map, roleInfoSet, groups); - buildExclusiveInfixes(build, bc, qif, infixTriggers, vertex_map, - groups, no_retrigger_queues); - } -} - -static -bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc, - QueueIndexFactory &qif, set<u32> *no_retrigger_queues, - set<u32> *eager_queues, bool do_prefix) { - RoseGraph &g = tbi.g; - const CompileContext &cc = tbi.cc; - - map<left_id, set<PredTopPair>> infixTriggers; - findInfixTriggers(tbi, &infixTriggers); - - insertion_ordered_map<left_id, vector<RoseVertex>> succs; - - if (cc.grey.allowTamarama && cc.streaming && !do_prefix) { - findExclusiveInfixes(tbi, bc, qif, infixTriggers, no_retrigger_queues); - } - - for (auto v : vertices_range(g)) { - if (!g[v].left || g[v].left.tamarama) { - continue; - } - - assert(tbi.isNonRootSuccessor(v) != tbi.isRootSuccessor(v)); - bool is_prefix = tbi.isRootSuccessor(v); - - if (do_prefix != is_prefix) { - /* we require prefixes and then infixes */ - continue; - } - - left_id leftfix(g[v].left); - - // Sanity check: our NFA should contain each of the tops mentioned on - // our in-edges. - assert(roseHasTops(tbi, v)); - - bool is_transient = contains(tbi.transient, leftfix); - - // Transient leftfixes can sometimes be implemented solely with - // lookarounds, in which case we don't need to build an engine. - // TODO: Handle SOM-tracking cases as well. 
- if (cc.grey.roseLookaroundMasks && is_transient && - !g[v].left.tracksSom()) { - vector<vector<LookEntry>> lookaround; - if (makeLeftfixLookaround(tbi, v, lookaround)) { - DEBUG_PRINTF("implementing as lookaround!\n"); - bc.leftfix_info.emplace(v, left_build_info(lookaround)); - continue; - } - } - - succs[leftfix].push_back(v); - } - - rose_group initial_groups = tbi.getInitialGroups(); - rose_group combined_eager_squashed_mask = ~0ULL; - - map<left_id, eager_info> eager; - - for (const auto &m : succs) { - const left_id &leftfix = m.first; - const auto &left_succs = m.second; - - rose_group squash_mask = tbi.rose_squash_masks.at(leftfix); - eager_info ei; - - if (checkSuitableForEager(do_prefix, leftfix, tbi, left_succs, - squash_mask, initial_groups, ei, cc)) { - eager[leftfix] = ei; - combined_eager_squashed_mask &= squash_mask; - DEBUG_PRINTF("combo %016llx...\n", combined_eager_squashed_mask); - } - } - - if (do_prefix && combined_eager_squashed_mask & initial_groups) { - DEBUG_PRINTF("eager groups won't squash everyone - be lazy\n"); - eager_queues->clear(); - eager.clear(); - } - - for (const auto &m : succs) { - const left_id &leftfix = m.first; - const auto &left_succs = m.second; - buildLeftfix(tbi, bc, do_prefix, qif.get_queue(), infixTriggers, - no_retrigger_queues, eager_queues, eager, left_succs, - leftfix); - } - + + succs[leftfix].push_back(v); + } + + rose_group initial_groups = tbi.getInitialGroups(); + rose_group combined_eager_squashed_mask = ~0ULL; + + map<left_id, eager_info> eager; + + for (const auto &m : succs) { + const left_id &leftfix = m.first; + const auto &left_succs = m.second; + + rose_group squash_mask = tbi.rose_squash_masks.at(leftfix); + eager_info ei; + + if (checkSuitableForEager(do_prefix, leftfix, tbi, left_succs, + squash_mask, initial_groups, ei, cc)) { + eager[leftfix] = ei; + combined_eager_squashed_mask &= squash_mask; + DEBUG_PRINTF("combo %016llx...\n", combined_eager_squashed_mask); + } + } + + if (do_prefix && combined_eager_squashed_mask & initial_groups) { + DEBUG_PRINTF("eager groups won't squash everyone - be lazy\n"); + eager_queues->clear(); + eager.clear(); + } + + for (const auto &m : succs) { + const left_id &leftfix = m.first; + const auto &left_succs = m.second; + buildLeftfix(tbi, bc, do_prefix, qif.get_queue(), infixTriggers, + no_retrigger_queues, eager_queues, eager, left_succs, + leftfix); + } + return true; } @@ -1608,73 +1608,73 @@ bool hasNonSmallBlockOutfix(const vector<OutfixInfo> &outfixes) { return false; } -namespace { -class OutfixBuilder : public boost::static_visitor<bytecode_ptr<NFA>> { -public: - explicit OutfixBuilder(const RoseBuildImpl &build_in) : build(build_in) {} +namespace { +class OutfixBuilder : public boost::static_visitor<bytecode_ptr<NFA>> { +public: + explicit OutfixBuilder(const RoseBuildImpl &build_in) : build(build_in) {} + + bytecode_ptr<NFA> operator()(boost::blank&) const { + return nullptr; + }; - bytecode_ptr<NFA> operator()(boost::blank&) const { - return nullptr; - }; + bytecode_ptr<NFA> operator()(unique_ptr<raw_dfa> &rdfa) const { + // Unleash the mighty DFA! + return getDfa(*rdfa, false, build.cc, build.rm); + } - bytecode_ptr<NFA> operator()(unique_ptr<raw_dfa> &rdfa) const { - // Unleash the mighty DFA! - return getDfa(*rdfa, false, build.cc, build.rm); - } - - bytecode_ptr<NFA> operator()(unique_ptr<raw_som_dfa> &haig) const { + bytecode_ptr<NFA> operator()(unique_ptr<raw_som_dfa> &haig) const { // Unleash the Goughfish! 
- return goughCompile(*haig, build.ssm.somPrecision(), build.cc, - build.rm); - } - - bytecode_ptr<NFA> operator()(unique_ptr<NGHolder> &holder) const { - const CompileContext &cc = build.cc; - const ReportManager &rm = build.rm; - - NGHolder &h = *holder; + return goughCompile(*haig, build.ssm.somPrecision(), build.cc, + build.rm); + } + + bytecode_ptr<NFA> operator()(unique_ptr<NGHolder> &holder) const { + const CompileContext &cc = build.cc; + const ReportManager &rm = build.rm; + + NGHolder &h = *holder; assert(h.kind == NFA_OUTFIX); // Build NFA. - const map<u32, u32> fixed_depth_tops; /* no tops */ - const map<u32, vector<vector<CharReach>>> triggers; /* no tops */ - bool compress_state = cc.streaming; + const map<u32, u32> fixed_depth_tops; /* no tops */ + const map<u32, vector<vector<CharReach>>> triggers; /* no tops */ + bool compress_state = cc.streaming; bool fast_nfa = false; - auto n = constructNFA(h, &rm, fixed_depth_tops, triggers, + auto n = constructNFA(h, &rm, fixed_depth_tops, triggers, compress_state, fast_nfa, cc); // Try for a DFA upgrade. - if (n && cc.grey.roseMcClellanOutfix && + if (n && cc.grey.roseMcClellanOutfix && (!has_bounded_repeats_other_than_firsts(*n) || !fast_nfa)) { auto rdfa = buildMcClellan(h, &rm, cc.grey); if (rdfa) { - auto d = getDfa(*rdfa, false, cc, rm); + auto d = getDfa(*rdfa, false, cc, rm); if (d) { n = pickImpl(move(d), move(n), fast_nfa); } } } - - return n; - } - - bytecode_ptr<NFA> operator()(UNUSED MpvProto &mpv) const { - // MPV construction handled separately. - assert(mpv.puffettes.empty()); - return nullptr; - } - -private: - const RoseBuildImpl &build; -}; -} - -static -bytecode_ptr<NFA> buildOutfix(const RoseBuildImpl &build, OutfixInfo &outfix) { - assert(!outfix.is_dead()); // should not be marked dead. - - auto n = boost::apply_visitor(OutfixBuilder(build), outfix.proto); - if (n && build.cc.grey.reverseAccelerate) { + + return n; + } + + bytecode_ptr<NFA> operator()(UNUSED MpvProto &mpv) const { + // MPV construction handled separately. + assert(mpv.puffettes.empty()); + return nullptr; + } + +private: + const RoseBuildImpl &build; +}; +} + +static +bytecode_ptr<NFA> buildOutfix(const RoseBuildImpl &build, OutfixInfo &outfix) { + assert(!outfix.is_dead()); // should not be marked dead. 
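OutfixBuilder above is a boost::static_visitor that selects a construction routine based on which prototype the outfix currently holds (blank, raw DFA, Haig, NFA holder, or MPV). A rough modern-C++ analogue of that dispatch, using std::variant instead of Boost and invented placeholder prototype types (not Hyperscan's):

#include <memory>
#include <string>
#include <type_traits>
#include <variant>

// Placeholder prototypes and result type for illustration only.
struct RawDfaProto {};
struct HolderProto {};
using Engine = std::string;

using OutfixProto = std::variant<std::monostate, RawDfaProto, HolderProto>;

std::unique_ptr<Engine> buildOutfixSketch(OutfixProto &proto) {
    return std::visit([](auto &p) -> std::unique_ptr<Engine> {
        using T = std::decay_t<decltype(p)>;
        if constexpr (std::is_same_v<T, std::monostate>) {
            return nullptr;                                 // nothing to build
        } else if constexpr (std::is_same_v<T, RawDfaProto>) {
            return std::make_unique<Engine>("dfa engine");  // DFA path
        } else {
            return std::make_unique<Engine>("nfa engine");  // graph/NFA path
        }
    }, proto);
}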
+ + auto n = boost::apply_visitor(OutfixBuilder(build), outfix.proto); + if (n && build.cc.grey.reverseAccelerate) { buildReverseAcceleration(n.get(), outfix.rev_info, outfix.minWidth); } @@ -1682,43 +1682,43 @@ bytecode_ptr<NFA> buildOutfix(const RoseBuildImpl &build, OutfixInfo &outfix) { } static -void prepMpv(RoseBuildImpl &tbi, build_context &bc, size_t *historyRequired, - bool *mpv_as_outfix) { - assert(bc.engineOffsets.empty()); // MPV should be first +void prepMpv(RoseBuildImpl &tbi, build_context &bc, size_t *historyRequired, + bool *mpv_as_outfix) { + assert(bc.engineOffsets.empty()); // MPV should be first *mpv_as_outfix = false; - OutfixInfo *mpv_outfix = nullptr; + OutfixInfo *mpv_outfix = nullptr; /* assume outfixes are just above chain tails in queue indices */ for (auto &out : tbi.outfixes) { if (out.is_nonempty_mpv()) { - assert(!mpv_outfix); - mpv_outfix = &out; + assert(!mpv_outfix); + mpv_outfix = &out; } else { - assert(!out.mpv()); + assert(!out.mpv()); } } - if (!mpv_outfix) { + if (!mpv_outfix) { return; } - auto *mpv = mpv_outfix->mpv(); - auto nfa = mpvCompile(mpv->puffettes, mpv->triggered_puffettes, tbi.rm); + auto *mpv = mpv_outfix->mpv(); + auto nfa = mpvCompile(mpv->puffettes, mpv->triggered_puffettes, tbi.rm); assert(nfa); if (!nfa) { throw CompileError("Unable to generate bytecode."); } if (tbi.cc.grey.reverseAccelerate) { - buildReverseAcceleration(nfa.get(), mpv_outfix->rev_info, - mpv_outfix->minWidth); + buildReverseAcceleration(nfa.get(), mpv_outfix->rev_info, + mpv_outfix->minWidth); } - u32 qi = mpv_outfix->get_queue(tbi.qif); + u32 qi = mpv_outfix->get_queue(tbi.qif); nfa->queueIndex = qi; - enforceEngineSizeLimit(nfa.get(), tbi.cc.grey); - bc.engine_info_by_queue.emplace(nfa->queueIndex, - engine_info(nfa.get(), false)); + enforceEngineSizeLimit(nfa.get(), tbi.cc.grey); + bc.engine_info_by_queue.emplace(nfa->queueIndex, + engine_info(nfa.get(), false)); DEBUG_PRINTF("built mpv\n"); @@ -1726,7 +1726,7 @@ void prepMpv(RoseBuildImpl &tbi, build_context &bc, size_t *historyRequired, *historyRequired = 1; } - add_nfa_to_blob(bc, *nfa); + add_nfa_to_blob(bc, *nfa); *mpv_as_outfix = !mpv->puffettes.empty(); } @@ -1753,7 +1753,7 @@ void setOutfixProperties(NFA &n, const OutfixInfo &outfix) { } static -bool prepOutfixes(RoseBuildImpl &tbi, build_context &bc, +bool prepOutfixes(RoseBuildImpl &tbi, build_context &bc, size_t *historyRequired) { if (tbi.cc.grey.onlyOneOutfix && tbi.outfixes.size() > 1) { DEBUG_PRINTF("we have %zu outfixes, but Grey::onlyOneOutfix is set\n", @@ -1761,13 +1761,13 @@ bool prepOutfixes(RoseBuildImpl &tbi, build_context &bc, throw ResourceLimitError(); } - assert(tbi.qif.allocated_count() == bc.engineOffsets.size()); + assert(tbi.qif.allocated_count() == bc.engineOffsets.size()); for (auto &out : tbi.outfixes) { - if (out.mpv()) { + if (out.mpv()) { continue; /* already done */ } - DEBUG_PRINTF("building outfix %zd\n", &out - &tbi.outfixes[0]); + DEBUG_PRINTF("building outfix %zd\n", &out - &tbi.outfixes[0]); auto n = buildOutfix(tbi, out); if (!n) { assert(0); @@ -1776,24 +1776,24 @@ bool prepOutfixes(RoseBuildImpl &tbi, build_context &bc, setOutfixProperties(*n, out); - n->queueIndex = out.get_queue(tbi.qif); - enforceEngineSizeLimit(n.get(), tbi.cc.grey); - bc.engine_info_by_queue.emplace(n->queueIndex, - engine_info(n.get(), false)); + n->queueIndex = out.get_queue(tbi.qif); + enforceEngineSizeLimit(n.get(), tbi.cc.grey); + bc.engine_info_by_queue.emplace(n->queueIndex, + engine_info(n.get(), false)); if (!*historyRequired && 
requires_decompress_key(*n)) { *historyRequired = 1; } - add_nfa_to_blob(bc, *n); + add_nfa_to_blob(bc, *n); } return true; } static -void assignSuffixQueues(RoseBuildImpl &build, map<suffix_id, u32> &suffixes) { - const RoseGraph &g = build.g; +void assignSuffixQueues(RoseBuildImpl &build, map<suffix_id, u32> &suffixes) { + const RoseGraph &g = build.g; for (auto v : vertices_range(g)) { if (!g[v].suffix) { @@ -1802,16 +1802,16 @@ void assignSuffixQueues(RoseBuildImpl &build, map<suffix_id, u32> &suffixes) { const suffix_id s(g[v].suffix); - DEBUG_PRINTF("vertex %zu triggers suffix %p\n", g[v].index, s.graph()); + DEBUG_PRINTF("vertex %zu triggers suffix %p\n", g[v].index, s.graph()); // We may have already built this NFA. - if (contains(suffixes, s)) { + if (contains(suffixes, s)) { continue; } - u32 queue = build.qif.get_queue(); + u32 queue = build.qif.get_queue(); DEBUG_PRINTF("assigning %p to queue %u\n", s.graph(), queue); - suffixes.emplace(s, queue); + suffixes.emplace(s, queue); } } @@ -1835,173 +1835,173 @@ void setSuffixProperties(NFA &n, const suffix_id &suff, } static -void buildExclusiveSuffixes(RoseBuildImpl &build, build_context &bc, - QueueIndexFactory &qif, - map<suffix_id, set<PredTopPair>> &suffixTriggers, - const map<u32, vector<RoseVertex>> &vertex_map, - const vector<vector<u32>> &groups, - set<u32> *no_retrigger_queues) { - RoseGraph &g = build.g; - - vector<ExclusiveInfo> exclusive_info; - for (const auto &gp : groups) { - ExclusiveInfo info; - for (const auto &id : gp) { - const auto &verts = vertex_map.at(id); - suffix_id s(g[verts[0]].suffix); - - const set<PredTopPair> &s_triggers = suffixTriggers.at(s); - - map<u32, u32> fixed_depth_tops; - findFixedDepthTops(g, s_triggers, &fixed_depth_tops); - - map<u32, vector<vector<CharReach>>> triggers; - findTriggerSequences(build, s_triggers, &triggers); - - auto n = buildSuffix(build.rm, build.ssm, fixed_depth_tops, - triggers, s, build.cc); - assert(n); - - setSuffixProperties(*n, s, build.rm); - - ExclusiveSubengine engine; - engine.nfa = move(n); - engine.vertices = verts; - info.subengines.push_back(move(engine)); - - const auto &reports = all_reports(s); - info.reports.insert(reports.begin(), reports.end()); - } - info.queue = qif.get_queue(); - exclusive_info.push_back(move(info)); - } - updateExclusiveSuffixProperties(build, exclusive_info, - no_retrigger_queues); - buildSuffixContainer(g, bc, exclusive_info, build.cc.grey); -} - -static -void findExclusiveSuffixes(RoseBuildImpl &tbi, build_context &bc, - QueueIndexFactory &qif, - map<suffix_id, set<PredTopPair>> &suffixTriggers, - set<u32> *no_retrigger_queues) { - const RoseGraph &g = tbi.g; - - map<suffix_id, u32> suffixes; - set<RoleInfo<suffix_id>> roleInfoSet; - map<u32, vector<RoseVertex>> vertex_map; - u32 role_id = 0; - for (auto v : vertices_range(g)) { - if (!g[v].suffix) { - continue; - } - - const suffix_id s(g[v].suffix); - - DEBUG_PRINTF("vertex %zu triggers suffix %p\n", g[v].index, s.graph()); - - // We may have already built this NFA. 
- if (contains(suffixes, s)) { - u32 id = suffixes[s]; - if (!tbi.isInETable(v)) { - vertex_map[id].push_back(v); - } - continue; - } - - if (s.haig()) { - continue; - } - - // Currently disable eod suffixes for exclusive analysis - if (!tbi.isInETable(v) && (s.graph() || s.castle())) { - DEBUG_PRINTF("assigning %p to id %u\n", s.graph(), role_id); - suffixes.emplace(s, role_id); - - vertex_map[role_id].push_back(v); - const set<PredTopPair> &s_triggers = suffixTriggers.at(s); - map<u32, vector<vector<CharReach>>> triggers; - findTriggerSequences(tbi, s_triggers, &triggers); - - RoleInfo<suffix_id> info(s, role_id); - if (setTriggerLiteralsSuffix(info, triggers)) { - roleInfoSet.insert(info); - } - role_id++; - } - } - - if (suffixes.size() > 1) { - DEBUG_PRINTF("suffix size:%zu\n", suffixes.size()); - vector<vector<u32>> groups; - exclusiveAnalysisSuffix(tbi, vertex_map, roleInfoSet, groups); - buildExclusiveSuffixes(tbi, bc, qif, suffixTriggers, vertex_map, - groups, no_retrigger_queues); - } -} - -static -bool buildSuffixes(const RoseBuildImpl &tbi, build_context &bc, - set<u32> *no_retrigger_queues, - const map<suffix_id, set<PredTopPair>> &suffixTriggers) { - // To ensure compile determinism, build suffix engines in order of their - // (unique) queue indices, so that we call add_nfa_to_blob in the same - // order. - vector<pair<u32, suffix_id>> ordered; - for (const auto &e : bc.suffixes) { - ordered.emplace_back(e.second, e.first); - } - sort(begin(ordered), end(ordered)); - - for (const auto &e : ordered) { - const u32 queue = e.first; - const suffix_id &s = e.second; - - if (s.tamarama()) { - continue; - } - - const set<PredTopPair> &s_triggers = suffixTriggers.at(s); - - map<u32, u32> fixed_depth_tops; - findFixedDepthTops(tbi.g, s_triggers, &fixed_depth_tops); - - map<u32, vector<vector<CharReach>>> triggers; - findTriggerSequences(tbi, s_triggers, &triggers); - - auto n = buildSuffix(tbi.rm, tbi.ssm, fixed_depth_tops, triggers, - s, tbi.cc); - if (!n) { - return false; - } - - setSuffixProperties(*n, s, tbi.rm); - - n->queueIndex = queue; - enforceEngineSizeLimit(n.get(), tbi.cc.grey); - bc.engine_info_by_queue.emplace(n->queueIndex, - engine_info(n.get(), false)); - - if (s.graph() && nfaStuckOn(*s.graph())) { /* todo: have corresponding - * haig analysis */ - assert(!s.haig()); - DEBUG_PRINTF("%u sticks on\n", queue); - no_retrigger_queues->insert(queue); - } - - add_nfa_to_blob(bc, *n); - } - - return true; -} +void buildExclusiveSuffixes(RoseBuildImpl &build, build_context &bc, + QueueIndexFactory &qif, + map<suffix_id, set<PredTopPair>> &suffixTriggers, + const map<u32, vector<RoseVertex>> &vertex_map, + const vector<vector<u32>> &groups, + set<u32> *no_retrigger_queues) { + RoseGraph &g = build.g; + + vector<ExclusiveInfo> exclusive_info; + for (const auto &gp : groups) { + ExclusiveInfo info; + for (const auto &id : gp) { + const auto &verts = vertex_map.at(id); + suffix_id s(g[verts[0]].suffix); + + const set<PredTopPair> &s_triggers = suffixTriggers.at(s); + + map<u32, u32> fixed_depth_tops; + findFixedDepthTops(g, s_triggers, &fixed_depth_tops); + + map<u32, vector<vector<CharReach>>> triggers; + findTriggerSequences(build, s_triggers, &triggers); + + auto n = buildSuffix(build.rm, build.ssm, fixed_depth_tops, + triggers, s, build.cc); + assert(n); + + setSuffixProperties(*n, s, build.rm); + + ExclusiveSubengine engine; + engine.nfa = move(n); + engine.vertices = verts; + info.subengines.push_back(move(engine)); + + const auto &reports = all_reports(s); + 
info.reports.insert(reports.begin(), reports.end()); + } + info.queue = qif.get_queue(); + exclusive_info.push_back(move(info)); + } + updateExclusiveSuffixProperties(build, exclusive_info, + no_retrigger_queues); + buildSuffixContainer(g, bc, exclusive_info, build.cc.grey); +} + +static +void findExclusiveSuffixes(RoseBuildImpl &tbi, build_context &bc, + QueueIndexFactory &qif, + map<suffix_id, set<PredTopPair>> &suffixTriggers, + set<u32> *no_retrigger_queues) { + const RoseGraph &g = tbi.g; + + map<suffix_id, u32> suffixes; + set<RoleInfo<suffix_id>> roleInfoSet; + map<u32, vector<RoseVertex>> vertex_map; + u32 role_id = 0; + for (auto v : vertices_range(g)) { + if (!g[v].suffix) { + continue; + } + + const suffix_id s(g[v].suffix); + + DEBUG_PRINTF("vertex %zu triggers suffix %p\n", g[v].index, s.graph()); + + // We may have already built this NFA. + if (contains(suffixes, s)) { + u32 id = suffixes[s]; + if (!tbi.isInETable(v)) { + vertex_map[id].push_back(v); + } + continue; + } + + if (s.haig()) { + continue; + } + + // Currently disable eod suffixes for exclusive analysis + if (!tbi.isInETable(v) && (s.graph() || s.castle())) { + DEBUG_PRINTF("assigning %p to id %u\n", s.graph(), role_id); + suffixes.emplace(s, role_id); + + vertex_map[role_id].push_back(v); + const set<PredTopPair> &s_triggers = suffixTriggers.at(s); + map<u32, vector<vector<CharReach>>> triggers; + findTriggerSequences(tbi, s_triggers, &triggers); + + RoleInfo<suffix_id> info(s, role_id); + if (setTriggerLiteralsSuffix(info, triggers)) { + roleInfoSet.insert(info); + } + role_id++; + } + } + + if (suffixes.size() > 1) { + DEBUG_PRINTF("suffix size:%zu\n", suffixes.size()); + vector<vector<u32>> groups; + exclusiveAnalysisSuffix(tbi, vertex_map, roleInfoSet, groups); + buildExclusiveSuffixes(tbi, bc, qif, suffixTriggers, vertex_map, + groups, no_retrigger_queues); + } +} + +static +bool buildSuffixes(const RoseBuildImpl &tbi, build_context &bc, + set<u32> *no_retrigger_queues, + const map<suffix_id, set<PredTopPair>> &suffixTriggers) { + // To ensure compile determinism, build suffix engines in order of their + // (unique) queue indices, so that we call add_nfa_to_blob in the same + // order. 
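The determinism note above is the key point: bc.suffixes maps each suffix to its queue index, and iterating that map directly would build engines in suffix-comparison order, so the code first flattens it into (queue, suffix) pairs and sorts. The pattern in isolation, with a placeholder key type:

#include <algorithm>
#include <cstdint>
#include <map>
#include <string>
#include <utility>
#include <vector>

using Key = std::string;  // stands in for suffix_id

void buildInQueueOrder(const std::map<Key, uint32_t> &assigned) {
    std::vector<std::pair<uint32_t, Key>> ordered;
    ordered.reserve(assigned.size());
    for (const auto &e : assigned) {
        ordered.emplace_back(e.second, e.first);  // (queue index, key)
    }
    std::sort(ordered.begin(), ordered.end());    // unique indices -> total order
    for (const auto &e : ordered) {
        // Build the engine for e.second and append it to the bytecode here;
        // the append order now depends only on the queue index.
        (void)e;
    }
}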
+ vector<pair<u32, suffix_id>> ordered; + for (const auto &e : bc.suffixes) { + ordered.emplace_back(e.second, e.first); + } + sort(begin(ordered), end(ordered)); + + for (const auto &e : ordered) { + const u32 queue = e.first; + const suffix_id &s = e.second; -static -void buildCountingMiracles(build_context &bc) { + if (s.tamarama()) { + continue; + } + + const set<PredTopPair> &s_triggers = suffixTriggers.at(s); + + map<u32, u32> fixed_depth_tops; + findFixedDepthTops(tbi.g, s_triggers, &fixed_depth_tops); + + map<u32, vector<vector<CharReach>>> triggers; + findTriggerSequences(tbi, s_triggers, &triggers); + + auto n = buildSuffix(tbi.rm, tbi.ssm, fixed_depth_tops, triggers, + s, tbi.cc); + if (!n) { + return false; + } + + setSuffixProperties(*n, s, tbi.rm); + + n->queueIndex = queue; + enforceEngineSizeLimit(n.get(), tbi.cc.grey); + bc.engine_info_by_queue.emplace(n->queueIndex, + engine_info(n.get(), false)); + + if (s.graph() && nfaStuckOn(*s.graph())) { /* todo: have corresponding + * haig analysis */ + assert(!s.haig()); + DEBUG_PRINTF("%u sticks on\n", queue); + no_retrigger_queues->insert(queue); + } + + add_nfa_to_blob(bc, *n); + } + + return true; +} + +static +void buildCountingMiracles(build_context &bc) { map<pair<CharReach, u8>, u32> pre_built; - for (left_build_info &lbi : bc.leftfix_info | map_values) { - if (!lbi.countingMiracleCount) { - continue; + for (left_build_info &lbi : bc.leftfix_info | map_values) { + if (!lbi.countingMiracleCount) { + continue; } const CharReach &cr = lbi.countingMiracleReach; @@ -2020,7 +2020,7 @@ void buildCountingMiracles(build_context &bc) { rcm.c = cr.find_first(); } else { rcm.shufti = 1; - int rv = shuftiBuildMasks(cr, (u8 *)&rcm.lo, (u8 *)&rcm.hi); + int rv = shuftiBuildMasks(cr, (u8 *)&rcm.lo, (u8 *)&rcm.hi); if (rv == -1) { DEBUG_PRINTF("failed to build shufti\n"); lbi.countingMiracleCount = 0; /* remove counting miracle */ @@ -2032,41 +2032,41 @@ void buildCountingMiracles(build_context &bc) { rcm.count = lbi.countingMiracleCount; - lbi.countingMiracleOffset = bc.engine_blob.add(rcm); + lbi.countingMiracleOffset = bc.engine_blob.add(rcm); pre_built[key] = lbi.countingMiracleOffset; DEBUG_PRINTF("built cm for count of %u @ %u\n", rcm.count, lbi.countingMiracleOffset); } } -/* Note: buildNfas may reduce the lag for vertices that have prefixes */ +/* Note: buildNfas may reduce the lag for vertices that have prefixes */ static -bool buildNfas(RoseBuildImpl &tbi, build_context &bc, QueueIndexFactory &qif, - set<u32> *no_retrigger_queues, set<u32> *eager_queues, - u32 *leftfixBeginQueue) { - map<suffix_id, set<PredTopPair>> suffixTriggers; - findSuffixTriggers(tbi, &suffixTriggers); +bool buildNfas(RoseBuildImpl &tbi, build_context &bc, QueueIndexFactory &qif, + set<u32> *no_retrigger_queues, set<u32> *eager_queues, + u32 *leftfixBeginQueue) { + map<suffix_id, set<PredTopPair>> suffixTriggers; + findSuffixTriggers(tbi, &suffixTriggers); + + if (tbi.cc.grey.allowTamarama && tbi.cc.streaming) { + findExclusiveSuffixes(tbi, bc, qif, suffixTriggers, + no_retrigger_queues); + } - if (tbi.cc.grey.allowTamarama && tbi.cc.streaming) { - findExclusiveSuffixes(tbi, bc, qif, suffixTriggers, - no_retrigger_queues); - } - - assignSuffixQueues(tbi, bc.suffixes); - - if (!buildSuffixes(tbi, bc, no_retrigger_queues, suffixTriggers)) { + assignSuffixQueues(tbi, bc.suffixes); + + if (!buildSuffixes(tbi, bc, no_retrigger_queues, suffixTriggers)) { return false; } - suffixTriggers.clear(); + suffixTriggers.clear(); *leftfixBeginQueue = 
qif.allocated_count(); - if (!buildLeftfixes(tbi, bc, qif, no_retrigger_queues, eager_queues, + if (!buildLeftfixes(tbi, bc, qif, no_retrigger_queues, eager_queues, true)) { return false; } - if (!buildLeftfixes(tbi, bc, qif, no_retrigger_queues, eager_queues, + if (!buildLeftfixes(tbi, bc, qif, no_retrigger_queues, eager_queues, false)) { return false; } @@ -2075,45 +2075,45 @@ bool buildNfas(RoseBuildImpl &tbi, build_context &bc, QueueIndexFactory &qif, } static -void allocateStateSpace(const engine_info &eng_info, NfaInfo &nfa_info, - RoseStateOffsets *so, u32 *scratchStateSize, - u32 *transientStateSize) { +void allocateStateSpace(const engine_info &eng_info, NfaInfo &nfa_info, + RoseStateOffsets *so, u32 *scratchStateSize, + u32 *transientStateSize) { u32 state_offset; - if (eng_info.transient) { - // Transient engines do not use stream state, but must have room in - // transient state (stored in scratch). - state_offset = *transientStateSize; - *transientStateSize += eng_info.stream_size; + if (eng_info.transient) { + // Transient engines do not use stream state, but must have room in + // transient state (stored in scratch). + state_offset = *transientStateSize; + *transientStateSize += eng_info.stream_size; } else { - // Pack NFA stream state on to the end of the Rose stream state. + // Pack NFA stream state on to the end of the Rose stream state. state_offset = so->end; - so->end += eng_info.stream_size; + so->end += eng_info.stream_size; } - nfa_info.stateOffset = state_offset; + nfa_info.stateOffset = state_offset; - // Uncompressed state in scratch must be aligned. - *scratchStateSize = ROUNDUP_N(*scratchStateSize, eng_info.scratch_align); - nfa_info.fullStateOffset = *scratchStateSize; - *scratchStateSize += eng_info.scratch_size; + // Uncompressed state in scratch must be aligned. + *scratchStateSize = ROUNDUP_N(*scratchStateSize, eng_info.scratch_align); + nfa_info.fullStateOffset = *scratchStateSize; + *scratchStateSize += eng_info.scratch_size; } static -void updateNfaState(const build_context &bc, vector<NfaInfo> &nfa_infos, - RoseStateOffsets *so, u32 *scratchStateSize, - u32 *transientStateSize) { - if (nfa_infos.empty()) { - return; +void updateNfaState(const build_context &bc, vector<NfaInfo> &nfa_infos, + RoseStateOffsets *so, u32 *scratchStateSize, + u32 *transientStateSize) { + if (nfa_infos.empty()) { + return; } - *transientStateSize = 0; - *scratchStateSize = 0; + *transientStateSize = 0; + *scratchStateSize = 0; - for (u32 qi = 0; qi < nfa_infos.size(); qi++) { - NfaInfo &nfa_info = nfa_infos[qi]; - const auto &eng_info = bc.engine_info_by_queue.at(qi); - allocateStateSpace(eng_info, nfa_info, so, scratchStateSize, - transientStateSize); + for (u32 qi = 0; qi < nfa_infos.size(); qi++) { + NfaInfo &nfa_info = nfa_infos[qi]; + const auto &eng_info = bc.engine_info_by_queue.at(qi); + allocateStateSpace(eng_info, nfa_info, so, scratchStateSize, + transientStateSize); } } @@ -2152,8 +2152,8 @@ u32 RoseBuildImpl::calcHistoryRequired() const { } // Delayed literals contribute to history requirement as well. 
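allocateStateSpace above splits each engine's state between the packed stream state and scratch (for transient engines), and rounds the uncompressed scratch region up to the engine's alignment before reserving it. The arithmetic, sketched with invented field names:

#include <cstdint>

// Illustrative layout bookkeeping; field names are made up for the sketch.
struct EngineInfoSketch {
    bool transient;
    uint32_t stream_size;
    uint32_t scratch_size;
    uint32_t scratch_align;  // assumed non-zero
};

struct LayoutSketch {
    uint32_t stream_end = 0;      // end of packed stream state so far
    uint32_t transient_size = 0;  // transient engines live in scratch instead
    uint32_t scratch_size = 0;    // uncompressed state, aligned per engine
};

static uint32_t roundUp(uint32_t v, uint32_t align) {
    return (v + align - 1) / align * align;
}

// Returns the engine's (compressed) state offset and reserves its
// uncompressed slot in scratch, mirroring fullStateOffset above.
static uint32_t place(const EngineInfoSketch &e, LayoutSketch &l) {
    uint32_t state_offset;
    if (e.transient) {
        state_offset = l.transient_size;  // transient state is held in scratch
        l.transient_size += e.stream_size;
    } else {
        state_offset = l.stream_end;      // packed after existing stream state
        l.stream_end += e.stream_size;
    }
    l.scratch_size = roundUp(l.scratch_size, e.scratch_align);
    uint32_t full_state_offset = l.scratch_size;
    l.scratch_size += e.scratch_size;
    (void)full_state_offset;  // a real table entry would record both offsets
    return state_offset;
}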
- for (u32 id = 0; id < literals.size(); id++) { - const auto &lit = literals.at(id); + for (u32 id = 0; id < literals.size(); id++) { + const auto &lit = literals.at(id); if (lit.delay) { // If the literal is delayed _and_ has a mask that is longer than // the literal, we need enough history to match the whole mask as @@ -2181,122 +2181,122 @@ u32 RoseBuildImpl::calcHistoryRequired() const { } static -u32 buildLastByteIter(const RoseGraph &g, build_context &bc) { - vector<u32> lb_roles; +u32 buildLastByteIter(const RoseGraph &g, build_context &bc) { + vector<u32> lb_roles; - for (auto v : vertices_range(g)) { - if (!hasLastByteHistorySucc(g, v)) { - continue; + for (auto v : vertices_range(g)) { + if (!hasLastByteHistorySucc(g, v)) { + continue; } - // Eager EOD reporters won't have state indices. - auto it = bc.roleStateIndices.find(v); - if (it != end(bc.roleStateIndices)) { - lb_roles.push_back(it->second); - DEBUG_PRINTF("last byte %u\n", it->second); + // Eager EOD reporters won't have state indices. + auto it = bc.roleStateIndices.find(v); + if (it != end(bc.roleStateIndices)) { + lb_roles.push_back(it->second); + DEBUG_PRINTF("last byte %u\n", it->second); } } - if (lb_roles.empty()) { - return 0; /* invalid offset */ + if (lb_roles.empty()) { + return 0; /* invalid offset */ } - - auto iter = mmbBuildSparseIterator(lb_roles, bc.roleStateIndices.size()); - return bc.engine_blob.add_iterator(iter); + + auto iter = mmbBuildSparseIterator(lb_roles, bc.roleStateIndices.size()); + return bc.engine_blob.add_iterator(iter); } static -u32 findMinFloatingLiteralMatch(const RoseBuildImpl &build, - const vector<raw_dfa> &anchored_dfas) { - if (anchored_dfas.size() > 1) { - DEBUG_PRINTF("multiple anchored dfas\n"); - /* We must regard matches from other anchored tables as unordered, as - * we do for floating matches. */ - return 1; +u32 findMinFloatingLiteralMatch(const RoseBuildImpl &build, + const vector<raw_dfa> &anchored_dfas) { + if (anchored_dfas.size() > 1) { + DEBUG_PRINTF("multiple anchored dfas\n"); + /* We must regard matches from other anchored tables as unordered, as + * we do for floating matches. 
*/ + return 1; } - const RoseGraph &g = build.g; - u32 minWidth = ROSE_BOUND_INF; - for (auto v : vertices_range(g)) { - if (build.isAnchored(v) || build.isVirtualVertex(v)) { - DEBUG_PRINTF("skipping %zu anchored or root\n", g[v].index); - continue; - } + const RoseGraph &g = build.g; + u32 minWidth = ROSE_BOUND_INF; + for (auto v : vertices_range(g)) { + if (build.isAnchored(v) || build.isVirtualVertex(v)) { + DEBUG_PRINTF("skipping %zu anchored or root\n", g[v].index); + continue; + } - u32 w = g[v].min_offset; - DEBUG_PRINTF("%zu m_o = %u\n", g[v].index, w); + u32 w = g[v].min_offset; + DEBUG_PRINTF("%zu m_o = %u\n", g[v].index, w); - if (w < minWidth) { - minWidth = w; + if (w < minWidth) { + minWidth = w; } } - return minWidth; + return minWidth; } static -vector<u32> buildSuffixEkeyLists(const RoseBuildImpl &build, build_context &bc, - const QueueIndexFactory &qif) { - vector<u32> out(qif.allocated_count()); +vector<u32> buildSuffixEkeyLists(const RoseBuildImpl &build, build_context &bc, + const QueueIndexFactory &qif) { + vector<u32> out(qif.allocated_count()); - map<u32, vector<u32>> qi_to_ekeys; /* for determinism */ + map<u32, vector<u32>> qi_to_ekeys; /* for determinism */ - for (const auto &e : bc.suffixes) { - const suffix_id &s = e.first; - u32 qi = e.second; - set<u32> ekeys = reportsToEkeys(all_reports(s), build.rm); + for (const auto &e : bc.suffixes) { + const suffix_id &s = e.first; + u32 qi = e.second; + set<u32> ekeys = reportsToEkeys(all_reports(s), build.rm); - if (!ekeys.empty()) { - qi_to_ekeys[qi] = {ekeys.begin(), ekeys.end()}; + if (!ekeys.empty()) { + qi_to_ekeys[qi] = {ekeys.begin(), ekeys.end()}; } } - /* for each outfix also build elists */ - for (const auto &outfix : build.outfixes) { - u32 qi = outfix.get_queue(); - set<u32> ekeys = reportsToEkeys(all_reports(outfix), build.rm); + /* for each outfix also build elists */ + for (const auto &outfix : build.outfixes) { + u32 qi = outfix.get_queue(); + set<u32> ekeys = reportsToEkeys(all_reports(outfix), build.rm); - if (!ekeys.empty()) { - qi_to_ekeys[qi] = {ekeys.begin(), ekeys.end()}; - } + if (!ekeys.empty()) { + qi_to_ekeys[qi] = {ekeys.begin(), ekeys.end()}; + } } - for (auto &e : qi_to_ekeys) { - u32 qi = e.first; - auto &ekeys = e.second; - assert(!ekeys.empty()); - ekeys.push_back(INVALID_EKEY); /* terminator */ - out[qi] = bc.engine_blob.add_range(ekeys); + for (auto &e : qi_to_ekeys) { + u32 qi = e.first; + auto &ekeys = e.second; + assert(!ekeys.empty()); + ekeys.push_back(INVALID_EKEY); /* terminator */ + out[qi] = bc.engine_blob.add_range(ekeys); } - return out; + return out; } -/** Returns sparse iter offset in engine blob. */ +/** Returns sparse iter offset in engine blob. 
*/ static -u32 buildEodNfaIterator(build_context &bc, const u32 activeQueueCount) { - vector<u32> keys; - for (u32 qi = 0; qi < activeQueueCount; ++qi) { - const auto &eng_info = bc.engine_info_by_queue.at(qi); - if (eng_info.accepts_eod) { - DEBUG_PRINTF("nfa qi=%u accepts eod\n", qi); - keys.push_back(qi); - } +u32 buildEodNfaIterator(build_context &bc, const u32 activeQueueCount) { + vector<u32> keys; + for (u32 qi = 0; qi < activeQueueCount; ++qi) { + const auto &eng_info = bc.engine_info_by_queue.at(qi); + if (eng_info.accepts_eod) { + DEBUG_PRINTF("nfa qi=%u accepts eod\n", qi); + keys.push_back(qi); + } } - if (keys.empty()) { - return 0; + if (keys.empty()) { + return 0; } - DEBUG_PRINTF("building iter for %zu nfas\n", keys.size()); + DEBUG_PRINTF("building iter for %zu nfas\n", keys.size()); - auto iter = mmbBuildSparseIterator(keys, activeQueueCount); - return bc.engine_blob.add_iterator(iter); + auto iter = mmbBuildSparseIterator(keys, activeQueueCount); + return bc.engine_blob.add_iterator(iter); } static -bool hasMpvTrigger(const set<u32> &reports, const ReportManager &rm) { - for (u32 r : reports) { - if (rm.getReport(r).type == INTERNAL_ROSE_CHAIN) { +bool hasMpvTrigger(const set<u32> &reports, const ReportManager &rm) { + for (u32 r : reports) { + if (rm.getReport(r).type == INTERNAL_ROSE_CHAIN) { return true; } } @@ -2305,28 +2305,28 @@ bool hasMpvTrigger(const set<u32> &reports, const ReportManager &rm) { } static -bool anyEndfixMpvTriggers(const RoseBuildImpl &build) { - const RoseGraph &g = build.g; - unordered_set<suffix_id> done; +bool anyEndfixMpvTriggers(const RoseBuildImpl &build) { + const RoseGraph &g = build.g; + unordered_set<suffix_id> done; - /* suffixes */ - for (auto v : vertices_range(g)) { - if (!g[v].suffix) { - continue; + /* suffixes */ + for (auto v : vertices_range(g)) { + if (!g[v].suffix) { + continue; + } + if (contains(done, g[v].suffix)) { + continue; /* already done */ } - if (contains(done, g[v].suffix)) { - continue; /* already done */ - } - done.insert(g[v].suffix); + done.insert(g[v].suffix); - if (hasMpvTrigger(all_reports(g[v].suffix), build.rm)) { - return true; + if (hasMpvTrigger(all_reports(g[v].suffix), build.rm)) { + return true; } } - /* outfixes */ - for (const auto &out : build.outfixes) { - if (hasMpvTrigger(all_reports(out), build.rm)) { + /* outfixes */ + for (const auto &out : build.outfixes) { + if (hasMpvTrigger(all_reports(out), build.rm)) { return true; } } @@ -2334,163 +2334,163 @@ bool anyEndfixMpvTriggers(const RoseBuildImpl &build) { return false; } -struct DerivedBoundaryReports { - explicit DerivedBoundaryReports(const BoundaryReports &boundary) { - insert(&report_at_0_eod_full, boundary.report_at_0_eod); - insert(&report_at_0_eod_full, boundary.report_at_eod); - insert(&report_at_0_eod_full, boundary.report_at_0); +struct DerivedBoundaryReports { + explicit DerivedBoundaryReports(const BoundaryReports &boundary) { + insert(&report_at_0_eod_full, boundary.report_at_0_eod); + insert(&report_at_0_eod_full, boundary.report_at_eod); + insert(&report_at_0_eod_full, boundary.report_at_0); } - set<ReportID> report_at_0_eod_full; -}; + set<ReportID> report_at_0_eod_full; +}; -static -void addSomRevNfas(build_context &bc, RoseEngine &proto, - const SomSlotManager &ssm) { - const auto &nfas = ssm.getRevNfas(); - vector<u32> nfa_offsets; - nfa_offsets.reserve(nfas.size()); - for (const auto &nfa : nfas) { - assert(nfa); - u32 offset = bc.engine_blob.add(*nfa, nfa->length); - DEBUG_PRINTF("wrote SOM rev NFA %zu (len %u) to 
offset %u\n", - nfa_offsets.size(), nfa->length, offset); - nfa_offsets.push_back(offset); - /* note: som rev nfas don't need a queue assigned as only run in block - * mode reverse */ +static +void addSomRevNfas(build_context &bc, RoseEngine &proto, + const SomSlotManager &ssm) { + const auto &nfas = ssm.getRevNfas(); + vector<u32> nfa_offsets; + nfa_offsets.reserve(nfas.size()); + for (const auto &nfa : nfas) { + assert(nfa); + u32 offset = bc.engine_blob.add(*nfa, nfa->length); + DEBUG_PRINTF("wrote SOM rev NFA %zu (len %u) to offset %u\n", + nfa_offsets.size(), nfa->length, offset); + nfa_offsets.push_back(offset); + /* note: som rev nfas don't need a queue assigned as only run in block + * mode reverse */ } - proto.somRevCount = verify_u32(nfas.size()); - proto.somRevOffsetOffset = bc.engine_blob.add_range(nfa_offsets); + proto.somRevCount = verify_u32(nfas.size()); + proto.somRevOffsetOffset = bc.engine_blob.add_range(nfa_offsets); } static -void recordResources(RoseResources &resources, const RoseBuildImpl &build, - const vector<raw_dfa> &anchored_dfas, - const vector<LitFragment> &fragments) { - if (!build.outfixes.empty()) { - resources.has_outfixes = true; +void recordResources(RoseResources &resources, const RoseBuildImpl &build, + const vector<raw_dfa> &anchored_dfas, + const vector<LitFragment> &fragments) { + if (!build.outfixes.empty()) { + resources.has_outfixes = true; } - resources.has_literals = !fragments.empty(); + resources.has_literals = !fragments.empty(); - const auto &g = build.g; - for (const auto &v : vertices_range(g)) { - if (g[v].eod_accept) { - resources.has_eod = true; - break; + const auto &g = build.g; + for (const auto &v : vertices_range(g)) { + if (g[v].eod_accept) { + resources.has_eod = true; + break; } - if (g[v].suffix && has_eod_accepts(g[v].suffix)) { - resources.has_eod = true; - break; + if (g[v].suffix && has_eod_accepts(g[v].suffix)) { + resources.has_eod = true; + break; } } - resources.has_anchored = !anchored_dfas.empty(); - resources.has_anchored_multiple = anchored_dfas.size() > 1; - for (const auto &rdfa : anchored_dfas) { - if (rdfa.states.size() > 256) { - resources.has_anchored_large = true; + resources.has_anchored = !anchored_dfas.empty(); + resources.has_anchored_multiple = anchored_dfas.size() > 1; + for (const auto &rdfa : anchored_dfas) { + if (rdfa.states.size() > 256) { + resources.has_anchored_large = true; } } } static -u32 writeProgram(build_context &bc, RoseProgram &&program) { - if (program.empty()) { - DEBUG_PRINTF("no program\n"); - return 0; +u32 writeProgram(build_context &bc, RoseProgram &&program) { + if (program.empty()) { + DEBUG_PRINTF("no program\n"); + return 0; } - applyFinalSpecialisation(program); + applyFinalSpecialisation(program); - auto it = bc.program_cache.find(program); - if (it != end(bc.program_cache)) { - DEBUG_PRINTF("reusing cached program at %u\n", it->second); - return it->second; + auto it = bc.program_cache.find(program); + if (it != end(bc.program_cache)) { + DEBUG_PRINTF("reusing cached program at %u\n", it->second); + return it->second; } - recordResources(bc.resources, program); - recordLongLiterals(bc.longLiterals, program); + recordResources(bc.resources, program); + recordLongLiterals(bc.longLiterals, program); - auto prog_bytecode = writeProgram(bc.engine_blob, program); - u32 offset = bc.engine_blob.add(prog_bytecode); - DEBUG_PRINTF("prog len %zu written at offset %u\n", prog_bytecode.size(), - offset); - bc.program_cache.emplace(move(program), offset); - return offset; + auto 
prog_bytecode = writeProgram(bc.engine_blob, program); + u32 offset = bc.engine_blob.add(prog_bytecode); + DEBUG_PRINTF("prog len %zu written at offset %u\n", prog_bytecode.size(), + offset); + bc.program_cache.emplace(move(program), offset); + return offset; } static -u32 writeActiveLeftIter(RoseEngineBlob &engine_blob, - const vector<LeftNfaInfo> &leftInfoTable) { - vector<u32> keys; - for (size_t i = 0; i < leftInfoTable.size(); i++) { - if (!leftInfoTable[i].transient) { - DEBUG_PRINTF("leftfix %zu is active\n", i); - keys.push_back(verify_u32(i)); - } +u32 writeActiveLeftIter(RoseEngineBlob &engine_blob, + const vector<LeftNfaInfo> &leftInfoTable) { + vector<u32> keys; + for (size_t i = 0; i < leftInfoTable.size(); i++) { + if (!leftInfoTable[i].transient) { + DEBUG_PRINTF("leftfix %zu is active\n", i); + keys.push_back(verify_u32(i)); + } } - DEBUG_PRINTF("%zu active leftfixes\n", keys.size()); + DEBUG_PRINTF("%zu active leftfixes\n", keys.size()); - if (keys.empty()) { - return 0; + if (keys.empty()) { + return 0; } - auto iter = mmbBuildSparseIterator(keys, verify_u32(leftInfoTable.size())); - return engine_blob.add_iterator(iter); + auto iter = mmbBuildSparseIterator(keys, verify_u32(leftInfoTable.size())); + return engine_blob.add_iterator(iter); } static -bool hasEodAnchors(const RoseBuildImpl &build, const build_context &bc, - u32 outfixEndQueue) { - for (u32 i = 0; i < outfixEndQueue; i++) { - const auto &eng_info = bc.engine_info_by_queue.at(i); - if (eng_info.accepts_eod) { - DEBUG_PRINTF("outfix has eod\n"); - return true; - } +bool hasEodAnchors(const RoseBuildImpl &build, const build_context &bc, + u32 outfixEndQueue) { + for (u32 i = 0; i < outfixEndQueue; i++) { + const auto &eng_info = bc.engine_info_by_queue.at(i); + if (eng_info.accepts_eod) { + DEBUG_PRINTF("outfix has eod\n"); + return true; + } } - if (build.eod_event_literal_id != MO_INVALID_IDX) { - DEBUG_PRINTF("eod is an event to be celebrated\n"); - return true; + if (build.eod_event_literal_id != MO_INVALID_IDX) { + DEBUG_PRINTF("eod is an event to be celebrated\n"); + return true; } - const RoseGraph &g = build.g; - for (auto v : vertices_range(g)) { - if (g[v].eod_accept) { - DEBUG_PRINTF("literally report eod\n"); + const RoseGraph &g = build.g; + for (auto v : vertices_range(g)) { + if (g[v].eod_accept) { + DEBUG_PRINTF("literally report eod\n"); + return true; + } + if (g[v].suffix && has_eod_accepts(g[v].suffix)) { + DEBUG_PRINTF("eod suffix\n"); return true; } - if (g[v].suffix && has_eod_accepts(g[v].suffix)) { - DEBUG_PRINTF("eod suffix\n"); - return true; - } } - DEBUG_PRINTF("yawn\n"); + DEBUG_PRINTF("yawn\n"); return false; } static -void writeDkeyInfo(const ReportManager &rm, RoseEngineBlob &engine_blob, - RoseEngine &proto) { - const auto inv_dkeys = rm.getDkeyToReportTable(); - proto.invDkeyOffset = engine_blob.add_range(inv_dkeys); - proto.dkeyCount = rm.numDkeys(); - proto.dkeyLogSize = fatbit_size(proto.dkeyCount); +void writeDkeyInfo(const ReportManager &rm, RoseEngineBlob &engine_blob, + RoseEngine &proto) { + const auto inv_dkeys = rm.getDkeyToReportTable(); + proto.invDkeyOffset = engine_blob.add_range(inv_dkeys); + proto.dkeyCount = rm.numDkeys(); + proto.dkeyLogSize = fatbit_size(proto.dkeyCount); } static -void writeLeftInfo(RoseEngineBlob &engine_blob, RoseEngine &proto, - const vector<LeftNfaInfo> &leftInfoTable) { - proto.leftOffset = engine_blob.add_range(leftInfoTable); - proto.activeLeftIterOffset - = writeActiveLeftIter(engine_blob, leftInfoTable); - proto.roseCount = 
verify_u32(leftInfoTable.size()); - proto.activeLeftCount = verify_u32(leftInfoTable.size()); - proto.rosePrefixCount = countRosePrefixes(leftInfoTable); +void writeLeftInfo(RoseEngineBlob &engine_blob, RoseEngine &proto, + const vector<LeftNfaInfo> &leftInfoTable) { + proto.leftOffset = engine_blob.add_range(leftInfoTable); + proto.activeLeftIterOffset + = writeActiveLeftIter(engine_blob, leftInfoTable); + proto.roseCount = verify_u32(leftInfoTable.size()); + proto.activeLeftCount = verify_u32(leftInfoTable.size()); + proto.rosePrefixCount = countRosePrefixes(leftInfoTable); } static @@ -2506,132 +2506,132 @@ void writeLogicalInfo(const ReportManager &rm, RoseEngineBlob &engine_blob, } static -void writeNfaInfo(const RoseBuildImpl &build, build_context &bc, - RoseEngine &proto, const set<u32> &no_retrigger_queues) { - const u32 queue_count = build.qif.allocated_count(); - if (!queue_count) { - return; +void writeNfaInfo(const RoseBuildImpl &build, build_context &bc, + RoseEngine &proto, const set<u32> &no_retrigger_queues) { + const u32 queue_count = build.qif.allocated_count(); + if (!queue_count) { + return; } - auto ekey_lists = buildSuffixEkeyLists(build, bc, build.qif); + auto ekey_lists = buildSuffixEkeyLists(build, bc, build.qif); - vector<NfaInfo> infos(queue_count); - memset(infos.data(), 0, sizeof(NfaInfo) * queue_count); + vector<NfaInfo> infos(queue_count); + memset(infos.data(), 0, sizeof(NfaInfo) * queue_count); - for (u32 qi = 0; qi < queue_count; qi++) { - NfaInfo &info = infos[qi]; - info.nfaOffset = bc.engineOffsets.at(qi); - assert(qi < ekey_lists.size()); - info.ekeyListOffset = ekey_lists.at(qi); - info.no_retrigger = contains(no_retrigger_queues, qi) ? 1 : 0; + for (u32 qi = 0; qi < queue_count; qi++) { + NfaInfo &info = infos[qi]; + info.nfaOffset = bc.engineOffsets.at(qi); + assert(qi < ekey_lists.size()); + info.ekeyListOffset = ekey_lists.at(qi); + info.no_retrigger = contains(no_retrigger_queues, qi) ? 1 : 0; } - // Mark outfixes that are in the small block matcher. - for (const auto &out : build.outfixes) { - const u32 qi = out.get_queue(); - assert(qi < infos.size()); - infos.at(qi).in_sbmatcher = out.in_sbmatcher; + // Mark outfixes that are in the small block matcher. + for (const auto &out : build.outfixes) { + const u32 qi = out.get_queue(); + assert(qi < infos.size()); + infos.at(qi).in_sbmatcher = out.in_sbmatcher; } - // Mark suffixes triggered by EOD table literals. - const RoseGraph &g = build.g; + // Mark suffixes triggered by EOD table literals. + const RoseGraph &g = build.g; for (auto v : vertices_range(g)) { if (!g[v].suffix) { continue; } - u32 qi = bc.suffixes.at(g[v].suffix); - assert(qi < infos.size()); - if (build.isInETable(v)) { - infos.at(qi).eod = 1; + u32 qi = bc.suffixes.at(g[v].suffix); + assert(qi < infos.size()); + if (build.isInETable(v)) { + infos.at(qi).eod = 1; } } - // Update state offsets to do with NFAs in proto and in the NfaInfo - // structures. - updateNfaState(bc, infos, &proto.stateOffsets, &proto.scratchStateSize, - &proto.tStateSize); + // Update state offsets to do with NFAs in proto and in the NfaInfo + // structures. 
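Most of the writers above (writeLeftInfo, writeDkeyInfo, writeNfaInfo) follow the same shape: serialise a table into the engine blob and store the returned offset in the RoseEngine prototype. A bare-bones sketch of such an append-and-remember-the-offset blob; the blob type is invented here and alignment concerns are ignored:

#include <cstdint>
#include <type_traits>
#include <vector>

struct BlobSketch {
    std::vector<uint8_t> bytes;

    template <typename T>
    uint32_t add_range(const std::vector<T> &items) {
        static_assert(std::is_trivially_copyable<T>::value,
                      "only flat records are byte-copied");
        uint32_t offset = static_cast<uint32_t>(bytes.size());
        if (!items.empty()) {
            const uint8_t *src = reinterpret_cast<const uint8_t *>(items.data());
            bytes.insert(bytes.end(), src, src + items.size() * sizeof(T));
        }
        return offset;  // caller stores this offset in the engine header
    }
};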
+ updateNfaState(bc, infos, &proto.stateOffsets, &proto.scratchStateSize, + &proto.tStateSize); - proto.nfaInfoOffset = bc.engine_blob.add_range(infos); + proto.nfaInfoOffset = bc.engine_blob.add_range(infos); } static -bool hasBoundaryReports(const BoundaryReports &boundary) { - if (!boundary.report_at_0.empty()) { - DEBUG_PRINTF("has boundary reports at 0\n"); - return true; +bool hasBoundaryReports(const BoundaryReports &boundary) { + if (!boundary.report_at_0.empty()) { + DEBUG_PRINTF("has boundary reports at 0\n"); + return true; } - if (!boundary.report_at_0_eod.empty()) { - DEBUG_PRINTF("has boundary reports at 0 eod\n"); - return true; + if (!boundary.report_at_0_eod.empty()) { + DEBUG_PRINTF("has boundary reports at 0 eod\n"); + return true; } - if (!boundary.report_at_eod.empty()) { - DEBUG_PRINTF("has boundary reports at eod\n"); - return true; + if (!boundary.report_at_eod.empty()) { + DEBUG_PRINTF("has boundary reports at eod\n"); + return true; } - DEBUG_PRINTF("no boundary reports\n"); - return false; + DEBUG_PRINTF("no boundary reports\n"); + return false; } static -void makeBoundaryPrograms(const RoseBuildImpl &build, build_context &bc, - const BoundaryReports &boundary, - const DerivedBoundaryReports &dboundary, - RoseBoundaryReports &out) { - DEBUG_PRINTF("report ^: %zu\n", boundary.report_at_0.size()); - DEBUG_PRINTF("report $: %zu\n", boundary.report_at_eod.size()); - DEBUG_PRINTF("report ^$: %zu\n", dboundary.report_at_0_eod_full.size()); +void makeBoundaryPrograms(const RoseBuildImpl &build, build_context &bc, + const BoundaryReports &boundary, + const DerivedBoundaryReports &dboundary, + RoseBoundaryReports &out) { + DEBUG_PRINTF("report ^: %zu\n", boundary.report_at_0.size()); + DEBUG_PRINTF("report $: %zu\n", boundary.report_at_eod.size()); + DEBUG_PRINTF("report ^$: %zu\n", dboundary.report_at_0_eod_full.size()); - auto eod_prog = makeBoundaryProgram(build, boundary.report_at_eod); - out.reportEodOffset = writeProgram(bc, move(eod_prog)); + auto eod_prog = makeBoundaryProgram(build, boundary.report_at_eod); + out.reportEodOffset = writeProgram(bc, move(eod_prog)); - auto zero_prog = makeBoundaryProgram(build, boundary.report_at_0); - out.reportZeroOffset = writeProgram(bc, move(zero_prog)); + auto zero_prog = makeBoundaryProgram(build, boundary.report_at_0); + out.reportZeroOffset = writeProgram(bc, move(zero_prog)); - auto zeod_prog = makeBoundaryProgram(build, dboundary.report_at_0_eod_full); - out.reportZeroEodOffset = writeProgram(bc, move(zeod_prog)); + auto zeod_prog = makeBoundaryProgram(build, dboundary.report_at_0_eod_full); + out.reportZeroEodOffset = writeProgram(bc, move(zeod_prog)); } static -unordered_map<RoseVertex, u32> assignStateIndices(const RoseBuildImpl &build) { - const auto &g = build.g; +unordered_map<RoseVertex, u32> assignStateIndices(const RoseBuildImpl &build) { + const auto &g = build.g; - u32 state = 0; - unordered_map<RoseVertex, u32> roleStateIndices; + u32 state = 0; + unordered_map<RoseVertex, u32> roleStateIndices; for (auto v : vertices_range(g)) { - // Virtual vertices (starts, EOD accept vertices) never need state - // indices. - if (build.isVirtualVertex(v)) { + // Virtual vertices (starts, EOD accept vertices) never need state + // indices. + if (build.isVirtualVertex(v)) { continue; } - - // We only need a state index if we have successors that are not - // eagerly-reported EOD vertices. 
- bool needs_state_index = false; - for (const auto &e : out_edges_range(v, g)) { - if (!canEagerlyReportAtEod(build, e)) { - needs_state_index = true; - break; - } + + // We only need a state index if we have successors that are not + // eagerly-reported EOD vertices. + bool needs_state_index = false; + for (const auto &e : out_edges_range(v, g)) { + if (!canEagerlyReportAtEod(build, e)) { + needs_state_index = true; + break; + } } - if (!needs_state_index) { - continue; + if (!needs_state_index) { + continue; } - /* TODO: also don't need a state index if all edges are nfa based */ - roleStateIndices.emplace(v, state++); + /* TODO: also don't need a state index if all edges are nfa based */ + roleStateIndices.emplace(v, state++); } - DEBUG_PRINTF("assigned %u states (from %zu vertices)\n", state, - num_vertices(g)); - - return roleStateIndices; + DEBUG_PRINTF("assigned %u states (from %zu vertices)\n", state, + num_vertices(g)); + + return roleStateIndices; } static -bool hasUsefulStops(const left_build_info &build) { - for (u32 i = 0; i < N_CHARS; i++) { - if (build.stopAlphabet[i]) { +bool hasUsefulStops(const left_build_info &build) { + for (u32 i = 0; i < N_CHARS; i++) { + if (build.stopAlphabet[i]) { return true; } } @@ -2639,609 +2639,609 @@ bool hasUsefulStops(const left_build_info &build) { } static -void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc, - const set<u32> &eager_queues, u32 leftfixBeginQueue, - u32 leftfixCount, vector<LeftNfaInfo> &leftTable, - u32 *laggedRoseCount, size_t *history) { - const RoseGraph &g = tbi.g; - const CompileContext &cc = tbi.cc; +void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc, + const set<u32> &eager_queues, u32 leftfixBeginQueue, + u32 leftfixCount, vector<LeftNfaInfo> &leftTable, + u32 *laggedRoseCount, size_t *history) { + const RoseGraph &g = tbi.g; + const CompileContext &cc = tbi.cc; - unordered_set<u32> done_core; + unordered_set<u32> done_core; - leftTable.resize(leftfixCount); + leftTable.resize(leftfixCount); - u32 lagIndex = 0; + u32 lagIndex = 0; - for (RoseVertex v : vertices_range(g)) { - if (!g[v].left) { - continue; + for (RoseVertex v : vertices_range(g)) { + if (!g[v].left) { + continue; + } + assert(contains(bc.leftfix_info, v)); + const left_build_info &lbi = bc.leftfix_info.at(v); + if (lbi.has_lookaround) { + continue; } - assert(contains(bc.leftfix_info, v)); - const left_build_info &lbi = bc.leftfix_info.at(v); - if (lbi.has_lookaround) { - continue; - } - assert(lbi.queue >= leftfixBeginQueue); - u32 left_index = lbi.queue - leftfixBeginQueue; - assert(left_index < leftfixCount); + assert(lbi.queue >= leftfixBeginQueue); + u32 left_index = lbi.queue - leftfixBeginQueue; + assert(left_index < leftfixCount); - /* seedy hack to make miracles more effective. - * - * TODO: make miracle seeking not depend on history length and have - * runt scans */ - if (hasUsefulStops(lbi)) { - ENSURE_AT_LEAST(history, - (size_t)MIN(cc.grey.maxHistoryAvailable, - g[v].left.lag + 1 - + cc.grey.miracleHistoryBonus)); - } + /* seedy hack to make miracles more effective. 
+ * + * TODO: make miracle seeking not depend on history length and have + * runt scans */ + if (hasUsefulStops(lbi)) { + ENSURE_AT_LEAST(history, + (size_t)MIN(cc.grey.maxHistoryAvailable, + g[v].left.lag + 1 + + cc.grey.miracleHistoryBonus)); + } - LeftNfaInfo &left = leftTable[left_index]; - if (!contains(done_core, left_index)) { - done_core.insert(left_index); - memset(&left, 0, sizeof(left)); - left.squash_mask = ~0ULL; + LeftNfaInfo &left = leftTable[left_index]; + if (!contains(done_core, left_index)) { + done_core.insert(left_index); + memset(&left, 0, sizeof(left)); + left.squash_mask = ~0ULL; - DEBUG_PRINTF("populating info for %u\n", left_index); + DEBUG_PRINTF("populating info for %u\n", left_index); - left.maxQueueLen = lbi.max_queuelen; + left.maxQueueLen = lbi.max_queuelen; - if (hasUsefulStops(lbi)) { - assert(lbi.stopAlphabet.size() == N_CHARS); - left.stopTable = bc.engine_blob.add_range(lbi.stopAlphabet); - } + if (hasUsefulStops(lbi)) { + assert(lbi.stopAlphabet.size() == N_CHARS); + left.stopTable = bc.engine_blob.add_range(lbi.stopAlphabet); + } - assert(lbi.countingMiracleOffset || !lbi.countingMiracleCount); - left.countingMiracleOffset = lbi.countingMiracleOffset; + assert(lbi.countingMiracleOffset || !lbi.countingMiracleCount); + left.countingMiracleOffset = lbi.countingMiracleOffset; - DEBUG_PRINTF("mw = %u\n", lbi.transient); - left.transient = verify_u8(lbi.transient); - left.infix = tbi.isNonRootSuccessor(v); - left.eager = contains(eager_queues, lbi.queue); + DEBUG_PRINTF("mw = %u\n", lbi.transient); + left.transient = verify_u8(lbi.transient); + left.infix = tbi.isNonRootSuccessor(v); + left.eager = contains(eager_queues, lbi.queue); - // A rose has a lagIndex if it's non-transient and we are - // streaming. - if (!lbi.transient && cc.streaming) { - assert(lagIndex < ROSE_OFFSET_INVALID); - left.lagIndex = lagIndex++; - } else { - left.lagIndex = ROSE_OFFSET_INVALID; - } - } + // A rose has a lagIndex if it's non-transient and we are + // streaming. + if (!lbi.transient && cc.streaming) { + assert(lagIndex < ROSE_OFFSET_INVALID); + left.lagIndex = lagIndex++; + } else { + left.lagIndex = ROSE_OFFSET_INVALID; + } + } - DEBUG_PRINTF("rose %u is %s\n", left_index, - left.infix ? "infix" : "prefix"); + DEBUG_PRINTF("rose %u is %s\n", left_index, + left.infix ? "infix" : "prefix"); - // Update squash mask. - left.squash_mask &= lbi.squash_mask; + // Update squash mask. + left.squash_mask &= lbi.squash_mask; - // Update the max delay. - ENSURE_AT_LEAST(&left.maxLag, lbi.lag); + // Update the max delay. 
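Each LeftNfaInfo above is shared by every vertex on the same queue, so per-vertex properties are folded in as the loop revisits the entry: squash masks are combined with a bitwise AND, and the lag is raised to the maximum seen (the ENSURE_AT_LEAST just below). The merge step in isolation, with invented field names:

#include <algorithm>
#include <cstdint>

// Sketch of folding per-use properties into one shared table entry.
struct SharedLeftSketch {
    uint64_t squash_mask = ~0ULL;  // start with all bits set, AND in each user
    uint32_t max_lag = 0;          // raised to the largest lag seen
};

static void mergeUse(SharedLeftSketch &entry, uint64_t use_squash_mask,
                     uint32_t use_lag) {
    entry.squash_mask &= use_squash_mask;
    entry.max_lag = std::max(entry.max_lag, use_lag);
}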
+ ENSURE_AT_LEAST(&left.maxLag, lbi.lag); - if (contains(g[v].literals, tbi.eod_event_literal_id)) { - left.eod_check = 1; - } + if (contains(g[v].literals, tbi.eod_event_literal_id)) { + left.eod_check = 1; + } } - DEBUG_PRINTF("built %u roses with lag indices\n", lagIndex); - *laggedRoseCount = lagIndex; + DEBUG_PRINTF("built %u roses with lag indices\n", lagIndex); + *laggedRoseCount = lagIndex; } static -RoseProgram makeLiteralProgram(const RoseBuildImpl &build, build_context &bc, - ProgramBuild &prog_build, u32 lit_id, - const vector<vector<RoseEdge>> &lit_edge_map, - bool is_anchored_replay_program) { - DEBUG_PRINTF("lit_id=%u\n", lit_id); - assert(lit_id < lit_edge_map.size()); +RoseProgram makeLiteralProgram(const RoseBuildImpl &build, build_context &bc, + ProgramBuild &prog_build, u32 lit_id, + const vector<vector<RoseEdge>> &lit_edge_map, + bool is_anchored_replay_program) { + DEBUG_PRINTF("lit_id=%u\n", lit_id); + assert(lit_id < lit_edge_map.size()); - return makeLiteralProgram(build, bc.leftfix_info, bc.suffixes, - bc.engine_info_by_queue, bc.roleStateIndices, - prog_build, lit_id, lit_edge_map.at(lit_id), - is_anchored_replay_program); + return makeLiteralProgram(build, bc.leftfix_info, bc.suffixes, + bc.engine_info_by_queue, bc.roleStateIndices, + prog_build, lit_id, lit_edge_map.at(lit_id), + is_anchored_replay_program); } static -RoseProgram makeFragmentProgram(const RoseBuildImpl &build, build_context &bc, - ProgramBuild &prog_build, - const vector<u32> &lit_ids, - const vector<vector<RoseEdge>> &lit_edge_map) { - assert(!lit_ids.empty()); +RoseProgram makeFragmentProgram(const RoseBuildImpl &build, build_context &bc, + ProgramBuild &prog_build, + const vector<u32> &lit_ids, + const vector<vector<RoseEdge>> &lit_edge_map) { + assert(!lit_ids.empty()); - vector<RoseProgram> blocks; - for (const auto &lit_id : lit_ids) { - auto prog = makeLiteralProgram(build, bc, prog_build, lit_id, - lit_edge_map, false); - blocks.push_back(move(prog)); + vector<RoseProgram> blocks; + for (const auto &lit_id : lit_ids) { + auto prog = makeLiteralProgram(build, bc, prog_build, lit_id, + lit_edge_map, false); + blocks.push_back(move(prog)); } - return assembleProgramBlocks(move(blocks)); + return assembleProgramBlocks(move(blocks)); } -/** - * \brief Returns a map from literal ID to a list of edges leading into - * vertices with that literal ID. - */ +/** + * \brief Returns a map from literal ID to a list of edges leading into + * vertices with that literal ID. + */ static -vector<vector<RoseEdge>> findEdgesByLiteral(const RoseBuildImpl &build) { - vector<vector<RoseEdge>> lit_edge_map(build.literals.size()); +vector<vector<RoseEdge>> findEdgesByLiteral(const RoseBuildImpl &build) { + vector<vector<RoseEdge>> lit_edge_map(build.literals.size()); - const auto &g = build.g; - for (const auto &v : vertices_range(g)) { - for (const auto &lit_id : g[v].literals) { - assert(lit_id < lit_edge_map.size()); - auto &edge_list = lit_edge_map.at(lit_id); - insert(&edge_list, edge_list.end(), in_edges(v, g)); - } - } + const auto &g = build.g; + for (const auto &v : vertices_range(g)) { + for (const auto &lit_id : g[v].literals) { + assert(lit_id < lit_edge_map.size()); + auto &edge_list = lit_edge_map.at(lit_id); + insert(&edge_list, edge_list.end(), in_edges(v, g)); + } + } - // Sort edges in each edge list by (source, target) indices. This gives us - // less surprising ordering in program generation for a literal with many - // edges. 
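As the comment above notes (the sort itself follows), each literal's edge list is ordered by (source index, target index) so that program generation sees a stable, unsurprising order. The comparator pattern on its own, with a placeholder edge type:

#include <algorithm>
#include <tuple>
#include <vector>

struct EdgeSketch {
    unsigned source_index;
    unsigned target_index;
};

void sortEdges(std::vector<EdgeSketch> &edges) {
    std::sort(edges.begin(), edges.end(),
              [](const EdgeSketch &a, const EdgeSketch &b) {
        return std::tie(a.source_index, a.target_index) <
               std::tie(b.source_index, b.target_index);
    });
}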
- for (auto &edge_list : lit_edge_map) { - sort(begin(edge_list), end(edge_list), [&g](const RoseEdge &a, - const RoseEdge &b) { - return tie(g[source(a, g)].index, g[target(a, g)].index) < - tie(g[source(b, g)].index, g[target(b, g)].index); - }); + // Sort edges in each edge list by (source, target) indices. This gives us + // less surprising ordering in program generation for a literal with many + // edges. + for (auto &edge_list : lit_edge_map) { + sort(begin(edge_list), end(edge_list), [&g](const RoseEdge &a, + const RoseEdge &b) { + return tie(g[source(a, g)].index, g[target(a, g)].index) < + tie(g[source(b, g)].index, g[target(b, g)].index); + }); } - return lit_edge_map; + return lit_edge_map; } static -bool isUsedLiteral(const RoseBuildImpl &build, u32 lit_id) { - assert(lit_id < build.literal_info.size()); - const auto &info = build.literal_info[lit_id]; - if (!info.vertices.empty()) { - return true; - } +bool isUsedLiteral(const RoseBuildImpl &build, u32 lit_id) { + assert(lit_id < build.literal_info.size()); + const auto &info = build.literal_info[lit_id]; + if (!info.vertices.empty()) { + return true; + } - for (const u32 &delayed_id : info.delayed_ids) { - assert(delayed_id < build.literal_info.size()); - const rose_literal_info &delayed_info = build.literal_info[delayed_id]; - if (!delayed_info.vertices.empty()) { - return true; - } + for (const u32 &delayed_id : info.delayed_ids) { + assert(delayed_id < build.literal_info.size()); + const rose_literal_info &delayed_info = build.literal_info[delayed_id]; + if (!delayed_info.vertices.empty()) { + return true; + } } - DEBUG_PRINTF("literal %u has no refs\n", lit_id); - return false; + DEBUG_PRINTF("literal %u has no refs\n", lit_id); + return false; } static -rose_literal_id getFragment(rose_literal_id lit) { - if (lit.s.length() > ROSE_SHORT_LITERAL_LEN_MAX) { - // Trim to last ROSE_SHORT_LITERAL_LEN_MAX bytes. - lit.s.erase(0, lit.s.length() - ROSE_SHORT_LITERAL_LEN_MAX); +rose_literal_id getFragment(rose_literal_id lit) { + if (lit.s.length() > ROSE_SHORT_LITERAL_LEN_MAX) { + // Trim to last ROSE_SHORT_LITERAL_LEN_MAX bytes. 
+ lit.s.erase(0, lit.s.length() - ROSE_SHORT_LITERAL_LEN_MAX); } - DEBUG_PRINTF("fragment: %s\n", dumpString(lit.s).c_str()); - return lit; + DEBUG_PRINTF("fragment: %s\n", dumpString(lit.s).c_str()); + return lit; } static -vector<LitFragment> groupByFragment(const RoseBuildImpl &build) { - vector<LitFragment> fragments; - u32 frag_id = 0; +vector<LitFragment> groupByFragment(const RoseBuildImpl &build) { + vector<LitFragment> fragments; + u32 frag_id = 0; - struct FragmentInfo { - vector<u32> lit_ids; - rose_group groups = 0; - }; + struct FragmentInfo { + vector<u32> lit_ids; + rose_group groups = 0; + }; - map<rose_literal_id, FragmentInfo> frag_info; + map<rose_literal_id, FragmentInfo> frag_info; - for (u32 lit_id = 0; lit_id < build.literals.size(); lit_id++) { - const auto &lit = build.literals.at(lit_id); - const auto &info = build.literal_info.at(lit_id); + for (u32 lit_id = 0; lit_id < build.literals.size(); lit_id++) { + const auto &lit = build.literals.at(lit_id); + const auto &info = build.literal_info.at(lit_id); - if (!isUsedLiteral(build, lit_id)) { - DEBUG_PRINTF("lit %u is unused\n", lit_id); - continue; + if (!isUsedLiteral(build, lit_id)) { + DEBUG_PRINTF("lit %u is unused\n", lit_id); + continue; } - if (lit.table == ROSE_EVENT) { - DEBUG_PRINTF("lit %u is an event\n", lit_id); + if (lit.table == ROSE_EVENT) { + DEBUG_PRINTF("lit %u is an event\n", lit_id); continue; } - auto groups = info.group_mask; + auto groups = info.group_mask; - if (lit.s.length() < ROSE_SHORT_LITERAL_LEN_MAX) { - fragments.emplace_back(frag_id, lit.s, groups, lit_id); - frag_id++; - continue; + if (lit.s.length() < ROSE_SHORT_LITERAL_LEN_MAX) { + fragments.emplace_back(frag_id, lit.s, groups, lit_id); + frag_id++; + continue; } - DEBUG_PRINTF("fragment candidate: lit_id=%u %s\n", lit_id, - dumpString(lit.s).c_str()); - auto &fi = frag_info[getFragment(lit)]; - fi.lit_ids.push_back(lit_id); - fi.groups |= groups; - } - - for (auto &m : frag_info) { - auto &lit = m.first; - auto &fi = m.second; - DEBUG_PRINTF("frag %s -> ids: %s\n", dumpString(m.first.s).c_str(), - as_string_list(fi.lit_ids).c_str()); - fragments.emplace_back(frag_id, lit.s, fi.groups, move(fi.lit_ids)); - frag_id++; - assert(frag_id == fragments.size()); - } - - return fragments; -} - -static -void buildIncludedIdMap(unordered_map<u32, pair<u32, u8>> &includedIdMap, - const LitProto *litProto) { - if (!litProto) { - return; - } - const auto &proto = *litProto->hwlmProto; - for (const auto &lit : proto.lits) { - if (contains(includedIdMap, lit.id)) { - const auto &included_id = includedIdMap[lit.id].first; - const auto &squash = includedIdMap[lit.id].second; - // The squash behavior should be the same for the same literal - // in different literal matchers. 
- if (lit.included_id != included_id || - lit.squash != squash) { - includedIdMap[lit.id] = make_pair(INVALID_LIT_ID, 0); - DEBUG_PRINTF("find different included info for the" - " same literal\n"); - } - } else if (lit.included_id != INVALID_LIT_ID) { - includedIdMap[lit.id] = make_pair(lit.included_id, lit.squash); - } else { - includedIdMap[lit.id] = make_pair(INVALID_LIT_ID, 0); - } - } -} - -static -void findInclusionGroups(vector<LitFragment> &fragments, - LitProto *fproto, LitProto *drproto, - LitProto *eproto, LitProto *sbproto) { - unordered_map<u32, pair<u32, u8>> includedIdMap; - unordered_map<u32, pair<u32, u8>> includedDelayIdMap; - buildIncludedIdMap(includedIdMap, fproto); - buildIncludedIdMap(includedDelayIdMap, drproto); - buildIncludedIdMap(includedIdMap, eproto); - buildIncludedIdMap(includedIdMap, sbproto); - - size_t fragNum = fragments.size(); - vector<u32> candidates; - for (size_t j = 0; j < fragNum; j++) { - DEBUG_PRINTF("frag id %lu\n", j); - u32 id = j; - if (contains(includedIdMap, id) || - contains(includedDelayIdMap, id)) { - candidates.push_back(j); - DEBUG_PRINTF("find candidate\n"); - } - } - - for (const auto &c : candidates) { - auto &frag = fragments[c]; - u32 id = c; - if (contains(includedIdMap, id) && - includedIdMap[id].first != INVALID_LIT_ID) { - const auto &childId = includedIdMap[id]; - frag.included_frag_id = childId.first; - frag.squash = childId.second; - DEBUG_PRINTF("frag id %u child frag id %u\n", c, - frag.included_frag_id); - } - - if (contains(includedDelayIdMap, id) && - includedDelayIdMap[id].first != INVALID_LIT_ID) { - const auto &childId = includedDelayIdMap[id]; - frag.included_delay_frag_id = childId.first; - frag.delay_squash = childId.second; - - DEBUG_PRINTF("delay frag id %u child frag id %u\n", c, - frag.included_delay_frag_id); - } - } -} - -static -void buildFragmentPrograms(const RoseBuildImpl &build, - vector<LitFragment> &fragments, - build_context &bc, ProgramBuild &prog_build, - const vector<vector<RoseEdge>> &lit_edge_map) { - // Sort fragments based on literal length and case info to build - // included literal programs before their parent programs. - vector<LitFragment> ordered_fragments(fragments); - stable_sort(begin(ordered_fragments), end(ordered_fragments), - [](const LitFragment &a, const LitFragment &b) { - auto len1 = a.s.length(); - auto caseful1 = !a.s.any_nocase(); - auto len2 = b.s.length(); - auto caseful2 = !b.s.any_nocase(); - return tie(len1, caseful1) < tie(len2, caseful2); - }); - - for (auto &frag : ordered_fragments) { - auto &pfrag = fragments[frag.fragment_id]; - DEBUG_PRINTF("frag_id=%u, lit_ids=[%s]\n", pfrag.fragment_id, - as_string_list(pfrag.lit_ids).c_str()); - - auto lit_prog = makeFragmentProgram(build, bc, prog_build, - pfrag.lit_ids, lit_edge_map); - if (pfrag.included_frag_id != INVALID_FRAG_ID && - !lit_prog.empty()) { - auto &cfrag = fragments[pfrag.included_frag_id]; - assert(pfrag.s.length() >= cfrag.s.length() && - !pfrag.s.any_nocase() >= !cfrag.s.any_nocase()); - u32 child_offset = cfrag.lit_program_offset; - DEBUG_PRINTF("child %u offset %u\n", cfrag.fragment_id, - child_offset); - addIncludedJumpProgram(lit_prog, child_offset, pfrag.squash); - } - pfrag.lit_program_offset = writeProgram(bc, move(lit_prog)); - - // We only do delayed rebuild in streaming mode. 
- if (!build.cc.streaming) { + DEBUG_PRINTF("fragment candidate: lit_id=%u %s\n", lit_id, + dumpString(lit.s).c_str()); + auto &fi = frag_info[getFragment(lit)]; + fi.lit_ids.push_back(lit_id); + fi.groups |= groups; + } + + for (auto &m : frag_info) { + auto &lit = m.first; + auto &fi = m.second; + DEBUG_PRINTF("frag %s -> ids: %s\n", dumpString(m.first.s).c_str(), + as_string_list(fi.lit_ids).c_str()); + fragments.emplace_back(frag_id, lit.s, fi.groups, move(fi.lit_ids)); + frag_id++; + assert(frag_id == fragments.size()); + } + + return fragments; +} + +static +void buildIncludedIdMap(unordered_map<u32, pair<u32, u8>> &includedIdMap, + const LitProto *litProto) { + if (!litProto) { + return; + } + const auto &proto = *litProto->hwlmProto; + for (const auto &lit : proto.lits) { + if (contains(includedIdMap, lit.id)) { + const auto &included_id = includedIdMap[lit.id].first; + const auto &squash = includedIdMap[lit.id].second; + // The squash behavior should be the same for the same literal + // in different literal matchers. + if (lit.included_id != included_id || + lit.squash != squash) { + includedIdMap[lit.id] = make_pair(INVALID_LIT_ID, 0); + DEBUG_PRINTF("find different included info for the" + " same literal\n"); + } + } else if (lit.included_id != INVALID_LIT_ID) { + includedIdMap[lit.id] = make_pair(lit.included_id, lit.squash); + } else { + includedIdMap[lit.id] = make_pair(INVALID_LIT_ID, 0); + } + } +} + +static +void findInclusionGroups(vector<LitFragment> &fragments, + LitProto *fproto, LitProto *drproto, + LitProto *eproto, LitProto *sbproto) { + unordered_map<u32, pair<u32, u8>> includedIdMap; + unordered_map<u32, pair<u32, u8>> includedDelayIdMap; + buildIncludedIdMap(includedIdMap, fproto); + buildIncludedIdMap(includedDelayIdMap, drproto); + buildIncludedIdMap(includedIdMap, eproto); + buildIncludedIdMap(includedIdMap, sbproto); + + size_t fragNum = fragments.size(); + vector<u32> candidates; + for (size_t j = 0; j < fragNum; j++) { + DEBUG_PRINTF("frag id %lu\n", j); + u32 id = j; + if (contains(includedIdMap, id) || + contains(includedDelayIdMap, id)) { + candidates.push_back(j); + DEBUG_PRINTF("find candidate\n"); + } + } + + for (const auto &c : candidates) { + auto &frag = fragments[c]; + u32 id = c; + if (contains(includedIdMap, id) && + includedIdMap[id].first != INVALID_LIT_ID) { + const auto &childId = includedIdMap[id]; + frag.included_frag_id = childId.first; + frag.squash = childId.second; + DEBUG_PRINTF("frag id %u child frag id %u\n", c, + frag.included_frag_id); + } + + if (contains(includedDelayIdMap, id) && + includedDelayIdMap[id].first != INVALID_LIT_ID) { + const auto &childId = includedDelayIdMap[id]; + frag.included_delay_frag_id = childId.first; + frag.delay_squash = childId.second; + + DEBUG_PRINTF("delay frag id %u child frag id %u\n", c, + frag.included_delay_frag_id); + } + } +} + +static +void buildFragmentPrograms(const RoseBuildImpl &build, + vector<LitFragment> &fragments, + build_context &bc, ProgramBuild &prog_build, + const vector<vector<RoseEdge>> &lit_edge_map) { + // Sort fragments based on literal length and case info to build + // included literal programs before their parent programs. 
+ vector<LitFragment> ordered_fragments(fragments); + stable_sort(begin(ordered_fragments), end(ordered_fragments), + [](const LitFragment &a, const LitFragment &b) { + auto len1 = a.s.length(); + auto caseful1 = !a.s.any_nocase(); + auto len2 = b.s.length(); + auto caseful2 = !b.s.any_nocase(); + return tie(len1, caseful1) < tie(len2, caseful2); + }); + + for (auto &frag : ordered_fragments) { + auto &pfrag = fragments[frag.fragment_id]; + DEBUG_PRINTF("frag_id=%u, lit_ids=[%s]\n", pfrag.fragment_id, + as_string_list(pfrag.lit_ids).c_str()); + + auto lit_prog = makeFragmentProgram(build, bc, prog_build, + pfrag.lit_ids, lit_edge_map); + if (pfrag.included_frag_id != INVALID_FRAG_ID && + !lit_prog.empty()) { + auto &cfrag = fragments[pfrag.included_frag_id]; + assert(pfrag.s.length() >= cfrag.s.length() && + !pfrag.s.any_nocase() >= !cfrag.s.any_nocase()); + u32 child_offset = cfrag.lit_program_offset; + DEBUG_PRINTF("child %u offset %u\n", cfrag.fragment_id, + child_offset); + addIncludedJumpProgram(lit_prog, child_offset, pfrag.squash); + } + pfrag.lit_program_offset = writeProgram(bc, move(lit_prog)); + + // We only do delayed rebuild in streaming mode. + if (!build.cc.streaming) { continue; } - auto rebuild_prog = makeDelayRebuildProgram(build, prog_build, - pfrag.lit_ids); - if (pfrag.included_delay_frag_id != INVALID_FRAG_ID && - !rebuild_prog.empty()) { - auto &cfrag = fragments[pfrag.included_delay_frag_id]; - assert(pfrag.s.length() >= cfrag.s.length() && - !pfrag.s.any_nocase() >= !cfrag.s.any_nocase()); - u32 child_offset = cfrag.delay_program_offset; - DEBUG_PRINTF("child %u offset %u\n", cfrag.fragment_id, - child_offset); - addIncludedJumpProgram(rebuild_prog, child_offset, - pfrag.delay_squash); + auto rebuild_prog = makeDelayRebuildProgram(build, prog_build, + pfrag.lit_ids); + if (pfrag.included_delay_frag_id != INVALID_FRAG_ID && + !rebuild_prog.empty()) { + auto &cfrag = fragments[pfrag.included_delay_frag_id]; + assert(pfrag.s.length() >= cfrag.s.length() && + !pfrag.s.any_nocase() >= !cfrag.s.any_nocase()); + u32 child_offset = cfrag.delay_program_offset; + DEBUG_PRINTF("child %u offset %u\n", cfrag.fragment_id, + child_offset); + addIncludedJumpProgram(rebuild_prog, child_offset, + pfrag.delay_squash); } - pfrag.delay_program_offset = writeProgram(bc, move(rebuild_prog)); + pfrag.delay_program_offset = writeProgram(bc, move(rebuild_prog)); } } -static -void updateLitProtoProgramOffset(vector<LitFragment> &fragments, - LitProto &litProto, bool delay) { - auto &proto = *litProto.hwlmProto; - for (auto &lit : proto.lits) { - auto fragId = lit.id; - auto &frag = fragments[fragId]; - if (delay) { - DEBUG_PRINTF("delay_program_offset:%u\n", - frag.delay_program_offset); - lit.id = frag.delay_program_offset; - } else { - DEBUG_PRINTF("lit_program_offset:%u\n", - frag.lit_program_offset); - lit.id = frag.lit_program_offset; - } +static +void updateLitProtoProgramOffset(vector<LitFragment> &fragments, + LitProto &litProto, bool delay) { + auto &proto = *litProto.hwlmProto; + for (auto &lit : proto.lits) { + auto fragId = lit.id; + auto &frag = fragments[fragId]; + if (delay) { + DEBUG_PRINTF("delay_program_offset:%u\n", + frag.delay_program_offset); + lit.id = frag.delay_program_offset; + } else { + DEBUG_PRINTF("lit_program_offset:%u\n", + frag.lit_program_offset); + lit.id = frag.lit_program_offset; + } } } static -void updateLitProgramOffset(vector<LitFragment> &fragments, - LitProto *fproto, LitProto *drproto, - LitProto *eproto, LitProto *sbproto) { - if (fproto) { - 
updateLitProtoProgramOffset(fragments, *fproto, false); - } +void updateLitProgramOffset(vector<LitFragment> &fragments, + LitProto *fproto, LitProto *drproto, + LitProto *eproto, LitProto *sbproto) { + if (fproto) { + updateLitProtoProgramOffset(fragments, *fproto, false); + } - if (drproto) { - updateLitProtoProgramOffset(fragments, *drproto, true); - } + if (drproto) { + updateLitProtoProgramOffset(fragments, *drproto, true); + } - if (eproto) { - updateLitProtoProgramOffset(fragments, *eproto, false); + if (eproto) { + updateLitProtoProgramOffset(fragments, *eproto, false); } - if (sbproto) { - updateLitProtoProgramOffset(fragments, *sbproto, false); + if (sbproto) { + updateLitProtoProgramOffset(fragments, *sbproto, false); } } -/** - * \brief Build the interpreter programs for each literal. - */ +/** + * \brief Build the interpreter programs for each literal. + */ static -void buildLiteralPrograms(const RoseBuildImpl &build, - vector<LitFragment> &fragments, build_context &bc, - ProgramBuild &prog_build, LitProto *fproto, - LitProto *drproto, LitProto *eproto, - LitProto *sbproto) { - DEBUG_PRINTF("%zu fragments\n", fragments.size()); - auto lit_edge_map = findEdgesByLiteral(build); - - findInclusionGroups(fragments, fproto, drproto, eproto, sbproto); - - buildFragmentPrograms(build, fragments, bc, prog_build, lit_edge_map); - - // update literal program offsets for literal matcher prototypes - updateLitProgramOffset(fragments, fproto, drproto, eproto, sbproto); -} - -/** - * \brief Write delay replay programs to the bytecode. - * - * Returns the offset of the beginning of the program array, and the number of - * programs. - */ -static -pair<u32, u32> writeDelayPrograms(const RoseBuildImpl &build, - const vector<LitFragment> &fragments, - build_context &bc, - ProgramBuild &prog_build) { - auto lit_edge_map = findEdgesByLiteral(build); - - vector<u32> programs; // program offsets indexed by (delayed) lit id - unordered_map<u32, u32> cache; // program offsets we have already seen - - for (const auto &frag : fragments) { - for (const u32 lit_id : frag.lit_ids) { - const auto &info = build.literal_info.at(lit_id); - - for (const auto &delayed_lit_id : info.delayed_ids) { - DEBUG_PRINTF("lit id %u delay id %u\n", lit_id, delayed_lit_id); - auto prog = makeLiteralProgram(build, bc, prog_build, - delayed_lit_id, lit_edge_map, - false); - u32 offset = writeProgram(bc, move(prog)); - - u32 delay_id; - auto it = cache.find(offset); - if (it != end(cache)) { - delay_id = it->second; - DEBUG_PRINTF("reusing delay_id %u for offset %u\n", - delay_id, offset); - } else { - delay_id = verify_u32(programs.size()); - programs.push_back(offset); - cache.emplace(offset, delay_id); - DEBUG_PRINTF("assigned new delay_id %u for offset %u\n", - delay_id, offset); - } - prog_build.delay_programs.emplace(delayed_lit_id, delay_id); - } - } - } - - DEBUG_PRINTF("%zu delay programs\n", programs.size()); - return {bc.engine_blob.add_range(programs), verify_u32(programs.size())}; -} - -/** - * \brief Write anchored replay programs to the bytecode. - * - * Returns the offset of the beginning of the program array, and the number of - * programs. 
- */ +void buildLiteralPrograms(const RoseBuildImpl &build, + vector<LitFragment> &fragments, build_context &bc, + ProgramBuild &prog_build, LitProto *fproto, + LitProto *drproto, LitProto *eproto, + LitProto *sbproto) { + DEBUG_PRINTF("%zu fragments\n", fragments.size()); + auto lit_edge_map = findEdgesByLiteral(build); + + findInclusionGroups(fragments, fproto, drproto, eproto, sbproto); + + buildFragmentPrograms(build, fragments, bc, prog_build, lit_edge_map); + + // update literal program offsets for literal matcher prototypes + updateLitProgramOffset(fragments, fproto, drproto, eproto, sbproto); +} + +/** + * \brief Write delay replay programs to the bytecode. + * + * Returns the offset of the beginning of the program array, and the number of + * programs. + */ static -pair<u32, u32> writeAnchoredPrograms(const RoseBuildImpl &build, - const vector<LitFragment> &fragments, - build_context &bc, - ProgramBuild &prog_build) { - auto lit_edge_map = findEdgesByLiteral(build); +pair<u32, u32> writeDelayPrograms(const RoseBuildImpl &build, + const vector<LitFragment> &fragments, + build_context &bc, + ProgramBuild &prog_build) { + auto lit_edge_map = findEdgesByLiteral(build); + + vector<u32> programs; // program offsets indexed by (delayed) lit id + unordered_map<u32, u32> cache; // program offsets we have already seen + + for (const auto &frag : fragments) { + for (const u32 lit_id : frag.lit_ids) { + const auto &info = build.literal_info.at(lit_id); + + for (const auto &delayed_lit_id : info.delayed_ids) { + DEBUG_PRINTF("lit id %u delay id %u\n", lit_id, delayed_lit_id); + auto prog = makeLiteralProgram(build, bc, prog_build, + delayed_lit_id, lit_edge_map, + false); + u32 offset = writeProgram(bc, move(prog)); + + u32 delay_id; + auto it = cache.find(offset); + if (it != end(cache)) { + delay_id = it->second; + DEBUG_PRINTF("reusing delay_id %u for offset %u\n", + delay_id, offset); + } else { + delay_id = verify_u32(programs.size()); + programs.push_back(offset); + cache.emplace(offset, delay_id); + DEBUG_PRINTF("assigned new delay_id %u for offset %u\n", + delay_id, offset); + } + prog_build.delay_programs.emplace(delayed_lit_id, delay_id); + } + } + } - vector<u32> programs; // program offsets indexed by anchored id - unordered_map<u32, u32> cache; // program offsets we have already seen + DEBUG_PRINTF("%zu delay programs\n", programs.size()); + return {bc.engine_blob.add_range(programs), verify_u32(programs.size())}; +} - for (const auto &frag : fragments) { - for (const u32 lit_id : frag.lit_ids) { - const auto &lit = build.literals.at(lit_id); +/** + * \brief Write anchored replay programs to the bytecode. + * + * Returns the offset of the beginning of the program array, and the number of + * programs. + */ +static +pair<u32, u32> writeAnchoredPrograms(const RoseBuildImpl &build, + const vector<LitFragment> &fragments, + build_context &bc, + ProgramBuild &prog_build) { + auto lit_edge_map = findEdgesByLiteral(build); - if (lit.table != ROSE_ANCHORED) { - continue; - } + vector<u32> programs; // program offsets indexed by anchored id + unordered_map<u32, u32> cache; // program offsets we have already seen - // If this anchored literal can never match past - // floatingMinLiteralMatchOffset, we will never have to record it. 
- if (findMaxOffset(build, lit_id) - <= prog_build.floatingMinLiteralMatchOffset) { - DEBUG_PRINTF("can never match after " - "floatingMinLiteralMatchOffset=%u\n", - prog_build.floatingMinLiteralMatchOffset); - continue; - } + for (const auto &frag : fragments) { + for (const u32 lit_id : frag.lit_ids) { + const auto &lit = build.literals.at(lit_id); - auto prog = makeLiteralProgram(build, bc, prog_build, lit_id, - lit_edge_map, true); - u32 offset = writeProgram(bc, move(prog)); - DEBUG_PRINTF("lit_id=%u -> anch prog at %u\n", lit_id, offset); + if (lit.table != ROSE_ANCHORED) { + continue; + } - u32 anch_id; - auto it = cache.find(offset); - if (it != end(cache)) { - anch_id = it->second; - DEBUG_PRINTF("reusing anch_id %u for offset %u\n", anch_id, - offset); - } else { - anch_id = verify_u32(programs.size()); - programs.push_back(offset); - cache.emplace(offset, anch_id); - DEBUG_PRINTF("assigned new anch_id %u for offset %u\n", anch_id, - offset); - } - prog_build.anchored_programs.emplace(lit_id, anch_id); - } - } - - DEBUG_PRINTF("%zu anchored programs\n", programs.size()); - return {bc.engine_blob.add_range(programs), verify_u32(programs.size())}; -} - -/** - * \brief Returns all reports used by output-exposed engines, for which we need - * to generate programs. - */ -static -set<ReportID> findEngineReports(const RoseBuildImpl &build) { - set<ReportID> reports; - - // The small write engine uses these engine report programs. - insert(&reports, build.smwr.all_reports()); + // If this anchored literal can never match past + // floatingMinLiteralMatchOffset, we will never have to record it. + if (findMaxOffset(build, lit_id) + <= prog_build.floatingMinLiteralMatchOffset) { + DEBUG_PRINTF("can never match after " + "floatingMinLiteralMatchOffset=%u\n", + prog_build.floatingMinLiteralMatchOffset); + continue; + } - for (const auto &outfix : build.outfixes) { - insert(&reports, all_reports(outfix)); + auto prog = makeLiteralProgram(build, bc, prog_build, lit_id, + lit_edge_map, true); + u32 offset = writeProgram(bc, move(prog)); + DEBUG_PRINTF("lit_id=%u -> anch prog at %u\n", lit_id, offset); + + u32 anch_id; + auto it = cache.find(offset); + if (it != end(cache)) { + anch_id = it->second; + DEBUG_PRINTF("reusing anch_id %u for offset %u\n", anch_id, + offset); + } else { + anch_id = verify_u32(programs.size()); + programs.push_back(offset); + cache.emplace(offset, anch_id); + DEBUG_PRINTF("assigned new anch_id %u for offset %u\n", anch_id, + offset); + } + prog_build.anchored_programs.emplace(lit_id, anch_id); + } } - const auto &g = build.g; - for (auto v : vertices_range(g)) { - if (g[v].suffix) { - insert(&reports, all_reports(g[v].suffix)); + DEBUG_PRINTF("%zu anchored programs\n", programs.size()); + return {bc.engine_blob.add_range(programs), verify_u32(programs.size())}; +} + +/** + * \brief Returns all reports used by output-exposed engines, for which we need + * to generate programs. + */ +static +set<ReportID> findEngineReports(const RoseBuildImpl &build) { + set<ReportID> reports; + + // The small write engine uses these engine report programs. 
+ insert(&reports, build.smwr.all_reports()); + + for (const auto &outfix : build.outfixes) { + insert(&reports, all_reports(outfix)); + } + + const auto &g = build.g; + for (auto v : vertices_range(g)) { + if (g[v].suffix) { + insert(&reports, all_reports(g[v].suffix)); } } - DEBUG_PRINTF("%zu engine reports (of %zu)\n", reports.size(), - build.rm.numReports()); - return reports; + DEBUG_PRINTF("%zu engine reports (of %zu)\n", reports.size(), + build.rm.numReports()); + return reports; } static -pair<u32, u32> buildReportPrograms(const RoseBuildImpl &build, - build_context &bc) { - const auto reports = findEngineReports(build); - vector<u32> programs; - programs.reserve(reports.size()); +pair<u32, u32> buildReportPrograms(const RoseBuildImpl &build, + build_context &bc) { + const auto reports = findEngineReports(build); + vector<u32> programs; + programs.reserve(reports.size()); - for (ReportID id : reports) { - auto program = makeReportProgram(build, bc.needs_mpv_catchup, id); - u32 offset = writeProgram(bc, move(program)); - programs.push_back(offset); - build.rm.setProgramOffset(id, offset); - DEBUG_PRINTF("program for report %u @ %u (%zu instructions)\n", id, - programs.back(), program.size()); + for (ReportID id : reports) { + auto program = makeReportProgram(build, bc.needs_mpv_catchup, id); + u32 offset = writeProgram(bc, move(program)); + programs.push_back(offset); + build.rm.setProgramOffset(id, offset); + DEBUG_PRINTF("program for report %u @ %u (%zu instructions)\n", id, + programs.back(), program.size()); } - u32 offset = bc.engine_blob.add_range(programs); - u32 count = verify_u32(programs.size()); - return {offset, count}; + u32 offset = bc.engine_blob.add_range(programs); + u32 count = verify_u32(programs.size()); + return {offset, count}; } static -bool hasEodAnchoredSuffix(const RoseBuildImpl &build) { - const RoseGraph &g = build.g; - for (auto v : vertices_range(g)) { - if (g[v].suffix && build.isInETable(v)) { - DEBUG_PRINTF("vertex %zu is in eod table and has a suffix\n", - g[v].index); - return true; +bool hasEodAnchoredSuffix(const RoseBuildImpl &build) { + const RoseGraph &g = build.g; + for (auto v : vertices_range(g)) { + if (g[v].suffix && build.isInETable(v)) { + DEBUG_PRINTF("vertex %zu is in eod table and has a suffix\n", + g[v].index); + return true; } } - return false; + return false; } static -bool hasEodMatcher(const RoseBuildImpl &build) { - const RoseGraph &g = build.g; - for (auto v : vertices_range(g)) { - if (build.isInETable(v)) { - DEBUG_PRINTF("vertex %zu is in eod table\n", g[v].index); +bool hasEodMatcher(const RoseBuildImpl &build) { + const RoseGraph &g = build.g; + for (auto v : vertices_range(g)) { + if (build.isInETable(v)) { + DEBUG_PRINTF("vertex %zu is in eod table\n", g[v].index); return true; } } @@ -3249,104 +3249,104 @@ bool hasEodMatcher(const RoseBuildImpl &build) { } static -void addEodAnchorProgram(const RoseBuildImpl &build, const build_context &bc, - ProgramBuild &prog_build, bool in_etable, - RoseProgram &program) { - const RoseGraph &g = build.g; +void addEodAnchorProgram(const RoseBuildImpl &build, const build_context &bc, + ProgramBuild &prog_build, bool in_etable, + RoseProgram &program) { + const RoseGraph &g = build.g; - // Predecessor state id -> program block. - map<u32, RoseProgram> pred_blocks; + // Predecessor state id -> program block. 
+ map<u32, RoseProgram> pred_blocks; - for (auto v : vertices_range(g)) { - if (!g[v].eod_accept) { + for (auto v : vertices_range(g)) { + if (!g[v].eod_accept) { continue; } - DEBUG_PRINTF("vertex %zu (with %zu preds) fires on EOD\n", g[v].index, - in_degree(v, g)); + DEBUG_PRINTF("vertex %zu (with %zu preds) fires on EOD\n", g[v].index, + in_degree(v, g)); - vector<RoseEdge> edge_list; - for (const auto &e : in_edges_range(v, g)) { - RoseVertex u = source(e, g); - if (build.isInETable(u) != in_etable) { - DEBUG_PRINTF("pred %zu %s in etable\n", g[u].index, - in_etable ? "is not" : "is"); - continue; + vector<RoseEdge> edge_list; + for (const auto &e : in_edges_range(v, g)) { + RoseVertex u = source(e, g); + if (build.isInETable(u) != in_etable) { + DEBUG_PRINTF("pred %zu %s in etable\n", g[u].index, + in_etable ? "is not" : "is"); + continue; } - if (canEagerlyReportAtEod(build, e)) { - DEBUG_PRINTF("already done report for vertex %zu\n", - g[u].index); - continue; + if (canEagerlyReportAtEod(build, e)) { + DEBUG_PRINTF("already done report for vertex %zu\n", + g[u].index); + continue; } - edge_list.push_back(e); + edge_list.push_back(e); } - const bool multiple_preds = edge_list.size() > 1; - for (const auto &e : edge_list) { - RoseVertex u = source(e, g); - assert(contains(bc.roleStateIndices, u)); - u32 pred_state = bc.roleStateIndices.at(u); - pred_blocks[pred_state].add_block( - makeEodAnchorProgram(build, prog_build, e, multiple_preds)); + const bool multiple_preds = edge_list.size() > 1; + for (const auto &e : edge_list) { + RoseVertex u = source(e, g); + assert(contains(bc.roleStateIndices, u)); + u32 pred_state = bc.roleStateIndices.at(u); + pred_blocks[pred_state].add_block( + makeEodAnchorProgram(build, prog_build, e, multiple_preds)); } } - addPredBlocks(pred_blocks, bc.roleStateIndices.size(), program); + addPredBlocks(pred_blocks, bc.roleStateIndices.size(), program); } static -void addEodEventProgram(const RoseBuildImpl &build, build_context &bc, - ProgramBuild &prog_build, RoseProgram &program) { - if (build.eod_event_literal_id == MO_INVALID_IDX) { - return; - } +void addEodEventProgram(const RoseBuildImpl &build, build_context &bc, + ProgramBuild &prog_build, RoseProgram &program) { + if (build.eod_event_literal_id == MO_INVALID_IDX) { + return; + } - const RoseGraph &g = build.g; - const auto &lit_info = build.literal_info.at(build.eod_event_literal_id); - assert(lit_info.delayed_ids.empty()); - assert(!lit_info.squash_group); - assert(!lit_info.requires_benefits); - - // Collect all edges leading into EOD event literal vertices. - vector<RoseEdge> edge_list; - for (const auto &v : lit_info.vertices) { - for (const auto &e : in_edges_range(v, g)) { - edge_list.push_back(e); + const RoseGraph &g = build.g; + const auto &lit_info = build.literal_info.at(build.eod_event_literal_id); + assert(lit_info.delayed_ids.empty()); + assert(!lit_info.squash_group); + assert(!lit_info.requires_benefits); + + // Collect all edges leading into EOD event literal vertices. + vector<RoseEdge> edge_list; + for (const auto &v : lit_info.vertices) { + for (const auto &e : in_edges_range(v, g)) { + edge_list.push_back(e); } - } + } - // Sort edge list for determinism, prettiness. - sort(begin(edge_list), end(edge_list), - [&g](const RoseEdge &a, const RoseEdge &b) { - return tie(g[source(a, g)].index, g[target(a, g)].index) < - tie(g[source(b, g)].index, g[target(b, g)].index); - }); + // Sort edge list for determinism, prettiness. 
+ sort(begin(edge_list), end(edge_list), + [&g](const RoseEdge &a, const RoseEdge &b) { + return tie(g[source(a, g)].index, g[target(a, g)].index) < + tie(g[source(b, g)].index, g[target(b, g)].index); + }); - auto block = makeLiteralProgram(build, bc.leftfix_info, bc.suffixes, - bc.engine_info_by_queue, - bc.roleStateIndices, prog_build, - build.eod_event_literal_id, edge_list, - false); - program.add_block(move(block)); + auto block = makeLiteralProgram(build, bc.leftfix_info, bc.suffixes, + bc.engine_info_by_queue, + bc.roleStateIndices, prog_build, + build.eod_event_literal_id, edge_list, + false); + program.add_block(move(block)); } static -RoseProgram makeEodProgram(const RoseBuildImpl &build, build_context &bc, - ProgramBuild &prog_build, u32 eodNfaIterOffset) { - RoseProgram program; +RoseProgram makeEodProgram(const RoseBuildImpl &build, build_context &bc, + ProgramBuild &prog_build, u32 eodNfaIterOffset) { + RoseProgram program; - addEodEventProgram(build, bc, prog_build, program); - addEnginesEodProgram(eodNfaIterOffset, program); - addEodAnchorProgram(build, bc, prog_build, false, program); - if (hasEodMatcher(build)) { - addMatcherEodProgram(program); + addEodEventProgram(build, bc, prog_build, program); + addEnginesEodProgram(eodNfaIterOffset, program); + addEodAnchorProgram(build, bc, prog_build, false, program); + if (hasEodMatcher(build)) { + addMatcherEodProgram(program); + } + addEodAnchorProgram(build, bc, prog_build, true, program); + if (hasEodAnchoredSuffix(build)) { + addSuffixesEodProgram(program); } - addEodAnchorProgram(build, bc, prog_build, true, program); - if (hasEodAnchoredSuffix(build)) { - addSuffixesEodProgram(program); - } - return program; + return program; } static @@ -3396,13 +3396,13 @@ void fillMatcherDistances(const RoseBuildImpl &build, RoseEngine *engine) { assert(g[v].min_offset <= g[v].max_offset); for (u32 lit_id : g[v].literals) { - const rose_literal_id &key = build.literals.at(lit_id); + const rose_literal_id &key = build.literals.at(lit_id); u32 max_d = g[v].max_offset; u32 min_d = g[v].min_offset; - DEBUG_PRINTF("checking %u: elen %zu min/max %u/%u\n", lit_id, - key.elength_including_mask(), min_d, max_d); - + DEBUG_PRINTF("checking %u: elen %zu min/max %u/%u\n", lit_id, + key.elength_including_mask(), min_d, max_d); + if (build.literal_info[lit_id].undelayed_id != lit_id) { /* this is a delayed match; need to update delay properties */ /* TODO: can delayed literals ever be in another table ? 
*/ @@ -3422,9 +3422,9 @@ void fillMatcherDistances(const RoseBuildImpl &build, RoseEngine *engine) { switch (key.table) { case ROSE_FLOATING: ENSURE_AT_LEAST(&engine->floatingDistance, max_d); - if (min_d >= key.elength_including_mask()) { + if (min_d >= key.elength_including_mask()) { LIMIT_TO_AT_MOST(&engine->floatingMinDistance, - min_d - (u32)key.elength_including_mask()); + min_d - (u32)key.elength_including_mask()); } else { /* overlapped literals from rose + anchored table can * cause us to underflow due to sloppiness in @@ -3466,272 +3466,272 @@ void fillMatcherDistances(const RoseBuildImpl &build, RoseEngine *engine) { if (!engine->anchoredDistance) { return; } -} +} -static -u32 writeEagerQueueIter(const set<u32> &eager, u32 leftfixBeginQueue, - u32 queue_count, RoseEngineBlob &engine_blob) { - if (eager.empty()) { - return 0; - } - - vector<u32> vec; - for (u32 q : eager) { - assert(q >= leftfixBeginQueue); - vec.push_back(q - leftfixBeginQueue); - } - - auto iter = mmbBuildSparseIterator(vec, queue_count - leftfixBeginQueue); - return engine_blob.add_iterator(iter); -} - -static -bytecode_ptr<RoseEngine> addSmallWriteEngine(const RoseBuildImpl &build, - const RoseResources &res, - bytecode_ptr<RoseEngine> rose) { - assert(rose); - - if (roseIsPureLiteral(rose.get())) { - DEBUG_PRINTF("pure literal case, not adding smwr\n"); - return rose; - } - - u32 qual = roseQuality(res, rose.get()); - auto smwr_engine = build.smwr.build(qual); - if (!smwr_engine) { - DEBUG_PRINTF("no smwr built\n"); - return rose; - } - - const size_t mainSize = rose.size(); - const size_t smallWriteSize = smwr_engine.size(); - DEBUG_PRINTF("adding smwr engine, size=%zu\n", smallWriteSize); - - const size_t smwrOffset = ROUNDUP_CL(mainSize); - const size_t newSize = smwrOffset + smallWriteSize; - - auto rose2 = make_zeroed_bytecode_ptr<RoseEngine>(newSize, 64); - char *ptr = (char *)rose2.get(); - memcpy(ptr, rose.get(), mainSize); - memcpy(ptr + smwrOffset, smwr_engine.get(), smallWriteSize); - - rose2->smallWriteOffset = verify_u32(smwrOffset); - rose2->size = verify_u32(newSize); - - return rose2; -} - -/** - * \brief Returns the pair (number of literals, max length) for all real - * literals in the floating table that are in-use. - */ -static -pair<size_t, size_t> floatingCountAndMaxLen(const RoseBuildImpl &build) { - size_t num = 0; - size_t max_len = 0; - - for (u32 id = 0; id < build.literals.size(); id++) { - const rose_literal_id &lit = build.literals.at(id); - - if (lit.table != ROSE_FLOATING) { - continue; - } - if (lit.delay) { - // Skip delayed literals, so that we only count the undelayed - // version that ends up in the HWLM table. - continue; - } - if (!isUsedLiteral(build, id)) { - continue; - } - - num++; - max_len = max(max_len, lit.s.length()); - } - DEBUG_PRINTF("%zu floating literals with max_len=%zu\n", num, max_len); - return {num, max_len}; -} - -size_t calcLongLitThreshold(const RoseBuildImpl &build, - const size_t historyRequired) { - const auto &cc = build.cc; - - // In block mode, we don't have history, so we don't need long literal - // support and can just use "medium-length" literal confirm. TODO: we could - // specialize further and have a block mode literal confirm instruction. - if (!cc.streaming) { - return SIZE_MAX; - } - - size_t longLitLengthThreshold = ROSE_LONG_LITERAL_THRESHOLD_MIN; - - // Expand to size of history we've already allocated. Note that we need N-1 - // bytes of history to match a literal of length N. 
- longLitLengthThreshold = max(longLitLengthThreshold, historyRequired + 1); - - // If we only have one literal, allow for a larger value in order to avoid - // building a long literal table for a trivial Noodle case that we could - // fit in history. - const auto num_len = floatingCountAndMaxLen(build); - if (num_len.first == 1) { - if (num_len.second > longLitLengthThreshold) { - DEBUG_PRINTF("expanding for single literal of length %zu\n", - num_len.second); - longLitLengthThreshold = num_len.second; - } - } - - // Clamp to max history available. - longLitLengthThreshold = - min(longLitLengthThreshold, size_t{cc.grey.maxHistoryAvailable} + 1); - - return longLitLengthThreshold; -} - -static -map<left_id, u32> makeLeftQueueMap(const RoseGraph &g, - const map<RoseVertex, left_build_info> &leftfix_info) { - map<left_id, u32> lqm; - for (const auto &e : leftfix_info) { - if (e.second.has_lookaround) { - continue; - } - DEBUG_PRINTF("%zu: using queue %u\n", g[e.first].index, e.second.queue); - assert(e.second.queue != INVALID_QUEUE); - left_id left(g[e.first].left); - assert(!contains(lqm, left) || lqm[left] == e.second.queue); - lqm[left] = e.second.queue; - } - - return lqm; -} - -bytecode_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) { - // We keep all our offsets, counts etc. in a prototype RoseEngine which we - // will copy into the real one once it is allocated: we can't do this - // until we know how big it will be. - RoseEngine proto; - memset(&proto, 0, sizeof(proto)); - - // Set scanning mode. - if (!cc.streaming) { - proto.mode = HS_MODE_BLOCK; - } else if (cc.vectored) { - proto.mode = HS_MODE_VECTORED; - } else { - proto.mode = HS_MODE_STREAM; - } - - DerivedBoundaryReports dboundary(boundary); - - size_t historyRequired = calcHistoryRequired(); // Updated by HWLM. - size_t longLitLengthThreshold = calcLongLitThreshold(*this, - historyRequired); - DEBUG_PRINTF("longLitLengthThreshold=%zu\n", longLitLengthThreshold); - - vector<LitFragment> fragments = groupByFragment(*this); - - auto anchored_dfas = buildAnchoredDfas(*this, fragments); - - build_context bc; - u32 floatingMinLiteralMatchOffset - = findMinFloatingLiteralMatch(*this, anchored_dfas); - recordResources(bc.resources, *this, anchored_dfas, fragments); - bc.needs_mpv_catchup = needsMpvCatchup(*this); - - makeBoundaryPrograms(*this, bc, boundary, dboundary, proto.boundary); - - tie(proto.reportProgramOffset, proto.reportProgramCount) = - buildReportPrograms(*this, bc); - - // Build NFAs - bool mpv_as_outfix; - prepMpv(*this, bc, &historyRequired, &mpv_as_outfix); - proto.outfixBeginQueue = qif.allocated_count(); - if (!prepOutfixes(*this, bc, &historyRequired)) { - return nullptr; - } - proto.outfixEndQueue = qif.allocated_count(); - proto.leftfixBeginQueue = proto.outfixEndQueue; - - set<u32> no_retrigger_queues; - set<u32> eager_queues; - - /* Note: buildNfas may reduce the lag for vertices that have prefixes */ - if (!buildNfas(*this, bc, qif, &no_retrigger_queues, &eager_queues, - &proto.leftfixBeginQueue)) { - return nullptr; - } - u32 eodNfaIterOffset = buildEodNfaIterator(bc, proto.leftfixBeginQueue); - buildCountingMiracles(bc); - - u32 queue_count = qif.allocated_count(); /* excludes anchored matcher q; - * som rev nfas */ - if (queue_count > cc.grey.limitRoseEngineCount) { - throw ResourceLimitError(); - } - - // Enforce role table resource limit. 
- if (num_vertices(g) > cc.grey.limitRoseRoleCount) { - throw ResourceLimitError(); - } - - bc.roleStateIndices = assignStateIndices(*this); - - u32 laggedRoseCount = 0; - vector<LeftNfaInfo> leftInfoTable; - buildLeftInfoTable(*this, bc, eager_queues, proto.leftfixBeginQueue, - queue_count - proto.leftfixBeginQueue, leftInfoTable, - &laggedRoseCount, &historyRequired); - - // Information only needed for program construction. - ProgramBuild prog_build(floatingMinLiteralMatchOffset, - longLitLengthThreshold, needsCatchup(*this)); - prog_build.vertex_group_map = getVertexGroupMap(*this); - prog_build.squashable_groups = getSquashableGroups(*this); - - tie(proto.anchoredProgramOffset, proto.anchored_count) = - writeAnchoredPrograms(*this, fragments, bc, prog_build); - - tie(proto.delayProgramOffset, proto.delay_count) = - writeDelayPrograms(*this, fragments, bc, prog_build); - - // Build floating HWLM matcher prototype. - rose_group fgroups = 0; - auto fproto = buildFloatingMatcherProto(*this, fragments, - longLitLengthThreshold, - &fgroups, &historyRequired); - - // Build delay rebuild HWLM matcher prototype. - auto drproto = buildDelayRebuildMatcherProto(*this, fragments, - longLitLengthThreshold); - - // Build EOD-anchored HWLM matcher prototype. - auto eproto = buildEodAnchoredMatcherProto(*this, fragments); - - // Build small-block HWLM matcher prototype. - auto sbproto = buildSmallBlockMatcherProto(*this, fragments); - - buildLiteralPrograms(*this, fragments, bc, prog_build, fproto.get(), - drproto.get(), eproto.get(), sbproto.get()); - - auto eod_prog = makeEodProgram(*this, bc, prog_build, eodNfaIterOffset); - proto.eodProgramOffset = writeProgram(bc, move(eod_prog)); - - size_t longLitStreamStateRequired = 0; - proto.longLitTableOffset - = buildLongLiteralTable(*this, bc.engine_blob, bc.longLiterals, - longLitLengthThreshold, &historyRequired, - &longLitStreamStateRequired); - - proto.lastByteHistoryIterOffset = buildLastByteIter(g, bc); - proto.eagerIterOffset = writeEagerQueueIter( - eager_queues, proto.leftfixBeginQueue, queue_count, bc.engine_blob); - - addSomRevNfas(bc, proto, ssm); - - writeDkeyInfo(rm, bc.engine_blob, proto); - writeLeftInfo(bc.engine_blob, proto, leftInfoTable); +static +u32 writeEagerQueueIter(const set<u32> &eager, u32 leftfixBeginQueue, + u32 queue_count, RoseEngineBlob &engine_blob) { + if (eager.empty()) { + return 0; + } + + vector<u32> vec; + for (u32 q : eager) { + assert(q >= leftfixBeginQueue); + vec.push_back(q - leftfixBeginQueue); + } + + auto iter = mmbBuildSparseIterator(vec, queue_count - leftfixBeginQueue); + return engine_blob.add_iterator(iter); +} + +static +bytecode_ptr<RoseEngine> addSmallWriteEngine(const RoseBuildImpl &build, + const RoseResources &res, + bytecode_ptr<RoseEngine> rose) { + assert(rose); + + if (roseIsPureLiteral(rose.get())) { + DEBUG_PRINTF("pure literal case, not adding smwr\n"); + return rose; + } + + u32 qual = roseQuality(res, rose.get()); + auto smwr_engine = build.smwr.build(qual); + if (!smwr_engine) { + DEBUG_PRINTF("no smwr built\n"); + return rose; + } + + const size_t mainSize = rose.size(); + const size_t smallWriteSize = smwr_engine.size(); + DEBUG_PRINTF("adding smwr engine, size=%zu\n", smallWriteSize); + + const size_t smwrOffset = ROUNDUP_CL(mainSize); + const size_t newSize = smwrOffset + smallWriteSize; + + auto rose2 = make_zeroed_bytecode_ptr<RoseEngine>(newSize, 64); + char *ptr = (char *)rose2.get(); + memcpy(ptr, rose.get(), mainSize); + memcpy(ptr + smwrOffset, smwr_engine.get(), 
smallWriteSize); + + rose2->smallWriteOffset = verify_u32(smwrOffset); + rose2->size = verify_u32(newSize); + + return rose2; +} + +/** + * \brief Returns the pair (number of literals, max length) for all real + * literals in the floating table that are in-use. + */ +static +pair<size_t, size_t> floatingCountAndMaxLen(const RoseBuildImpl &build) { + size_t num = 0; + size_t max_len = 0; + + for (u32 id = 0; id < build.literals.size(); id++) { + const rose_literal_id &lit = build.literals.at(id); + + if (lit.table != ROSE_FLOATING) { + continue; + } + if (lit.delay) { + // Skip delayed literals, so that we only count the undelayed + // version that ends up in the HWLM table. + continue; + } + if (!isUsedLiteral(build, id)) { + continue; + } + + num++; + max_len = max(max_len, lit.s.length()); + } + DEBUG_PRINTF("%zu floating literals with max_len=%zu\n", num, max_len); + return {num, max_len}; +} + +size_t calcLongLitThreshold(const RoseBuildImpl &build, + const size_t historyRequired) { + const auto &cc = build.cc; + + // In block mode, we don't have history, so we don't need long literal + // support and can just use "medium-length" literal confirm. TODO: we could + // specialize further and have a block mode literal confirm instruction. + if (!cc.streaming) { + return SIZE_MAX; + } + + size_t longLitLengthThreshold = ROSE_LONG_LITERAL_THRESHOLD_MIN; + + // Expand to size of history we've already allocated. Note that we need N-1 + // bytes of history to match a literal of length N. + longLitLengthThreshold = max(longLitLengthThreshold, historyRequired + 1); + + // If we only have one literal, allow for a larger value in order to avoid + // building a long literal table for a trivial Noodle case that we could + // fit in history. + const auto num_len = floatingCountAndMaxLen(build); + if (num_len.first == 1) { + if (num_len.second > longLitLengthThreshold) { + DEBUG_PRINTF("expanding for single literal of length %zu\n", + num_len.second); + longLitLengthThreshold = num_len.second; + } + } + + // Clamp to max history available. + longLitLengthThreshold = + min(longLitLengthThreshold, size_t{cc.grey.maxHistoryAvailable} + 1); + + return longLitLengthThreshold; +} + +static +map<left_id, u32> makeLeftQueueMap(const RoseGraph &g, + const map<RoseVertex, left_build_info> &leftfix_info) { + map<left_id, u32> lqm; + for (const auto &e : leftfix_info) { + if (e.second.has_lookaround) { + continue; + } + DEBUG_PRINTF("%zu: using queue %u\n", g[e.first].index, e.second.queue); + assert(e.second.queue != INVALID_QUEUE); + left_id left(g[e.first].left); + assert(!contains(lqm, left) || lqm[left] == e.second.queue); + lqm[left] = e.second.queue; + } + + return lqm; +} + +bytecode_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) { + // We keep all our offsets, counts etc. in a prototype RoseEngine which we + // will copy into the real one once it is allocated: we can't do this + // until we know how big it will be. + RoseEngine proto; + memset(&proto, 0, sizeof(proto)); + + // Set scanning mode. + if (!cc.streaming) { + proto.mode = HS_MODE_BLOCK; + } else if (cc.vectored) { + proto.mode = HS_MODE_VECTORED; + } else { + proto.mode = HS_MODE_STREAM; + } + + DerivedBoundaryReports dboundary(boundary); + + size_t historyRequired = calcHistoryRequired(); // Updated by HWLM. 
+ size_t longLitLengthThreshold = calcLongLitThreshold(*this, + historyRequired); + DEBUG_PRINTF("longLitLengthThreshold=%zu\n", longLitLengthThreshold); + + vector<LitFragment> fragments = groupByFragment(*this); + + auto anchored_dfas = buildAnchoredDfas(*this, fragments); + + build_context bc; + u32 floatingMinLiteralMatchOffset + = findMinFloatingLiteralMatch(*this, anchored_dfas); + recordResources(bc.resources, *this, anchored_dfas, fragments); + bc.needs_mpv_catchup = needsMpvCatchup(*this); + + makeBoundaryPrograms(*this, bc, boundary, dboundary, proto.boundary); + + tie(proto.reportProgramOffset, proto.reportProgramCount) = + buildReportPrograms(*this, bc); + + // Build NFAs + bool mpv_as_outfix; + prepMpv(*this, bc, &historyRequired, &mpv_as_outfix); + proto.outfixBeginQueue = qif.allocated_count(); + if (!prepOutfixes(*this, bc, &historyRequired)) { + return nullptr; + } + proto.outfixEndQueue = qif.allocated_count(); + proto.leftfixBeginQueue = proto.outfixEndQueue; + + set<u32> no_retrigger_queues; + set<u32> eager_queues; + + /* Note: buildNfas may reduce the lag for vertices that have prefixes */ + if (!buildNfas(*this, bc, qif, &no_retrigger_queues, &eager_queues, + &proto.leftfixBeginQueue)) { + return nullptr; + } + u32 eodNfaIterOffset = buildEodNfaIterator(bc, proto.leftfixBeginQueue); + buildCountingMiracles(bc); + + u32 queue_count = qif.allocated_count(); /* excludes anchored matcher q; + * som rev nfas */ + if (queue_count > cc.grey.limitRoseEngineCount) { + throw ResourceLimitError(); + } + + // Enforce role table resource limit. + if (num_vertices(g) > cc.grey.limitRoseRoleCount) { + throw ResourceLimitError(); + } + + bc.roleStateIndices = assignStateIndices(*this); + + u32 laggedRoseCount = 0; + vector<LeftNfaInfo> leftInfoTable; + buildLeftInfoTable(*this, bc, eager_queues, proto.leftfixBeginQueue, + queue_count - proto.leftfixBeginQueue, leftInfoTable, + &laggedRoseCount, &historyRequired); + + // Information only needed for program construction. + ProgramBuild prog_build(floatingMinLiteralMatchOffset, + longLitLengthThreshold, needsCatchup(*this)); + prog_build.vertex_group_map = getVertexGroupMap(*this); + prog_build.squashable_groups = getSquashableGroups(*this); + + tie(proto.anchoredProgramOffset, proto.anchored_count) = + writeAnchoredPrograms(*this, fragments, bc, prog_build); + + tie(proto.delayProgramOffset, proto.delay_count) = + writeDelayPrograms(*this, fragments, bc, prog_build); + + // Build floating HWLM matcher prototype. + rose_group fgroups = 0; + auto fproto = buildFloatingMatcherProto(*this, fragments, + longLitLengthThreshold, + &fgroups, &historyRequired); + + // Build delay rebuild HWLM matcher prototype. + auto drproto = buildDelayRebuildMatcherProto(*this, fragments, + longLitLengthThreshold); + + // Build EOD-anchored HWLM matcher prototype. + auto eproto = buildEodAnchoredMatcherProto(*this, fragments); + + // Build small-block HWLM matcher prototype. 
+ auto sbproto = buildSmallBlockMatcherProto(*this, fragments); + + buildLiteralPrograms(*this, fragments, bc, prog_build, fproto.get(), + drproto.get(), eproto.get(), sbproto.get()); + + auto eod_prog = makeEodProgram(*this, bc, prog_build, eodNfaIterOffset); + proto.eodProgramOffset = writeProgram(bc, move(eod_prog)); + + size_t longLitStreamStateRequired = 0; + proto.longLitTableOffset + = buildLongLiteralTable(*this, bc.engine_blob, bc.longLiterals, + longLitLengthThreshold, &historyRequired, + &longLitStreamStateRequired); + + proto.lastByteHistoryIterOffset = buildLastByteIter(g, bc); + proto.eagerIterOffset = writeEagerQueueIter( + eager_queues, proto.leftfixBeginQueue, queue_count, bc.engine_blob); + + addSomRevNfas(bc, proto, ssm); + + writeDkeyInfo(rm, bc.engine_blob, proto); + writeLeftInfo(bc.engine_blob, proto, leftInfoTable); writeLogicalInfo(rm, bc.engine_blob, proto); auto flushComb_prog = makeFlushCombProgram(proto); @@ -3741,154 +3741,154 @@ bytecode_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) { proto.lastFlushCombProgramOffset = writeProgram(bc, move(lastFlushComb_prog)); - // Build anchored matcher. - auto atable = buildAnchoredMatcher(*this, fragments, anchored_dfas); + // Build anchored matcher. + auto atable = buildAnchoredMatcher(*this, fragments, anchored_dfas); if (atable) { - proto.amatcherOffset = bc.engine_blob.add(atable); + proto.amatcherOffset = bc.engine_blob.add(atable); } - - // Build floating HWLM matcher. - auto ftable = buildHWLMMatcher(*this, fproto.get()); + + // Build floating HWLM matcher. + auto ftable = buildHWLMMatcher(*this, fproto.get()); if (ftable) { - proto.fmatcherOffset = bc.engine_blob.add(ftable); - bc.resources.has_floating = true; - } - - // Build delay rebuild HWLM matcher. - auto drtable = buildHWLMMatcher(*this, drproto.get()); - if (drtable) { - proto.drmatcherOffset = bc.engine_blob.add(drtable); - } - - // Build EOD-anchored HWLM matcher. - auto etable = buildHWLMMatcher(*this, eproto.get()); + proto.fmatcherOffset = bc.engine_blob.add(ftable); + bc.resources.has_floating = true; + } + + // Build delay rebuild HWLM matcher. + auto drtable = buildHWLMMatcher(*this, drproto.get()); + if (drtable) { + proto.drmatcherOffset = bc.engine_blob.add(drtable); + } + + // Build EOD-anchored HWLM matcher. + auto etable = buildHWLMMatcher(*this, eproto.get()); if (etable) { - proto.ematcherOffset = bc.engine_blob.add(etable); + proto.ematcherOffset = bc.engine_blob.add(etable); } - - // Build small-block HWLM matcher. - auto sbtable = buildHWLMMatcher(*this, sbproto.get()); + + // Build small-block HWLM matcher. + auto sbtable = buildHWLMMatcher(*this, sbproto.get()); if (sbtable) { - proto.sbmatcherOffset = bc.engine_blob.add(sbtable); + proto.sbmatcherOffset = bc.engine_blob.add(sbtable); } - proto.activeArrayCount = proto.leftfixBeginQueue; + proto.activeArrayCount = proto.leftfixBeginQueue; - proto.anchorStateSize = atable ? anchoredStateSize(*atable) : 0; + proto.anchorStateSize = atable ? anchoredStateSize(*atable) : 0; - DEBUG_PRINTF("rose history required %zu\n", historyRequired); - assert(!cc.streaming || historyRequired <= cc.grey.maxHistoryAvailable); + DEBUG_PRINTF("rose history required %zu\n", historyRequired); + assert(!cc.streaming || historyRequired <= cc.grey.maxHistoryAvailable); - // Some SOM schemes (reverse NFAs, for example) may require more history. - historyRequired = max(historyRequired, (size_t)ssm.somHistoryRequired()); + // Some SOM schemes (reverse NFAs, for example) may require more history. 
+ historyRequired = max(historyRequired, (size_t)ssm.somHistoryRequired()); - assert(!cc.streaming || historyRequired <= - max(cc.grey.maxHistoryAvailable, cc.grey.somMaxRevNfaLength)); + assert(!cc.streaming || historyRequired <= + max(cc.grey.maxHistoryAvailable, cc.grey.somMaxRevNfaLength)); - fillStateOffsets(*this, bc.roleStateIndices.size(), proto.anchorStateSize, - proto.activeArrayCount, proto.activeLeftCount, - laggedRoseCount, longLitStreamStateRequired, - historyRequired, &proto.stateOffsets); + fillStateOffsets(*this, bc.roleStateIndices.size(), proto.anchorStateSize, + proto.activeArrayCount, proto.activeLeftCount, + laggedRoseCount, longLitStreamStateRequired, + historyRequired, &proto.stateOffsets); - // Write in NfaInfo structures. This will also update state size - // information in proto. - writeNfaInfo(*this, bc, proto, no_retrigger_queues); + // Write in NfaInfo structures. This will also update state size + // information in proto. + writeNfaInfo(*this, bc, proto, no_retrigger_queues); - scatter_plan_raw state_scatter = buildStateScatterPlan( - sizeof(u8), bc.roleStateIndices.size(), proto.activeLeftCount, - proto.rosePrefixCount, proto.stateOffsets, cc.streaming, - proto.activeArrayCount, proto.outfixBeginQueue, proto.outfixEndQueue); + scatter_plan_raw state_scatter = buildStateScatterPlan( + sizeof(u8), bc.roleStateIndices.size(), proto.activeLeftCount, + proto.rosePrefixCount, proto.stateOffsets, cc.streaming, + proto.activeArrayCount, proto.outfixBeginQueue, proto.outfixEndQueue); - u32 currOffset; /* relative to base of RoseEngine */ - if (!bc.engine_blob.empty()) { - currOffset = bc.engine_blob.base_offset + bc.engine_blob.size(); - } else { - currOffset = sizeof(RoseEngine); - } + u32 currOffset; /* relative to base of RoseEngine */ + if (!bc.engine_blob.empty()) { + currOffset = bc.engine_blob.base_offset + bc.engine_blob.size(); + } else { + currOffset = sizeof(RoseEngine); + } - currOffset = ROUNDUP_CL(currOffset); - DEBUG_PRINTF("currOffset %u\n", currOffset); + currOffset = ROUNDUP_CL(currOffset); + DEBUG_PRINTF("currOffset %u\n", currOffset); - currOffset = ROUNDUP_N(currOffset, alignof(scatter_unit_u64a)); - u32 state_scatter_aux_offset = currOffset; - currOffset += aux_size(state_scatter); + currOffset = ROUNDUP_N(currOffset, alignof(scatter_unit_u64a)); + u32 state_scatter_aux_offset = currOffset; + currOffset += aux_size(state_scatter); - proto.historyRequired = verify_u32(historyRequired); - proto.ekeyCount = rm.numEkeys(); + proto.historyRequired = verify_u32(historyRequired); + proto.ekeyCount = rm.numEkeys(); - proto.somHorizon = ssm.somPrecision(); - proto.somLocationCount = ssm.numSomSlots(); - proto.somLocationFatbitSize = fatbit_size(proto.somLocationCount); + proto.somHorizon = ssm.somPrecision(); + proto.somLocationCount = ssm.numSomSlots(); + proto.somLocationFatbitSize = fatbit_size(proto.somLocationCount); - proto.runtimeImpl = pickRuntimeImpl(*this, bc.resources, - proto.outfixEndQueue); - proto.mpvTriggeredByLeaf = anyEndfixMpvTriggers(*this); + proto.runtimeImpl = pickRuntimeImpl(*this, bc.resources, + proto.outfixEndQueue); + proto.mpvTriggeredByLeaf = anyEndfixMpvTriggers(*this); - proto.queueCount = queue_count; - proto.activeQueueArraySize = fatbit_size(queue_count); - proto.handledKeyCount = prog_build.handledKeys.size(); - proto.handledKeyFatbitSize = fatbit_size(proto.handledKeyCount); + proto.queueCount = queue_count; + proto.activeQueueArraySize = fatbit_size(queue_count); + proto.handledKeyCount = 
prog_build.handledKeys.size(); + proto.handledKeyFatbitSize = fatbit_size(proto.handledKeyCount); - proto.rolesWithStateCount = bc.roleStateIndices.size(); + proto.rolesWithStateCount = bc.roleStateIndices.size(); - proto.initMpvNfa = mpv_as_outfix ? 0 : MO_INVALID_IDX; - proto.stateSize = mmbit_size(bc.roleStateIndices.size()); + proto.initMpvNfa = mpv_as_outfix ? 0 : MO_INVALID_IDX; + proto.stateSize = mmbit_size(bc.roleStateIndices.size()); - proto.delay_fatbit_size = fatbit_size(proto.delay_count); - proto.anchored_fatbit_size = fatbit_size(proto.anchored_count); + proto.delay_fatbit_size = fatbit_size(proto.delay_count); + proto.anchored_fatbit_size = fatbit_size(proto.anchored_count); // The Small Write matcher is (conditionally) added to the RoseEngine in // another pass by the caller. Set to zero (meaning no SMWR engine) for // now. - proto.smallWriteOffset = 0; - - proto.amatcherMinWidth = findMinWidth(*this, ROSE_ANCHORED); - proto.fmatcherMinWidth = findMinWidth(*this, ROSE_FLOATING); - proto.eodmatcherMinWidth = findMinWidth(*this, ROSE_EOD_ANCHORED); - proto.amatcherMaxBiAnchoredWidth = findMaxBAWidth(*this, ROSE_ANCHORED); - proto.fmatcherMaxBiAnchoredWidth = findMaxBAWidth(*this, ROSE_FLOATING); - proto.minWidth = hasBoundaryReports(boundary) ? 0 : minWidth; - proto.minWidthExcludingBoundaries = minWidth; - proto.floatingMinLiteralMatchOffset = floatingMinLiteralMatchOffset; - - proto.maxBiAnchoredWidth = findMaxBAWidth(*this); - proto.noFloatingRoots = hasNoFloatingRoots(); - proto.requiresEodCheck = hasEodAnchors(*this, bc, proto.outfixEndQueue); - proto.hasOutfixesInSmallBlock = hasNonSmallBlockOutfix(outfixes); - proto.canExhaust = rm.patternSetCanExhaust(); - proto.hasSom = hasSom; + proto.smallWriteOffset = 0; + + proto.amatcherMinWidth = findMinWidth(*this, ROSE_ANCHORED); + proto.fmatcherMinWidth = findMinWidth(*this, ROSE_FLOATING); + proto.eodmatcherMinWidth = findMinWidth(*this, ROSE_EOD_ANCHORED); + proto.amatcherMaxBiAnchoredWidth = findMaxBAWidth(*this, ROSE_ANCHORED); + proto.fmatcherMaxBiAnchoredWidth = findMaxBAWidth(*this, ROSE_FLOATING); + proto.minWidth = hasBoundaryReports(boundary) ? 0 : minWidth; + proto.minWidthExcludingBoundaries = minWidth; + proto.floatingMinLiteralMatchOffset = floatingMinLiteralMatchOffset; + + proto.maxBiAnchoredWidth = findMaxBAWidth(*this); + proto.noFloatingRoots = hasNoFloatingRoots(); + proto.requiresEodCheck = hasEodAnchors(*this, bc, proto.outfixEndQueue); + proto.hasOutfixesInSmallBlock = hasNonSmallBlockOutfix(outfixes); + proto.canExhaust = rm.patternSetCanExhaust(); + proto.hasSom = hasSom; /* populate anchoredDistance, floatingDistance, floatingMinDistance, etc */ - fillMatcherDistances(*this, &proto); + fillMatcherDistances(*this, &proto); - proto.initialGroups = getInitialGroups(); - proto.floating_group_mask = fgroups; - proto.totalNumLiterals = verify_u32(literal_info.size()); - proto.asize = verify_u32(atable.size()); - proto.ematcherRegionSize = ematcher_region_size; + proto.initialGroups = getInitialGroups(); + proto.floating_group_mask = fgroups; + proto.totalNumLiterals = verify_u32(literal_info.size()); + proto.asize = verify_u32(atable.size()); + proto.ematcherRegionSize = ematcher_region_size; - proto.size = currOffset; + proto.size = currOffset; - // Time to allocate the real RoseEngine structure, at cacheline alignment. - auto engine = make_zeroed_bytecode_ptr<RoseEngine>(currOffset, 64); - assert(engine); // will have thrown bad_alloc otherwise. 
+ // Time to allocate the real RoseEngine structure, at cacheline alignment. + auto engine = make_zeroed_bytecode_ptr<RoseEngine>(currOffset, 64); + assert(engine); // will have thrown bad_alloc otherwise. - // Copy in our prototype engine data. - memcpy(engine.get(), &proto, sizeof(proto)); + // Copy in our prototype engine data. + memcpy(engine.get(), &proto, sizeof(proto)); - write_out(&engine->state_init, (char *)engine.get(), state_scatter, - state_scatter_aux_offset); + write_out(&engine->state_init, (char *)engine.get(), state_scatter, + state_scatter_aux_offset); - // Copy in the engine blob. - bc.engine_blob.write_bytes(engine.get()); + // Copy in the engine blob. + bc.engine_blob.write_bytes(engine.get()); - // Add a small write engine if appropriate. - engine = addSmallWriteEngine(*this, bc.resources, move(engine)); + // Add a small write engine if appropriate. + engine = addSmallWriteEngine(*this, bc.resources, move(engine)); - DEBUG_PRINTF("rose done %p\n", engine.get()); + DEBUG_PRINTF("rose done %p\n", engine.get()); - dumpRose(*this, fragments, makeLeftQueueMap(g, bc.leftfix_info), - bc.suffixes, engine.get()); + dumpRose(*this, fragments, makeLeftQueueMap(g, bc.leftfix_info), + bc.suffixes, engine.get()); return engine; } diff --git a/contrib/libs/hyperscan/src/rose/rose_build_castle.cpp b/contrib/libs/hyperscan/src/rose/rose_build_castle.cpp index fd8b512107..59bab3b1f9 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_castle.cpp +++ b/contrib/libs/hyperscan/src/rose/rose_build_castle.cpp @@ -1,396 +1,396 @@ -/* - * Copyright (c) 2015-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "rose_build_castle.h" - -#include "rose_build_impl.h" -#include "ue2common.h" -#include "nfa/castlecompile.h" -#include "nfagraph/ng_holder.h" -#include "nfagraph/ng_puff.h" -#include "util/charreach.h" -#include "util/compile_context.h" -#include "util/container.h" -#include "util/dump_charclass.h" -#include "util/graph_range.h" -#include "util/ue2string.h" - -#include <map> -#include <set> -#include <string> -#include <vector> - -#include <boost/range/adaptor/map.hpp> - -using namespace std; -using boost::adaptors::map_values; - -namespace ue2 { - -static -void makeCastle(LeftEngInfo &left, - unordered_map<const NGHolder *, shared_ptr<CastleProto>> &cache) { - if (left.dfa || left.haig || left.castle) { - return; - } - if (!left.graph) { - return; - } - - const NGHolder &h = *left.graph; - DEBUG_PRINTF("prefix %p\n", &h); - - if (contains(cache, &h)) { - DEBUG_PRINTF("using cached CastleProto\n"); - left.castle = cache[&h]; - left.graph.reset(); - return; - } - - PureRepeat pr; - if (isPureRepeat(h, pr) && pr.reports.size() == 1) { - DEBUG_PRINTF("vertex preceded by infix repeat %s\n", - pr.bounds.str().c_str()); - left.castle = make_shared<CastleProto>(h.kind, pr); - cache[&h] = left.castle; - left.graph.reset(); - } -} - -static -void makeCastleSuffix(RoseBuildImpl &tbi, RoseVertex v, - unordered_map<const NGHolder *, shared_ptr<CastleProto>> &cache) { - RoseSuffixInfo &suffix = tbi.g[v].suffix; - if (!suffix.graph) { - return; - } - const NGHolder &h = *suffix.graph; - DEBUG_PRINTF("suffix %p\n", &h); - - if (contains(cache, &h)) { - DEBUG_PRINTF("using cached CastleProto\n"); - suffix.castle = cache[&h]; - suffix.graph.reset(); - return; - } - - // The MPV will probably do a better job on the cases it's designed - // for. - const bool fixed_depth = tbi.g[v].min_offset == tbi.g[v].max_offset; - if (isPuffable(h, fixed_depth, tbi.rm, tbi.cc.grey)) { - DEBUG_PRINTF("leaving suffix for puff\n"); - return; - } - - PureRepeat pr; - if (isPureRepeat(h, pr) && pr.reports.size() == 1) { - DEBUG_PRINTF("suffix repeat %s\n", pr.bounds.str().c_str()); - - // Right now, the Castle uses much more stream state to represent a - // {m,1} repeat than just leaving it to an NFA. 
- if (pr.bounds.max <= depth(1)) { - DEBUG_PRINTF("leaving for other engines\n"); - return; - } - - suffix.castle = make_shared<CastleProto>(h.kind, pr); - cache[&h] = suffix.castle; - suffix.graph.reset(); - } -} - -static -vector<rose_literal_id> literals_for_vertex(const RoseBuildImpl &tbi, - RoseVertex v) { - vector<rose_literal_id> rv; - - for (const u32 id : tbi.g[v].literals) { - rv.push_back(tbi.literals.at(id)); - } - - return rv; -} - -static -void renovateCastle(RoseBuildImpl &tbi, CastleProto *castle, - const vector<RoseVertex> &verts) { - DEBUG_PRINTF("looking to renovate\n"); - - if (castle->repeats.size() != 1) { - assert(0); /* should not have merged castles yet */ - return; - } - - PureRepeat &pr = castle->repeats.begin()->second; - if (pr.bounds.max.is_finite()) { - /* repeat cannot be turned into pseudo .* */ - return; - } - - RoseGraph &g = tbi.g; - const CharReach &cr = castle->reach(); - - DEBUG_PRINTF("cr || %zu\n", cr.count()); - - u32 allowed_to_remove = ~0; - size_t min_succ_lit_len = 0; - - for (RoseVertex v : verts) { - assert(g[v].left.castle.get() == castle); - DEBUG_PRINTF("%zu checks at lag %u\n", g[v].index, g[v].left.lag); - vector<rose_literal_id> lits = literals_for_vertex(tbi, v); - for (const auto &e : lits) { - DEBUG_PRINTF("%s +%u\n", dumpString(e.s).c_str(), e.delay); - if (e.delay) { - return; /* bail - TODO: be less lazy */ - } - - vector<CharReach> rem_local_cr; - u32 ok_count = 0; - for (auto it = e.s.end() - g[v].left.lag; it != e.s.end(); ++it) { - if (!isSubsetOf(*it, cr)) { - break; - } - - ok_count++; - } - LIMIT_TO_AT_MOST(&allowed_to_remove, ok_count); - ENSURE_AT_LEAST(&min_succ_lit_len, e.elength()); - } - } - - DEBUG_PRINTF("possible to decrease lag by %u\n", allowed_to_remove); - - - for (RoseVertex v : verts) { - assert(g[v].left.lag >= allowed_to_remove); - g[v].left.lag -= allowed_to_remove; - } - - assert(castle->repeats.size() == 1); /* should not have merged castles yet */ - - pr.bounds.max += allowed_to_remove; - - /* Although it is always safe to increase the min bound as well, we would - * rather not as a >0 min bound means that we have to store state as well. - * - * As it was legal to run with the original lag, we know that it is not - * possible to have an overlapping match which finishes within the trigger - * literal past the original lag point. However, if there is already a min - * bound constraint this would be broken if we did not also increase the - * min bound. */ - - if (pr.bounds.min > 0ULL || allowed_to_remove > min_succ_lit_len) { - pr.bounds.min += allowed_to_remove; - } -} - -void makeCastles(RoseBuildImpl &tbi) { - if (!tbi.cc.grey.allowCastle && !tbi.cc.grey.allowLbr) { - return; - } - - RoseGraph &g = tbi.g; - - // Caches so that we can reuse analysis on graphs we've seen already. 
- unordered_map<const NGHolder *, shared_ptr<CastleProto> > left_cache; - unordered_map<const NGHolder *, shared_ptr<CastleProto> > suffix_cache; - - unordered_map<CastleProto *, vector<RoseVertex>> rev; - - for (RoseVertex v : vertices_range(g)) { - if (g[v].left && !tbi.isRootSuccessor(v)) { - makeCastle(g[v].left, left_cache); - if (g[v].left.castle) { - rev[g[v].left.castle.get()].push_back(v); - } - } - - if (g[v].suffix) { - makeCastleSuffix(tbi, v, suffix_cache); - } - } - - for (const auto &e : rev) { - renovateCastle(tbi, e.first, e.second); - } -} - -bool unmakeCastles(RoseBuildImpl &tbi) { - RoseGraph &g = tbi.g; - - const size_t MAX_UNMAKE_VERTICES = 64; - - map<left_id, vector<RoseVertex> > left_castles; - map<suffix_id, vector<RoseVertex> > suffix_castles; - bool changed = false; - - for (auto v : vertices_range(g)) { - const LeftEngInfo &left = g[v].left; - if (left.castle && left.castle->repeats.size() > 1) { - left_castles[left].push_back(v); - } - const RoseSuffixInfo &suffix = g[v].suffix; - if (suffix.castle && suffix.castle->repeats.size() > 1) { - suffix_castles[suffix].push_back(v); - } - } - - for (const auto &e : left_castles) { - assert(e.first.castle()); - shared_ptr<NGHolder> h = makeHolder(*e.first.castle(), tbi.cc); - if (!h || num_vertices(*h) > MAX_UNMAKE_VERTICES) { - continue; - } - DEBUG_PRINTF("replace rose with holder (%zu vertices)\n", - num_vertices(*h)); - for (auto v : e.second) { - assert(g[v].left.castle.get() == e.first.castle()); - g[v].left.graph = h; - g[v].left.castle.reset(); - changed = true; - } - } - - for (const auto &e : suffix_castles) { - assert(e.first.castle()); - shared_ptr<NGHolder> h = makeHolder(*e.first.castle(), tbi.cc); - if (!h || num_vertices(*h) > MAX_UNMAKE_VERTICES) { - continue; - } - DEBUG_PRINTF("replace suffix with holder (%zu vertices)\n", - num_vertices(*h)); - for (auto v : e.second) { - assert(g[v].suffix.castle.get() == e.first.castle()); - g[v].suffix.graph = h; - g[v].suffix.castle.reset(); - changed = true; - } - } - - return changed; -} - -void remapCastleTops(RoseBuildImpl &tbi) { - unordered_map<CastleProto *, vector<RoseVertex>> rose_castles; - unordered_map<CastleProto *, vector<RoseVertex>> suffix_castles; - - RoseGraph &g = tbi.g; - for (auto v : vertices_range(g)) { - if (g[v].left.castle) { - rose_castles[g[v].left.castle.get()].push_back(v); - } - if (g[v].suffix.castle) { - suffix_castles[g[v].suffix.castle.get()].push_back(v); - } - } - - DEBUG_PRINTF("%zu rose castles, %zu suffix castles\n", rose_castles.size(), - suffix_castles.size()); - - map<u32, u32> top_map; - - // Remap Rose Castles. - for (const auto &rc : rose_castles) { - CastleProto *c = rc.first; - const vector<RoseVertex> &verts = rc.second; - - DEBUG_PRINTF("rose castle %p (%zu repeats) has %zu verts\n", c, - c->repeats.size(), verts.size()); - - top_map.clear(); - remapCastleTops(*c, top_map); - - // Update the tops on the edges leading into vertices in v. - for (auto v : verts) { - for (const auto &e : in_edges_range(v, g)) { - g[e].rose_top = top_map.at(g[e].rose_top); - } - } - } - - // Remap Suffix Castles. - for (const auto &e : suffix_castles) { - CastleProto *c = e.first; - const vector<RoseVertex> &verts = e.second; - - DEBUG_PRINTF("suffix castle %p (%zu repeats) has %zu verts\n", c, - c->repeats.size(), verts.size()); - - top_map.clear(); - remapCastleTops(*c, top_map); - - // Update the tops on the suffixes. 
- for (auto v : verts) { - assert(g[v].suffix); - g[v].suffix.top = top_map.at(g[v].suffix.top); - } - } -} - -bool triggerKillsRoseCastle(const RoseBuildImpl &tbi, const left_id &left, - const set<ue2_literal> &all_lits, - const RoseEdge &e) { - assert(left.castle()); - const CastleProto &c = *left.castle(); - - const depth max_width = findMaxWidth(c); - DEBUG_PRINTF("castle max width is %s\n", max_width.str().c_str()); - - /* check each pred literal to see if they all kill previous castle - * state */ - for (u32 lit_id : tbi.g[source(e, tbi.g)].literals) { - const rose_literal_id &pred_lit = tbi.literals.at(lit_id); - const ue2_literal s = findNonOverlappingTail(all_lits, pred_lit.s); - const CharReach &cr = c.reach(); - - DEBUG_PRINTF("s=%s, castle reach=%s\n", dumpString(s).c_str(), - describeClass(cr).c_str()); - - for (const auto &s_cr : s) { - if (!overlaps(cr, s_cr)) { - DEBUG_PRINTF("reach %s kills castle\n", - describeClass(s_cr).c_str()); - goto next_pred; - } - } - - if (max_width < depth(s.length())) { - DEBUG_PRINTF("literal width >= castle max width\n"); - goto next_pred; - } - - return false; - - next_pred:; - } - - return true; -} - -} // namespace ue2 +/* + * Copyright (c) 2015-2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "rose_build_castle.h" + +#include "rose_build_impl.h" +#include "ue2common.h" +#include "nfa/castlecompile.h" +#include "nfagraph/ng_holder.h" +#include "nfagraph/ng_puff.h" +#include "util/charreach.h" +#include "util/compile_context.h" +#include "util/container.h" +#include "util/dump_charclass.h" +#include "util/graph_range.h" +#include "util/ue2string.h" + +#include <map> +#include <set> +#include <string> +#include <vector> + +#include <boost/range/adaptor/map.hpp> + +using namespace std; +using boost::adaptors::map_values; + +namespace ue2 { + +static +void makeCastle(LeftEngInfo &left, + unordered_map<const NGHolder *, shared_ptr<CastleProto>> &cache) { + if (left.dfa || left.haig || left.castle) { + return; + } + if (!left.graph) { + return; + } + + const NGHolder &h = *left.graph; + DEBUG_PRINTF("prefix %p\n", &h); + + if (contains(cache, &h)) { + DEBUG_PRINTF("using cached CastleProto\n"); + left.castle = cache[&h]; + left.graph.reset(); + return; + } + + PureRepeat pr; + if (isPureRepeat(h, pr) && pr.reports.size() == 1) { + DEBUG_PRINTF("vertex preceded by infix repeat %s\n", + pr.bounds.str().c_str()); + left.castle = make_shared<CastleProto>(h.kind, pr); + cache[&h] = left.castle; + left.graph.reset(); + } +} + +static +void makeCastleSuffix(RoseBuildImpl &tbi, RoseVertex v, + unordered_map<const NGHolder *, shared_ptr<CastleProto>> &cache) { + RoseSuffixInfo &suffix = tbi.g[v].suffix; + if (!suffix.graph) { + return; + } + const NGHolder &h = *suffix.graph; + DEBUG_PRINTF("suffix %p\n", &h); + + if (contains(cache, &h)) { + DEBUG_PRINTF("using cached CastleProto\n"); + suffix.castle = cache[&h]; + suffix.graph.reset(); + return; + } + + // The MPV will probably do a better job on the cases it's designed + // for. + const bool fixed_depth = tbi.g[v].min_offset == tbi.g[v].max_offset; + if (isPuffable(h, fixed_depth, tbi.rm, tbi.cc.grey)) { + DEBUG_PRINTF("leaving suffix for puff\n"); + return; + } + + PureRepeat pr; + if (isPureRepeat(h, pr) && pr.reports.size() == 1) { + DEBUG_PRINTF("suffix repeat %s\n", pr.bounds.str().c_str()); + + // Right now, the Castle uses much more stream state to represent a + // {m,1} repeat than just leaving it to an NFA. 
+ if (pr.bounds.max <= depth(1)) { + DEBUG_PRINTF("leaving for other engines\n"); + return; + } + + suffix.castle = make_shared<CastleProto>(h.kind, pr); + cache[&h] = suffix.castle; + suffix.graph.reset(); + } +} + +static +vector<rose_literal_id> literals_for_vertex(const RoseBuildImpl &tbi, + RoseVertex v) { + vector<rose_literal_id> rv; + + for (const u32 id : tbi.g[v].literals) { + rv.push_back(tbi.literals.at(id)); + } + + return rv; +} + +static +void renovateCastle(RoseBuildImpl &tbi, CastleProto *castle, + const vector<RoseVertex> &verts) { + DEBUG_PRINTF("looking to renovate\n"); + + if (castle->repeats.size() != 1) { + assert(0); /* should not have merged castles yet */ + return; + } + + PureRepeat &pr = castle->repeats.begin()->second; + if (pr.bounds.max.is_finite()) { + /* repeat cannot be turned into pseudo .* */ + return; + } + + RoseGraph &g = tbi.g; + const CharReach &cr = castle->reach(); + + DEBUG_PRINTF("cr || %zu\n", cr.count()); + + u32 allowed_to_remove = ~0; + size_t min_succ_lit_len = 0; + + for (RoseVertex v : verts) { + assert(g[v].left.castle.get() == castle); + DEBUG_PRINTF("%zu checks at lag %u\n", g[v].index, g[v].left.lag); + vector<rose_literal_id> lits = literals_for_vertex(tbi, v); + for (const auto &e : lits) { + DEBUG_PRINTF("%s +%u\n", dumpString(e.s).c_str(), e.delay); + if (e.delay) { + return; /* bail - TODO: be less lazy */ + } + + vector<CharReach> rem_local_cr; + u32 ok_count = 0; + for (auto it = e.s.end() - g[v].left.lag; it != e.s.end(); ++it) { + if (!isSubsetOf(*it, cr)) { + break; + } + + ok_count++; + } + LIMIT_TO_AT_MOST(&allowed_to_remove, ok_count); + ENSURE_AT_LEAST(&min_succ_lit_len, e.elength()); + } + } + + DEBUG_PRINTF("possible to decrease lag by %u\n", allowed_to_remove); + + + for (RoseVertex v : verts) { + assert(g[v].left.lag >= allowed_to_remove); + g[v].left.lag -= allowed_to_remove; + } + + assert(castle->repeats.size() == 1); /* should not have merged castles yet */ + + pr.bounds.max += allowed_to_remove; + + /* Although it is always safe to increase the min bound as well, we would + * rather not as a >0 min bound means that we have to store state as well. + * + * As it was legal to run with the original lag, we know that it is not + * possible to have an overlapping match which finishes within the trigger + * literal past the original lag point. However, if there is already a min + * bound constraint this would be broken if we did not also increase the + * min bound. */ + + if (pr.bounds.min > 0ULL || allowed_to_remove > min_succ_lit_len) { + pr.bounds.min += allowed_to_remove; + } +} + +void makeCastles(RoseBuildImpl &tbi) { + if (!tbi.cc.grey.allowCastle && !tbi.cc.grey.allowLbr) { + return; + } + + RoseGraph &g = tbi.g; + + // Caches so that we can reuse analysis on graphs we've seen already. 
+ unordered_map<const NGHolder *, shared_ptr<CastleProto> > left_cache; + unordered_map<const NGHolder *, shared_ptr<CastleProto> > suffix_cache; + + unordered_map<CastleProto *, vector<RoseVertex>> rev; + + for (RoseVertex v : vertices_range(g)) { + if (g[v].left && !tbi.isRootSuccessor(v)) { + makeCastle(g[v].left, left_cache); + if (g[v].left.castle) { + rev[g[v].left.castle.get()].push_back(v); + } + } + + if (g[v].suffix) { + makeCastleSuffix(tbi, v, suffix_cache); + } + } + + for (const auto &e : rev) { + renovateCastle(tbi, e.first, e.second); + } +} + +bool unmakeCastles(RoseBuildImpl &tbi) { + RoseGraph &g = tbi.g; + + const size_t MAX_UNMAKE_VERTICES = 64; + + map<left_id, vector<RoseVertex> > left_castles; + map<suffix_id, vector<RoseVertex> > suffix_castles; + bool changed = false; + + for (auto v : vertices_range(g)) { + const LeftEngInfo &left = g[v].left; + if (left.castle && left.castle->repeats.size() > 1) { + left_castles[left].push_back(v); + } + const RoseSuffixInfo &suffix = g[v].suffix; + if (suffix.castle && suffix.castle->repeats.size() > 1) { + suffix_castles[suffix].push_back(v); + } + } + + for (const auto &e : left_castles) { + assert(e.first.castle()); + shared_ptr<NGHolder> h = makeHolder(*e.first.castle(), tbi.cc); + if (!h || num_vertices(*h) > MAX_UNMAKE_VERTICES) { + continue; + } + DEBUG_PRINTF("replace rose with holder (%zu vertices)\n", + num_vertices(*h)); + for (auto v : e.second) { + assert(g[v].left.castle.get() == e.first.castle()); + g[v].left.graph = h; + g[v].left.castle.reset(); + changed = true; + } + } + + for (const auto &e : suffix_castles) { + assert(e.first.castle()); + shared_ptr<NGHolder> h = makeHolder(*e.first.castle(), tbi.cc); + if (!h || num_vertices(*h) > MAX_UNMAKE_VERTICES) { + continue; + } + DEBUG_PRINTF("replace suffix with holder (%zu vertices)\n", + num_vertices(*h)); + for (auto v : e.second) { + assert(g[v].suffix.castle.get() == e.first.castle()); + g[v].suffix.graph = h; + g[v].suffix.castle.reset(); + changed = true; + } + } + + return changed; +} + +void remapCastleTops(RoseBuildImpl &tbi) { + unordered_map<CastleProto *, vector<RoseVertex>> rose_castles; + unordered_map<CastleProto *, vector<RoseVertex>> suffix_castles; + + RoseGraph &g = tbi.g; + for (auto v : vertices_range(g)) { + if (g[v].left.castle) { + rose_castles[g[v].left.castle.get()].push_back(v); + } + if (g[v].suffix.castle) { + suffix_castles[g[v].suffix.castle.get()].push_back(v); + } + } + + DEBUG_PRINTF("%zu rose castles, %zu suffix castles\n", rose_castles.size(), + suffix_castles.size()); + + map<u32, u32> top_map; + + // Remap Rose Castles. + for (const auto &rc : rose_castles) { + CastleProto *c = rc.first; + const vector<RoseVertex> &verts = rc.second; + + DEBUG_PRINTF("rose castle %p (%zu repeats) has %zu verts\n", c, + c->repeats.size(), verts.size()); + + top_map.clear(); + remapCastleTops(*c, top_map); + + // Update the tops on the edges leading into vertices in v. + for (auto v : verts) { + for (const auto &e : in_edges_range(v, g)) { + g[e].rose_top = top_map.at(g[e].rose_top); + } + } + } + + // Remap Suffix Castles. + for (const auto &e : suffix_castles) { + CastleProto *c = e.first; + const vector<RoseVertex> &verts = e.second; + + DEBUG_PRINTF("suffix castle %p (%zu repeats) has %zu verts\n", c, + c->repeats.size(), verts.size()); + + top_map.clear(); + remapCastleTops(*c, top_map); + + // Update the tops on the suffixes. 
+ for (auto v : verts) { + assert(g[v].suffix); + g[v].suffix.top = top_map.at(g[v].suffix.top); + } + } +} + +bool triggerKillsRoseCastle(const RoseBuildImpl &tbi, const left_id &left, + const set<ue2_literal> &all_lits, + const RoseEdge &e) { + assert(left.castle()); + const CastleProto &c = *left.castle(); + + const depth max_width = findMaxWidth(c); + DEBUG_PRINTF("castle max width is %s\n", max_width.str().c_str()); + + /* check each pred literal to see if they all kill previous castle + * state */ + for (u32 lit_id : tbi.g[source(e, tbi.g)].literals) { + const rose_literal_id &pred_lit = tbi.literals.at(lit_id); + const ue2_literal s = findNonOverlappingTail(all_lits, pred_lit.s); + const CharReach &cr = c.reach(); + + DEBUG_PRINTF("s=%s, castle reach=%s\n", dumpString(s).c_str(), + describeClass(cr).c_str()); + + for (const auto &s_cr : s) { + if (!overlaps(cr, s_cr)) { + DEBUG_PRINTF("reach %s kills castle\n", + describeClass(s_cr).c_str()); + goto next_pred; + } + } + + if (max_width < depth(s.length())) { + DEBUG_PRINTF("literal width >= castle max width\n"); + goto next_pred; + } + + return false; + + next_pred:; + } + + return true; +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/rose/rose_build_castle.h b/contrib/libs/hyperscan/src/rose/rose_build_castle.h index f03feeab04..4a2b6188b0 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_castle.h +++ b/contrib/libs/hyperscan/src/rose/rose_build_castle.h @@ -1,69 +1,69 @@ -/* - * Copyright (c) 2015, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef ROSE_BUILD_CASTLE_H -#define ROSE_BUILD_CASTLE_H - -#include "rose_graph.h" - -#include <set> - -namespace ue2 { - -class RoseBuildImpl; -struct left_id; -struct ue2_literal; - -/** - * Runs over all rose infix/suffix engines and converts those that are pure - * repeats with one report into CastleProto engines. 
- */ -void makeCastles(RoseBuildImpl &tbi); - -/** - * Identifies all the CastleProto prototypes that are small enough that they - * would be better implemented as NFAs, and converts them back to NGHolder - * prototypes. - * - * Returns true if any changes were made. - */ -bool unmakeCastles(RoseBuildImpl &tbi); - -/** - * Runs over all the Castle engine prototypes in the graph and ensures that - * they have tops in a contiguous range, ready for construction. - */ -void remapCastleTops(RoseBuildImpl &tbi); - -bool triggerKillsRoseCastle(const RoseBuildImpl &tbi, const left_id &left, - const std::set<ue2_literal> &all_lits, - const RoseEdge &e); - -} - -#endif +/* + * Copyright (c) 2015, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ROSE_BUILD_CASTLE_H +#define ROSE_BUILD_CASTLE_H + +#include "rose_graph.h" + +#include <set> + +namespace ue2 { + +class RoseBuildImpl; +struct left_id; +struct ue2_literal; + +/** + * Runs over all rose infix/suffix engines and converts those that are pure + * repeats with one report into CastleProto engines. + */ +void makeCastles(RoseBuildImpl &tbi); + +/** + * Identifies all the CastleProto prototypes that are small enough that they + * would be better implemented as NFAs, and converts them back to NGHolder + * prototypes. + * + * Returns true if any changes were made. + */ +bool unmakeCastles(RoseBuildImpl &tbi); + +/** + * Runs over all the Castle engine prototypes in the graph and ensures that + * they have tops in a contiguous range, ready for construction. 
+ */ +void remapCastleTops(RoseBuildImpl &tbi); + +bool triggerKillsRoseCastle(const RoseBuildImpl &tbi, const left_id &left, + const std::set<ue2_literal> &all_lits, + const RoseEdge &e); + +} + +#endif diff --git a/contrib/libs/hyperscan/src/rose/rose_build_compile.cpp b/contrib/libs/hyperscan/src/rose/rose_build_compile.cpp index 76439695ae..1cf3bbe695 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_compile.cpp +++ b/contrib/libs/hyperscan/src/rose/rose_build_compile.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -31,16 +31,16 @@ #include "grey.h" #include "hs_internal.h" #include "rose_build_anchored.h" -#include "rose_build_castle.h" +#include "rose_build_castle.h" #include "rose_build_convert.h" #include "rose_build_dump.h" -#include "rose_build_groups.h" -#include "rose_build_matchers.h" +#include "rose_build_groups.h" +#include "rose_build_matchers.h" #include "rose_build_merge.h" #include "rose_build_role_aliasing.h" #include "rose_build_util.h" #include "ue2common.h" -#include "hwlm/hwlm_literal.h" +#include "hwlm/hwlm_literal.h" #include "nfa/nfa_internal.h" #include "nfa/rdfa.h" #include "nfagraph/ng_holder.h" @@ -48,7 +48,7 @@ #include "nfagraph/ng_is_equal.h" #include "nfagraph/ng_limex.h" #include "nfagraph/ng_mcclellan.h" -#include "nfagraph/ng_prune.h" +#include "nfagraph/ng_prune.h" #include "nfagraph/ng_repeat.h" #include "nfagraph/ng_reports.h" #include "nfagraph/ng_stop.h" @@ -61,7 +61,7 @@ #include "util/compile_context.h" #include "util/container.h" #include "util/dump_charclass.h" -#include "util/flat_containers.h" +#include "util/flat_containers.h" #include "util/graph_range.h" #include "util/order_check.h" #include "util/report_manager.h" @@ -89,89 +89,89 @@ namespace ue2 { #define ANCHORED_REHOME_DEEP 25 #define ANCHORED_REHOME_SHORT_LEN 3 -#define MAX_EXPLOSION_NC 3 +#define MAX_EXPLOSION_NC 3 static -bool limited_explosion(const ue2_literal &s) { - u32 nc_count = 0; +bool limited_explosion(const ue2_literal &s) { + u32 nc_count = 0; - for (const auto &e : s) { - if (e.nocase) { - nc_count++; + for (const auto &e : s) { + if (e.nocase) { + nc_count++; } } - return nc_count <= MAX_EXPLOSION_NC; + return nc_count <= MAX_EXPLOSION_NC; } static -void removeLiteralFromGraph(RoseBuildImpl &build, u32 id) { - assert(id < build.literal_info.size()); - auto &info = build.literal_info.at(id); - for (const auto &v : info.vertices) { - build.g[v].literals.erase(id); +void removeLiteralFromGraph(RoseBuildImpl &build, u32 id) { + assert(id < build.literal_info.size()); + auto &info = build.literal_info.at(id); + for (const auto &v : info.vertices) { + build.g[v].literals.erase(id); } - info.vertices.clear(); + info.vertices.clear(); } -/** - * \brief Replace the given mixed-case literal with the set of its caseless - * variants. - */ +/** + * \brief Replace the given mixed-case literal with the set of its caseless + * variants. 
+ */ static -void explodeLiteral(RoseBuildImpl &build, u32 id) { - const auto &lit = build.literals.at(id); - auto &info = build.literal_info[id]; +void explodeLiteral(RoseBuildImpl &build, u32 id) { + const auto &lit = build.literals.at(id); + auto &info = build.literal_info[id]; - assert(!info.group_mask); // not set yet - assert(info.undelayed_id == id); // we do not explode delayed literals + assert(!info.group_mask); // not set yet + assert(info.undelayed_id == id); // we do not explode delayed literals - for (auto it = caseIterateBegin(lit.s); it != caseIterateEnd(); ++it) { - ue2_literal new_str(*it, false); + for (auto it = caseIterateBegin(lit.s); it != caseIterateEnd(); ++it) { + ue2_literal new_str(*it, false); - if (!maskIsConsistent(new_str.get_string(), false, lit.msk, lit.cmp)) { - DEBUG_PRINTF("msk/cmp for literal can't match, skipping\n"); + if (!maskIsConsistent(new_str.get_string(), false, lit.msk, lit.cmp)) { + DEBUG_PRINTF("msk/cmp for literal can't match, skipping\n"); continue; } - u32 new_id = - build.getLiteralId(new_str, lit.msk, lit.cmp, lit.delay, lit.table); + u32 new_id = + build.getLiteralId(new_str, lit.msk, lit.cmp, lit.delay, lit.table); - DEBUG_PRINTF("adding exploded lit %u: '%s'\n", new_id, - dumpString(new_str).c_str()); + DEBUG_PRINTF("adding exploded lit %u: '%s'\n", new_id, + dumpString(new_str).c_str()); - const auto &new_lit = build.literals.at(new_id); - auto &new_info = build.literal_info.at(new_id); - insert(&new_info.vertices, info.vertices); - for (const auto &v : info.vertices) { - build.g[v].literals.insert(new_id); + const auto &new_lit = build.literals.at(new_id); + auto &new_info = build.literal_info.at(new_id); + insert(&new_info.vertices, info.vertices); + for (const auto &v : info.vertices) { + build.g[v].literals.insert(new_id); } - build.literal_info[new_id].undelayed_id = new_id; - if (!info.delayed_ids.empty()) { - flat_set<u32> &del_ids = new_info.delayed_ids; - for (u32 delay_id : info.delayed_ids) { - const auto &dlit = build.literals.at(delay_id); - u32 new_delay_id = - build.getLiteralId(new_lit.s, new_lit.msk, new_lit.cmp, - dlit.delay, dlit.table); - del_ids.insert(new_delay_id); - build.literal_info[new_delay_id].undelayed_id = new_id; + build.literal_info[new_id].undelayed_id = new_id; + if (!info.delayed_ids.empty()) { + flat_set<u32> &del_ids = new_info.delayed_ids; + for (u32 delay_id : info.delayed_ids) { + const auto &dlit = build.literals.at(delay_id); + u32 new_delay_id = + build.getLiteralId(new_lit.s, new_lit.msk, new_lit.cmp, + dlit.delay, dlit.table); + del_ids.insert(new_delay_id); + build.literal_info[new_delay_id].undelayed_id = new_id; } } } - // Remove the old literal and any old delay variants. - removeLiteralFromGraph(build, id); - for (u32 delay_id : info.delayed_ids) { - removeLiteralFromGraph(build, delay_id); + // Remove the old literal and any old delay variants. 
+ removeLiteralFromGraph(build, id); + for (u32 delay_id : info.delayed_ids) { + removeLiteralFromGraph(build, delay_id); } - info.delayed_ids.clear(); + info.delayed_ids.clear(); } void RoseBuildImpl::handleMixedSensitivity(void) { - vector<u32> explode; - for (u32 id = 0; id < literals.size(); id++) { - const rose_literal_id &lit = literals.at(id); + vector<u32> explode; + for (u32 id = 0; id < literals.size(); id++) { + const rose_literal_id &lit = literals.at(id); if (lit.delay) { continue; /* delay id's are virtual-ish */ @@ -185,24 +185,24 @@ void RoseBuildImpl::handleMixedSensitivity(void) { continue; } - // We don't want to explode long literals, as they require confirmation - // with a CHECK_LONG_LIT instruction and need unique final_ids. - // TODO: we could allow explosion for literals where the prefixes - // covered by CHECK_LONG_LIT are identical. - - if (lit.s.length() <= ROSE_LONG_LITERAL_THRESHOLD_MIN && - limited_explosion(lit.s) && literal_info[id].delayed_ids.empty()) { + // We don't want to explode long literals, as they require confirmation + // with a CHECK_LONG_LIT instruction and need unique final_ids. + // TODO: we could allow explosion for literals where the prefixes + // covered by CHECK_LONG_LIT are identical. + + if (lit.s.length() <= ROSE_LONG_LITERAL_THRESHOLD_MIN && + limited_explosion(lit.s) && literal_info[id].delayed_ids.empty()) { DEBUG_PRINTF("need to explode existing string '%s'\n", dumpString(lit.s).c_str()); - explode.push_back(id); + explode.push_back(id); } else { literal_info[id].requires_benefits = true; } } - - for (u32 id : explode) { - explodeLiteral(*this, id); - } + + for (u32 id : explode) { + explodeLiteral(*this, id); + } } // Returns the length of the longest prefix of s that is (a) also a suffix of s @@ -294,7 +294,7 @@ RoseRoleHistory findHistoryScheme(const RoseBuildImpl &tbi, const RoseEdge &e) { const RoseVertex u = source(e, g); /* pred role */ const RoseVertex v = target(e, g); /* current role */ - DEBUG_PRINTF("find history for [%zu,%zu]\n", g[u].index, g[v].index); + DEBUG_PRINTF("find history for [%zu,%zu]\n", g[u].index, g[v].index); DEBUG_PRINTF("u has min_offset=%u, max_offset=%u\n", g[u].min_offset, g[u].max_offset); @@ -335,9 +335,9 @@ RoseRoleHistory findHistoryScheme(const RoseBuildImpl &tbi, const RoseEdge &e) { // If the bounds are {0,0}, this role can only match precisely at EOD. if (minBound == 0 && maxBound == 0) { - /* last byte history will squash the state byte so cannot have other - * succ */ - assert(out_degree(u, g) == 1); + /* last byte history will squash the state byte so cannot have other + * succ */ + assert(out_degree(u, g) == 1); return ROSE_ROLE_HISTORY_LAST_BYTE; } @@ -348,7 +348,7 @@ RoseRoleHistory findHistoryScheme(const RoseBuildImpl &tbi, const RoseEdge &e) { // Non-EOD cases. 
DEBUG_PRINTF("examining edge [%zu,%zu] with bounds {%u,%u}\n", - g[u].index, g[v].index, g[e].minBound, g[e].maxBound); + g[u].index, g[v].index, g[e].minBound, g[e].maxBound); if (tbi.isAnchored(v)) { // Matches for literals in the anchored table will always arrive at the @@ -358,8 +358,8 @@ RoseRoleHistory findHistoryScheme(const RoseBuildImpl &tbi, const RoseEdge &e) { return ROSE_ROLE_HISTORY_NONE; } - if (g[u].fixedOffset() && - (g[e].minBound || g[e].maxBound != ROSE_BOUND_INF)) { + if (g[u].fixedOffset() && + (g[e].minBound || g[e].maxBound != ROSE_BOUND_INF)) { DEBUG_PRINTF("fixed offset -> anch\n"); return ROSE_ROLE_HISTORY_ANCH; } @@ -402,7 +402,7 @@ bool RoseBuildImpl::isDirectReport(u32 id) const { // role's reports from a list. for (auto v : info.vertices) { - assert(contains(g[v].literals, id)); + assert(contains(g[v].literals, id)); if (g[v].reports.empty() || g[v].eod_accept || // no accept EOD @@ -412,14 +412,14 @@ bool RoseBuildImpl::isDirectReport(u32 id) const { return false; } - // Use the program to handle cases that aren't external reports. - for (const ReportID &rid : g[v].reports) { - if (!isExternalReport(rm.getReport(rid))) { - return false; - } - } - - if (literals.at(id).table == ROSE_ANCHORED) { + // Use the program to handle cases that aren't external reports. + for (const ReportID &rid : g[v].reports) { + if (!isExternalReport(rm.getReport(rid))) { + return false; + } + } + + if (literals.at(id).table == ROSE_ANCHORED) { /* in-edges are irrelevant for anchored region. */ continue; } @@ -438,52 +438,52 @@ bool RoseBuildImpl::isDirectReport(u32 id) const { } DEBUG_PRINTF("literal %u ('%s') is a %s report\n", id, - dumpString(literals.at(id).s).c_str(), + dumpString(literals.at(id).s).c_str(), info.vertices.size() > 1 ? "multi-direct" : "direct"); return true; } - -/* If we have prefixes that can squash all the floating roots, we can have a - * somewhat-conditional floating table. As we can't yet look at squash_masks, we - * have to make some guess as to if we are in this case but the win for not - * running a floating table over a large portion of the stream is significantly - * larger than avoiding running an eod table over the last N bytes. */ + +/* If we have prefixes that can squash all the floating roots, we can have a + * somewhat-conditional floating table. As we can't yet look at squash_masks, we + * have to make some guess as to if we are in this case but the win for not + * running a floating table over a large portion of the stream is significantly + * larger than avoiding running an eod table over the last N bytes. 
*/ +static +bool checkFloatingKillableByPrefixes(const RoseBuildImpl &tbi) { + for (auto v : vertices_range(tbi.g)) { + if (!tbi.isRootSuccessor(v)) { + continue; + } + + if (!tbi.isFloating(v)) { + continue; + } + + if (!tbi.g[v].left) { + DEBUG_PRINTF("unguarded floating root\n"); + return false; + } + + if (tbi.g[v].left.graph) { + const NGHolder &h = *tbi.g[v].left.graph; + if (proper_out_degree(h.startDs, h)) { + DEBUG_PRINTF("floating nfa prefix, won't die\n"); + return false; + } + } else if (tbi.g[v].left.dfa) { + if (tbi.g[v].left.dfa->start_floating != DEAD_STATE) { + DEBUG_PRINTF("floating dfa prefix, won't die\n"); + return false; + } + } + } + + return true; +} + static -bool checkFloatingKillableByPrefixes(const RoseBuildImpl &tbi) { - for (auto v : vertices_range(tbi.g)) { - if (!tbi.isRootSuccessor(v)) { - continue; - } - - if (!tbi.isFloating(v)) { - continue; - } - - if (!tbi.g[v].left) { - DEBUG_PRINTF("unguarded floating root\n"); - return false; - } - - if (tbi.g[v].left.graph) { - const NGHolder &h = *tbi.g[v].left.graph; - if (proper_out_degree(h.startDs, h)) { - DEBUG_PRINTF("floating nfa prefix, won't die\n"); - return false; - } - } else if (tbi.g[v].left.dfa) { - if (tbi.g[v].left.dfa->start_floating != DEAD_STATE) { - DEBUG_PRINTF("floating dfa prefix, won't die\n"); - return false; - } - } - } - - return true; -} - -static -bool checkEodStealFloating(const RoseBuildImpl &build, +bool checkEodStealFloating(const RoseBuildImpl &build, const vector<u32> &eodLiteralsForFloating, u32 numFloatingLiterals, size_t shortestFloatingLen) { @@ -497,35 +497,35 @@ bool checkEodStealFloating(const RoseBuildImpl &build, return false; } - if (build.hasNoFloatingRoots()) { + if (build.hasNoFloatingRoots()) { DEBUG_PRINTF("skipping as floating table is conditional\n"); /* TODO: investigate putting stuff in atable */ return false; } - if (checkFloatingKillableByPrefixes(build)) { - DEBUG_PRINTF("skipping as prefixes may make ftable conditional\n"); - return false; - } - - // Collect a set of all floating literals. - unordered_set<ue2_literal> floating_lits; - for (auto &lit : build.literals) { - if (lit.table == ROSE_FLOATING) { - floating_lits.insert(lit.s); - } - } - + if (checkFloatingKillableByPrefixes(build)) { + DEBUG_PRINTF("skipping as prefixes may make ftable conditional\n"); + return false; + } + + // Collect a set of all floating literals. 
+ unordered_set<ue2_literal> floating_lits; + for (auto &lit : build.literals) { + if (lit.table == ROSE_FLOATING) { + floating_lits.insert(lit.s); + } + } + DEBUG_PRINTF("%zu are eod literals, %u floating; floating len=%zu\n", eodLiteralsForFloating.size(), numFloatingLiterals, shortestFloatingLen); u32 new_floating_lits = 0; for (u32 eod_id : eodLiteralsForFloating) { - const rose_literal_id &lit = build.literals.at(eod_id); + const rose_literal_id &lit = build.literals.at(eod_id); DEBUG_PRINTF("checking '%s'\n", dumpString(lit.s).c_str()); - if (contains(floating_lits, lit.s)) { + if (contains(floating_lits, lit.s)) { DEBUG_PRINTF("skip; there is already a floating version\n"); continue; } @@ -556,16 +556,16 @@ bool checkEodStealFloating(const RoseBuildImpl &build, static void promoteEodToFloating(RoseBuildImpl &tbi, const vector<u32> &eodLiterals) { - DEBUG_PRINTF("promoting %zu eod literals to floating table\n", - eodLiterals.size()); + DEBUG_PRINTF("promoting %zu eod literals to floating table\n", + eodLiterals.size()); for (u32 eod_id : eodLiterals) { - const rose_literal_id &lit = tbi.literals.at(eod_id); - DEBUG_PRINTF("eod_id=%u, lit=%s\n", eod_id, dumpString(lit.s).c_str()); + const rose_literal_id &lit = tbi.literals.at(eod_id); + DEBUG_PRINTF("eod_id=%u, lit=%s\n", eod_id, dumpString(lit.s).c_str()); u32 floating_id = tbi.getLiteralId(lit.s, lit.msk, lit.cmp, lit.delay, ROSE_FLOATING); - DEBUG_PRINTF("floating_id=%u, lit=%s\n", floating_id, - dumpString(tbi.literals.at(floating_id).s).c_str()); + DEBUG_PRINTF("floating_id=%u, lit=%s\n", floating_id, + dumpString(tbi.literals.at(floating_id).s).c_str()); auto &float_verts = tbi.literal_info[floating_id].vertices; auto &eod_verts = tbi.literal_info[eod_id].vertices; @@ -590,7 +590,7 @@ bool promoteEodToAnchored(RoseBuildImpl &tbi, const vector<u32> &eodLiterals) { bool rv = true; for (u32 eod_id : eodLiterals) { - const rose_literal_id &lit = tbi.literals.at(eod_id); + const rose_literal_id &lit = tbi.literals.at(eod_id); NGHolder h; add_edge(h.start, h.accept, h); @@ -730,7 +730,7 @@ void stealEodVertices(RoseBuildImpl &tbi) { continue; // skip unused literals } - const rose_literal_id &lit = tbi.literals.at(i); + const rose_literal_id &lit = tbi.literals.at(i); if (lit.table == ROSE_EOD_ANCHORED) { if (suitableForAnchored(tbi, lit, info)) { @@ -770,335 +770,335 @@ bool RoseBuildImpl::isDelayed(u32 id) const { return literal_info.at(id).undelayed_id != id; } -bool RoseBuildImpl::hasDelayedLiteral(RoseVertex v) const { - for (u32 lit_id : g[v].literals) { - if (literals.at(lit_id).delay) { - return true; +bool RoseBuildImpl::hasDelayedLiteral(RoseVertex v) const { + for (u32 lit_id : g[v].literals) { + if (literals.at(lit_id).delay) { + return true; } } - return false; + return false; } -bool RoseBuildImpl::hasDelayPred(RoseVertex v) const { - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (hasDelayedLiteral(u)) { - return true; +bool RoseBuildImpl::hasDelayPred(RoseVertex v) const { + for (auto u : inv_adjacent_vertices_range(v, g)) { + if (hasDelayedLiteral(u)) { + return true; } } return false; } -bool RoseBuildImpl::hasAnchoredTablePred(RoseVertex v) const { +bool RoseBuildImpl::hasAnchoredTablePred(RoseVertex v) const { for (auto u : inv_adjacent_vertices_range(v, g)) { - if (isAnchored(u)) { - return true; + if (isAnchored(u)) { + return true; } } - return false; + return false; } -void RoseBuildImpl::findTransientLeftfixes(void) { - for (auto v : vertices_range(g)) { - if (!g[v].left) { +void 
RoseBuildImpl::findTransientLeftfixes(void) { + for (auto v : vertices_range(g)) { + if (!g[v].left) { continue; } - /* infixes can never (or at least not yet) be transient */ - if (isNonRootSuccessor(v)) { + /* infixes can never (or at least not yet) be transient */ + if (isNonRootSuccessor(v)) { continue; } - const left_id &left(g[v].left); + const left_id &left(g[v].left); - if (::ue2::isAnchored(left) && !isInETable(v)) { - /* etable prefixes currently MUST be transient as we do not know - * where we can safely catch them up to (yet). */ - DEBUG_PRINTF("anchored roses in rocky soil are not fleeting\n"); - continue; - } + if (::ue2::isAnchored(left) && !isInETable(v)) { + /* etable prefixes currently MUST be transient as we do not know + * where we can safely catch them up to (yet). */ + DEBUG_PRINTF("anchored roses in rocky soil are not fleeting\n"); + continue; + } - const depth max_width = findMaxWidth(left); - if (!max_width.is_finite()) { - DEBUG_PRINTF("inf max width\n"); + const depth max_width = findMaxWidth(left); + if (!max_width.is_finite()) { + DEBUG_PRINTF("inf max width\n"); continue; } - if (cc.streaming) { - /* STREAMING: transient prefixes must be able to run using history - * rather than storing state. */ - u32 his = g[v].left.lag + max_width; + if (cc.streaming) { + /* STREAMING: transient prefixes must be able to run using history + * rather than storing state. */ + u32 his = g[v].left.lag + max_width; - // If this vertex has an event literal, we need to add one to cope - // with it. - if (hasLiteralInTable(v, ROSE_EVENT)) { - his++; - } + // If this vertex has an event literal, we need to add one to cope + // with it. + if (hasLiteralInTable(v, ROSE_EVENT)) { + his++; + } - /* +1 as trigger must appear in main buffer and no byte is needed to - * decompress the state */ - if (his <= cc.grey.maxHistoryAvailable + 1) { - transient.insert(left); - DEBUG_PRINTF("a transient leftfix spotted his=%u\n", his); + /* +1 as trigger must appear in main buffer and no byte is needed to + * decompress the state */ + if (his <= cc.grey.maxHistoryAvailable + 1) { + transient.insert(left); + DEBUG_PRINTF("a transient leftfix spotted his=%u\n", his); + } + } else { + /* BLOCK: transientness is less important and more fuzzy, ideally + * it should be quick to calculate the state. No need to worry about + * history (and hence lag). */ + if (max_width < depth(ROSE_BLOCK_TRANSIENT_MAX_WIDTH)) { + transient.insert(left); + DEBUG_PRINTF("a transient block leftfix spotted [%u]\n", + (u32)max_width); } - } else { - /* BLOCK: transientness is less important and more fuzzy, ideally - * it should be quick to calculate the state. No need to worry about - * history (and hence lag). */ - if (max_width < depth(ROSE_BLOCK_TRANSIENT_MAX_WIDTH)) { - transient.insert(left); - DEBUG_PRINTF("a transient block leftfix spotted [%u]\n", - (u32)max_width); - } - } - } -} - -/** Find all the different roses and their associated literals. 
*/ -static -map<left_id, vector<RoseVertex>> findLeftSucc(const RoseBuildImpl &build) { - map<left_id, vector<RoseVertex>> leftfixes; - for (auto v : vertices_range(build.g)) { - if (build.g[v].left) { - const LeftEngInfo &lei = build.g[v].left; - leftfixes[lei].push_back(v); - } - } - return leftfixes; -} - -namespace { -struct infix_info { - set<RoseVertex> preds; - set<RoseVertex> succs; -}; -} - -static -map<NGHolder *, infix_info> findInfixGraphInfo(const RoseBuildImpl &build) { - map<NGHolder *, infix_info> rv; - - for (auto v : vertices_range(build.g)) { - if (!build.g[v].left) { + } + } +} + +/** Find all the different roses and their associated literals. */ +static +map<left_id, vector<RoseVertex>> findLeftSucc(const RoseBuildImpl &build) { + map<left_id, vector<RoseVertex>> leftfixes; + for (auto v : vertices_range(build.g)) { + if (build.g[v].left) { + const LeftEngInfo &lei = build.g[v].left; + leftfixes[lei].push_back(v); + } + } + return leftfixes; +} + +namespace { +struct infix_info { + set<RoseVertex> preds; + set<RoseVertex> succs; +}; +} + +static +map<NGHolder *, infix_info> findInfixGraphInfo(const RoseBuildImpl &build) { + map<NGHolder *, infix_info> rv; + + for (auto v : vertices_range(build.g)) { + if (!build.g[v].left) { continue; } - if (build.isRootSuccessor(v)) { - DEBUG_PRINTF("a prefix is never an infix\n"); - continue; + if (build.isRootSuccessor(v)) { + DEBUG_PRINTF("a prefix is never an infix\n"); + continue; } - /* ensure only proper nfas */ - const LeftEngInfo &lei = build.g[v].left; - if (!lei.graph) { + /* ensure only proper nfas */ + const LeftEngInfo &lei = build.g[v].left; + if (!lei.graph) { continue; } - if (lei.haig || lei.dfa) { - continue; + if (lei.haig || lei.dfa) { + continue; } - assert(!lei.castle); - infix_info &info = rv[lei.graph.get()]; - insert(&info.preds, inv_adjacent_vertices_range(v, build.g)); - info.succs.insert(v); + assert(!lei.castle); + infix_info &info = rv[lei.graph.get()]; + insert(&info.preds, inv_adjacent_vertices_range(v, build.g)); + info.succs.insert(v); } - return rv; + return rv; } -static -map<u32, flat_set<NFAEdge>> getTopInfo(const NGHolder &h) { - map<u32, flat_set<NFAEdge>> rv; - for (NFAEdge e : out_edges_range(h.start, h)) { - for (u32 t : h[e].tops) { - rv[t].insert(e); +static +map<u32, flat_set<NFAEdge>> getTopInfo(const NGHolder &h) { + map<u32, flat_set<NFAEdge>> rv; + for (NFAEdge e : out_edges_range(h.start, h)) { + for (u32 t : h[e].tops) { + rv[t].insert(e); } } - return rv; + return rv; } -static -u32 findUnusedTop(const map<u32, flat_set<NFAEdge>> &tops) { - u32 i = 0; - while (contains(tops, i)) { - i++; +static +u32 findUnusedTop(const map<u32, flat_set<NFAEdge>> &tops) { + u32 i = 0; + while (contains(tops, i)) { + i++; } - return i; + return i; } -static -bool reduceTopTriggerLoad(RoseBuildImpl &build, NGHolder &h, RoseVertex u) { - RoseGraph &g = build.g; - - set<u32> tops; /* tops triggered by u */ - for (RoseEdge e : out_edges_range(u, g)) { - RoseVertex v = target(e, g); - if (g[v].left.graph.get() != &h) { - continue; +static +bool reduceTopTriggerLoad(RoseBuildImpl &build, NGHolder &h, RoseVertex u) { + RoseGraph &g = build.g; + + set<u32> tops; /* tops triggered by u */ + for (RoseEdge e : out_edges_range(u, g)) { + RoseVertex v = target(e, g); + if (g[v].left.graph.get() != &h) { + continue; } - tops.insert(g[e].rose_top); + tops.insert(g[e].rose_top); } - assert(!tops.empty()); - if (tops.size() <= 1) { + assert(!tops.empty()); + if (tops.size() <= 1) { return false; } - 
DEBUG_PRINTF("%zu triggers %zu tops for %p\n", build.g[u].index, - tops.size(), &h); + DEBUG_PRINTF("%zu triggers %zu tops for %p\n", build.g[u].index, + tops.size(), &h); - auto h_top_info = getTopInfo(h); - flat_set<NFAEdge> edges_to_trigger; - for (u32 t : tops) { - insert(&edges_to_trigger, h_top_info[t]); + auto h_top_info = getTopInfo(h); + flat_set<NFAEdge> edges_to_trigger; + for (u32 t : tops) { + insert(&edges_to_trigger, h_top_info[t]); } - u32 new_top = ~0U; - /* check if there is already a top with the right the successor set */ - for (const auto &elem : h_top_info) { - if (elem.second == edges_to_trigger) { - new_top = elem.first; - break; + u32 new_top = ~0U; + /* check if there is already a top with the right the successor set */ + for (const auto &elem : h_top_info) { + if (elem.second == edges_to_trigger) { + new_top = elem.first; + break; } } - /* if no existing suitable top, add a new top for us */ - if (new_top == ~0U) { - new_top = findUnusedTop(h_top_info); + /* if no existing suitable top, add a new top for us */ + if (new_top == ~0U) { + new_top = findUnusedTop(h_top_info); - /* add top to edges out of start */ - for (NFAEdge e : out_edges_range(h.start, h)) { - if (has_intersection(tops, h[e].tops)) { - h[e].tops.insert(new_top); - } + /* add top to edges out of start */ + for (NFAEdge e : out_edges_range(h.start, h)) { + if (has_intersection(tops, h[e].tops)) { + h[e].tops.insert(new_top); + } } - /* check still implementable if we add a new top */ - if (!isImplementableNFA(h, nullptr, build.cc)) { - DEBUG_PRINTF("unable to add new top\n"); - for (NFAEdge e : out_edges_range(h.start, h)) { - h[e].tops.erase(new_top); - } - /* we should be back to the original graph */ - assert(isImplementableNFA(h, nullptr, build.cc)); + /* check still implementable if we add a new top */ + if (!isImplementableNFA(h, nullptr, build.cc)) { + DEBUG_PRINTF("unable to add new top\n"); + for (NFAEdge e : out_edges_range(h.start, h)) { + h[e].tops.erase(new_top); + } + /* we should be back to the original graph */ + assert(isImplementableNFA(h, nullptr, build.cc)); return false; } } - DEBUG_PRINTF("using new merged top %u\n", new_top); - assert(new_top != ~0U); - for (RoseEdge e: out_edges_range(u, g)) { - RoseVertex v = target(e, g); - if (g[v].left.graph.get() != &h) { - continue; + DEBUG_PRINTF("using new merged top %u\n", new_top); + assert(new_top != ~0U); + for (RoseEdge e: out_edges_range(u, g)) { + RoseVertex v = target(e, g); + if (g[v].left.graph.get() != &h) { + continue; } - g[e].rose_top = new_top; + g[e].rose_top = new_top; } - return true; + return true; } static -void packInfixTops(NGHolder &h, RoseGraph &g, - const set<RoseVertex> &verts) { - if (!is_triggered(h)) { - DEBUG_PRINTF("not triggered, no tops\n"); - return; +void packInfixTops(NGHolder &h, RoseGraph &g, + const set<RoseVertex> &verts) { + if (!is_triggered(h)) { + DEBUG_PRINTF("not triggered, no tops\n"); + return; } - assert(isCorrectlyTopped(h)); - DEBUG_PRINTF("pruning unused tops\n"); - flat_set<u32> used_tops; - for (auto v : verts) { - assert(g[v].left.graph.get() == &h); + assert(isCorrectlyTopped(h)); + DEBUG_PRINTF("pruning unused tops\n"); + flat_set<u32> used_tops; + for (auto v : verts) { + assert(g[v].left.graph.get() == &h); - for (const auto &e : in_edges_range(v, g)) { - u32 top = g[e].rose_top; - used_tops.insert(top); - } + for (const auto &e : in_edges_range(v, g)) { + u32 top = g[e].rose_top; + used_tops.insert(top); + } } - map<u32, u32> top_mapping; - for (u32 t : used_tops) { - u32 
new_top = top_mapping.size(); - top_mapping[t] = new_top; + map<u32, u32> top_mapping; + for (u32 t : used_tops) { + u32 new_top = top_mapping.size(); + top_mapping[t] = new_top; } - for (auto v : verts) { - assert(g[v].left.graph.get() == &h); + for (auto v : verts) { + assert(g[v].left.graph.get() == &h); - for (const auto &e : in_edges_range(v, g)) { - g[e].rose_top = top_mapping.at(g[e].rose_top); + for (const auto &e : in_edges_range(v, g)) { + g[e].rose_top = top_mapping.at(g[e].rose_top); } - } + } - vector<NFAEdge> dead; - for (const auto &e : out_edges_range(h.start, h)) { - NFAVertex v = target(e, h); - if (v == h.startDs) { - continue; // stylised edge, leave it alone. + vector<NFAEdge> dead; + for (const auto &e : out_edges_range(h.start, h)) { + NFAVertex v = target(e, h); + if (v == h.startDs) { + continue; // stylised edge, leave it alone. } - flat_set<u32> updated_tops; - for (u32 t : h[e].tops) { - if (contains(top_mapping, t)) { - updated_tops.insert(top_mapping.at(t)); + flat_set<u32> updated_tops; + for (u32 t : h[e].tops) { + if (contains(top_mapping, t)) { + updated_tops.insert(top_mapping.at(t)); } } - h[e].tops = std::move(updated_tops); - if (h[e].tops.empty()) { - DEBUG_PRINTF("edge (start,%zu) has only unused tops\n", h[v].index); - dead.push_back(e); + h[e].tops = std::move(updated_tops); + if (h[e].tops.empty()) { + DEBUG_PRINTF("edge (start,%zu) has only unused tops\n", h[v].index); + dead.push_back(e); } } - if (dead.empty()) { - return; + if (dead.empty()) { + return; } - remove_edges(dead, h); - pruneUseless(h); - clearReports(h); // As we may have removed vacuous edges. + remove_edges(dead, h); + pruneUseless(h); + clearReports(h); // As we may have removed vacuous edges. } static -void reduceTopTriggerLoad(RoseBuildImpl &build) { - auto infixes = findInfixGraphInfo(build); +void reduceTopTriggerLoad(RoseBuildImpl &build) { + auto infixes = findInfixGraphInfo(build); - for (auto &p : infixes) { - if (onlyOneTop(*p.first)) { + for (auto &p : infixes) { + if (onlyOneTop(*p.first)) { continue; } - bool changed = false; - for (RoseVertex v : p.second.preds) { - changed |= reduceTopTriggerLoad(build, *p.first, v); + bool changed = false; + for (RoseVertex v : p.second.preds) { + changed |= reduceTopTriggerLoad(build, *p.first, v); } - if (changed) { - packInfixTops(*p.first, build.g, p.second.succs); - reduceImplementableGraph(*p.first, SOM_NONE, nullptr, build.cc); + if (changed) { + packInfixTops(*p.first, build.g, p.second.succs); + reduceImplementableGraph(*p.first, SOM_NONE, nullptr, build.cc); } } } static -bool triggerKillsRoseGraph(const RoseBuildImpl &build, const left_id &left, +bool triggerKillsRoseGraph(const RoseBuildImpl &build, const left_id &left, const set<ue2_literal> &all_lits, const RoseEdge &e) { assert(left.graph()); const NGHolder &h = *left.graph(); - flat_set<NFAVertex> all_states; + flat_set<NFAVertex> all_states; insert(&all_states, vertices(h)); assert(out_degree(h.startDs, h) == 1); /* triggered don't use sds */ DEBUG_PRINTF("removing sds\n"); all_states.erase(h.startDs); - flat_set<NFAVertex> states; + flat_set<NFAVertex> states; /* check each pred literal to see if they all kill previous graph * state */ - for (u32 lit_id : build.g[source(e, build.g)].literals) { - const rose_literal_id &pred_lit = build.literals.at(lit_id); + for (u32 lit_id : build.g[source(e, build.g)].literals) { + const rose_literal_id &pred_lit = build.literals.at(lit_id); const ue2_literal s = findNonOverlappingTail(all_lits, pred_lit.s); 
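packInfixTops, earlier in this hunk, renumbers the tops that are still referenced on in-edges into a dense range starting at zero and then rewrites both the Rose edges and the NFA start edges to the new numbering. As a rough, standalone sketch of just that renumbering step (plain standard containers instead of the Rose/NGHolder types; packTops and remapEdgeTops are illustrative names, not Hyperscan functions):

#include <cstdint>
#include <map>
#include <set>
#include <utility>
#include <vector>

// Pack the set of top IDs that are still referenced into the dense range
// [0, used.size()), preserving their relative order.
static std::map<uint32_t, uint32_t> packTops(const std::set<uint32_t> &used) {
    std::map<uint32_t, uint32_t> mapping;
    for (uint32_t t : used) {
        uint32_t new_top = static_cast<uint32_t>(mapping.size());
        mapping[t] = new_top;
    }
    return mapping;
}

// Rewrite a list of per-edge top labels using the mapping; labels whose top
// is no longer used are dropped, mirroring the pruning of unused tops above.
static void remapEdgeTops(std::vector<uint32_t> &edge_tops,
                          const std::map<uint32_t, uint32_t> &mapping) {
    std::vector<uint32_t> out;
    for (uint32_t t : edge_tops) {
        auto it = mapping.find(t);
        if (it != mapping.end()) {
            out.push_back(it->second);
        }
    }
    edge_tops = std::move(out);
}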
DEBUG_PRINTF("running graph %zu\n", states.size()); @@ -1114,7 +1114,7 @@ bool triggerKillsRoseGraph(const RoseBuildImpl &build, const left_id &left, } static -bool triggerKillsRose(const RoseBuildImpl &build, const left_id &left, +bool triggerKillsRose(const RoseBuildImpl &build, const left_id &left, const set<ue2_literal> &all_lits, const RoseEdge &e) { if (left.haig()) { /* TODO: To allow this for som-based engines we would also need to @@ -1124,30 +1124,30 @@ bool triggerKillsRose(const RoseBuildImpl &build, const left_id &left, } if (left.graph()) { - return triggerKillsRoseGraph(build, left, all_lits, e); + return triggerKillsRoseGraph(build, left, all_lits, e); } if (left.castle()) { - return triggerKillsRoseCastle(build, left, all_lits, e); + return triggerKillsRoseCastle(build, left, all_lits, e); } return false; } -/* Sometimes the arrival of a top for a rose infix can ensure that the nfa would - * be dead at that time. In the case of multiple trigger literals, we can only - * base our decision on that portion of literal after any overlapping literals. - */ +/* Sometimes the arrival of a top for a rose infix can ensure that the nfa would + * be dead at that time. In the case of multiple trigger literals, we can only + * base our decision on that portion of literal after any overlapping literals. + */ static -void findTopTriggerCancels(RoseBuildImpl &build) { - auto left_succ = findLeftSucc(build); /* leftfixes -> succ verts */ +void findTopTriggerCancels(RoseBuildImpl &build) { + auto left_succ = findLeftSucc(build); /* leftfixes -> succ verts */ - for (const auto &r : left_succ) { + for (const auto &r : left_succ) { const left_id &left = r.first; const vector<RoseVertex> &succs = r.second; assert(!succs.empty()); - if (build.isRootSuccessor(*succs.begin())) { + if (build.isRootSuccessor(*succs.begin())) { /* a prefix is never an infix */ continue; } @@ -1157,10 +1157,10 @@ void findTopTriggerCancels(RoseBuildImpl &build) { set<u32> pred_lit_ids; for (auto v : succs) { - for (const auto &e : in_edges_range(v, build.g)) { - RoseVertex u = source(e, build.g); - tops_seen.insert(build.g[e].rose_top); - insert(&pred_lit_ids, build.g[u].literals); + for (const auto &e : in_edges_range(v, build.g)) { + RoseVertex u = source(e, build.g); + tops_seen.insert(build.g[e].rose_top); + insert(&pred_lit_ids, build.g[u].literals); rose_edges.insert(e); } } @@ -1172,7 +1172,7 @@ void findTopTriggerCancels(RoseBuildImpl &build) { } for (u32 lit_id : pred_lit_ids) { - const rose_literal_id &p_lit = build.literals.at(lit_id); + const rose_literal_id &p_lit = build.literals.at(lit_id); if (p_lit.delay || p_lit.table == ROSE_ANCHORED) { goto next_rose; } @@ -1184,9 +1184,9 @@ void findTopTriggerCancels(RoseBuildImpl &build) { all_lits.size(), rose_edges.size()); for (const auto &e : rose_edges) { - if (triggerKillsRose(build, left, all_lits, e)) { + if (triggerKillsRose(build, left, all_lits, e)) { DEBUG_PRINTF("top will override previous rose state\n"); - build.g[e].rose_cancel_prev_top = true; + build.g[e].rose_cancel_prev_top = true; } } next_rose:; @@ -1194,13 +1194,13 @@ void findTopTriggerCancels(RoseBuildImpl &build) { } static -void optimiseRoseTops(RoseBuildImpl &build) { - reduceTopTriggerLoad(build); - /* prune unused tops ? */ - findTopTriggerCancels(build); -} - -static +void optimiseRoseTops(RoseBuildImpl &build) { + reduceTopTriggerLoad(build); + /* prune unused tops ? 
*/ + findTopTriggerCancels(build); +} + +static void buildRoseSquashMasks(RoseBuildImpl &tbi) { /* Rose nfa squash masks are applied to the groups when the nfa can no * longer match */ @@ -1243,15 +1243,15 @@ void buildRoseSquashMasks(RoseBuildImpl &tbi) { } } - rose_group unsquashable = tbi.boundary_group_mask; + rose_group unsquashable = tbi.boundary_group_mask; for (u32 lit_id : lit_ids) { const rose_literal_info &info = tbi.literal_info[lit_id]; - if (!info.delayed_ids.empty() - || !all_of_in(info.vertices, - [&](RoseVertex v) { - return left == tbi.g[v].left; })) { - DEBUG_PRINTF("group %llu is unsquashable\n", info.group_mask); + if (!info.delayed_ids.empty() + || !all_of_in(info.vertices, + [&](RoseVertex v) { + return left == tbi.g[v].left; })) { + DEBUG_PRINTF("group %llu is unsquashable\n", info.group_mask); unsquashable |= info.group_mask; } } @@ -1273,7 +1273,7 @@ void countFloatingLiterals(const RoseBuildImpl &tbi, u32 *total_count, u32 *short_count) { *total_count = 0; *short_count = 0; - for (const rose_literal_id &lit : tbi.literals) { + for (const rose_literal_id &lit : tbi.literals) { if (lit.delay) { continue; /* delay id's are virtual-ish */ } @@ -1384,7 +1384,7 @@ void addSmallBlockLiteral(RoseBuildImpl &tbi, const simple_anchored_info &sai, assert(old_id < tbi.literal_info.size()); const rose_literal_info &li = tbi.literal_info[old_id]; - for (auto lit_v : li.vertices) { + for (auto lit_v : li.vertices) { // Clone vertex with the new literal ID. RoseVertex v = add_vertex(g[lit_v], g); g[v].literals.clear(); @@ -1393,9 +1393,9 @@ void addSmallBlockLiteral(RoseBuildImpl &tbi, const simple_anchored_info &sai, g[v].max_offset = sai.max_bound + sai.literal.length(); lit_info.vertices.insert(v); - RoseEdge e = add_edge(anchored_root, v, g); - g[e].minBound = sai.min_bound; - g[e].maxBound = sai.max_bound; + RoseEdge e = add_edge(anchored_root, v, g); + g[e].minBound = sai.min_bound; + g[e].maxBound = sai.max_bound; } } } @@ -1417,7 +1417,7 @@ void addSmallBlockLiteral(RoseBuildImpl &tbi, const ue2_literal &lit, g[v].literals.insert(lit_id); g[v].reports = reports; - RoseEdge e = add_edge(tbi.root, v, g); + RoseEdge e = add_edge(tbi.root, v, g); g[e].minBound = 0; g[e].maxBound = ROSE_BOUND_INF; g[v].min_offset = 1; @@ -1533,8 +1533,8 @@ bool extractSEPLiterals(const OutfixInfo &outfix, const ReportManager &rm, // SEP cases should always become DFAs, so that's the only extract code we // have implemented here. 
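buildRoseSquashMasks, shown above, computes for each leftfix a mask of literal groups that can be cleared once the engine can no longer match; groups owned by literals that also feed other roles, or that have delayed copies, must be left alone. A rough sketch of that mask computation under simplified assumptions (LitInfo and buildSquashMask are stand-in names, and the per-literal flags condense the vertex checks the real pass performs):

#include <cstdint>
#include <vector>

using rose_group = uint64_t;

struct LitInfo {
    rose_group group_mask;    // groups this literal switches on
    bool only_feeds_this_nfa; // every use of the literal leads to this leftfix
    bool has_delayed_ids;     // delayed copies may still need the group
};

// Groups from the boundary set, plus groups of any literal we cannot prove is
// exclusively tied to this (now dead) engine, are unsquashable; everything
// else may be cleared when the engine dies.
static rose_group buildSquashMask(const std::vector<LitInfo> &lits,
                                  rose_group boundary_groups) {
    rose_group unsquashable = boundary_groups;
    for (const LitInfo &info : lits) {
        if (info.has_delayed_ids || !info.only_feeds_this_nfa) {
            unsquashable |= info.group_mask;
        }
    }
    return ~unsquashable; // mask of groups safe to squash
}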
- if (outfix.rdfa()) { - return extractSEPLiterals(*outfix.rdfa(), lits_out); + if (outfix.rdfa()) { + return extractSEPLiterals(*outfix.rdfa(), lits_out); } DEBUG_PRINTF("cannot extract literals from outfix type\n"); @@ -1623,7 +1623,7 @@ bool historiesAreValid(const RoseGraph &g) { for (const auto &e : edges_range(g)) { if (g[e].history == ROSE_ROLE_HISTORY_INVALID) { DEBUG_PRINTF("edge [%zu,%zu] has invalid history\n", - g[source(e, g)].index, g[target(e, g)].index); + g[source(e, g)].index, g[target(e, g)].index); return false; } } @@ -1639,23 +1639,23 @@ static bool danglingVertexRef(RoseBuildImpl &tbi) { RoseGraph::vertex_iterator vi, ve; tie(vi, ve) = vertices(tbi.g); - const unordered_set<RoseVertex> valid_vertices(vi, ve); + const unordered_set<RoseVertex> valid_vertices(vi, ve); if (!contains(valid_vertices, tbi.anchored_root)) { - DEBUG_PRINTF("anchored root vertex %zu not in graph\n", - tbi.g[tbi.anchored_root].index); + DEBUG_PRINTF("anchored root vertex %zu not in graph\n", + tbi.g[tbi.anchored_root].index); return true; } for (const auto &e : tbi.ghost) { if (!contains(valid_vertices, e.first)) { - DEBUG_PRINTF("ghost key vertex %zu not in graph\n", - tbi.g[e.first].index); + DEBUG_PRINTF("ghost key vertex %zu not in graph\n", + tbi.g[e.first].index); return true; } if (!contains(valid_vertices, e.second)) { - DEBUG_PRINTF("ghost value vertex %zu not in graph\n", - tbi.g[e.second].index); + DEBUG_PRINTF("ghost value vertex %zu not in graph\n", + tbi.g[e.second].index); return true; } } @@ -1667,11 +1667,11 @@ static bool roleOffsetsAreValid(const RoseGraph &g) { for (auto v : vertices_range(g)) { if (g[v].min_offset >= ROSE_BOUND_INF) { - DEBUG_PRINTF("invalid min_offset for role %zu\n", g[v].index); + DEBUG_PRINTF("invalid min_offset for role %zu\n", g[v].index); return false; } if (g[v].min_offset > g[v].max_offset) { - DEBUG_PRINTF("min_offset > max_offset for %zu\n", g[v].index); + DEBUG_PRINTF("min_offset > max_offset for %zu\n", g[v].index); return false; } } @@ -1679,8 +1679,8 @@ bool roleOffsetsAreValid(const RoseGraph &g) { } #endif // NDEBUG -bytecode_ptr<RoseEngine> RoseBuildImpl::buildRose(u32 minWidth) { - dumpRoseGraph(*this, "rose_early.dot"); +bytecode_ptr<RoseEngine> RoseBuildImpl::buildRose(u32 minWidth) { + dumpRoseGraph(*this, "rose_early.dot"); // Early check for Rose implementability. assert(canImplementGraphs(*this)); @@ -1700,8 +1700,8 @@ bytecode_ptr<RoseEngine> RoseBuildImpl::buildRose(u32 minWidth) { // If we've got a very small number of EOD-anchored literals, consider // moving them into the floating table so that we only have one literal - // matcher to run. Note that this needs to happen before - // addAnchoredSmallBlockLiterals as it may create anchored literals. + // matcher to run. Note that this needs to happen before + // addAnchoredSmallBlockLiterals as it may create anchored literals. assert(roleOffsetsAreValid(g)); stealEodVertices(*this); @@ -1755,27 +1755,27 @@ bytecode_ptr<RoseEngine> RoseBuildImpl::buildRose(u32 minWidth) { mergeSmallLeftfixes(*this); } - assert(!hasOrphanedTops(*this)); - + assert(!hasOrphanedTops(*this)); + // Do a rose-merging aliasing pass. aliasRoles(*this, true); - assert(!hasOrphanedTops(*this)); + assert(!hasOrphanedTops(*this)); // Run a merge pass over the outfixes as well. 
mergeOutfixes(*this); assert(!danglingVertexRef(*this)); - assert(!hasOrphanedTops(*this)); + assert(!hasOrphanedTops(*this)); + + findMoreLiteralMasks(*this); - findMoreLiteralMasks(*this); - - assignGroupsToLiterals(*this); - assignGroupsToRoles(*this); + assignGroupsToLiterals(*this); + assignGroupsToRoles(*this); findGroupSquashers(*this); /* final prep work */ remapCastleTops(*this); - optimiseRoseTops(*this); + optimiseRoseTops(*this); buildRoseSquashMasks(*this); rm.assignDkeys(this); @@ -1791,7 +1791,7 @@ bytecode_ptr<RoseEngine> RoseBuildImpl::buildRose(u32 minWidth) { assert(roleOffsetsAreValid(g)); assert(historiesAreValid(g)); - dumpRoseGraph(*this, "rose_pre_norm.dot"); + dumpRoseGraph(*this, "rose_pre_norm.dot"); return buildFinalEngine(minWidth); } diff --git a/contrib/libs/hyperscan/src/rose/rose_build_convert.cpp b/contrib/libs/hyperscan/src/rose/rose_build_convert.cpp index 18815ef8b5..33351099f7 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_convert.cpp +++ b/contrib/libs/hyperscan/src/rose/rose_build_convert.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -58,8 +58,8 @@ #include <queue> #include <set> #include <string> -#include <unordered_map> -#include <utility> +#include <unordered_map> +#include <utility> #include <vector> #include <boost/range/adaptor/map.hpp> @@ -85,7 +85,7 @@ size_t suffixFloodLen(const ue2_literal &s) { const ue2_literal::elem &c = s.back(); auto it = find_if(s.rbegin(), s.rend(), - [&c](const ue2_literal::elem &e) { return e != c; }); + [&c](const ue2_literal::elem &e) { return e != c; }); return distance(s.rbegin(), it); } @@ -100,10 +100,10 @@ unique_ptr<NGHolder> makeFloodProneSuffix(const ue2_literal &s, size_t len, NFAVertex u = h->start; for (auto it = s.begin() + s.length() - len; it != s.end(); ++it) { NFAVertex v = addHolderVertex(*it, *h); - NFAEdge e = add_edge(u, v, *h); - if (u == h->start) { - (*h)[e].tops.insert(DEFAULT_TOP); - } + NFAEdge e = add_edge(u, v, *h); + if (u == h->start) { + (*h)[e].tops.insert(DEFAULT_TOP); + } u = v; } @@ -167,7 +167,7 @@ bool delayLiteralWithPrefix(RoseBuildImpl &tbi, RoseVertex v, u32 lit_id, shared_ptr<NGHolder> h = makeRosePrefix(lit.s); ReportID prefix_report = 0; - set_report(*h, prefix_report); + set_report(*h, prefix_report); if (!isImplementableNFA(*h, &tbi.rm, tbi.cc)) { DEBUG_PRINTF("prefix not implementable\n"); @@ -236,7 +236,7 @@ void convertFloodProneSuffix(RoseBuildImpl &tbi, RoseVertex v, u32 lit_id, static size_t findFloodProneSuffixLen(const RoseBuildImpl &tbi) { size_t numLiterals = 0; - for (const rose_literal_id &lit : tbi.literals) { + for (const rose_literal_id &lit : tbi.literals) { if (lit.delay) { continue; // delay ids are virtual-ish } @@ -294,7 +294,7 @@ void convertFloodProneSuffixes(RoseBuildImpl &tbi) { } u32 lit_id = *g[v].literals.begin(); - const rose_literal_id &lit = tbi.literals.at(lit_id); + const rose_literal_id &lit = tbi.literals.at(lit_id); // anchored or delayed literals need thought. if (lit.table != ROSE_FLOATING || lit.delay) { @@ -354,27 +354,27 @@ CharReach getReachOfNormalVertex(const NGHolder &g) { return CharReach(); } -/** - * \brief Set the edge bounds and appropriate history on the given edge in the - * Rose graph. 
- */ +/** + * \brief Set the edge bounds and appropriate history on the given edge in the + * Rose graph. + */ +static +void setEdgeBounds(RoseGraph &g, const RoseEdge &e, u32 min_bound, + u32 max_bound) { + assert(min_bound <= max_bound); + assert(max_bound <= ROSE_BOUND_INF); + + g[e].minBound = min_bound; + g[e].maxBound = max_bound; + + if (min_bound || max_bound < ROSE_BOUND_INF) { + g[e].history = ROSE_ROLE_HISTORY_ANCH; + } else { + g[e].history = ROSE_ROLE_HISTORY_NONE; + } +} + static -void setEdgeBounds(RoseGraph &g, const RoseEdge &e, u32 min_bound, - u32 max_bound) { - assert(min_bound <= max_bound); - assert(max_bound <= ROSE_BOUND_INF); - - g[e].minBound = min_bound; - g[e].maxBound = max_bound; - - if (min_bound || max_bound < ROSE_BOUND_INF) { - g[e].history = ROSE_ROLE_HISTORY_ANCH; - } else { - g[e].history = ROSE_ROLE_HISTORY_NONE; - } -} - -static bool handleStartPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, const RoseEdge &e_old, RoseVertex ar, vector<RoseEdge> *to_delete) { @@ -409,10 +409,10 @@ bool handleStartPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, if (source(e_old, g) == ar) { assert(g[e_old].minBound <= bound_min); assert(g[e_old].maxBound >= bound_max); - setEdgeBounds(g, e_old, bound_min, bound_max); + setEdgeBounds(g, e_old, bound_min, bound_max); } else { - RoseEdge e_new = add_edge(ar, v, g); - setEdgeBounds(g, e_new, bound_min, bound_max); + RoseEdge e_new = add_edge(ar, v, g); + setEdgeBounds(g, e_new, bound_min, bound_max); to_delete->push_back(e_old); } @@ -428,8 +428,8 @@ bool handleStartDsPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, u32 repeatCount = 0; NFAVertex hu = h.startDs; - auto start_succ = succs<set<NFAVertex>>(h.start, h); - auto startds_succ = succs<set<NFAVertex>>(h.startDs, h); + auto start_succ = succs<set<NFAVertex>>(h.start, h); + auto startds_succ = succs<set<NFAVertex>>(h.startDs, h); if (!is_subset_of(start_succ, startds_succ)) { DEBUG_PRINTF("not a simple chain\n"); @@ -464,7 +464,7 @@ bool handleStartDsPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, /* update bounds on edge */ assert(g[e].minBound <= repeatCount); - setEdgeBounds(g, e, repeatCount, ROSE_BOUND_INF); + setEdgeBounds(g, e, repeatCount, ROSE_BOUND_INF); g[v].left.reset(); /* clear the prefix info */ @@ -479,12 +479,12 @@ bool handleMixedPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, assert(in_degree(h.acceptEod, h) == 1); bool anchored = !proper_out_degree(h.startDs, h); - NFAVertex key = NGHolder::null_vertex(); + NFAVertex key = NGHolder::null_vertex(); NFAVertex base = anchored ? 
h.start : h.startDs; if (!anchored) { - auto start_succ = succs<set<NFAVertex>>(h.start, h); - auto startds_succ = succs<set<NFAVertex>>(h.startDs, h); + auto start_succ = succs<set<NFAVertex>>(h.start, h); + auto startds_succ = succs<set<NFAVertex>>(h.startDs, h); if (!is_subset_of(start_succ, startds_succ)) { DEBUG_PRINTF("not a simple chain\n"); @@ -493,7 +493,7 @@ bool handleMixedPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, } for (auto w : adjacent_vertices_range(base, h)) { - DEBUG_PRINTF("checking %zu\n", h[w].index); + DEBUG_PRINTF("checking %zu\n", h[w].index); if (!h[w].char_reach.all()) { continue; } @@ -528,7 +528,7 @@ bool handleMixedPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, set<NFAVertex> exits_and_repeat_verts; for (auto repeat_v : ri.vertices) { - DEBUG_PRINTF("repeat vertex %zu\n", h[repeat_v].index); + DEBUG_PRINTF("repeat vertex %zu\n", h[repeat_v].index); succ(h, repeat_v, &exits_and_repeat_verts); exits_and_repeat_verts.insert(repeat_v); } @@ -543,7 +543,7 @@ bool handleMixedPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, exits = exits_and_repeat_verts; erase_all(&exits, rep_verts); - auto base_succ = succs<set<NFAVertex>>(base, h); + auto base_succ = succs<set<NFAVertex>>(base, h); base_succ.erase(h.startDs); if (is_subset_of(base_succ, rep_verts)) { @@ -552,7 +552,7 @@ bool handleMixedPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, && is_subset_of(exits, base_succ) && is_subset_of(base_succ, exits_and_repeat_verts)) { /* we have a jump edge */ - ri.repeatMin = depth(0); + ri.repeatMin = depth(0); } else { return false; } @@ -562,7 +562,7 @@ bool handleMixedPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, DEBUG_PRINTF("woot?\n"); shared_ptr<NGHolder> h_new = make_shared<NGHolder>(); - unordered_map<NFAVertex, NFAVertex> rhs_map; + unordered_map<NFAVertex, NFAVertex> rhs_map; vector<NFAVertex> exits_vec; insert(&exits_vec, exits_vec.end(), exits); splitRHS(h, exits_vec, h_new.get(), &rhs_map); @@ -601,16 +601,16 @@ bool handleMixedPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v, } if (source(e_old, g) == ar) { - setEdgeBounds(g, e_old, ri.repeatMin + width, ri.repeatMax + width); + setEdgeBounds(g, e_old, ri.repeatMin + width, ri.repeatMax + width); } else { - RoseEdge e_new = add_edge(ar, v, g); - setEdgeBounds(g, e_new, ri.repeatMin + width, ri.repeatMax + width); + RoseEdge e_new = add_edge(ar, v, g); + setEdgeBounds(g, e_new, ri.repeatMin + width, ri.repeatMax + width); to_delete->push_back(e_old); } } else { assert(g[e_old].minBound <= ri.repeatMin + width); - setEdgeBounds(g, e_old, ri.repeatMin + width, ROSE_BOUND_INF); + setEdgeBounds(g, e_old, ri.repeatMin + width, ROSE_BOUND_INF); } g[v].left.dfa.reset(); @@ -655,7 +655,7 @@ void convertPrefixToBounds(RoseBuildImpl &tbi) { continue; } - DEBUG_PRINTF("inspecting prefix of %zu\n", g[v].index); + DEBUG_PRINTF("inspecting prefix of %zu\n", g[v].index); if (!proper_out_degree(h.startDs, h)) { if (handleStartPrefixCliche(h, g, v, e, ar, &to_delete)) { @@ -701,7 +701,7 @@ void convertPrefixToBounds(RoseBuildImpl &tbi) { continue; } - DEBUG_PRINTF("inspecting prefix of %zu\n", g[v].index); + DEBUG_PRINTF("inspecting prefix of %zu\n", g[v].index); if (!proper_out_degree(h.startDs, h)) { if (handleStartPrefixCliche(h, g, v, e, ar, &to_delete)) { @@ -736,7 +736,7 @@ void convertAnchPrefixToBounds(RoseBuildImpl &tbi) { continue; } - DEBUG_PRINTF("vertex %zu\n", g[v].index); + DEBUG_PRINTF("vertex %zu\n", g[v].index); // This pass runs after 
makeCastles, so we use the fact that bounded // repeat detection has already been done for us. @@ -794,23 +794,23 @@ void convertAnchPrefixToBounds(RoseBuildImpl &tbi) { const PureRepeat &pr = castle.repeats.begin()->second; DEBUG_PRINTF("castle has repeat %s\n", pr.bounds.str().c_str()); - DEBUG_PRINTF("delay adj %u\n", (u32)delay_adj); + DEBUG_PRINTF("delay adj %u\n", (u32)delay_adj); + + if (delay_adj >= pr.bounds.max) { + DEBUG_PRINTF("delay adj too large\n"); + continue; + } - if (delay_adj >= pr.bounds.max) { - DEBUG_PRINTF("delay adj too large\n"); - continue; - } - DepthMinMax bounds(pr.bounds); // copy if (delay_adj > bounds.min) { - bounds.min = depth(0); - } else { - bounds.min -= delay_adj; + bounds.min = depth(0); + } else { + bounds.min -= delay_adj; } bounds.max -= delay_adj; - setEdgeBounds(g, e, bounds.min, bounds.max.is_finite() - ? (u32)bounds.max - : ROSE_BOUND_INF); + setEdgeBounds(g, e, bounds.min, bounds.max.is_finite() + ? (u32)bounds.max + : ROSE_BOUND_INF); g[v].left.reset(); } } diff --git a/contrib/libs/hyperscan/src/rose/rose_build_convert.h b/contrib/libs/hyperscan/src/rose/rose_build_convert.h index 413cbeae19..7307c213ca 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_convert.h +++ b/contrib/libs/hyperscan/src/rose/rose_build_convert.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: diff --git a/contrib/libs/hyperscan/src/rose/rose_build_dedupe.cpp b/contrib/libs/hyperscan/src/rose/rose_build_dedupe.cpp index a159bb67b3..d5d002d43b 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_dedupe.cpp +++ b/contrib/libs/hyperscan/src/rose/rose_build_dedupe.cpp @@ -1,393 +1,393 @@ -/* - * Copyright (c) 2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "rose_build_impl.h" -#include "nfa/castlecompile.h" -#include "nfagraph/ng_repeat.h" +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "rose_build_impl.h" +#include "nfa/castlecompile.h" +#include "nfagraph/ng_repeat.h" #include "smallwrite/smallwrite_build.h" -#include "util/compile_context.h" -#include "util/boundary_reports.h" -#include "util/make_unique.h" -#include "util/report_manager.h" - -using namespace std; - -namespace ue2 { - -static -bool requiresDedupe(const NGHolder &h, const flat_set<ReportID> &reports, - const Grey &grey) { - /* TODO: tighten */ - NFAVertex seen_vert = NGHolder::null_vertex(); - - for (auto v : inv_adjacent_vertices_range(h.accept, h)) { - if (has_intersection(h[v].reports, reports)) { - if (seen_vert != NGHolder::null_vertex()) { - return true; - } - seen_vert = v; - } - } - - for (auto v : inv_adjacent_vertices_range(h.acceptEod, h)) { - if (has_intersection(h[v].reports, reports)) { - if (seen_vert != NGHolder::null_vertex()) { - return true; - } - seen_vert = v; - } - } - - if (seen_vert) { - /* if the reporting vertex is part of of a terminal repeat, the - * construction process may reform the graph splitting it into two - * vertices (pos, cyclic) and hence require dedupe */ - vector<GraphRepeatInfo> repeats; - findRepeats(h, grey.minExtBoundedRepeatSize, &repeats); - for (const auto &repeat : repeats) { - if (find(repeat.vertices.begin(), repeat.vertices.end(), - seen_vert) != repeat.vertices.end()) { - return true; - } - } - } - - return false; -} - -class RoseDedupeAuxImpl : public RoseDedupeAux { -public: - explicit RoseDedupeAuxImpl(const RoseBuildImpl &build_in); - bool requiresDedupeSupport( - const flat_set<ReportID> &reports) const override; - -private: - bool hasSafeMultiReports(const flat_set<ReportID> &reports) const; - - const RoseBuildImpl &build; - map<ReportID, set<RoseVertex>> vert_map; //!< ordinary literals - map<ReportID, set<RoseVertex>> sb_vert_map; //!< small block literals - map<ReportID, set<suffix_id>> 
suffix_map; - map<ReportID, set<const OutfixInfo *>> outfix_map; - map<ReportID, set<const raw_puff *>> puff_map; - - unordered_set<ReportID> live_reports; //!< all live internal reports. -}; - -unique_ptr<RoseDedupeAux> RoseBuildImpl::generateDedupeAux() const { - return ue2::make_unique<RoseDedupeAuxImpl>(*this); -} - -RoseDedupeAux::~RoseDedupeAux() = default; - -RoseDedupeAuxImpl::RoseDedupeAuxImpl(const RoseBuildImpl &build_in) - : build(build_in) { - const RoseGraph &g = build.g; - - set<suffix_id> suffixes; - - for (auto v : vertices_range(g)) { - insert(&live_reports, g[v].reports); - - // Literals in the small block table are "shadow" copies of literals in - // the other tables that do not run in the same runtime invocation. - // Dedupe key assignment will be taken care of by the real literals. - if (build.hasLiteralInTable(v, ROSE_ANCHORED_SMALL_BLOCK)) { - for (const auto &report_id : g[v].reports) { - sb_vert_map[report_id].insert(v); - } - } else { - for (const auto &report_id : g[v].reports) { - vert_map[report_id].insert(v); - } - } - - // Several vertices may share a suffix, so we collect the set of - // suffixes first to avoid repeating work. - if (g[v].suffix) { - suffixes.insert(g[v].suffix); - } - } - - for (const auto &suffix : suffixes) { - for (const auto &report_id : all_reports(suffix)) { - suffix_map[report_id].insert(suffix); - live_reports.insert(report_id); - } - } - - for (const auto &outfix : build.outfixes) { - for (const auto &report_id : all_reports(outfix)) { - outfix_map[report_id].insert(&outfix); - live_reports.insert(report_id); - } - } - - if (build.mpv_outfix) { - auto *mpv = build.mpv_outfix->mpv(); - for (const auto &puff : mpv->puffettes) { - puff_map[puff.report].insert(&puff); - live_reports.insert(puff.report); - } - for (const auto &puff : mpv->triggered_puffettes) { - puff_map[puff.report].insert(&puff); - live_reports.insert(puff.report); - } - } - +#include "util/compile_context.h" +#include "util/boundary_reports.h" +#include "util/make_unique.h" +#include "util/report_manager.h" + +using namespace std; + +namespace ue2 { + +static +bool requiresDedupe(const NGHolder &h, const flat_set<ReportID> &reports, + const Grey &grey) { + /* TODO: tighten */ + NFAVertex seen_vert = NGHolder::null_vertex(); + + for (auto v : inv_adjacent_vertices_range(h.accept, h)) { + if (has_intersection(h[v].reports, reports)) { + if (seen_vert != NGHolder::null_vertex()) { + return true; + } + seen_vert = v; + } + } + + for (auto v : inv_adjacent_vertices_range(h.acceptEod, h)) { + if (has_intersection(h[v].reports, reports)) { + if (seen_vert != NGHolder::null_vertex()) { + return true; + } + seen_vert = v; + } + } + + if (seen_vert) { + /* if the reporting vertex is part of of a terminal repeat, the + * construction process may reform the graph splitting it into two + * vertices (pos, cyclic) and hence require dedupe */ + vector<GraphRepeatInfo> repeats; + findRepeats(h, grey.minExtBoundedRepeatSize, &repeats); + for (const auto &repeat : repeats) { + if (find(repeat.vertices.begin(), repeat.vertices.end(), + seen_vert) != repeat.vertices.end()) { + return true; + } + } + } + + return false; +} + +class RoseDedupeAuxImpl : public RoseDedupeAux { +public: + explicit RoseDedupeAuxImpl(const RoseBuildImpl &build_in); + bool requiresDedupeSupport( + const flat_set<ReportID> &reports) const override; + +private: + bool hasSafeMultiReports(const flat_set<ReportID> &reports) const; + + const RoseBuildImpl &build; + map<ReportID, set<RoseVertex>> vert_map; //!< 
ordinary literals + map<ReportID, set<RoseVertex>> sb_vert_map; //!< small block literals + map<ReportID, set<suffix_id>> suffix_map; + map<ReportID, set<const OutfixInfo *>> outfix_map; + map<ReportID, set<const raw_puff *>> puff_map; + + unordered_set<ReportID> live_reports; //!< all live internal reports. +}; + +unique_ptr<RoseDedupeAux> RoseBuildImpl::generateDedupeAux() const { + return ue2::make_unique<RoseDedupeAuxImpl>(*this); +} + +RoseDedupeAux::~RoseDedupeAux() = default; + +RoseDedupeAuxImpl::RoseDedupeAuxImpl(const RoseBuildImpl &build_in) + : build(build_in) { + const RoseGraph &g = build.g; + + set<suffix_id> suffixes; + + for (auto v : vertices_range(g)) { + insert(&live_reports, g[v].reports); + + // Literals in the small block table are "shadow" copies of literals in + // the other tables that do not run in the same runtime invocation. + // Dedupe key assignment will be taken care of by the real literals. + if (build.hasLiteralInTable(v, ROSE_ANCHORED_SMALL_BLOCK)) { + for (const auto &report_id : g[v].reports) { + sb_vert_map[report_id].insert(v); + } + } else { + for (const auto &report_id : g[v].reports) { + vert_map[report_id].insert(v); + } + } + + // Several vertices may share a suffix, so we collect the set of + // suffixes first to avoid repeating work. + if (g[v].suffix) { + suffixes.insert(g[v].suffix); + } + } + + for (const auto &suffix : suffixes) { + for (const auto &report_id : all_reports(suffix)) { + suffix_map[report_id].insert(suffix); + live_reports.insert(report_id); + } + } + + for (const auto &outfix : build.outfixes) { + for (const auto &report_id : all_reports(outfix)) { + outfix_map[report_id].insert(&outfix); + live_reports.insert(report_id); + } + } + + if (build.mpv_outfix) { + auto *mpv = build.mpv_outfix->mpv(); + for (const auto &puff : mpv->puffettes) { + puff_map[puff.report].insert(&puff); + live_reports.insert(puff.report); + } + for (const auto &puff : mpv->triggered_puffettes) { + puff_map[puff.report].insert(&puff); + live_reports.insert(puff.report); + } + } + for (const auto &report_id : build.smwr.all_reports()) { live_reports.insert(report_id); } - // Collect live reports from boundary reports. - insert(&live_reports, build.boundary.report_at_0); - insert(&live_reports, build.boundary.report_at_0_eod); - insert(&live_reports, build.boundary.report_at_eod); - - DEBUG_PRINTF("%zu of %zu reports are live\n", live_reports.size(), - build.rm.numReports()); -} - -static -vector<CharReach> makePath(const rose_literal_id &lit) { - vector<CharReach> path(begin(lit.s), end(lit.s)); - for (u32 i = 0; i < lit.delay; i++) { - path.push_back(CharReach::dot()); - } - return path; -} - -/** - * \brief True if one of the given literals overlaps with the suffix of - * another, meaning that they could arrive at the same offset. - */ -static -bool literalsCouldRace(const rose_literal_id &lit1, - const rose_literal_id &lit2) { - DEBUG_PRINTF("compare %s (delay %u) and %s (delay %u)\n", - dumpString(lit1.s).c_str(), lit1.delay, - dumpString(lit2.s).c_str(), lit2.delay); - - // Add dots on the end of each literal for delay. - const auto v1 = makePath(lit1); - const auto v2 = makePath(lit2); - - // See if the smaller path is a suffix of the larger path. - const auto *smaller = v1.size() < v2.size() ? &v1 : &v2; - const auto *bigger = v1.size() < v2.size() ? 
&v2 : &v1; - auto r = mismatch(smaller->rbegin(), smaller->rend(), bigger->rbegin(), - overlaps); - return r.first == smaller->rend(); -} - -bool RoseDedupeAuxImpl::hasSafeMultiReports( - const flat_set<ReportID> &reports) const { - if (reports.size() <= 1) { - return true; - } - - /* We have more than one ReportID corresponding to the external ID that is - * presented to the user. These may differ in offset adjustment, bounds - * checks, etc. */ - - /* TODO: work out if these differences will actually cause problems */ - - /* One common case where we know we don't have a problem is if there are - * precisely two reports, one for the main Rose path and one for the - * "small block matcher" path. */ - if (reports.size() == 2) { - ReportID id1 = *reports.begin(); - ReportID id2 = *reports.rbegin(); - - bool has_verts_1 = contains(vert_map, id1); - bool has_verts_2 = contains(vert_map, id2); - bool has_sb_verts_1 = contains(sb_vert_map, id1); - bool has_sb_verts_2 = contains(sb_vert_map, id2); - - if (has_verts_1 != has_verts_2 && has_sb_verts_1 != has_sb_verts_2) { - DEBUG_PRINTF("two reports, one full and one small block: ok\n"); - return true; - } - } - - DEBUG_PRINTF("more than one report\n"); - return false; -} - -bool RoseDedupeAuxImpl::requiresDedupeSupport( - const flat_set<ReportID> &reports_in) const { - /* TODO: this could be expanded to check for offset or character - constraints */ - - // We don't want to consider dead reports (tracked by ReportManager but no - // longer used) for the purposes of assigning dupe keys. - flat_set<ReportID> reports; - for (auto id : reports_in) { - if (contains(live_reports, id)) { - reports.insert(id); - } - } - - DEBUG_PRINTF("live reports: %s\n", as_string_list(reports).c_str()); - - const RoseGraph &g = build.g; - - bool has_suffix = false; - bool has_outfix = false; - - if (!hasSafeMultiReports(reports)) { - DEBUG_PRINTF("multiple reports not safe\n"); - return true; - } - - set<RoseVertex> roles; - set<suffix_id> suffixes; - set<const OutfixInfo *> outfixes; - set<const raw_puff *> puffettes; - for (ReportID r : reports) { - if (contains(vert_map, r)) { - insert(&roles, vert_map.at(r)); - } - if (contains(suffix_map, r)) { - insert(&suffixes, suffix_map.at(r)); - } - - if (contains(outfix_map, r)) { - insert(&outfixes, outfix_map.at(r)); - } - - if (contains(puff_map, r)) { - insert(&puffettes, puff_map.at(r)); - } - } - - /* roles */ - - map<u32, u32> lits; // Literal ID -> count of occurrences. - - const bool has_role = !roles.empty(); - for (auto v : roles) { - for (const auto &lit : g[v].literals) { - lits[lit]++; - } - if (g[v].eod_accept) { - // Literals plugged into this EOD accept must be taken into account - // as well. 
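literalsCouldRace, shown in this hunk, decides whether two literals can arrive at the same offset by extending each with one wildcard position per unit of delay and testing whether the shorter padded path matches a suffix of the longer one. A much simplified sketch of the same suffix check, using plain std::string with '.' standing in for the CharReach wildcard rather than the real character-class comparison:

#include <algorithm>
#include <string>

// Two path elements "overlap" if either is a wildcard or they are equal.
static bool overlaps(char a, char b) {
    return a == '.' || b == '.' || a == b;
}

// Append one wildcard per unit of delay, analogous to the dot CharReach
// entries added by the real makePath.
static std::string makePath(const std::string &lit, unsigned delay) {
    return lit + std::string(delay, '.');
}

// True if the shorter padded literal is an (overlapping) suffix of the longer
// one, i.e. both could finish matching at the same offset.
static bool literalsCouldRace(const std::string &lit1, unsigned delay1,
                              const std::string &lit2, unsigned delay2) {
    const std::string p1 = makePath(lit1, delay1);
    const std::string p2 = makePath(lit2, delay2);
    const std::string &smaller = p1.size() < p2.size() ? p1 : p2;
    const std::string &bigger = p1.size() < p2.size() ? p2 : p1;
    auto r = std::mismatch(smaller.rbegin(), smaller.rend(), bigger.rbegin(),
                           overlaps);
    return r.first == smaller.rend();
}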
- for (auto u : inv_adjacent_vertices_range(v, g)) { - for (const auto &lit : g[u].literals) { - lits[lit]++; - } - } - } - } - - /* literals */ - - for (const auto &m : lits) { - if (m.second > 1) { - DEBUG_PRINTF("lit %u used by >1 reporting roles\n", m.first); - return true; - } - } - - for (auto it = begin(lits); it != end(lits); ++it) { - const auto &lit1 = build.literals.at(it->first); - for (auto jt = next(it); jt != end(lits); ++jt) { - const auto &lit2 = build.literals.at(jt->first); - if (literalsCouldRace(lit1, lit2)) { - DEBUG_PRINTF("literals could race\n"); - return true; - } - } - } - - /* suffixes */ - - for (const auto &suffix : suffixes) { - if (has_suffix || has_role) { - return true; /* scope for badness */ - } - - has_suffix = true; - - /* some lesser suffix engines (nfas, haig, castle) can raise multiple - * matches for a report id at the same offset if there are multiple - * report states live. */ - if (suffix.haig()) { - return true; - } - if (suffix.graph() && - requiresDedupe(*suffix.graph(), reports, build.cc.grey)) { - return true; - } - if (suffix.castle() && requiresDedupe(*suffix.castle(), reports)) { - return true; - } - } - - /* outfixes */ - - for (const auto &outfix_ptr : outfixes) { - assert(outfix_ptr); - const OutfixInfo &out = *outfix_ptr; - - if (has_outfix || has_role || has_suffix) { - return true; - } - has_outfix = true; - - if (out.haig()) { - return true; /* haig may report matches with different SOM at the - same offset */ - } - - if (out.holder() && - requiresDedupe(*out.holder(), reports, build.cc.grey)) { - return true; - } - } - - /* mpv */ - for (UNUSED const auto &puff : puffettes) { - if (has_outfix || has_role || has_suffix) { - return true; - } - has_outfix = true; - } - - /* boundary */ - if (has_intersection(build.boundary.report_at_eod, reports)) { - if (has_outfix || has_role || has_suffix) { - return true; - } - } - - return false; -} - -} // namespace ue2 + // Collect live reports from boundary reports. + insert(&live_reports, build.boundary.report_at_0); + insert(&live_reports, build.boundary.report_at_0_eod); + insert(&live_reports, build.boundary.report_at_eod); + + DEBUG_PRINTF("%zu of %zu reports are live\n", live_reports.size(), + build.rm.numReports()); +} + +static +vector<CharReach> makePath(const rose_literal_id &lit) { + vector<CharReach> path(begin(lit.s), end(lit.s)); + for (u32 i = 0; i < lit.delay; i++) { + path.push_back(CharReach::dot()); + } + return path; +} + +/** + * \brief True if one of the given literals overlaps with the suffix of + * another, meaning that they could arrive at the same offset. + */ +static +bool literalsCouldRace(const rose_literal_id &lit1, + const rose_literal_id &lit2) { + DEBUG_PRINTF("compare %s (delay %u) and %s (delay %u)\n", + dumpString(lit1.s).c_str(), lit1.delay, + dumpString(lit2.s).c_str(), lit2.delay); + + // Add dots on the end of each literal for delay. + const auto v1 = makePath(lit1); + const auto v2 = makePath(lit2); + + // See if the smaller path is a suffix of the larger path. + const auto *smaller = v1.size() < v2.size() ? &v1 : &v2; + const auto *bigger = v1.size() < v2.size() ? &v2 : &v1; + auto r = mismatch(smaller->rbegin(), smaller->rend(), bigger->rbegin(), + overlaps); + return r.first == smaller->rend(); +} + +bool RoseDedupeAuxImpl::hasSafeMultiReports( + const flat_set<ReportID> &reports) const { + if (reports.size() <= 1) { + return true; + } + + /* We have more than one ReportID corresponding to the external ID that is + * presented to the user. 
These may differ in offset adjustment, bounds + * checks, etc. */ + + /* TODO: work out if these differences will actually cause problems */ + + /* One common case where we know we don't have a problem is if there are + * precisely two reports, one for the main Rose path and one for the + * "small block matcher" path. */ + if (reports.size() == 2) { + ReportID id1 = *reports.begin(); + ReportID id2 = *reports.rbegin(); + + bool has_verts_1 = contains(vert_map, id1); + bool has_verts_2 = contains(vert_map, id2); + bool has_sb_verts_1 = contains(sb_vert_map, id1); + bool has_sb_verts_2 = contains(sb_vert_map, id2); + + if (has_verts_1 != has_verts_2 && has_sb_verts_1 != has_sb_verts_2) { + DEBUG_PRINTF("two reports, one full and one small block: ok\n"); + return true; + } + } + + DEBUG_PRINTF("more than one report\n"); + return false; +} + +bool RoseDedupeAuxImpl::requiresDedupeSupport( + const flat_set<ReportID> &reports_in) const { + /* TODO: this could be expanded to check for offset or character + constraints */ + + // We don't want to consider dead reports (tracked by ReportManager but no + // longer used) for the purposes of assigning dupe keys. + flat_set<ReportID> reports; + for (auto id : reports_in) { + if (contains(live_reports, id)) { + reports.insert(id); + } + } + + DEBUG_PRINTF("live reports: %s\n", as_string_list(reports).c_str()); + + const RoseGraph &g = build.g; + + bool has_suffix = false; + bool has_outfix = false; + + if (!hasSafeMultiReports(reports)) { + DEBUG_PRINTF("multiple reports not safe\n"); + return true; + } + + set<RoseVertex> roles; + set<suffix_id> suffixes; + set<const OutfixInfo *> outfixes; + set<const raw_puff *> puffettes; + for (ReportID r : reports) { + if (contains(vert_map, r)) { + insert(&roles, vert_map.at(r)); + } + if (contains(suffix_map, r)) { + insert(&suffixes, suffix_map.at(r)); + } + + if (contains(outfix_map, r)) { + insert(&outfixes, outfix_map.at(r)); + } + + if (contains(puff_map, r)) { + insert(&puffettes, puff_map.at(r)); + } + } + + /* roles */ + + map<u32, u32> lits; // Literal ID -> count of occurrences. + + const bool has_role = !roles.empty(); + for (auto v : roles) { + for (const auto &lit : g[v].literals) { + lits[lit]++; + } + if (g[v].eod_accept) { + // Literals plugged into this EOD accept must be taken into account + // as well. + for (auto u : inv_adjacent_vertices_range(v, g)) { + for (const auto &lit : g[u].literals) { + lits[lit]++; + } + } + } + } + + /* literals */ + + for (const auto &m : lits) { + if (m.second > 1) { + DEBUG_PRINTF("lit %u used by >1 reporting roles\n", m.first); + return true; + } + } + + for (auto it = begin(lits); it != end(lits); ++it) { + const auto &lit1 = build.literals.at(it->first); + for (auto jt = next(it); jt != end(lits); ++jt) { + const auto &lit2 = build.literals.at(jt->first); + if (literalsCouldRace(lit1, lit2)) { + DEBUG_PRINTF("literals could race\n"); + return true; + } + } + } + + /* suffixes */ + + for (const auto &suffix : suffixes) { + if (has_suffix || has_role) { + return true; /* scope for badness */ + } + + has_suffix = true; + + /* some lesser suffix engines (nfas, haig, castle) can raise multiple + * matches for a report id at the same offset if there are multiple + * report states live. 
*/ + if (suffix.haig()) { + return true; + } + if (suffix.graph() && + requiresDedupe(*suffix.graph(), reports, build.cc.grey)) { + return true; + } + if (suffix.castle() && requiresDedupe(*suffix.castle(), reports)) { + return true; + } + } + + /* outfixes */ + + for (const auto &outfix_ptr : outfixes) { + assert(outfix_ptr); + const OutfixInfo &out = *outfix_ptr; + + if (has_outfix || has_role || has_suffix) { + return true; + } + has_outfix = true; + + if (out.haig()) { + return true; /* haig may report matches with different SOM at the + same offset */ + } + + if (out.holder() && + requiresDedupe(*out.holder(), reports, build.cc.grey)) { + return true; + } + } + + /* mpv */ + for (UNUSED const auto &puff : puffettes) { + if (has_outfix || has_role || has_suffix) { + return true; + } + has_outfix = true; + } + + /* boundary */ + if (has_intersection(build.boundary.report_at_eod, reports)) { + if (has_outfix || has_role || has_suffix) { + return true; + } + } + + return false; +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/rose/rose_build_dump.h b/contrib/libs/hyperscan/src/rose/rose_build_dump.h index 0c92a23cc3..d4c620a3e6 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_dump.h +++ b/contrib/libs/hyperscan/src/rose/rose_build_dump.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -29,53 +29,53 @@ #ifndef ROSE_BUILD_DUMP_H #define ROSE_BUILD_DUMP_H -#include "ue2common.h" - -#include <map> -#include <string> -#include <vector> - +#include "ue2common.h" + +#include <map> +#include <string> +#include <vector> + struct RoseEngine; namespace ue2 { -class RoseBuildImpl; +class RoseBuildImpl; struct Grey; -struct hwlmLiteral; -struct LitFragment; -struct left_id; -struct suffix_id; +struct hwlmLiteral; +struct LitFragment; +struct left_id; +struct suffix_id; #ifdef DUMP_SUPPORT // Dump the Rose graph in graphviz representation. 
-void dumpRoseGraph(const RoseBuildImpl &build, const char *filename); - -void dumpRose(const RoseBuildImpl &build, - const std::vector<LitFragment> &fragments, - const std::map<left_id, u32> &leftfix_queue_map, - const std::map<suffix_id, u32> &suffix_queue_map, - const RoseEngine *t); - -void dumpMatcherLiterals(const std::vector<hwlmLiteral> &lits, - const std::string &name, const Grey &grey); - +void dumpRoseGraph(const RoseBuildImpl &build, const char *filename); + +void dumpRose(const RoseBuildImpl &build, + const std::vector<LitFragment> &fragments, + const std::map<left_id, u32> &leftfix_queue_map, + const std::map<suffix_id, u32> &suffix_queue_map, + const RoseEngine *t); + +void dumpMatcherLiterals(const std::vector<hwlmLiteral> &lits, + const std::string &name, const Grey &grey); + #else static UNUSED -void dumpRoseGraph(const RoseBuildImpl &, const char *) { +void dumpRoseGraph(const RoseBuildImpl &, const char *) { +} + +static UNUSED +void dumpRose(const RoseBuildImpl &, const std::vector<LitFragment> &, + const std::map<left_id, u32> &, const std::map<suffix_id, u32> &, + const RoseEngine *) { } static UNUSED -void dumpRose(const RoseBuildImpl &, const std::vector<LitFragment> &, - const std::map<left_id, u32> &, const std::map<suffix_id, u32> &, - const RoseEngine *) { +void dumpMatcherLiterals(const std::vector<hwlmLiteral> &, const std::string &, + const Grey &) { } -static UNUSED -void dumpMatcherLiterals(const std::vector<hwlmLiteral> &, const std::string &, - const Grey &) { -} - #endif } // namespace ue2 diff --git a/contrib/libs/hyperscan/src/rose/rose_build_engine_blob.cpp b/contrib/libs/hyperscan/src/rose/rose_build_engine_blob.cpp index dc92418375..d39572070f 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_engine_blob.cpp +++ b/contrib/libs/hyperscan/src/rose/rose_build_engine_blob.cpp @@ -1,117 +1,117 @@ -/* - * Copyright (c) 2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#include "rose_build_engine_blob.h" - -#include "rose_build_lookaround.h" -#include "util/charreach_util.h" - -using namespace std; - -namespace ue2 { - -u32 lookaround_info::get_offset_of(const vector<vector<CharReach>> &reaches, - RoseEngineBlob &blob) { - assert(reaches.size() != 1); - - // Check the cache. - auto it = multi_cache.find(reaches); - if (it != multi_cache.end()) { - DEBUG_PRINTF("reusing reach at idx %u\n", it->second); - return it->second; - } - - vector<u8> raw_reach(reaches.size() * MULTI_REACH_BITVECTOR_LEN); - size_t off = 0; - for (const auto &m : reaches) { - u8 u = 0; - assert(m.size() == MAX_LOOKAROUND_PATHS); - for (size_t i = 0; i < m.size(); i++) { - if (m[i].none()) { - u |= (u8)1U << i; - } - } - fill_n(raw_reach.data() + off, MULTI_REACH_BITVECTOR_LEN, u); - - for (size_t i = 0; i < m.size(); i++) { - const CharReach &cr = m[i]; - if (cr.none()) { - continue; - } - - for (size_t c = cr.find_first(); c != cr.npos; - c = cr.find_next(c)) { - raw_reach[c + off] |= (u8)1U << i; - } - } - - off += MULTI_REACH_BITVECTOR_LEN; - } - - u32 reach_idx = blob.add_range(raw_reach); - DEBUG_PRINTF("adding reach at idx %u\n", reach_idx); - multi_cache.emplace(reaches, reach_idx); - - return reach_idx; -} - -u32 lookaround_info::get_offset_of(const vector<CharReach> &reach, - RoseEngineBlob &blob) { - if (contains(rcache, reach)) { - u32 offset = rcache[reach]; - DEBUG_PRINTF("reusing reach at idx %u\n", offset); - return offset; - } - - vector<u8> raw_reach(reach.size() * REACH_BITVECTOR_LEN); - size_t off = 0; - for (const auto &cr : reach) { - assert(cr.any()); // Should be at least one character! - fill_bitvector(cr, raw_reach.data() + off); - off += REACH_BITVECTOR_LEN; - } - - u32 offset = blob.add_range(raw_reach); - rcache.emplace(reach, offset); - return offset; -} - -u32 lookaround_info::get_offset_of(const vector<s8> &look, - RoseEngineBlob &blob) { - if (contains(lcache, look)) { - u32 offset = lcache[look]; - DEBUG_PRINTF("reusing look at idx %u\n", offset); - return offset; - } - - u32 offset = blob.add_range(look); - lcache.emplace(look, offset); - return offset; -} - -} // namespace ue2 +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "rose_build_engine_blob.h" + +#include "rose_build_lookaround.h" +#include "util/charreach_util.h" + +using namespace std; + +namespace ue2 { + +u32 lookaround_info::get_offset_of(const vector<vector<CharReach>> &reaches, + RoseEngineBlob &blob) { + assert(reaches.size() != 1); + + // Check the cache. + auto it = multi_cache.find(reaches); + if (it != multi_cache.end()) { + DEBUG_PRINTF("reusing reach at idx %u\n", it->second); + return it->second; + } + + vector<u8> raw_reach(reaches.size() * MULTI_REACH_BITVECTOR_LEN); + size_t off = 0; + for (const auto &m : reaches) { + u8 u = 0; + assert(m.size() == MAX_LOOKAROUND_PATHS); + for (size_t i = 0; i < m.size(); i++) { + if (m[i].none()) { + u |= (u8)1U << i; + } + } + fill_n(raw_reach.data() + off, MULTI_REACH_BITVECTOR_LEN, u); + + for (size_t i = 0; i < m.size(); i++) { + const CharReach &cr = m[i]; + if (cr.none()) { + continue; + } + + for (size_t c = cr.find_first(); c != cr.npos; + c = cr.find_next(c)) { + raw_reach[c + off] |= (u8)1U << i; + } + } + + off += MULTI_REACH_BITVECTOR_LEN; + } + + u32 reach_idx = blob.add_range(raw_reach); + DEBUG_PRINTF("adding reach at idx %u\n", reach_idx); + multi_cache.emplace(reaches, reach_idx); + + return reach_idx; +} + +u32 lookaround_info::get_offset_of(const vector<CharReach> &reach, + RoseEngineBlob &blob) { + if (contains(rcache, reach)) { + u32 offset = rcache[reach]; + DEBUG_PRINTF("reusing reach at idx %u\n", offset); + return offset; + } + + vector<u8> raw_reach(reach.size() * REACH_BITVECTOR_LEN); + size_t off = 0; + for (const auto &cr : reach) { + assert(cr.any()); // Should be at least one character! + fill_bitvector(cr, raw_reach.data() + off); + off += REACH_BITVECTOR_LEN; + } + + u32 offset = blob.add_range(raw_reach); + rcache.emplace(reach, offset); + return offset; +} + +u32 lookaround_info::get_offset_of(const vector<s8> &look, + RoseEngineBlob &blob) { + if (contains(lcache, look)) { + u32 offset = lcache[look]; + DEBUG_PRINTF("reusing look at idx %u\n", offset); + return offset; + } + + u32 offset = blob.add_range(look); + lcache.emplace(look, offset); + return offset; +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/rose/rose_build_engine_blob.h b/contrib/libs/hyperscan/src/rose/rose_build_engine_blob.h index a7707852a7..da4e355de2 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_engine_blob.h +++ b/contrib/libs/hyperscan/src/rose/rose_build_engine_blob.h @@ -1,176 +1,176 @@ -/* - * Copyright (c) 2016-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef ROSE_BUILD_ENGINE_BLOB_H -#define ROSE_BUILD_ENGINE_BLOB_H - -#include "rose_internal.h" - -#include "ue2common.h" -#include "util/alloc.h" -#include "util/bytecode_ptr.h" -#include "util/charreach.h" -#include "util/container.h" -#include "util/hash.h" -#include "util/multibit_build.h" -#include "util/noncopyable.h" -#include "util/verify_types.h" -#include "util/unordered.h" - -#include <type_traits> -#include <vector> - -namespace ue2 { - -class RoseEngineBlob; - -struct lookaround_info : noncopyable { - u32 get_offset_of(const std::vector<std::vector<CharReach>> &look, - RoseEngineBlob &blob); - u32 get_offset_of(const std::vector<CharReach> &reach, - RoseEngineBlob &blob); - u32 get_offset_of(const std::vector<s8> &look, RoseEngineBlob &blob); - -private: - using Path = std::vector<CharReach>; - ue2_unordered_map<std::vector<Path>, u32> multi_cache; - ue2_unordered_map<std::vector<s8>, u32> lcache; - ue2_unordered_map<Path, u32> rcache; -}; - -class RoseEngineBlob : noncopyable { -public: - /** \brief Base offset of engine_blob in the Rose engine bytecode. 
*/ - static constexpr u32 base_offset = ROUNDUP_CL(sizeof(RoseEngine)); - - bool empty() const { - return blob.empty(); - } - - size_t size() const { - return blob.size(); - } - - u32 add(const void *a, const size_t len, const size_t align) { - pad(align); - - size_t rv = base_offset + blob.size(); - assert(rv >= base_offset); - DEBUG_PRINTF("write %zu bytes at offset %zu\n", len, rv); - - assert(ISALIGNED_N(blob.size(), align)); - - blob.resize(blob.size() + len); - memcpy(&blob.back() - len + 1, a, len); - - return verify_u32(rv); - } - - template<typename T> - u32 add(const bytecode_ptr<T> &a) { - return add(a.get(), a.size(), a.align()); - } - - template<typename T> - u32 add(const T &a) { - static_assert(std::is_pod<T>::value, "should be pod"); - return add(&a, sizeof(a), alignof(T)); - } - - template<typename T> - u32 add(const T &a, const size_t len) { - static_assert(std::is_pod<T>::value, "should be pod"); - return add(&a, len, alignof(T)); - } - - template<typename Iter> - u32 add(Iter b, const Iter &e) { - using value_type = typename std::iterator_traits<Iter>::value_type; - static_assert(std::is_pod<value_type>::value, "should be pod"); - - if (b == e) { - return 0; - } - - u32 offset = add(*b); - for (++b; b != e; ++b) { - add(*b); - } - - return offset; - } - - template<typename Range> - u32 add_range(const Range &range) { - return add(begin(range), end(range)); - } - - u32 add_iterator(const std::vector<mmbit_sparse_iter> &iter) { - auto cache_it = cached_iters.find(iter); - if (cache_it != cached_iters.end()) { - u32 offset = cache_it->second; - DEBUG_PRINTF("cache hit for iter at %u\n", offset); - return offset; - } - - u32 offset = add(iter.begin(), iter.end()); - cached_iters.emplace(iter, offset); - return offset; - } - - void write_bytes(RoseEngine *engine) { - copy_bytes((char *)engine + base_offset, blob); - } - - lookaround_info lookaround_cache; - -private: - void pad(size_t align) { - assert(ISALIGNED_N(base_offset, align)); - size_t s = blob.size(); - - if (ISALIGNED_N(s, align)) { - return; - } - - blob.resize(s + align - s % align); - } - - /** \brief Cache of previously-written sparse iterators. */ - ue2_unordered_map<std::vector<mmbit_sparse_iter>, u32> cached_iters; - - /** - * \brief Contents of the Rose bytecode immediately following the - * RoseEngine. - */ - std::vector<char, AlignedAllocator<char, 64>> blob; -}; - -} // namespace ue2 - -#endif // ROSE_BUILD_ENGINE_BLOB_H +/* + * Copyright (c) 2016-2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ROSE_BUILD_ENGINE_BLOB_H +#define ROSE_BUILD_ENGINE_BLOB_H + +#include "rose_internal.h" + +#include "ue2common.h" +#include "util/alloc.h" +#include "util/bytecode_ptr.h" +#include "util/charreach.h" +#include "util/container.h" +#include "util/hash.h" +#include "util/multibit_build.h" +#include "util/noncopyable.h" +#include "util/verify_types.h" +#include "util/unordered.h" + +#include <type_traits> +#include <vector> + +namespace ue2 { + +class RoseEngineBlob; + +struct lookaround_info : noncopyable { + u32 get_offset_of(const std::vector<std::vector<CharReach>> &look, + RoseEngineBlob &blob); + u32 get_offset_of(const std::vector<CharReach> &reach, + RoseEngineBlob &blob); + u32 get_offset_of(const std::vector<s8> &look, RoseEngineBlob &blob); + +private: + using Path = std::vector<CharReach>; + ue2_unordered_map<std::vector<Path>, u32> multi_cache; + ue2_unordered_map<std::vector<s8>, u32> lcache; + ue2_unordered_map<Path, u32> rcache; +}; + +class RoseEngineBlob : noncopyable { +public: + /** \brief Base offset of engine_blob in the Rose engine bytecode. */ + static constexpr u32 base_offset = ROUNDUP_CL(sizeof(RoseEngine)); + + bool empty() const { + return blob.empty(); + } + + size_t size() const { + return blob.size(); + } + + u32 add(const void *a, const size_t len, const size_t align) { + pad(align); + + size_t rv = base_offset + blob.size(); + assert(rv >= base_offset); + DEBUG_PRINTF("write %zu bytes at offset %zu\n", len, rv); + + assert(ISALIGNED_N(blob.size(), align)); + + blob.resize(blob.size() + len); + memcpy(&blob.back() - len + 1, a, len); + + return verify_u32(rv); + } + + template<typename T> + u32 add(const bytecode_ptr<T> &a) { + return add(a.get(), a.size(), a.align()); + } + + template<typename T> + u32 add(const T &a) { + static_assert(std::is_pod<T>::value, "should be pod"); + return add(&a, sizeof(a), alignof(T)); + } + + template<typename T> + u32 add(const T &a, const size_t len) { + static_assert(std::is_pod<T>::value, "should be pod"); + return add(&a, len, alignof(T)); + } + + template<typename Iter> + u32 add(Iter b, const Iter &e) { + using value_type = typename std::iterator_traits<Iter>::value_type; + static_assert(std::is_pod<value_type>::value, "should be pod"); + + if (b == e) { + return 0; + } + + u32 offset = add(*b); + for (++b; b != e; ++b) { + add(*b); + } + + return offset; + } + + template<typename Range> + u32 add_range(const Range &range) { + return add(begin(range), end(range)); + } + + u32 add_iterator(const std::vector<mmbit_sparse_iter> &iter) { + auto cache_it = cached_iters.find(iter); + if (cache_it != cached_iters.end()) { + u32 offset = cache_it->second; + DEBUG_PRINTF("cache hit for iter at %u\n", offset); + return offset; + } + + u32 offset = add(iter.begin(), iter.end()); + cached_iters.emplace(iter, offset); + return offset; + } + + void write_bytes(RoseEngine *engine) { + copy_bytes((char *)engine + base_offset, blob); + } + + lookaround_info lookaround_cache; + +private: + 
void pad(size_t align) { + assert(ISALIGNED_N(base_offset, align)); + size_t s = blob.size(); + + if (ISALIGNED_N(s, align)) { + return; + } + + blob.resize(s + align - s % align); + } + + /** \brief Cache of previously-written sparse iterators. */ + ue2_unordered_map<std::vector<mmbit_sparse_iter>, u32> cached_iters; + + /** + * \brief Contents of the Rose bytecode immediately following the + * RoseEngine. + */ + std::vector<char, AlignedAllocator<char, 64>> blob; +}; + +} // namespace ue2 + +#endif // ROSE_BUILD_ENGINE_BLOB_H diff --git a/contrib/libs/hyperscan/src/rose/rose_build_exclusive.cpp b/contrib/libs/hyperscan/src/rose/rose_build_exclusive.cpp index 1cb3f0e50a..6a7c1c15a3 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_exclusive.cpp +++ b/contrib/libs/hyperscan/src/rose/rose_build_exclusive.cpp @@ -1,447 +1,447 @@ -/* - * Copyright (c) 2016-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
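RoseEngineBlob::add above boils down to: pad the blob to the requested alignment, remember base_offset plus the pre-append size as the caller's offset, then append the bytes. A rough standalone illustration of that offset arithmetic, with a made-up fixed base offset standing in for ROUNDUP_CL(sizeof(RoseEngine)):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct ToyBlob {
        static constexpr uint32_t base_offset = 64;  // stand-in for ROUNDUP_CL(sizeof(RoseEngine))
        std::vector<char> bytes;

        // Append len bytes at the given (non-zero) alignment and return their
        // offset relative to the start of the engine, as add() does above.
        uint32_t add(const void *src, size_t len, size_t align) {
            size_t misalign = bytes.size() % align;
            if (misalign) {
                bytes.resize(bytes.size() + (align - misalign));  // zero padding
            }
            uint32_t offset = base_offset + static_cast<uint32_t>(bytes.size());
            const char *p = static_cast<const char *>(src);
            bytes.insert(bytes.end(), p, p + len);
            return offset;
        }
    };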
- */ - -#include "rose_build_exclusive.h" - -#include "ue2common.h" -#include "rose_build_merge.h" -#include "nfa/castlecompile.h" -#include "nfagraph/ng_execute.h" -#include "nfagraph/ng_holder.h" -#include "nfagraph/ng_util.h" -#include "util/clique.h" -#include "util/compile_context.h" -#include "util/container.h" -#include "util/flat_containers.h" -#include "util/graph.h" -#include "util/make_unique.h" - -using namespace std; - -namespace ue2 { - -template<typename role_id> -struct RoleChunk { - vector<RoleInfo<role_id>> roles; -}; - -static -CharReach getReachability(const NGHolder &h) { - CharReach cr; - for (const auto &v : vertices_range(h)) { - if (!is_special(v, h)) { - cr |= h[v].char_reach; - } - } - return cr; -} - -template<typename role_id> -static -vector<RoleChunk<role_id>> divideIntoChunks(const RoseBuildImpl &build, - set<RoleInfo<role_id>> &roleInfoSet) { - u32 chunkSize = build.cc.grey.tamaChunkSize; - u32 cnt = 1; - vector<RoleChunk<role_id>> chunks; - RoleChunk<role_id> roleChunk; - for (const auto &roleInfo : roleInfoSet) { - if (cnt == chunkSize) { - cnt -= chunkSize; - chunks.push_back(roleChunk); - roleChunk.roles.clear(); - } - roleChunk.roles.push_back(roleInfo); - cnt++; - } - - if (cnt > 1) { - chunks.push_back(roleChunk); - } - - return chunks; -} - -/* add prefix literals to engine graph */ -static -bool addPrefixLiterals(NGHolder &h, unordered_set<u32> &tailId, - const vector<vector<CharReach>> &triggers) { - DEBUG_PRINTF("add literals to graph\n"); - - NFAVertex start = h.start; - vector<NFAVertex> heads; - vector<NFAVertex> tails; - for (const auto &lit : triggers) { - NFAVertex last = start; - if (lit.empty()) { - return false; - } - u32 i = 0; - for (const auto &c : lit) { - DEBUG_PRINTF("lit:%s \n", c.to_string().c_str()); - NFAVertex u = add_vertex(h); - h[u].char_reach = c; - if (!i++) { - heads.push_back(u); - last = u; - continue; - } - add_edge(last, u, h); - last = u; - } - tails.push_back(last); - tailId.insert(h[last].index); - } - - for (auto v : adjacent_vertices_range(start, h)) { - if (v != h.startDs) { - for (auto &t : tails) { - add_edge(t, v, h); - } - } - } - - clear_out_edges(start, h); - add_edge(h.start, h.start, h); - for (auto &t : heads) { - add_edge(start, t, h); - } - - DEBUG_PRINTF("literals addition done\n"); - return true; -} - -/* check if one literal is suffix of another */ -static -bool isSuffix(const vector<vector<CharReach>> &triggers1, - const vector<vector<CharReach>> &triggers2) { - // literal suffix test - for (const auto &lit1 : triggers1) { - for (const auto &lit2 : triggers2) { - const size_t len = min(lit1.size(), lit2.size()); - if (equal(lit1.rbegin(), lit1.rbegin() + len, - lit2.rbegin(), overlaps)) { - return true; - } - } - } - return false; -} - -/* prepare initial infix or suffix graph used for exclusive analysis */ -template<typename role_id> -static -u32 prepareRoleGraph(NGHolder &h, const role_id &s1) { - u32 num = 0; - if (s1.castle()) { - num = num_vertices(h); - NFAVertex u = add_vertex(h); - h[u].char_reach = s1.castle()->reach(); - add_edge(h.startDs, u, h); - // add self loop to repeat characters - add_edge(u, u, h); - } else if (s1.graph()) { - const NGHolder &g = *s1.graph(); - cloneHolder(h, g); - num = num_vertices(h); - } else { - // only infixes and suffixes with graph properties are possible - // candidates, already filtered out other cases before - // exclusive analysis - assert(0); - } - - return num; -} - -/* get a subset of literal if reset character is found */ -static 
-vector<CharReach> findStartPos(const CharReach &cr1, - const vector<CharReach> &lit) { - auto it = lit.rbegin(), ite = lit.rend(); - u32 pos = lit.size(); - for (; it != ite; it++) { - if (!overlaps(cr1, *it)) { - break; - } - pos--; - } - - return vector<CharReach> (lit.begin() + pos, lit.end()); -} - -template<typename role_id> -static -bool isExclusive(const NGHolder &h, - const u32 num, unordered_set<u32> &tailId, - map<u32, unordered_set<u32>> &skipList, - const RoleInfo<role_id> &role1, - const RoleInfo<role_id> &role2) { - const u32 id1 = role1.id; - const u32 id2 = role2.id; - - if (contains(skipList, id1) && contains(skipList[id1], id2)) { - return false; - } - - const auto &triggers1 = role1.literals; - const auto &triggers2 = role2.literals; - if (isSuffix(triggers1, triggers2)) { - skipList[id2].insert(id1); - return false; - } - - DEBUG_PRINTF("role id2:%u\n", id2); - const auto &cr1 = role1.cr; - if (overlaps(cr1, role2.last_cr)) { - CharReach cr = cr1 | role1.prefix_cr; - flat_set<NFAVertex> states; - for (const auto &lit : triggers2) { - auto lit1 = findStartPos(cr, lit); - if (lit1.empty()) { - continue; - } - - states.clear(); - - if (lit1.size() < lit.size()) { - // Only starts. - states.insert(h.start); - states.insert(h.startDs); - } else { - // All vertices. - insert(&states, vertices(h)); - } - - auto activeStates = execute_graph(h, lit1, states); - // Check if only literal states are on - for (const auto &s : activeStates) { - if ((!is_any_start(s, h) && h[s].index <= num) || - contains(tailId, h[s].index)) { - skipList[id2].insert(id1); - return false; - } - } - } - } - - return true; -} - -template<typename role_id> -static -unordered_set<u32> checkExclusivity(const NGHolder &h, - const u32 num, unordered_set<u32> &tailId, - map<u32, unordered_set<u32>> &skipList, - const RoleInfo<role_id> &role1, - const RoleChunk<role_id> &roleChunk) { - unordered_set<u32> info; - const u32 id1 = role1.id; - for (const auto &role2 : roleChunk.roles) { - const u32 id2 = role2.id; - if (id1 != id2 && isExclusive(h, num, tailId, skipList, - role1, role2)) { - info.insert(id2); - } - } - - return info; -} - -static -void findCliques(const map<u32, set<u32>> &exclusiveGroups, - vector<vector<u32>> &exclusive_roles) { - if (exclusiveGroups.empty()) { - return; - } - // Construct the exclusivity graph - map<u32, CliqueVertex> vertex_map; - unique_ptr<CliqueGraph> cg = std::make_unique<CliqueGraph>(); - - // Add vertices representing infixes/suffixes - for (const auto &e : exclusiveGroups) { - const u32 id = e.first; - CliqueVertex v1 = add_vertex(CliqueVertexProps(id), *cg); - vertex_map[id] = v1; - } - - // Wire exclusive pairs - for (const auto &e1 : exclusiveGroups) { - const u32 literalId1 = e1.first; - CliqueVertex lv = vertex_map[literalId1]; - const set<u32> &exclusiveSet = e1.second; - for (const auto &e2 : exclusiveGroups) { - const u32 literalId2 = e2.first; - if (literalId1 < literalId2 && - contains(exclusiveSet, literalId2)) { - add_edge(lv, vertex_map[literalId2], *cg); - DEBUG_PRINTF("Wire %u:%u\n", literalId1, literalId2); - } - } - } - - // Find clique groups - const auto &clique = removeClique(*cg); - for (const auto &i : clique) { - DEBUG_PRINTF("cliq:%zu\n", i.size()); - if (i.size() > 1) { - exclusive_roles.push_back(i); - } - } - DEBUG_PRINTF("Clique graph size:%zu\n", exclusive_roles.size()); -} - -static -map<u32, set<u32>> findExclusiveGroups(const RoseBuildImpl &build, - const map<u32, unordered_set<u32>> &exclusiveInfo, - const map<u32, vector<RoseVertex>> 
&vertex_map, - const bool is_infix) { - map<u32, set<u32>> exclusiveGroups; - for (const auto &e : exclusiveInfo) { - u32 i = e.first; - const auto &s = e.second; - set<u32> group; - set<RoseVertex> q1(vertex_map.at(i).begin(), - vertex_map.at(i).end()); - DEBUG_PRINTF("vertex set:%zu\n", q1.size()); - for (const auto &val : s) { - set<RoseVertex> q2(vertex_map.at(val).begin(), - vertex_map.at(val).end()); - if (contains(exclusiveInfo.at(val), i) && - (!is_infix || mergeableRoseVertices(build, q1, q2))) { - group.insert(val); - } - } - if (!group.empty()) { - exclusiveGroups[i] = group; - } - } - - return exclusiveGroups; -} - -template<typename role_id> -static -bool setTriggerLiterals(RoleInfo<role_id> &roleInfo, - const map<u32, vector<vector<CharReach>>> &triggers) { - u32 minLiteralLen = ~0U; - for (const auto &tr : triggers) { - for (const auto &lit : tr.second) { - if (lit.empty()) { - return false; - } - minLiteralLen = min(minLiteralLen, (u32)lit.size()); - roleInfo.last_cr |= lit.back(); - for (const auto &c : lit) { - roleInfo.prefix_cr |= c; - } - roleInfo.literals.push_back(lit); - } - } - - if (roleInfo.role.graph()) { - const NGHolder &g = *roleInfo.role.graph(); - roleInfo.cr = getReachability(g); - } else if (roleInfo.role.castle()) { - roleInfo.cr = roleInfo.role.castle()->reach(); - } - - // test the score of this engine - roleInfo.score = 256 - roleInfo.cr.count() + minLiteralLen; - if (roleInfo.score < 20) { - return false; - } - - return true; -} - -bool setTriggerLiteralsInfix(RoleInfo<left_id> &roleInfo, - const map<u32, vector<vector<CharReach>>> &triggers) { - return setTriggerLiterals(roleInfo, triggers); -} - -bool setTriggerLiteralsSuffix(RoleInfo<suffix_id> &roleInfo, - const map<u32, vector<vector<CharReach>>> &triggers) { - return setTriggerLiterals(roleInfo, triggers); -} - -template<typename role_id> -static -void exclusiveAnalysis(const RoseBuildImpl &build, - const map<u32, vector<RoseVertex>> &vertex_map, - set<RoleInfo<role_id>> &roleInfoSet, - vector<vector<u32>> &exclusive_roles, const bool is_infix) { - const auto &chunks = divideIntoChunks(build, roleInfoSet); - DEBUG_PRINTF("Exclusivity analysis entry\n"); - map<u32, unordered_set<u32>> exclusiveInfo; - - for (const auto &roleChunk : chunks) { - map<u32, unordered_set<u32>> skipList; - for (const auto &role1 : roleChunk.roles) { - const u32 id1 = role1.id; - const role_id &s1 = role1.role; - const auto &triggers1 = role1.literals; - - NGHolder h; - u32 num = prepareRoleGraph(h, s1); - DEBUG_PRINTF("role id1:%u\n", id1); - unordered_set<u32> tailId; - if (!addPrefixLiterals(h, tailId, triggers1)) { - continue; - } - - exclusiveInfo[id1] = checkExclusivity(h, num, tailId, - skipList, role1, roleChunk); - } - } - - // Create final candidate exclusive groups - const auto exclusiveGroups = - findExclusiveGroups(build, exclusiveInfo, vertex_map, is_infix); - exclusiveInfo.clear(); - - // Find cliques for each exclusive groups - findCliques(exclusiveGroups, exclusive_roles); -} - -void exclusiveAnalysisInfix(const RoseBuildImpl &build, - const map<u32, vector<RoseVertex>> &vertex_map, - set<RoleInfo<left_id>> &roleInfoSet, - vector<vector<u32>> &exclusive_roles) { - exclusiveAnalysis(build, vertex_map, roleInfoSet, exclusive_roles, - true); -} - -void exclusiveAnalysisSuffix(const RoseBuildImpl &build, - const map<u32, vector<RoseVertex>> &vertex_map, - set<RoleInfo<suffix_id>> &roleInfoSet, - vector<vector<u32>> &exclusive_roles) { - exclusiveAnalysis(build, vertex_map, roleInfoSet, exclusive_roles, - 
false); -} - -} // namespace ue2 +/* + * Copyright (c) 2016-2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "rose_build_exclusive.h" + +#include "ue2common.h" +#include "rose_build_merge.h" +#include "nfa/castlecompile.h" +#include "nfagraph/ng_execute.h" +#include "nfagraph/ng_holder.h" +#include "nfagraph/ng_util.h" +#include "util/clique.h" +#include "util/compile_context.h" +#include "util/container.h" +#include "util/flat_containers.h" +#include "util/graph.h" +#include "util/make_unique.h" + +using namespace std; + +namespace ue2 { + +template<typename role_id> +struct RoleChunk { + vector<RoleInfo<role_id>> roles; +}; + +static +CharReach getReachability(const NGHolder &h) { + CharReach cr; + for (const auto &v : vertices_range(h)) { + if (!is_special(v, h)) { + cr |= h[v].char_reach; + } + } + return cr; +} + +template<typename role_id> +static +vector<RoleChunk<role_id>> divideIntoChunks(const RoseBuildImpl &build, + set<RoleInfo<role_id>> &roleInfoSet) { + u32 chunkSize = build.cc.grey.tamaChunkSize; + u32 cnt = 1; + vector<RoleChunk<role_id>> chunks; + RoleChunk<role_id> roleChunk; + for (const auto &roleInfo : roleInfoSet) { + if (cnt == chunkSize) { + cnt -= chunkSize; + chunks.push_back(roleChunk); + roleChunk.roles.clear(); + } + roleChunk.roles.push_back(roleInfo); + cnt++; + } + + if (cnt > 1) { + chunks.push_back(roleChunk); + } + + return chunks; +} + +/* add prefix literals to engine graph */ +static +bool addPrefixLiterals(NGHolder &h, unordered_set<u32> &tailId, + const vector<vector<CharReach>> &triggers) { + DEBUG_PRINTF("add literals to graph\n"); + + NFAVertex start = h.start; + vector<NFAVertex> heads; + vector<NFAVertex> tails; + for (const auto &lit : triggers) { + NFAVertex last = start; + if (lit.empty()) { + return false; + } + u32 i = 0; + for (const auto &c : lit) { + DEBUG_PRINTF("lit:%s \n", c.to_string().c_str()); + NFAVertex u = add_vertex(h); + h[u].char_reach = c; + if (!i++) { + heads.push_back(u); + last = u; + continue; + } + add_edge(last, u, h); + last = u; + } + 
tails.push_back(last); + tailId.insert(h[last].index); + } + + for (auto v : adjacent_vertices_range(start, h)) { + if (v != h.startDs) { + for (auto &t : tails) { + add_edge(t, v, h); + } + } + } + + clear_out_edges(start, h); + add_edge(h.start, h.start, h); + for (auto &t : heads) { + add_edge(start, t, h); + } + + DEBUG_PRINTF("literals addition done\n"); + return true; +} + +/* check if one literal is suffix of another */ +static +bool isSuffix(const vector<vector<CharReach>> &triggers1, + const vector<vector<CharReach>> &triggers2) { + // literal suffix test + for (const auto &lit1 : triggers1) { + for (const auto &lit2 : triggers2) { + const size_t len = min(lit1.size(), lit2.size()); + if (equal(lit1.rbegin(), lit1.rbegin() + len, + lit2.rbegin(), overlaps)) { + return true; + } + } + } + return false; +} + +/* prepare initial infix or suffix graph used for exclusive analysis */ +template<typename role_id> +static +u32 prepareRoleGraph(NGHolder &h, const role_id &s1) { + u32 num = 0; + if (s1.castle()) { + num = num_vertices(h); + NFAVertex u = add_vertex(h); + h[u].char_reach = s1.castle()->reach(); + add_edge(h.startDs, u, h); + // add self loop to repeat characters + add_edge(u, u, h); + } else if (s1.graph()) { + const NGHolder &g = *s1.graph(); + cloneHolder(h, g); + num = num_vertices(h); + } else { + // only infixes and suffixes with graph properties are possible + // candidates, already filtered out other cases before + // exclusive analysis + assert(0); + } + + return num; +} + +/* get a subset of literal if reset character is found */ +static +vector<CharReach> findStartPos(const CharReach &cr1, + const vector<CharReach> &lit) { + auto it = lit.rbegin(), ite = lit.rend(); + u32 pos = lit.size(); + for (; it != ite; it++) { + if (!overlaps(cr1, *it)) { + break; + } + pos--; + } + + return vector<CharReach> (lit.begin() + pos, lit.end()); +} + +template<typename role_id> +static +bool isExclusive(const NGHolder &h, + const u32 num, unordered_set<u32> &tailId, + map<u32, unordered_set<u32>> &skipList, + const RoleInfo<role_id> &role1, + const RoleInfo<role_id> &role2) { + const u32 id1 = role1.id; + const u32 id2 = role2.id; + + if (contains(skipList, id1) && contains(skipList[id1], id2)) { + return false; + } + + const auto &triggers1 = role1.literals; + const auto &triggers2 = role2.literals; + if (isSuffix(triggers1, triggers2)) { + skipList[id2].insert(id1); + return false; + } + + DEBUG_PRINTF("role id2:%u\n", id2); + const auto &cr1 = role1.cr; + if (overlaps(cr1, role2.last_cr)) { + CharReach cr = cr1 | role1.prefix_cr; + flat_set<NFAVertex> states; + for (const auto &lit : triggers2) { + auto lit1 = findStartPos(cr, lit); + if (lit1.empty()) { + continue; + } + + states.clear(); + + if (lit1.size() < lit.size()) { + // Only starts. + states.insert(h.start); + states.insert(h.startDs); + } else { + // All vertices. 
+ insert(&states, vertices(h)); + } + + auto activeStates = execute_graph(h, lit1, states); + // Check if only literal states are on + for (const auto &s : activeStates) { + if ((!is_any_start(s, h) && h[s].index <= num) || + contains(tailId, h[s].index)) { + skipList[id2].insert(id1); + return false; + } + } + } + } + + return true; +} + +template<typename role_id> +static +unordered_set<u32> checkExclusivity(const NGHolder &h, + const u32 num, unordered_set<u32> &tailId, + map<u32, unordered_set<u32>> &skipList, + const RoleInfo<role_id> &role1, + const RoleChunk<role_id> &roleChunk) { + unordered_set<u32> info; + const u32 id1 = role1.id; + for (const auto &role2 : roleChunk.roles) { + const u32 id2 = role2.id; + if (id1 != id2 && isExclusive(h, num, tailId, skipList, + role1, role2)) { + info.insert(id2); + } + } + + return info; +} + +static +void findCliques(const map<u32, set<u32>> &exclusiveGroups, + vector<vector<u32>> &exclusive_roles) { + if (exclusiveGroups.empty()) { + return; + } + // Construct the exclusivity graph + map<u32, CliqueVertex> vertex_map; + unique_ptr<CliqueGraph> cg = std::make_unique<CliqueGraph>(); + + // Add vertices representing infixes/suffixes + for (const auto &e : exclusiveGroups) { + const u32 id = e.first; + CliqueVertex v1 = add_vertex(CliqueVertexProps(id), *cg); + vertex_map[id] = v1; + } + + // Wire exclusive pairs + for (const auto &e1 : exclusiveGroups) { + const u32 literalId1 = e1.first; + CliqueVertex lv = vertex_map[literalId1]; + const set<u32> &exclusiveSet = e1.second; + for (const auto &e2 : exclusiveGroups) { + const u32 literalId2 = e2.first; + if (literalId1 < literalId2 && + contains(exclusiveSet, literalId2)) { + add_edge(lv, vertex_map[literalId2], *cg); + DEBUG_PRINTF("Wire %u:%u\n", literalId1, literalId2); + } + } + } + + // Find clique groups + const auto &clique = removeClique(*cg); + for (const auto &i : clique) { + DEBUG_PRINTF("cliq:%zu\n", i.size()); + if (i.size() > 1) { + exclusive_roles.push_back(i); + } + } + DEBUG_PRINTF("Clique graph size:%zu\n", exclusive_roles.size()); +} + +static +map<u32, set<u32>> findExclusiveGroups(const RoseBuildImpl &build, + const map<u32, unordered_set<u32>> &exclusiveInfo, + const map<u32, vector<RoseVertex>> &vertex_map, + const bool is_infix) { + map<u32, set<u32>> exclusiveGroups; + for (const auto &e : exclusiveInfo) { + u32 i = e.first; + const auto &s = e.second; + set<u32> group; + set<RoseVertex> q1(vertex_map.at(i).begin(), + vertex_map.at(i).end()); + DEBUG_PRINTF("vertex set:%zu\n", q1.size()); + for (const auto &val : s) { + set<RoseVertex> q2(vertex_map.at(val).begin(), + vertex_map.at(val).end()); + if (contains(exclusiveInfo.at(val), i) && + (!is_infix || mergeableRoseVertices(build, q1, q2))) { + group.insert(val); + } + } + if (!group.empty()) { + exclusiveGroups[i] = group; + } + } + + return exclusiveGroups; +} + +template<typename role_id> +static +bool setTriggerLiterals(RoleInfo<role_id> &roleInfo, + const map<u32, vector<vector<CharReach>>> &triggers) { + u32 minLiteralLen = ~0U; + for (const auto &tr : triggers) { + for (const auto &lit : tr.second) { + if (lit.empty()) { + return false; + } + minLiteralLen = min(minLiteralLen, (u32)lit.size()); + roleInfo.last_cr |= lit.back(); + for (const auto &c : lit) { + roleInfo.prefix_cr |= c; + } + roleInfo.literals.push_back(lit); + } + } + + if (roleInfo.role.graph()) { + const NGHolder &g = *roleInfo.role.graph(); + roleInfo.cr = getReachability(g); + } else if (roleInfo.role.castle()) { + roleInfo.cr = 
roleInfo.role.castle()->reach(); + } + + // test the score of this engine + roleInfo.score = 256 - roleInfo.cr.count() + minLiteralLen; + if (roleInfo.score < 20) { + return false; + } + + return true; +} + +bool setTriggerLiteralsInfix(RoleInfo<left_id> &roleInfo, + const map<u32, vector<vector<CharReach>>> &triggers) { + return setTriggerLiterals(roleInfo, triggers); +} + +bool setTriggerLiteralsSuffix(RoleInfo<suffix_id> &roleInfo, + const map<u32, vector<vector<CharReach>>> &triggers) { + return setTriggerLiterals(roleInfo, triggers); +} + +template<typename role_id> +static +void exclusiveAnalysis(const RoseBuildImpl &build, + const map<u32, vector<RoseVertex>> &vertex_map, + set<RoleInfo<role_id>> &roleInfoSet, + vector<vector<u32>> &exclusive_roles, const bool is_infix) { + const auto &chunks = divideIntoChunks(build, roleInfoSet); + DEBUG_PRINTF("Exclusivity analysis entry\n"); + map<u32, unordered_set<u32>> exclusiveInfo; + + for (const auto &roleChunk : chunks) { + map<u32, unordered_set<u32>> skipList; + for (const auto &role1 : roleChunk.roles) { + const u32 id1 = role1.id; + const role_id &s1 = role1.role; + const auto &triggers1 = role1.literals; + + NGHolder h; + u32 num = prepareRoleGraph(h, s1); + DEBUG_PRINTF("role id1:%u\n", id1); + unordered_set<u32> tailId; + if (!addPrefixLiterals(h, tailId, triggers1)) { + continue; + } + + exclusiveInfo[id1] = checkExclusivity(h, num, tailId, + skipList, role1, roleChunk); + } + } + + // Create final candidate exclusive groups + const auto exclusiveGroups = + findExclusiveGroups(build, exclusiveInfo, vertex_map, is_infix); + exclusiveInfo.clear(); + + // Find cliques for each exclusive groups + findCliques(exclusiveGroups, exclusive_roles); +} + +void exclusiveAnalysisInfix(const RoseBuildImpl &build, + const map<u32, vector<RoseVertex>> &vertex_map, + set<RoleInfo<left_id>> &roleInfoSet, + vector<vector<u32>> &exclusive_roles) { + exclusiveAnalysis(build, vertex_map, roleInfoSet, exclusive_roles, + true); +} + +void exclusiveAnalysisSuffix(const RoseBuildImpl &build, + const map<u32, vector<RoseVertex>> &vertex_map, + set<RoleInfo<suffix_id>> &roleInfoSet, + vector<vector<u32>> &exclusive_roles) { + exclusiveAnalysis(build, vertex_map, roleInfoSet, exclusive_roles, + false); +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/rose/rose_build_exclusive.h b/contrib/libs/hyperscan/src/rose/rose_build_exclusive.h index fb4659efb0..3269dce612 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_exclusive.h +++ b/contrib/libs/hyperscan/src/rose/rose_build_exclusive.h @@ -1,127 +1,127 @@ -/* - * Copyright (c) 2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
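The scoring in setTriggerLiterals above is 256 minus the number of characters in the engine's reach plus the length of its shortest trigger literal, and anything under 20 is rejected, so narrow-reach engines with long triggers are the preferred exclusivity candidates. A toy restatement of that arithmetic with two worked examples (illustrative names only):

    #include <cstdint>

    // Score used by setTriggerLiterals: small reach and long trigger literals
    // score higher; candidates under 20 are dropped.
    constexpr uint32_t exclusiveScore(uint32_t reach_count, uint32_t min_literal_len) {
        return 256 - reach_count + min_literal_len;
    }

    // A digit-only engine ([0-9], reach of 10) with 4-byte triggers is a strong candidate:
    static_assert(exclusiveScore(10, 4) == 250, "well above the cut-off of 20");
    // A near-dot engine (reach of 250) with 1-byte triggers falls below the cut-off:
    static_assert(exclusiveScore(250, 1) == 7, "rejected, below 20");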
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief exclusive analysis for infix and suffix engines. - * Two engines are considered as exclusive if they can never be alive - * at the same time. This analysis takes advantage of the property of - * triggering literal + engine graph. If the triggering literals of - * two engines can make all the states dead in each other's graph, - * then they are exclusive. - */ -#ifndef ROSE_BUILD_EXCLUSIVE_H -#define ROSE_BUILD_EXCLUSIVE_H - -#include "ue2common.h" - -#include "rose_build_impl.h" -#include "util/alloc.h" -#include "util/charreach.h" - -#include <map> -#include <set> -#include <vector> - -namespace ue2 { - -/** \brief role info structure for exclusive analysis */ -template<typename role_id> -struct RoleInfo { - RoleInfo(role_id role_in, u32 id_in) : role(role_in), id(id_in) {} - bool operator==(const RoleInfo &b) const { - return id == b.id; - } - bool operator!=(const RoleInfo &b) const { return !(*this == b); } - bool operator<(const RoleInfo &b) const { - const RoleInfo &a = *this; - if (a.score != b.score) { - return a.score > b.score; - } - ORDER_CHECK(id); - return false; - } - - std::vector<std::vector<CharReach>> literals; // prefix literals - CharReach prefix_cr; // reach of prefix literals - CharReach last_cr; // reach of the last character of literals - CharReach cr; // reach of engine graph - const role_id role; // infix or suffix info - const u32 id; // infix or suffix id - u32 score = ~0U; // score for exclusive analysis -}; - -/** - * \brief add triggering literals to infix info. - */ -bool setTriggerLiteralsInfix(RoleInfo<left_id> &roleInfo, - const std::map<u32, std::vector<std::vector<CharReach>>> &triggers); - -/** - * \brief add triggering literals to suffix info. - */ -bool setTriggerLiteralsSuffix(RoleInfo<suffix_id> &roleInfo, - const std::map<u32, std::vector<std::vector<CharReach>>> &triggers); - -/** - * Exclusive analysis for infix engines. - * - * @param build rose build info mainly used to set exclusive chunk size here - * @param vertex_map mapping between engine id and rose vertices - * related to this engine - * @param roleInfoSet structure contains role properties including infix info, - * triggering literals and literal reachabilities. - * Used for exclusive analysis. - * @param exclusive_roles output mapping between engine id and its exclusive - * group id - */ -void exclusiveAnalysisInfix(const RoseBuildImpl &build, - const std::map<u32, std::vector<RoseVertex>> &vertex_map, - std::set<RoleInfo<left_id>> &roleInfoSet, - std::vector<std::vector<u32>> &exclusive_roles); - -/** - * Exclusive analysis for suffix engines. 
- * - * @param build rose build info mainly used to set exclusive chunk size here - * @param vertex_map mapping between engine id and rose vertices - * related to this engine - * @param roleInfoSet structure contains role properties including suffix info, - * triggering literals and literal reachabilities. - * Used for exclusive analysis. - * @param exclusive_roles output mapping between engine id and its exclusive - * group id - */ -void exclusiveAnalysisSuffix(const RoseBuildImpl &build, - const std::map<u32, std::vector<RoseVertex>> &vertex_map, - std::set<RoleInfo<suffix_id>> &roleInfoSet, - std::vector<std::vector<u32>> &exclusive_roles); - -} // namespace ue2 - -#endif //ROSE_BUILD_EXCLUSIVE_H - +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief exclusive analysis for infix and suffix engines. + * Two engines are considered as exclusive if they can never be alive + * at the same time. This analysis takes advantage of the property of + * triggering literal + engine graph. If the triggering literals of + * two engines can make all the states dead in each other's graph, + * then they are exclusive. 
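Once the pairwise checks described above have run, findExclusiveGroups and findCliques in the .cpp keep only mutually exclusive pairs and carve the resulting graph into cliques, since every pair inside a group must be exclusive. The sketch below shows the same idea with a simple greedy pass in place of the library's removeClique helper; this is an assumption for illustration, not the actual algorithm:

    #include <cstdint>
    #include <map>
    #include <set>
    #include <vector>

    // exclusiveInfo[i] = ids that i was found exclusive with (possibly one-sided).
    std::vector<std::vector<uint32_t>>
    groupMutuallyExclusive(const std::map<uint32_t, std::set<uint32_t>> &exclusiveInfo) {
        std::vector<std::vector<uint32_t>> groups;
        std::set<uint32_t> used;
        for (const auto &e : exclusiveInfo) {
            if (used.count(e.first)) {
                continue;
            }
            std::vector<uint32_t> group{e.first};
            for (uint32_t cand : e.second) {
                if (used.count(cand)) {
                    continue;
                }
                // cand may join only if it is mutually exclusive with every
                // member already chosen for this group.
                bool ok = true;
                for (uint32_t member : group) {
                    auto it = exclusiveInfo.find(cand);
                    if (it == exclusiveInfo.end() || !it->second.count(member) ||
                        !exclusiveInfo.at(member).count(cand)) {
                        ok = false;
                        break;
                    }
                }
                if (ok) {
                    group.push_back(cand);
                }
            }
            if (group.size() > 1) {
                for (uint32_t m : group) {
                    used.insert(m);
                }
                groups.push_back(group);
            }
        }
        return groups;
    }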
+ */ +#ifndef ROSE_BUILD_EXCLUSIVE_H +#define ROSE_BUILD_EXCLUSIVE_H + +#include "ue2common.h" + +#include "rose_build_impl.h" +#include "util/alloc.h" +#include "util/charreach.h" + +#include <map> +#include <set> +#include <vector> + +namespace ue2 { + +/** \brief role info structure for exclusive analysis */ +template<typename role_id> +struct RoleInfo { + RoleInfo(role_id role_in, u32 id_in) : role(role_in), id(id_in) {} + bool operator==(const RoleInfo &b) const { + return id == b.id; + } + bool operator!=(const RoleInfo &b) const { return !(*this == b); } + bool operator<(const RoleInfo &b) const { + const RoleInfo &a = *this; + if (a.score != b.score) { + return a.score > b.score; + } + ORDER_CHECK(id); + return false; + } + + std::vector<std::vector<CharReach>> literals; // prefix literals + CharReach prefix_cr; // reach of prefix literals + CharReach last_cr; // reach of the last character of literals + CharReach cr; // reach of engine graph + const role_id role; // infix or suffix info + const u32 id; // infix or suffix id + u32 score = ~0U; // score for exclusive analysis +}; + +/** + * \brief add triggering literals to infix info. + */ +bool setTriggerLiteralsInfix(RoleInfo<left_id> &roleInfo, + const std::map<u32, std::vector<std::vector<CharReach>>> &triggers); + +/** + * \brief add triggering literals to suffix info. + */ +bool setTriggerLiteralsSuffix(RoleInfo<suffix_id> &roleInfo, + const std::map<u32, std::vector<std::vector<CharReach>>> &triggers); + +/** + * Exclusive analysis for infix engines. + * + * @param build rose build info mainly used to set exclusive chunk size here + * @param vertex_map mapping between engine id and rose vertices + * related to this engine + * @param roleInfoSet structure contains role properties including infix info, + * triggering literals and literal reachabilities. + * Used for exclusive analysis. + * @param exclusive_roles output mapping between engine id and its exclusive + * group id + */ +void exclusiveAnalysisInfix(const RoseBuildImpl &build, + const std::map<u32, std::vector<RoseVertex>> &vertex_map, + std::set<RoleInfo<left_id>> &roleInfoSet, + std::vector<std::vector<u32>> &exclusive_roles); + +/** + * Exclusive analysis for suffix engines. + * + * @param build rose build info mainly used to set exclusive chunk size here + * @param vertex_map mapping between engine id and rose vertices + * related to this engine + * @param roleInfoSet structure contains role properties including suffix info, + * triggering literals and literal reachabilities. + * Used for exclusive analysis. 
+ * @param exclusive_roles output mapping between engine id and its exclusive + * group id + */ +void exclusiveAnalysisSuffix(const RoseBuildImpl &build, + const std::map<u32, std::vector<RoseVertex>> &vertex_map, + std::set<RoleInfo<suffix_id>> &roleInfoSet, + std::vector<std::vector<u32>> &exclusive_roles); + +} // namespace ue2 + +#endif //ROSE_BUILD_EXCLUSIVE_H + diff --git a/contrib/libs/hyperscan/src/rose/rose_build_groups.cpp b/contrib/libs/hyperscan/src/rose/rose_build_groups.cpp index 5e4206943f..209889e558 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_groups.cpp +++ b/contrib/libs/hyperscan/src/rose/rose_build_groups.cpp @@ -1,707 +1,707 @@ -/* - * Copyright (c) 2016-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** - * \file - * \brief Rose build: code for analysing literal groups. 
- */ - -#include "rose_build_groups.h" - -#include "util/boundary_reports.h" -#include "util/compile_context.h" -#include "util/report_manager.h" - -#include <queue> -#include <vector> - -#include <boost/graph/topological_sort.hpp> -#include <boost/range/adaptor/map.hpp> -#include <boost/range/adaptor/reversed.hpp> - -using namespace std; -using boost::adaptors::map_keys; - -namespace ue2 { - -#define ROSE_LONG_LITERAL_LEN 8 - -static -bool superStrong(const rose_literal_id &lit) { - if (lit.s.length() < ROSE_LONG_LITERAL_LEN) { - return false; - } - - const u32 EXPECTED_FDR_BUCKET_LENGTH = 8; - - assert(lit.s.length() >= EXPECTED_FDR_BUCKET_LENGTH); - size_t len = lit.s.length(); - const string &s = lit.s.get_string(); - - for (size_t i = 1; i < EXPECTED_FDR_BUCKET_LENGTH; i++) { - if (s[len - 1 - i] != s[len - 1]) { - return true; /* we have at least some variation in the tail */ - } - } - DEBUG_PRINTF("lit '%s' is not superstrong due to tail\n", - escapeString(s).c_str()); - return false; -} - -static -bool eligibleForAlwaysOnGroup(const RoseBuildImpl &build, u32 id) { - auto eligble = [&](RoseVertex v) { - return build.isRootSuccessor(v) - && (!build.g[v].left || !isAnchored(build.g[v].left)); - }; - - if (any_of_in(build.literal_info[id].vertices, eligble)) { - return true; - } - - for (u32 delayed_id : build.literal_info[id].delayed_ids) { - if (any_of_in(build.literal_info[delayed_id].vertices, eligble)) { - return true; - } - } - - return false; -} - -static -bool requires_group_assignment(const rose_literal_id &lit, - const rose_literal_info &info) { +/* + * Copyright (c) 2016-2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * \brief Rose build: code for analysing literal groups. 
+ */ + +#include "rose_build_groups.h" + +#include "util/boundary_reports.h" +#include "util/compile_context.h" +#include "util/report_manager.h" + +#include <queue> +#include <vector> + +#include <boost/graph/topological_sort.hpp> +#include <boost/range/adaptor/map.hpp> +#include <boost/range/adaptor/reversed.hpp> + +using namespace std; +using boost::adaptors::map_keys; + +namespace ue2 { + +#define ROSE_LONG_LITERAL_LEN 8 + +static +bool superStrong(const rose_literal_id &lit) { + if (lit.s.length() < ROSE_LONG_LITERAL_LEN) { + return false; + } + + const u32 EXPECTED_FDR_BUCKET_LENGTH = 8; + + assert(lit.s.length() >= EXPECTED_FDR_BUCKET_LENGTH); + size_t len = lit.s.length(); + const string &s = lit.s.get_string(); + + for (size_t i = 1; i < EXPECTED_FDR_BUCKET_LENGTH; i++) { + if (s[len - 1 - i] != s[len - 1]) { + return true; /* we have at least some variation in the tail */ + } + } + DEBUG_PRINTF("lit '%s' is not superstrong due to tail\n", + escapeString(s).c_str()); + return false; +} + +static +bool eligibleForAlwaysOnGroup(const RoseBuildImpl &build, u32 id) { + auto eligble = [&](RoseVertex v) { + return build.isRootSuccessor(v) + && (!build.g[v].left || !isAnchored(build.g[v].left)); + }; + + if (any_of_in(build.literal_info[id].vertices, eligble)) { + return true; + } + + for (u32 delayed_id : build.literal_info[id].delayed_ids) { + if (any_of_in(build.literal_info[delayed_id].vertices, eligble)) { + return true; + } + } + + return false; +} + +static +bool requires_group_assignment(const rose_literal_id &lit, + const rose_literal_info &info) { if (lit.delay) { /* we will check the shadow's leader */ - return false; - } - - if (lit.table == ROSE_ANCHORED || lit.table == ROSE_EVENT) { - return false; - } - - // If we already have a group applied, skip. - if (info.group_mask) { - return false; - } - - if (info.vertices.empty() && info.delayed_ids.empty()) { - DEBUG_PRINTF("literal is good for nothing\n"); - return false; - } - - return true; -} - -static -rose_group calcLocalGroup(const RoseVertex v, const RoseGraph &g, - const deque<rose_literal_info> &literal_info, - const bool small_literal_count) { - rose_group local_group = 0; - - for (auto u : inv_adjacent_vertices_range(v, g)) { - /* In small cases, ensure that siblings have the same rose parentage to - * allow rose squashing. In larger cases, don't do this as groups are - * probably too scarce. 
*/ - for (auto w : adjacent_vertices_range(u, g)) { - if (!small_literal_count || g[v].left == g[w].left) { - for (u32 lit_id : g[w].literals) { - local_group |= literal_info[lit_id].group_mask; - } - } else { - DEBUG_PRINTF("not sibling different mother %zu %zu\n", - g[v].index, g[w].index); - } - } - } - - return local_group; -} - -/* group constants */ -#define MAX_LIGHT_LITERAL_CASE 200 /* allow rose to affect group decisions below - * this */ - -static -flat_set<RoseVertex> getAssociatedVertices(const RoseBuildImpl &build, u32 id) { - flat_set<RoseVertex> out; - const auto &info = build.literal_info[id]; - insert(&out, info.vertices); - for (const auto &delayed : info.delayed_ids) { - insert(&out, build.literal_info[delayed].vertices); - } - return out; -} - -static -u32 next_available_group(u32 counter, u32 min_start_group) { - counter++; - if (counter == ROSE_GROUPS_MAX) { - DEBUG_PRINTF("resetting groups\n"); - counter = min_start_group; - } - - return counter; -} - -static -void allocateGroupForBoundary(RoseBuildImpl &build, u32 group_always_on, - map<u8, u32> &groupCount) { - /* Boundary reports at zero will always fired and forgotten, no need to - * worry about preventing the stream being marked as exhausted */ - if (build.boundary.report_at_eod.empty()) { - return; - } - - /* Group based stream exhaustion is only done at stream boundaries */ - if (!build.cc.streaming) { - return; - } - - DEBUG_PRINTF("allocating %u as boundary group id\n", group_always_on); - - build.boundary_group_mask = 1ULL << group_always_on; - groupCount[group_always_on]++; -} - -static -void allocateGroupForEvent(RoseBuildImpl &build, u32 group_always_on, - map<u8, u32> &groupCount, u32 *counter) { - if (build.eod_event_literal_id == MO_INVALID_IDX) { - return; - } - - /* Group based stream exhaustion is only done at stream boundaries */ - if (!build.cc.streaming) { - return; - } - - rose_literal_info &info = build.literal_info[build.eod_event_literal_id]; - - if (info.vertices.empty()) { - return; - } - - bool new_group = !groupCount[group_always_on]; - for (RoseVertex v : info.vertices) { - if (build.g[v].left && !isAnchored(build.g[v].left)) { - new_group = false; - } - } - - u32 group; - if (!new_group) { - group = group_always_on; - } else { - group = *counter; - *counter += 1; - } - - DEBUG_PRINTF("allocating %u as eod event group id\n", *counter); - info.group_mask = 1ULL << group; - groupCount[group]++; -} - -void assignGroupsToLiterals(RoseBuildImpl &build) { - auto &literals = build.literals; - auto &literal_info = build.literal_info; - - bool small_literal_count = literal_info.size() <= MAX_LIGHT_LITERAL_CASE; - - map<u8, u32> groupCount; /* group index to number of members */ - - u32 counter = 0; - u32 group_always_on = 0; - - // First pass: handle always on literals. - for (u32 id = 0; id < literals.size(); id++) { - const rose_literal_id &lit = literals.at(id); - rose_literal_info &info = literal_info[id]; - - if (!requires_group_assignment(lit, info)) { - continue; - } - - // If this literal has a root role, we always have to search for it - // anyway, so it goes in the always-on group. 
- /* We could end up squashing it if it is followed by a .* */ - if (eligibleForAlwaysOnGroup(build, id)) { - info.group_mask = 1ULL << group_always_on; - groupCount[group_always_on]++; - continue; - } - } - - u32 group_long_lit; - if (groupCount[group_always_on]) { - DEBUG_PRINTF("%u always on literals\n", groupCount[group_always_on]); - group_long_lit = group_always_on; - counter++; - } else { - group_long_lit = counter; - counter++; - } - - allocateGroupForBoundary(build, group_always_on, groupCount); - allocateGroupForEvent(build, group_always_on, groupCount, &counter); - - u32 min_start_group = counter; - priority_queue<tuple<s32, s32, u32>> pq; - - // Second pass: the other literals. - for (u32 id = 0; id < literals.size(); id++) { - const rose_literal_id &lit = literals.at(id); - rose_literal_info &info = literal_info[id]; - - if (!requires_group_assignment(lit, info)) { - continue; - } - - assert(!eligibleForAlwaysOnGroup(build, id)); - pq.emplace(-(s32)info.vertices.size(), -(s32)lit.s.length(), id); - } - vector<u32> long_lits; - while (!pq.empty()) { - u32 id = get<2>(pq.top()); - pq.pop(); - UNUSED const rose_literal_id &lit = literals.at(id); - DEBUG_PRINTF("assigning groups to lit %u (v %zu l %zu)\n", id, - literal_info[id].vertices.size(), lit.s.length()); - - u8 group_id = 0; - rose_group group = ~0ULL; - for (auto v : getAssociatedVertices(build, id)) { - rose_group local_group = calcLocalGroup(v, build.g, literal_info, - small_literal_count); - group &= local_group; - if (!group) { - break; - } - } - - if (group == ~0ULL) { - goto boring; - } - - group &= ~((1ULL << min_start_group) - 1); /* ensure the purity of the - * always_on groups */ - if (!group) { - goto boring; - } - - group_id = ctz64(group); - - /* TODO: fairness */ - DEBUG_PRINTF("picking sibling group %hhd\n", group_id); - literal_info[id].group_mask = 1ULL << group_id; - groupCount[group_id]++; - - continue; - - boring: - /* long literals will either be stuck in a mega group or spread around - * depending on availability */ - if (superStrong(lit)) { - long_lits.push_back(id); - continue; - } - - // Other literals are assigned to our remaining groups round-robin. 
- group_id = counter; - - DEBUG_PRINTF("picking boring group %hhd\n", group_id); - literal_info[id].group_mask = 1ULL << group_id; - groupCount[group_id]++; - counter = next_available_group(counter, min_start_group); - } - - /* spread long literals out amongst unused groups if any, otherwise stick - * them in the always on the group */ - - if (groupCount[counter]) { - DEBUG_PRINTF("sticking long literals in the image of the always on\n"); - for (u32 lit_id : long_lits) { - literal_info[lit_id].group_mask = 1ULL << group_long_lit; - groupCount[group_long_lit]++; - } - } else { - u32 min_long_counter = counter; - DEBUG_PRINTF("base long lit group = %u\n", min_long_counter); - for (u32 lit_id : long_lits) { - u8 group_id = counter; - literal_info[lit_id].group_mask = 1ULL << group_id; - groupCount[group_id]++; - counter = next_available_group(counter, min_long_counter); - } - } - /* assign delayed literals to the same group as their parent */ - for (u32 id = 0; id < literals.size(); id++) { - const rose_literal_id &lit = literals.at(id); - - if (!lit.delay) { - continue; - } - - u32 parent = literal_info[id].undelayed_id; - DEBUG_PRINTF("%u is shadow picking up groups from %u\n", id, parent); - assert(literal_info[parent].undelayed_id == parent); - assert(literal_info[parent].group_mask); - literal_info[id].group_mask = literal_info[parent].group_mask; - /* don't increment the group count - these don't really exist */ - } - - DEBUG_PRINTF("populate group to literal mapping\n"); - for (u32 id = 0; id < literals.size(); id++) { - rose_group groups = literal_info[id].group_mask; - while (groups) { - u32 group_id = findAndClearLSB_64(&groups); - build.group_to_literal[group_id].insert(id); - } - } - - /* find how many groups we allocated */ - for (u32 i = 0; i < ROSE_GROUPS_MAX; i++) { - if (groupCount[i]) { - build.group_end = max(build.group_end, i + 1); - } - } -} - -rose_group RoseBuildImpl::getGroups(RoseVertex v) const { - rose_group groups = 0; - - for (u32 id : g[v].literals) { - u32 lit_id = literal_info.at(id).undelayed_id; - - rose_group mygroups = literal_info[lit_id].group_mask; - groups |= mygroups; - } - - return groups; -} - -/** \brief Get the groups of the successor literals of a given vertex. */ -rose_group RoseBuildImpl::getSuccGroups(RoseVertex start) const { - rose_group initialGroups = 0; - - for (auto v : adjacent_vertices_range(start, g)) { - initialGroups |= getGroups(v); - } - - return initialGroups; -} - -/** - * The groups that a role sets are determined by the union of its successor - * literals. Requires the literals already have had groups assigned. - */ -void assignGroupsToRoles(RoseBuildImpl &build) { - auto &g = build.g; - - /* Note: if there is a succ literal in the sidematcher, its successors - * literals must be added instead */ - for (auto v : vertices_range(g)) { - if (build.isAnyStart(v)) { - continue; - } - - const rose_group succ_groups = build.getSuccGroups(v); - g[v].groups |= succ_groups; - - auto ghost_it = build.ghost.find(v); - if (ghost_it != end(build.ghost)) { - /* delayed roles need to supply their groups to the ghost role */ - g[ghost_it->second].groups |= succ_groups; - } - - DEBUG_PRINTF("vertex %zu: groups=%llx\n", g[v].index, g[v].groups); - } -} - -/** - * \brief Returns a mapping from each graph vertex v to the intersection of the - * groups switched on by all of the paths leading up to (and including) v from - * the start vertexes. 
- */ -unordered_map<RoseVertex, rose_group> -getVertexGroupMap(const RoseBuildImpl &build) { - const RoseGraph &g = build.g; - vector<RoseVertex> v_order; - v_order.reserve(num_vertices(g)); - - boost::topological_sort(g, back_inserter(v_order)); - - unordered_map<RoseVertex, rose_group> vertex_group_map; - vertex_group_map.reserve(num_vertices(g)); - - const rose_group initial_groups = build.getInitialGroups(); - - for (const auto &v : boost::adaptors::reverse(v_order)) { - DEBUG_PRINTF("vertex %zu\n", g[v].index); - - if (build.isAnyStart(v)) { - DEBUG_PRINTF("start vertex, groups=0x%llx\n", initial_groups); - vertex_group_map.emplace(v, initial_groups); - continue; - } - - // To get to this vertex, we must have come through a predecessor, and - // everyone who isn't a start vertex has one. - assert(in_degree(v, g) > 0); - rose_group pred_groups = ~rose_group{0}; - for (auto u : inv_adjacent_vertices_range(v, g)) { - DEBUG_PRINTF("pred %zu\n", g[u].index); - assert(contains(vertex_group_map, u)); - pred_groups &= vertex_group_map.at(u); - } - - DEBUG_PRINTF("pred_groups=0x%llx\n", pred_groups); - DEBUG_PRINTF("g[v].groups=0x%llx\n", g[v].groups); - - rose_group v_groups = pred_groups | g[v].groups; - DEBUG_PRINTF("v_groups=0x%llx\n", v_groups); - - vertex_group_map.emplace(v, v_groups); - } - - return vertex_group_map; -} - -/** - * \brief Find the set of groups that can be squashed anywhere in the graph, - * either by a literal or by a leftfix. - */ -rose_group getSquashableGroups(const RoseBuildImpl &build) { - rose_group squashable_groups = 0; - for (const auto &info : build.literal_info) { - if (info.squash_group) { - DEBUG_PRINTF("lit squash mask 0x%llx\n", info.group_mask); - squashable_groups |= info.group_mask; - } - } - for (const auto &m : build.rose_squash_masks) { - DEBUG_PRINTF("left squash mask 0x%llx\n", ~m.second); - squashable_groups |= ~m.second; - } - - DEBUG_PRINTF("squashable groups=0x%llx\n", squashable_groups); - assert(!(squashable_groups & build.boundary_group_mask)); - return squashable_groups; -} - -/** - * \brief True if every vertex associated with a group also belongs to - * lit_info. - */ -static -bool coversGroup(const RoseBuildImpl &build, - const rose_literal_info &lit_info) { - if (lit_info.vertices.empty()) { - DEBUG_PRINTF("no vertices - does not cover\n"); - return false; - } - - if (!lit_info.group_mask) { - DEBUG_PRINTF("no group - does not cover\n"); - return false; /* no group (not a floating lit?) 
*/ - } - - assert(popcount64(lit_info.group_mask) == 1); - - /* for each lit in group, ensure that vertices are a subset of lit_info's */ - rose_group groups = lit_info.group_mask; - while (groups) { - u32 group_id = findAndClearLSB_64(&groups); - for (u32 id : build.group_to_literal.at(group_id)) { - DEBUG_PRINTF(" checking against friend %u\n", id); - if (!is_subset_of(build.literal_info[id].vertices, - lit_info.vertices)) { - DEBUG_PRINTF("fail\n"); - return false; - } - } - } - - DEBUG_PRINTF("ok\n"); - return true; -} - -static -bool isGroupSquasher(const RoseBuildImpl &build, const u32 id /* literal id */, - rose_group forbidden_squash_group) { - const RoseGraph &g = build.g; - - const rose_literal_info &lit_info = build.literal_info.at(id); - - DEBUG_PRINTF("checking if %u '%s' is a group squasher %016llx\n", id, - dumpString(build.literals.at(id).s).c_str(), - lit_info.group_mask); - - if (build.literals.at(id).table == ROSE_EVENT) { - DEBUG_PRINTF("event literal\n"); - return false; - } - - if (!coversGroup(build, lit_info)) { - DEBUG_PRINTF("does not cover group\n"); - return false; - } - - if (lit_info.group_mask & forbidden_squash_group) { - /* probably a delayed lit */ - DEBUG_PRINTF("skipping as involves a forbidden group\n"); - return false; - } - - // Single-vertex, less constrained case than the multiple-vertex one below. - if (lit_info.vertices.size() == 1) { - const RoseVertex &v = *lit_info.vertices.begin(); - - if (build.hasDelayPred(v)) { /* due to rebuild issues */ - return false; - } - - /* there are two ways to be a group squasher: - * 1) only care about the first accepted match - * 2) can only match once after a pred match - * - * (2) requires analysis of the infix before v and is not implemented, - * TODO - */ - - /* Case 1 */ - - // Can't squash cases with accepts unless they are all - // simple-exhaustible. - if (any_of_in(g[v].reports, [&](ReportID report) { - return !isSimpleExhaustible(build.rm.getReport(report)); - })) { - DEBUG_PRINTF("can't squash reporter\n"); - return false; - } - - /* Can't squash cases with a suffix without analysis of the suffix. - * TODO: look at suffixes */ - if (g[v].suffix) { - return false; - } - - // Out-edges must have inf max bound, + no other shenanigans */ - for (const auto &e : out_edges_range(v, g)) { - if (g[e].maxBound != ROSE_BOUND_INF) { - return false; - } - - if (g[target(e, g)].left) { - return false; /* is an infix rose trigger, TODO: analysis */ - } - } - - DEBUG_PRINTF("%u is a path 1 group squasher\n", id); - return true; - - /* note: we could also squash the groups of its preds (if nobody else is - * using them. TODO. */ - } - - // Multiple-vertex case - for (auto v : lit_info.vertices) { - assert(!build.isAnyStart(v)); - - // Can't squash cases with accepts - if (!g[v].reports.empty()) { - return false; - } - - // Suffixes and leftfixes are out too as first literal may not match - // for everyone. - if (!g[v].isBoring()) { - return false; - } - - /* TODO: checks are solid but we should explain */ - if (build.hasDelayPred(v) || build.hasAnchoredTablePred(v)) { - return false; - } - - // Out-edges must have inf max bound and not directly lead to another - // vertex with this group, e.g. 'foobar.*foobar'. 
- for (const auto &e : out_edges_range(v, g)) { - if (g[e].maxBound != ROSE_BOUND_INF) { - return false; - } - RoseVertex t = target(e, g); - - if (g[t].left) { - return false; /* is an infix rose trigger */ - } - - for (u32 lit_id : g[t].literals) { - if (build.literal_info[lit_id].group_mask & - lit_info.group_mask) { - return false; - } - } - } - - // In-edges must all be dot-stars with no overlap at all, as overlap - // also causes history to be used. - /* Different tables are already forbidden by previous checks */ - for (const auto &e : in_edges_range(v, g)) { - if (!(g[e].minBound == 0 && g[e].maxBound == ROSE_BOUND_INF)) { - return false; - } - - // Check overlap, if source was a literal. - RoseVertex u = source(e, g); - if (build.maxLiteralOverlap(u, v)) { - return false; - } - } - } - - DEBUG_PRINTF("literal %u is a multi-vertex group squasher\n", id); - return true; -} - -void findGroupSquashers(RoseBuildImpl &build) { - rose_group forbidden_squash_group = build.boundary_group_mask; - for (u32 id = 0; id < build.literals.size(); id++) { - const auto &lit = build.literals.at(id); - if (lit.delay) { - forbidden_squash_group |= build.literal_info[id].group_mask; - } - } - - for (u32 id = 0; id < build.literal_info.size(); id++) { - if (isGroupSquasher(build, id, forbidden_squash_group)) { - build.literal_info[id].squash_group = true; - } - } -} - -} // namespace ue2 + return false; + } + + if (lit.table == ROSE_ANCHORED || lit.table == ROSE_EVENT) { + return false; + } + + // If we already have a group applied, skip. + if (info.group_mask) { + return false; + } + + if (info.vertices.empty() && info.delayed_ids.empty()) { + DEBUG_PRINTF("literal is good for nothing\n"); + return false; + } + + return true; +} + +static +rose_group calcLocalGroup(const RoseVertex v, const RoseGraph &g, + const deque<rose_literal_info> &literal_info, + const bool small_literal_count) { + rose_group local_group = 0; + + for (auto u : inv_adjacent_vertices_range(v, g)) { + /* In small cases, ensure that siblings have the same rose parentage to + * allow rose squashing. In larger cases, don't do this as groups are + * probably too scarce. 
*/ + for (auto w : adjacent_vertices_range(u, g)) { + if (!small_literal_count || g[v].left == g[w].left) { + for (u32 lit_id : g[w].literals) { + local_group |= literal_info[lit_id].group_mask; + } + } else { + DEBUG_PRINTF("not sibling different mother %zu %zu\n", + g[v].index, g[w].index); + } + } + } + + return local_group; +} + +/* group constants */ +#define MAX_LIGHT_LITERAL_CASE 200 /* allow rose to affect group decisions below + * this */ + +static +flat_set<RoseVertex> getAssociatedVertices(const RoseBuildImpl &build, u32 id) { + flat_set<RoseVertex> out; + const auto &info = build.literal_info[id]; + insert(&out, info.vertices); + for (const auto &delayed : info.delayed_ids) { + insert(&out, build.literal_info[delayed].vertices); + } + return out; +} + +static +u32 next_available_group(u32 counter, u32 min_start_group) { + counter++; + if (counter == ROSE_GROUPS_MAX) { + DEBUG_PRINTF("resetting groups\n"); + counter = min_start_group; + } + + return counter; +} + +static +void allocateGroupForBoundary(RoseBuildImpl &build, u32 group_always_on, + map<u8, u32> &groupCount) { + /* Boundary reports at zero will always fired and forgotten, no need to + * worry about preventing the stream being marked as exhausted */ + if (build.boundary.report_at_eod.empty()) { + return; + } + + /* Group based stream exhaustion is only done at stream boundaries */ + if (!build.cc.streaming) { + return; + } + + DEBUG_PRINTF("allocating %u as boundary group id\n", group_always_on); + + build.boundary_group_mask = 1ULL << group_always_on; + groupCount[group_always_on]++; +} + +static +void allocateGroupForEvent(RoseBuildImpl &build, u32 group_always_on, + map<u8, u32> &groupCount, u32 *counter) { + if (build.eod_event_literal_id == MO_INVALID_IDX) { + return; + } + + /* Group based stream exhaustion is only done at stream boundaries */ + if (!build.cc.streaming) { + return; + } + + rose_literal_info &info = build.literal_info[build.eod_event_literal_id]; + + if (info.vertices.empty()) { + return; + } + + bool new_group = !groupCount[group_always_on]; + for (RoseVertex v : info.vertices) { + if (build.g[v].left && !isAnchored(build.g[v].left)) { + new_group = false; + } + } + + u32 group; + if (!new_group) { + group = group_always_on; + } else { + group = *counter; + *counter += 1; + } + + DEBUG_PRINTF("allocating %u as eod event group id\n", *counter); + info.group_mask = 1ULL << group; + groupCount[group]++; +} + +void assignGroupsToLiterals(RoseBuildImpl &build) { + auto &literals = build.literals; + auto &literal_info = build.literal_info; + + bool small_literal_count = literal_info.size() <= MAX_LIGHT_LITERAL_CASE; + + map<u8, u32> groupCount; /* group index to number of members */ + + u32 counter = 0; + u32 group_always_on = 0; + + // First pass: handle always on literals. + for (u32 id = 0; id < literals.size(); id++) { + const rose_literal_id &lit = literals.at(id); + rose_literal_info &info = literal_info[id]; + + if (!requires_group_assignment(lit, info)) { + continue; + } + + // If this literal has a root role, we always have to search for it + // anyway, so it goes in the always-on group. 
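calcLocalGroup above collects, for a vertex v, the groups already claimed by its siblings (the other successors of v's predecessors); the second pass then intersects these local groups across all of a literal's vertices and tries to reuse one of them. The sketch below reproduces the union step on a hand-built graph; it omits the extra same-leftfix filter the real pass applies in small cases, and all vertex and literal data is made up.

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    using rose_group = uint64_t;

    struct Vertex {
        std::vector<int> preds;         // predecessor vertex indices
        std::vector<int> succs;         // successor vertex indices
        std::vector<uint32_t> literals; // literal ids on this vertex
    };

    // OR together the group masks of the literals on v's siblings.
    static rose_group localGroup(const std::vector<Vertex> &g,
                                 const std::vector<rose_group> &lit_groups,
                                 int v) {
        rose_group local = 0;
        for (int u : g[v].preds) {
            for (int w : g[u].succs) {  // siblings of v (including v itself)
                for (uint32_t lit : g[w].literals) {
                    local |= lit_groups[lit];
                }
            }
        }
        return local;
    }

    int main() {
        // u(0) -> v(1), u(0) -> w(2); literal 0 on v, literal 1 on w.
        std::vector<Vertex> g(3);
        g[0].succs = {1, 2};
        g[1].preds = {0}; g[1].literals = {0};
        g[2].preds = {0}; g[2].literals = {1};
        std::vector<rose_group> lit_groups = {1ULL << 4, 1ULL << 7};

        printf("local group mask for v: 0x%llx\n",
               (unsigned long long)localGroup(g, lit_groups, 1));
        return 0;
    }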
+ /* We could end up squashing it if it is followed by a .* */ + if (eligibleForAlwaysOnGroup(build, id)) { + info.group_mask = 1ULL << group_always_on; + groupCount[group_always_on]++; + continue; + } + } + + u32 group_long_lit; + if (groupCount[group_always_on]) { + DEBUG_PRINTF("%u always on literals\n", groupCount[group_always_on]); + group_long_lit = group_always_on; + counter++; + } else { + group_long_lit = counter; + counter++; + } + + allocateGroupForBoundary(build, group_always_on, groupCount); + allocateGroupForEvent(build, group_always_on, groupCount, &counter); + + u32 min_start_group = counter; + priority_queue<tuple<s32, s32, u32>> pq; + + // Second pass: the other literals. + for (u32 id = 0; id < literals.size(); id++) { + const rose_literal_id &lit = literals.at(id); + rose_literal_info &info = literal_info[id]; + + if (!requires_group_assignment(lit, info)) { + continue; + } + + assert(!eligibleForAlwaysOnGroup(build, id)); + pq.emplace(-(s32)info.vertices.size(), -(s32)lit.s.length(), id); + } + vector<u32> long_lits; + while (!pq.empty()) { + u32 id = get<2>(pq.top()); + pq.pop(); + UNUSED const rose_literal_id &lit = literals.at(id); + DEBUG_PRINTF("assigning groups to lit %u (v %zu l %zu)\n", id, + literal_info[id].vertices.size(), lit.s.length()); + + u8 group_id = 0; + rose_group group = ~0ULL; + for (auto v : getAssociatedVertices(build, id)) { + rose_group local_group = calcLocalGroup(v, build.g, literal_info, + small_literal_count); + group &= local_group; + if (!group) { + break; + } + } + + if (group == ~0ULL) { + goto boring; + } + + group &= ~((1ULL << min_start_group) - 1); /* ensure the purity of the + * always_on groups */ + if (!group) { + goto boring; + } + + group_id = ctz64(group); + + /* TODO: fairness */ + DEBUG_PRINTF("picking sibling group %hhd\n", group_id); + literal_info[id].group_mask = 1ULL << group_id; + groupCount[group_id]++; + + continue; + + boring: + /* long literals will either be stuck in a mega group or spread around + * depending on availability */ + if (superStrong(lit)) { + long_lits.push_back(id); + continue; + } + + // Other literals are assigned to our remaining groups round-robin. 
+ group_id = counter; + + DEBUG_PRINTF("picking boring group %hhd\n", group_id); + literal_info[id].group_mask = 1ULL << group_id; + groupCount[group_id]++; + counter = next_available_group(counter, min_start_group); + } + + /* spread long literals out amongst unused groups if any, otherwise stick + * them in the always on the group */ + + if (groupCount[counter]) { + DEBUG_PRINTF("sticking long literals in the image of the always on\n"); + for (u32 lit_id : long_lits) { + literal_info[lit_id].group_mask = 1ULL << group_long_lit; + groupCount[group_long_lit]++; + } + } else { + u32 min_long_counter = counter; + DEBUG_PRINTF("base long lit group = %u\n", min_long_counter); + for (u32 lit_id : long_lits) { + u8 group_id = counter; + literal_info[lit_id].group_mask = 1ULL << group_id; + groupCount[group_id]++; + counter = next_available_group(counter, min_long_counter); + } + } + /* assign delayed literals to the same group as their parent */ + for (u32 id = 0; id < literals.size(); id++) { + const rose_literal_id &lit = literals.at(id); + + if (!lit.delay) { + continue; + } + + u32 parent = literal_info[id].undelayed_id; + DEBUG_PRINTF("%u is shadow picking up groups from %u\n", id, parent); + assert(literal_info[parent].undelayed_id == parent); + assert(literal_info[parent].group_mask); + literal_info[id].group_mask = literal_info[parent].group_mask; + /* don't increment the group count - these don't really exist */ + } + + DEBUG_PRINTF("populate group to literal mapping\n"); + for (u32 id = 0; id < literals.size(); id++) { + rose_group groups = literal_info[id].group_mask; + while (groups) { + u32 group_id = findAndClearLSB_64(&groups); + build.group_to_literal[group_id].insert(id); + } + } + + /* find how many groups we allocated */ + for (u32 i = 0; i < ROSE_GROUPS_MAX; i++) { + if (groupCount[i]) { + build.group_end = max(build.group_end, i + 1); + } + } +} + +rose_group RoseBuildImpl::getGroups(RoseVertex v) const { + rose_group groups = 0; + + for (u32 id : g[v].literals) { + u32 lit_id = literal_info.at(id).undelayed_id; + + rose_group mygroups = literal_info[lit_id].group_mask; + groups |= mygroups; + } + + return groups; +} + +/** \brief Get the groups of the successor literals of a given vertex. */ +rose_group RoseBuildImpl::getSuccGroups(RoseVertex start) const { + rose_group initialGroups = 0; + + for (auto v : adjacent_vertices_range(start, g)) { + initialGroups |= getGroups(v); + } + + return initialGroups; +} + +/** + * The groups that a role sets are determined by the union of its successor + * literals. Requires the literals already have had groups assigned. + */ +void assignGroupsToRoles(RoseBuildImpl &build) { + auto &g = build.g; + + /* Note: if there is a succ literal in the sidematcher, its successors + * literals must be added instead */ + for (auto v : vertices_range(g)) { + if (build.isAnyStart(v)) { + continue; + } + + const rose_group succ_groups = build.getSuccGroups(v); + g[v].groups |= succ_groups; + + auto ghost_it = build.ghost.find(v); + if (ghost_it != end(build.ghost)) { + /* delayed roles need to supply their groups to the ghost role */ + g[ghost_it->second].groups |= succ_groups; + } + + DEBUG_PRINTF("vertex %zu: groups=%llx\n", g[v].index, g[v].groups); + } +} + +/** + * \brief Returns a mapping from each graph vertex v to the intersection of the + * groups switched on by all of the paths leading up to (and including) v from + * the start vertexes. 
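Populating group_to_literal above walks the set bits of each literal's group mask with findAndClearLSB_64, a Hyperscan utility; the same loop can be sketched with a compiler builtin (__builtin_ctzll here, an assumption rather than the call used above):

    #include <cstdint>
    #include <cstdio>
    #include <map>
    #include <set>
    #include <vector>

    using rose_group = uint64_t;

    // Pop the lowest set bit and return its index; a stand-in for Hyperscan's
    // findAndClearLSB_64 (__builtin_ctzll is a GCC/Clang builtin).
    static uint32_t pop_lsb(rose_group *mask) {
        uint32_t bit = (uint32_t)__builtin_ctzll(*mask);
        *mask &= *mask - 1; // clear the lowest set bit
        return bit;
    }

    int main() {
        // group_mask per literal id - illustrative values only.
        std::vector<rose_group> literal_groups = {0x5 /* groups 0,2 */,
                                                  0x4 /* group 2 */};
        std::map<uint32_t, std::set<uint32_t>> group_to_literal;

        for (uint32_t id = 0; id < literal_groups.size(); id++) {
            rose_group groups = literal_groups[id];
            while (groups) {
                uint32_t group_id = pop_lsb(&groups);
                group_to_literal[group_id].insert(id);
            }
        }

        for (const auto &m : group_to_literal) {
            printf("group %u:", m.first);
            for (uint32_t id : m.second) {
                printf(" lit %u", id);
            }
            printf("\n");
        }
        return 0;
    }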
+ */ +unordered_map<RoseVertex, rose_group> +getVertexGroupMap(const RoseBuildImpl &build) { + const RoseGraph &g = build.g; + vector<RoseVertex> v_order; + v_order.reserve(num_vertices(g)); + + boost::topological_sort(g, back_inserter(v_order)); + + unordered_map<RoseVertex, rose_group> vertex_group_map; + vertex_group_map.reserve(num_vertices(g)); + + const rose_group initial_groups = build.getInitialGroups(); + + for (const auto &v : boost::adaptors::reverse(v_order)) { + DEBUG_PRINTF("vertex %zu\n", g[v].index); + + if (build.isAnyStart(v)) { + DEBUG_PRINTF("start vertex, groups=0x%llx\n", initial_groups); + vertex_group_map.emplace(v, initial_groups); + continue; + } + + // To get to this vertex, we must have come through a predecessor, and + // everyone who isn't a start vertex has one. + assert(in_degree(v, g) > 0); + rose_group pred_groups = ~rose_group{0}; + for (auto u : inv_adjacent_vertices_range(v, g)) { + DEBUG_PRINTF("pred %zu\n", g[u].index); + assert(contains(vertex_group_map, u)); + pred_groups &= vertex_group_map.at(u); + } + + DEBUG_PRINTF("pred_groups=0x%llx\n", pred_groups); + DEBUG_PRINTF("g[v].groups=0x%llx\n", g[v].groups); + + rose_group v_groups = pred_groups | g[v].groups; + DEBUG_PRINTF("v_groups=0x%llx\n", v_groups); + + vertex_group_map.emplace(v, v_groups); + } + + return vertex_group_map; +} + +/** + * \brief Find the set of groups that can be squashed anywhere in the graph, + * either by a literal or by a leftfix. + */ +rose_group getSquashableGroups(const RoseBuildImpl &build) { + rose_group squashable_groups = 0; + for (const auto &info : build.literal_info) { + if (info.squash_group) { + DEBUG_PRINTF("lit squash mask 0x%llx\n", info.group_mask); + squashable_groups |= info.group_mask; + } + } + for (const auto &m : build.rose_squash_masks) { + DEBUG_PRINTF("left squash mask 0x%llx\n", ~m.second); + squashable_groups |= ~m.second; + } + + DEBUG_PRINTF("squashable groups=0x%llx\n", squashable_groups); + assert(!(squashable_groups & build.boundary_group_mask)); + return squashable_groups; +} + +/** + * \brief True if every vertex associated with a group also belongs to + * lit_info. + */ +static +bool coversGroup(const RoseBuildImpl &build, + const rose_literal_info &lit_info) { + if (lit_info.vertices.empty()) { + DEBUG_PRINTF("no vertices - does not cover\n"); + return false; + } + + if (!lit_info.group_mask) { + DEBUG_PRINTF("no group - does not cover\n"); + return false; /* no group (not a floating lit?) 
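The reverse-topological walk above gives each vertex the groups guaranteed to be on along every path to it: the AND of its predecessors' results, ORed with the groups the vertex itself switches on. A tiny hand-built DAG (no Rose types involved) makes the effect visible:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    using rose_group = uint64_t;

    int main() {
        // DAG listed in topological order; vertex 0 is the start vertex.
        //   0 -> 1, 0 -> 2, 1 -> 3, 2 -> 3
        std::vector<std::vector<int>> preds = {{}, {0}, {0}, {1, 2}};
        std::vector<rose_group> own = {0x0, 0x2, 0x4, 0x8}; // groups set by each vertex
        rose_group initial = 0x1;                           // start groups

        std::vector<rose_group> result(preds.size());
        for (size_t v = 0; v < preds.size(); v++) {
            if (preds[v].empty()) {           // start vertex
                result[v] = initial;
                continue;
            }
            rose_group pred_groups = ~rose_group{0};
            for (int u : preds[v]) {          // AND over all incoming paths
                pred_groups &= result[u];
            }
            result[v] = pred_groups | own[v]; // plus groups switched on here
        }

        // Vertex 3 keeps only what is on along *both* 0->1->3 and 0->2->3:
        // 0x1 (initial) | 0x8 (own) = 0x9, but neither 0x2 nor 0x4.
        for (size_t v = 0; v < result.size(); v++) {
            printf("vertex %zu: groups=0x%llx\n", v,
                   (unsigned long long)result[v]);
        }
        return 0;
    }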
*/ + } + + assert(popcount64(lit_info.group_mask) == 1); + + /* for each lit in group, ensure that vertices are a subset of lit_info's */ + rose_group groups = lit_info.group_mask; + while (groups) { + u32 group_id = findAndClearLSB_64(&groups); + for (u32 id : build.group_to_literal.at(group_id)) { + DEBUG_PRINTF(" checking against friend %u\n", id); + if (!is_subset_of(build.literal_info[id].vertices, + lit_info.vertices)) { + DEBUG_PRINTF("fail\n"); + return false; + } + } + } + + DEBUG_PRINTF("ok\n"); + return true; +} + +static +bool isGroupSquasher(const RoseBuildImpl &build, const u32 id /* literal id */, + rose_group forbidden_squash_group) { + const RoseGraph &g = build.g; + + const rose_literal_info &lit_info = build.literal_info.at(id); + + DEBUG_PRINTF("checking if %u '%s' is a group squasher %016llx\n", id, + dumpString(build.literals.at(id).s).c_str(), + lit_info.group_mask); + + if (build.literals.at(id).table == ROSE_EVENT) { + DEBUG_PRINTF("event literal\n"); + return false; + } + + if (!coversGroup(build, lit_info)) { + DEBUG_PRINTF("does not cover group\n"); + return false; + } + + if (lit_info.group_mask & forbidden_squash_group) { + /* probably a delayed lit */ + DEBUG_PRINTF("skipping as involves a forbidden group\n"); + return false; + } + + // Single-vertex, less constrained case than the multiple-vertex one below. + if (lit_info.vertices.size() == 1) { + const RoseVertex &v = *lit_info.vertices.begin(); + + if (build.hasDelayPred(v)) { /* due to rebuild issues */ + return false; + } + + /* there are two ways to be a group squasher: + * 1) only care about the first accepted match + * 2) can only match once after a pred match + * + * (2) requires analysis of the infix before v and is not implemented, + * TODO + */ + + /* Case 1 */ + + // Can't squash cases with accepts unless they are all + // simple-exhaustible. + if (any_of_in(g[v].reports, [&](ReportID report) { + return !isSimpleExhaustible(build.rm.getReport(report)); + })) { + DEBUG_PRINTF("can't squash reporter\n"); + return false; + } + + /* Can't squash cases with a suffix without analysis of the suffix. + * TODO: look at suffixes */ + if (g[v].suffix) { + return false; + } + + // Out-edges must have inf max bound, + no other shenanigans */ + for (const auto &e : out_edges_range(v, g)) { + if (g[e].maxBound != ROSE_BOUND_INF) { + return false; + } + + if (g[target(e, g)].left) { + return false; /* is an infix rose trigger, TODO: analysis */ + } + } + + DEBUG_PRINTF("%u is a path 1 group squasher\n", id); + return true; + + /* note: we could also squash the groups of its preds (if nobody else is + * using them. TODO. */ + } + + // Multiple-vertex case + for (auto v : lit_info.vertices) { + assert(!build.isAnyStart(v)); + + // Can't squash cases with accepts + if (!g[v].reports.empty()) { + return false; + } + + // Suffixes and leftfixes are out too as first literal may not match + // for everyone. + if (!g[v].isBoring()) { + return false; + } + + /* TODO: checks are solid but we should explain */ + if (build.hasDelayPred(v) || build.hasAnchoredTablePred(v)) { + return false; + } + + // Out-edges must have inf max bound and not directly lead to another + // vertex with this group, e.g. 'foobar.*foobar'. 
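What the squash_group flag buys at scan time can be pictured as follows. This is a conceptual sketch of group squashing rather than the engine's actual instruction stream: once a squashing literal has matched, the group it covers can be switched off so the literal matcher stops reporting it.

    #include <cstdint>
    #include <cstdio>

    using rose_group = uint64_t;

    int main() {
        rose_group active = 0xff;      // groups currently switched on
        rose_group squash = 1ULL << 3; // group owned by a squasher literal

        printf("before: active=0x%llx\n", (unsigned long long)active);

        // The squasher matched: nothing downstream needs another match from
        // this group, so it can be cleared from the active set.
        active &= ~squash;

        printf("after : active=0x%llx\n", (unsigned long long)active);
        return 0;
    }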
+ for (const auto &e : out_edges_range(v, g)) { + if (g[e].maxBound != ROSE_BOUND_INF) { + return false; + } + RoseVertex t = target(e, g); + + if (g[t].left) { + return false; /* is an infix rose trigger */ + } + + for (u32 lit_id : g[t].literals) { + if (build.literal_info[lit_id].group_mask & + lit_info.group_mask) { + return false; + } + } + } + + // In-edges must all be dot-stars with no overlap at all, as overlap + // also causes history to be used. + /* Different tables are already forbidden by previous checks */ + for (const auto &e : in_edges_range(v, g)) { + if (!(g[e].minBound == 0 && g[e].maxBound == ROSE_BOUND_INF)) { + return false; + } + + // Check overlap, if source was a literal. + RoseVertex u = source(e, g); + if (build.maxLiteralOverlap(u, v)) { + return false; + } + } + } + + DEBUG_PRINTF("literal %u is a multi-vertex group squasher\n", id); + return true; +} + +void findGroupSquashers(RoseBuildImpl &build) { + rose_group forbidden_squash_group = build.boundary_group_mask; + for (u32 id = 0; id < build.literals.size(); id++) { + const auto &lit = build.literals.at(id); + if (lit.delay) { + forbidden_squash_group |= build.literal_info[id].group_mask; + } + } + + for (u32 id = 0; id < build.literal_info.size(); id++) { + if (isGroupSquasher(build, id, forbidden_squash_group)) { + build.literal_info[id].squash_group = true; + } + } +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/rose/rose_build_groups.h b/contrib/libs/hyperscan/src/rose/rose_build_groups.h index d35080b62e..ada64b809f 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_groups.h +++ b/contrib/libs/hyperscan/src/rose/rose_build_groups.h @@ -1,57 +1,57 @@ -/* - * Copyright (c) 2016-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** - * \file - * \brief Rose build: code for analysing literal groups. 
- */ - -#ifndef ROSE_BUILD_GROUPS_H -#define ROSE_BUILD_GROUPS_H - -#include "rose_build_impl.h" - -#include <unordered_map> - -namespace ue2 { - -std::unordered_map<RoseVertex, rose_group> -getVertexGroupMap(const RoseBuildImpl &build); - -rose_group getSquashableGroups(const RoseBuildImpl &build); - -void assignGroupsToLiterals(RoseBuildImpl &build); - -void assignGroupsToRoles(RoseBuildImpl &build); - -void findGroupSquashers(RoseBuildImpl &build); - -} // namespace ue2 - -#endif // ROSE_BUILD_GROUPS_H - +/* + * Copyright (c) 2016-2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * \brief Rose build: code for analysing literal groups. + */ + +#ifndef ROSE_BUILD_GROUPS_H +#define ROSE_BUILD_GROUPS_H + +#include "rose_build_impl.h" + +#include <unordered_map> + +namespace ue2 { + +std::unordered_map<RoseVertex, rose_group> +getVertexGroupMap(const RoseBuildImpl &build); + +rose_group getSquashableGroups(const RoseBuildImpl &build); + +void assignGroupsToLiterals(RoseBuildImpl &build); + +void assignGroupsToRoles(RoseBuildImpl &build); + +void findGroupSquashers(RoseBuildImpl &build); + +} // namespace ue2 + +#endif // ROSE_BUILD_GROUPS_H + diff --git a/contrib/libs/hyperscan/src/rose/rose_build_impl.h b/contrib/libs/hyperscan/src/rose/rose_build_impl.h index 9ff3c65865..7780848b1b 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_impl.h +++ b/contrib/libs/hyperscan/src/rose/rose_build_impl.h @@ -26,32 +26,32 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ -#ifndef ROSE_BUILD_IMPL_H -#define ROSE_BUILD_IMPL_H +#ifndef ROSE_BUILD_IMPL_H +#define ROSE_BUILD_IMPL_H #include "rose_build.h" #include "rose_build_util.h" -#include "rose_common.h" +#include "rose_common.h" #include "rose_graph.h" #include "nfa/mpvcompile.h" #include "nfa/goughcompile.h" #include "nfa/nfa_internal.h" #include "nfagraph/ng_holder.h" #include "nfagraph/ng_revacc.h" -#include "util/bytecode_ptr.h" -#include "util/flat_containers.h" -#include "util/hash.h" +#include "util/bytecode_ptr.h" +#include "util/flat_containers.h" +#include "util/hash.h" #include "util/order_check.h" #include "util/queue_index_factory.h" -#include "util/ue2string.h" -#include "util/unordered.h" -#include "util/verify_types.h" +#include "util/ue2string.h" +#include "util/unordered.h" +#include "util/verify_types.h" #include <deque> #include <map> #include <string> #include <vector> -#include <boost/variant.hpp> +#include <boost/variant.hpp> struct RoseEngine; @@ -59,36 +59,36 @@ namespace ue2 { #define ROSE_GROUPS_MAX 64 -#define ROSE_LONG_LITERAL_THRESHOLD_MIN 33 - -/** - * \brief The largest allowable "short" literal fragment which can be given to - * a literal matcher directly. - * - * Literals longer than this will be truncated to their suffix and confirmed in - * the Rose interpreter, either as "medium length" literals which can be - * confirmed from history, or "long literals" which make use of the streaming - * table support. - */ -#define ROSE_SHORT_LITERAL_LEN_MAX 8 - +#define ROSE_LONG_LITERAL_THRESHOLD_MIN 33 + +/** + * \brief The largest allowable "short" literal fragment which can be given to + * a literal matcher directly. + * + * Literals longer than this will be truncated to their suffix and confirmed in + * the Rose interpreter, either as "medium length" literals which can be + * confirmed from history, or "long literals" which make use of the streaming + * table support. 
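Per the comment above, a literal longer than ROSE_SHORT_LITERAL_LEN_MAX only hands its suffix to the literal matcher, and the rest must be confirmed separately. The sketch below splits a literal that way and performs a naive confirm against a buffer; the split follows the comment, but the confirm step and all names are illustrative assumptions, not the interpreter's code.

    #include <cstdio>
    #include <cstring>
    #include <string>

    static const size_t SHORT_LIT_MAX = 8; // as per ROSE_SHORT_LITERAL_LEN_MAX

    int main() {
        std::string lit = "confidential-document";
        size_t frag_len = lit.size() > SHORT_LIT_MAX ? SHORT_LIT_MAX : lit.size();
        std::string fragment = lit.substr(lit.size() - frag_len);   // goes to matcher
        std::string remainder = lit.substr(0, lit.size() - frag_len); // confirmed later

        printf("matcher fragment : '%s'\n", fragment.c_str());
        printf("confirm remainder: '%s' (%zu bytes before the fragment)\n",
               remainder.c_str(), remainder.size());

        // On a fragment match ending at offset 'end' in 'buf', the remainder
        // must appear immediately before it for the full literal to match.
        const char *buf = "some confidential-document here";
        size_t end = std::string(buf).find(fragment) + fragment.size();
        size_t start = end - lit.size();
        bool ok = memcmp(buf + start, lit.data(), lit.size()) == 0;
        printf("full literal confirmed: %s\n", ok ? "yes" : "no");
        return 0;
    }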
+ */ +#define ROSE_SHORT_LITERAL_LEN_MAX 8 + struct BoundaryReports; struct CastleProto; struct CompileContext; class ReportManager; -class SmallWriteBuild; +class SmallWriteBuild; class SomSlotManager; struct suffix_id { suffix_id(const RoseSuffixInfo &in) : g(in.graph.get()), c(in.castle.get()), d(in.rdfa.get()), - h(in.haig.get()), t(in.tamarama.get()), - dfa_min_width(in.dfa_min_width), + h(in.haig.get()), t(in.tamarama.get()), + dfa_min_width(in.dfa_min_width), dfa_max_width(in.dfa_max_width) { assert(!g || g->kind == NFA_SUFFIX); } bool operator==(const suffix_id &b) const { - bool rv = g == b.g && c == b.c && h == b.h && d == b.d && t == b.t; + bool rv = g == b.g && c == b.c && h == b.h && d == b.d && t == b.t; assert(!rv || dfa_min_width == b.dfa_min_width); assert(!rv || dfa_max_width == b.dfa_max_width); return rv; @@ -100,7 +100,7 @@ struct suffix_id { ORDER_CHECK(c); ORDER_CHECK(d); ORDER_CHECK(h); - ORDER_CHECK(t); + ORDER_CHECK(t); return false; } @@ -132,22 +132,22 @@ struct suffix_id { } return c; } - TamaProto *tamarama() { - if (!d && !h) { - assert(dfa_min_width == depth(0)); - assert(dfa_max_width == depth::infinity()); - } - return t; - } - const TamaProto *tamarama() const { - if (!d && !h) { - assert(dfa_min_width == depth(0)); - assert(dfa_max_width == depth::infinity()); - } - return t; - } - - + TamaProto *tamarama() { + if (!d && !h) { + assert(dfa_min_width == depth(0)); + assert(dfa_max_width == depth::infinity()); + } + return t; + } + const TamaProto *tamarama() const { + if (!d && !h) { + assert(dfa_min_width == depth(0)); + assert(dfa_max_width == depth::infinity()); + } + return t; + } + + raw_som_dfa *haig() { return h; } const raw_som_dfa *haig() const { return h; } raw_dfa *dfa() { return d; } @@ -160,7 +160,7 @@ private: CastleProto *c; raw_dfa *d; raw_som_dfa *h; - TamaProto *t; + TamaProto *t; depth dfa_min_width; depth dfa_max_width; @@ -185,7 +185,7 @@ struct left_id { : g(in.graph.get()), c(in.castle.get()), d(in.dfa.get()), h(in.haig.get()), dfa_min_width(in.dfa_min_width), dfa_max_width(in.dfa_max_width) { - assert(!g || !has_managed_reports(*g)); + assert(!g || !has_managed_reports(*g)); } bool operator==(const left_id &b) const { bool rv = g == b.g && c == b.c && h == b.h && d == b.d; @@ -254,15 +254,15 @@ private: }; std::set<u32> all_tops(const left_id &r); -std::set<ReportID> all_reports(const left_id &left); +std::set<ReportID> all_reports(const left_id &left); bool isAnchored(const left_id &r); depth findMinWidth(const left_id &r); depth findMaxWidth(const left_id &r); u32 num_tops(const left_id &r); struct rose_literal_info { - flat_set<u32> delayed_ids; - flat_set<RoseVertex> vertices; + flat_set<u32> delayed_ids; + flat_set<RoseVertex> vertices; rose_group group_mask = 0; u32 undelayed_id = MO_INVALID_IDX; bool squash_group = false; @@ -290,26 +290,26 @@ struct rose_literal_id { u32 distinctiveness; size_t elength(void) const { return s.length() + delay; } - size_t elength_including_mask(void) const { - size_t mask_len = msk.size(); - for (u8 c : msk) { - if (!c) { - mask_len--; - } else { - break; - } - } - return MAX(mask_len, s.length()) + delay; - } - - bool operator==(const rose_literal_id &b) const { - return s == b.s && msk == b.msk && cmp == b.cmp && table == b.table && - delay == b.delay && distinctiveness == b.distinctiveness; - } - - size_t hash() const { - return hash_all(s, msk, cmp, table, delay, distinctiveness); - } + size_t elength_including_mask(void) const { + size_t mask_len = msk.size(); + for (u8 c : msk) { + if 
(!c) { + mask_len--; + } else { + break; + } + } + return MAX(mask_len, s.length()) + delay; + } + + bool operator==(const rose_literal_id &b) const { + return s == b.s && msk == b.msk && cmp == b.cmp && table == b.table && + delay == b.delay && distinctiveness == b.distinctiveness; + } + + size_t hash() const { + return hash_all(s, msk, cmp, table, delay, distinctiveness); + } }; static inline @@ -323,55 +323,55 @@ bool operator<(const rose_literal_id &a, const rose_literal_id &b) { return 0; } -class RoseLiteralMap { - /** - * \brief Main storage for literals. - * - * Note that this cannot be a vector, as the present code relies on - * iterator stability when iterating over this list and adding to it inside - * the loop. - */ - std::deque<rose_literal_id> lits; - - /** \brief Quick-lookup index from literal -> index in lits. */ - ue2_unordered_map<rose_literal_id, u32> lits_index; - -public: - std::pair<u32, bool> insert(const rose_literal_id &lit) { - auto it = lits_index.find(lit); - if (it != lits_index.end()) { - return {it->second, false}; - } - u32 id = verify_u32(lits.size()); - lits.push_back(lit); - lits_index.emplace(lit, id); - return {id, true}; - } - - // Erase the last num elements. - void erase_back(size_t num) { - assert(num <= lits.size()); - for (size_t i = 0; i < num; i++) { - lits_index.erase(lits.back()); - lits.pop_back(); - } - assert(lits.size() == lits_index.size()); - } - - const rose_literal_id &at(u32 id) const { - assert(id < lits.size()); - return lits.at(id); - } - - using const_iterator = decltype(lits)::const_iterator; - const_iterator begin() const { return lits.begin(); } - const_iterator end() const { return lits.end(); } - - size_t size() const { - return lits.size(); - } -}; - +class RoseLiteralMap { + /** + * \brief Main storage for literals. + * + * Note that this cannot be a vector, as the present code relies on + * iterator stability when iterating over this list and adding to it inside + * the loop. + */ + std::deque<rose_literal_id> lits; + + /** \brief Quick-lookup index from literal -> index in lits. */ + ue2_unordered_map<rose_literal_id, u32> lits_index; + +public: + std::pair<u32, bool> insert(const rose_literal_id &lit) { + auto it = lits_index.find(lit); + if (it != lits_index.end()) { + return {it->second, false}; + } + u32 id = verify_u32(lits.size()); + lits.push_back(lit); + lits_index.emplace(lit, id); + return {id, true}; + } + + // Erase the last num elements. 
+ void erase_back(size_t num) { + assert(num <= lits.size()); + for (size_t i = 0; i < num; i++) { + lits_index.erase(lits.back()); + lits.pop_back(); + } + assert(lits.size() == lits_index.size()); + } + + const rose_literal_id &at(u32 id) const { + assert(id < lits.size()); + return lits.at(id); + } + + using const_iterator = decltype(lits)::const_iterator; + const_iterator begin() const { return lits.begin(); } + const_iterator end() const { return lits.end(); } + + size_t size() const { + return lits.size(); + } +}; + struct simple_anchored_info { simple_anchored_info(u32 min_b, u32 max_b, const ue2_literal &lit) : min_bound(min_b), max_bound(max_b), literal(lit) {} @@ -390,99 +390,99 @@ bool operator<(const simple_anchored_info &a, const simple_anchored_info &b) { return 0; } -struct MpvProto { - bool empty() const { - return puffettes.empty() && triggered_puffettes.empty(); +struct MpvProto { + bool empty() const { + return puffettes.empty() && triggered_puffettes.empty(); } - void reset() { - puffettes.clear(); - triggered_puffettes.clear(); + void reset() { + puffettes.clear(); + triggered_puffettes.clear(); } - std::vector<raw_puff> puffettes; - std::vector<raw_puff> triggered_puffettes; -}; + std::vector<raw_puff> puffettes; + std::vector<raw_puff> triggered_puffettes; +}; + +struct OutfixInfo { + template<class T> + explicit OutfixInfo(std::unique_ptr<T> x) : proto(std::move(x)) {} + + explicit OutfixInfo(MpvProto mpv_in) : proto(std::move(mpv_in)) {} -struct OutfixInfo { - template<class T> - explicit OutfixInfo(std::unique_ptr<T> x) : proto(std::move(x)) {} - - explicit OutfixInfo(MpvProto mpv_in) : proto(std::move(mpv_in)) {} - u32 get_queue(QueueIndexFactory &qif); - u32 get_queue() const { - assert(queue != ~0U); - return queue; - } - + u32 get_queue() const { + assert(queue != ~0U); + return queue; + } + bool is_nonempty_mpv() const { - auto *m = boost::get<MpvProto>(&proto); - return m && !m->empty(); + auto *m = boost::get<MpvProto>(&proto); + return m && !m->empty(); } bool is_dead() const { - auto *m = boost::get<MpvProto>(&proto); - if (m) { - return m->empty(); - } - return boost::get<boost::blank>(&proto) != nullptr; + auto *m = boost::get<MpvProto>(&proto); + if (m) { + return m->empty(); + } + return boost::get<boost::blank>(&proto) != nullptr; } void clear() { - proto = boost::blank(); - } - - // Convenience accessor functions. - - NGHolder *holder() { - auto *up = boost::get<std::unique_ptr<NGHolder>>(&proto); - return up ? up->get() : nullptr; - } - raw_dfa *rdfa() { - auto *up = boost::get<std::unique_ptr<raw_dfa>>(&proto); - return up ? up->get() : nullptr; - } - raw_som_dfa *haig() { - auto *up = boost::get<std::unique_ptr<raw_som_dfa>>(&proto); - return up ? up->get() : nullptr; - } - MpvProto *mpv() { - return boost::get<MpvProto>(&proto); - } - - // Convenience const accessor functions. - - const NGHolder *holder() const { - auto *up = boost::get<std::unique_ptr<NGHolder>>(&proto); - return up ? up->get() : nullptr; - } - const raw_dfa *rdfa() const { - auto *up = boost::get<std::unique_ptr<raw_dfa>>(&proto); - return up ? up->get() : nullptr; - } - const raw_som_dfa *haig() const { - auto *up = boost::get<std::unique_ptr<raw_som_dfa>>(&proto); - return up ? up->get() : nullptr; - } - const MpvProto *mpv() const { - return boost::get<MpvProto>(&proto); - } - - /** - * \brief Variant wrapping the various engine types. If this is - * boost::blank, it means that this outfix is unused (dead). 
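The OutfixInfo accessors above each return nullptr unless the variant currently holds that engine type, and is_dead() checks for the blank alternative. The same pattern can be mirrored with std::variant and std::get_if; the engine types below are stand-ins, not the real prototypes:

    #include <cstdio>
    #include <memory>
    #include <variant>

    // Stand-ins for the engine prototypes an outfix can hold.
    struct NGHolderStub {};
    struct RawDfaStub {};

    struct Outfix {
        std::variant<std::monostate,                 // blank: outfix is dead
                     std::unique_ptr<NGHolderStub>,
                     std::unique_ptr<RawDfaStub>> proto;

        NGHolderStub *holder() {
            auto *up = std::get_if<std::unique_ptr<NGHolderStub>>(&proto);
            return up ? up->get() : nullptr;         // null if another type held
        }
        RawDfaStub *rdfa() {
            auto *up = std::get_if<std::unique_ptr<RawDfaStub>>(&proto);
            return up ? up->get() : nullptr;
        }
        bool is_dead() const {
            return std::holds_alternative<std::monostate>(proto);
        }
    };

    int main() {
        Outfix o;
        o.proto = std::make_unique<RawDfaStub>();
        printf("holder=%p rdfa=%p dead=%d\n", (void *)o.holder(),
               (void *)o.rdfa(), o.is_dead());
        o.proto = std::monostate{};                  // clear(): now dead
        printf("after clear: dead=%d\n", o.is_dead());
        return 0;
    }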
- */ - boost::variant< - boost::blank, - std::unique_ptr<NGHolder>, - std::unique_ptr<raw_dfa>, - std::unique_ptr<raw_som_dfa>, - MpvProto> proto = boost::blank(); - + proto = boost::blank(); + } + + // Convenience accessor functions. + + NGHolder *holder() { + auto *up = boost::get<std::unique_ptr<NGHolder>>(&proto); + return up ? up->get() : nullptr; + } + raw_dfa *rdfa() { + auto *up = boost::get<std::unique_ptr<raw_dfa>>(&proto); + return up ? up->get() : nullptr; + } + raw_som_dfa *haig() { + auto *up = boost::get<std::unique_ptr<raw_som_dfa>>(&proto); + return up ? up->get() : nullptr; + } + MpvProto *mpv() { + return boost::get<MpvProto>(&proto); + } + + // Convenience const accessor functions. + + const NGHolder *holder() const { + auto *up = boost::get<std::unique_ptr<NGHolder>>(&proto); + return up ? up->get() : nullptr; + } + const raw_dfa *rdfa() const { + auto *up = boost::get<std::unique_ptr<raw_dfa>>(&proto); + return up ? up->get() : nullptr; + } + const raw_som_dfa *haig() const { + auto *up = boost::get<std::unique_ptr<raw_som_dfa>>(&proto); + return up ? up->get() : nullptr; + } + const MpvProto *mpv() const { + return boost::get<MpvProto>(&proto); + } + + /** + * \brief Variant wrapping the various engine types. If this is + * boost::blank, it means that this outfix is unused (dead). + */ + boost::variant< + boost::blank, + std::unique_ptr<NGHolder>, + std::unique_ptr<raw_dfa>, + std::unique_ptr<raw_som_dfa>, + MpvProto> proto = boost::blank(); + RevAccInfo rev_info; u32 maxBAWidth = 0; //!< max bi-anchored width - depth minWidth{depth::infinity()}; - depth maxWidth{0}; + depth minWidth{depth::infinity()}; + depth maxWidth{0}; u64a maxOffset = 0; bool in_sbmatcher = false; //!< handled by small-block matcher. @@ -495,16 +495,16 @@ std::set<ReportID> all_reports(const OutfixInfo &outfix); // Concrete impl class class RoseBuildImpl : public RoseBuild { public: - RoseBuildImpl(ReportManager &rm, SomSlotManager &ssm, SmallWriteBuild &smwr, + RoseBuildImpl(ReportManager &rm, SomSlotManager &ssm, SmallWriteBuild &smwr, const CompileContext &cc, const BoundaryReports &boundary); ~RoseBuildImpl() override; // Adds a single literal. void add(bool anchored, bool eod, const ue2_literal &lit, - const flat_set<ReportID> &ids) override; + const flat_set<ReportID> &ids) override; - bool addRose(const RoseInGraph &ig, bool prefilter) override; + bool addRose(const RoseInGraph &ig, bool prefilter) override; bool addSombeRose(const RoseInGraph &ig) override; bool addOutfix(const NGHolder &h) override; @@ -515,20 +515,20 @@ public: // Returns true if we were able to add it as a mask bool add(bool anchored, const std::vector<CharReach> &mask, - const flat_set<ReportID> &reports) override; + const flat_set<ReportID> &reports) override; bool addAnchoredAcyclic(const NGHolder &graph) override; bool validateMask(const std::vector<CharReach> &mask, - const flat_set<ReportID> &reports, bool anchored, + const flat_set<ReportID> &reports, bool anchored, bool eod) const override; void addMask(const std::vector<CharReach> &mask, - const flat_set<ReportID> &reports, bool anchored, + const flat_set<ReportID> &reports, bool anchored, bool eod) override; // Construct a runtime implementation. 
- bytecode_ptr<RoseEngine> buildRose(u32 minWidth) override; - bytecode_ptr<RoseEngine> buildFinalEngine(u32 minWidth); + bytecode_ptr<RoseEngine> buildRose(u32 minWidth) override; + bytecode_ptr<RoseEngine> buildFinalEngine(u32 minWidth); void setSom() override { hasSom = true; } @@ -625,8 +625,8 @@ public: * overlap calculation in history assignment. */ std::map<u32, rose_literal_id> anchoredLitSuffix; - ue2_unordered_set<left_id> transient; - ue2_unordered_map<left_id, rose_group> rose_squash_masks; + ue2_unordered_set<left_id> transient; + ue2_unordered_map<left_id, rose_group> rose_squash_masks; std::vector<OutfixInfo> outfixes; @@ -639,33 +639,33 @@ public: u32 max_rose_anchored_floating_overlap; - rose_group boundary_group_mask = 0; + rose_group boundary_group_mask = 0; QueueIndexFactory qif; ReportManager &rm; SomSlotManager &ssm; - SmallWriteBuild &smwr; + SmallWriteBuild &smwr; const BoundaryReports &boundary; private: ReportID next_nfa_report; }; -size_t calcLongLitThreshold(const RoseBuildImpl &build, - const size_t historyRequired); - +size_t calcLongLitThreshold(const RoseBuildImpl &build, + const size_t historyRequired); + // Free functions, in rose_build_misc.cpp bool hasAnchHistorySucc(const RoseGraph &g, RoseVertex v); bool hasLastByteHistorySucc(const RoseGraph &g, RoseVertex v); size_t maxOverlap(const rose_literal_id &a, const rose_literal_id &b); -ue2_literal findNonOverlappingTail(const std::set<ue2_literal> &lits, - const ue2_literal &s); +ue2_literal findNonOverlappingTail(const std::set<ue2_literal> &lits, + const ue2_literal &s); #ifndef NDEBUG -bool roseHasTops(const RoseBuildImpl &build, RoseVertex v); -bool hasOrphanedTops(const RoseBuildImpl &build); +bool roseHasTops(const RoseBuildImpl &build, RoseVertex v); +bool hasOrphanedTops(const RoseBuildImpl &build); #endif u64a findMaxOffset(const std::set<ReportID> &reports, const ReportManager &rm); @@ -676,10 +676,10 @@ u64a findMaxOffset(const std::set<ReportID> &reports, const ReportManager &rm); void normaliseLiteralMask(const ue2_literal &s, std::vector<u8> &msk, std::vector<u8> &cmp); -u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id); -u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id); +u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id); +u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id); -bool canEagerlyReportAtEod(const RoseBuildImpl &build, const RoseEdge &e); +bool canEagerlyReportAtEod(const RoseBuildImpl &build, const RoseEdge &e); #ifndef NDEBUG bool canImplementGraphs(const RoseBuildImpl &tbi); @@ -687,22 +687,22 @@ bool canImplementGraphs(const RoseBuildImpl &tbi); } // namespace ue2 -namespace std { - -template<> -struct hash<ue2::left_id> { - size_t operator()(const ue2::left_id &l) const { - return l.hash(); - } -}; - -template<> -struct hash<ue2::suffix_id> { - size_t operator()(const ue2::suffix_id &s) const { - return s.hash(); - } -}; - -} // namespace std - -#endif /* ROSE_BUILD_IMPL_H */ +namespace std { + +template<> +struct hash<ue2::left_id> { + size_t operator()(const ue2::left_id &l) const { + return l.hash(); + } +}; + +template<> +struct hash<ue2::suffix_id> { + size_t operator()(const ue2::suffix_id &s) const { + return s.hash(); + } +}; + +} // namespace std + +#endif /* ROSE_BUILD_IMPL_H */ diff --git a/contrib/libs/hyperscan/src/rose/rose_build_infix.cpp b/contrib/libs/hyperscan/src/rose/rose_build_infix.cpp index 1efe27964b..80e1254236 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_infix.cpp +++ 
b/contrib/libs/hyperscan/src/rose/rose_build_infix.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,12 +36,12 @@ #include "rose/rose_build_impl.h" #include "util/container.h" #include "util/dump_charclass.h" -#include "util/flat_containers.h" +#include "util/flat_containers.h" #include "util/graph_range.h" #include "util/graph.h" -#include "util/hash.h" +#include "util/hash.h" #include "util/ue2string.h" -#include "util/unordered.h" +#include "util/unordered.h" #include <algorithm> #include <set> @@ -53,7 +53,7 @@ namespace ue2 { static bool couldEndLiteral(const ue2_literal &s, NFAVertex initial, const NGHolder &h) { - flat_set<NFAVertex> curr, next; + flat_set<NFAVertex> curr, next; curr.insert(initial); for (auto it = s.rbegin(), ite = s.rend(); it != ite; ++it) { @@ -84,10 +84,10 @@ bool couldEndLiteral(const ue2_literal &s, NFAVertex initial, return true; } -using EdgeCache = ue2_unordered_set<pair<NFAVertex, NFAVertex>>; - +using EdgeCache = ue2_unordered_set<pair<NFAVertex, NFAVertex>>; + static -void contractVertex(NGHolder &g, NFAVertex v, EdgeCache &all_edges) { +void contractVertex(NGHolder &g, NFAVertex v, EdgeCache &all_edges) { for (auto u : inv_adjacent_vertices_range(v, g)) { if (u == v) { continue; // self-edge @@ -111,9 +111,9 @@ void contractVertex(NGHolder &g, NFAVertex v, EdgeCache &all_edges) { } static -u32 findMaxLiteralMatches(const NGHolder &h, const set<ue2_literal> &lits) { +u32 findMaxLiteralMatches(const NGHolder &h, const set<ue2_literal> &lits) { DEBUG_PRINTF("h=%p, %zu literals\n", &h, lits.size()); - //dumpGraph("infix.dot", h); + //dumpGraph("infix.dot", h); // Indices of vertices that could terminate any of the literals in 'lits'. set<u32> terms; @@ -147,9 +147,9 @@ u32 findMaxLiteralMatches(const NGHolder &h, const set<ue2_literal> &lits) { cloneHolder(g, h); vector<NFAVertex> dead; - // The set of all edges in the graph is used for existence checks in - // contractVertex. - EdgeCache all_edges; + // The set of all edges in the graph is used for existence checks in + // contractVertex. 
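The contraction step above reconnects each predecessor of a removed vertex to each of its successors, using a set of (source, target) pairs (the EdgeCache) to avoid inserting duplicate edges. A small sketch of that idea on a plain adjacency list, independent of the NFA graph types:

    #include <cstdio>
    #include <set>
    #include <utility>
    #include <vector>

    int main() {
        // Adjacency lists: 0 -> 1 -> 2, plus 0 -> 2 already present.
        std::vector<std::vector<int>> succs = {{1, 2}, {2}, {}};
        std::vector<std::vector<int>> preds = {{}, {0}, {0, 1}};

        std::set<std::pair<int, int>> edge_cache; // existing edges
        for (int u = 0; u < (int)succs.size(); u++) {
            for (int w : succs[u]) {
                edge_cache.emplace(u, w);
            }
        }

        // Contract vertex 1: wire preds(1) to succs(1), skipping self-edges
        // and edges that already exist (0 -> 2 is already there).
        int v = 1;
        for (int u : preds[v]) {
            for (int w : succs[v]) {
                if (u == v || w == v || edge_cache.count({u, w})) {
                    continue;
                }
                succs[u].push_back(w);
                edge_cache.emplace(u, w);
                printf("added edge %d -> %d\n", u, w);
            }
        }
        printf("done; vertex %d can now be removed\n", v);
        return 0;
    }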
+ EdgeCache all_edges; for (const auto &e : edges_range(g)) { all_edges.emplace(source(e, g), target(e, g)); } @@ -167,7 +167,7 @@ u32 findMaxLiteralMatches(const NGHolder &h, const set<ue2_literal> &lits) { } remove_vertices(dead, g); - //dumpGraph("relaxed.dot", g); + //dumpGraph("relaxed.dot", g); depth maxWidth = findMaxWidth(g); DEBUG_PRINTF("maxWidth=%s\n", maxWidth.str().c_str()); @@ -261,11 +261,11 @@ u32 findMaxInfixMatches(const left_id &left, const set<ue2_literal> &lits) { return findMaxInfixMatches(*left.castle(), lits); } if (left.graph()) { - if (!onlyOneTop(*left.graph())) { - DEBUG_PRINTF("more than one top!n"); - return NO_MATCH_LIMIT; - } - return findMaxLiteralMatches(*left.graph(), lits); + if (!onlyOneTop(*left.graph())) { + DEBUG_PRINTF("more than one top!n"); + return NO_MATCH_LIMIT; + } + return findMaxLiteralMatches(*left.graph(), lits); } return NO_MATCH_LIMIT; @@ -282,7 +282,7 @@ void findCountingMiracleInfo(const left_id &left, const vector<u8> &stopTable, const NGHolder &g = *left.graph(); - auto cyclics = find_vertices_in_cycles(g); + auto cyclics = find_vertices_in_cycles(g); if (!proper_out_degree(g.startDs, g)) { cyclics.erase(g.startDs); @@ -290,7 +290,7 @@ void findCountingMiracleInfo(const left_id &left, const vector<u8> &stopTable, CharReach cyclic_cr; for (NFAVertex v : cyclics) { - DEBUG_PRINTF("considering %zu ||=%zu\n", g[v].index, + DEBUG_PRINTF("considering %zu ||=%zu\n", g[v].index, g[v].char_reach.count()); cyclic_cr |= g[v].char_reach; } @@ -318,7 +318,7 @@ void findCountingMiracleInfo(const left_id &left, const vector<u8> &stopTable, lits.insert(ue2_literal(c, false)); } - u32 count = findMaxLiteralMatches(*left.graph(), lits); + u32 count = findMaxLiteralMatches(*left.graph(), lits); DEBUG_PRINTF("counting miracle %u\n", count + 1); if (count && count < 50) { *cm_count = count + 1; diff --git a/contrib/libs/hyperscan/src/rose/rose_build_instructions.cpp b/contrib/libs/hyperscan/src/rose/rose_build_instructions.cpp index 3dbbe747c2..f96221b247 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_instructions.cpp +++ b/contrib/libs/hyperscan/src/rose/rose_build_instructions.cpp @@ -1,167 +1,167 @@ -/* +/* * Copyright (c) 2017-2020, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "rose_build_instructions.h" - -#include "rose_build_engine_blob.h" -#include "util/multibit_build.h" -#include "util/verify_types.h" - -#include <algorithm> - -using namespace std; - -namespace ue2 { -/* Destructors to avoid weak vtables. */ - -RoseInstruction::~RoseInstruction() = default; -RoseInstrCatchUp::~RoseInstrCatchUp() = default; -RoseInstrCatchUpMpv::~RoseInstrCatchUpMpv() = default; -RoseInstrSomZero::~RoseInstrSomZero() = default; -RoseInstrSuffixesEod::~RoseInstrSuffixesEod() = default; -RoseInstrMatcherEod::~RoseInstrMatcherEod() = default; -RoseInstrEnd::~RoseInstrEnd() = default; -RoseInstrClearWorkDone::~RoseInstrClearWorkDone() = default; + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "rose_build_instructions.h" + +#include "rose_build_engine_blob.h" +#include "util/multibit_build.h" +#include "util/verify_types.h" + +#include <algorithm> + +using namespace std; + +namespace ue2 { +/* Destructors to avoid weak vtables. 
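The "destructors to avoid weak vtables" comment refers to a common C++ idiom: when a class's first non-inline virtual function (typically the destructor) is defined out of line in exactly one translation unit, the vtable is emitted once there instead of as a weak symbol in every file that includes the header. A minimal sketch of the pattern, with a made-up class name:

    // Header (sketch): declare the virtual destructor but do not define it inline.
    struct RoseInstructionLike {
        virtual ~RoseInstructionLike(); // out-of-line definition anchors the vtable
        virtual int length() const { return 0; }
    };

    // One .cpp file (sketch): the single out-of-line definition.
    RoseInstructionLike::~RoseInstructionLike() = default;

    int main() {
        RoseInstructionLike inst;
        return inst.length();
    }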
*/ + +RoseInstruction::~RoseInstruction() = default; +RoseInstrCatchUp::~RoseInstrCatchUp() = default; +RoseInstrCatchUpMpv::~RoseInstrCatchUpMpv() = default; +RoseInstrSomZero::~RoseInstrSomZero() = default; +RoseInstrSuffixesEod::~RoseInstrSuffixesEod() = default; +RoseInstrMatcherEod::~RoseInstrMatcherEod() = default; +RoseInstrEnd::~RoseInstrEnd() = default; +RoseInstrClearWorkDone::~RoseInstrClearWorkDone() = default; RoseInstrFlushCombination::~RoseInstrFlushCombination() = default; RoseInstrLastFlushCombination::~RoseInstrLastFlushCombination() = default; - -using OffsetMap = RoseInstruction::OffsetMap; - -static -u32 calc_jump(const OffsetMap &offset_map, const RoseInstruction *from, - const RoseInstruction *to) { - DEBUG_PRINTF("computing relative jump from %p to %p\n", from, to); - assert(from && contains(offset_map, from)); - assert(to && contains(offset_map, to)); - - u32 from_offset = offset_map.at(from); - u32 to_offset = offset_map.at(to); - DEBUG_PRINTF("offsets: %u -> %u\n", from_offset, to_offset); - assert(from_offset <= to_offset); - - return to_offset - from_offset; -} - -void RoseInstrAnchoredDelay::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - inst->groups = groups; - inst->anch_id = anch_id; - inst->done_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckLitEarly::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - inst->min_offset = min_offset; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckGroups::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - inst->groups = groups; -} - -void RoseInstrCheckOnlyEod::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckBounds::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - inst->min_bound = min_bound; - inst->max_bound = max_bound; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckNotHandled::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - inst->key = key; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckSingleLookaround::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - inst->offset = offset; - inst->reach_index = blob.lookaround_cache.get_offset_of({reach}, blob); - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckLookaround::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - vector<s8> look_offsets; - vector<CharReach> reaches; - for (const auto &le : look) { - look_offsets.push_back(le.offset); - 
reaches.push_back(le.reach); - } - inst->look_index = blob.lookaround_cache.get_offset_of(look_offsets, blob); - inst->reach_index = blob.lookaround_cache.get_offset_of(reaches, blob); - inst->count = verify_u32(look.size()); - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckMask::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - inst->and_mask = and_mask; - inst->cmp_mask = cmp_mask; - inst->neg_mask = neg_mask; - inst->offset = offset; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckMask32::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - copy(begin(and_mask), end(and_mask), inst->and_mask); - copy(begin(cmp_mask), end(cmp_mask), inst->cmp_mask); - inst->neg_mask = neg_mask; - inst->offset = offset; - inst->fail_jump = calc_jump(offset_map, this, target); -} - + +using OffsetMap = RoseInstruction::OffsetMap; + +static +u32 calc_jump(const OffsetMap &offset_map, const RoseInstruction *from, + const RoseInstruction *to) { + DEBUG_PRINTF("computing relative jump from %p to %p\n", from, to); + assert(from && contains(offset_map, from)); + assert(to && contains(offset_map, to)); + + u32 from_offset = offset_map.at(from); + u32 to_offset = offset_map.at(to); + DEBUG_PRINTF("offsets: %u -> %u\n", from_offset, to_offset); + assert(from_offset <= to_offset); + + return to_offset - from_offset; +} + +void RoseInstrAnchoredDelay::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + inst->groups = groups; + inst->anch_id = anch_id; + inst->done_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckLitEarly::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + inst->min_offset = min_offset; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckGroups::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + inst->groups = groups; +} + +void RoseInstrCheckOnlyEod::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckBounds::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + inst->min_bound = min_bound; + inst->max_bound = max_bound; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckNotHandled::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + inst->key = key; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckSingleLookaround::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = 
static_cast<impl_type *>(dest); + inst->offset = offset; + inst->reach_index = blob.lookaround_cache.get_offset_of({reach}, blob); + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckLookaround::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + vector<s8> look_offsets; + vector<CharReach> reaches; + for (const auto &le : look) { + look_offsets.push_back(le.offset); + reaches.push_back(le.reach); + } + inst->look_index = blob.lookaround_cache.get_offset_of(look_offsets, blob); + inst->reach_index = blob.lookaround_cache.get_offset_of(reaches, blob); + inst->count = verify_u32(look.size()); + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckMask::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + inst->and_mask = and_mask; + inst->cmp_mask = cmp_mask; + inst->neg_mask = neg_mask; + inst->offset = offset; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckMask32::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + copy(begin(and_mask), end(and_mask), inst->and_mask); + copy(begin(cmp_mask), end(cmp_mask), inst->cmp_mask); + inst->neg_mask = neg_mask; + inst->offset = offset; + inst->fail_jump = calc_jump(offset_map, this, target); +} + void RoseInstrCheckMask64::write(void *dest, RoseEngineBlob &blob, const OffsetMap &offset_map) const { RoseInstrBase::write(dest, blob, offset_map); @@ -173,71 +173,71 @@ void RoseInstrCheckMask64::write(void *dest, RoseEngineBlob &blob, inst->fail_jump = calc_jump(offset_map, this, target); } -void RoseInstrCheckByte::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - inst->and_mask = and_mask; - inst->cmp_mask = cmp_mask; - inst->negation = negation; - inst->offset = offset; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckShufti16x8::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - copy(begin(nib_mask), end(nib_mask), inst->nib_mask); - copy(begin(bucket_select_mask), end(bucket_select_mask), - inst->bucket_select_mask); - inst->neg_mask = neg_mask; - inst->offset = offset; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckShufti32x8::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - copy(begin(hi_mask), end(hi_mask), inst->hi_mask); - copy(begin(lo_mask), end(lo_mask), inst->lo_mask); - copy(begin(bucket_select_mask), end(bucket_select_mask), - inst->bucket_select_mask); - - inst->neg_mask = neg_mask; - inst->offset = offset; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckShufti16x16::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - copy(begin(hi_mask), end(hi_mask), inst->hi_mask); - copy(begin(lo_mask), 
end(lo_mask), inst->lo_mask); - copy(begin(bucket_select_mask), end(bucket_select_mask), - inst->bucket_select_mask); - inst->neg_mask = neg_mask; - inst->offset = offset; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckShufti32x16::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - copy(begin(hi_mask), end(hi_mask), inst->hi_mask); - copy(begin(lo_mask), end(lo_mask), inst->lo_mask); - copy(begin(bucket_select_mask_hi), end(bucket_select_mask_hi), - inst->bucket_select_mask_hi); - copy(begin(bucket_select_mask_lo), end(bucket_select_mask_lo), - inst->bucket_select_mask_lo); - inst->neg_mask = neg_mask; - inst->offset = offset; - inst->fail_jump = calc_jump(offset_map, this, target); -} - +void RoseInstrCheckByte::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + inst->and_mask = and_mask; + inst->cmp_mask = cmp_mask; + inst->negation = negation; + inst->offset = offset; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckShufti16x8::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + copy(begin(nib_mask), end(nib_mask), inst->nib_mask); + copy(begin(bucket_select_mask), end(bucket_select_mask), + inst->bucket_select_mask); + inst->neg_mask = neg_mask; + inst->offset = offset; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckShufti32x8::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + copy(begin(hi_mask), end(hi_mask), inst->hi_mask); + copy(begin(lo_mask), end(lo_mask), inst->lo_mask); + copy(begin(bucket_select_mask), end(bucket_select_mask), + inst->bucket_select_mask); + + inst->neg_mask = neg_mask; + inst->offset = offset; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckShufti16x16::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + copy(begin(hi_mask), end(hi_mask), inst->hi_mask); + copy(begin(lo_mask), end(lo_mask), inst->lo_mask); + copy(begin(bucket_select_mask), end(bucket_select_mask), + inst->bucket_select_mask); + inst->neg_mask = neg_mask; + inst->offset = offset; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckShufti32x16::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + copy(begin(hi_mask), end(hi_mask), inst->hi_mask); + copy(begin(lo_mask), end(lo_mask), inst->lo_mask); + copy(begin(bucket_select_mask_hi), end(bucket_select_mask_hi), + inst->bucket_select_mask_hi); + copy(begin(bucket_select_mask_lo), end(bucket_select_mask_lo), + inst->bucket_select_mask_lo); + inst->neg_mask = neg_mask; + inst->offset = offset; + inst->fail_jump = calc_jump(offset_map, this, target); +} + void RoseInstrCheckShufti64x8::write(void *dest, RoseEngineBlob &blob, const OffsetMap &offset_map) const { RoseInstrBase::write(dest, blob, offset_map); @@ -268,432 +268,432 @@ void 
RoseInstrCheckShufti64x16::write(void *dest, RoseEngineBlob &blob, inst->fail_jump = calc_jump(offset_map, this, target); } -void RoseInstrCheckInfix::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - inst->queue = queue; - inst->lag = lag; - inst->report = report; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckPrefix::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - inst->queue = queue; - inst->lag = lag; - inst->report = report; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrPushDelayed::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - inst->delay = delay; - inst->index = index; -} - -void RoseInstrSomAdjust::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - inst->distance = distance; -} - -void RoseInstrSomLeftfix::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - inst->queue = queue; - inst->lag = lag; -} - -void RoseInstrSomFromReport::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - inst->som = som; -} - -void RoseInstrTriggerInfix::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - inst->cancel = cancel; - inst->queue = queue; - inst->event = event; -} - -void RoseInstrTriggerSuffix::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - inst->queue = queue; - inst->event = event; -} - -void RoseInstrDedupe::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - inst->quash_som = quash_som; - inst->dkey = dkey; - inst->offset_adjust = offset_adjust; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrDedupeSom::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - inst->quash_som = quash_som; - inst->dkey = dkey; - inst->offset_adjust = offset_adjust; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrReportChain::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - inst->event = event; - inst->top_squash_distance = top_squash_distance; -} - -void RoseInstrReportSomInt::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - inst->som = som; -} - -void RoseInstrReportSomAware::write(void *dest, 
RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - inst->som = som; -} - -void RoseInstrReport::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - inst->onmatch = onmatch; - inst->offset_adjust = offset_adjust; -} - -void RoseInstrReportExhaust::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - inst->onmatch = onmatch; - inst->offset_adjust = offset_adjust; - inst->ekey = ekey; -} - -void RoseInstrReportSom::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - inst->onmatch = onmatch; - inst->offset_adjust = offset_adjust; -} - -void RoseInstrReportSomExhaust::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - inst->onmatch = onmatch; - inst->offset_adjust = offset_adjust; - inst->ekey = ekey; -} - -void RoseInstrDedupeAndReport::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - inst->quash_som = quash_som; - inst->dkey = dkey; - inst->onmatch = onmatch; - inst->offset_adjust = offset_adjust; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrFinalReport::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - inst->onmatch = onmatch; - inst->offset_adjust = offset_adjust; -} - -void RoseInstrCheckExhausted::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - inst->ekey = ekey; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckMinLength::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - inst->end_adj = end_adj; - inst->min_length = min_length; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrSetState::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - inst->index = index; -} - -void RoseInstrSetGroups::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - inst->groups = groups; -} - -void RoseInstrSquashGroups::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - inst->groups = groups; -} - -void RoseInstrCheckState::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - inst->index = index; - inst->fail_jump = calc_jump(offset_map, this, 
target); -} - -void RoseInstrSparseIterBegin::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - inst->fail_jump = calc_jump(offset_map, this, target); - - // Resolve and write the multibit sparse iterator and the jump table. - vector<u32> keys; - vector<u32> jump_offsets; - for (const auto &jump : jump_table) { - keys.push_back(jump.first); - assert(contains(offset_map, jump.second)); - jump_offsets.push_back(offset_map.at(jump.second)); - } - - auto iter = mmbBuildSparseIterator(keys, num_keys); - assert(!iter.empty()); - inst->iter_offset = blob.add_iterator(iter); - inst->jump_table = blob.add(jump_offsets.begin(), jump_offsets.end()); - - // Store offsets for corresponding SPARSE_ITER_NEXT operations. - is_written = true; - iter_offset = inst->iter_offset; - jump_table_offset = inst->jump_table; -} - -void RoseInstrSparseIterNext::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - inst->state = state; - inst->fail_jump = calc_jump(offset_map, this, target); - - // Use the same sparse iterator and jump table as the SPARSE_ITER_BEGIN - // instruction. - assert(begin); - assert(contains(offset_map, begin)); - assert(begin->is_written); - inst->iter_offset = begin->iter_offset; - inst->jump_table = begin->jump_table_offset; -} - -void RoseInstrSparseIterAny::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - inst->fail_jump = calc_jump(offset_map, this, target); - - // Write the multibit sparse iterator. 
- auto iter = mmbBuildSparseIterator(keys, num_keys); - assert(!iter.empty()); - inst->iter_offset = blob.add_iterator(iter); -} - -void RoseInstrEnginesEod::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - inst->iter_offset = iter_offset; -} - -void RoseInstrCheckLongLit::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - assert(!literal.empty()); - inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1); - inst->lit_length = verify_u32(literal.size()); - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckLongLitNocase::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - assert(!literal.empty()); - inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1); - inst->lit_length = verify_u32(literal.size()); - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckMedLit::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - assert(!literal.empty()); - inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1); - inst->lit_length = verify_u32(literal.size()); - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckMedLitNocase::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - assert(!literal.empty()); - inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1); - inst->lit_length = verify_u32(literal.size()); - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrMultipathLookaround::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - auto &cache = blob.lookaround_cache; - vector<s8> look_offsets; - vector<vector<CharReach>> reaches; - for (const auto &vle : multi_look) { - reaches.push_back({}); - bool done_offset = false; - - for (const auto &le : vle) { - reaches.back().push_back(le.reach); - - /* empty reaches don't have valid offsets */ - if (!done_offset && le.reach.any()) { - look_offsets.push_back(le.offset); - done_offset = true; - } - } - } - inst->look_index = cache.get_offset_of(look_offsets, blob); - inst->reach_index = cache.get_offset_of(reaches, blob); - inst->count = verify_u32(multi_look.size()); - inst->last_start = last_start; - copy(begin(start_mask), end(start_mask), inst->start_mask); - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckMultipathShufti16x8::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - copy(begin(nib_mask), end(nib_mask), inst->nib_mask); - copy(begin(bucket_select_mask), begin(bucket_select_mask) + 16, - inst->bucket_select_mask); - copy(begin(data_select_mask), begin(data_select_mask) + 16, - inst->data_select_mask); - inst->hi_bits_mask = hi_bits_mask; - inst->lo_bits_mask = lo_bits_mask; - inst->neg_mask = neg_mask; - 
inst->base_offset = base_offset; - inst->last_start = last_start; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckMultipathShufti32x8::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - copy(begin(hi_mask), begin(hi_mask) + 16, inst->hi_mask); - copy(begin(lo_mask), begin(lo_mask) + 16, inst->lo_mask); - copy(begin(bucket_select_mask), begin(bucket_select_mask) + 32, - inst->bucket_select_mask); - copy(begin(data_select_mask), begin(data_select_mask) + 32, - inst->data_select_mask); - inst->hi_bits_mask = hi_bits_mask; - inst->lo_bits_mask = lo_bits_mask; - inst->neg_mask = neg_mask; - inst->base_offset = base_offset; - inst->last_start = last_start; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckMultipathShufti32x16::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - copy(begin(hi_mask), end(hi_mask), inst->hi_mask); - copy(begin(lo_mask), end(lo_mask), inst->lo_mask); - copy(begin(bucket_select_mask_hi), begin(bucket_select_mask_hi) + 32, - inst->bucket_select_mask_hi); - copy(begin(bucket_select_mask_lo), begin(bucket_select_mask_lo) + 32, - inst->bucket_select_mask_lo); - copy(begin(data_select_mask), begin(data_select_mask) + 32, - inst->data_select_mask); - inst->hi_bits_mask = hi_bits_mask; - inst->lo_bits_mask = lo_bits_mask; - inst->neg_mask = neg_mask; - inst->base_offset = base_offset; - inst->last_start = last_start; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrCheckMultipathShufti64::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - copy(begin(hi_mask), begin(hi_mask) + 16, inst->hi_mask); - copy(begin(lo_mask), begin(lo_mask) + 16, inst->lo_mask); - copy(begin(bucket_select_mask), end(bucket_select_mask), - inst->bucket_select_mask); - copy(begin(data_select_mask), end(data_select_mask), - inst->data_select_mask); - inst->hi_bits_mask = hi_bits_mask; - inst->lo_bits_mask = lo_bits_mask; - inst->neg_mask = neg_mask; - inst->base_offset = base_offset; - inst->last_start = last_start; - inst->fail_jump = calc_jump(offset_map, this, target); -} - -void RoseInstrIncludedJump::write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const { - RoseInstrBase::write(dest, blob, offset_map); - auto *inst = static_cast<impl_type *>(dest); - inst->child_offset = child_offset; - inst->squash = squash; -} - +void RoseInstrCheckInfix::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + inst->queue = queue; + inst->lag = lag; + inst->report = report; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckPrefix::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + inst->queue = queue; + inst->lag = lag; + inst->report = report; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrPushDelayed::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); 
+ auto *inst = static_cast<impl_type *>(dest); + inst->delay = delay; + inst->index = index; +} + +void RoseInstrSomAdjust::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + inst->distance = distance; +} + +void RoseInstrSomLeftfix::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + inst->queue = queue; + inst->lag = lag; +} + +void RoseInstrSomFromReport::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + inst->som = som; +} + +void RoseInstrTriggerInfix::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + inst->cancel = cancel; + inst->queue = queue; + inst->event = event; +} + +void RoseInstrTriggerSuffix::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + inst->queue = queue; + inst->event = event; +} + +void RoseInstrDedupe::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + inst->quash_som = quash_som; + inst->dkey = dkey; + inst->offset_adjust = offset_adjust; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrDedupeSom::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + inst->quash_som = quash_som; + inst->dkey = dkey; + inst->offset_adjust = offset_adjust; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrReportChain::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + inst->event = event; + inst->top_squash_distance = top_squash_distance; +} + +void RoseInstrReportSomInt::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + inst->som = som; +} + +void RoseInstrReportSomAware::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + inst->som = som; +} + +void RoseInstrReport::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + inst->onmatch = onmatch; + inst->offset_adjust = offset_adjust; +} + +void RoseInstrReportExhaust::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + inst->onmatch = onmatch; + inst->offset_adjust = offset_adjust; + inst->ekey = ekey; +} + +void RoseInstrReportSom::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + 
inst->onmatch = onmatch; + inst->offset_adjust = offset_adjust; +} + +void RoseInstrReportSomExhaust::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + inst->onmatch = onmatch; + inst->offset_adjust = offset_adjust; + inst->ekey = ekey; +} + +void RoseInstrDedupeAndReport::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + inst->quash_som = quash_som; + inst->dkey = dkey; + inst->onmatch = onmatch; + inst->offset_adjust = offset_adjust; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrFinalReport::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + inst->onmatch = onmatch; + inst->offset_adjust = offset_adjust; +} + +void RoseInstrCheckExhausted::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + inst->ekey = ekey; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckMinLength::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + inst->end_adj = end_adj; + inst->min_length = min_length; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrSetState::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + inst->index = index; +} + +void RoseInstrSetGroups::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + inst->groups = groups; +} + +void RoseInstrSquashGroups::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + inst->groups = groups; +} + +void RoseInstrCheckState::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + inst->index = index; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrSparseIterBegin::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + inst->fail_jump = calc_jump(offset_map, this, target); + + // Resolve and write the multibit sparse iterator and the jump table. + vector<u32> keys; + vector<u32> jump_offsets; + for (const auto &jump : jump_table) { + keys.push_back(jump.first); + assert(contains(offset_map, jump.second)); + jump_offsets.push_back(offset_map.at(jump.second)); + } + + auto iter = mmbBuildSparseIterator(keys, num_keys); + assert(!iter.empty()); + inst->iter_offset = blob.add_iterator(iter); + inst->jump_table = blob.add(jump_offsets.begin(), jump_offsets.end()); + + // Store offsets for corresponding SPARSE_ITER_NEXT operations. 
+ is_written = true; + iter_offset = inst->iter_offset; + jump_table_offset = inst->jump_table; +} + +void RoseInstrSparseIterNext::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + inst->state = state; + inst->fail_jump = calc_jump(offset_map, this, target); + + // Use the same sparse iterator and jump table as the SPARSE_ITER_BEGIN + // instruction. + assert(begin); + assert(contains(offset_map, begin)); + assert(begin->is_written); + inst->iter_offset = begin->iter_offset; + inst->jump_table = begin->jump_table_offset; +} + +void RoseInstrSparseIterAny::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + inst->fail_jump = calc_jump(offset_map, this, target); + + // Write the multibit sparse iterator. + auto iter = mmbBuildSparseIterator(keys, num_keys); + assert(!iter.empty()); + inst->iter_offset = blob.add_iterator(iter); +} + +void RoseInstrEnginesEod::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + inst->iter_offset = iter_offset; +} + +void RoseInstrCheckLongLit::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + assert(!literal.empty()); + inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1); + inst->lit_length = verify_u32(literal.size()); + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckLongLitNocase::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + assert(!literal.empty()); + inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1); + inst->lit_length = verify_u32(literal.size()); + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckMedLit::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + assert(!literal.empty()); + inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1); + inst->lit_length = verify_u32(literal.size()); + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckMedLitNocase::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + assert(!literal.empty()); + inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1); + inst->lit_length = verify_u32(literal.size()); + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrMultipathLookaround::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + auto &cache = blob.lookaround_cache; + vector<s8> look_offsets; + vector<vector<CharReach>> reaches; + for (const auto &vle : multi_look) { + reaches.push_back({}); + bool done_offset = false; + + for (const auto &le : vle) { + reaches.back().push_back(le.reach); + + /* empty reaches don't have valid offsets */ + if (!done_offset && le.reach.any()) { + 
look_offsets.push_back(le.offset); + done_offset = true; + } + } + } + inst->look_index = cache.get_offset_of(look_offsets, blob); + inst->reach_index = cache.get_offset_of(reaches, blob); + inst->count = verify_u32(multi_look.size()); + inst->last_start = last_start; + copy(begin(start_mask), end(start_mask), inst->start_mask); + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckMultipathShufti16x8::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + copy(begin(nib_mask), end(nib_mask), inst->nib_mask); + copy(begin(bucket_select_mask), begin(bucket_select_mask) + 16, + inst->bucket_select_mask); + copy(begin(data_select_mask), begin(data_select_mask) + 16, + inst->data_select_mask); + inst->hi_bits_mask = hi_bits_mask; + inst->lo_bits_mask = lo_bits_mask; + inst->neg_mask = neg_mask; + inst->base_offset = base_offset; + inst->last_start = last_start; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckMultipathShufti32x8::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + copy(begin(hi_mask), begin(hi_mask) + 16, inst->hi_mask); + copy(begin(lo_mask), begin(lo_mask) + 16, inst->lo_mask); + copy(begin(bucket_select_mask), begin(bucket_select_mask) + 32, + inst->bucket_select_mask); + copy(begin(data_select_mask), begin(data_select_mask) + 32, + inst->data_select_mask); + inst->hi_bits_mask = hi_bits_mask; + inst->lo_bits_mask = lo_bits_mask; + inst->neg_mask = neg_mask; + inst->base_offset = base_offset; + inst->last_start = last_start; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckMultipathShufti32x16::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + copy(begin(hi_mask), end(hi_mask), inst->hi_mask); + copy(begin(lo_mask), end(lo_mask), inst->lo_mask); + copy(begin(bucket_select_mask_hi), begin(bucket_select_mask_hi) + 32, + inst->bucket_select_mask_hi); + copy(begin(bucket_select_mask_lo), begin(bucket_select_mask_lo) + 32, + inst->bucket_select_mask_lo); + copy(begin(data_select_mask), begin(data_select_mask) + 32, + inst->data_select_mask); + inst->hi_bits_mask = hi_bits_mask; + inst->lo_bits_mask = lo_bits_mask; + inst->neg_mask = neg_mask; + inst->base_offset = base_offset; + inst->last_start = last_start; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrCheckMultipathShufti64::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + copy(begin(hi_mask), begin(hi_mask) + 16, inst->hi_mask); + copy(begin(lo_mask), begin(lo_mask) + 16, inst->lo_mask); + copy(begin(bucket_select_mask), end(bucket_select_mask), + inst->bucket_select_mask); + copy(begin(data_select_mask), end(data_select_mask), + inst->data_select_mask); + inst->hi_bits_mask = hi_bits_mask; + inst->lo_bits_mask = lo_bits_mask; + inst->neg_mask = neg_mask; + inst->base_offset = base_offset; + inst->last_start = last_start; + inst->fail_jump = calc_jump(offset_map, this, target); +} + +void RoseInstrIncludedJump::write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const { + 
RoseInstrBase::write(dest, blob, offset_map); + auto *inst = static_cast<impl_type *>(dest); + inst->child_offset = child_offset; + inst->squash = squash; +} + void RoseInstrSetLogical::write(void *dest, RoseEngineBlob &blob, const OffsetMap &offset_map) const { RoseInstrBase::write(dest, blob, offset_map); auto *inst = static_cast<impl_type *>(dest); inst->lkey = lkey; inst->offset_adjust = offset_adjust; -} +} void RoseInstrSetCombination::write(void *dest, RoseEngineBlob &blob, const OffsetMap &offset_map) const { diff --git a/contrib/libs/hyperscan/src/rose/rose_build_instructions.h b/contrib/libs/hyperscan/src/rose/rose_build_instructions.h index ee2acddd48..f18f4a4715 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_instructions.h +++ b/contrib/libs/hyperscan/src/rose/rose_build_instructions.h @@ -1,524 +1,524 @@ -/* +/* * Copyright (c) 2017-2020, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Concrete classes for interpreter instructions. - * - * Note: this header should only be included in files which need to deal with - * the details of actual instructions. It is expected that most will only - * require access to the RoseInstruction API exposed in rose_build_program.h - */ - -#ifndef ROSE_BUILD_INSTRUCTIONS_H -#define ROSE_BUILD_INSTRUCTIONS_H - -#include "rose_build_lookaround.h" -#include "rose_build_program.h" -#include "util/hash.h" -#include "util/verify_types.h" - -namespace ue2 { - -/** - * \brief Abstract base class representing a single Rose instruction. - */ -class RoseInstruction { -public: - virtual ~RoseInstruction(); - - /** \brief Opcode used for the instruction in the bytecode. */ - virtual RoseInstructionCode code() const = 0; - - /** - * \brief Simple hash used for program equivalence. - * - * Note that pointers (jumps, for example) should not be used when - * calculating the hash: they will be converted to instruction offsets when - * compared later. - */ - virtual size_t hash() const = 0; - - /** \brief Length of the bytecode instruction in bytes. 
*/ - virtual size_t byte_length() const = 0; - - using OffsetMap = std::unordered_map<const RoseInstruction *, u32>; - - /** - * \brief Writes a concrete implementation of this instruction. - * - * Other data that this instruction depends on is written directly into the - * blob, while the instruction structure itself (of size given by - * the byte_length() function) is written to dest. - */ - virtual void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const = 0; - - /** - * \brief Update a target pointer. - * - * If this instruction contains any reference to the old target, replace it - * with the new one. - */ - virtual void update_target(const RoseInstruction *old_target, - const RoseInstruction *new_target) = 0; - - /** - * \brief True if these instructions are equivalent within their own - * programs. - * - * Checks that any pointers to other instructions point to the same - * offsets. - */ - bool equiv(const RoseInstruction &other, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return equiv_impl(other, offsets, other_offsets); - } - -private: - virtual bool equiv_impl(const RoseInstruction &other, - const OffsetMap &offsets, - const OffsetMap &other_offsets) const = 0; -}; - -/** - * \brief Templated implementation class to handle boring boilerplate code. - */ -template<RoseInstructionCode Opcode, class ImplType, class RoseInstrType> -class RoseInstrBase : public RoseInstruction { -protected: - static constexpr RoseInstructionCode opcode = Opcode; - using impl_type = ImplType; - -public: - RoseInstructionCode code() const override { return opcode; } - - size_t byte_length() const override { - return sizeof(impl_type); - } - - /** - * Note: this implementation simply zeroes the destination region and - * writes in the correct opcode. This is sufficient for trivial - * instructions, but instructions with data members will want to override - * it. - */ - void write(void *dest, RoseEngineBlob &, - const RoseInstruction::OffsetMap &) const override { - assert(dest != nullptr); - assert(ISALIGNED_N(dest, ROSE_INSTR_MIN_ALIGN)); - - impl_type *inst = static_cast<impl_type *>(dest); - memset(inst, 0, sizeof(impl_type)); - inst->code = verify_u8(opcode); - } - -private: - bool equiv_impl(const RoseInstruction &other, const OffsetMap &offsets, - const OffsetMap &other_offsets) const override { - const auto *ri_that = dynamic_cast<const RoseInstrType *>(&other); - if (!ri_that) { - return false; - } - const auto *ri_this = dynamic_cast<const RoseInstrType *>(this); - assert(ri_this); - return ri_this->equiv_to(*ri_that, offsets, other_offsets); - } -}; - -template<RoseInstructionCode Opcode, class ImplType, class RoseInstrType> -constexpr RoseInstructionCode - RoseInstrBase<Opcode, ImplType, RoseInstrType>::opcode; - -/** - * \brief Refinement of RoseInstrBase to use for instructions that have - * just a single target member, called "target". - */ -template<RoseInstructionCode Opcode, class ImplType, class RoseInstrType> -class RoseInstrBaseOneTarget - : public RoseInstrBase<Opcode, ImplType, RoseInstrType> { -public: - void update_target(const RoseInstruction *old_target, - const RoseInstruction *new_target) override { - RoseInstrType *ri = dynamic_cast<RoseInstrType *>(this); - assert(ri); - if (ri->target == old_target) { - ri->target = new_target; - } - } -}; - -/** - * \brief Refinement of RoseInstrBase to use for instructions that have no - * targets. 
- */ -template<RoseInstructionCode Opcode, class ImplType, class RoseInstrType> -class RoseInstrBaseNoTargets - : public RoseInstrBase<Opcode, ImplType, RoseInstrType> { -public: - void update_target(const RoseInstruction *, - const RoseInstruction *) override {} -}; - -/** - * \brief Refinement of RoseInstrBaseNoTargets to use for instructions that - * have no members at all, just an opcode. - */ -template<RoseInstructionCode Opcode, class ImplType, class RoseInstrType> -class RoseInstrBaseTrivial - : public RoseInstrBaseNoTargets<Opcode, ImplType, RoseInstrType> { -public: - virtual bool operator==(const RoseInstrType &) const { return true; } - - size_t hash() const override { - return hash_all(Opcode); - } - - bool equiv_to(const RoseInstrType &, const RoseInstruction::OffsetMap &, - const RoseInstruction::OffsetMap &) const { - return true; - } -}; - -//// -//// Concrete implementation classes start here. -//// - -class RoseInstrAnchoredDelay - : public RoseInstrBaseOneTarget<ROSE_INSTR_ANCHORED_DELAY, - ROSE_STRUCT_ANCHORED_DELAY, - RoseInstrAnchoredDelay> { -public: - rose_group groups; - u32 anch_id; - const RoseInstruction *target; - - RoseInstrAnchoredDelay(rose_group groups_in, u32 anch_id_in, - const RoseInstruction *target_in) - : groups(groups_in), anch_id(anch_id_in), target(target_in) {} - - bool operator==(const RoseInstrAnchoredDelay &ri) const { - return groups == ri.groups && anch_id == ri.anch_id - && target == ri.target; - } - - size_t hash() const override { - return hash_all(opcode, groups, anch_id); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrAnchoredDelay &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return groups == ri.groups && anch_id == ri.anch_id - && offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckLitEarly - : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_LIT_EARLY, - ROSE_STRUCT_CHECK_LIT_EARLY, - RoseInstrCheckLitEarly> { -public: - u32 min_offset; - const RoseInstruction *target; - - RoseInstrCheckLitEarly(u32 min_offset_in, const RoseInstruction *target_in) - : min_offset(min_offset_in), target(target_in) {} - - bool operator==(const RoseInstrCheckLitEarly &ri) const { - return min_offset == ri.min_offset && target == ri.target; - } - - size_t hash() const override { - return hash_all(opcode, min_offset); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckLitEarly &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return min_offset == ri.min_offset && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckGroups - : public RoseInstrBaseNoTargets<ROSE_INSTR_CHECK_GROUPS, - ROSE_STRUCT_CHECK_GROUPS, - RoseInstrCheckGroups> { -public: - rose_group groups; - - explicit RoseInstrCheckGroups(rose_group groups_in) : groups(groups_in) {} - - bool operator==(const RoseInstrCheckGroups &ri) const { - return groups == ri.groups; - } - - size_t hash() const override { - return hash_all(opcode, groups); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckGroups &ri, const OffsetMap &, - const OffsetMap &) const { - return groups == ri.groups; - } -}; - -class RoseInstrCheckOnlyEod - : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_ONLY_EOD, - ROSE_STRUCT_CHECK_ONLY_EOD, - RoseInstrCheckOnlyEod> 
{ -public: - const RoseInstruction *target; - - explicit RoseInstrCheckOnlyEod(const RoseInstruction *target_in) - : target(target_in) {} - - bool operator==(const RoseInstrCheckOnlyEod &ri) const { - return target == ri.target; - } - - size_t hash() const override { - return hash_all(opcode); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckOnlyEod &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckBounds - : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_BOUNDS, - ROSE_STRUCT_CHECK_BOUNDS, - RoseInstrCheckBounds> { -public: - u64a min_bound; - u64a max_bound; - const RoseInstruction *target; - - RoseInstrCheckBounds(u64a min, u64a max, const RoseInstruction *target_in) - : min_bound(min), max_bound(max), target(target_in) {} - - bool operator==(const RoseInstrCheckBounds &ri) const { - return min_bound == ri.min_bound && max_bound == ri.max_bound && - target == ri.target; - } - - size_t hash() const override { - return hash_all(opcode, min_bound, max_bound); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckBounds &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return min_bound == ri.min_bound && max_bound == ri.max_bound && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckNotHandled - : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_NOT_HANDLED, - ROSE_STRUCT_CHECK_NOT_HANDLED, - RoseInstrCheckNotHandled> { -public: - u32 key; - const RoseInstruction *target; - - RoseInstrCheckNotHandled(u32 key_in, const RoseInstruction *target_in) - : key(key_in), target(target_in) {} - - bool operator==(const RoseInstrCheckNotHandled &ri) const { - return key == ri.key && target == ri.target; - } - - size_t hash() const override { - return hash_all(opcode, key); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckNotHandled &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return key == ri.key && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckSingleLookaround - : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_SINGLE_LOOKAROUND, - ROSE_STRUCT_CHECK_SINGLE_LOOKAROUND, - RoseInstrCheckSingleLookaround> { -public: - s8 offset; - CharReach reach; - const RoseInstruction *target; - - RoseInstrCheckSingleLookaround(s8 offset_in, CharReach reach_in, - const RoseInstruction *target_in) - : offset(offset_in), reach(std::move(reach_in)), target(target_in) {} - - bool operator==(const RoseInstrCheckSingleLookaround &ri) const { - return offset == ri.offset && reach == ri.reach && target == ri.target; - } - - size_t hash() const override { - return hash_all(opcode, offset, reach); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckSingleLookaround &ri, - const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return offset == ri.offset && reach == ri.reach && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckLookaround - : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_LOOKAROUND, - ROSE_STRUCT_CHECK_LOOKAROUND, - RoseInstrCheckLookaround> { -public: - std::vector<LookEntry> look; - const 
RoseInstruction *target; - - RoseInstrCheckLookaround(std::vector<LookEntry> look_in, - const RoseInstruction *target_in) - : look(std::move(look_in)), target(target_in) {} - - bool operator==(const RoseInstrCheckLookaround &ri) const { - return look == ri.look && target == ri.target; - } - - size_t hash() const override { - return hash_all(opcode, look); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckLookaround &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return look == ri.look - && offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckMask - : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MASK, - ROSE_STRUCT_CHECK_MASK, - RoseInstrCheckMask> { -public: - u64a and_mask; - u64a cmp_mask; - u64a neg_mask; - s32 offset; - const RoseInstruction *target; - - RoseInstrCheckMask(u64a and_mask_in, u64a cmp_mask_in, u64a neg_mask_in, - s32 offset_in, const RoseInstruction *target_in) - : and_mask(and_mask_in), cmp_mask(cmp_mask_in), neg_mask(neg_mask_in), - offset(offset_in), target(target_in) {} - - bool operator==(const RoseInstrCheckMask &ri) const { - return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && - neg_mask == ri.neg_mask && offset == ri.offset && - target == ri.target; - } - - size_t hash() const override { - return hash_all(opcode, and_mask, cmp_mask, neg_mask, offset); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckMask &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && - neg_mask == ri.neg_mask && offset == ri.offset && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckMask32 - : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MASK_32, - ROSE_STRUCT_CHECK_MASK_32, - RoseInstrCheckMask32> { -public: - std::array<u8, 32> and_mask; - std::array<u8, 32> cmp_mask; - u32 neg_mask; - s32 offset; - const RoseInstruction *target; - - RoseInstrCheckMask32(std::array<u8, 32> and_mask_in, - std::array<u8, 32> cmp_mask_in, u32 neg_mask_in, - s32 offset_in, const RoseInstruction *target_in) - : and_mask(std::move(and_mask_in)), cmp_mask(std::move(cmp_mask_in)), - neg_mask(neg_mask_in), offset(offset_in), target(target_in) {} - - bool operator==(const RoseInstrCheckMask32 &ri) const { - return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && - neg_mask == ri.neg_mask && offset == ri.offset && - target == ri.target; - } - - size_t hash() const override { - return hash_all(opcode, and_mask, cmp_mask, neg_mask, offset); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckMask32 &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && - neg_mask == ri.neg_mask && offset == ri.offset && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Concrete classes for interpreter instructions. + * + * Note: this header should only be included in files which need to deal with + * the details of actual instructions. It is expected that most will only + * require access to the RoseInstruction API exposed in rose_build_program.h + */ + +#ifndef ROSE_BUILD_INSTRUCTIONS_H +#define ROSE_BUILD_INSTRUCTIONS_H + +#include "rose_build_lookaround.h" +#include "rose_build_program.h" +#include "util/hash.h" +#include "util/verify_types.h" + +namespace ue2 { + +/** + * \brief Abstract base class representing a single Rose instruction. + */ +class RoseInstruction { +public: + virtual ~RoseInstruction(); + + /** \brief Opcode used for the instruction in the bytecode. */ + virtual RoseInstructionCode code() const = 0; + + /** + * \brief Simple hash used for program equivalence. + * + * Note that pointers (jumps, for example) should not be used when + * calculating the hash: they will be converted to instruction offsets when + * compared later. + */ + virtual size_t hash() const = 0; + + /** \brief Length of the bytecode instruction in bytes. */ + virtual size_t byte_length() const = 0; + + using OffsetMap = std::unordered_map<const RoseInstruction *, u32>; + + /** + * \brief Writes a concrete implementation of this instruction. + * + * Other data that this instruction depends on is written directly into the + * blob, while the instruction structure itself (of size given by + * the byte_length() function) is written to dest. + */ + virtual void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const = 0; + + /** + * \brief Update a target pointer. + * + * If this instruction contains any reference to the old target, replace it + * with the new one. + */ + virtual void update_target(const RoseInstruction *old_target, + const RoseInstruction *new_target) = 0; + + /** + * \brief True if these instructions are equivalent within their own + * programs. + * + * Checks that any pointers to other instructions point to the same + * offsets. 
+ */ + bool equiv(const RoseInstruction &other, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return equiv_impl(other, offsets, other_offsets); + } + +private: + virtual bool equiv_impl(const RoseInstruction &other, + const OffsetMap &offsets, + const OffsetMap &other_offsets) const = 0; +}; + +/** + * \brief Templated implementation class to handle boring boilerplate code. + */ +template<RoseInstructionCode Opcode, class ImplType, class RoseInstrType> +class RoseInstrBase : public RoseInstruction { +protected: + static constexpr RoseInstructionCode opcode = Opcode; + using impl_type = ImplType; + +public: + RoseInstructionCode code() const override { return opcode; } + + size_t byte_length() const override { + return sizeof(impl_type); + } + + /** + * Note: this implementation simply zeroes the destination region and + * writes in the correct opcode. This is sufficient for trivial + * instructions, but instructions with data members will want to override + * it. + */ + void write(void *dest, RoseEngineBlob &, + const RoseInstruction::OffsetMap &) const override { + assert(dest != nullptr); + assert(ISALIGNED_N(dest, ROSE_INSTR_MIN_ALIGN)); + + impl_type *inst = static_cast<impl_type *>(dest); + memset(inst, 0, sizeof(impl_type)); + inst->code = verify_u8(opcode); + } + +private: + bool equiv_impl(const RoseInstruction &other, const OffsetMap &offsets, + const OffsetMap &other_offsets) const override { + const auto *ri_that = dynamic_cast<const RoseInstrType *>(&other); + if (!ri_that) { + return false; + } + const auto *ri_this = dynamic_cast<const RoseInstrType *>(this); + assert(ri_this); + return ri_this->equiv_to(*ri_that, offsets, other_offsets); + } +}; + +template<RoseInstructionCode Opcode, class ImplType, class RoseInstrType> +constexpr RoseInstructionCode + RoseInstrBase<Opcode, ImplType, RoseInstrType>::opcode; + +/** + * \brief Refinement of RoseInstrBase to use for instructions that have + * just a single target member, called "target". + */ +template<RoseInstructionCode Opcode, class ImplType, class RoseInstrType> +class RoseInstrBaseOneTarget + : public RoseInstrBase<Opcode, ImplType, RoseInstrType> { +public: + void update_target(const RoseInstruction *old_target, + const RoseInstruction *new_target) override { + RoseInstrType *ri = dynamic_cast<RoseInstrType *>(this); + assert(ri); + if (ri->target == old_target) { + ri->target = new_target; + } + } +}; + +/** + * \brief Refinement of RoseInstrBase to use for instructions that have no + * targets. + */ +template<RoseInstructionCode Opcode, class ImplType, class RoseInstrType> +class RoseInstrBaseNoTargets + : public RoseInstrBase<Opcode, ImplType, RoseInstrType> { +public: + void update_target(const RoseInstruction *, + const RoseInstruction *) override {} +}; + +/** + * \brief Refinement of RoseInstrBaseNoTargets to use for instructions that + * have no members at all, just an opcode. + */ +template<RoseInstructionCode Opcode, class ImplType, class RoseInstrType> +class RoseInstrBaseTrivial + : public RoseInstrBaseNoTargets<Opcode, ImplType, RoseInstrType> { +public: + virtual bool operator==(const RoseInstrType &) const { return true; } + + size_t hash() const override { + return hash_all(Opcode); + } + + bool equiv_to(const RoseInstrType &, const RoseInstruction::OffsetMap &, + const RoseInstruction::OffsetMap &) const { + return true; + } +}; + +//// +//// Concrete implementation classes start here. 
+//// + +class RoseInstrAnchoredDelay + : public RoseInstrBaseOneTarget<ROSE_INSTR_ANCHORED_DELAY, + ROSE_STRUCT_ANCHORED_DELAY, + RoseInstrAnchoredDelay> { +public: + rose_group groups; + u32 anch_id; + const RoseInstruction *target; + + RoseInstrAnchoredDelay(rose_group groups_in, u32 anch_id_in, + const RoseInstruction *target_in) + : groups(groups_in), anch_id(anch_id_in), target(target_in) {} + + bool operator==(const RoseInstrAnchoredDelay &ri) const { + return groups == ri.groups && anch_id == ri.anch_id + && target == ri.target; + } + + size_t hash() const override { + return hash_all(opcode, groups, anch_id); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrAnchoredDelay &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return groups == ri.groups && anch_id == ri.anch_id + && offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckLitEarly + : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_LIT_EARLY, + ROSE_STRUCT_CHECK_LIT_EARLY, + RoseInstrCheckLitEarly> { +public: + u32 min_offset; + const RoseInstruction *target; + + RoseInstrCheckLitEarly(u32 min_offset_in, const RoseInstruction *target_in) + : min_offset(min_offset_in), target(target_in) {} + + bool operator==(const RoseInstrCheckLitEarly &ri) const { + return min_offset == ri.min_offset && target == ri.target; + } + + size_t hash() const override { + return hash_all(opcode, min_offset); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckLitEarly &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return min_offset == ri.min_offset && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckGroups + : public RoseInstrBaseNoTargets<ROSE_INSTR_CHECK_GROUPS, + ROSE_STRUCT_CHECK_GROUPS, + RoseInstrCheckGroups> { +public: + rose_group groups; + + explicit RoseInstrCheckGroups(rose_group groups_in) : groups(groups_in) {} + + bool operator==(const RoseInstrCheckGroups &ri) const { + return groups == ri.groups; + } + + size_t hash() const override { + return hash_all(opcode, groups); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckGroups &ri, const OffsetMap &, + const OffsetMap &) const { + return groups == ri.groups; + } +}; + +class RoseInstrCheckOnlyEod + : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_ONLY_EOD, + ROSE_STRUCT_CHECK_ONLY_EOD, + RoseInstrCheckOnlyEod> { +public: + const RoseInstruction *target; + + explicit RoseInstrCheckOnlyEod(const RoseInstruction *target_in) + : target(target_in) {} + + bool operator==(const RoseInstrCheckOnlyEod &ri) const { + return target == ri.target; + } + + size_t hash() const override { + return hash_all(opcode); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckOnlyEod &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckBounds + : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_BOUNDS, + ROSE_STRUCT_CHECK_BOUNDS, + RoseInstrCheckBounds> { +public: + u64a min_bound; + u64a max_bound; + const RoseInstruction *target; + + RoseInstrCheckBounds(u64a min, u64a max, const RoseInstruction *target_in) + : min_bound(min), max_bound(max), 
target(target_in) {} + + bool operator==(const RoseInstrCheckBounds &ri) const { + return min_bound == ri.min_bound && max_bound == ri.max_bound && + target == ri.target; + } + + size_t hash() const override { + return hash_all(opcode, min_bound, max_bound); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckBounds &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return min_bound == ri.min_bound && max_bound == ri.max_bound && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckNotHandled + : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_NOT_HANDLED, + ROSE_STRUCT_CHECK_NOT_HANDLED, + RoseInstrCheckNotHandled> { +public: + u32 key; + const RoseInstruction *target; + + RoseInstrCheckNotHandled(u32 key_in, const RoseInstruction *target_in) + : key(key_in), target(target_in) {} + + bool operator==(const RoseInstrCheckNotHandled &ri) const { + return key == ri.key && target == ri.target; + } + + size_t hash() const override { + return hash_all(opcode, key); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckNotHandled &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return key == ri.key && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckSingleLookaround + : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_SINGLE_LOOKAROUND, + ROSE_STRUCT_CHECK_SINGLE_LOOKAROUND, + RoseInstrCheckSingleLookaround> { +public: + s8 offset; + CharReach reach; + const RoseInstruction *target; + + RoseInstrCheckSingleLookaround(s8 offset_in, CharReach reach_in, + const RoseInstruction *target_in) + : offset(offset_in), reach(std::move(reach_in)), target(target_in) {} + + bool operator==(const RoseInstrCheckSingleLookaround &ri) const { + return offset == ri.offset && reach == ri.reach && target == ri.target; + } + + size_t hash() const override { + return hash_all(opcode, offset, reach); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckSingleLookaround &ri, + const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return offset == ri.offset && reach == ri.reach && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckLookaround + : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_LOOKAROUND, + ROSE_STRUCT_CHECK_LOOKAROUND, + RoseInstrCheckLookaround> { +public: + std::vector<LookEntry> look; + const RoseInstruction *target; + + RoseInstrCheckLookaround(std::vector<LookEntry> look_in, + const RoseInstruction *target_in) + : look(std::move(look_in)), target(target_in) {} + + bool operator==(const RoseInstrCheckLookaround &ri) const { + return look == ri.look && target == ri.target; + } + + size_t hash() const override { + return hash_all(opcode, look); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckLookaround &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return look == ri.look + && offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckMask + : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MASK, + ROSE_STRUCT_CHECK_MASK, + RoseInstrCheckMask> { +public: + u64a and_mask; + u64a cmp_mask; + u64a neg_mask; + s32 offset; + const RoseInstruction *target; + + 
RoseInstrCheckMask(u64a and_mask_in, u64a cmp_mask_in, u64a neg_mask_in, + s32 offset_in, const RoseInstruction *target_in) + : and_mask(and_mask_in), cmp_mask(cmp_mask_in), neg_mask(neg_mask_in), + offset(offset_in), target(target_in) {} + + bool operator==(const RoseInstrCheckMask &ri) const { + return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && + neg_mask == ri.neg_mask && offset == ri.offset && + target == ri.target; + } + + size_t hash() const override { + return hash_all(opcode, and_mask, cmp_mask, neg_mask, offset); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckMask &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && + neg_mask == ri.neg_mask && offset == ri.offset && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckMask32 + : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MASK_32, + ROSE_STRUCT_CHECK_MASK_32, + RoseInstrCheckMask32> { +public: + std::array<u8, 32> and_mask; + std::array<u8, 32> cmp_mask; + u32 neg_mask; + s32 offset; + const RoseInstruction *target; + + RoseInstrCheckMask32(std::array<u8, 32> and_mask_in, + std::array<u8, 32> cmp_mask_in, u32 neg_mask_in, + s32 offset_in, const RoseInstruction *target_in) + : and_mask(std::move(and_mask_in)), cmp_mask(std::move(cmp_mask_in)), + neg_mask(neg_mask_in), offset(offset_in), target(target_in) {} + + bool operator==(const RoseInstrCheckMask32 &ri) const { + return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && + neg_mask == ri.neg_mask && offset == ri.offset && + target == ri.target; + } + + size_t hash() const override { + return hash_all(opcode, and_mask, cmp_mask, neg_mask, offset); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckMask32 &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && + neg_mask == ri.neg_mask && offset == ri.offset && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + class RoseInstrCheckMask64 : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MASK_64, ROSE_STRUCT_CHECK_MASK_64, @@ -556,225 +556,225 @@ public: } }; -class RoseInstrCheckByte - : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_BYTE, - ROSE_STRUCT_CHECK_BYTE, - RoseInstrCheckByte> { -public: - u8 and_mask; - u8 cmp_mask; - u8 negation; - s32 offset; - const RoseInstruction *target; - - RoseInstrCheckByte(u8 and_mask_in, u8 cmp_mask_in, u8 negation_in, - s32 offset_in, const RoseInstruction *target_in) - : and_mask(and_mask_in), cmp_mask(cmp_mask_in), negation(negation_in), - offset(offset_in), target(target_in) {} - - bool operator==(const RoseInstrCheckByte &ri) const { - return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && - negation == ri.negation && offset == ri.offset && - target == ri.target; - } - - size_t hash() const override { - return hash_all(opcode, and_mask, cmp_mask, negation, offset); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckByte &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && - negation == ri.negation && offset == ri.offset && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckShufti16x8 - : 
public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_SHUFTI_16x8, - ROSE_STRUCT_CHECK_SHUFTI_16x8, - RoseInstrCheckShufti16x8> { -public: - std::array<u8, 32> nib_mask; - std::array<u8, 16> bucket_select_mask; - u32 neg_mask; - s32 offset; - const RoseInstruction *target; - - RoseInstrCheckShufti16x8(std::array<u8, 32> nib_mask_in, - std::array<u8, 16> bucket_select_mask_in, - u32 neg_mask_in, s32 offset_in, - const RoseInstruction *target_in) - : nib_mask(std::move(nib_mask_in)), - bucket_select_mask(std::move(bucket_select_mask_in)), - neg_mask(neg_mask_in), offset(offset_in), target(target_in) {} - - bool operator==(const RoseInstrCheckShufti16x8 &ri) const { - return nib_mask == ri.nib_mask && - bucket_select_mask == ri.bucket_select_mask && - neg_mask == ri.neg_mask && offset == ri.offset && - target == ri.target; - } - - size_t hash() const override { - return hash_all(opcode, nib_mask, bucket_select_mask, neg_mask, offset); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckShufti16x8 &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return nib_mask == ri.nib_mask && - bucket_select_mask == ri.bucket_select_mask && - neg_mask == ri.neg_mask && offset == ri.offset && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckShufti32x8 - : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_SHUFTI_32x8, - ROSE_STRUCT_CHECK_SHUFTI_32x8, - RoseInstrCheckShufti32x8> { -public: - std::array<u8, 16> hi_mask; - std::array<u8, 16> lo_mask; - std::array<u8, 32> bucket_select_mask; - u32 neg_mask; - s32 offset; - const RoseInstruction *target; - - RoseInstrCheckShufti32x8(std::array<u8, 16> hi_mask_in, - std::array<u8, 16> lo_mask_in, - std::array<u8, 32> bucket_select_mask_in, - u32 neg_mask_in, s32 offset_in, - const RoseInstruction *target_in) - : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)), - bucket_select_mask(std::move(bucket_select_mask_in)), - neg_mask(neg_mask_in), offset(offset_in), target(target_in) {} - - bool operator==(const RoseInstrCheckShufti32x8 &ri) const { - return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && - bucket_select_mask == ri.bucket_select_mask && - neg_mask == ri.neg_mask && offset == ri.offset && - target == ri.target; - } - - size_t hash() const override { - return hash_all(opcode, hi_mask, lo_mask, bucket_select_mask, neg_mask, - offset); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckShufti32x8 &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && - bucket_select_mask == ri.bucket_select_mask && - neg_mask == ri.neg_mask && offset == ri.offset && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckShufti16x16 - : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_SHUFTI_16x16, - ROSE_STRUCT_CHECK_SHUFTI_16x16, - RoseInstrCheckShufti16x16> { -public: - std::array<u8, 32> hi_mask; - std::array<u8, 32> lo_mask; - std::array<u8, 32> bucket_select_mask; - u32 neg_mask; - s32 offset; - const RoseInstruction *target; - - RoseInstrCheckShufti16x16(std::array<u8, 32> hi_mask_in, - std::array<u8, 32> lo_mask_in, - std::array<u8, 32> bucket_select_mask_in, - u32 neg_mask_in, s32 offset_in, - const RoseInstruction *target_in) - : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)), - 
bucket_select_mask(std::move(bucket_select_mask_in)), - neg_mask(neg_mask_in), offset(offset_in), target(target_in) {} - - bool operator==(const RoseInstrCheckShufti16x16 &ri) const { - return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && - bucket_select_mask == ri.bucket_select_mask && - neg_mask == ri.neg_mask && offset == ri.offset && - target == ri.target; - } - - size_t hash() const override { - return hash_all(opcode, hi_mask, lo_mask, bucket_select_mask, neg_mask, - offset); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckShufti16x16 &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && - bucket_select_mask == ri.bucket_select_mask && - neg_mask == ri.neg_mask && offset == ri.offset && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckShufti32x16 - : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_SHUFTI_32x16, - ROSE_STRUCT_CHECK_SHUFTI_32x16, - RoseInstrCheckShufti32x16> { -public: - std::array<u8, 32> hi_mask; - std::array<u8, 32> lo_mask; - std::array<u8, 32> bucket_select_mask_hi; - std::array<u8, 32> bucket_select_mask_lo; - u32 neg_mask; - s32 offset; - const RoseInstruction *target; - - RoseInstrCheckShufti32x16(std::array<u8, 32> hi_mask_in, - std::array<u8, 32> lo_mask_in, - std::array<u8, 32> bucket_select_mask_hi_in, - std::array<u8, 32> bucket_select_mask_lo_in, - u32 neg_mask_in, s32 offset_in, - const RoseInstruction *target_in) - : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)), - bucket_select_mask_hi(std::move(bucket_select_mask_hi_in)), - bucket_select_mask_lo(std::move(bucket_select_mask_lo_in)), - neg_mask(neg_mask_in), offset(offset_in), target(target_in) {} - - bool operator==(const RoseInstrCheckShufti32x16 &ri) const { - return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && - bucket_select_mask_hi == ri.bucket_select_mask_hi && - bucket_select_mask_lo == ri.bucket_select_mask_lo && - neg_mask == ri.neg_mask && offset == ri.offset && - target == ri.target; - } - - size_t hash() const override { - return hash_all(opcode, hi_mask, lo_mask, bucket_select_mask_hi, - bucket_select_mask_lo, neg_mask, offset); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckShufti32x16 &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && - bucket_select_mask_hi == ri.bucket_select_mask_hi && - bucket_select_mask_lo == ri.bucket_select_mask_lo && - neg_mask == ri.neg_mask && offset == ri.offset && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - +class RoseInstrCheckByte + : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_BYTE, + ROSE_STRUCT_CHECK_BYTE, + RoseInstrCheckByte> { +public: + u8 and_mask; + u8 cmp_mask; + u8 negation; + s32 offset; + const RoseInstruction *target; + + RoseInstrCheckByte(u8 and_mask_in, u8 cmp_mask_in, u8 negation_in, + s32 offset_in, const RoseInstruction *target_in) + : and_mask(and_mask_in), cmp_mask(cmp_mask_in), negation(negation_in), + offset(offset_in), target(target_in) {} + + bool operator==(const RoseInstrCheckByte &ri) const { + return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && + negation == ri.negation && offset == ri.offset && + target == ri.target; + } + + size_t hash() const override { + return hash_all(opcode, and_mask, cmp_mask, 
negation, offset); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckByte &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return and_mask == ri.and_mask && cmp_mask == ri.cmp_mask && + negation == ri.negation && offset == ri.offset && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckShufti16x8 + : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_SHUFTI_16x8, + ROSE_STRUCT_CHECK_SHUFTI_16x8, + RoseInstrCheckShufti16x8> { +public: + std::array<u8, 32> nib_mask; + std::array<u8, 16> bucket_select_mask; + u32 neg_mask; + s32 offset; + const RoseInstruction *target; + + RoseInstrCheckShufti16x8(std::array<u8, 32> nib_mask_in, + std::array<u8, 16> bucket_select_mask_in, + u32 neg_mask_in, s32 offset_in, + const RoseInstruction *target_in) + : nib_mask(std::move(nib_mask_in)), + bucket_select_mask(std::move(bucket_select_mask_in)), + neg_mask(neg_mask_in), offset(offset_in), target(target_in) {} + + bool operator==(const RoseInstrCheckShufti16x8 &ri) const { + return nib_mask == ri.nib_mask && + bucket_select_mask == ri.bucket_select_mask && + neg_mask == ri.neg_mask && offset == ri.offset && + target == ri.target; + } + + size_t hash() const override { + return hash_all(opcode, nib_mask, bucket_select_mask, neg_mask, offset); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckShufti16x8 &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return nib_mask == ri.nib_mask && + bucket_select_mask == ri.bucket_select_mask && + neg_mask == ri.neg_mask && offset == ri.offset && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckShufti32x8 + : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_SHUFTI_32x8, + ROSE_STRUCT_CHECK_SHUFTI_32x8, + RoseInstrCheckShufti32x8> { +public: + std::array<u8, 16> hi_mask; + std::array<u8, 16> lo_mask; + std::array<u8, 32> bucket_select_mask; + u32 neg_mask; + s32 offset; + const RoseInstruction *target; + + RoseInstrCheckShufti32x8(std::array<u8, 16> hi_mask_in, + std::array<u8, 16> lo_mask_in, + std::array<u8, 32> bucket_select_mask_in, + u32 neg_mask_in, s32 offset_in, + const RoseInstruction *target_in) + : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)), + bucket_select_mask(std::move(bucket_select_mask_in)), + neg_mask(neg_mask_in), offset(offset_in), target(target_in) {} + + bool operator==(const RoseInstrCheckShufti32x8 &ri) const { + return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && + bucket_select_mask == ri.bucket_select_mask && + neg_mask == ri.neg_mask && offset == ri.offset && + target == ri.target; + } + + size_t hash() const override { + return hash_all(opcode, hi_mask, lo_mask, bucket_select_mask, neg_mask, + offset); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckShufti32x8 &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && + bucket_select_mask == ri.bucket_select_mask && + neg_mask == ri.neg_mask && offset == ri.offset && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckShufti16x16 + : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_SHUFTI_16x16, + ROSE_STRUCT_CHECK_SHUFTI_16x16, + RoseInstrCheckShufti16x16> { +public: + std::array<u8, 32> 
hi_mask; + std::array<u8, 32> lo_mask; + std::array<u8, 32> bucket_select_mask; + u32 neg_mask; + s32 offset; + const RoseInstruction *target; + + RoseInstrCheckShufti16x16(std::array<u8, 32> hi_mask_in, + std::array<u8, 32> lo_mask_in, + std::array<u8, 32> bucket_select_mask_in, + u32 neg_mask_in, s32 offset_in, + const RoseInstruction *target_in) + : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)), + bucket_select_mask(std::move(bucket_select_mask_in)), + neg_mask(neg_mask_in), offset(offset_in), target(target_in) {} + + bool operator==(const RoseInstrCheckShufti16x16 &ri) const { + return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && + bucket_select_mask == ri.bucket_select_mask && + neg_mask == ri.neg_mask && offset == ri.offset && + target == ri.target; + } + + size_t hash() const override { + return hash_all(opcode, hi_mask, lo_mask, bucket_select_mask, neg_mask, + offset); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckShufti16x16 &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && + bucket_select_mask == ri.bucket_select_mask && + neg_mask == ri.neg_mask && offset == ri.offset && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckShufti32x16 + : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_SHUFTI_32x16, + ROSE_STRUCT_CHECK_SHUFTI_32x16, + RoseInstrCheckShufti32x16> { +public: + std::array<u8, 32> hi_mask; + std::array<u8, 32> lo_mask; + std::array<u8, 32> bucket_select_mask_hi; + std::array<u8, 32> bucket_select_mask_lo; + u32 neg_mask; + s32 offset; + const RoseInstruction *target; + + RoseInstrCheckShufti32x16(std::array<u8, 32> hi_mask_in, + std::array<u8, 32> lo_mask_in, + std::array<u8, 32> bucket_select_mask_hi_in, + std::array<u8, 32> bucket_select_mask_lo_in, + u32 neg_mask_in, s32 offset_in, + const RoseInstruction *target_in) + : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)), + bucket_select_mask_hi(std::move(bucket_select_mask_hi_in)), + bucket_select_mask_lo(std::move(bucket_select_mask_lo_in)), + neg_mask(neg_mask_in), offset(offset_in), target(target_in) {} + + bool operator==(const RoseInstrCheckShufti32x16 &ri) const { + return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && + bucket_select_mask_hi == ri.bucket_select_mask_hi && + bucket_select_mask_lo == ri.bucket_select_mask_lo && + neg_mask == ri.neg_mask && offset == ri.offset && + target == ri.target; + } + + size_t hash() const override { + return hash_all(opcode, hi_mask, lo_mask, bucket_select_mask_hi, + bucket_select_mask_lo, neg_mask, offset); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckShufti32x16 &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && + bucket_select_mask_hi == ri.bucket_select_mask_hi && + bucket_select_mask_lo == ri.bucket_select_mask_lo && + neg_mask == ri.neg_mask && offset == ri.offset && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + class RoseInstrCheckShufti64x8 : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_SHUFTI_64x8, ROSE_STRUCT_CHECK_SHUFTI_64x8, @@ -878,1412 +878,1412 @@ public: } }; -class RoseInstrCheckInfix - : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_INFIX, - ROSE_STRUCT_CHECK_INFIX, - RoseInstrCheckInfix> { -public: - u32 queue; - u32 lag; 
- ReportID report; - const RoseInstruction *target; - - RoseInstrCheckInfix(u32 queue_in, u32 lag_in, ReportID report_in, - const RoseInstruction *target_in) - : queue(queue_in), lag(lag_in), report(report_in), target(target_in) {} - - bool operator==(const RoseInstrCheckInfix &ri) const { - return queue == ri.queue && lag == ri.lag && report == ri.report && - target == ri.target; - } - - size_t hash() const override { - return hash_all(opcode, queue, lag, report); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckInfix &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return queue == ri.queue && lag == ri.lag && report == ri.report && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckPrefix - : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_PREFIX, - ROSE_STRUCT_CHECK_PREFIX, - RoseInstrCheckPrefix> { -public: - u32 queue; - u32 lag; - ReportID report; - const RoseInstruction *target; - - RoseInstrCheckPrefix(u32 queue_in, u32 lag_in, ReportID report_in, - const RoseInstruction *target_in) - : queue(queue_in), lag(lag_in), report(report_in), target(target_in) {} - - bool operator==(const RoseInstrCheckPrefix &ri) const { - return queue == ri.queue && lag == ri.lag && report == ri.report && - target == ri.target; - } - - size_t hash() const override { - return hash_all(opcode, queue, lag, report); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckPrefix &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return queue == ri.queue && lag == ri.lag && report == ri.report && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrPushDelayed - : public RoseInstrBaseNoTargets<ROSE_INSTR_PUSH_DELAYED, - ROSE_STRUCT_PUSH_DELAYED, - RoseInstrPushDelayed> { -public: - u8 delay; - u32 index; - - RoseInstrPushDelayed(u8 delay_in, u32 index_in) - : delay(delay_in), index(index_in) {} - - bool operator==(const RoseInstrPushDelayed &ri) const { - return delay == ri.delay && index == ri.index; - } - - size_t hash() const override { - return hash_all(opcode, delay, index); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrPushDelayed &ri, const OffsetMap &, - const OffsetMap &) const { - return delay == ri.delay && index == ri.index; - } -}; - -class RoseInstrCatchUp - : public RoseInstrBaseTrivial<ROSE_INSTR_CATCH_UP, ROSE_STRUCT_CATCH_UP, - RoseInstrCatchUp> { -public: - ~RoseInstrCatchUp() override; -}; - -class RoseInstrCatchUpMpv - : public RoseInstrBaseTrivial<ROSE_INSTR_CATCH_UP_MPV, - ROSE_STRUCT_CATCH_UP_MPV, - RoseInstrCatchUpMpv> { -public: - ~RoseInstrCatchUpMpv() override; -}; - -class RoseInstrSomAdjust - : public RoseInstrBaseNoTargets<ROSE_INSTR_SOM_ADJUST, - ROSE_STRUCT_SOM_ADJUST, - RoseInstrSomAdjust> { -public: - u32 distance; - - explicit RoseInstrSomAdjust(u32 distance_in) : distance(distance_in) {} - - bool operator==(const RoseInstrSomAdjust &ri) const { - return distance == ri.distance; - } - - size_t hash() const override { - return hash_all(opcode, distance); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrSomAdjust &ri, const OffsetMap &, - const OffsetMap &) const { - return distance == ri.distance; - } -}; - -class RoseInstrSomLeftfix 
- : public RoseInstrBaseNoTargets<ROSE_INSTR_SOM_LEFTFIX, - ROSE_STRUCT_SOM_LEFTFIX, - RoseInstrSomLeftfix> { -public: - u32 queue; - u32 lag; - - RoseInstrSomLeftfix(u32 queue_in, u32 lag_in) - : queue(queue_in), lag(lag_in) {} - - bool operator==(const RoseInstrSomLeftfix &ri) const { - return queue == ri.queue && lag == ri.lag; - } - - size_t hash() const override { - return hash_all(opcode, queue, lag); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrSomLeftfix &ri, const OffsetMap &, - const OffsetMap &) const { - return queue == ri.queue && lag == ri.lag; - } -}; - -class RoseInstrSomFromReport - : public RoseInstrBaseNoTargets<ROSE_INSTR_SOM_FROM_REPORT, - ROSE_STRUCT_SOM_FROM_REPORT, - RoseInstrSomFromReport> { -public: - som_operation som; - - RoseInstrSomFromReport() { - std::memset(&som, 0, sizeof(som)); - } - - bool operator==(const RoseInstrSomFromReport &ri) const { - return std::memcmp(&som, &ri.som, sizeof(som)) == 0; - } - - size_t hash() const override { - return hash_all(opcode, som.type, som.onmatch); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrSomFromReport &ri, const OffsetMap &, - const OffsetMap &) const { - return std::memcmp(&som, &ri.som, sizeof(som)) == 0; - } -}; - -class RoseInstrSomZero - : public RoseInstrBaseTrivial<ROSE_INSTR_SOM_ZERO, ROSE_STRUCT_SOM_ZERO, - RoseInstrSomZero> { -public: - ~RoseInstrSomZero() override; -}; - -class RoseInstrTriggerInfix - : public RoseInstrBaseNoTargets<ROSE_INSTR_TRIGGER_INFIX, - ROSE_STRUCT_TRIGGER_INFIX, - RoseInstrTriggerInfix> { -public: - u8 cancel; - u32 queue; - u32 event; - - RoseInstrTriggerInfix(u8 cancel_in, u32 queue_in, u32 event_in) - : cancel(cancel_in), queue(queue_in), event(event_in) {} - - bool operator==(const RoseInstrTriggerInfix &ri) const { - return cancel == ri.cancel && queue == ri.queue && event == ri.event; - } - - size_t hash() const override { - return hash_all(opcode, cancel, queue, event); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrTriggerInfix &ri, const OffsetMap &, - const OffsetMap &) const { - return cancel == ri.cancel && queue == ri.queue && event == ri.event; - } -}; - -class RoseInstrTriggerSuffix - : public RoseInstrBaseNoTargets<ROSE_INSTR_TRIGGER_SUFFIX, - ROSE_STRUCT_TRIGGER_SUFFIX, - RoseInstrTriggerSuffix> { -public: - u32 queue; - u32 event; - - RoseInstrTriggerSuffix(u32 queue_in, u32 event_in) - : queue(queue_in), event(event_in) {} - - bool operator==(const RoseInstrTriggerSuffix &ri) const { - return queue == ri.queue && event == ri.event; - } - - size_t hash() const override { - return hash_all(opcode, queue, event); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrTriggerSuffix &ri, const OffsetMap &, - const OffsetMap &) const { - return queue == ri.queue && event == ri.event; - } -}; - -class RoseInstrDedupe - : public RoseInstrBaseOneTarget<ROSE_INSTR_DEDUPE, ROSE_STRUCT_DEDUPE, - RoseInstrDedupe> { -public: - u8 quash_som; - u32 dkey; - s32 offset_adjust; - const RoseInstruction *target; - - RoseInstrDedupe(u8 quash_som_in, u32 dkey_in, s32 offset_adjust_in, - const RoseInstruction *target_in) - : quash_som(quash_som_in), dkey(dkey_in), - offset_adjust(offset_adjust_in), target(target_in) {} - - bool operator==(const 
RoseInstrDedupe &ri) const { - return quash_som == ri.quash_som && dkey == ri.dkey && - offset_adjust == ri.offset_adjust && target == ri.target; - } - - size_t hash() const override { - return hash_all(opcode, quash_som, dkey, offset_adjust); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrDedupe &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return quash_som == ri.quash_som && dkey == ri.dkey && - offset_adjust == ri.offset_adjust && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrDedupeSom - : public RoseInstrBaseOneTarget<ROSE_INSTR_DEDUPE_SOM, - ROSE_STRUCT_DEDUPE_SOM, - RoseInstrDedupeSom> { -public: - u8 quash_som; - u32 dkey; - s32 offset_adjust; - const RoseInstruction *target; - - RoseInstrDedupeSom(u8 quash_som_in, u32 dkey_in, s32 offset_adjust_in, - const RoseInstruction *target_in) - : quash_som(quash_som_in), dkey(dkey_in), - offset_adjust(offset_adjust_in), target(target_in) {} - - bool operator==(const RoseInstrDedupeSom &ri) const { - return quash_som == ri.quash_som && dkey == ri.dkey && - offset_adjust == ri.offset_adjust && target == ri.target; - } - - size_t hash() const override { - return hash_all(opcode, quash_som, dkey, offset_adjust); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrDedupeSom &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return quash_som == ri.quash_som && dkey == ri.dkey && - offset_adjust == ri.offset_adjust && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrReportChain - : public RoseInstrBaseNoTargets<ROSE_INSTR_REPORT_CHAIN, - ROSE_STRUCT_REPORT_CHAIN, - RoseInstrReportChain> { -public: - u32 event; - u64a top_squash_distance; - - RoseInstrReportChain(u32 event_in, u32 top_squash_distance_in) - : event(event_in), top_squash_distance(top_squash_distance_in) {} - - bool operator==(const RoseInstrReportChain &ri) const { - return event == ri.event && - top_squash_distance == ri.top_squash_distance; - } - - size_t hash() const override { - return hash_all(opcode, event, top_squash_distance); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrReportChain &ri, const OffsetMap &, - const OffsetMap &) const { - return event == ri.event && - top_squash_distance == ri.top_squash_distance; - } -}; - -class RoseInstrReportSomInt - : public RoseInstrBaseNoTargets<ROSE_INSTR_REPORT_SOM_INT, - ROSE_STRUCT_REPORT_SOM_INT, - RoseInstrReportSomInt> { -public: - som_operation som; - - RoseInstrReportSomInt() { - std::memset(&som, 0, sizeof(som)); - } - - bool operator==(const RoseInstrReportSomInt &ri) const { - return std::memcmp(&som, &ri.som, sizeof(som)) == 0; - } - - size_t hash() const override { - return hash_all(opcode, som.type, som.onmatch); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrReportSomInt &ri, const OffsetMap &, - const OffsetMap &) const { - return std::memcmp(&som, &ri.som, sizeof(som)) == 0; - } -}; - -class RoseInstrReportSomAware - : public RoseInstrBaseNoTargets<ROSE_INSTR_REPORT_SOM_AWARE, - ROSE_STRUCT_REPORT_SOM_AWARE, - RoseInstrReportSomAware> { -public: - som_operation som; - - RoseInstrReportSomAware() { - std::memset(&som, 0, sizeof(som)); - } - - bool operator==(const 
RoseInstrReportSomAware &ri) const { - return std::memcmp(&som, &ri.som, sizeof(som)) == 0; - } - - size_t hash() const override { - return hash_all(opcode, som.type, som.onmatch); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrReportSomAware &ri, const OffsetMap &, - const OffsetMap &) const { - return std::memcmp(&som, &ri.som, sizeof(som)) == 0; - } -}; - -class RoseInstrReport - : public RoseInstrBaseNoTargets<ROSE_INSTR_REPORT, ROSE_STRUCT_REPORT, - RoseInstrReport> { -public: - ReportID onmatch; - s32 offset_adjust; - - RoseInstrReport(ReportID onmatch_in, s32 offset_adjust_in) - : onmatch(onmatch_in), offset_adjust(offset_adjust_in) {} - - bool operator==(const RoseInstrReport &ri) const { - return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust; - } - - size_t hash() const override { - return hash_all(opcode, onmatch, offset_adjust); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrReport &ri, const OffsetMap &, - const OffsetMap &) const { - return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust; - } -}; - -class RoseInstrReportExhaust - : public RoseInstrBaseNoTargets<ROSE_INSTR_REPORT_EXHAUST, - ROSE_STRUCT_REPORT_EXHAUST, - RoseInstrReportExhaust> { -public: - ReportID onmatch; - s32 offset_adjust; - u32 ekey; - - RoseInstrReportExhaust(ReportID onmatch_in, s32 offset_adjust_in, - u32 ekey_in) - : onmatch(onmatch_in), offset_adjust(offset_adjust_in), ekey(ekey_in) {} - - bool operator==(const RoseInstrReportExhaust &ri) const { - return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust && - ekey == ri.ekey; - } - - size_t hash() const override { - return hash_all(opcode, onmatch, offset_adjust, ekey); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrReportExhaust &ri, const OffsetMap &, - const OffsetMap &) const { - return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust && - ekey == ri.ekey; - } -}; - -class RoseInstrReportSom - : public RoseInstrBaseNoTargets<ROSE_INSTR_REPORT_SOM, - ROSE_STRUCT_REPORT_SOM, - RoseInstrReportSom> { -public: - ReportID onmatch; - s32 offset_adjust; - - RoseInstrReportSom(ReportID onmatch_in, s32 offset_adjust_in) - : onmatch(onmatch_in), offset_adjust(offset_adjust_in) {} - - bool operator==(const RoseInstrReportSom &ri) const { - return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust; - } - - size_t hash() const override { - return hash_all(opcode, onmatch, offset_adjust); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrReportSom &ri, const OffsetMap &, - const OffsetMap &) const { - return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust; - } -}; - -class RoseInstrReportSomExhaust - : public RoseInstrBaseNoTargets<ROSE_INSTR_REPORT_SOM_EXHAUST, - ROSE_STRUCT_REPORT_SOM_EXHAUST, - RoseInstrReportSomExhaust> { -public: - ReportID onmatch; - s32 offset_adjust; - u32 ekey; - - RoseInstrReportSomExhaust(ReportID onmatch_in, s32 offset_adjust_in, - u32 ekey_in) - : onmatch(onmatch_in), offset_adjust(offset_adjust_in), ekey(ekey_in) {} - - bool operator==(const RoseInstrReportSomExhaust &ri) const { - return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust && - ekey == ri.ekey; - } - - size_t hash() const override { - return hash_all(opcode, 
onmatch, offset_adjust, ekey); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrReportSomExhaust &ri, const OffsetMap &, - const OffsetMap &) const { - return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust && - ekey == ri.ekey; - } -}; - -class RoseInstrDedupeAndReport - : public RoseInstrBaseOneTarget<ROSE_INSTR_DEDUPE_AND_REPORT, - ROSE_STRUCT_DEDUPE_AND_REPORT, - RoseInstrDedupeAndReport> { -public: - u8 quash_som; - u32 dkey; - ReportID onmatch; - s32 offset_adjust; - const RoseInstruction *target; - - RoseInstrDedupeAndReport(u8 quash_som_in, u32 dkey_in, ReportID onmatch_in, - s32 offset_adjust_in, - const RoseInstruction *target_in) - : quash_som(quash_som_in), dkey(dkey_in), onmatch(onmatch_in), - offset_adjust(offset_adjust_in), target(target_in) {} - - bool operator==(const RoseInstrDedupeAndReport &ri) const { - return quash_som == ri.quash_som && dkey == ri.dkey && - onmatch == ri.onmatch && offset_adjust == ri.offset_adjust && - target == ri.target; - } - - size_t hash() const override { - return hash_all(opcode, quash_som, dkey, onmatch, offset_adjust); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrDedupeAndReport &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return quash_som == ri.quash_som && dkey == ri.dkey && - onmatch == ri.onmatch && offset_adjust == ri.offset_adjust && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrFinalReport - : public RoseInstrBaseNoTargets<ROSE_INSTR_FINAL_REPORT, - ROSE_STRUCT_FINAL_REPORT, - RoseInstrFinalReport> { -public: - ReportID onmatch; - s32 offset_adjust; - - RoseInstrFinalReport(ReportID onmatch_in, s32 offset_adjust_in) - : onmatch(onmatch_in), offset_adjust(offset_adjust_in) {} - - bool operator==(const RoseInstrFinalReport &ri) const { - return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust; - } - - size_t hash() const override { - return hash_all(opcode, onmatch, offset_adjust); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrFinalReport &ri, const OffsetMap &, - const OffsetMap &) const { - return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust; - } -}; - -class RoseInstrCheckExhausted - : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_EXHAUSTED, - ROSE_STRUCT_CHECK_EXHAUSTED, - RoseInstrCheckExhausted> { -public: - u32 ekey; - const RoseInstruction *target; - - RoseInstrCheckExhausted(u32 ekey_in, const RoseInstruction *target_in) - : ekey(ekey_in), target(target_in) {} - - bool operator==(const RoseInstrCheckExhausted &ri) const { - return ekey == ri.ekey && target == ri.target; - } - - size_t hash() const override { - return hash_all(opcode, ekey); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckExhausted &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return ekey == ri.ekey && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckMinLength - : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MIN_LENGTH, - ROSE_STRUCT_CHECK_MIN_LENGTH, - RoseInstrCheckMinLength> { -public: - s32 end_adj; - u64a min_length; - const RoseInstruction *target; - - RoseInstrCheckMinLength(s32 end_adj_in, u64a min_length_in, - const RoseInstruction *target_in) - : 
end_adj(end_adj_in), min_length(min_length_in), target(target_in) {} - - bool operator==(const RoseInstrCheckMinLength &ri) const { - return end_adj == ri.end_adj && min_length == ri.min_length && - target == ri.target; - } - - size_t hash() const override { - return hash_all(opcode, end_adj, min_length); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckMinLength &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return end_adj == ri.end_adj && min_length == ri.min_length && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrSetState - : public RoseInstrBaseNoTargets<ROSE_INSTR_SET_STATE, ROSE_STRUCT_SET_STATE, - RoseInstrSetState> { -public: - u32 index; - - explicit RoseInstrSetState(u32 index_in) : index(index_in) {} - - bool operator==(const RoseInstrSetState &ri) const { - return index == ri.index; - } - - size_t hash() const override { - return hash_all(opcode, index); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrSetState &ri, const OffsetMap &, - const OffsetMap &) const { - return index == ri.index; - } -}; - -class RoseInstrSetGroups - : public RoseInstrBaseNoTargets<ROSE_INSTR_SET_GROUPS, - ROSE_STRUCT_SET_GROUPS, - RoseInstrSetGroups> { -public: - rose_group groups; - - explicit RoseInstrSetGroups(rose_group groups_in) : groups(groups_in) {} - - bool operator==(const RoseInstrSetGroups &ri) const { - return groups == ri.groups; - } - - size_t hash() const override { - return hash_all(opcode, groups); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrSetGroups &ri, const OffsetMap &, - const OffsetMap &) const { - return groups == ri.groups; - } -}; - -class RoseInstrSquashGroups - : public RoseInstrBaseNoTargets<ROSE_INSTR_SQUASH_GROUPS, - ROSE_STRUCT_SQUASH_GROUPS, - RoseInstrSquashGroups> { -public: - rose_group groups; - - explicit RoseInstrSquashGroups(rose_group groups_in) : groups(groups_in) {} - - bool operator==(const RoseInstrSquashGroups &ri) const { - return groups == ri.groups; - } - - size_t hash() const override { - return hash_all(opcode, groups); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrSquashGroups &ri, const OffsetMap &, - const OffsetMap &) const { - return groups == ri.groups; - } -}; - -class RoseInstrCheckState - : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_STATE, - ROSE_STRUCT_CHECK_STATE, - RoseInstrCheckState> { -public: - u32 index; - const RoseInstruction *target; - - RoseInstrCheckState(u32 index_in, const RoseInstruction *target_in) - : index(index_in), target(target_in) {} - - bool operator==(const RoseInstrCheckState &ri) const { - return index == ri.index && target == ri.target; - } - - size_t hash() const override { - return hash_all(opcode, index); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckState &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return index == ri.index && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrSparseIterBegin - : public RoseInstrBase<ROSE_INSTR_SPARSE_ITER_BEGIN, - ROSE_STRUCT_SPARSE_ITER_BEGIN, - RoseInstrSparseIterBegin> { -public: - u32 num_keys; // total number of 
multibit keys - std::vector<std::pair<u32, const RoseInstruction *>> jump_table; - const RoseInstruction *target; - - RoseInstrSparseIterBegin(u32 num_keys_in, - const RoseInstruction *target_in) - : num_keys(num_keys_in), target(target_in) {} - - bool operator==(const RoseInstrSparseIterBegin &ri) const { - return num_keys == ri.num_keys && jump_table == ri.jump_table && - target == ri.target; - } - - size_t hash() const override { - size_t v = hash_all(opcode, num_keys); - for (const u32 &key : jump_table | boost::adaptors::map_keys) { - hash_combine(v, key); - } - return v; - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - void update_target(const RoseInstruction *old_target, - const RoseInstruction *new_target) override { - if (target == old_target) { - target = new_target; - } - for (auto &jump : jump_table) { - if (jump.second == old_target) { - jump.second = new_target; - } - } - } - - bool equiv_to(const RoseInstrSparseIterBegin &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - if (iter_offset != ri.iter_offset || - offsets.at(target) != other_offsets.at(ri.target)) { - return false; - } - if (jump_table.size() != ri.jump_table.size()) { - return false; - } - auto it1 = jump_table.begin(), it2 = ri.jump_table.begin(); - for (; it1 != jump_table.end(); ++it1, ++it2) { - if (it1->first != it2->first) { - return false; - } - if (offsets.at(it1->second) != other_offsets.at(it2->second)) { - return false; - } - } - return true; - } - -private: - friend class RoseInstrSparseIterNext; - - // These variables allow us to use the same multibit iterator and jump - // table in subsequent SPARSE_ITER_NEXT write() operations. - mutable bool is_written = false; - mutable u32 iter_offset = 0; - mutable u32 jump_table_offset = 0; -}; - -class RoseInstrSparseIterNext - : public RoseInstrBase<ROSE_INSTR_SPARSE_ITER_NEXT, - ROSE_STRUCT_SPARSE_ITER_NEXT, - RoseInstrSparseIterNext> { -public: - u32 state; - const RoseInstrSparseIterBegin *begin; - const RoseInstruction *target; - - RoseInstrSparseIterNext(u32 state_in, - const RoseInstrSparseIterBegin *begin_in, - const RoseInstruction *target_in) - : state(state_in), begin(begin_in), target(target_in) {} - - bool operator==(const RoseInstrSparseIterNext &ri) const { - return state == ri.state && begin == ri.begin && target == ri.target; - } - - size_t hash() const override { - return hash_all(opcode, state); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - void update_target(const RoseInstruction *old_target, - const RoseInstruction *new_target) override { - if (target == old_target) { - target = new_target; - } - if (begin == old_target) { - assert(new_target->code() == ROSE_INSTR_SPARSE_ITER_BEGIN); - begin = static_cast<const RoseInstrSparseIterBegin *>(new_target); - } - } - - bool equiv_to(const RoseInstrSparseIterNext &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return state == ri.state && - offsets.at(begin) == other_offsets.at(ri.begin) && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrSparseIterAny - : public RoseInstrBaseOneTarget<ROSE_INSTR_SPARSE_ITER_ANY, - ROSE_STRUCT_SPARSE_ITER_ANY, - RoseInstrSparseIterAny> { -public: - u32 num_keys; // total number of multibit keys - std::vector<u32> keys; - const RoseInstruction *target; - - RoseInstrSparseIterAny(u32 num_keys_in, std::vector<u32> keys_in, - const RoseInstruction *target_in) - : 
num_keys(num_keys_in), keys(std::move(keys_in)), target(target_in) {} - - bool operator==(const RoseInstrSparseIterAny &ri) const { - return num_keys == ri.num_keys && keys == ri.keys && - target == ri.target; - } - - size_t hash() const override { - return hash_all(opcode, num_keys, keys); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrSparseIterAny &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return num_keys == ri.num_keys && keys == ri.keys && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrEnginesEod - : public RoseInstrBaseNoTargets<ROSE_INSTR_ENGINES_EOD, - ROSE_STRUCT_ENGINES_EOD, - RoseInstrEnginesEod> { -public: - u32 iter_offset; - - explicit RoseInstrEnginesEod(u32 iter_in) : iter_offset(iter_in) {} - - bool operator==(const RoseInstrEnginesEod &ri) const { - return iter_offset == ri.iter_offset; - } - - size_t hash() const override { - return hash_all(opcode, iter_offset); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrEnginesEod &ri, const OffsetMap &, - const OffsetMap &) const { - return iter_offset == ri.iter_offset; - } -}; - -class RoseInstrSuffixesEod - : public RoseInstrBaseTrivial<ROSE_INSTR_SUFFIXES_EOD, - ROSE_STRUCT_SUFFIXES_EOD, - RoseInstrSuffixesEod> { -public: - ~RoseInstrSuffixesEod() override; -}; - -class RoseInstrMatcherEod : public RoseInstrBaseTrivial<ROSE_INSTR_MATCHER_EOD, - ROSE_STRUCT_MATCHER_EOD, - RoseInstrMatcherEod> { -public: - ~RoseInstrMatcherEod() override; -}; - -class RoseInstrCheckLongLit - : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_LONG_LIT, - ROSE_STRUCT_CHECK_LONG_LIT, - RoseInstrCheckLongLit> { -public: - std::string literal; - const RoseInstruction *target; - - RoseInstrCheckLongLit(std::string literal_in, - const RoseInstruction *target_in) - : literal(std::move(literal_in)), target(target_in) {} - - bool operator==(const RoseInstrCheckLongLit &ri) const { - return literal == ri.literal && target == ri.target; - } - - size_t hash() const override { - return hash_all(opcode, literal); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckLongLit &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return literal == ri.literal && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckLongLitNocase - : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_LONG_LIT_NOCASE, - ROSE_STRUCT_CHECK_LONG_LIT_NOCASE, - RoseInstrCheckLongLitNocase> { -public: - std::string literal; - const RoseInstruction *target; - - RoseInstrCheckLongLitNocase(std::string literal_in, - const RoseInstruction *target_in) - : literal(std::move(literal_in)), target(target_in) { - upperString(literal); - } - - bool operator==(const RoseInstrCheckLongLitNocase &ri) const { - return literal == ri.literal && target == ri.target; - } - - size_t hash() const override { - return hash_all(opcode, literal); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckLongLitNocase &ri, - const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return literal == ri.literal && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckMedLit - : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MED_LIT, - 
ROSE_STRUCT_CHECK_MED_LIT, - RoseInstrCheckMedLit> { -public: - std::string literal; - const RoseInstruction *target; - - explicit RoseInstrCheckMedLit(std::string literal_in, - const RoseInstruction *target_in) - : literal(std::move(literal_in)), target(target_in) {} - - bool operator==(const RoseInstrCheckMedLit &ri) const { - return literal == ri.literal && target == ri.target; - } - - size_t hash() const override { - return hash_all(opcode, literal); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckMedLit &ri, const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return literal == ri.literal && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckMedLitNocase - : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MED_LIT_NOCASE, - ROSE_STRUCT_CHECK_MED_LIT_NOCASE, - RoseInstrCheckMedLitNocase> { -public: - std::string literal; - const RoseInstruction *target; - - explicit RoseInstrCheckMedLitNocase(std::string literal_in, - const RoseInstruction *target_in) - : literal(std::move(literal_in)), target(target_in) { - upperString(literal); - } - - bool operator==(const RoseInstrCheckMedLitNocase &ri) const { - return literal == ri.literal && target == ri.target; - } - - size_t hash() const override { - return hash_all(opcode, literal); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckMedLitNocase &ri, - const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return literal == ri.literal && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrClearWorkDone - : public RoseInstrBaseTrivial<ROSE_INSTR_CLEAR_WORK_DONE, - ROSE_STRUCT_CLEAR_WORK_DONE, - RoseInstrClearWorkDone> { -public: - ~RoseInstrClearWorkDone() override; -}; - -class RoseInstrMultipathLookaround - : public RoseInstrBaseOneTarget<ROSE_INSTR_MULTIPATH_LOOKAROUND, - ROSE_STRUCT_MULTIPATH_LOOKAROUND, - RoseInstrMultipathLookaround> { -public: - std::vector<std::vector<LookEntry>> multi_look; - s32 last_start; - std::array<u8, 16> start_mask; - const RoseInstruction *target; - - RoseInstrMultipathLookaround(std::vector<std::vector<LookEntry>> ml, - s32 last_start_in, - std::array<u8, 16> start_mask_in, - const RoseInstruction *target_in) - : multi_look(std::move(ml)), last_start(last_start_in), - start_mask(std::move(start_mask_in)), target(target_in) {} - - bool operator==(const RoseInstrMultipathLookaround &ri) const { - return multi_look == ri.multi_look && last_start == ri.last_start - && start_mask == ri.start_mask && target == ri.target; - } - - size_t hash() const override { - return hash_all(opcode, multi_look, last_start, start_mask); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrMultipathLookaround &ri, - const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return multi_look == ri.multi_look && last_start == ri.last_start - && start_mask == ri.start_mask - && offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckMultipathShufti16x8 - : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_16x8, - ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_16x8, - RoseInstrCheckMultipathShufti16x8> { -public: - std::array<u8, 32> nib_mask; - std::array<u8, 64> bucket_select_mask; - std::array<u8, 64> data_select_mask; - u16 hi_bits_mask; - u16 
lo_bits_mask; - u16 neg_mask; - s32 base_offset; - s32 last_start; - const RoseInstruction *target; - - RoseInstrCheckMultipathShufti16x8(std::array<u8, 32> nib_mask_in, - std::array<u8, 64> bucket_select_mask_in, - std::array<u8, 64> data_select_mask_in, - u16 hi_bits_mask_in, u16 lo_bits_mask_in, - u16 neg_mask_in, s32 base_offset_in, - s32 last_start_in, - const RoseInstruction *target_in) - : nib_mask(std::move(nib_mask_in)), - bucket_select_mask(std::move(bucket_select_mask_in)), - data_select_mask(std::move(data_select_mask_in)), - hi_bits_mask(hi_bits_mask_in), lo_bits_mask(lo_bits_mask_in), - neg_mask(neg_mask_in), base_offset(base_offset_in), - last_start(last_start_in), target(target_in) {} - - bool operator==(const RoseInstrCheckMultipathShufti16x8 &ri) const { - return nib_mask == ri.nib_mask && - bucket_select_mask == ri.bucket_select_mask && - data_select_mask == ri.data_select_mask && - hi_bits_mask == ri.hi_bits_mask && - lo_bits_mask == ri.lo_bits_mask && - neg_mask == ri.neg_mask && base_offset == ri.base_offset && - last_start == ri.last_start && target == ri.target; - } - - size_t hash() const override { - return hash_all(opcode, nib_mask, bucket_select_mask, data_select_mask, - hi_bits_mask, lo_bits_mask, neg_mask, base_offset, - last_start); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckMultipathShufti16x8 &ri, - const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return nib_mask == ri.nib_mask && - bucket_select_mask == ri.bucket_select_mask && - data_select_mask == ri.data_select_mask && - hi_bits_mask == ri.hi_bits_mask && - lo_bits_mask == ri.lo_bits_mask && neg_mask == ri.neg_mask && - base_offset == ri.base_offset && last_start == ri.last_start && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckMultipathShufti32x8 - : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_32x8, - ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x8, - RoseInstrCheckMultipathShufti32x8> { -public: - std::array<u8, 32> hi_mask; - std::array<u8, 32> lo_mask; - std::array<u8, 64> bucket_select_mask; - std::array<u8, 64> data_select_mask; - u32 hi_bits_mask; - u32 lo_bits_mask; - u32 neg_mask; - s32 base_offset; - s32 last_start; - const RoseInstruction *target; - - RoseInstrCheckMultipathShufti32x8(std::array<u8, 32> hi_mask_in, - std::array<u8, 32> lo_mask_in, - std::array<u8, 64> bucket_select_mask_in, - std::array<u8, 64> data_select_mask_in, - u32 hi_bits_mask_in, u32 lo_bits_mask_in, - u32 neg_mask_in, s32 base_offset_in, - s32 last_start_in, - const RoseInstruction *target_in) - : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)), - bucket_select_mask(std::move(bucket_select_mask_in)), - data_select_mask(std::move(data_select_mask_in)), - hi_bits_mask(hi_bits_mask_in), lo_bits_mask(lo_bits_mask_in), - neg_mask(neg_mask_in), base_offset(base_offset_in), - last_start(last_start_in), target(target_in) {} - - bool operator==(const RoseInstrCheckMultipathShufti32x8 &ri) const { - return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && - bucket_select_mask == ri.bucket_select_mask && - data_select_mask == ri.data_select_mask && - hi_bits_mask == ri.hi_bits_mask && - lo_bits_mask == ri.lo_bits_mask && - neg_mask == ri.neg_mask && base_offset == ri.base_offset && - last_start == ri.last_start && target == ri.target; - } - - size_t hash() const override { - return hash_all(opcode, hi_mask, lo_mask, bucket_select_mask, - 
data_select_mask, hi_bits_mask, lo_bits_mask, neg_mask, - base_offset, last_start); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckMultipathShufti32x8 &ri, - const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && - bucket_select_mask == ri.bucket_select_mask && - data_select_mask == ri.data_select_mask && - hi_bits_mask == ri.hi_bits_mask && - lo_bits_mask == ri.lo_bits_mask && neg_mask == ri.neg_mask && - base_offset == ri.base_offset && last_start == ri.last_start && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckMultipathShufti32x16 - : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_32x16, - ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x16, - RoseInstrCheckMultipathShufti32x16> { -public: - std::array<u8, 32> hi_mask; - std::array<u8, 32> lo_mask; - std::array<u8, 64> bucket_select_mask_hi; - std::array<u8, 64> bucket_select_mask_lo; - std::array<u8, 64> data_select_mask; - u32 hi_bits_mask; - u32 lo_bits_mask; - u32 neg_mask; - s32 base_offset; - s32 last_start; - const RoseInstruction *target; - - RoseInstrCheckMultipathShufti32x16(std::array<u8, 32> hi_mask_in, - std::array<u8, 32> lo_mask_in, - std::array<u8, 64> bucket_select_mask_hi_in, - std::array<u8, 64> bucket_select_mask_lo_in, - std::array<u8, 64> data_select_mask_in, - u32 hi_bits_mask_in, u32 lo_bits_mask_in, - u32 neg_mask_in, s32 base_offset_in, - s32 last_start_in, - const RoseInstruction *target_in) - : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)), - bucket_select_mask_hi(std::move(bucket_select_mask_hi_in)), - bucket_select_mask_lo(std::move(bucket_select_mask_lo_in)), - data_select_mask(std::move(data_select_mask_in)), - hi_bits_mask(hi_bits_mask_in), lo_bits_mask(lo_bits_mask_in), - neg_mask(neg_mask_in), base_offset(base_offset_in), - last_start(last_start_in), target(target_in) {} - - bool operator==(const RoseInstrCheckMultipathShufti32x16 &ri) const { - return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && - bucket_select_mask_hi == ri.bucket_select_mask_hi && - bucket_select_mask_lo == ri.bucket_select_mask_lo && - data_select_mask == ri.data_select_mask && - hi_bits_mask == ri.hi_bits_mask && - lo_bits_mask == ri.lo_bits_mask && - neg_mask == ri.neg_mask && base_offset == ri.base_offset && - last_start == ri.last_start && target == ri.target; - } - - size_t hash() const override { - return hash_all(opcode, hi_mask, lo_mask, bucket_select_mask_hi, - bucket_select_mask_lo, data_select_mask, hi_bits_mask, - lo_bits_mask, neg_mask, base_offset, last_start); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckMultipathShufti32x16 &ri, - const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && - bucket_select_mask_hi == ri.bucket_select_mask_hi && - bucket_select_mask_lo == ri.bucket_select_mask_lo && - data_select_mask == ri.data_select_mask && - hi_bits_mask == ri.hi_bits_mask && - lo_bits_mask == ri.lo_bits_mask && neg_mask == ri.neg_mask && - base_offset == ri.base_offset && last_start == ri.last_start && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrCheckMultipathShufti64 - : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_64, - ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_64, - 
RoseInstrCheckMultipathShufti64> { -public: - std::array<u8, 32> hi_mask; - std::array<u8, 32> lo_mask; - std::array<u8, 64> bucket_select_mask; - std::array<u8, 64> data_select_mask; - u64a hi_bits_mask; - u64a lo_bits_mask; - u64a neg_mask; - s32 base_offset; - s32 last_start; - const RoseInstruction *target; - - RoseInstrCheckMultipathShufti64(std::array<u8, 32> hi_mask_in, - std::array<u8, 32> lo_mask_in, - std::array<u8, 64> bucket_select_mask_in, - std::array<u8, 64> data_select_mask_in, - u64a hi_bits_mask_in, u64a lo_bits_mask_in, - u64a neg_mask_in, s32 base_offset_in, - s32 last_start_in, - const RoseInstruction *target_in) - : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)), - bucket_select_mask(std::move(bucket_select_mask_in)), - data_select_mask(std::move(data_select_mask_in)), - hi_bits_mask(hi_bits_mask_in), lo_bits_mask(lo_bits_mask_in), - neg_mask(neg_mask_in), base_offset(base_offset_in), - last_start(last_start_in), target(target_in) {} - - bool operator==(const RoseInstrCheckMultipathShufti64 &ri) const { - return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && - bucket_select_mask == ri.bucket_select_mask && - data_select_mask == ri.data_select_mask && - hi_bits_mask == ri.hi_bits_mask && - lo_bits_mask == ri.lo_bits_mask && - neg_mask == ri.neg_mask && base_offset == ri.base_offset && - last_start == ri.last_start && target == ri.target; - } - - size_t hash() const override { - return hash_all(opcode, hi_mask, lo_mask, bucket_select_mask, - data_select_mask, hi_bits_mask, lo_bits_mask, neg_mask, - base_offset, last_start); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrCheckMultipathShufti64 &ri, - const OffsetMap &offsets, - const OffsetMap &other_offsets) const { - return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && - bucket_select_mask == ri.bucket_select_mask && - data_select_mask == ri.data_select_mask && - hi_bits_mask == ri.hi_bits_mask && - lo_bits_mask == ri.lo_bits_mask && neg_mask == ri.neg_mask && - base_offset == ri.base_offset && last_start == ri.last_start && - offsets.at(target) == other_offsets.at(ri.target); - } -}; - -class RoseInstrIncludedJump - : public RoseInstrBaseNoTargets<ROSE_INSTR_INCLUDED_JUMP, - ROSE_STRUCT_INCLUDED_JUMP, - RoseInstrIncludedJump> { -public: - u32 child_offset; - u8 squash; - - RoseInstrIncludedJump(u32 child_offset_in, u8 squash_in) - : child_offset(child_offset_in), squash(squash_in) {} - - bool operator==(const RoseInstrIncludedJump &ri) const { - return child_offset == ri.child_offset && squash == ri.squash; - } - - size_t hash() const override { - return hash_all(static_cast<int>(opcode), child_offset, squash); - } - - void write(void *dest, RoseEngineBlob &blob, - const OffsetMap &offset_map) const override; - - bool equiv_to(const RoseInstrIncludedJump &ri, const OffsetMap &, - const OffsetMap &) const { - return child_offset == ri.child_offset && squash == ri.squash; - } -}; - +class RoseInstrCheckInfix + : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_INFIX, + ROSE_STRUCT_CHECK_INFIX, + RoseInstrCheckInfix> { +public: + u32 queue; + u32 lag; + ReportID report; + const RoseInstruction *target; + + RoseInstrCheckInfix(u32 queue_in, u32 lag_in, ReportID report_in, + const RoseInstruction *target_in) + : queue(queue_in), lag(lag_in), report(report_in), target(target_in) {} + + bool operator==(const RoseInstrCheckInfix &ri) const { + return queue == ri.queue && lag == ri.lag && report == ri.report && + target 
== ri.target; + } + + size_t hash() const override { + return hash_all(opcode, queue, lag, report); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckInfix &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return queue == ri.queue && lag == ri.lag && report == ri.report && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckPrefix + : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_PREFIX, + ROSE_STRUCT_CHECK_PREFIX, + RoseInstrCheckPrefix> { +public: + u32 queue; + u32 lag; + ReportID report; + const RoseInstruction *target; + + RoseInstrCheckPrefix(u32 queue_in, u32 lag_in, ReportID report_in, + const RoseInstruction *target_in) + : queue(queue_in), lag(lag_in), report(report_in), target(target_in) {} + + bool operator==(const RoseInstrCheckPrefix &ri) const { + return queue == ri.queue && lag == ri.lag && report == ri.report && + target == ri.target; + } + + size_t hash() const override { + return hash_all(opcode, queue, lag, report); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckPrefix &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return queue == ri.queue && lag == ri.lag && report == ri.report && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrPushDelayed + : public RoseInstrBaseNoTargets<ROSE_INSTR_PUSH_DELAYED, + ROSE_STRUCT_PUSH_DELAYED, + RoseInstrPushDelayed> { +public: + u8 delay; + u32 index; + + RoseInstrPushDelayed(u8 delay_in, u32 index_in) + : delay(delay_in), index(index_in) {} + + bool operator==(const RoseInstrPushDelayed &ri) const { + return delay == ri.delay && index == ri.index; + } + + size_t hash() const override { + return hash_all(opcode, delay, index); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrPushDelayed &ri, const OffsetMap &, + const OffsetMap &) const { + return delay == ri.delay && index == ri.index; + } +}; + +class RoseInstrCatchUp + : public RoseInstrBaseTrivial<ROSE_INSTR_CATCH_UP, ROSE_STRUCT_CATCH_UP, + RoseInstrCatchUp> { +public: + ~RoseInstrCatchUp() override; +}; + +class RoseInstrCatchUpMpv + : public RoseInstrBaseTrivial<ROSE_INSTR_CATCH_UP_MPV, + ROSE_STRUCT_CATCH_UP_MPV, + RoseInstrCatchUpMpv> { +public: + ~RoseInstrCatchUpMpv() override; +}; + +class RoseInstrSomAdjust + : public RoseInstrBaseNoTargets<ROSE_INSTR_SOM_ADJUST, + ROSE_STRUCT_SOM_ADJUST, + RoseInstrSomAdjust> { +public: + u32 distance; + + explicit RoseInstrSomAdjust(u32 distance_in) : distance(distance_in) {} + + bool operator==(const RoseInstrSomAdjust &ri) const { + return distance == ri.distance; + } + + size_t hash() const override { + return hash_all(opcode, distance); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrSomAdjust &ri, const OffsetMap &, + const OffsetMap &) const { + return distance == ri.distance; + } +}; + +class RoseInstrSomLeftfix + : public RoseInstrBaseNoTargets<ROSE_INSTR_SOM_LEFTFIX, + ROSE_STRUCT_SOM_LEFTFIX, + RoseInstrSomLeftfix> { +public: + u32 queue; + u32 lag; + + RoseInstrSomLeftfix(u32 queue_in, u32 lag_in) + : queue(queue_in), lag(lag_in) {} + + bool operator==(const RoseInstrSomLeftfix &ri) const { + return queue == ri.queue && lag == ri.lag; + } + + size_t hash() const override 
{ + return hash_all(opcode, queue, lag); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrSomLeftfix &ri, const OffsetMap &, + const OffsetMap &) const { + return queue == ri.queue && lag == ri.lag; + } +}; + +class RoseInstrSomFromReport + : public RoseInstrBaseNoTargets<ROSE_INSTR_SOM_FROM_REPORT, + ROSE_STRUCT_SOM_FROM_REPORT, + RoseInstrSomFromReport> { +public: + som_operation som; + + RoseInstrSomFromReport() { + std::memset(&som, 0, sizeof(som)); + } + + bool operator==(const RoseInstrSomFromReport &ri) const { + return std::memcmp(&som, &ri.som, sizeof(som)) == 0; + } + + size_t hash() const override { + return hash_all(opcode, som.type, som.onmatch); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrSomFromReport &ri, const OffsetMap &, + const OffsetMap &) const { + return std::memcmp(&som, &ri.som, sizeof(som)) == 0; + } +}; + +class RoseInstrSomZero + : public RoseInstrBaseTrivial<ROSE_INSTR_SOM_ZERO, ROSE_STRUCT_SOM_ZERO, + RoseInstrSomZero> { +public: + ~RoseInstrSomZero() override; +}; + +class RoseInstrTriggerInfix + : public RoseInstrBaseNoTargets<ROSE_INSTR_TRIGGER_INFIX, + ROSE_STRUCT_TRIGGER_INFIX, + RoseInstrTriggerInfix> { +public: + u8 cancel; + u32 queue; + u32 event; + + RoseInstrTriggerInfix(u8 cancel_in, u32 queue_in, u32 event_in) + : cancel(cancel_in), queue(queue_in), event(event_in) {} + + bool operator==(const RoseInstrTriggerInfix &ri) const { + return cancel == ri.cancel && queue == ri.queue && event == ri.event; + } + + size_t hash() const override { + return hash_all(opcode, cancel, queue, event); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrTriggerInfix &ri, const OffsetMap &, + const OffsetMap &) const { + return cancel == ri.cancel && queue == ri.queue && event == ri.event; + } +}; + +class RoseInstrTriggerSuffix + : public RoseInstrBaseNoTargets<ROSE_INSTR_TRIGGER_SUFFIX, + ROSE_STRUCT_TRIGGER_SUFFIX, + RoseInstrTriggerSuffix> { +public: + u32 queue; + u32 event; + + RoseInstrTriggerSuffix(u32 queue_in, u32 event_in) + : queue(queue_in), event(event_in) {} + + bool operator==(const RoseInstrTriggerSuffix &ri) const { + return queue == ri.queue && event == ri.event; + } + + size_t hash() const override { + return hash_all(opcode, queue, event); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrTriggerSuffix &ri, const OffsetMap &, + const OffsetMap &) const { + return queue == ri.queue && event == ri.event; + } +}; + +class RoseInstrDedupe + : public RoseInstrBaseOneTarget<ROSE_INSTR_DEDUPE, ROSE_STRUCT_DEDUPE, + RoseInstrDedupe> { +public: + u8 quash_som; + u32 dkey; + s32 offset_adjust; + const RoseInstruction *target; + + RoseInstrDedupe(u8 quash_som_in, u32 dkey_in, s32 offset_adjust_in, + const RoseInstruction *target_in) + : quash_som(quash_som_in), dkey(dkey_in), + offset_adjust(offset_adjust_in), target(target_in) {} + + bool operator==(const RoseInstrDedupe &ri) const { + return quash_som == ri.quash_som && dkey == ri.dkey && + offset_adjust == ri.offset_adjust && target == ri.target; + } + + size_t hash() const override { + return hash_all(opcode, quash_som, dkey, offset_adjust); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const 
RoseInstrDedupe &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return quash_som == ri.quash_som && dkey == ri.dkey && + offset_adjust == ri.offset_adjust && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrDedupeSom + : public RoseInstrBaseOneTarget<ROSE_INSTR_DEDUPE_SOM, + ROSE_STRUCT_DEDUPE_SOM, + RoseInstrDedupeSom> { +public: + u8 quash_som; + u32 dkey; + s32 offset_adjust; + const RoseInstruction *target; + + RoseInstrDedupeSom(u8 quash_som_in, u32 dkey_in, s32 offset_adjust_in, + const RoseInstruction *target_in) + : quash_som(quash_som_in), dkey(dkey_in), + offset_adjust(offset_adjust_in), target(target_in) {} + + bool operator==(const RoseInstrDedupeSom &ri) const { + return quash_som == ri.quash_som && dkey == ri.dkey && + offset_adjust == ri.offset_adjust && target == ri.target; + } + + size_t hash() const override { + return hash_all(opcode, quash_som, dkey, offset_adjust); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrDedupeSom &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return quash_som == ri.quash_som && dkey == ri.dkey && + offset_adjust == ri.offset_adjust && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrReportChain + : public RoseInstrBaseNoTargets<ROSE_INSTR_REPORT_CHAIN, + ROSE_STRUCT_REPORT_CHAIN, + RoseInstrReportChain> { +public: + u32 event; + u64a top_squash_distance; + + RoseInstrReportChain(u32 event_in, u32 top_squash_distance_in) + : event(event_in), top_squash_distance(top_squash_distance_in) {} + + bool operator==(const RoseInstrReportChain &ri) const { + return event == ri.event && + top_squash_distance == ri.top_squash_distance; + } + + size_t hash() const override { + return hash_all(opcode, event, top_squash_distance); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrReportChain &ri, const OffsetMap &, + const OffsetMap &) const { + return event == ri.event && + top_squash_distance == ri.top_squash_distance; + } +}; + +class RoseInstrReportSomInt + : public RoseInstrBaseNoTargets<ROSE_INSTR_REPORT_SOM_INT, + ROSE_STRUCT_REPORT_SOM_INT, + RoseInstrReportSomInt> { +public: + som_operation som; + + RoseInstrReportSomInt() { + std::memset(&som, 0, sizeof(som)); + } + + bool operator==(const RoseInstrReportSomInt &ri) const { + return std::memcmp(&som, &ri.som, sizeof(som)) == 0; + } + + size_t hash() const override { + return hash_all(opcode, som.type, som.onmatch); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrReportSomInt &ri, const OffsetMap &, + const OffsetMap &) const { + return std::memcmp(&som, &ri.som, sizeof(som)) == 0; + } +}; + +class RoseInstrReportSomAware + : public RoseInstrBaseNoTargets<ROSE_INSTR_REPORT_SOM_AWARE, + ROSE_STRUCT_REPORT_SOM_AWARE, + RoseInstrReportSomAware> { +public: + som_operation som; + + RoseInstrReportSomAware() { + std::memset(&som, 0, sizeof(som)); + } + + bool operator==(const RoseInstrReportSomAware &ri) const { + return std::memcmp(&som, &ri.som, sizeof(som)) == 0; + } + + size_t hash() const override { + return hash_all(opcode, som.type, som.onmatch); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrReportSomAware &ri, const OffsetMap &, + const OffsetMap &) 
const { + return std::memcmp(&som, &ri.som, sizeof(som)) == 0; + } +}; + +class RoseInstrReport + : public RoseInstrBaseNoTargets<ROSE_INSTR_REPORT, ROSE_STRUCT_REPORT, + RoseInstrReport> { +public: + ReportID onmatch; + s32 offset_adjust; + + RoseInstrReport(ReportID onmatch_in, s32 offset_adjust_in) + : onmatch(onmatch_in), offset_adjust(offset_adjust_in) {} + + bool operator==(const RoseInstrReport &ri) const { + return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust; + } + + size_t hash() const override { + return hash_all(opcode, onmatch, offset_adjust); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrReport &ri, const OffsetMap &, + const OffsetMap &) const { + return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust; + } +}; + +class RoseInstrReportExhaust + : public RoseInstrBaseNoTargets<ROSE_INSTR_REPORT_EXHAUST, + ROSE_STRUCT_REPORT_EXHAUST, + RoseInstrReportExhaust> { +public: + ReportID onmatch; + s32 offset_adjust; + u32 ekey; + + RoseInstrReportExhaust(ReportID onmatch_in, s32 offset_adjust_in, + u32 ekey_in) + : onmatch(onmatch_in), offset_adjust(offset_adjust_in), ekey(ekey_in) {} + + bool operator==(const RoseInstrReportExhaust &ri) const { + return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust && + ekey == ri.ekey; + } + + size_t hash() const override { + return hash_all(opcode, onmatch, offset_adjust, ekey); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrReportExhaust &ri, const OffsetMap &, + const OffsetMap &) const { + return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust && + ekey == ri.ekey; + } +}; + +class RoseInstrReportSom + : public RoseInstrBaseNoTargets<ROSE_INSTR_REPORT_SOM, + ROSE_STRUCT_REPORT_SOM, + RoseInstrReportSom> { +public: + ReportID onmatch; + s32 offset_adjust; + + RoseInstrReportSom(ReportID onmatch_in, s32 offset_adjust_in) + : onmatch(onmatch_in), offset_adjust(offset_adjust_in) {} + + bool operator==(const RoseInstrReportSom &ri) const { + return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust; + } + + size_t hash() const override { + return hash_all(opcode, onmatch, offset_adjust); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrReportSom &ri, const OffsetMap &, + const OffsetMap &) const { + return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust; + } +}; + +class RoseInstrReportSomExhaust + : public RoseInstrBaseNoTargets<ROSE_INSTR_REPORT_SOM_EXHAUST, + ROSE_STRUCT_REPORT_SOM_EXHAUST, + RoseInstrReportSomExhaust> { +public: + ReportID onmatch; + s32 offset_adjust; + u32 ekey; + + RoseInstrReportSomExhaust(ReportID onmatch_in, s32 offset_adjust_in, + u32 ekey_in) + : onmatch(onmatch_in), offset_adjust(offset_adjust_in), ekey(ekey_in) {} + + bool operator==(const RoseInstrReportSomExhaust &ri) const { + return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust && + ekey == ri.ekey; + } + + size_t hash() const override { + return hash_all(opcode, onmatch, offset_adjust, ekey); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrReportSomExhaust &ri, const OffsetMap &, + const OffsetMap &) const { + return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust && + ekey == ri.ekey; + } +}; + +class RoseInstrDedupeAndReport + : public 
RoseInstrBaseOneTarget<ROSE_INSTR_DEDUPE_AND_REPORT, + ROSE_STRUCT_DEDUPE_AND_REPORT, + RoseInstrDedupeAndReport> { +public: + u8 quash_som; + u32 dkey; + ReportID onmatch; + s32 offset_adjust; + const RoseInstruction *target; + + RoseInstrDedupeAndReport(u8 quash_som_in, u32 dkey_in, ReportID onmatch_in, + s32 offset_adjust_in, + const RoseInstruction *target_in) + : quash_som(quash_som_in), dkey(dkey_in), onmatch(onmatch_in), + offset_adjust(offset_adjust_in), target(target_in) {} + + bool operator==(const RoseInstrDedupeAndReport &ri) const { + return quash_som == ri.quash_som && dkey == ri.dkey && + onmatch == ri.onmatch && offset_adjust == ri.offset_adjust && + target == ri.target; + } + + size_t hash() const override { + return hash_all(opcode, quash_som, dkey, onmatch, offset_adjust); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrDedupeAndReport &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return quash_som == ri.quash_som && dkey == ri.dkey && + onmatch == ri.onmatch && offset_adjust == ri.offset_adjust && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrFinalReport + : public RoseInstrBaseNoTargets<ROSE_INSTR_FINAL_REPORT, + ROSE_STRUCT_FINAL_REPORT, + RoseInstrFinalReport> { +public: + ReportID onmatch; + s32 offset_adjust; + + RoseInstrFinalReport(ReportID onmatch_in, s32 offset_adjust_in) + : onmatch(onmatch_in), offset_adjust(offset_adjust_in) {} + + bool operator==(const RoseInstrFinalReport &ri) const { + return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust; + } + + size_t hash() const override { + return hash_all(opcode, onmatch, offset_adjust); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrFinalReport &ri, const OffsetMap &, + const OffsetMap &) const { + return onmatch == ri.onmatch && offset_adjust == ri.offset_adjust; + } +}; + +class RoseInstrCheckExhausted + : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_EXHAUSTED, + ROSE_STRUCT_CHECK_EXHAUSTED, + RoseInstrCheckExhausted> { +public: + u32 ekey; + const RoseInstruction *target; + + RoseInstrCheckExhausted(u32 ekey_in, const RoseInstruction *target_in) + : ekey(ekey_in), target(target_in) {} + + bool operator==(const RoseInstrCheckExhausted &ri) const { + return ekey == ri.ekey && target == ri.target; + } + + size_t hash() const override { + return hash_all(opcode, ekey); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckExhausted &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return ekey == ri.ekey && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckMinLength + : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MIN_LENGTH, + ROSE_STRUCT_CHECK_MIN_LENGTH, + RoseInstrCheckMinLength> { +public: + s32 end_adj; + u64a min_length; + const RoseInstruction *target; + + RoseInstrCheckMinLength(s32 end_adj_in, u64a min_length_in, + const RoseInstruction *target_in) + : end_adj(end_adj_in), min_length(min_length_in), target(target_in) {} + + bool operator==(const RoseInstrCheckMinLength &ri) const { + return end_adj == ri.end_adj && min_length == ri.min_length && + target == ri.target; + } + + size_t hash() const override { + return hash_all(opcode, end_adj, min_length); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap 
&offset_map) const override; + + bool equiv_to(const RoseInstrCheckMinLength &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return end_adj == ri.end_adj && min_length == ri.min_length && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrSetState + : public RoseInstrBaseNoTargets<ROSE_INSTR_SET_STATE, ROSE_STRUCT_SET_STATE, + RoseInstrSetState> { +public: + u32 index; + + explicit RoseInstrSetState(u32 index_in) : index(index_in) {} + + bool operator==(const RoseInstrSetState &ri) const { + return index == ri.index; + } + + size_t hash() const override { + return hash_all(opcode, index); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrSetState &ri, const OffsetMap &, + const OffsetMap &) const { + return index == ri.index; + } +}; + +class RoseInstrSetGroups + : public RoseInstrBaseNoTargets<ROSE_INSTR_SET_GROUPS, + ROSE_STRUCT_SET_GROUPS, + RoseInstrSetGroups> { +public: + rose_group groups; + + explicit RoseInstrSetGroups(rose_group groups_in) : groups(groups_in) {} + + bool operator==(const RoseInstrSetGroups &ri) const { + return groups == ri.groups; + } + + size_t hash() const override { + return hash_all(opcode, groups); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrSetGroups &ri, const OffsetMap &, + const OffsetMap &) const { + return groups == ri.groups; + } +}; + +class RoseInstrSquashGroups + : public RoseInstrBaseNoTargets<ROSE_INSTR_SQUASH_GROUPS, + ROSE_STRUCT_SQUASH_GROUPS, + RoseInstrSquashGroups> { +public: + rose_group groups; + + explicit RoseInstrSquashGroups(rose_group groups_in) : groups(groups_in) {} + + bool operator==(const RoseInstrSquashGroups &ri) const { + return groups == ri.groups; + } + + size_t hash() const override { + return hash_all(opcode, groups); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrSquashGroups &ri, const OffsetMap &, + const OffsetMap &) const { + return groups == ri.groups; + } +}; + +class RoseInstrCheckState + : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_STATE, + ROSE_STRUCT_CHECK_STATE, + RoseInstrCheckState> { +public: + u32 index; + const RoseInstruction *target; + + RoseInstrCheckState(u32 index_in, const RoseInstruction *target_in) + : index(index_in), target(target_in) {} + + bool operator==(const RoseInstrCheckState &ri) const { + return index == ri.index && target == ri.target; + } + + size_t hash() const override { + return hash_all(opcode, index); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckState &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return index == ri.index && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrSparseIterBegin + : public RoseInstrBase<ROSE_INSTR_SPARSE_ITER_BEGIN, + ROSE_STRUCT_SPARSE_ITER_BEGIN, + RoseInstrSparseIterBegin> { +public: + u32 num_keys; // total number of multibit keys + std::vector<std::pair<u32, const RoseInstruction *>> jump_table; + const RoseInstruction *target; + + RoseInstrSparseIterBegin(u32 num_keys_in, + const RoseInstruction *target_in) + : num_keys(num_keys_in), target(target_in) {} + + bool operator==(const RoseInstrSparseIterBegin &ri) const { + return num_keys == ri.num_keys && jump_table == ri.jump_table && + 
target == ri.target; + } + + size_t hash() const override { + size_t v = hash_all(opcode, num_keys); + for (const u32 &key : jump_table | boost::adaptors::map_keys) { + hash_combine(v, key); + } + return v; + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + void update_target(const RoseInstruction *old_target, + const RoseInstruction *new_target) override { + if (target == old_target) { + target = new_target; + } + for (auto &jump : jump_table) { + if (jump.second == old_target) { + jump.second = new_target; + } + } + } + + bool equiv_to(const RoseInstrSparseIterBegin &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + if (iter_offset != ri.iter_offset || + offsets.at(target) != other_offsets.at(ri.target)) { + return false; + } + if (jump_table.size() != ri.jump_table.size()) { + return false; + } + auto it1 = jump_table.begin(), it2 = ri.jump_table.begin(); + for (; it1 != jump_table.end(); ++it1, ++it2) { + if (it1->first != it2->first) { + return false; + } + if (offsets.at(it1->second) != other_offsets.at(it2->second)) { + return false; + } + } + return true; + } + +private: + friend class RoseInstrSparseIterNext; + + // These variables allow us to use the same multibit iterator and jump + // table in subsequent SPARSE_ITER_NEXT write() operations. + mutable bool is_written = false; + mutable u32 iter_offset = 0; + mutable u32 jump_table_offset = 0; +}; + +class RoseInstrSparseIterNext + : public RoseInstrBase<ROSE_INSTR_SPARSE_ITER_NEXT, + ROSE_STRUCT_SPARSE_ITER_NEXT, + RoseInstrSparseIterNext> { +public: + u32 state; + const RoseInstrSparseIterBegin *begin; + const RoseInstruction *target; + + RoseInstrSparseIterNext(u32 state_in, + const RoseInstrSparseIterBegin *begin_in, + const RoseInstruction *target_in) + : state(state_in), begin(begin_in), target(target_in) {} + + bool operator==(const RoseInstrSparseIterNext &ri) const { + return state == ri.state && begin == ri.begin && target == ri.target; + } + + size_t hash() const override { + return hash_all(opcode, state); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + void update_target(const RoseInstruction *old_target, + const RoseInstruction *new_target) override { + if (target == old_target) { + target = new_target; + } + if (begin == old_target) { + assert(new_target->code() == ROSE_INSTR_SPARSE_ITER_BEGIN); + begin = static_cast<const RoseInstrSparseIterBegin *>(new_target); + } + } + + bool equiv_to(const RoseInstrSparseIterNext &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return state == ri.state && + offsets.at(begin) == other_offsets.at(ri.begin) && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrSparseIterAny + : public RoseInstrBaseOneTarget<ROSE_INSTR_SPARSE_ITER_ANY, + ROSE_STRUCT_SPARSE_ITER_ANY, + RoseInstrSparseIterAny> { +public: + u32 num_keys; // total number of multibit keys + std::vector<u32> keys; + const RoseInstruction *target; + + RoseInstrSparseIterAny(u32 num_keys_in, std::vector<u32> keys_in, + const RoseInstruction *target_in) + : num_keys(num_keys_in), keys(std::move(keys_in)), target(target_in) {} + + bool operator==(const RoseInstrSparseIterAny &ri) const { + return num_keys == ri.num_keys && keys == ri.keys && + target == ri.target; + } + + size_t hash() const override { + return hash_all(opcode, num_keys, keys); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const 
override; + + bool equiv_to(const RoseInstrSparseIterAny &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return num_keys == ri.num_keys && keys == ri.keys && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrEnginesEod + : public RoseInstrBaseNoTargets<ROSE_INSTR_ENGINES_EOD, + ROSE_STRUCT_ENGINES_EOD, + RoseInstrEnginesEod> { +public: + u32 iter_offset; + + explicit RoseInstrEnginesEod(u32 iter_in) : iter_offset(iter_in) {} + + bool operator==(const RoseInstrEnginesEod &ri) const { + return iter_offset == ri.iter_offset; + } + + size_t hash() const override { + return hash_all(opcode, iter_offset); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrEnginesEod &ri, const OffsetMap &, + const OffsetMap &) const { + return iter_offset == ri.iter_offset; + } +}; + +class RoseInstrSuffixesEod + : public RoseInstrBaseTrivial<ROSE_INSTR_SUFFIXES_EOD, + ROSE_STRUCT_SUFFIXES_EOD, + RoseInstrSuffixesEod> { +public: + ~RoseInstrSuffixesEod() override; +}; + +class RoseInstrMatcherEod : public RoseInstrBaseTrivial<ROSE_INSTR_MATCHER_EOD, + ROSE_STRUCT_MATCHER_EOD, + RoseInstrMatcherEod> { +public: + ~RoseInstrMatcherEod() override; +}; + +class RoseInstrCheckLongLit + : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_LONG_LIT, + ROSE_STRUCT_CHECK_LONG_LIT, + RoseInstrCheckLongLit> { +public: + std::string literal; + const RoseInstruction *target; + + RoseInstrCheckLongLit(std::string literal_in, + const RoseInstruction *target_in) + : literal(std::move(literal_in)), target(target_in) {} + + bool operator==(const RoseInstrCheckLongLit &ri) const { + return literal == ri.literal && target == ri.target; + } + + size_t hash() const override { + return hash_all(opcode, literal); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckLongLit &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return literal == ri.literal && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckLongLitNocase + : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_LONG_LIT_NOCASE, + ROSE_STRUCT_CHECK_LONG_LIT_NOCASE, + RoseInstrCheckLongLitNocase> { +public: + std::string literal; + const RoseInstruction *target; + + RoseInstrCheckLongLitNocase(std::string literal_in, + const RoseInstruction *target_in) + : literal(std::move(literal_in)), target(target_in) { + upperString(literal); + } + + bool operator==(const RoseInstrCheckLongLitNocase &ri) const { + return literal == ri.literal && target == ri.target; + } + + size_t hash() const override { + return hash_all(opcode, literal); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckLongLitNocase &ri, + const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return literal == ri.literal && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckMedLit + : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MED_LIT, + ROSE_STRUCT_CHECK_MED_LIT, + RoseInstrCheckMedLit> { +public: + std::string literal; + const RoseInstruction *target; + + explicit RoseInstrCheckMedLit(std::string literal_in, + const RoseInstruction *target_in) + : literal(std::move(literal_in)), target(target_in) {} + + bool operator==(const RoseInstrCheckMedLit &ri) const { + return literal == ri.literal && target == ri.target; 
+ } + + size_t hash() const override { + return hash_all(opcode, literal); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckMedLit &ri, const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return literal == ri.literal && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckMedLitNocase + : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MED_LIT_NOCASE, + ROSE_STRUCT_CHECK_MED_LIT_NOCASE, + RoseInstrCheckMedLitNocase> { +public: + std::string literal; + const RoseInstruction *target; + + explicit RoseInstrCheckMedLitNocase(std::string literal_in, + const RoseInstruction *target_in) + : literal(std::move(literal_in)), target(target_in) { + upperString(literal); + } + + bool operator==(const RoseInstrCheckMedLitNocase &ri) const { + return literal == ri.literal && target == ri.target; + } + + size_t hash() const override { + return hash_all(opcode, literal); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckMedLitNocase &ri, + const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return literal == ri.literal && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrClearWorkDone + : public RoseInstrBaseTrivial<ROSE_INSTR_CLEAR_WORK_DONE, + ROSE_STRUCT_CLEAR_WORK_DONE, + RoseInstrClearWorkDone> { +public: + ~RoseInstrClearWorkDone() override; +}; + +class RoseInstrMultipathLookaround + : public RoseInstrBaseOneTarget<ROSE_INSTR_MULTIPATH_LOOKAROUND, + ROSE_STRUCT_MULTIPATH_LOOKAROUND, + RoseInstrMultipathLookaround> { +public: + std::vector<std::vector<LookEntry>> multi_look; + s32 last_start; + std::array<u8, 16> start_mask; + const RoseInstruction *target; + + RoseInstrMultipathLookaround(std::vector<std::vector<LookEntry>> ml, + s32 last_start_in, + std::array<u8, 16> start_mask_in, + const RoseInstruction *target_in) + : multi_look(std::move(ml)), last_start(last_start_in), + start_mask(std::move(start_mask_in)), target(target_in) {} + + bool operator==(const RoseInstrMultipathLookaround &ri) const { + return multi_look == ri.multi_look && last_start == ri.last_start + && start_mask == ri.start_mask && target == ri.target; + } + + size_t hash() const override { + return hash_all(opcode, multi_look, last_start, start_mask); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrMultipathLookaround &ri, + const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return multi_look == ri.multi_look && last_start == ri.last_start + && start_mask == ri.start_mask + && offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckMultipathShufti16x8 + : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_16x8, + ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_16x8, + RoseInstrCheckMultipathShufti16x8> { +public: + std::array<u8, 32> nib_mask; + std::array<u8, 64> bucket_select_mask; + std::array<u8, 64> data_select_mask; + u16 hi_bits_mask; + u16 lo_bits_mask; + u16 neg_mask; + s32 base_offset; + s32 last_start; + const RoseInstruction *target; + + RoseInstrCheckMultipathShufti16x8(std::array<u8, 32> nib_mask_in, + std::array<u8, 64> bucket_select_mask_in, + std::array<u8, 64> data_select_mask_in, + u16 hi_bits_mask_in, u16 lo_bits_mask_in, + u16 neg_mask_in, s32 base_offset_in, + s32 last_start_in, + const RoseInstruction 
*target_in) + : nib_mask(std::move(nib_mask_in)), + bucket_select_mask(std::move(bucket_select_mask_in)), + data_select_mask(std::move(data_select_mask_in)), + hi_bits_mask(hi_bits_mask_in), lo_bits_mask(lo_bits_mask_in), + neg_mask(neg_mask_in), base_offset(base_offset_in), + last_start(last_start_in), target(target_in) {} + + bool operator==(const RoseInstrCheckMultipathShufti16x8 &ri) const { + return nib_mask == ri.nib_mask && + bucket_select_mask == ri.bucket_select_mask && + data_select_mask == ri.data_select_mask && + hi_bits_mask == ri.hi_bits_mask && + lo_bits_mask == ri.lo_bits_mask && + neg_mask == ri.neg_mask && base_offset == ri.base_offset && + last_start == ri.last_start && target == ri.target; + } + + size_t hash() const override { + return hash_all(opcode, nib_mask, bucket_select_mask, data_select_mask, + hi_bits_mask, lo_bits_mask, neg_mask, base_offset, + last_start); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckMultipathShufti16x8 &ri, + const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return nib_mask == ri.nib_mask && + bucket_select_mask == ri.bucket_select_mask && + data_select_mask == ri.data_select_mask && + hi_bits_mask == ri.hi_bits_mask && + lo_bits_mask == ri.lo_bits_mask && neg_mask == ri.neg_mask && + base_offset == ri.base_offset && last_start == ri.last_start && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckMultipathShufti32x8 + : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_32x8, + ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x8, + RoseInstrCheckMultipathShufti32x8> { +public: + std::array<u8, 32> hi_mask; + std::array<u8, 32> lo_mask; + std::array<u8, 64> bucket_select_mask; + std::array<u8, 64> data_select_mask; + u32 hi_bits_mask; + u32 lo_bits_mask; + u32 neg_mask; + s32 base_offset; + s32 last_start; + const RoseInstruction *target; + + RoseInstrCheckMultipathShufti32x8(std::array<u8, 32> hi_mask_in, + std::array<u8, 32> lo_mask_in, + std::array<u8, 64> bucket_select_mask_in, + std::array<u8, 64> data_select_mask_in, + u32 hi_bits_mask_in, u32 lo_bits_mask_in, + u32 neg_mask_in, s32 base_offset_in, + s32 last_start_in, + const RoseInstruction *target_in) + : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)), + bucket_select_mask(std::move(bucket_select_mask_in)), + data_select_mask(std::move(data_select_mask_in)), + hi_bits_mask(hi_bits_mask_in), lo_bits_mask(lo_bits_mask_in), + neg_mask(neg_mask_in), base_offset(base_offset_in), + last_start(last_start_in), target(target_in) {} + + bool operator==(const RoseInstrCheckMultipathShufti32x8 &ri) const { + return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && + bucket_select_mask == ri.bucket_select_mask && + data_select_mask == ri.data_select_mask && + hi_bits_mask == ri.hi_bits_mask && + lo_bits_mask == ri.lo_bits_mask && + neg_mask == ri.neg_mask && base_offset == ri.base_offset && + last_start == ri.last_start && target == ri.target; + } + + size_t hash() const override { + return hash_all(opcode, hi_mask, lo_mask, bucket_select_mask, + data_select_mask, hi_bits_mask, lo_bits_mask, neg_mask, + base_offset, last_start); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckMultipathShufti32x8 &ri, + const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && + bucket_select_mask 
== ri.bucket_select_mask && + data_select_mask == ri.data_select_mask && + hi_bits_mask == ri.hi_bits_mask && + lo_bits_mask == ri.lo_bits_mask && neg_mask == ri.neg_mask && + base_offset == ri.base_offset && last_start == ri.last_start && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckMultipathShufti32x16 + : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_32x16, + ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x16, + RoseInstrCheckMultipathShufti32x16> { +public: + std::array<u8, 32> hi_mask; + std::array<u8, 32> lo_mask; + std::array<u8, 64> bucket_select_mask_hi; + std::array<u8, 64> bucket_select_mask_lo; + std::array<u8, 64> data_select_mask; + u32 hi_bits_mask; + u32 lo_bits_mask; + u32 neg_mask; + s32 base_offset; + s32 last_start; + const RoseInstruction *target; + + RoseInstrCheckMultipathShufti32x16(std::array<u8, 32> hi_mask_in, + std::array<u8, 32> lo_mask_in, + std::array<u8, 64> bucket_select_mask_hi_in, + std::array<u8, 64> bucket_select_mask_lo_in, + std::array<u8, 64> data_select_mask_in, + u32 hi_bits_mask_in, u32 lo_bits_mask_in, + u32 neg_mask_in, s32 base_offset_in, + s32 last_start_in, + const RoseInstruction *target_in) + : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)), + bucket_select_mask_hi(std::move(bucket_select_mask_hi_in)), + bucket_select_mask_lo(std::move(bucket_select_mask_lo_in)), + data_select_mask(std::move(data_select_mask_in)), + hi_bits_mask(hi_bits_mask_in), lo_bits_mask(lo_bits_mask_in), + neg_mask(neg_mask_in), base_offset(base_offset_in), + last_start(last_start_in), target(target_in) {} + + bool operator==(const RoseInstrCheckMultipathShufti32x16 &ri) const { + return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && + bucket_select_mask_hi == ri.bucket_select_mask_hi && + bucket_select_mask_lo == ri.bucket_select_mask_lo && + data_select_mask == ri.data_select_mask && + hi_bits_mask == ri.hi_bits_mask && + lo_bits_mask == ri.lo_bits_mask && + neg_mask == ri.neg_mask && base_offset == ri.base_offset && + last_start == ri.last_start && target == ri.target; + } + + size_t hash() const override { + return hash_all(opcode, hi_mask, lo_mask, bucket_select_mask_hi, + bucket_select_mask_lo, data_select_mask, hi_bits_mask, + lo_bits_mask, neg_mask, base_offset, last_start); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckMultipathShufti32x16 &ri, + const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && + bucket_select_mask_hi == ri.bucket_select_mask_hi && + bucket_select_mask_lo == ri.bucket_select_mask_lo && + data_select_mask == ri.data_select_mask && + hi_bits_mask == ri.hi_bits_mask && + lo_bits_mask == ri.lo_bits_mask && neg_mask == ri.neg_mask && + base_offset == ri.base_offset && last_start == ri.last_start && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrCheckMultipathShufti64 + : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_64, + ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_64, + RoseInstrCheckMultipathShufti64> { +public: + std::array<u8, 32> hi_mask; + std::array<u8, 32> lo_mask; + std::array<u8, 64> bucket_select_mask; + std::array<u8, 64> data_select_mask; + u64a hi_bits_mask; + u64a lo_bits_mask; + u64a neg_mask; + s32 base_offset; + s32 last_start; + const RoseInstruction *target; + + RoseInstrCheckMultipathShufti64(std::array<u8, 32> hi_mask_in, + std::array<u8, 32> lo_mask_in, 
+ std::array<u8, 64> bucket_select_mask_in, + std::array<u8, 64> data_select_mask_in, + u64a hi_bits_mask_in, u64a lo_bits_mask_in, + u64a neg_mask_in, s32 base_offset_in, + s32 last_start_in, + const RoseInstruction *target_in) + : hi_mask(std::move(hi_mask_in)), lo_mask(std::move(lo_mask_in)), + bucket_select_mask(std::move(bucket_select_mask_in)), + data_select_mask(std::move(data_select_mask_in)), + hi_bits_mask(hi_bits_mask_in), lo_bits_mask(lo_bits_mask_in), + neg_mask(neg_mask_in), base_offset(base_offset_in), + last_start(last_start_in), target(target_in) {} + + bool operator==(const RoseInstrCheckMultipathShufti64 &ri) const { + return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && + bucket_select_mask == ri.bucket_select_mask && + data_select_mask == ri.data_select_mask && + hi_bits_mask == ri.hi_bits_mask && + lo_bits_mask == ri.lo_bits_mask && + neg_mask == ri.neg_mask && base_offset == ri.base_offset && + last_start == ri.last_start && target == ri.target; + } + + size_t hash() const override { + return hash_all(opcode, hi_mask, lo_mask, bucket_select_mask, + data_select_mask, hi_bits_mask, lo_bits_mask, neg_mask, + base_offset, last_start); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrCheckMultipathShufti64 &ri, + const OffsetMap &offsets, + const OffsetMap &other_offsets) const { + return hi_mask == ri.hi_mask && lo_mask == ri.lo_mask && + bucket_select_mask == ri.bucket_select_mask && + data_select_mask == ri.data_select_mask && + hi_bits_mask == ri.hi_bits_mask && + lo_bits_mask == ri.lo_bits_mask && neg_mask == ri.neg_mask && + base_offset == ri.base_offset && last_start == ri.last_start && + offsets.at(target) == other_offsets.at(ri.target); + } +}; + +class RoseInstrIncludedJump + : public RoseInstrBaseNoTargets<ROSE_INSTR_INCLUDED_JUMP, + ROSE_STRUCT_INCLUDED_JUMP, + RoseInstrIncludedJump> { +public: + u32 child_offset; + u8 squash; + + RoseInstrIncludedJump(u32 child_offset_in, u8 squash_in) + : child_offset(child_offset_in), squash(squash_in) {} + + bool operator==(const RoseInstrIncludedJump &ri) const { + return child_offset == ri.child_offset && squash == ri.squash; + } + + size_t hash() const override { + return hash_all(static_cast<int>(opcode), child_offset, squash); + } + + void write(void *dest, RoseEngineBlob &blob, + const OffsetMap &offset_map) const override; + + bool equiv_to(const RoseInstrIncludedJump &ri, const OffsetMap &, + const OffsetMap &) const { + return child_offset == ri.child_offset && squash == ri.squash; + } +}; + class RoseInstrSetLogical : public RoseInstrBaseNoTargets<ROSE_INSTR_SET_LOGICAL, ROSE_STRUCT_SET_LOGICAL, @@ -2380,12 +2380,12 @@ public: } }; -class RoseInstrEnd - : public RoseInstrBaseTrivial<ROSE_INSTR_END, ROSE_STRUCT_END, - RoseInstrEnd> { -public: - ~RoseInstrEnd() override; -}; - -} -#endif +class RoseInstrEnd + : public RoseInstrBaseTrivial<ROSE_INSTR_END, ROSE_STRUCT_END, + RoseInstrEnd> { +public: + ~RoseInstrEnd() override; +}; + +} +#endif diff --git a/contrib/libs/hyperscan/src/rose/rose_build_lit_accel.cpp b/contrib/libs/hyperscan/src/rose/rose_build_lit_accel.cpp index 5d7af28313..b389f493d1 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_lit_accel.cpp +++ b/contrib/libs/hyperscan/src/rose/rose_build_lit_accel.cpp @@ -1,467 +1,467 @@ -/* - * Copyright (c) 2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following 
conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "rose_build_lit_accel.h" - -#include "grey.h" -#include "ue2common.h" -#include "hwlm/hwlm_build.h" -#include "hwlm/hwlm_internal.h" -#include "hwlm/hwlm_literal.h" -#include "nfa/accel.h" -#include "nfa/shufticompile.h" -#include "nfa/trufflecompile.h" -#include "util/compare.h" -#include "util/dump_charclass.h" -#include "util/ue2string.h" -#include "util/verify_types.h" - -using namespace std; - -namespace ue2 { - -static const unsigned int MAX_ACCEL_OFFSET = 16; -static const unsigned int MAX_SHUFTI_WIDTH = 240; - -static -size_t mask_overhang(const AccelString &lit) { - size_t msk_true_size = lit.msk.size(); - assert(msk_true_size <= HWLM_MASKLEN); - assert(HWLM_MASKLEN <= MAX_ACCEL_OFFSET); - for (u8 c : lit.msk) { - if (!c) { - msk_true_size--; - } else { - break; - } - } - - if (lit.s.length() >= msk_true_size) { - return 0; - } - - /* only short literals should be able to have a mask which overhangs */ - assert(lit.s.length() < MAX_ACCEL_OFFSET); - return msk_true_size - lit.s.length(); -} - -static -bool findDVerm(const vector<const AccelString *> &lits, AccelAux *aux) { - const AccelString &first = *lits.front(); - - struct candidate { - candidate(void) - : c1(0), c2(0), max_offset(0), b5insens(false), valid(false) {} - candidate(const AccelString &base, u32 offset) - : c1(base.s[offset]), c2(base.s[offset + 1]), max_offset(0), - b5insens(false), valid(true) {} - char c1; - char c2; - u32 max_offset; - bool b5insens; - bool valid; - - bool operator>(const candidate &other) const { - if (!valid) { - return false; - } - - if (!other.valid) { - return true; - } - - if (other.cdiffers() && !cdiffers()) { - return false; - } - - if (!other.cdiffers() && cdiffers()) { - return true; - } - - if (!other.b5insens && b5insens) { - return false; - } - - if (other.b5insens && !b5insens) { - return true; - } - - if (max_offset > other.max_offset) { - return false; - } - - return true; - } - - bool cdiffers(void) const { - if (!b5insens) { - return c1 != c2; - } - return (c1 & CASE_CLEAR) != (c2 & CASE_CLEAR); - } - }; - - candidate best; - - for (u32 i = 0; i < MIN(MAX_ACCEL_OFFSET, first.s.length()) - 1; i++) { - candidate 
curr(first, i); - - /* check to see if this pair appears in each string */ - for (const auto &lit_ptr : lits) { - const AccelString &lit = *lit_ptr; - if (lit.nocase && (ourisalpha(curr.c1) || ourisalpha(curr.c2))) { - curr.b5insens = true; /* no choice but to be case insensitive */ - } - - bool found = false; - bool found_nc = false; - for (u32 j = 0; - !found && j < MIN(MAX_ACCEL_OFFSET, lit.s.length()) - 1; j++) { - found |= curr.c1 == lit.s[j] && curr.c2 == lit.s[j + 1]; - found_nc |= (curr.c1 & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR) - && (curr.c2 & CASE_CLEAR) == (lit.s[j + 1] & CASE_CLEAR); - - if (curr.b5insens) { - found = found_nc; - } - } - - if (!curr.b5insens && !found && found_nc) { - curr.b5insens = true; - found = true; - } - - if (!found) { - goto next_candidate; - } - } - - /* check to find the max offset where this appears */ - for (const auto &lit_ptr : lits) { - const AccelString &lit = *lit_ptr; - for (u32 j = 0; j < MIN(MAX_ACCEL_OFFSET, lit.s.length()) - 1; - j++) { - bool found = false; - if (curr.b5insens) { - found = (curr.c1 & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR) - && (curr.c2 & CASE_CLEAR) == (lit.s[j + 1] & CASE_CLEAR); - } else { - found = curr.c1 == lit.s[j] && curr.c2 == lit.s[j + 1]; - } - - if (found) { - assert(j + mask_overhang(lit) <= MAX_ACCEL_OFFSET); - ENSURE_AT_LEAST(&curr.max_offset, j + mask_overhang(lit)); - break; - } - } - } - - if (curr > best) { - best = curr; - } - - next_candidate:; - } - - if (!best.valid) { - return false; - } - - aux->dverm.offset = verify_u8(best.max_offset); - - if (!best.b5insens) { - aux->dverm.accel_type = ACCEL_DVERM; - aux->dverm.c1 = best.c1; - aux->dverm.c2 = best.c2; - DEBUG_PRINTF("built dverm for %02hhx%02hhx\n", - aux->dverm.c1, aux->dverm.c2); - } else { - aux->dverm.accel_type = ACCEL_DVERM_NOCASE; - aux->dverm.c1 = best.c1 & CASE_CLEAR; - aux->dverm.c2 = best.c2 & CASE_CLEAR; - DEBUG_PRINTF("built dverm nc for %02hhx%02hhx\n", - aux->dverm.c1, aux->dverm.c2); - } - return true; -} - -static -bool findSVerm(const vector<const AccelString *> &lits, AccelAux *aux) { - const AccelString &first = *lits.front(); - - struct candidate { - candidate(void) - : c(0), max_offset(0), b5insens(false), valid(false) {} - candidate(const AccelString &base, u32 offset) - : c(base.s[offset]), max_offset(0), - b5insens(false), valid(true) {} - char c; - u32 max_offset; - bool b5insens; - bool valid; - - bool operator>(const candidate &other) const { - if (!valid) { - return false; - } - - if (!other.valid) { - return true; - } - - if (!other.b5insens && b5insens) { - return false; - } - - if (other.b5insens && !b5insens) { - return true; - } - - if (max_offset > other.max_offset) { - return false; - } - - return true; - } - }; - - candidate best; - - for (u32 i = 0; i < MIN(MAX_ACCEL_OFFSET, first.s.length()); i++) { - candidate curr(first, i); - - /* check to see if this pair appears in each string */ - for (const auto &lit_ptr : lits) { - const AccelString &lit = *lit_ptr; - if (lit.nocase && ourisalpha(curr.c)) { - curr.b5insens = true; /* no choice but to be case insensitive */ - } - - bool found = false; - bool found_nc = false; - for (u32 j = 0; - !found && j < MIN(MAX_ACCEL_OFFSET, lit.s.length()); j++) { - found |= curr.c == lit.s[j]; - found_nc |= (curr.c & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR); - - if (curr.b5insens) { - found = found_nc; - } - } - - if (!curr.b5insens && !found && found_nc) { - curr.b5insens = true; - found = true; - } - - if (!found) { - goto next_candidate; - } - } - - /* check to find the 
max offset where this appears */ - for (const auto &lit_ptr : lits) { - const AccelString &lit = *lit_ptr; - for (u32 j = 0; j < MIN(MAX_ACCEL_OFFSET, lit.s.length()); j++) { - bool found = false; - if (curr.b5insens) { - found = (curr.c & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR); - } else { - found = curr.c == lit.s[j]; - } - - if (found) { - assert(j + mask_overhang(lit) <= MAX_ACCEL_OFFSET); - ENSURE_AT_LEAST(&curr.max_offset, j + mask_overhang(lit)); - } - } - } - - if (curr > best) { - best = curr; - } - - next_candidate:; - } - - if (!best.valid) { - return false; - } - - if (!best.b5insens) { - aux->verm.accel_type = ACCEL_VERM; - aux->verm.c = best.c; - DEBUG_PRINTF("built verm for %02hhx\n", aux->verm.c); - } else { - aux->verm.accel_type = ACCEL_VERM_NOCASE; - aux->verm.c = best.c & CASE_CLEAR; - DEBUG_PRINTF("built verm nc for %02hhx\n", aux->verm.c); - } - aux->verm.offset = verify_u8(best.max_offset); - - return true; -} - -static -void filterLits(const vector<AccelString> &lits, hwlm_group_t expected_groups, - vector<const AccelString *> *filtered_lits, u32 *min_len) { - *min_len = MAX_ACCEL_OFFSET; - - for (const auto &lit : lits) { - if (!(lit.groups & expected_groups)) { - continue; - } - - const size_t lit_len = lit.s.length(); - if (lit_len < *min_len) { - *min_len = verify_u32(lit_len); - } - - DEBUG_PRINTF("lit: '%s', nocase=%d, groups=0x%llx\n", - escapeString(lit.s).c_str(), lit.nocase ? 1 : 0, - lit.groups); - filtered_lits->push_back(&lit); - } -} - -static -bool litGuardedByCharReach(const CharReach &cr, const AccelString &lit, - u32 max_offset) { - for (u32 i = 0; i <= max_offset && i < lit.s.length(); i++) { - unsigned char c = lit.s[i]; - if (lit.nocase) { - if (cr.test(mytoupper(c)) && cr.test(mytolower(c))) { - return true; - } - } else { - if (cr.test(c)) { - return true; - } - } - } - - return false; -} - -static -void findForwardAccelScheme(const vector<AccelString> &lits, - hwlm_group_t expected_groups, AccelAux *aux) { - DEBUG_PRINTF("building accel expected=%016llx\n", expected_groups); - u32 min_len = MAX_ACCEL_OFFSET; - vector<const AccelString *> filtered_lits; - - filterLits(lits, expected_groups, &filtered_lits, &min_len); - if (filtered_lits.empty()) { - return; - } - - if (findDVerm(filtered_lits, aux) - || findSVerm(filtered_lits, aux)) { - return; - } - - /* look for shufti/truffle */ - - vector<CharReach> reach(MAX_ACCEL_OFFSET, CharReach()); - for (const auto &lit : lits) { - if (!(lit.groups & expected_groups)) { - continue; - } - - u32 overhang = mask_overhang(lit); - for (u32 i = 0; i < overhang; i++) { - /* this offset overhangs the start of the real literal; look at the - * msk/cmp */ - for (u32 j = 0; j < N_CHARS; j++) { - if ((j & lit.msk[i]) == lit.cmp[i]) { - reach[i].set(j); - } - } - } - for (u32 i = overhang; i < MAX_ACCEL_OFFSET; i++) { - CharReach &reach_i = reach[i]; - u32 i_effective = i - overhang; - - if (litGuardedByCharReach(reach_i, lit, i_effective)) { - continue; - } - unsigned char c = i_effective < lit.s.length() ? 
lit.s[i_effective] - : lit.s.back(); - if (lit.nocase) { - reach_i.set(mytoupper(c)); - reach_i.set(mytolower(c)); - } else { - reach_i.set(c); - } - } - } - - u32 min_count = ~0U; - u32 min_offset = ~0U; - for (u32 i = 0; i < MAX_ACCEL_OFFSET; i++) { - size_t count = reach[i].count(); - DEBUG_PRINTF("offset %u is %s (reach %zu)\n", i, - describeClass(reach[i]).c_str(), count); - if (count < min_count) { - min_count = (u32)count; - min_offset = i; - } - } - - if (min_count > MAX_SHUFTI_WIDTH) { - DEBUG_PRINTF("FAIL: min shufti with %u chars is too wide\n", min_count); - return; - } - - const CharReach &cr = reach[min_offset]; - if (-1 != - shuftiBuildMasks(cr, (u8 *)&aux->shufti.lo, (u8 *)&aux->shufti.hi)) { - DEBUG_PRINTF("built shufti for %s (%zu chars, offset %u)\n", - describeClass(cr).c_str(), cr.count(), min_offset); - aux->shufti.accel_type = ACCEL_SHUFTI; - aux->shufti.offset = verify_u8(min_offset); - return; - } - - truffleBuildMasks(cr, (u8 *)&aux->truffle.mask1, (u8 *)&aux->truffle.mask2); - DEBUG_PRINTF("built truffle for %s (%zu chars, offset %u)\n", - describeClass(cr).c_str(), cr.count(), min_offset); - aux->truffle.accel_type = ACCEL_TRUFFLE; - aux->truffle.offset = verify_u8(min_offset); -} - -void buildForwardAccel(HWLM *h, const vector<AccelString> &lits, - hwlm_group_t expected_groups) { - findForwardAccelScheme(lits, expected_groups, &h->accel1); - findForwardAccelScheme(lits, HWLM_ALL_GROUPS, &h->accel0); - - h->accel1_groups = expected_groups; -} - -} // namespace ue2 +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
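When neither a double-byte nor a single-byte vermicelli candidate can be found, findForwardAccelScheme falls back to shufti or truffle: for each of the first MAX_ACCEL_OFFSET positions it unions the characters any literal (or its overhanging mask) may present there, then accelerates on the offset whose union is smallest, using shufti when the class fits and truffle otherwise. The following is a stripped-down sketch of that offset choice over plain strings; it ignores masks, case folding and the litGuardedByCharReach early-out that the real code applies, so treat it as an illustration rather than the actual algorithm.

#include <bitset>
#include <string>
#include <vector>

// Union of possible characters at each scan offset over all literals; the
// offset with the smallest union gives the best skipping behaviour.
static size_t pickAccelOffset(const std::vector<std::string> &lits,
                              size_t max_offset) {
    std::vector<std::bitset<256>> reach(max_offset);
    for (const auto &s : lits) {
        if (s.empty()) {
            continue; // accelerable literals are non-empty in practice
        }
        for (size_t i = 0; i < max_offset; i++) {
            // Past the end of a short literal, reuse its last character, as
            // findForwardAccelScheme does.
            unsigned char c = i < s.size() ? s[i] : s.back();
            reach[i].set(c);
        }
    }
    size_t best = 0;
    for (size_t i = 1; i < max_offset; i++) {
        if (reach[i].count() < reach[best].count()) {
            best = i;
        }
    }
    return best;
}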
+ */ + +#include "rose_build_lit_accel.h" + +#include "grey.h" +#include "ue2common.h" +#include "hwlm/hwlm_build.h" +#include "hwlm/hwlm_internal.h" +#include "hwlm/hwlm_literal.h" +#include "nfa/accel.h" +#include "nfa/shufticompile.h" +#include "nfa/trufflecompile.h" +#include "util/compare.h" +#include "util/dump_charclass.h" +#include "util/ue2string.h" +#include "util/verify_types.h" + +using namespace std; + +namespace ue2 { + +static const unsigned int MAX_ACCEL_OFFSET = 16; +static const unsigned int MAX_SHUFTI_WIDTH = 240; + +static +size_t mask_overhang(const AccelString &lit) { + size_t msk_true_size = lit.msk.size(); + assert(msk_true_size <= HWLM_MASKLEN); + assert(HWLM_MASKLEN <= MAX_ACCEL_OFFSET); + for (u8 c : lit.msk) { + if (!c) { + msk_true_size--; + } else { + break; + } + } + + if (lit.s.length() >= msk_true_size) { + return 0; + } + + /* only short literals should be able to have a mask which overhangs */ + assert(lit.s.length() < MAX_ACCEL_OFFSET); + return msk_true_size - lit.s.length(); +} + +static +bool findDVerm(const vector<const AccelString *> &lits, AccelAux *aux) { + const AccelString &first = *lits.front(); + + struct candidate { + candidate(void) + : c1(0), c2(0), max_offset(0), b5insens(false), valid(false) {} + candidate(const AccelString &base, u32 offset) + : c1(base.s[offset]), c2(base.s[offset + 1]), max_offset(0), + b5insens(false), valid(true) {} + char c1; + char c2; + u32 max_offset; + bool b5insens; + bool valid; + + bool operator>(const candidate &other) const { + if (!valid) { + return false; + } + + if (!other.valid) { + return true; + } + + if (other.cdiffers() && !cdiffers()) { + return false; + } + + if (!other.cdiffers() && cdiffers()) { + return true; + } + + if (!other.b5insens && b5insens) { + return false; + } + + if (other.b5insens && !b5insens) { + return true; + } + + if (max_offset > other.max_offset) { + return false; + } + + return true; + } + + bool cdiffers(void) const { + if (!b5insens) { + return c1 != c2; + } + return (c1 & CASE_CLEAR) != (c2 & CASE_CLEAR); + } + }; + + candidate best; + + for (u32 i = 0; i < MIN(MAX_ACCEL_OFFSET, first.s.length()) - 1; i++) { + candidate curr(first, i); + + /* check to see if this pair appears in each string */ + for (const auto &lit_ptr : lits) { + const AccelString &lit = *lit_ptr; + if (lit.nocase && (ourisalpha(curr.c1) || ourisalpha(curr.c2))) { + curr.b5insens = true; /* no choice but to be case insensitive */ + } + + bool found = false; + bool found_nc = false; + for (u32 j = 0; + !found && j < MIN(MAX_ACCEL_OFFSET, lit.s.length()) - 1; j++) { + found |= curr.c1 == lit.s[j] && curr.c2 == lit.s[j + 1]; + found_nc |= (curr.c1 & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR) + && (curr.c2 & CASE_CLEAR) == (lit.s[j + 1] & CASE_CLEAR); + + if (curr.b5insens) { + found = found_nc; + } + } + + if (!curr.b5insens && !found && found_nc) { + curr.b5insens = true; + found = true; + } + + if (!found) { + goto next_candidate; + } + } + + /* check to find the max offset where this appears */ + for (const auto &lit_ptr : lits) { + const AccelString &lit = *lit_ptr; + for (u32 j = 0; j < MIN(MAX_ACCEL_OFFSET, lit.s.length()) - 1; + j++) { + bool found = false; + if (curr.b5insens) { + found = (curr.c1 & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR) + && (curr.c2 & CASE_CLEAR) == (lit.s[j + 1] & CASE_CLEAR); + } else { + found = curr.c1 == lit.s[j] && curr.c2 == lit.s[j + 1]; + } + + if (found) { + assert(j + mask_overhang(lit) <= MAX_ACCEL_OFFSET); + ENSURE_AT_LEAST(&curr.max_offset, j + mask_overhang(lit)); + 
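mask_overhang strips the leading zero bytes of msk and reports how far the remaining mask reaches back before the first byte of the literal itself: for example, a 2-byte literal whose effective mask length is 5 has an overhang of 3, and findForwardAccelScheme fills those 3 leading reach positions from msk/cmp rather than from the literal's characters. The accompanying assertion that the literal is shorter than MAX_ACCEL_OFFSET reflects that only short literals can carry an overhanging mask.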
break; + } + } + } + + if (curr > best) { + best = curr; + } + + next_candidate:; + } + + if (!best.valid) { + return false; + } + + aux->dverm.offset = verify_u8(best.max_offset); + + if (!best.b5insens) { + aux->dverm.accel_type = ACCEL_DVERM; + aux->dverm.c1 = best.c1; + aux->dverm.c2 = best.c2; + DEBUG_PRINTF("built dverm for %02hhx%02hhx\n", + aux->dverm.c1, aux->dverm.c2); + } else { + aux->dverm.accel_type = ACCEL_DVERM_NOCASE; + aux->dverm.c1 = best.c1 & CASE_CLEAR; + aux->dverm.c2 = best.c2 & CASE_CLEAR; + DEBUG_PRINTF("built dverm nc for %02hhx%02hhx\n", + aux->dverm.c1, aux->dverm.c2); + } + return true; +} + +static +bool findSVerm(const vector<const AccelString *> &lits, AccelAux *aux) { + const AccelString &first = *lits.front(); + + struct candidate { + candidate(void) + : c(0), max_offset(0), b5insens(false), valid(false) {} + candidate(const AccelString &base, u32 offset) + : c(base.s[offset]), max_offset(0), + b5insens(false), valid(true) {} + char c; + u32 max_offset; + bool b5insens; + bool valid; + + bool operator>(const candidate &other) const { + if (!valid) { + return false; + } + + if (!other.valid) { + return true; + } + + if (!other.b5insens && b5insens) { + return false; + } + + if (other.b5insens && !b5insens) { + return true; + } + + if (max_offset > other.max_offset) { + return false; + } + + return true; + } + }; + + candidate best; + + for (u32 i = 0; i < MIN(MAX_ACCEL_OFFSET, first.s.length()); i++) { + candidate curr(first, i); + + /* check to see if this pair appears in each string */ + for (const auto &lit_ptr : lits) { + const AccelString &lit = *lit_ptr; + if (lit.nocase && ourisalpha(curr.c)) { + curr.b5insens = true; /* no choice but to be case insensitive */ + } + + bool found = false; + bool found_nc = false; + for (u32 j = 0; + !found && j < MIN(MAX_ACCEL_OFFSET, lit.s.length()); j++) { + found |= curr.c == lit.s[j]; + found_nc |= (curr.c & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR); + + if (curr.b5insens) { + found = found_nc; + } + } + + if (!curr.b5insens && !found && found_nc) { + curr.b5insens = true; + found = true; + } + + if (!found) { + goto next_candidate; + } + } + + /* check to find the max offset where this appears */ + for (const auto &lit_ptr : lits) { + const AccelString &lit = *lit_ptr; + for (u32 j = 0; j < MIN(MAX_ACCEL_OFFSET, lit.s.length()); j++) { + bool found = false; + if (curr.b5insens) { + found = (curr.c & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR); + } else { + found = curr.c == lit.s[j]; + } + + if (found) { + assert(j + mask_overhang(lit) <= MAX_ACCEL_OFFSET); + ENSURE_AT_LEAST(&curr.max_offset, j + mask_overhang(lit)); + } + } + } + + if (curr > best) { + best = curr; + } + + next_candidate:; + } + + if (!best.valid) { + return false; + } + + if (!best.b5insens) { + aux->verm.accel_type = ACCEL_VERM; + aux->verm.c = best.c; + DEBUG_PRINTF("built verm for %02hhx\n", aux->verm.c); + } else { + aux->verm.accel_type = ACCEL_VERM_NOCASE; + aux->verm.c = best.c & CASE_CLEAR; + DEBUG_PRINTF("built verm nc for %02hhx\n", aux->verm.c); + } + aux->verm.offset = verify_u8(best.max_offset); + + return true; +} + +static +void filterLits(const vector<AccelString> &lits, hwlm_group_t expected_groups, + vector<const AccelString *> *filtered_lits, u32 *min_len) { + *min_len = MAX_ACCEL_OFFSET; + + for (const auto &lit : lits) { + if (!(lit.groups & expected_groups)) { + continue; + } + + const size_t lit_len = lit.s.length(); + if (lit_len < *min_len) { + *min_len = verify_u32(lit_len); + } + + DEBUG_PRINTF("lit: '%s', nocase=%d, 
groups=0x%llx\n", + escapeString(lit.s).c_str(), lit.nocase ? 1 : 0, + lit.groups); + filtered_lits->push_back(&lit); + } +} + +static +bool litGuardedByCharReach(const CharReach &cr, const AccelString &lit, + u32 max_offset) { + for (u32 i = 0; i <= max_offset && i < lit.s.length(); i++) { + unsigned char c = lit.s[i]; + if (lit.nocase) { + if (cr.test(mytoupper(c)) && cr.test(mytolower(c))) { + return true; + } + } else { + if (cr.test(c)) { + return true; + } + } + } + + return false; +} + +static +void findForwardAccelScheme(const vector<AccelString> &lits, + hwlm_group_t expected_groups, AccelAux *aux) { + DEBUG_PRINTF("building accel expected=%016llx\n", expected_groups); + u32 min_len = MAX_ACCEL_OFFSET; + vector<const AccelString *> filtered_lits; + + filterLits(lits, expected_groups, &filtered_lits, &min_len); + if (filtered_lits.empty()) { + return; + } + + if (findDVerm(filtered_lits, aux) + || findSVerm(filtered_lits, aux)) { + return; + } + + /* look for shufti/truffle */ + + vector<CharReach> reach(MAX_ACCEL_OFFSET, CharReach()); + for (const auto &lit : lits) { + if (!(lit.groups & expected_groups)) { + continue; + } + + u32 overhang = mask_overhang(lit); + for (u32 i = 0; i < overhang; i++) { + /* this offset overhangs the start of the real literal; look at the + * msk/cmp */ + for (u32 j = 0; j < N_CHARS; j++) { + if ((j & lit.msk[i]) == lit.cmp[i]) { + reach[i].set(j); + } + } + } + for (u32 i = overhang; i < MAX_ACCEL_OFFSET; i++) { + CharReach &reach_i = reach[i]; + u32 i_effective = i - overhang; + + if (litGuardedByCharReach(reach_i, lit, i_effective)) { + continue; + } + unsigned char c = i_effective < lit.s.length() ? lit.s[i_effective] + : lit.s.back(); + if (lit.nocase) { + reach_i.set(mytoupper(c)); + reach_i.set(mytolower(c)); + } else { + reach_i.set(c); + } + } + } + + u32 min_count = ~0U; + u32 min_offset = ~0U; + for (u32 i = 0; i < MAX_ACCEL_OFFSET; i++) { + size_t count = reach[i].count(); + DEBUG_PRINTF("offset %u is %s (reach %zu)\n", i, + describeClass(reach[i]).c_str(), count); + if (count < min_count) { + min_count = (u32)count; + min_offset = i; + } + } + + if (min_count > MAX_SHUFTI_WIDTH) { + DEBUG_PRINTF("FAIL: min shufti with %u chars is too wide\n", min_count); + return; + } + + const CharReach &cr = reach[min_offset]; + if (-1 != + shuftiBuildMasks(cr, (u8 *)&aux->shufti.lo, (u8 *)&aux->shufti.hi)) { + DEBUG_PRINTF("built shufti for %s (%zu chars, offset %u)\n", + describeClass(cr).c_str(), cr.count(), min_offset); + aux->shufti.accel_type = ACCEL_SHUFTI; + aux->shufti.offset = verify_u8(min_offset); + return; + } + + truffleBuildMasks(cr, (u8 *)&aux->truffle.mask1, (u8 *)&aux->truffle.mask2); + DEBUG_PRINTF("built truffle for %s (%zu chars, offset %u)\n", + describeClass(cr).c_str(), cr.count(), min_offset); + aux->truffle.accel_type = ACCEL_TRUFFLE; + aux->truffle.offset = verify_u8(min_offset); +} + +void buildForwardAccel(HWLM *h, const vector<AccelString> &lits, + hwlm_group_t expected_groups) { + findForwardAccelScheme(lits, expected_groups, &h->accel1); + findForwardAccelScheme(lits, HWLM_ALL_GROUPS, &h->accel0); + + h->accel1_groups = expected_groups; +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/rose/rose_build_lit_accel.h b/contrib/libs/hyperscan/src/rose/rose_build_lit_accel.h index 5a959d7de2..f0c0143485 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_lit_accel.h +++ b/contrib/libs/hyperscan/src/rose/rose_build_lit_accel.h @@ -1,71 +1,71 @@ -/* - * Copyright (c) 2017, Intel Corporation - * - * 
Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef ROSE_BUILD_LIT_ACCEL_H -#define ROSE_BUILD_LIT_ACCEL_H - -#include "hwlm/hwlm.h" - -#include <string> -#include <tuple> -#include <utility> -#include <vector> - -struct HWLM; - -namespace ue2 { - -struct AccelString { - AccelString(std::string s_in, bool nocase_in, std::vector<u8> msk_in, - std::vector<u8> cmp_in, hwlm_group_t groups_in) - : s(std::move(s_in)), nocase(nocase_in), msk(std::move(msk_in)), - cmp(std::move(cmp_in)), groups(groups_in) {} - - std::string s; - bool nocase; - std::vector<u8> msk; - std::vector<u8> cmp; - hwlm_group_t groups; - - bool operator==(const AccelString &a) const { - return s == a.s && nocase == a.nocase && msk == a.msk && cmp == a.cmp && - groups == a.groups; - } - - bool operator<(const AccelString &a) const { - return std::tie(s, nocase, msk, cmp, groups) < - std::tie(a.s, a.nocase, a.msk, a.cmp, a.groups); - } -}; - -void buildForwardAccel(HWLM *h, const std::vector<AccelString> &lits, - hwlm_group_t expected_groups); - -} // namespace ue2 - -#endif // ROSE_BUILD_LIT_ACCEL_H +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ROSE_BUILD_LIT_ACCEL_H +#define ROSE_BUILD_LIT_ACCEL_H + +#include "hwlm/hwlm.h" + +#include <string> +#include <tuple> +#include <utility> +#include <vector> + +struct HWLM; + +namespace ue2 { + +struct AccelString { + AccelString(std::string s_in, bool nocase_in, std::vector<u8> msk_in, + std::vector<u8> cmp_in, hwlm_group_t groups_in) + : s(std::move(s_in)), nocase(nocase_in), msk(std::move(msk_in)), + cmp(std::move(cmp_in)), groups(groups_in) {} + + std::string s; + bool nocase; + std::vector<u8> msk; + std::vector<u8> cmp; + hwlm_group_t groups; + + bool operator==(const AccelString &a) const { + return s == a.s && nocase == a.nocase && msk == a.msk && cmp == a.cmp && + groups == a.groups; + } + + bool operator<(const AccelString &a) const { + return std::tie(s, nocase, msk, cmp, groups) < + std::tie(a.s, a.nocase, a.msk, a.cmp, a.groups); + } +}; + +void buildForwardAccel(HWLM *h, const std::vector<AccelString> &lits, + hwlm_group_t expected_groups); + +} // namespace ue2 + +#endif // ROSE_BUILD_LIT_ACCEL_H diff --git a/contrib/libs/hyperscan/src/rose/rose_build_long_lit.cpp b/contrib/libs/hyperscan/src/rose/rose_build_long_lit.cpp index f9c3b0f840..45a2eb270d 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_long_lit.cpp +++ b/contrib/libs/hyperscan/src/rose/rose_build_long_lit.cpp @@ -1,452 +1,452 @@ -/* - * Copyright (c) 2016-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "rose_build_long_lit.h" - -#include "rose_build_engine_blob.h" -#include "rose_build_impl.h" -#include "stream_long_lit_hash.h" -#include "util/bytecode_ptr.h" -#include "util/bitutils.h" -#include "util/verify_types.h" -#include "util/compile_context.h" - -#include <algorithm> -#include <numeric> - -using namespace std; - -namespace ue2 { - -/** \brief Minimum size for a non-empty hash table. Must be a power of two. */ -static constexpr size_t MIN_HASH_TABLE_SIZE = 128; - -/** \brief Maximum load factor (between zero and one) for a hash table. */ -static constexpr double MAX_HASH_TABLE_LOAD = 0.7; - -/** \brief Minimum size (in bits) for a bloom filter. Must be a power of two. */ -static constexpr u32 MIN_BLOOM_FILTER_SIZE = 256; - -/** \brief Maximum load factor (between zero and one) for a bloom filter. */ -static constexpr double MAX_BLOOM_FILTER_LOAD = 0.25; - -struct LongLitModeInfo { - u32 num_literals = 0; //!< Number of strings for this mode. - u32 hashed_positions = 0; //!< Number of hashable string positions. -}; - -struct LongLitInfo { - LongLitModeInfo caseful; - LongLitModeInfo nocase; -}; - -static -u32 roundUpToPowerOfTwo(u32 x) { - assert(x != 0); - u32 bits = lg2(x - 1) + 1; - assert(bits < 32); - return 1U << bits; -} - -static -LongLitInfo analyzeLongLits(const vector<ue2_case_string> &lits, - size_t max_len) { - LongLitInfo info; - - for (const auto &lit : lits) { - auto &lit_info = lit.nocase ? info.nocase : info.caseful; - assert(lit.s.size() > max_len); - lit_info.num_literals++; - lit_info.hashed_positions += lit.s.size() - max_len; - } - - DEBUG_PRINTF("case: hashed %u positions\n", info.caseful.hashed_positions); - DEBUG_PRINTF("nocase: hashed %u positions\n", info.nocase.hashed_positions); - - return info; -} - -static -void addToBloomFilter(vector<u8> &bloom, const u8 *substr, bool nocase) { - const u32 num_keys = verify_u32(bloom.size() * 8); - const u32 key_mask = (1U << lg2(num_keys)) -1; - - const auto hash_functions = { bloomHash_1, bloomHash_2, bloomHash_3 }; - for (const auto &hash_func : hash_functions) { - u32 hash = hash_func(substr, nocase); - u32 key = hash & key_mask; - DEBUG_PRINTF("set key %u (of %zu)\n", key, bloom.size() * 8); - bloom[key / 8] |= 1U << (key % 8); - } -} - -static -size_t bloomOccupancy(const vector<u8> &bloom) { - return accumulate(begin(bloom), end(bloom), 0, - [](const size_t &sum, const u8 &elem) { - return sum + popcount32(elem); - }); -} - -static -double bloomLoad(const vector<u8> &bloom) { - return (double)bloomOccupancy(bloom) / (double)(bloom.size() * 8); -} - -static -vector<u8> buildBloomFilter(const vector<ue2_case_string> &lits, size_t max_len, - size_t num_entries, bool nocase) { - assert(num_entries % 8 == 0); - assert((num_entries & (num_entries - 1)) == 0); // Must be power of two. 
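addToBloomFilter above sets one bit per hash function (three in total) for every hashable substring, masking each hash down with the power-of-two filter size; makeBloomFilter, which follows, keeps doubling that size until the fraction of set bits stays below MAX_BLOOM_FILTER_LOAD. A self-contained sketch of the same two steps is below; toyHash merely stands in for bloomHash_1/2/3 from stream_long_lit_hash.h, which are not part of this diff, so the hashes and seeds are illustrative only.

#include <bitset>
#include <cassert>
#include <cstdint>
#include <string>
#include <vector>

// Illustrative stand-in for the real bloom hash functions.
static uint32_t toyHash(const std::string &s, uint32_t seed) {
    uint32_t h = seed;
    for (unsigned char c : s) {
        h = h * 31u + c;
    }
    return h;
}

// Set the bits for one substring. num_bits must be a power of two so that
// masking with (num_bits - 1) yields a valid key, as in addToBloomFilter.
static void bloomAdd(std::vector<uint8_t> &bloom, const std::string &substr) {
    const uint32_t num_bits = static_cast<uint32_t>(bloom.size() * 8);
    assert(num_bits && (num_bits & (num_bits - 1)) == 0);
    for (uint32_t seed : {1u, 2u, 3u}) {
        uint32_t key = toyHash(substr, seed) & (num_bits - 1);
        bloom[key / 8] |= 1u << (key % 8);
    }
}

// Mirror makeBloomFilter: start small and double until the load (set bits
// over total bits) drops below the limit.
static std::vector<uint8_t> buildToyBloom(const std::vector<std::string> &substrs,
                                          double max_load) {
    for (size_t num_bits = 256;; num_bits *= 2) {
        std::vector<uint8_t> bloom(num_bits / 8, 0);
        for (const auto &s : substrs) {
            bloomAdd(bloom, s);
        }
        size_t set_bits = 0;
        for (uint8_t b : bloom) {
            set_bits += std::bitset<8>(b).count();
        }
        if (static_cast<double>(set_bits) / num_bits < max_load) {
            return bloom;
        }
    }
}

A lookup tests the same three bits and reports a possible match only when all of them are set, which is the standard bloom-filter query.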
- - vector<u8> bloom(num_entries / 8, 0); - - if (!num_entries) { - return bloom; - } - - for (const auto &lit : lits) { - if (nocase != lit.nocase) { - continue; - } - for (u32 offset = 1; offset < lit.s.size() - max_len + 1; offset++) { - const u8 *substr = (const u8 *)lit.s.c_str() + offset; - addToBloomFilter(bloom, substr, nocase); - } - } - - DEBUG_PRINTF("%s bloom filter occupancy %zu of %zu entries\n", - nocase ? "nocase" : "caseful", bloomOccupancy(bloom), - num_entries); - - return bloom; -} - - -static -vector<u8> makeBloomFilter(const vector<ue2_case_string> &lits, - size_t max_len, bool nocase) { - vector<u8> bloom; - - size_t num_entries = MIN_BLOOM_FILTER_SIZE; - for (;;) { - bloom = buildBloomFilter(lits, max_len, num_entries, nocase); - DEBUG_PRINTF("built %s bloom for %zu entries: load %f\n", - nocase ? "nocase" : "caseful", num_entries, - bloomLoad(bloom)); - if (bloomLoad(bloom) < MAX_BLOOM_FILTER_LOAD) { - break; - } - num_entries *= 2; - } - return bloom; -} - -static UNUSED -size_t hashTableOccupancy(const vector<RoseLongLitHashEntry> &tab) { - return count_if(begin(tab), end(tab), [](const RoseLongLitHashEntry &ent) { - return ent.str_offset != 0; - }); -} - -static UNUSED -double hashTableLoad(const vector<RoseLongLitHashEntry> &tab) { - return (double)hashTableOccupancy(tab) / (double)(tab.size()); -} - -using LitOffsetVector = small_vector<pair<u32, u32>, 1>; - -static -vector<RoseLongLitHashEntry> buildHashTable( - size_t max_len, const vector<u32> &litToOffsetVal, - const map<u32, LitOffsetVector> &hashToLitOffPairs, - size_t numEntries) { - vector<RoseLongLitHashEntry> tab(numEntries, {0,0}); - - if (!numEntries) { - return tab; - } - - for (const auto &m : hashToLitOffPairs) { - u32 hash = m.first; - const LitOffsetVector &d = m.second; - - u32 bucket = hash % numEntries; - - // Placement via linear probing. - for (const auto &lit_offset : d) { - while (tab[bucket].str_offset != 0) { - bucket++; - if (bucket == numEntries) { - bucket = 0; - } - } - - u32 lit_id = lit_offset.first; - u32 offset = lit_offset.second; - - DEBUG_PRINTF("hash 0x%08x lit_id %u offset %u bucket %u\n", hash, - lit_id, offset, bucket); - - auto &entry = tab[bucket]; - entry.str_offset = verify_u32(litToOffsetVal.at(lit_id)); - assert(entry.str_offset != 0); - entry.str_len = offset + max_len; - } - } - - DEBUG_PRINTF("hash table occupancy %zu of %zu entries\n", - hashTableOccupancy(tab), numEntries); - - return tab; -} - -static -map<u32, LitOffsetVector> computeLitHashes(const vector<ue2_case_string> &lits, - size_t max_len, bool nocase) { - map<u32, LitOffsetVector> hashToLitOffPairs; - - for (u32 lit_id = 0; lit_id < lits.size(); lit_id++) { - const ue2_case_string &lit = lits[lit_id]; - if (nocase != lit.nocase) { - continue; - } - for (u32 offset = 1; offset < lit.s.size() - max_len + 1; offset++) { - const u8 *substr = (const u8 *)lit.s.c_str() + offset; - u32 hash = hashLongLiteral(substr, max_len, lit.nocase); - hashToLitOffPairs[hash].emplace_back(lit_id, offset); - } - } - - for (auto &m : hashToLitOffPairs) { - LitOffsetVector &d = m.second; - if (d.size() == 1) { - continue; - } - - // Sort by (offset, string) so that we'll be able to remove identical - // string prefixes. - stable_sort(begin(d), end(d), - [&](const pair<u32, u32> &a, const pair<u32, u32> &b) { - const auto &str_a = lits[a.first].s; - const auto &str_b = lits[b.first].s; - return tie(a.second, str_a) < tie(b.second, str_b); - }); - - // Remove entries that point to the same literal prefix. 
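buildHashTable above resolves hash collisions with open addressing: each entry starts at hash % numEntries and walks forward, wrapping at the end of the table, until it finds a slot whose str_offset is zero. Zero is free to mean "empty" because literal strings are laid out after the RoseLongLitTable header, so a real offset can never be zero. A reduced sketch of that probing loop, with ToyEntry standing in for RoseLongLitHashEntry:

#include <cstdint>
#include <vector>

struct ToyEntry {
    uint32_t str_offset = 0; // 0 marks an empty slot
    uint32_t str_len = 0;
};

// Linear-probing insert as in buildHashTable. The table is assumed non-empty;
// the real code sizes it from a maximum load factor, so a free slot always
// exists.
static void probeInsert(std::vector<ToyEntry> &tab, uint32_t hash,
                        uint32_t str_offset, uint32_t str_len) {
    size_t bucket = hash % tab.size();
    while (tab[bucket].str_offset != 0) {
        bucket++;                // slot taken: step forward...
        if (bucket == tab.size()) {
            bucket = 0;          // ...wrapping around at the end
        }
    }
    tab[bucket].str_offset = str_offset;
    tab[bucket].str_len = str_len;
}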
- d.erase(unique(begin(d), end(d), - [&](const pair<u32, u32> &a, const pair<u32, u32> &b) { - if (a.second != b.second) { - return false; - } - const auto &str_a = lits[a.first].s; - const auto &str_b = lits[b.first].s; - const size_t len = max_len + a.second; - return equal(begin(str_a), begin(str_a) + len, - begin(str_b)); - }), - end(d)); - - // Sort d by distance of the residual string (len minus our depth into - // the string). We need to put the 'furthest back' string first. - stable_sort(begin(d), end(d), - [](const pair<u32, u32> &a, const pair<u32, u32> &b) { - if (a.second != b.second) { - return a.second > b.second; /* longest is first */ - } - return a.first < b.first; - }); - } - - return hashToLitOffPairs; -} - -static -vector<RoseLongLitHashEntry> makeHashTable(const vector<ue2_case_string> &lits, - size_t max_len, - const vector<u32> &litToOffsetVal, - u32 numPositions, bool nocase) { - // Compute lit substring hashes. - const auto hashToLitOffPairs = computeLitHashes(lits, max_len, nocase); - - // Compute the size of the hash table: we need enough entries to satisfy - // our max load constraint, and it must be a power of two. - size_t num_entries = (double)numPositions / MAX_HASH_TABLE_LOAD + 1; - num_entries = roundUpToPowerOfTwo(max(MIN_HASH_TABLE_SIZE, num_entries)); - - auto tab = buildHashTable(max_len, litToOffsetVal, hashToLitOffPairs, - num_entries); - DEBUG_PRINTF("built %s hash table for %zu entries: load %f\n", - nocase ? "nocase" : "caseful", num_entries, - hashTableLoad(tab)); - assert(hashTableLoad(tab) < MAX_HASH_TABLE_LOAD); - - return tab; -} - -static -vector<u8> buildLits(const vector<ue2_case_string> &lits, u32 baseOffset, - vector<u32> &litToOffsetVal) { - vector<u8> blob; - litToOffsetVal.resize(lits.size(), 0); - - u32 lit_id = 0; - for (const auto &lit : lits) { - u32 offset = baseOffset + verify_u32(blob.size()); - blob.insert(blob.end(), begin(lit.s), end(lit.s)); - litToOffsetVal[lit_id] = offset; - lit_id++; - } - - DEBUG_PRINTF("built %zu bytes of strings\n", blob.size()); - return blob; -} - -u32 buildLongLiteralTable(const RoseBuildImpl &build, RoseEngineBlob &blob, - vector<ue2_case_string> &lits, - size_t longLitLengthThreshold, - size_t *historyRequired, - size_t *longLitStreamStateRequired) { - // Work in terms of history requirement (i.e. literal len - 1). - const size_t max_len = longLitLengthThreshold - 1; - - // We should only be building the long literal hash table in streaming mode. - if (!build.cc.streaming) { - return 0; - } - - if (lits.empty()) { - DEBUG_PRINTF("no long literals\n"); - return 0; - } - - // The last char of each literal is trimmed as we're not interested in full - // matches, only partial matches. - for (auto &lit : lits) { - assert(!lit.s.empty()); - lit.s.pop_back(); - } - - // Sort by caseful/caseless and in lexicographical order. - stable_sort(begin(lits), end(lits), [](const ue2_case_string &a, - const ue2_case_string &b) { - if (a.nocase != b.nocase) { - return a.nocase < b.nocase; - } - return a.s < b.s; - }); - - // Find literals that are prefixes of other literals (including - // duplicates). Note that we iterate in reverse, since we want to retain - // only the longest string from a set of prefixes. - auto it = unique(lits.rbegin(), lits.rend(), [](const ue2_case_string &a, - const ue2_case_string &b) { - return a.nocase == b.nocase && a.s.size() >= b.s.size() && - equal(b.s.begin(), b.s.end(), a.s.begin()); - }); - - // Erase dupes found by unique(). 
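buildLongLiteralTable trims the final byte of each literal (only partial matches matter at this length), sorts caseful before nocase and lexicographically within each group, and then removes any literal that is a prefix of another literal, or an outright duplicate, by running unique() over the reversed range so that the longest member of each prefix family is the one kept. The same trick on plain strings, ignoring the nocase flag that the real predicate also compares:

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

int main() {
    std::vector<std::string> lits = {"foobar", "foo", "bar", "foob", "bar"};
    std::sort(lits.begin(), lits.end());
    // sorted: "bar", "bar", "foo", "foob", "foobar"

    // Reverse iteration visits the longest member of each prefix family
    // first, so unique() keeps it and flags the shorter prefixes (and exact
    // duplicates) for removal.
    auto it = std::unique(lits.rbegin(), lits.rend(),
                          [](const std::string &a, const std::string &b) {
                              return a.size() >= b.size() &&
                                     std::equal(b.begin(), b.end(), a.begin());
                          });
    lits.erase(lits.begin(), it.base());

    for (const auto &s : lits) {
        std::cout << s << '\n'; // prints "bar" then "foobar"
    }
    return 0;
}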
- lits.erase(lits.begin(), it.base()); - - LongLitInfo info = analyzeLongLits(lits, max_len); - - vector<u32> litToOffsetVal; - const size_t headerSize = ROUNDUP_16(sizeof(RoseLongLitTable)); - vector<u8> lit_blob = buildLits(lits, headerSize, litToOffsetVal); - - // Build caseful bloom filter and hash table. - vector<u8> bloom_case; - vector<RoseLongLitHashEntry> tab_case; - if (info.caseful.num_literals) { - bloom_case = makeBloomFilter(lits, max_len, false); - tab_case = makeHashTable(lits, max_len, litToOffsetVal, - info.caseful.hashed_positions, false); - } - - // Build nocase bloom filter and hash table. - vector<u8> bloom_nocase; - vector<RoseLongLitHashEntry> tab_nocase; - if (info.nocase.num_literals) { - bloom_nocase = makeBloomFilter(lits, max_len, true); - tab_nocase = makeHashTable(lits, max_len, litToOffsetVal, - info.nocase.hashed_positions, true); - } - - size_t wholeLitTabSize = ROUNDUP_16(byte_length(lit_blob)); - size_t htOffsetCase = headerSize + wholeLitTabSize; - size_t htOffsetNocase = htOffsetCase + byte_length(tab_case); - size_t bloomOffsetCase = htOffsetNocase + byte_length(tab_nocase); - size_t bloomOffsetNocase = bloomOffsetCase + byte_length(bloom_case); - - size_t tabSize = ROUNDUP_16(bloomOffsetNocase + byte_length(bloom_nocase)); - - // need to add +2 to both of these to allow space for the actual largest - // value as well as handling the fact that we add one to the space when - // storing out a position to allow zero to mean "no stream state value" - u8 streamBitsCase = lg2(roundUpToPowerOfTwo(tab_case.size() + 2)); - u8 streamBitsNocase = lg2(roundUpToPowerOfTwo(tab_nocase.size() + 2)); - u32 tot_state_bytes = ROUNDUP_N(streamBitsCase + streamBitsNocase, 8) / 8; - - auto table = make_zeroed_bytecode_ptr<char>(tabSize, 16); - assert(table); // otherwise would have thrown std::bad_alloc - - // Fill in the RoseLongLitTable header structure. 
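To make the stream state sizing above concrete: a hash table position is stored as its index plus one so that zero can mean "no stream state value", which is where the +2 headroom comes from. With hypothetical sizes of 6 caseful and 30 nocase entries, streamBitsCase = lg2(roundUpToPowerOfTwo(6 + 2)) = 3 and streamBitsNocase = lg2(roundUpToPowerOfTwo(30 + 2)) = 5, so the packed fields need 3 + 5 = 8 bits and tot_state_bytes = ROUNDUP_N(8, 8) / 8 = 1 byte per stream.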
- RoseLongLitTable *header = (RoseLongLitTable *)(table.get()); - header->size = verify_u32(tabSize); - header->maxLen = verify_u8(max_len); // u8 so doesn't matter; won't go > 255 - header->caseful.hashOffset = verify_u32(htOffsetCase); - header->caseful.hashBits = lg2(tab_case.size()); - header->caseful.streamStateBits = streamBitsCase; - header->caseful.bloomOffset = verify_u32(bloomOffsetCase); - header->caseful.bloomBits = lg2(bloom_case.size() * 8); - header->nocase.hashOffset = verify_u32(htOffsetNocase); - header->nocase.hashBits = lg2(tab_nocase.size()); - header->nocase.streamStateBits = streamBitsNocase; - header->nocase.bloomOffset = verify_u32(bloomOffsetNocase); - header->nocase.bloomBits = lg2(bloom_nocase.size() * 8); - assert(tot_state_bytes < sizeof(u64a)); - header->streamStateBytes = verify_u8(tot_state_bytes); // u8 - - // Copy in the literal strings, hash tables and bloom filters, - copy_bytes(table.get() + headerSize, lit_blob); - copy_bytes(table.get() + htOffsetCase, tab_case); - copy_bytes(table.get() + bloomOffsetCase, bloom_case); - copy_bytes(table.get() + htOffsetNocase, tab_nocase); - copy_bytes(table.get() + bloomOffsetNocase, bloom_nocase); - - DEBUG_PRINTF("built streaming table, size=%zu\n", tabSize); - DEBUG_PRINTF("requires %zu bytes of history\n", max_len); - DEBUG_PRINTF("requires %u bytes of stream state\n", tot_state_bytes); - - *historyRequired = max(*historyRequired, max_len); - *longLitStreamStateRequired = tot_state_bytes; - - return blob.add(table); -} - -} // namespace ue2 +/* + * Copyright (c) 2016-2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "rose_build_long_lit.h" + +#include "rose_build_engine_blob.h" +#include "rose_build_impl.h" +#include "stream_long_lit_hash.h" +#include "util/bytecode_ptr.h" +#include "util/bitutils.h" +#include "util/verify_types.h" +#include "util/compile_context.h" + +#include <algorithm> +#include <numeric> + +using namespace std; + +namespace ue2 { + +/** \brief Minimum size for a non-empty hash table. Must be a power of two. 
*/ +static constexpr size_t MIN_HASH_TABLE_SIZE = 128; + +/** \brief Maximum load factor (between zero and one) for a hash table. */ +static constexpr double MAX_HASH_TABLE_LOAD = 0.7; + +/** \brief Minimum size (in bits) for a bloom filter. Must be a power of two. */ +static constexpr u32 MIN_BLOOM_FILTER_SIZE = 256; + +/** \brief Maximum load factor (between zero and one) for a bloom filter. */ +static constexpr double MAX_BLOOM_FILTER_LOAD = 0.25; + +struct LongLitModeInfo { + u32 num_literals = 0; //!< Number of strings for this mode. + u32 hashed_positions = 0; //!< Number of hashable string positions. +}; + +struct LongLitInfo { + LongLitModeInfo caseful; + LongLitModeInfo nocase; +}; + +static +u32 roundUpToPowerOfTwo(u32 x) { + assert(x != 0); + u32 bits = lg2(x - 1) + 1; + assert(bits < 32); + return 1U << bits; +} + +static +LongLitInfo analyzeLongLits(const vector<ue2_case_string> &lits, + size_t max_len) { + LongLitInfo info; + + for (const auto &lit : lits) { + auto &lit_info = lit.nocase ? info.nocase : info.caseful; + assert(lit.s.size() > max_len); + lit_info.num_literals++; + lit_info.hashed_positions += lit.s.size() - max_len; + } + + DEBUG_PRINTF("case: hashed %u positions\n", info.caseful.hashed_positions); + DEBUG_PRINTF("nocase: hashed %u positions\n", info.nocase.hashed_positions); + + return info; +} + +static +void addToBloomFilter(vector<u8> &bloom, const u8 *substr, bool nocase) { + const u32 num_keys = verify_u32(bloom.size() * 8); + const u32 key_mask = (1U << lg2(num_keys)) -1; + + const auto hash_functions = { bloomHash_1, bloomHash_2, bloomHash_3 }; + for (const auto &hash_func : hash_functions) { + u32 hash = hash_func(substr, nocase); + u32 key = hash & key_mask; + DEBUG_PRINTF("set key %u (of %zu)\n", key, bloom.size() * 8); + bloom[key / 8] |= 1U << (key % 8); + } +} + +static +size_t bloomOccupancy(const vector<u8> &bloom) { + return accumulate(begin(bloom), end(bloom), 0, + [](const size_t &sum, const u8 &elem) { + return sum + popcount32(elem); + }); +} + +static +double bloomLoad(const vector<u8> &bloom) { + return (double)bloomOccupancy(bloom) / (double)(bloom.size() * 8); +} + +static +vector<u8> buildBloomFilter(const vector<ue2_case_string> &lits, size_t max_len, + size_t num_entries, bool nocase) { + assert(num_entries % 8 == 0); + assert((num_entries & (num_entries - 1)) == 0); // Must be power of two. + + vector<u8> bloom(num_entries / 8, 0); + + if (!num_entries) { + return bloom; + } + + for (const auto &lit : lits) { + if (nocase != lit.nocase) { + continue; + } + for (u32 offset = 1; offset < lit.s.size() - max_len + 1; offset++) { + const u8 *substr = (const u8 *)lit.s.c_str() + offset; + addToBloomFilter(bloom, substr, nocase); + } + } + + DEBUG_PRINTF("%s bloom filter occupancy %zu of %zu entries\n", + nocase ? "nocase" : "caseful", bloomOccupancy(bloom), + num_entries); + + return bloom; +} + + +static +vector<u8> makeBloomFilter(const vector<ue2_case_string> &lits, + size_t max_len, bool nocase) { + vector<u8> bloom; + + size_t num_entries = MIN_BLOOM_FILTER_SIZE; + for (;;) { + bloom = buildBloomFilter(lits, max_len, num_entries, nocase); + DEBUG_PRINTF("built %s bloom for %zu entries: load %f\n", + nocase ? 
"nocase" : "caseful", num_entries, + bloomLoad(bloom)); + if (bloomLoad(bloom) < MAX_BLOOM_FILTER_LOAD) { + break; + } + num_entries *= 2; + } + return bloom; +} + +static UNUSED +size_t hashTableOccupancy(const vector<RoseLongLitHashEntry> &tab) { + return count_if(begin(tab), end(tab), [](const RoseLongLitHashEntry &ent) { + return ent.str_offset != 0; + }); +} + +static UNUSED +double hashTableLoad(const vector<RoseLongLitHashEntry> &tab) { + return (double)hashTableOccupancy(tab) / (double)(tab.size()); +} + +using LitOffsetVector = small_vector<pair<u32, u32>, 1>; + +static +vector<RoseLongLitHashEntry> buildHashTable( + size_t max_len, const vector<u32> &litToOffsetVal, + const map<u32, LitOffsetVector> &hashToLitOffPairs, + size_t numEntries) { + vector<RoseLongLitHashEntry> tab(numEntries, {0,0}); + + if (!numEntries) { + return tab; + } + + for (const auto &m : hashToLitOffPairs) { + u32 hash = m.first; + const LitOffsetVector &d = m.second; + + u32 bucket = hash % numEntries; + + // Placement via linear probing. + for (const auto &lit_offset : d) { + while (tab[bucket].str_offset != 0) { + bucket++; + if (bucket == numEntries) { + bucket = 0; + } + } + + u32 lit_id = lit_offset.first; + u32 offset = lit_offset.second; + + DEBUG_PRINTF("hash 0x%08x lit_id %u offset %u bucket %u\n", hash, + lit_id, offset, bucket); + + auto &entry = tab[bucket]; + entry.str_offset = verify_u32(litToOffsetVal.at(lit_id)); + assert(entry.str_offset != 0); + entry.str_len = offset + max_len; + } + } + + DEBUG_PRINTF("hash table occupancy %zu of %zu entries\n", + hashTableOccupancy(tab), numEntries); + + return tab; +} + +static +map<u32, LitOffsetVector> computeLitHashes(const vector<ue2_case_string> &lits, + size_t max_len, bool nocase) { + map<u32, LitOffsetVector> hashToLitOffPairs; + + for (u32 lit_id = 0; lit_id < lits.size(); lit_id++) { + const ue2_case_string &lit = lits[lit_id]; + if (nocase != lit.nocase) { + continue; + } + for (u32 offset = 1; offset < lit.s.size() - max_len + 1; offset++) { + const u8 *substr = (const u8 *)lit.s.c_str() + offset; + u32 hash = hashLongLiteral(substr, max_len, lit.nocase); + hashToLitOffPairs[hash].emplace_back(lit_id, offset); + } + } + + for (auto &m : hashToLitOffPairs) { + LitOffsetVector &d = m.second; + if (d.size() == 1) { + continue; + } + + // Sort by (offset, string) so that we'll be able to remove identical + // string prefixes. + stable_sort(begin(d), end(d), + [&](const pair<u32, u32> &a, const pair<u32, u32> &b) { + const auto &str_a = lits[a.first].s; + const auto &str_b = lits[b.first].s; + return tie(a.second, str_a) < tie(b.second, str_b); + }); + + // Remove entries that point to the same literal prefix. + d.erase(unique(begin(d), end(d), + [&](const pair<u32, u32> &a, const pair<u32, u32> &b) { + if (a.second != b.second) { + return false; + } + const auto &str_a = lits[a.first].s; + const auto &str_b = lits[b.first].s; + const size_t len = max_len + a.second; + return equal(begin(str_a), begin(str_a) + len, + begin(str_b)); + }), + end(d)); + + // Sort d by distance of the residual string (len minus our depth into + // the string). We need to put the 'furthest back' string first. 
+ stable_sort(begin(d), end(d), + [](const pair<u32, u32> &a, const pair<u32, u32> &b) { + if (a.second != b.second) { + return a.second > b.second; /* longest is first */ + } + return a.first < b.first; + }); + } + + return hashToLitOffPairs; +} + +static +vector<RoseLongLitHashEntry> makeHashTable(const vector<ue2_case_string> &lits, + size_t max_len, + const vector<u32> &litToOffsetVal, + u32 numPositions, bool nocase) { + // Compute lit substring hashes. + const auto hashToLitOffPairs = computeLitHashes(lits, max_len, nocase); + + // Compute the size of the hash table: we need enough entries to satisfy + // our max load constraint, and it must be a power of two. + size_t num_entries = (double)numPositions / MAX_HASH_TABLE_LOAD + 1; + num_entries = roundUpToPowerOfTwo(max(MIN_HASH_TABLE_SIZE, num_entries)); + + auto tab = buildHashTable(max_len, litToOffsetVal, hashToLitOffPairs, + num_entries); + DEBUG_PRINTF("built %s hash table for %zu entries: load %f\n", + nocase ? "nocase" : "caseful", num_entries, + hashTableLoad(tab)); + assert(hashTableLoad(tab) < MAX_HASH_TABLE_LOAD); + + return tab; +} + +static +vector<u8> buildLits(const vector<ue2_case_string> &lits, u32 baseOffset, + vector<u32> &litToOffsetVal) { + vector<u8> blob; + litToOffsetVal.resize(lits.size(), 0); + + u32 lit_id = 0; + for (const auto &lit : lits) { + u32 offset = baseOffset + verify_u32(blob.size()); + blob.insert(blob.end(), begin(lit.s), end(lit.s)); + litToOffsetVal[lit_id] = offset; + lit_id++; + } + + DEBUG_PRINTF("built %zu bytes of strings\n", blob.size()); + return blob; +} + +u32 buildLongLiteralTable(const RoseBuildImpl &build, RoseEngineBlob &blob, + vector<ue2_case_string> &lits, + size_t longLitLengthThreshold, + size_t *historyRequired, + size_t *longLitStreamStateRequired) { + // Work in terms of history requirement (i.e. literal len - 1). + const size_t max_len = longLitLengthThreshold - 1; + + // We should only be building the long literal hash table in streaming mode. + if (!build.cc.streaming) { + return 0; + } + + if (lits.empty()) { + DEBUG_PRINTF("no long literals\n"); + return 0; + } + + // The last char of each literal is trimmed as we're not interested in full + // matches, only partial matches. + for (auto &lit : lits) { + assert(!lit.s.empty()); + lit.s.pop_back(); + } + + // Sort by caseful/caseless and in lexicographical order. + stable_sort(begin(lits), end(lits), [](const ue2_case_string &a, + const ue2_case_string &b) { + if (a.nocase != b.nocase) { + return a.nocase < b.nocase; + } + return a.s < b.s; + }); + + // Find literals that are prefixes of other literals (including + // duplicates). Note that we iterate in reverse, since we want to retain + // only the longest string from a set of prefixes. + auto it = unique(lits.rbegin(), lits.rend(), [](const ue2_case_string &a, + const ue2_case_string &b) { + return a.nocase == b.nocase && a.s.size() >= b.s.size() && + equal(b.s.begin(), b.s.end(), a.s.begin()); + }); + + // Erase dupes found by unique(). + lits.erase(lits.begin(), it.base()); + + LongLitInfo info = analyzeLongLits(lits, max_len); + + vector<u32> litToOffsetVal; + const size_t headerSize = ROUNDUP_16(sizeof(RoseLongLitTable)); + vector<u8> lit_blob = buildLits(lits, headerSize, litToOffsetVal); + + // Build caseful bloom filter and hash table. 
+ vector<u8> bloom_case; + vector<RoseLongLitHashEntry> tab_case; + if (info.caseful.num_literals) { + bloom_case = makeBloomFilter(lits, max_len, false); + tab_case = makeHashTable(lits, max_len, litToOffsetVal, + info.caseful.hashed_positions, false); + } + + // Build nocase bloom filter and hash table. + vector<u8> bloom_nocase; + vector<RoseLongLitHashEntry> tab_nocase; + if (info.nocase.num_literals) { + bloom_nocase = makeBloomFilter(lits, max_len, true); + tab_nocase = makeHashTable(lits, max_len, litToOffsetVal, + info.nocase.hashed_positions, true); + } + + size_t wholeLitTabSize = ROUNDUP_16(byte_length(lit_blob)); + size_t htOffsetCase = headerSize + wholeLitTabSize; + size_t htOffsetNocase = htOffsetCase + byte_length(tab_case); + size_t bloomOffsetCase = htOffsetNocase + byte_length(tab_nocase); + size_t bloomOffsetNocase = bloomOffsetCase + byte_length(bloom_case); + + size_t tabSize = ROUNDUP_16(bloomOffsetNocase + byte_length(bloom_nocase)); + + // need to add +2 to both of these to allow space for the actual largest + // value as well as handling the fact that we add one to the space when + // storing out a position to allow zero to mean "no stream state value" + u8 streamBitsCase = lg2(roundUpToPowerOfTwo(tab_case.size() + 2)); + u8 streamBitsNocase = lg2(roundUpToPowerOfTwo(tab_nocase.size() + 2)); + u32 tot_state_bytes = ROUNDUP_N(streamBitsCase + streamBitsNocase, 8) / 8; + + auto table = make_zeroed_bytecode_ptr<char>(tabSize, 16); + assert(table); // otherwise would have thrown std::bad_alloc + + // Fill in the RoseLongLitTable header structure. + RoseLongLitTable *header = (RoseLongLitTable *)(table.get()); + header->size = verify_u32(tabSize); + header->maxLen = verify_u8(max_len); // u8 so doesn't matter; won't go > 255 + header->caseful.hashOffset = verify_u32(htOffsetCase); + header->caseful.hashBits = lg2(tab_case.size()); + header->caseful.streamStateBits = streamBitsCase; + header->caseful.bloomOffset = verify_u32(bloomOffsetCase); + header->caseful.bloomBits = lg2(bloom_case.size() * 8); + header->nocase.hashOffset = verify_u32(htOffsetNocase); + header->nocase.hashBits = lg2(tab_nocase.size()); + header->nocase.streamStateBits = streamBitsNocase; + header->nocase.bloomOffset = verify_u32(bloomOffsetNocase); + header->nocase.bloomBits = lg2(bloom_nocase.size() * 8); + assert(tot_state_bytes < sizeof(u64a)); + header->streamStateBytes = verify_u8(tot_state_bytes); // u8 + + // Copy in the literal strings, hash tables and bloom filters, + copy_bytes(table.get() + headerSize, lit_blob); + copy_bytes(table.get() + htOffsetCase, tab_case); + copy_bytes(table.get() + bloomOffsetCase, bloom_case); + copy_bytes(table.get() + htOffsetNocase, tab_nocase); + copy_bytes(table.get() + bloomOffsetNocase, bloom_nocase); + + DEBUG_PRINTF("built streaming table, size=%zu\n", tabSize); + DEBUG_PRINTF("requires %zu bytes of history\n", max_len); + DEBUG_PRINTF("requires %u bytes of stream state\n", tot_state_bytes); + + *historyRequired = max(*historyRequired, max_len); + *longLitStreamStateRequired = tot_state_bytes; + + return blob.add(table); +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/rose/rose_build_long_lit.h b/contrib/libs/hyperscan/src/rose/rose_build_long_lit.h index 24b61a2f81..a77b1b6921 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_long_lit.h +++ b/contrib/libs/hyperscan/src/rose/rose_build_long_lit.h @@ -1,51 +1,51 @@ -/* - * Copyright (c) 2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or 
without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef ROSE_BUILD_LONG_LIT_H -#define ROSE_BUILD_LONG_LIT_H - -#include "ue2common.h" - -#include <vector> - -namespace ue2 { - -class RoseBuildImpl; -class RoseEngineBlob; -struct ue2_case_string; - -u32 buildLongLiteralTable(const RoseBuildImpl &build, RoseEngineBlob &blob, - std::vector<ue2_case_string> &lits, - size_t longLitLengthThreshold, - size_t *historyRequired, - size_t *longLitStreamStateRequired); - -} // namespace ue2 - - -#endif // ROSE_BUILD_LONG_LIT_H +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef ROSE_BUILD_LONG_LIT_H +#define ROSE_BUILD_LONG_LIT_H + +#include "ue2common.h" + +#include <vector> + +namespace ue2 { + +class RoseBuildImpl; +class RoseEngineBlob; +struct ue2_case_string; + +u32 buildLongLiteralTable(const RoseBuildImpl &build, RoseEngineBlob &blob, + std::vector<ue2_case_string> &lits, + size_t longLitLengthThreshold, + size_t *historyRequired, + size_t *longLitStreamStateRequired); + +} // namespace ue2 + + +#endif // ROSE_BUILD_LONG_LIT_H diff --git a/contrib/libs/hyperscan/src/rose/rose_build_lookaround.cpp b/contrib/libs/hyperscan/src/rose/rose_build_lookaround.cpp index 11f9fa2aa2..d0540d79b0 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_lookaround.cpp +++ b/contrib/libs/hyperscan/src/rose/rose_build_lookaround.cpp @@ -40,12 +40,12 @@ #include "util/container.h" #include "util/dump_charclass.h" #include "util/graph_range.h" -#include "util/flat_containers.h" +#include "util/flat_containers.h" #include "util/verify_types.h" #include <cstdlib> #include <queue> -#include <sstream> +#include <sstream> using namespace std; @@ -63,23 +63,23 @@ static const u32 MAX_LOOKAROUND_ENTRIES = 32; /** \brief We would rather have lookarounds with smaller reach than this. */ static const u32 LOOKAROUND_WIDE_REACH = 200; -#if defined(DEBUG) || defined(DUMP_SUPPORT) -static UNUSED -string dump(const map<s32, CharReach> &look) { - ostringstream oss; - for (auto it = look.begin(), ite = look.end(); it != ite; ++it) { - if (it != look.begin()) { - oss << ", "; - } - oss << "{" << it->first << ": " << describeClass(it->second) << "}"; - } - return oss.str(); -} -#endif - +#if defined(DEBUG) || defined(DUMP_SUPPORT) +static UNUSED +string dump(const map<s32, CharReach> &look) { + ostringstream oss; + for (auto it = look.begin(), ite = look.end(); it != ite; ++it) { + if (it != look.begin()) { + oss << ", "; + } + oss << "{" << it->first << ": " << describeClass(it->second) << "}"; + } + return oss.str(); +} +#endif + static void getForwardReach(const NGHolder &g, u32 top, map<s32, CharReach> &look) { - flat_set<NFAVertex> curr, next; + flat_set<NFAVertex> curr, next; // Consider only successors of start with the required top. 
for (const auto &e : out_edges_range(g.start, g)) { @@ -87,7 +87,7 @@ void getForwardReach(const NGHolder &g, u32 top, map<s32, CharReach> &look) { if (v == g.startDs) { continue; } - if (contains(g[e].tops, top)) { + if (contains(g[e].tops, top)) { curr.insert(v); } } @@ -116,7 +116,7 @@ void getForwardReach(const NGHolder &g, u32 top, map<s32, CharReach> &look) { static void getBackwardReach(const NGHolder &g, ReportID report, u32 lag, map<s32, CharReach> &look) { - flat_set<NFAVertex> curr, next; + flat_set<NFAVertex> curr, next; for (auto v : inv_adjacent_vertices_range(g.accept, g)) { if (contains(g[v].reports, report)) { @@ -187,7 +187,7 @@ void getForwardReach(const raw_dfa &rdfa, map<s32, CharReach> &look) { return; } - flat_set<dstate_id_t> curr, next; + flat_set<dstate_id_t> curr, next; curr.insert(rdfa.start_anchored); for (u32 i = 0; i < MAX_FWD_LEN && !curr.empty(); i++) { @@ -276,7 +276,7 @@ void findForwardReach(const RoseGraph &g, const RoseVertex v, for (const auto &e : out_edges_range(v, g)) { RoseVertex t = target(e, g); if (!g[t].left) { - DEBUG_PRINTF("successor %zu has no leftfix\n", g[t].index); + DEBUG_PRINTF("successor %zu has no leftfix\n", g[t].index); return; } rose_look.push_back(map<s32, CharReach>()); @@ -447,7 +447,7 @@ static void findFloodReach(const RoseBuildImpl &tbi, const RoseVertex v, set<CharReach> &flood_reach) { for (u32 lit_id : tbi.g[v].literals) { - const ue2_literal &s = tbi.literals.at(lit_id).s; + const ue2_literal &s = tbi.literals.at(lit_id).s; if (s.empty()) { continue; } @@ -460,69 +460,69 @@ void findFloodReach(const RoseBuildImpl &tbi, const RoseVertex v, } } - -namespace { -struct LookProto { - LookProto(s32 offset_in, CharReach reach_in) - : offset(offset_in), reach(move(reach_in)) {} - s32 offset; - CharReach reach; -}; -} - + +namespace { +struct LookProto { + LookProto(s32 offset_in, CharReach reach_in) + : offset(offset_in), reach(move(reach_in)) {} + s32 offset; + CharReach reach; +}; +} + +static +vector<LookProto> findLiteralReach(const rose_literal_id &lit) { + vector<LookProto> look; + look.reserve(lit.s.length()); + + s32 i = 0 - lit.s.length() - lit.delay; + for (const auto &c : lit.s) { + look.emplace_back(i, c); + i++; + } + + return look; +} + static -vector<LookProto> findLiteralReach(const rose_literal_id &lit) { - vector<LookProto> look; - look.reserve(lit.s.length()); - - s32 i = 0 - lit.s.length() - lit.delay; - for (const auto &c : lit.s) { - look.emplace_back(i, c); - i++; - } - - return look; -} - -static -vector<LookProto> findLiteralReach(const RoseBuildImpl &build, - const RoseVertex v) { - bool first = true; - vector<LookProto> look; - +vector<LookProto> findLiteralReach(const RoseBuildImpl &build, + const RoseVertex v) { + bool first = true; + vector<LookProto> look; + for (u32 lit_id : build.g[v].literals) { - const rose_literal_id &lit = build.literals.at(lit_id); - auto lit_look = findLiteralReach(lit); - - if (first) { - look = std::move(lit_look); - first = false; - continue; - } - - // Erase elements from look with keys not in lit_look. Where a key is - // in both maps, union its reach with the lookaround. - auto jt = begin(lit_look); - for (auto it = begin(look); it != end(look);) { - if (jt == end(lit_look)) { - // No further lit_look entries, erase remaining elements from - // look. - look.erase(it, end(look)); - break; - } - if (it->offset < jt->offset) { - // Offset is present in look but not in lit_look, erase. 
- it = look.erase(it); - } else if (it->offset > jt->offset) { - // Offset is preset in lit_look but not in look, ignore. - ++jt; - } else { - // Offset is present in both, union its reach with look. - it->reach |= jt->reach; - ++it; - ++jt; - } - } + const rose_literal_id &lit = build.literals.at(lit_id); + auto lit_look = findLiteralReach(lit); + + if (first) { + look = std::move(lit_look); + first = false; + continue; + } + + // Erase elements from look with keys not in lit_look. Where a key is + // in both maps, union its reach with the lookaround. + auto jt = begin(lit_look); + for (auto it = begin(look); it != end(look);) { + if (jt == end(lit_look)) { + // No further lit_look entries, erase remaining elements from + // look. + look.erase(it, end(look)); + break; + } + if (it->offset < jt->offset) { + // Offset is present in look but not in lit_look, erase. + it = look.erase(it); + } else if (it->offset > jt->offset) { + // Offset is preset in lit_look but not in look, ignore. + ++jt; + } else { + // Offset is present in both, union its reach with look. + it->reach |= jt->reach; + ++it; + ++jt; + } + } } return look; @@ -538,11 +538,11 @@ void trimLiterals(const RoseBuildImpl &build, const RoseVertex v, DEBUG_PRINTF("pre-trim lookaround: %s\n", dump(look).c_str()); for (const auto &m : findLiteralReach(build, v)) { - auto it = look.find(m.offset); + auto it = look.find(m.offset); if (it == end(look)) { continue; } - if (m.reach.isSubsetOf(it->second)) { + if (m.reach.isSubsetOf(it->second)) { DEBUG_PRINTF("can trim entry at %d\n", it->first); look.erase(it); } @@ -551,76 +551,76 @@ void trimLiterals(const RoseBuildImpl &build, const RoseVertex v, DEBUG_PRINTF("post-trim lookaround: %s\n", dump(look).c_str()); } -static -void normaliseLeftfix(map<s32, CharReach> &look) { - // We can erase entries where the reach is "all characters", except for the - // very first one -- this might be required to establish a minimum bound on - // the literal's match offset. - - // TODO: It would be cleaner to use a literal program instruction to check - // the minimum bound explicitly. 
- - if (look.empty()) { - return; - } - - const auto earliest = begin(look)->first; - - vector<s32> dead; - for (const auto &m : look) { - if (m.second.all() && m.first != earliest) { - dead.push_back(m.first); - } - } - erase_all(&look, dead); -} - -static -bool trimMultipathLeftfix(const RoseBuildImpl &build, const RoseVertex v, - vector<map<s32, CharReach>> &looks) { - size_t path_count = 0; - for (auto &look : looks) { - ++path_count; - DEBUG_PRINTF("Path #%ld\n", path_count); - - assert(!look.empty()); - trimLiterals(build, v, look); - - if (look.empty()) { - return false; - } - - // Could be optimized here, just keep the empty byte of the longest path - normaliseLeftfix(look); - - if (look.size() > MAX_LOOKAROUND_ENTRIES) { - DEBUG_PRINTF("lookaround too big (%zu entries)\n", look.size()); - return false; - } - } - return true; -} - -static -void transToLookaround(const vector<map<s32, CharReach>> &looks, - vector<vector<LookEntry>> &lookarounds) { - for (const auto &look : looks) { - vector<LookEntry> lookaround; - DEBUG_PRINTF("lookaround: %s\n", dump(look).c_str()); - lookaround.reserve(look.size()); - for (const auto &m : look) { - if (m.first < -128 || m.first > 127) { - DEBUG_PRINTF("range too big\n"); - lookarounds.clear(); - return; - } - s8 offset = verify_s8(m.first); - lookaround.emplace_back(offset, m.second); - } - lookarounds.push_back(lookaround); - } -} - +static +void normaliseLeftfix(map<s32, CharReach> &look) { + // We can erase entries where the reach is "all characters", except for the + // very first one -- this might be required to establish a minimum bound on + // the literal's match offset. + + // TODO: It would be cleaner to use a literal program instruction to check + // the minimum bound explicitly. + + if (look.empty()) { + return; + } + + const auto earliest = begin(look)->first; + + vector<s32> dead; + for (const auto &m : look) { + if (m.second.all() && m.first != earliest) { + dead.push_back(m.first); + } + } + erase_all(&look, dead); +} + +static +bool trimMultipathLeftfix(const RoseBuildImpl &build, const RoseVertex v, + vector<map<s32, CharReach>> &looks) { + size_t path_count = 0; + for (auto &look : looks) { + ++path_count; + DEBUG_PRINTF("Path #%ld\n", path_count); + + assert(!look.empty()); + trimLiterals(build, v, look); + + if (look.empty()) { + return false; + } + + // Could be optimized here, just keep the empty byte of the longest path + normaliseLeftfix(look); + + if (look.size() > MAX_LOOKAROUND_ENTRIES) { + DEBUG_PRINTF("lookaround too big (%zu entries)\n", look.size()); + return false; + } + } + return true; +} + +static +void transToLookaround(const vector<map<s32, CharReach>> &looks, + vector<vector<LookEntry>> &lookarounds) { + for (const auto &look : looks) { + vector<LookEntry> lookaround; + DEBUG_PRINTF("lookaround: %s\n", dump(look).c_str()); + lookaround.reserve(look.size()); + for (const auto &m : look) { + if (m.first < -128 || m.first > 127) { + DEBUG_PRINTF("range too big\n"); + lookarounds.clear(); + return; + } + s8 offset = verify_s8(m.first); + lookaround.emplace_back(offset, m.second); + } + lookarounds.push_back(lookaround); + } +} + void findLookaroundMasks(const RoseBuildImpl &tbi, const RoseVertex v, vector<LookEntry> &lookaround) { lookaround.clear(); @@ -659,155 +659,155 @@ void findLookaroundMasks(const RoseBuildImpl &tbi, const RoseVertex v, } static -bool checkShuftiBuckets(const vector<map<s32, CharReach>> &looks, - u32 bucket_size) { - set<u32> bucket; - for (const auto &look : looks) { - for (const auto &l 
: look) { - CharReach cr = l.second; - if (cr.count() > 128) { - cr.flip(); - } - map <u16, u16> lo2hi; - - for (size_t i = cr.find_first(); i != CharReach::npos;) { - u8 it_hi = i >> 4; - u16 low_encode = 0; - while (i != CharReach::npos && (i >> 4) == it_hi) { - low_encode |= 1 << (i &0xf); - i = cr.find_next(i); - } - lo2hi[low_encode] |= 1 << it_hi; - } - - for (const auto &it : lo2hi) { - u32 hi_lo = (it.second << 16) | it.first; - bucket.insert(hi_lo); - } - } - } - DEBUG_PRINTF("shufti has %lu bucket(s)\n", bucket.size()); - return bucket.size() <= bucket_size; -} - -static -bool getTransientPrefixReach(const NGHolder &g, ReportID report, u32 lag, - vector<map<s32, CharReach>> &looks) { - if (!isAcyclic(g)) { - DEBUG_PRINTF("contains back-edge\n"); +bool checkShuftiBuckets(const vector<map<s32, CharReach>> &looks, + u32 bucket_size) { + set<u32> bucket; + for (const auto &look : looks) { + for (const auto &l : look) { + CharReach cr = l.second; + if (cr.count() > 128) { + cr.flip(); + } + map <u16, u16> lo2hi; + + for (size_t i = cr.find_first(); i != CharReach::npos;) { + u8 it_hi = i >> 4; + u16 low_encode = 0; + while (i != CharReach::npos && (i >> 4) == it_hi) { + low_encode |= 1 << (i &0xf); + i = cr.find_next(i); + } + lo2hi[low_encode] |= 1 << it_hi; + } + + for (const auto &it : lo2hi) { + u32 hi_lo = (it.second << 16) | it.first; + bucket.insert(hi_lo); + } + } + } + DEBUG_PRINTF("shufti has %lu bucket(s)\n", bucket.size()); + return bucket.size() <= bucket_size; +} + +static +bool getTransientPrefixReach(const NGHolder &g, ReportID report, u32 lag, + vector<map<s32, CharReach>> &looks) { + if (!isAcyclic(g)) { + DEBUG_PRINTF("contains back-edge\n"); return false; } - // Must be floating chains wired to startDs. - if (!isFloating(g)) { - DEBUG_PRINTF("not a floating start\n"); + // Must be floating chains wired to startDs. + if (!isFloating(g)) { + DEBUG_PRINTF("not a floating start\n"); return false; } - vector<NFAVertex> curr; - for (auto v : inv_adjacent_vertices_range(g.accept, g)) { - if (v == g.start || v == g.startDs) { - DEBUG_PRINTF("empty graph\n"); - return true; - } - if (contains(g[v].reports, report)) { - curr.push_back(v); - } - } - - assert(!curr.empty()); - - u32 total_len = curr.size(); - - for (const auto &v : curr) { - looks.emplace_back(map<s32, CharReach>()); - looks.back()[0 - (lag + 1)] = g[v].char_reach; - } - - bool curr_active = false; - - /* For each offset -i, we backwardly trace the path by vertices in curr. - * Once there are more than 8 paths and more than 64 bits total_len, - * which means that neither MULTIPATH_LOOKAROUND nor MULTIPATH_SHUFTI - * could be successfully built, we will give up the path finding. - * Otherwise, the loop will halt when all vertices in curr are startDs. 
- */ - for (u32 i = lag + 2; i < (lag + 2) + MAX_BACK_LEN; i++) { - curr_active = false; - size_t curr_size = curr.size(); - if (curr.size() > 1 && i > lag + MULTIPATH_MAX_LEN) { - DEBUG_PRINTF("range is larger than 16 in multi-path\n"); + vector<NFAVertex> curr; + for (auto v : inv_adjacent_vertices_range(g.accept, g)) { + if (v == g.start || v == g.startDs) { + DEBUG_PRINTF("empty graph\n"); + return true; + } + if (contains(g[v].reports, report)) { + curr.push_back(v); + } + } + + assert(!curr.empty()); + + u32 total_len = curr.size(); + + for (const auto &v : curr) { + looks.emplace_back(map<s32, CharReach>()); + looks.back()[0 - (lag + 1)] = g[v].char_reach; + } + + bool curr_active = false; + + /* For each offset -i, we backwardly trace the path by vertices in curr. + * Once there are more than 8 paths and more than 64 bits total_len, + * which means that neither MULTIPATH_LOOKAROUND nor MULTIPATH_SHUFTI + * could be successfully built, we will give up the path finding. + * Otherwise, the loop will halt when all vertices in curr are startDs. + */ + for (u32 i = lag + 2; i < (lag + 2) + MAX_BACK_LEN; i++) { + curr_active = false; + size_t curr_size = curr.size(); + if (curr.size() > 1 && i > lag + MULTIPATH_MAX_LEN) { + DEBUG_PRINTF("range is larger than 16 in multi-path\n"); return false; } - for (size_t idx = 0; idx < curr_size; idx++) { - NFAVertex v = curr[idx]; - if (v == g.startDs) { - continue; - } - assert(!is_special(v, g)); - - for (auto u : inv_adjacent_vertices_range(v, g)) { - if (u == g.start || u == g.startDs) { - curr[idx] = g.startDs; - break; - } - } - - if (is_special(curr[idx], g)) { - continue; - } - - for (auto u : inv_adjacent_vertices_range(v, g)) { - curr_active = true; - if (curr[idx] == v) { - curr[idx] = u; - looks[idx][0 - i] = g[u].char_reach; - total_len++; - } else { - curr.push_back(u); - looks.push_back(looks[idx]); - (looks.back())[0 - i] = g[u].char_reach; - total_len += looks.back().size(); - } - - if (curr.size() > MAX_LOOKAROUND_PATHS && total_len > 64) { - DEBUG_PRINTF("too many branches\n"); - return false; - } - } - } - if (!curr_active) { - break; - } - } - - if (curr_active) { - DEBUG_PRINTF("single path too long\n"); - return false; - } - - // More than 8 paths, check multi-path shufti. - if (curr.size() > MAX_LOOKAROUND_PATHS) { - u32 bucket_size = total_len > 32 ? 8 : 16; - if (!checkShuftiBuckets(looks, bucket_size)) { - DEBUG_PRINTF("shufti has too many buckets\n"); + for (size_t idx = 0; idx < curr_size; idx++) { + NFAVertex v = curr[idx]; + if (v == g.startDs) { + continue; + } + assert(!is_special(v, g)); + + for (auto u : inv_adjacent_vertices_range(v, g)) { + if (u == g.start || u == g.startDs) { + curr[idx] = g.startDs; + break; + } + } + + if (is_special(curr[idx], g)) { + continue; + } + + for (auto u : inv_adjacent_vertices_range(v, g)) { + curr_active = true; + if (curr[idx] == v) { + curr[idx] = u; + looks[idx][0 - i] = g[u].char_reach; + total_len++; + } else { + curr.push_back(u); + looks.push_back(looks[idx]); + (looks.back())[0 - i] = g[u].char_reach; + total_len += looks.back().size(); + } + + if (curr.size() > MAX_LOOKAROUND_PATHS && total_len > 64) { + DEBUG_PRINTF("too many branches\n"); + return false; + } + } + } + if (!curr_active) { + break; + } + } + + if (curr_active) { + DEBUG_PRINTF("single path too long\n"); + return false; + } + + // More than 8 paths, check multi-path shufti. + if (curr.size() > MAX_LOOKAROUND_PATHS) { + u32 bucket_size = total_len > 32 ? 
8 : 16; + if (!checkShuftiBuckets(looks, bucket_size)) { + DEBUG_PRINTF("shufti has too many buckets\n"); return false; } - } + } - assert(!looks.empty()); - if (looks.size() == 1) { - DEBUG_PRINTF("single lookaround\n"); - } else { - DEBUG_PRINTF("multi-path lookaround\n"); + assert(!looks.empty()); + if (looks.size() == 1) { + DEBUG_PRINTF("single lookaround\n"); + } else { + DEBUG_PRINTF("multi-path lookaround\n"); } DEBUG_PRINTF("done\n"); return true; } bool makeLeftfixLookaround(const RoseBuildImpl &build, const RoseVertex v, - vector<vector<LookEntry>> &lookaround) { + vector<vector<LookEntry>> &lookaround) { lookaround.clear(); const RoseGraph &g = build.g; @@ -823,19 +823,19 @@ bool makeLeftfixLookaround(const RoseBuildImpl &build, const RoseVertex v, return false; } - vector<map<s32, CharReach>> looks; - if (!getTransientPrefixReach(*leftfix.graph(), g[v].left.leftfix_report, - g[v].left.lag, looks)) { - DEBUG_PRINTF("graph has loop or too large\n"); + vector<map<s32, CharReach>> looks; + if (!getTransientPrefixReach(*leftfix.graph(), g[v].left.leftfix_report, + g[v].left.lag, looks)) { + DEBUG_PRINTF("graph has loop or too large\n"); return false; } - if (!trimMultipathLeftfix(build, v, looks)) { + if (!trimMultipathLeftfix(build, v, looks)) { return false; } - transToLookaround(looks, lookaround); + transToLookaround(looks, lookaround); - return !lookaround.empty(); + return !lookaround.empty(); } void mergeLookaround(vector<LookEntry> &lookaround, @@ -846,7 +846,7 @@ void mergeLookaround(vector<LookEntry> &lookaround, } // Don't merge lookarounds at offsets we already have entries for. - flat_set<s8> offsets; + flat_set<s8> offsets; for (const auto &e : lookaround) { offsets.insert(e.offset); } diff --git a/contrib/libs/hyperscan/src/rose/rose_build_lookaround.h b/contrib/libs/hyperscan/src/rose/rose_build_lookaround.h index d1984f5b4a..70d4217ccc 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_lookaround.h +++ b/contrib/libs/hyperscan/src/rose/rose_build_lookaround.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -33,13 +33,13 @@ #define ROSE_ROSE_BUILD_LOOKAROUND_H #include "rose_graph.h" -#include "util/hash.h" +#include "util/hash.h" #include <vector> -/** \brief Max path number for multi-path lookaround. */ -#define MAX_LOOKAROUND_PATHS 8 - +/** \brief Max path number for multi-path lookaround. */ +#define MAX_LOOKAROUND_PATHS 8 + namespace ue2 { class CharReach; @@ -48,7 +48,7 @@ class RoseBuildImpl; /** \brief Lookaround entry prototype, describing the reachability at a given * distance from the end of a role match. */ struct LookEntry { - LookEntry() : offset(0) {} + LookEntry() : offset(0) {} LookEntry(s8 offset_in, const CharReach &reach_in) : offset(offset_in), reach(reach_in) {} s8 offset; //!< offset from role match location. @@ -60,7 +60,7 @@ struct LookEntry { }; void findLookaroundMasks(const RoseBuildImpl &tbi, const RoseVertex v, - std::vector<LookEntry> &look_more); + std::vector<LookEntry> &look_more); /** * \brief If possible, render the prefix of the given vertex as a lookaround. @@ -69,22 +69,22 @@ void findLookaroundMasks(const RoseBuildImpl &tbi, const RoseVertex v, * it can be satisfied with a lookaround alone. 
*/ bool makeLeftfixLookaround(const RoseBuildImpl &build, const RoseVertex v, - std::vector<std::vector<LookEntry>> &lookaround); + std::vector<std::vector<LookEntry>> &lookaround); void mergeLookaround(std::vector<LookEntry> &lookaround, const std::vector<LookEntry> &more_lookaround); } // namespace ue2 -namespace std { - -template<> -struct hash<ue2::LookEntry> { - size_t operator()(const ue2::LookEntry &l) const { - return ue2::hash_all(l.offset, l.reach); - } -}; - -} // namespace std - +namespace std { + +template<> +struct hash<ue2::LookEntry> { + size_t operator()(const ue2::LookEntry &l) const { + return ue2::hash_all(l.offset, l.reach); + } +}; + +} // namespace std + #endif // ROSE_ROSE_BUILD_LOOKAROUND_H diff --git a/contrib/libs/hyperscan/src/rose/rose_build_matchers.cpp b/contrib/libs/hyperscan/src/rose/rose_build_matchers.cpp index f172c09536..4fde4c4418 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_matchers.cpp +++ b/contrib/libs/hyperscan/src/rose/rose_build_matchers.cpp @@ -1,1053 +1,1053 @@ -/* +/* * Copyright (c) 2016-2019, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** - * \file - * \brief Rose build: code for constructing literal tables. 
- */ - -#include "rose_build_matchers.h" - -#include "rose_build_dump.h" -#include "rose_build_impl.h" -#include "rose_build_lit_accel.h" -#include "rose_build_width.h" -#include "hwlm/hwlm_build.h" -#include "hwlm/hwlm_internal.h" -#include "hwlm/hwlm_literal.h" -#include "nfa/castlecompile.h" -#include "nfa/nfa_api_queue.h" -#include "util/charreach_util.h" -#include "util/compile_context.h" -#include "util/compile_error.h" -#include "util/dump_charclass.h" -#include "util/make_unique.h" -#include "util/report.h" -#include "util/report_manager.h" -#include "util/verify_types.h" -#include "ue2common.h" - -#include <iomanip> -#include <sstream> - -#include <boost/range/adaptor/map.hpp> -#include <boost/range/adaptor/reversed.hpp> - -using namespace std; -using boost::adaptors::map_values; - -namespace ue2 { - -static const size_t MAX_ACCEL_STRING_LEN = 16; - -#if defined(DEBUG) || defined(DUMP_SUPPORT) -static UNUSED -string dumpMask(const vector<u8> &v) { - ostringstream oss; - for (u8 e : v) { - oss << setfill('0') << setw(2) << hex << (unsigned int)e; - } - return oss.str(); -} -#endif - -static -bool maskFromLeftGraph(const LeftEngInfo &left, vector<u8> &msk, - vector<u8> &cmp) { - const u32 lag = left.lag; - const ReportID report = left.leftfix_report; - - DEBUG_PRINTF("leftfix with lag %u, report %u\n", lag, report); - - assert(left.graph); - const NGHolder &h = *left.graph; - assert(in_degree(h.acceptEod, h) == 1); // no eod reports - - // Start with the set of reporter vertices for this leftfix. - set<NFAVertex> curr; - for (auto u : inv_adjacent_vertices_range(h.accept, h)) { - if (contains(h[u].reports, report)) { - curr.insert(u); - } - } - assert(!curr.empty()); - - size_t i = HWLM_MASKLEN - lag - 1; - do { - if (curr.empty() || contains(curr, h.start) - || contains(curr, h.startDs)) { - DEBUG_PRINTF("end of the road\n"); - break; - } - - set<NFAVertex> next; - CharReach cr; - for (NFAVertex v : curr) { - const auto &v_cr = h[v].char_reach; - DEBUG_PRINTF("vertex %zu, reach %s\n", h[v].index, - describeClass(v_cr).c_str()); - cr |= v_cr; - insert(&next, inv_adjacent_vertices(v, h)); - } - make_and_cmp_mask(cr, &msk.at(i), &cmp.at(i)); - DEBUG_PRINTF("%zu: reach=%s, msk=%u, cmp=%u\n", i, - describeClass(cr).c_str(), msk[i], cmp[i]); - curr.swap(next); - } while (i-- > 0); - - return true; -} - -static -bool maskFromLeftCastle(const LeftEngInfo &left, vector<u8> &msk, - vector<u8> &cmp) { - const u32 lag = left.lag; - const ReportID report = left.leftfix_report; - - DEBUG_PRINTF("leftfix with lag %u, report %u\n", lag, report); - - assert(left.castle); - const CastleProto &c = *left.castle; - - depth min_width(depth::infinity()); - for (const PureRepeat &repeat : c.repeats | map_values) { - if (contains(repeat.reports, report)) { - min_width = min(min_width, repeat.bounds.min); - } - } - - DEBUG_PRINTF("castle min width for this report is %s\n", - min_width.str().c_str()); - - if (!min_width.is_finite() || min_width == depth(0)) { - DEBUG_PRINTF("bad min width\n"); - return false; - } - - u32 len = min_width; - u32 end = HWLM_MASKLEN - lag; - for (u32 i = end; i > end - min(end, len); i--) { - make_and_cmp_mask(c.reach(), &msk.at(i - 1), &cmp.at(i - 1)); - } - - return true; -} - -static -bool maskFromLeft(const LeftEngInfo &left, vector<u8> &msk, vector<u8> &cmp) { - if (left.lag >= HWLM_MASKLEN) { - DEBUG_PRINTF("too much lag\n"); - return false; - } - - if (left.graph) { - return maskFromLeftGraph(left, msk, cmp); - } else if (left.castle) { - return maskFromLeftCastle(left, 
msk, cmp); - } - - return false; -} - -static -bool maskFromPreds(const RoseBuildImpl &build, const rose_literal_id &id, - const RoseVertex v, vector<u8> &msk, vector<u8> &cmp) { - const RoseGraph &g = build.g; - - // For right now, wuss out and only handle cases with one pred. - if (in_degree(v, g) != 1) { - return false; - } - - // Root successors have no literal before them. - if (build.isRootSuccessor(v)) { - return false; - } - - // If we have a single predecessor with a short bound, we may be able to - // fill out a mask with the trailing bytes of the previous literal. This - // allows us to improve literals like the 'bar' in 'fo.bar'. - - RoseEdge e = *(in_edges(v, g).first); - u32 bound = g[e].maxBound; - if (bound != g[e].minBound || bound >= HWLM_MASKLEN) { - return false; - } - - bound += id.s.length(); - if (bound >= HWLM_MASKLEN) { - return false; - } - - DEBUG_PRINTF("bound %u\n", bound); - - RoseVertex u = source(e, g); - if (g[u].literals.size() != 1) { - DEBUG_PRINTF("u has %zu literals\n", g[u].literals.size()); - return false; - } - - u32 u_lit_id = *(g[u].literals.begin()); - const rose_literal_id &u_id = build.literals.at(u_lit_id); - DEBUG_PRINTF("u has lit: %s\n", escapeString(u_id.s).c_str()); - - // Number of characters to take from the back of u's literal. - size_t u_len = u_id.s.length(); - size_t u_sublen = min(u_len, (size_t)HWLM_MASKLEN - bound); - - size_t i = HWLM_MASKLEN - (bound + u_sublen); - - ue2_literal::const_iterator it, ite; - for (it = u_id.s.begin() + (u_len - u_sublen), ite = u_id.s.end(); - it != ite; ++it) { - make_and_cmp_mask(*it, &msk.at(i), &cmp.at(i)); - ++i; - } - - return true; -} - -static -bool addSurroundingMask(const RoseBuildImpl &build, const rose_literal_id &id, - const RoseVertex v, vector<u8> &msk, vector<u8> &cmp) { - // Start with zero masks. - msk.assign(HWLM_MASKLEN, 0); - cmp.assign(HWLM_MASKLEN, 0); - - const LeftEngInfo &left = build.g[v].left; - if (left && left.lag < HWLM_MASKLEN) { - if (maskFromLeft(left, msk, cmp)) { - DEBUG_PRINTF("mask from a leftfix!\n"); - return true; - } - } - - if (id.s.length() < HWLM_MASKLEN) { - if (maskFromPreds(build, id, v, msk, cmp)) { - DEBUG_PRINTF("mask from preds!\n"); - return true; - } - } - - return false; -} - -static -bool hamsterMaskCombine(vector<u8> &msk, vector<u8> &cmp, - const vector<u8> &v_msk, const vector<u8> &v_cmp) { - assert(msk.size() == HWLM_MASKLEN && cmp.size() == HWLM_MASKLEN); - assert(v_msk.size() == HWLM_MASKLEN && v_cmp.size() == HWLM_MASKLEN); - - u8 all_masks = 0; - - for (size_t i = 0; i < HWLM_MASKLEN; i++) { - u8 filter = ~(cmp[i] ^ v_cmp[i]); - msk[i] &= v_msk[i]; - msk[i] &= filter; - cmp[i] &= filter; - - all_masks |= msk[i]; - } - - // Return false if we have no bits on in any mask elements. - return all_masks != 0; -} - -static -bool addSurroundingMask(const RoseBuildImpl &build, const rose_literal_id &id, - const rose_literal_info &info, vector<u8> &msk, - vector<u8> &cmp) { - if (!build.cc.grey.roseHamsterMasks) { - return false; - } - - if (!info.delayed_ids.empty()) { - // Not safe to add masks to delayed literals at this late stage. - return false; - } - - msk.assign(HWLM_MASKLEN, 0); - cmp.assign(HWLM_MASKLEN, 0); - - size_t num = 0; - vector<u8> v_msk, v_cmp; - - for (RoseVertex v : info.vertices) { - if (!addSurroundingMask(build, id, v, v_msk, v_cmp)) { - DEBUG_PRINTF("no mask\n"); - return false; - } - - if (!num++) { - // First (or only) vertex, this becomes the mask/cmp pair. 
- msk = v_msk; - cmp = v_cmp; - } else { - // Multiple vertices with potentially different masks. We combine - // them into an 'advisory' mask. - if (!hamsterMaskCombine(msk, cmp, v_msk, v_cmp)) { - DEBUG_PRINTF("mask went to zero\n"); - return false; - } - } - } - - normaliseLiteralMask(id.s, msk, cmp); - - if (msk.empty()) { - DEBUG_PRINTF("no mask\n"); - return false; - } - - DEBUG_PRINTF("msk=%s, cmp=%s\n", dumpMask(msk).c_str(), - dumpMask(cmp).c_str()); - return true; -} - -void findMoreLiteralMasks(RoseBuildImpl &build) { - if (!build.cc.grey.roseHamsterMasks) { - return; - } - - vector<u32> candidates; - for (u32 id = 0; id < build.literals.size(); id++) { - const auto &lit = build.literals.at(id); - - if (lit.delay || build.isDelayed(id)) { - continue; - } - - // Literal masks are only allowed for literals that will end up in an - // HWLM table. - switch (lit.table) { - case ROSE_FLOATING: - case ROSE_EOD_ANCHORED: - case ROSE_ANCHORED_SMALL_BLOCK: - break; - default: - continue; - } - - candidates.push_back(id); - } - - for (const u32 &id : candidates) { - const auto &lit = build.literals.at(id); - auto &lit_info = build.literal_info.at(id); - - vector<u8> msk, cmp; - if (!addSurroundingMask(build, lit, lit_info, msk, cmp)) { - continue; - } - DEBUG_PRINTF("found surrounding mask for lit_id=%u (%s)\n", id, - dumpString(lit.s).c_str()); - u32 new_id = build.getLiteralId(lit.s, msk, cmp, lit.delay, lit.table); - if (new_id == id) { - continue; - } - DEBUG_PRINTF("replacing with new lit_id=%u\n", new_id); - - // Note that our new literal may already exist and have vertices, etc. - // We assume that this transform is happening prior to group assignment. - assert(lit_info.group_mask == 0); - auto &new_info = build.literal_info.at(new_id); - - // Move the vertices across. - new_info.vertices.insert(begin(lit_info.vertices), - end(lit_info.vertices)); - for (auto v : lit_info.vertices) { - build.g[v].literals.erase(id); - build.g[v].literals.insert(new_id); - } - lit_info.vertices.clear(); - - // Preserve other properties. - new_info.requires_benefits = lit_info.requires_benefits; - } -} - -// The mask already associated with the literal and any mask due to -// mixed-case is mandatory. 
-static -void addLiteralMask(const rose_literal_id &id, vector<u8> &msk, - vector<u8> &cmp) { - const size_t suffix_len = min(id.s.length(), size_t{HWLM_MASKLEN}); - bool mixed_suffix = mixed_sensitivity_in(id.s.end() - suffix_len, - id.s.end()); - - if (id.msk.empty() && !mixed_suffix) { - return; - } - - while (msk.size() < HWLM_MASKLEN) { - msk.insert(msk.begin(), 0); - cmp.insert(cmp.begin(), 0); - } - - if (!id.msk.empty()) { - assert(id.msk.size() <= HWLM_MASKLEN); - assert(id.msk.size() == id.cmp.size()); - for (size_t i = 0; i < id.msk.size(); i++) { - size_t mand_offset = msk.size() - i - 1; - size_t lit_offset = id.msk.size() - i - 1; - msk[mand_offset] = id.msk[lit_offset]; - cmp[mand_offset] = id.cmp[lit_offset]; - } - } - - if (mixed_suffix) { - auto it = id.s.rbegin(); - for (size_t i = 0; i < suffix_len; ++i, ++it) { - const auto &c = *it; - if (!c.nocase) { - size_t offset = HWLM_MASKLEN - i - 1; - DEBUG_PRINTF("offset %zu must match 0x%02x exactly\n", offset, - c.c); - make_and_cmp_mask(c, &msk[offset], &cmp[offset]); - } - } - } - - normaliseLiteralMask(id.s, msk, cmp); -} - -static -bool isDirectHighlander(const RoseBuildImpl &build, const u32 id, - const rose_literal_info &info) { - if (!build.isDirectReport(id)) { - return false; - } - - auto is_simple_exhaustible = [&build](ReportID rid) { - const Report &report = build.rm.getReport(rid); - return isSimpleExhaustible(report); - }; - - assert(!info.vertices.empty()); - for (const auto &v : info.vertices) { - const auto &reports = build.g[v].reports; - assert(!reports.empty()); - if (!all_of(begin(reports), end(reports), - is_simple_exhaustible)) { - return false; - } - } - return true; -} - -// Called by isNoRunsLiteral below. -static -bool isNoRunsVertex(const RoseBuildImpl &build, RoseVertex u) { - const RoseGraph &g = build.g; - if (!g[u].isBoring()) { - DEBUG_PRINTF("u=%zu is not boring\n", g[u].index); - return false; - } - - if (!g[u].reports.empty()) { - DEBUG_PRINTF("u=%zu has accept\n", g[u].index); - return false; - } - - /* TODO: handle non-root roles as well. It can't be that difficult... */ - - if (in_degree(u, g) != 1) { - DEBUG_PRINTF("u=%zu is not a root role\n", g[u].index); - return false; - } - - RoseEdge e = edge(build.root, u, g); - - if (!e) { - DEBUG_PRINTF("u=%zu is not a root role\n", g[u].index); - return false; - } - - if (g[e].minBound != 0 || g[e].maxBound != ROSE_BOUND_INF) { - DEBUG_PRINTF("u=%zu has bounds from root\n", g[u].index); - return false; - } - - for (const auto &oe : out_edges_range(u, g)) { - RoseVertex v = target(oe, g); - if (g[oe].maxBound != ROSE_BOUND_INF) { - DEBUG_PRINTF("edge (%zu,%zu) has max bound\n", g[u].index, - g[v].index); - return false; - } - if (g[v].left) { - DEBUG_PRINTF("v=%zu has rose prefix\n", g[v].index); - return false; - } - } - return true; -} - -static -bool isNoRunsLiteral(const RoseBuildImpl &build, const u32 id, - const rose_literal_info &info, const size_t max_len) { - DEBUG_PRINTF("lit id %u\n", id); - - if (info.requires_benefits) { - DEBUG_PRINTF("requires benefits\n"); // which would need confirm - return false; - } - - size_t len = build.literals.at(id).s.length(); - if (len > max_len) { - DEBUG_PRINTF("long literal, requires confirm\n"); - return false; - } - - if (len > ROSE_SHORT_LITERAL_LEN_MAX) { - DEBUG_PRINTF("medium-length literal, requires confirm\n"); - return false; - } - - if (isDirectHighlander(build, id, info)) { - DEBUG_PRINTF("highlander direct report\n"); - return true; - } - - // Undelayed vertices. 
- for (RoseVertex v : info.vertices) { - if (!isNoRunsVertex(build, v)) { - return false; - } - } - - // Delayed vertices. - for (u32 d : info.delayed_ids) { - assert(d < build.literal_info.size()); - const rose_literal_info &delayed_info = build.literal_info.at(d); - assert(delayed_info.undelayed_id == id); - for (RoseVertex v : delayed_info.vertices) { - if (!isNoRunsVertex(build, v)) { - return false; - } - } - } - - DEBUG_PRINTF("is no-runs literal\n"); - return true; -} - -static -bool isNoRunsFragment(const RoseBuildImpl &build, const LitFragment &f, - const size_t max_len) { - // For the fragment to be marked "no runs", every literal it fires must - // need no further confirmation work. - return all_of_in(f.lit_ids, [&](u32 lit_id) { - const auto &info = build.literal_info.at(lit_id); - return isNoRunsLiteral(build, lit_id, info, max_len); - }); -} - -static -const raw_puff &getChainedPuff(const RoseBuildImpl &build, - const Report &report) { - DEBUG_PRINTF("chained report, event %u\n", report.onmatch); - - // MPV has already been moved to the outfixes vector. - assert(!build.mpv_outfix); - - auto mpv_outfix_it = find_if( - begin(build.outfixes), end(build.outfixes), - [](const OutfixInfo &outfix) { return outfix.is_nonempty_mpv(); }); - assert(mpv_outfix_it != end(build.outfixes)); - const auto *mpv = mpv_outfix_it->mpv(); - - u32 puff_index = report.onmatch - MQE_TOP_FIRST; - assert(puff_index < mpv->triggered_puffettes.size()); - return mpv->triggered_puffettes.at(puff_index); -} - -/** - * \brief Returns a conservative estimate of the minimum offset at which the - * given literal can lead to a report. - * - * TODO: This could be made more precise by calculating a "distance to accept" - * for every vertex in the graph; right now we're only accurate for leaf nodes. - */ -static -u64a literalMinReportOffset(const RoseBuildImpl &build, - const rose_literal_id &lit, - const rose_literal_info &info) { - const auto &g = build.g; - - const u32 lit_len = verify_u32(lit.elength()); - - u64a lit_min_offset = UINT64_MAX; - - for (const auto &v : info.vertices) { - DEBUG_PRINTF("vertex %zu min_offset=%u\n", g[v].index, g[v].min_offset); - - u64a vert_offset = g[v].min_offset; - - if (vert_offset >= lit_min_offset) { - continue; - } - - u64a min_offset = UINT64_MAX; - - for (const auto &id : g[v].reports) { - const Report &report = build.rm.getReport(id); - DEBUG_PRINTF("report id %u, min offset=%llu\n", id, - report.minOffset); - if (report.type == INTERNAL_ROSE_CHAIN) { - // This vertex triggers an MPV, which will fire reports after - // repeating for a while. - assert(report.minOffset == 0); // Should not have bounds. 
- const auto &puff = getChainedPuff(build, report); - DEBUG_PRINTF("chained puff repeats=%u\n", puff.repeats); - const Report &puff_report = build.rm.getReport(puff.report); - DEBUG_PRINTF("puff report %u, min offset=%llu\n", puff.report, - puff_report.minOffset); - min_offset = min(min_offset, max(vert_offset + puff.repeats, - puff_report.minOffset)); - } else { - DEBUG_PRINTF("report min offset=%llu\n", report.minOffset); - min_offset = min(min_offset, max(vert_offset, - report.minOffset)); - } - } - - if (g[v].suffix) { - depth suffix_width = findMinWidth(g[v].suffix, g[v].suffix.top); - assert(suffix_width.is_reachable()); - DEBUG_PRINTF("suffix with width %s\n", suffix_width.str().c_str()); - min_offset = min(min_offset, vert_offset + suffix_width); - } - - if (!isLeafNode(v, g) || min_offset == UINT64_MAX) { - min_offset = vert_offset; - } - - lit_min_offset = min(lit_min_offset, min_offset); - } - - // If this literal in the undelayed literal corresponding to some delayed - // literals, we must take their minimum offsets into account. - for (const u32 &delayed_id : info.delayed_ids) { - const auto &delayed_lit = build.literals.at(delayed_id); - const auto &delayed_info = build.literal_info.at(delayed_id); - u64a delayed_min_offset = literalMinReportOffset(build, delayed_lit, - delayed_info); - DEBUG_PRINTF("delayed_id=%u, min_offset = %llu\n", delayed_id, - delayed_min_offset); - lit_min_offset = min(lit_min_offset, delayed_min_offset); - } - - // If we share a vertex with a shorter literal, our min offset might dip - // below the length of this one. - lit_min_offset = max(lit_min_offset, u64a{lit_len}); - - return lit_min_offset; -} - -template<class Container> -void trim_to_suffix(Container &c, size_t len) { - if (c.size() <= len) { - return; - } - - size_t suffix_len = c.size() - len; - c.erase(c.begin(), c.begin() + suffix_len); -} - -namespace { - -/** \brief Prototype for literal matcher construction. */ -struct MatcherProto { - /** \brief Literal fragments used to construct the literal matcher. */ - vector<hwlmLiteral> lits; - - /** \brief Longer literals used for acceleration analysis. */ - vector<AccelString> accel_lits; - - /** \brief The history required by the literal matcher. */ - size_t history_required = 0; - - /** \brief Insert the contents of another MatcherProto. */ - void insert(const MatcherProto &a); -}; -} - -static -void addFragmentLiteral(const RoseBuildImpl &build, MatcherProto &mp, - const LitFragment &f, u32 id, size_t max_len) { - const rose_literal_id &lit = build.literals.at(id); - - DEBUG_PRINTF("lit='%s' (len %zu)\n", dumpString(lit.s).c_str(), - lit.s.length()); - - vector<u8> msk = lit.msk; // copy - vector<u8> cmp = lit.cmp; // copy - - bool noruns = isNoRunsFragment(build, f, max_len); - DEBUG_PRINTF("fragment is %s\n", noruns ? "noruns" : "not noruns"); - - auto lit_final = lit.s; // copy - - if (lit_final.length() > ROSE_SHORT_LITERAL_LEN_MAX) { - DEBUG_PRINTF("truncating to tail of length %zu\n", - size_t{ROSE_SHORT_LITERAL_LEN_MAX}); - lit_final.erase(0, lit_final.length() - ROSE_SHORT_LITERAL_LEN_MAX); - // We shouldn't have set a threshold below 8 chars. 
- assert(msk.size() <= ROSE_SHORT_LITERAL_LEN_MAX); - assert(!noruns); - } - - addLiteralMask(lit, msk, cmp); - - const auto &s_final = lit_final.get_string(); - bool nocase = lit_final.any_nocase(); - - DEBUG_PRINTF("id=%u, s='%s', nocase=%d, noruns=%d, msk=%s, cmp=%s\n", - f.fragment_id, escapeString(s_final).c_str(), (int)nocase, - noruns, dumpMask(msk).c_str(), dumpMask(cmp).c_str()); - - if (!maskIsConsistent(s_final, nocase, msk, cmp)) { - DEBUG_PRINTF("msk/cmp for literal can't match, skipping\n"); - return; - } - - const auto &groups = f.groups; - - mp.lits.emplace_back(move(s_final), nocase, noruns, f.fragment_id, - groups, msk, cmp); -} - -static -void addAccelLiteral(MatcherProto &mp, const rose_literal_id &lit, - const rose_literal_info &info, size_t max_len) { - const auto &s = lit.s; // copy - - DEBUG_PRINTF("lit='%s' (len %zu)\n", dumpString(s).c_str(), s.length()); - - vector<u8> msk = lit.msk; // copy - vector<u8> cmp = lit.cmp; // copy - addLiteralMask(lit, msk, cmp); - - if (!maskIsConsistent(s.get_string(), s.any_nocase(), msk, cmp)) { - DEBUG_PRINTF("msk/cmp for literal can't match, skipping\n"); - return; - } - - // Literals used for acceleration must be limited to max_len, as that's all - // we can see in history. - string s_final = lit.s.get_string(); - trim_to_suffix(s_final, max_len); - trim_to_suffix(msk, max_len); - trim_to_suffix(cmp, max_len); - - mp.accel_lits.emplace_back(s_final, lit.s.any_nocase(), msk, cmp, - info.group_mask); -} - -/** - * \brief Build up a vector of literals (and associated other data) for the - * given table. - * - * If max_offset is specified (and not ROSE_BOUND_INF), then literals that can - * only lead to a pattern match after max_offset may be excluded. - */ -static -MatcherProto makeMatcherProto(const RoseBuildImpl &build, - const vector<LitFragment> &fragments, - rose_literal_table table, bool delay_rebuild, - size_t max_len, u32 max_offset = ROSE_BOUND_INF) { - MatcherProto mp; - - if (delay_rebuild) { - assert(table == ROSE_FLOATING); - assert(build.cc.streaming); - } - - vector<u32> used_lit_ids; - - for (const auto &f : fragments) { - assert(!f.lit_ids.empty()); - - // All literals that share a fragment are in the same table. - if (build.literals.at(f.lit_ids.front()).table != table) { - continue; // next fragment. - } - - DEBUG_PRINTF("fragment %u, %zu lit_ids\n", f.fragment_id, - f.lit_ids.size()); - - used_lit_ids.clear(); - for (u32 id : f.lit_ids) { - const rose_literal_id &lit = build.literals.at(id); - assert(id < build.literal_info.size()); - const auto &info = build.literal_info.at(id); - if (lit.delay) { - continue; /* delay id's are virtual-ish */ - } - - // When building the delay rebuild table, we only want to include - // literals that have delayed variants. - if (delay_rebuild && info.delayed_ids.empty()) { - DEBUG_PRINTF("not needed for delay rebuild\n"); - continue; - } - - if (max_offset != ROSE_BOUND_INF) { - u64a min_report = literalMinReportOffset(build, lit, info); - if (min_report > max_offset) { - DEBUG_PRINTF("min report offset=%llu exceeds " - "max_offset=%u\n", min_report, max_offset); - continue; - } - } - - used_lit_ids.push_back(id); - } - - if (used_lit_ids.empty()) { - continue; // next fragment. - } - - // Build our fragment (for the HWLM matcher) from the first literal. 
- addFragmentLiteral(build, mp, f, used_lit_ids.front(), max_len); - - for (u32 id : used_lit_ids) { - const rose_literal_id &lit = build.literals.at(id); - assert(id < build.literal_info.size()); - const auto &info = build.literal_info.at(id); - - // All literals contribute accel information. - addAccelLiteral(mp, lit, info, max_len); - - // All literals contribute to history requirement in streaming mode. - if (build.cc.streaming) { - size_t lit_hist_len = - max(lit.msk.size(), min(lit.s.length(), max_len)); - lit_hist_len = lit_hist_len ? lit_hist_len - 1 : 0; - DEBUG_PRINTF("lit requires %zu bytes of history\n", - lit_hist_len); - assert(lit_hist_len <= build.cc.grey.maxHistoryAvailable); - mp.history_required = max(mp.history_required, lit_hist_len); - } - } - } - - sort_and_unique(mp.lits); - sort_and_unique(mp.accel_lits); - - return mp; -} - -void MatcherProto::insert(const MatcherProto &a) { - ::ue2::insert(&lits, lits.end(), a.lits); - ::ue2::insert(&accel_lits, accel_lits.end(), a.accel_lits); - sort_and_unique(lits); - sort_and_unique(accel_lits); - history_required = max(history_required, a.history_required); -} - -static -void buildAccel(const RoseBuildImpl &build, - const vector<AccelString> &accel_lits, HWLM &hwlm) { - if (!build.cc.grey.hamsterAccelForward) { - return; - } - - if (hwlm.type == HWLM_ENGINE_NOOD) { - return; - } - - buildForwardAccel(&hwlm, accel_lits, build.getInitialGroups()); -} - -bytecode_ptr<HWLM> -buildHWLMMatcher(const RoseBuildImpl &build, LitProto *litProto) { - if (!litProto) { - return nullptr; - } - auto hwlm = hwlmBuild(*litProto->hwlmProto, build.cc, - build.getInitialGroups()); - if (!hwlm) { - throw CompileError("Unable to generate bytecode."); - } - - buildAccel(build, litProto->accel_lits, *hwlm); - - DEBUG_PRINTF("built eod-anchored literal table size %zu bytes\n", - hwlm.size()); - return hwlm; -} - -unique_ptr<LitProto> -buildFloatingMatcherProto(const RoseBuildImpl &build, - const vector<LitFragment> &fragments, - size_t longLitLengthThreshold, - rose_group *fgroups, - size_t *historyRequired) { - DEBUG_PRINTF("Floating literal matcher\n"); - *fgroups = 0; - - auto mp = makeMatcherProto(build, fragments, ROSE_FLOATING, false, - longLitLengthThreshold); - if (mp.lits.empty()) { - DEBUG_PRINTF("empty floating matcher\n"); - return nullptr; - } - dumpMatcherLiterals(mp.lits, "floating", build.cc.grey); - - for (const hwlmLiteral &lit : mp.lits) { - *fgroups |= lit.groups; - } - - if (build.cc.streaming) { - DEBUG_PRINTF("history_required=%zu\n", mp.history_required); - assert(mp.history_required <= build.cc.grey.maxHistoryAvailable); - *historyRequired = max(*historyRequired, mp.history_required); - } - - auto proto = hwlmBuildProto(mp.lits, false, build.cc); - - if (!proto) { - throw CompileError("Unable to generate literal matcher proto."); - } - - return ue2::make_unique<LitProto>(move(proto), mp.accel_lits); -} - -unique_ptr<LitProto> -buildDelayRebuildMatcherProto(const RoseBuildImpl &build, - const vector<LitFragment> &fragments, - size_t longLitLengthThreshold) { - DEBUG_PRINTF("Delay literal matcher\n"); - if (!build.cc.streaming) { - DEBUG_PRINTF("not streaming\n"); - return nullptr; - } - - auto mp = makeMatcherProto(build, fragments, ROSE_FLOATING, true, - longLitLengthThreshold); - if (mp.lits.empty()) { - DEBUG_PRINTF("empty delay rebuild matcher\n"); - return nullptr; - } - dumpMatcherLiterals(mp.lits, "delay_rebuild", build.cc.grey); - - - auto proto = hwlmBuildProto(mp.lits, false, build.cc); - - if (!proto) { - throw 
CompileError("Unable to generate literal matcher proto."); - } - - return ue2::make_unique<LitProto>(move(proto), mp.accel_lits); -} - -unique_ptr<LitProto> -buildSmallBlockMatcherProto(const RoseBuildImpl &build, - const vector<LitFragment> &fragments) { - DEBUG_PRINTF("Small block literal matcher\n"); - if (build.cc.streaming) { - DEBUG_PRINTF("streaming mode\n"); - return nullptr; - } - - u32 float_min = findMinWidth(build, ROSE_FLOATING); - if (float_min > ROSE_SMALL_BLOCK_LEN) { - DEBUG_PRINTF("floating table has large min width %u, fail\n", - float_min); - return nullptr; - } - - auto mp = makeMatcherProto(build, fragments, ROSE_FLOATING, false, - ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); - if (mp.lits.empty()) { - DEBUG_PRINTF("no floating table\n"); - return nullptr; - } else if (mp.lits.size() == 1) { - DEBUG_PRINTF("single floating literal, noodle will be fast enough\n"); - return nullptr; - } - - auto mp_anchored = makeMatcherProto(build, fragments, - ROSE_ANCHORED_SMALL_BLOCK, false, - ROSE_SMALL_BLOCK_LEN, - ROSE_SMALL_BLOCK_LEN); - if (mp_anchored.lits.empty()) { - DEBUG_PRINTF("no small-block anchored literals\n"); - return nullptr; - } - - mp.insert(mp_anchored); - dumpMatcherLiterals(mp.lits, "smallblock", build.cc.grey); - - // None of our literals should be longer than the small block limit. - assert(all_of(begin(mp.lits), end(mp.lits), [](const hwlmLiteral &lit) { - return lit.s.length() <= ROSE_SMALL_BLOCK_LEN; - })); - - if (mp.lits.empty()) { - DEBUG_PRINTF("no literals shorter than small block len\n"); - return nullptr; - } - - auto proto = hwlmBuildProto(mp.lits, false, build.cc); - - if (!proto) { - throw CompileError("Unable to generate literal matcher proto."); - } - - return ue2::make_unique<LitProto>(move(proto), mp.accel_lits); -} - -unique_ptr<LitProto> -buildEodAnchoredMatcherProto(const RoseBuildImpl &build, - const vector<LitFragment> &fragments) { - DEBUG_PRINTF("Eod anchored literal matcher\n"); - auto mp = makeMatcherProto(build, fragments, ROSE_EOD_ANCHORED, false, - build.ematcher_region_size); - - if (mp.lits.empty()) { - DEBUG_PRINTF("no eod anchored literals\n"); - assert(!build.ematcher_region_size); - return nullptr; - } - dumpMatcherLiterals(mp.lits, "eod", build.cc.grey); - - assert(build.ematcher_region_size); - - auto proto = hwlmBuildProto(mp.lits, false, build.cc); - - if (!proto) { - throw CompileError("Unable to generate literal matcher proto."); - } - - return ue2::make_unique<LitProto>(move(proto), mp.accel_lits); -} - -} // namespace ue2 + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * \brief Rose build: code for constructing literal tables. + */ + +#include "rose_build_matchers.h" + +#include "rose_build_dump.h" +#include "rose_build_impl.h" +#include "rose_build_lit_accel.h" +#include "rose_build_width.h" +#include "hwlm/hwlm_build.h" +#include "hwlm/hwlm_internal.h" +#include "hwlm/hwlm_literal.h" +#include "nfa/castlecompile.h" +#include "nfa/nfa_api_queue.h" +#include "util/charreach_util.h" +#include "util/compile_context.h" +#include "util/compile_error.h" +#include "util/dump_charclass.h" +#include "util/make_unique.h" +#include "util/report.h" +#include "util/report_manager.h" +#include "util/verify_types.h" +#include "ue2common.h" + +#include <iomanip> +#include <sstream> + +#include <boost/range/adaptor/map.hpp> +#include <boost/range/adaptor/reversed.hpp> + +using namespace std; +using boost::adaptors::map_values; + +namespace ue2 { + +static const size_t MAX_ACCEL_STRING_LEN = 16; + +#if defined(DEBUG) || defined(DUMP_SUPPORT) +static UNUSED +string dumpMask(const vector<u8> &v) { + ostringstream oss; + for (u8 e : v) { + oss << setfill('0') << setw(2) << hex << (unsigned int)e; + } + return oss.str(); +} +#endif + +static +bool maskFromLeftGraph(const LeftEngInfo &left, vector<u8> &msk, + vector<u8> &cmp) { + const u32 lag = left.lag; + const ReportID report = left.leftfix_report; + + DEBUG_PRINTF("leftfix with lag %u, report %u\n", lag, report); + + assert(left.graph); + const NGHolder &h = *left.graph; + assert(in_degree(h.acceptEod, h) == 1); // no eod reports + + // Start with the set of reporter vertices for this leftfix. 
+ set<NFAVertex> curr; + for (auto u : inv_adjacent_vertices_range(h.accept, h)) { + if (contains(h[u].reports, report)) { + curr.insert(u); + } + } + assert(!curr.empty()); + + size_t i = HWLM_MASKLEN - lag - 1; + do { + if (curr.empty() || contains(curr, h.start) + || contains(curr, h.startDs)) { + DEBUG_PRINTF("end of the road\n"); + break; + } + + set<NFAVertex> next; + CharReach cr; + for (NFAVertex v : curr) { + const auto &v_cr = h[v].char_reach; + DEBUG_PRINTF("vertex %zu, reach %s\n", h[v].index, + describeClass(v_cr).c_str()); + cr |= v_cr; + insert(&next, inv_adjacent_vertices(v, h)); + } + make_and_cmp_mask(cr, &msk.at(i), &cmp.at(i)); + DEBUG_PRINTF("%zu: reach=%s, msk=%u, cmp=%u\n", i, + describeClass(cr).c_str(), msk[i], cmp[i]); + curr.swap(next); + } while (i-- > 0); + + return true; +} + +static +bool maskFromLeftCastle(const LeftEngInfo &left, vector<u8> &msk, + vector<u8> &cmp) { + const u32 lag = left.lag; + const ReportID report = left.leftfix_report; + + DEBUG_PRINTF("leftfix with lag %u, report %u\n", lag, report); + + assert(left.castle); + const CastleProto &c = *left.castle; + + depth min_width(depth::infinity()); + for (const PureRepeat &repeat : c.repeats | map_values) { + if (contains(repeat.reports, report)) { + min_width = min(min_width, repeat.bounds.min); + } + } + + DEBUG_PRINTF("castle min width for this report is %s\n", + min_width.str().c_str()); + + if (!min_width.is_finite() || min_width == depth(0)) { + DEBUG_PRINTF("bad min width\n"); + return false; + } + + u32 len = min_width; + u32 end = HWLM_MASKLEN - lag; + for (u32 i = end; i > end - min(end, len); i--) { + make_and_cmp_mask(c.reach(), &msk.at(i - 1), &cmp.at(i - 1)); + } + + return true; +} + +static +bool maskFromLeft(const LeftEngInfo &left, vector<u8> &msk, vector<u8> &cmp) { + if (left.lag >= HWLM_MASKLEN) { + DEBUG_PRINTF("too much lag\n"); + return false; + } + + if (left.graph) { + return maskFromLeftGraph(left, msk, cmp); + } else if (left.castle) { + return maskFromLeftCastle(left, msk, cmp); + } + + return false; +} + +static +bool maskFromPreds(const RoseBuildImpl &build, const rose_literal_id &id, + const RoseVertex v, vector<u8> &msk, vector<u8> &cmp) { + const RoseGraph &g = build.g; + + // For right now, wuss out and only handle cases with one pred. + if (in_degree(v, g) != 1) { + return false; + } + + // Root successors have no literal before them. + if (build.isRootSuccessor(v)) { + return false; + } + + // If we have a single predecessor with a short bound, we may be able to + // fill out a mask with the trailing bytes of the previous literal. This + // allows us to improve literals like the 'bar' in 'fo.bar'. + + RoseEdge e = *(in_edges(v, g).first); + u32 bound = g[e].maxBound; + if (bound != g[e].minBound || bound >= HWLM_MASKLEN) { + return false; + } + + bound += id.s.length(); + if (bound >= HWLM_MASKLEN) { + return false; + } + + DEBUG_PRINTF("bound %u\n", bound); + + RoseVertex u = source(e, g); + if (g[u].literals.size() != 1) { + DEBUG_PRINTF("u has %zu literals\n", g[u].literals.size()); + return false; + } + + u32 u_lit_id = *(g[u].literals.begin()); + const rose_literal_id &u_id = build.literals.at(u_lit_id); + DEBUG_PRINTF("u has lit: %s\n", escapeString(u_id.s).c_str()); + + // Number of characters to take from the back of u's literal. 
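/* Editor's note (not part of this commit): maskFromLeftGraph above walks
 * backwards from the leftfix's reporter vertices and turns the character
 * reach at each position into an AND/CMP byte pair via make_and_cmp_mask.
 * A minimal, self-contained sketch of that AND/CMP idea follows; makeAndCmp
 * is a hypothetical stand-in, not the library function. */
#include <cassert>
#include <cstdint>
#include <vector>

// Build an AND/CMP pair for a set of permitted bytes: any byte b in the set
// satisfies (b & and_mask) == cmp_mask, so the test is a cheap necessary
// (not sufficient) condition. Bits where permitted bytes disagree are
// cleared from the mask.
static void makeAndCmp(const std::vector<uint8_t> &permitted,
                       uint8_t *and_mask, uint8_t *cmp_mask) {
    uint8_t always_one = 0xff;  // bits set in every permitted byte
    uint8_t always_zero = 0xff; // bits clear in every permitted byte
    for (uint8_t b : permitted) {
        always_one &= b;
        always_zero &= static_cast<uint8_t>(~b);
    }
    *and_mask = static_cast<uint8_t>(always_one | always_zero);
    *cmp_mask = always_one;
}

int main() {
    uint8_t m, c;
    makeAndCmp({'a', 'A'}, &m, &c); // case-insensitive 'a' differs only in bit 5
    assert(m == 0xdf && c == 0x41);
    assert((uint8_t('A') & m) == c && (uint8_t('a') & m) == c);
    assert((uint8_t('b') & m) != c);
    return 0;
}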
+ size_t u_len = u_id.s.length(); + size_t u_sublen = min(u_len, (size_t)HWLM_MASKLEN - bound); + + size_t i = HWLM_MASKLEN - (bound + u_sublen); + + ue2_literal::const_iterator it, ite; + for (it = u_id.s.begin() + (u_len - u_sublen), ite = u_id.s.end(); + it != ite; ++it) { + make_and_cmp_mask(*it, &msk.at(i), &cmp.at(i)); + ++i; + } + + return true; +} + +static +bool addSurroundingMask(const RoseBuildImpl &build, const rose_literal_id &id, + const RoseVertex v, vector<u8> &msk, vector<u8> &cmp) { + // Start with zero masks. + msk.assign(HWLM_MASKLEN, 0); + cmp.assign(HWLM_MASKLEN, 0); + + const LeftEngInfo &left = build.g[v].left; + if (left && left.lag < HWLM_MASKLEN) { + if (maskFromLeft(left, msk, cmp)) { + DEBUG_PRINTF("mask from a leftfix!\n"); + return true; + } + } + + if (id.s.length() < HWLM_MASKLEN) { + if (maskFromPreds(build, id, v, msk, cmp)) { + DEBUG_PRINTF("mask from preds!\n"); + return true; + } + } + + return false; +} + +static +bool hamsterMaskCombine(vector<u8> &msk, vector<u8> &cmp, + const vector<u8> &v_msk, const vector<u8> &v_cmp) { + assert(msk.size() == HWLM_MASKLEN && cmp.size() == HWLM_MASKLEN); + assert(v_msk.size() == HWLM_MASKLEN && v_cmp.size() == HWLM_MASKLEN); + + u8 all_masks = 0; + + for (size_t i = 0; i < HWLM_MASKLEN; i++) { + u8 filter = ~(cmp[i] ^ v_cmp[i]); + msk[i] &= v_msk[i]; + msk[i] &= filter; + cmp[i] &= filter; + + all_masks |= msk[i]; + } + + // Return false if we have no bits on in any mask elements. + return all_masks != 0; +} + +static +bool addSurroundingMask(const RoseBuildImpl &build, const rose_literal_id &id, + const rose_literal_info &info, vector<u8> &msk, + vector<u8> &cmp) { + if (!build.cc.grey.roseHamsterMasks) { + return false; + } + + if (!info.delayed_ids.empty()) { + // Not safe to add masks to delayed literals at this late stage. + return false; + } + + msk.assign(HWLM_MASKLEN, 0); + cmp.assign(HWLM_MASKLEN, 0); + + size_t num = 0; + vector<u8> v_msk, v_cmp; + + for (RoseVertex v : info.vertices) { + if (!addSurroundingMask(build, id, v, v_msk, v_cmp)) { + DEBUG_PRINTF("no mask\n"); + return false; + } + + if (!num++) { + // First (or only) vertex, this becomes the mask/cmp pair. + msk = v_msk; + cmp = v_cmp; + } else { + // Multiple vertices with potentially different masks. We combine + // them into an 'advisory' mask. + if (!hamsterMaskCombine(msk, cmp, v_msk, v_cmp)) { + DEBUG_PRINTF("mask went to zero\n"); + return false; + } + } + } + + normaliseLiteralMask(id.s, msk, cmp); + + if (msk.empty()) { + DEBUG_PRINTF("no mask\n"); + return false; + } + + DEBUG_PRINTF("msk=%s, cmp=%s\n", dumpMask(msk).c_str(), + dumpMask(cmp).c_str()); + return true; +} + +void findMoreLiteralMasks(RoseBuildImpl &build) { + if (!build.cc.grey.roseHamsterMasks) { + return; + } + + vector<u32> candidates; + for (u32 id = 0; id < build.literals.size(); id++) { + const auto &lit = build.literals.at(id); + + if (lit.delay || build.isDelayed(id)) { + continue; + } + + // Literal masks are only allowed for literals that will end up in an + // HWLM table. 
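/* Editor's note (not part of this commit): when a literal has several
 * vertices, addSurroundingMask above merges their per-vertex masks with
 * hamsterMaskCombine into a single "advisory" mask. The rule is: keep a bit
 * only if both vertices test it and expect the same value there. A compact
 * sketch of that combining rule, with helper names that are illustrative
 * only: */
#include <array>
#include <cassert>
#include <cstdint>

constexpr size_t MASKLEN = 8; // stands in for HWLM_MASKLEN

static bool combineMasks(std::array<uint8_t, MASKLEN> &msk,
                         std::array<uint8_t, MASKLEN> &cmp,
                         const std::array<uint8_t, MASKLEN> &v_msk,
                         const std::array<uint8_t, MASKLEN> &v_cmp) {
    uint8_t any = 0;
    for (size_t i = 0; i < MASKLEN; i++) {
        // Bits where the two cmp values agree; disagreement kills the bit.
        uint8_t agree = static_cast<uint8_t>(~(cmp[i] ^ v_cmp[i]));
        msk[i] &= v_msk[i] & agree;
        cmp[i] &= agree;
        any |= msk[i];
    }
    return any != 0; // false: nothing left to test, caller drops the mask
}

int main() {
    std::array<uint8_t, MASKLEN> msk{}, cmp{}, v_msk{}, v_cmp{};
    msk[7] = 0xff;   cmp[7] = 'a';   // one vertex wants 'a' in the last byte
    v_msk[7] = 0xff; v_cmp[7] = 'a'; // the other agrees
    assert(combineMasks(msk, cmp, v_msk, v_cmp));

    // Complete disagreement (complemented byte) leaves nothing to test.
    std::array<uint8_t, MASKLEN> m2{}, c2{};
    m2[7] = 0xff; c2[7] = 'a';
    v_cmp[7] = static_cast<uint8_t>(~'a');
    assert(!combineMasks(m2, c2, v_msk, v_cmp));
    return 0;
}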
+ switch (lit.table) { + case ROSE_FLOATING: + case ROSE_EOD_ANCHORED: + case ROSE_ANCHORED_SMALL_BLOCK: + break; + default: + continue; + } + + candidates.push_back(id); + } + + for (const u32 &id : candidates) { + const auto &lit = build.literals.at(id); + auto &lit_info = build.literal_info.at(id); + + vector<u8> msk, cmp; + if (!addSurroundingMask(build, lit, lit_info, msk, cmp)) { + continue; + } + DEBUG_PRINTF("found surrounding mask for lit_id=%u (%s)\n", id, + dumpString(lit.s).c_str()); + u32 new_id = build.getLiteralId(lit.s, msk, cmp, lit.delay, lit.table); + if (new_id == id) { + continue; + } + DEBUG_PRINTF("replacing with new lit_id=%u\n", new_id); + + // Note that our new literal may already exist and have vertices, etc. + // We assume that this transform is happening prior to group assignment. + assert(lit_info.group_mask == 0); + auto &new_info = build.literal_info.at(new_id); + + // Move the vertices across. + new_info.vertices.insert(begin(lit_info.vertices), + end(lit_info.vertices)); + for (auto v : lit_info.vertices) { + build.g[v].literals.erase(id); + build.g[v].literals.insert(new_id); + } + lit_info.vertices.clear(); + + // Preserve other properties. + new_info.requires_benefits = lit_info.requires_benefits; + } +} + +// The mask already associated with the literal and any mask due to +// mixed-case is mandatory. +static +void addLiteralMask(const rose_literal_id &id, vector<u8> &msk, + vector<u8> &cmp) { + const size_t suffix_len = min(id.s.length(), size_t{HWLM_MASKLEN}); + bool mixed_suffix = mixed_sensitivity_in(id.s.end() - suffix_len, + id.s.end()); + + if (id.msk.empty() && !mixed_suffix) { + return; + } + + while (msk.size() < HWLM_MASKLEN) { + msk.insert(msk.begin(), 0); + cmp.insert(cmp.begin(), 0); + } + + if (!id.msk.empty()) { + assert(id.msk.size() <= HWLM_MASKLEN); + assert(id.msk.size() == id.cmp.size()); + for (size_t i = 0; i < id.msk.size(); i++) { + size_t mand_offset = msk.size() - i - 1; + size_t lit_offset = id.msk.size() - i - 1; + msk[mand_offset] = id.msk[lit_offset]; + cmp[mand_offset] = id.cmp[lit_offset]; + } + } + + if (mixed_suffix) { + auto it = id.s.rbegin(); + for (size_t i = 0; i < suffix_len; ++i, ++it) { + const auto &c = *it; + if (!c.nocase) { + size_t offset = HWLM_MASKLEN - i - 1; + DEBUG_PRINTF("offset %zu must match 0x%02x exactly\n", offset, + c.c); + make_and_cmp_mask(c, &msk[offset], &cmp[offset]); + } + } + } + + normaliseLiteralMask(id.s, msk, cmp); +} + +static +bool isDirectHighlander(const RoseBuildImpl &build, const u32 id, + const rose_literal_info &info) { + if (!build.isDirectReport(id)) { + return false; + } + + auto is_simple_exhaustible = [&build](ReportID rid) { + const Report &report = build.rm.getReport(rid); + return isSimpleExhaustible(report); + }; + + assert(!info.vertices.empty()); + for (const auto &v : info.vertices) { + const auto &reports = build.g[v].reports; + assert(!reports.empty()); + if (!all_of(begin(reports), end(reports), + is_simple_exhaustible)) { + return false; + } + } + return true; +} + +// Called by isNoRunsLiteral below. +static +bool isNoRunsVertex(const RoseBuildImpl &build, RoseVertex u) { + const RoseGraph &g = build.g; + if (!g[u].isBoring()) { + DEBUG_PRINTF("u=%zu is not boring\n", g[u].index); + return false; + } + + if (!g[u].reports.empty()) { + DEBUG_PRINTF("u=%zu has accept\n", g[u].index); + return false; + } + + /* TODO: handle non-root roles as well. It can't be that difficult... 
*/ + + if (in_degree(u, g) != 1) { + DEBUG_PRINTF("u=%zu is not a root role\n", g[u].index); + return false; + } + + RoseEdge e = edge(build.root, u, g); + + if (!e) { + DEBUG_PRINTF("u=%zu is not a root role\n", g[u].index); + return false; + } + + if (g[e].minBound != 0 || g[e].maxBound != ROSE_BOUND_INF) { + DEBUG_PRINTF("u=%zu has bounds from root\n", g[u].index); + return false; + } + + for (const auto &oe : out_edges_range(u, g)) { + RoseVertex v = target(oe, g); + if (g[oe].maxBound != ROSE_BOUND_INF) { + DEBUG_PRINTF("edge (%zu,%zu) has max bound\n", g[u].index, + g[v].index); + return false; + } + if (g[v].left) { + DEBUG_PRINTF("v=%zu has rose prefix\n", g[v].index); + return false; + } + } + return true; +} + +static +bool isNoRunsLiteral(const RoseBuildImpl &build, const u32 id, + const rose_literal_info &info, const size_t max_len) { + DEBUG_PRINTF("lit id %u\n", id); + + if (info.requires_benefits) { + DEBUG_PRINTF("requires benefits\n"); // which would need confirm + return false; + } + + size_t len = build.literals.at(id).s.length(); + if (len > max_len) { + DEBUG_PRINTF("long literal, requires confirm\n"); + return false; + } + + if (len > ROSE_SHORT_LITERAL_LEN_MAX) { + DEBUG_PRINTF("medium-length literal, requires confirm\n"); + return false; + } + + if (isDirectHighlander(build, id, info)) { + DEBUG_PRINTF("highlander direct report\n"); + return true; + } + + // Undelayed vertices. + for (RoseVertex v : info.vertices) { + if (!isNoRunsVertex(build, v)) { + return false; + } + } + + // Delayed vertices. + for (u32 d : info.delayed_ids) { + assert(d < build.literal_info.size()); + const rose_literal_info &delayed_info = build.literal_info.at(d); + assert(delayed_info.undelayed_id == id); + for (RoseVertex v : delayed_info.vertices) { + if (!isNoRunsVertex(build, v)) { + return false; + } + } + } + + DEBUG_PRINTF("is no-runs literal\n"); + return true; +} + +static +bool isNoRunsFragment(const RoseBuildImpl &build, const LitFragment &f, + const size_t max_len) { + // For the fragment to be marked "no runs", every literal it fires must + // need no further confirmation work. + return all_of_in(f.lit_ids, [&](u32 lit_id) { + const auto &info = build.literal_info.at(lit_id); + return isNoRunsLiteral(build, lit_id, info, max_len); + }); +} + +static +const raw_puff &getChainedPuff(const RoseBuildImpl &build, + const Report &report) { + DEBUG_PRINTF("chained report, event %u\n", report.onmatch); + + // MPV has already been moved to the outfixes vector. + assert(!build.mpv_outfix); + + auto mpv_outfix_it = find_if( + begin(build.outfixes), end(build.outfixes), + [](const OutfixInfo &outfix) { return outfix.is_nonempty_mpv(); }); + assert(mpv_outfix_it != end(build.outfixes)); + const auto *mpv = mpv_outfix_it->mpv(); + + u32 puff_index = report.onmatch - MQE_TOP_FIRST; + assert(puff_index < mpv->triggered_puffettes.size()); + return mpv->triggered_puffettes.at(puff_index); +} + +/** + * \brief Returns a conservative estimate of the minimum offset at which the + * given literal can lead to a report. + * + * TODO: This could be made more precise by calculating a "distance to accept" + * for every vertex in the graph; right now we're only accurate for leaf nodes. 
+ */ +static +u64a literalMinReportOffset(const RoseBuildImpl &build, + const rose_literal_id &lit, + const rose_literal_info &info) { + const auto &g = build.g; + + const u32 lit_len = verify_u32(lit.elength()); + + u64a lit_min_offset = UINT64_MAX; + + for (const auto &v : info.vertices) { + DEBUG_PRINTF("vertex %zu min_offset=%u\n", g[v].index, g[v].min_offset); + + u64a vert_offset = g[v].min_offset; + + if (vert_offset >= lit_min_offset) { + continue; + } + + u64a min_offset = UINT64_MAX; + + for (const auto &id : g[v].reports) { + const Report &report = build.rm.getReport(id); + DEBUG_PRINTF("report id %u, min offset=%llu\n", id, + report.minOffset); + if (report.type == INTERNAL_ROSE_CHAIN) { + // This vertex triggers an MPV, which will fire reports after + // repeating for a while. + assert(report.minOffset == 0); // Should not have bounds. + const auto &puff = getChainedPuff(build, report); + DEBUG_PRINTF("chained puff repeats=%u\n", puff.repeats); + const Report &puff_report = build.rm.getReport(puff.report); + DEBUG_PRINTF("puff report %u, min offset=%llu\n", puff.report, + puff_report.minOffset); + min_offset = min(min_offset, max(vert_offset + puff.repeats, + puff_report.minOffset)); + } else { + DEBUG_PRINTF("report min offset=%llu\n", report.minOffset); + min_offset = min(min_offset, max(vert_offset, + report.minOffset)); + } + } + + if (g[v].suffix) { + depth suffix_width = findMinWidth(g[v].suffix, g[v].suffix.top); + assert(suffix_width.is_reachable()); + DEBUG_PRINTF("suffix with width %s\n", suffix_width.str().c_str()); + min_offset = min(min_offset, vert_offset + suffix_width); + } + + if (!isLeafNode(v, g) || min_offset == UINT64_MAX) { + min_offset = vert_offset; + } + + lit_min_offset = min(lit_min_offset, min_offset); + } + + // If this literal in the undelayed literal corresponding to some delayed + // literals, we must take their minimum offsets into account. + for (const u32 &delayed_id : info.delayed_ids) { + const auto &delayed_lit = build.literals.at(delayed_id); + const auto &delayed_info = build.literal_info.at(delayed_id); + u64a delayed_min_offset = literalMinReportOffset(build, delayed_lit, + delayed_info); + DEBUG_PRINTF("delayed_id=%u, min_offset = %llu\n", delayed_id, + delayed_min_offset); + lit_min_offset = min(lit_min_offset, delayed_min_offset); + } + + // If we share a vertex with a shorter literal, our min offset might dip + // below the length of this one. + lit_min_offset = max(lit_min_offset, u64a{lit_len}); + + return lit_min_offset; +} + +template<class Container> +void trim_to_suffix(Container &c, size_t len) { + if (c.size() <= len) { + return; + } + + size_t suffix_len = c.size() - len; + c.erase(c.begin(), c.begin() + suffix_len); +} + +namespace { + +/** \brief Prototype for literal matcher construction. */ +struct MatcherProto { + /** \brief Literal fragments used to construct the literal matcher. */ + vector<hwlmLiteral> lits; + + /** \brief Longer literals used for acceleration analysis. */ + vector<AccelString> accel_lits; + + /** \brief The history required by the literal matcher. */ + size_t history_required = 0; + + /** \brief Insert the contents of another MatcherProto. 
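/* Editor's note (not part of this commit): literalMinReportOffset above is a
 * conservative lower bound on when a literal can first lead to a report. For
 * a leaf vertex it takes, per report, max(vertex min_offset, report
 * minOffset), also considers vertex min_offset plus the suffix's minimum
 * width, and finally clamps to the literal's own length. A toy walk-through
 * with made-up numbers: */
#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
    const uint64_t lit_len = 6;       // literal length
    const uint64_t vert_offset = 10;  // vertex min_offset
    const uint64_t report_min = 25;   // Report::minOffset of its only report
    const uint64_t suffix_width = 4;  // min width of the attached suffix

    uint64_t min_offset = std::max(vert_offset, report_min);       // 25
    min_offset = std::min(min_offset, vert_offset + suffix_width); // 14

    // A literal cannot report before its own last byte has been seen.
    uint64_t lit_min = std::max(min_offset, lit_len);
    assert(lit_min == 14);
    return 0;
}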
*/ + void insert(const MatcherProto &a); +}; +} + +static +void addFragmentLiteral(const RoseBuildImpl &build, MatcherProto &mp, + const LitFragment &f, u32 id, size_t max_len) { + const rose_literal_id &lit = build.literals.at(id); + + DEBUG_PRINTF("lit='%s' (len %zu)\n", dumpString(lit.s).c_str(), + lit.s.length()); + + vector<u8> msk = lit.msk; // copy + vector<u8> cmp = lit.cmp; // copy + + bool noruns = isNoRunsFragment(build, f, max_len); + DEBUG_PRINTF("fragment is %s\n", noruns ? "noruns" : "not noruns"); + + auto lit_final = lit.s; // copy + + if (lit_final.length() > ROSE_SHORT_LITERAL_LEN_MAX) { + DEBUG_PRINTF("truncating to tail of length %zu\n", + size_t{ROSE_SHORT_LITERAL_LEN_MAX}); + lit_final.erase(0, lit_final.length() - ROSE_SHORT_LITERAL_LEN_MAX); + // We shouldn't have set a threshold below 8 chars. + assert(msk.size() <= ROSE_SHORT_LITERAL_LEN_MAX); + assert(!noruns); + } + + addLiteralMask(lit, msk, cmp); + + const auto &s_final = lit_final.get_string(); + bool nocase = lit_final.any_nocase(); + + DEBUG_PRINTF("id=%u, s='%s', nocase=%d, noruns=%d, msk=%s, cmp=%s\n", + f.fragment_id, escapeString(s_final).c_str(), (int)nocase, + noruns, dumpMask(msk).c_str(), dumpMask(cmp).c_str()); + + if (!maskIsConsistent(s_final, nocase, msk, cmp)) { + DEBUG_PRINTF("msk/cmp for literal can't match, skipping\n"); + return; + } + + const auto &groups = f.groups; + + mp.lits.emplace_back(move(s_final), nocase, noruns, f.fragment_id, + groups, msk, cmp); +} + +static +void addAccelLiteral(MatcherProto &mp, const rose_literal_id &lit, + const rose_literal_info &info, size_t max_len) { + const auto &s = lit.s; // copy + + DEBUG_PRINTF("lit='%s' (len %zu)\n", dumpString(s).c_str(), s.length()); + + vector<u8> msk = lit.msk; // copy + vector<u8> cmp = lit.cmp; // copy + addLiteralMask(lit, msk, cmp); + + if (!maskIsConsistent(s.get_string(), s.any_nocase(), msk, cmp)) { + DEBUG_PRINTF("msk/cmp for literal can't match, skipping\n"); + return; + } + + // Literals used for acceleration must be limited to max_len, as that's all + // we can see in history. + string s_final = lit.s.get_string(); + trim_to_suffix(s_final, max_len); + trim_to_suffix(msk, max_len); + trim_to_suffix(cmp, max_len); + + mp.accel_lits.emplace_back(s_final, lit.s.any_nocase(), msk, cmp, + info.group_mask); +} + +/** + * \brief Build up a vector of literals (and associated other data) for the + * given table. + * + * If max_offset is specified (and not ROSE_BOUND_INF), then literals that can + * only lead to a pattern match after max_offset may be excluded. + */ +static +MatcherProto makeMatcherProto(const RoseBuildImpl &build, + const vector<LitFragment> &fragments, + rose_literal_table table, bool delay_rebuild, + size_t max_len, u32 max_offset = ROSE_BOUND_INF) { + MatcherProto mp; + + if (delay_rebuild) { + assert(table == ROSE_FLOATING); + assert(build.cc.streaming); + } + + vector<u32> used_lit_ids; + + for (const auto &f : fragments) { + assert(!f.lit_ids.empty()); + + // All literals that share a fragment are in the same table. + if (build.literals.at(f.lit_ids.front()).table != table) { + continue; // next fragment. 
+ } + + DEBUG_PRINTF("fragment %u, %zu lit_ids\n", f.fragment_id, + f.lit_ids.size()); + + used_lit_ids.clear(); + for (u32 id : f.lit_ids) { + const rose_literal_id &lit = build.literals.at(id); + assert(id < build.literal_info.size()); + const auto &info = build.literal_info.at(id); + if (lit.delay) { + continue; /* delay id's are virtual-ish */ + } + + // When building the delay rebuild table, we only want to include + // literals that have delayed variants. + if (delay_rebuild && info.delayed_ids.empty()) { + DEBUG_PRINTF("not needed for delay rebuild\n"); + continue; + } + + if (max_offset != ROSE_BOUND_INF) { + u64a min_report = literalMinReportOffset(build, lit, info); + if (min_report > max_offset) { + DEBUG_PRINTF("min report offset=%llu exceeds " + "max_offset=%u\n", min_report, max_offset); + continue; + } + } + + used_lit_ids.push_back(id); + } + + if (used_lit_ids.empty()) { + continue; // next fragment. + } + + // Build our fragment (for the HWLM matcher) from the first literal. + addFragmentLiteral(build, mp, f, used_lit_ids.front(), max_len); + + for (u32 id : used_lit_ids) { + const rose_literal_id &lit = build.literals.at(id); + assert(id < build.literal_info.size()); + const auto &info = build.literal_info.at(id); + + // All literals contribute accel information. + addAccelLiteral(mp, lit, info, max_len); + + // All literals contribute to history requirement in streaming mode. + if (build.cc.streaming) { + size_t lit_hist_len = + max(lit.msk.size(), min(lit.s.length(), max_len)); + lit_hist_len = lit_hist_len ? lit_hist_len - 1 : 0; + DEBUG_PRINTF("lit requires %zu bytes of history\n", + lit_hist_len); + assert(lit_hist_len <= build.cc.grey.maxHistoryAvailable); + mp.history_required = max(mp.history_required, lit_hist_len); + } + } + } + + sort_and_unique(mp.lits); + sort_and_unique(mp.accel_lits); + + return mp; +} + +void MatcherProto::insert(const MatcherProto &a) { + ::ue2::insert(&lits, lits.end(), a.lits); + ::ue2::insert(&accel_lits, accel_lits.end(), a.accel_lits); + sort_and_unique(lits); + sort_and_unique(accel_lits); + history_required = max(history_required, a.history_required); +} + +static +void buildAccel(const RoseBuildImpl &build, + const vector<AccelString> &accel_lits, HWLM &hwlm) { + if (!build.cc.grey.hamsterAccelForward) { + return; + } + + if (hwlm.type == HWLM_ENGINE_NOOD) { + return; + } + + buildForwardAccel(&hwlm, accel_lits, build.getInitialGroups()); +} + +bytecode_ptr<HWLM> +buildHWLMMatcher(const RoseBuildImpl &build, LitProto *litProto) { + if (!litProto) { + return nullptr; + } + auto hwlm = hwlmBuild(*litProto->hwlmProto, build.cc, + build.getInitialGroups()); + if (!hwlm) { + throw CompileError("Unable to generate bytecode."); + } + + buildAccel(build, litProto->accel_lits, *hwlm); + + DEBUG_PRINTF("built eod-anchored literal table size %zu bytes\n", + hwlm.size()); + return hwlm; +} + +unique_ptr<LitProto> +buildFloatingMatcherProto(const RoseBuildImpl &build, + const vector<LitFragment> &fragments, + size_t longLitLengthThreshold, + rose_group *fgroups, + size_t *historyRequired) { + DEBUG_PRINTF("Floating literal matcher\n"); + *fgroups = 0; + + auto mp = makeMatcherProto(build, fragments, ROSE_FLOATING, false, + longLitLengthThreshold); + if (mp.lits.empty()) { + DEBUG_PRINTF("empty floating matcher\n"); + return nullptr; + } + dumpMatcherLiterals(mp.lits, "floating", build.cc.grey); + + for (const hwlmLiteral &lit : mp.lits) { + *fgroups |= lit.groups; + } + + if (build.cc.streaming) { + DEBUG_PRINTF("history_required=%zu\n", 
mp.history_required); + assert(mp.history_required <= build.cc.grey.maxHistoryAvailable); + *historyRequired = max(*historyRequired, mp.history_required); + } + + auto proto = hwlmBuildProto(mp.lits, false, build.cc); + + if (!proto) { + throw CompileError("Unable to generate literal matcher proto."); + } + + return ue2::make_unique<LitProto>(move(proto), mp.accel_lits); +} + +unique_ptr<LitProto> +buildDelayRebuildMatcherProto(const RoseBuildImpl &build, + const vector<LitFragment> &fragments, + size_t longLitLengthThreshold) { + DEBUG_PRINTF("Delay literal matcher\n"); + if (!build.cc.streaming) { + DEBUG_PRINTF("not streaming\n"); + return nullptr; + } + + auto mp = makeMatcherProto(build, fragments, ROSE_FLOATING, true, + longLitLengthThreshold); + if (mp.lits.empty()) { + DEBUG_PRINTF("empty delay rebuild matcher\n"); + return nullptr; + } + dumpMatcherLiterals(mp.lits, "delay_rebuild", build.cc.grey); + + + auto proto = hwlmBuildProto(mp.lits, false, build.cc); + + if (!proto) { + throw CompileError("Unable to generate literal matcher proto."); + } + + return ue2::make_unique<LitProto>(move(proto), mp.accel_lits); +} + +unique_ptr<LitProto> +buildSmallBlockMatcherProto(const RoseBuildImpl &build, + const vector<LitFragment> &fragments) { + DEBUG_PRINTF("Small block literal matcher\n"); + if (build.cc.streaming) { + DEBUG_PRINTF("streaming mode\n"); + return nullptr; + } + + u32 float_min = findMinWidth(build, ROSE_FLOATING); + if (float_min > ROSE_SMALL_BLOCK_LEN) { + DEBUG_PRINTF("floating table has large min width %u, fail\n", + float_min); + return nullptr; + } + + auto mp = makeMatcherProto(build, fragments, ROSE_FLOATING, false, + ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN); + if (mp.lits.empty()) { + DEBUG_PRINTF("no floating table\n"); + return nullptr; + } else if (mp.lits.size() == 1) { + DEBUG_PRINTF("single floating literal, noodle will be fast enough\n"); + return nullptr; + } + + auto mp_anchored = makeMatcherProto(build, fragments, + ROSE_ANCHORED_SMALL_BLOCK, false, + ROSE_SMALL_BLOCK_LEN, + ROSE_SMALL_BLOCK_LEN); + if (mp_anchored.lits.empty()) { + DEBUG_PRINTF("no small-block anchored literals\n"); + return nullptr; + } + + mp.insert(mp_anchored); + dumpMatcherLiterals(mp.lits, "smallblock", build.cc.grey); + + // None of our literals should be longer than the small block limit. 
+ assert(all_of(begin(mp.lits), end(mp.lits), [](const hwlmLiteral &lit) { + return lit.s.length() <= ROSE_SMALL_BLOCK_LEN; + })); + + if (mp.lits.empty()) { + DEBUG_PRINTF("no literals shorter than small block len\n"); + return nullptr; + } + + auto proto = hwlmBuildProto(mp.lits, false, build.cc); + + if (!proto) { + throw CompileError("Unable to generate literal matcher proto."); + } + + return ue2::make_unique<LitProto>(move(proto), mp.accel_lits); +} + +unique_ptr<LitProto> +buildEodAnchoredMatcherProto(const RoseBuildImpl &build, + const vector<LitFragment> &fragments) { + DEBUG_PRINTF("Eod anchored literal matcher\n"); + auto mp = makeMatcherProto(build, fragments, ROSE_EOD_ANCHORED, false, + build.ematcher_region_size); + + if (mp.lits.empty()) { + DEBUG_PRINTF("no eod anchored literals\n"); + assert(!build.ematcher_region_size); + return nullptr; + } + dumpMatcherLiterals(mp.lits, "eod", build.cc.grey); + + assert(build.ematcher_region_size); + + auto proto = hwlmBuildProto(mp.lits, false, build.cc); + + if (!proto) { + throw CompileError("Unable to generate literal matcher proto."); + } + + return ue2::make_unique<LitProto>(move(proto), mp.accel_lits); +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/rose/rose_build_matchers.h b/contrib/libs/hyperscan/src/rose/rose_build_matchers.h index 8fb70d68ac..ef8999ed01 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_matchers.h +++ b/contrib/libs/hyperscan/src/rose/rose_build_matchers.h @@ -1,129 +1,129 @@ -/* - * Copyright (c) 2016-2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** - * \file - * \brief Rose build: code for constructing literal tables. 
- */ - -#ifndef ROSE_BUILD_MATCHERS_H -#define ROSE_BUILD_MATCHERS_H - -#include "rose_build_impl.h" -#include "rose_build_lit_accel.h" -#include "hwlm/hwlm_build.h" -#include "util/bytecode_ptr.h" -#include "util/ue2string.h" - -#include <vector> - -struct Grey; -struct HWLM; - -namespace ue2 { - -static constexpr u32 INVALID_FRAG_ID = ~0U; - -struct LitFragment { - LitFragment(u32 fragment_id_in, ue2_literal s_in, - rose_group groups_in, u32 lit_id) - : fragment_id(fragment_id_in), s(s_in), groups(groups_in), - lit_ids({lit_id}) {} - LitFragment(u32 fragment_id_in, ue2_literal s_in, - rose_group groups_in, std::vector<u32> lit_ids_in) - : fragment_id(fragment_id_in), s(s_in), groups(groups_in), - lit_ids(std::move(lit_ids_in)) {} - u32 fragment_id; - - /** - * \brief literal fragment. - */ - ue2_literal s; - - /** - * \brief FDR confirm squash mask for included literals. - */ - u8 squash = 0; - - /** - * \brief FDR confirm squash mask for included literals (Delayed - * literals only). - */ - u8 delay_squash = 0; - - /** - * \brief Fragment id of included literal. - */ - u32 included_frag_id = INVALID_FRAG_ID; - - /** - * \brief Fragment Id of included literal (Delayed literals only). - */ - u32 included_delay_frag_id = INVALID_FRAG_ID; - rose_group groups; - std::vector<u32> lit_ids; - u32 lit_program_offset = ROSE_INVALID_PROG_OFFSET; - u32 delay_program_offset = ROSE_INVALID_PROG_OFFSET; -}; - -struct LitProto { - LitProto(std::unique_ptr<HWLMProto> hwlmProto_in, - std::vector<AccelString> &accel_lits_in) - : hwlmProto(std::move(hwlmProto_in)), accel_lits(accel_lits_in) {} - - std::unique_ptr<HWLMProto> hwlmProto; - std::vector<AccelString> accel_lits; -}; - -bytecode_ptr<HWLM> -buildHWLMMatcher(const RoseBuildImpl &build, LitProto *proto); - -std::unique_ptr<LitProto> -buildFloatingMatcherProto(const RoseBuildImpl &build, - const std::vector<LitFragment> &fragments, - size_t longLitLengthThreshold, - rose_group *fgroups, - size_t *historyRequired); - -std::unique_ptr<LitProto> -buildDelayRebuildMatcherProto(const RoseBuildImpl &build, - const std::vector<LitFragment> &fragments, - size_t longLitLengthThreshold); -std::unique_ptr<LitProto> -buildSmallBlockMatcherProto(const RoseBuildImpl &build, - const std::vector<LitFragment> &fragments); - -std::unique_ptr<LitProto> -buildEodAnchoredMatcherProto(const RoseBuildImpl &build, - const std::vector<LitFragment> &fragments); - -void findMoreLiteralMasks(RoseBuildImpl &build); - -} // namespace ue2 - -#endif // ROSE_BUILD_MATCHERS_H +/* + * Copyright (c) 2016-2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * \file + * \brief Rose build: code for constructing literal tables. + */ + +#ifndef ROSE_BUILD_MATCHERS_H +#define ROSE_BUILD_MATCHERS_H + +#include "rose_build_impl.h" +#include "rose_build_lit_accel.h" +#include "hwlm/hwlm_build.h" +#include "util/bytecode_ptr.h" +#include "util/ue2string.h" + +#include <vector> + +struct Grey; +struct HWLM; + +namespace ue2 { + +static constexpr u32 INVALID_FRAG_ID = ~0U; + +struct LitFragment { + LitFragment(u32 fragment_id_in, ue2_literal s_in, + rose_group groups_in, u32 lit_id) + : fragment_id(fragment_id_in), s(s_in), groups(groups_in), + lit_ids({lit_id}) {} + LitFragment(u32 fragment_id_in, ue2_literal s_in, + rose_group groups_in, std::vector<u32> lit_ids_in) + : fragment_id(fragment_id_in), s(s_in), groups(groups_in), + lit_ids(std::move(lit_ids_in)) {} + u32 fragment_id; + + /** + * \brief literal fragment. + */ + ue2_literal s; + + /** + * \brief FDR confirm squash mask for included literals. + */ + u8 squash = 0; + + /** + * \brief FDR confirm squash mask for included literals (Delayed + * literals only). + */ + u8 delay_squash = 0; + + /** + * \brief Fragment id of included literal. + */ + u32 included_frag_id = INVALID_FRAG_ID; + + /** + * \brief Fragment Id of included literal (Delayed literals only). 
+ */ + u32 included_delay_frag_id = INVALID_FRAG_ID; + rose_group groups; + std::vector<u32> lit_ids; + u32 lit_program_offset = ROSE_INVALID_PROG_OFFSET; + u32 delay_program_offset = ROSE_INVALID_PROG_OFFSET; +}; + +struct LitProto { + LitProto(std::unique_ptr<HWLMProto> hwlmProto_in, + std::vector<AccelString> &accel_lits_in) + : hwlmProto(std::move(hwlmProto_in)), accel_lits(accel_lits_in) {} + + std::unique_ptr<HWLMProto> hwlmProto; + std::vector<AccelString> accel_lits; +}; + +bytecode_ptr<HWLM> +buildHWLMMatcher(const RoseBuildImpl &build, LitProto *proto); + +std::unique_ptr<LitProto> +buildFloatingMatcherProto(const RoseBuildImpl &build, + const std::vector<LitFragment> &fragments, + size_t longLitLengthThreshold, + rose_group *fgroups, + size_t *historyRequired); + +std::unique_ptr<LitProto> +buildDelayRebuildMatcherProto(const RoseBuildImpl &build, + const std::vector<LitFragment> &fragments, + size_t longLitLengthThreshold); +std::unique_ptr<LitProto> +buildSmallBlockMatcherProto(const RoseBuildImpl &build, + const std::vector<LitFragment> &fragments); + +std::unique_ptr<LitProto> +buildEodAnchoredMatcherProto(const RoseBuildImpl &build, + const std::vector<LitFragment> &fragments); + +void findMoreLiteralMasks(RoseBuildImpl &build); + +} // namespace ue2 + +#endif // ROSE_BUILD_MATCHERS_H diff --git a/contrib/libs/hyperscan/src/rose/rose_build_merge.cpp b/contrib/libs/hyperscan/src/rose/rose_build_merge.cpp index 0045782cfb..5066dbd578 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_merge.cpp +++ b/contrib/libs/hyperscan/src/rose/rose_build_merge.cpp @@ -63,12 +63,12 @@ #include "util/container.h" #include "util/dump_charclass.h" #include "util/graph_range.h" -#include "util/hash.h" -#include "util/insertion_ordered.h" +#include "util/hash.h" +#include "util/insertion_ordered.h" #include "util/order_check.h" #include "util/report_manager.h" #include "util/ue2string.h" -#include "util/unordered.h" +#include "util/unordered.h" #include <algorithm> #include <functional> @@ -84,7 +84,7 @@ using namespace std; using boost::adaptors::map_values; -using boost::adaptors::map_keys; +using boost::adaptors::map_keys; namespace ue2 { @@ -94,7 +94,7 @@ static const size_t SMALL_MERGE_MAX_VERTICES_BLOCK = 64; static const size_t SMALL_ROSE_THRESHOLD_STREAM = 32; static const size_t SMALL_ROSE_THRESHOLD_BLOCK = 10; static const size_t MERGE_GROUP_SIZE_MAX = 200; -static const size_t MERGE_CASTLE_GROUP_SIZE_MAX = 1000; +static const size_t MERGE_CASTLE_GROUP_SIZE_MAX = 1000; /** \brief Max number of DFAs (McClellan, Haig) to pairwise merge together. */ static const size_t DFA_CHUNK_SIZE_MAX = 200; @@ -102,10 +102,10 @@ static const size_t DFA_CHUNK_SIZE_MAX = 200; /** \brief Max DFA states in a merged DFA. */ static const size_t DFA_MERGE_MAX_STATES = 8000; -/** \brief In block mode, merge two prefixes even if they don't have identical - * literal sets if they have fewer than this many states and the merged graph - * is also small. */ -static constexpr size_t MAX_BLOCK_PREFIX_MERGE_VERTICES = 32; +/** \brief In block mode, merge two prefixes even if they don't have identical + * literal sets if they have fewer than this many states and the merged graph + * is also small. */ +static constexpr size_t MAX_BLOCK_PREFIX_MERGE_VERTICES = 32; static size_t small_merge_max_vertices(const CompileContext &cc) { @@ -124,17 +124,17 @@ size_t small_rose_threshold(const CompileContext &cc) { * reports should not contribute to the hash. 
*/ static -size_t hashLeftfix(const left_id &left) { +size_t hashLeftfix(const left_id &left) { size_t val = 0; - if (left.castle()) { - hash_combine(val, left.castle()->reach()); - for (const auto &pr : left.castle()->repeats) { + if (left.castle()) { + hash_combine(val, left.castle()->reach()); + for (const auto &pr : left.castle()->repeats) { hash_combine(val, pr.first); // top hash_combine(val, pr.second.bounds); } - } else if (left.graph()) { - hash_combine(val, hash_holder(*left.graph())); + } else if (left.graph()) { + hash_combine(val, hash_holder(*left.graph())); } return val; @@ -150,7 +150,7 @@ struct RoseGroup { const RoseGraph &g = build.g; assert(in_degree(v, g) == 1); RoseVertex u = *inv_adjacent_vertices(v, g).first; - parent = g[u].index; + parent = g[u].index; } bool operator<(const RoseGroup &b) const { @@ -180,24 +180,24 @@ private: }; /** - * Intended to find graphs that are identical except for their report - * IDs. Relies on vertex and edge indices to pick up graphs that have been - * messily put together in different orderings. Only implemented for castles and - * holders. + * Intended to find graphs that are identical except for their report + * IDs. Relies on vertex and edge indices to pick up graphs that have been + * messily put together in different orderings. Only implemented for castles and + * holders. */ -static -bool is_equal(const left_id &u_left, ReportID u_report, - const left_id &v_left, ReportID v_report) { - if (u_left.castle() && v_left.castle()) { - return is_equal(*u_left.castle(), u_report, *v_left.castle(), v_report); - } +static +bool is_equal(const left_id &u_left, ReportID u_report, + const left_id &v_left, ReportID v_report) { + if (u_left.castle() && v_left.castle()) { + return is_equal(*u_left.castle(), u_report, *v_left.castle(), v_report); + } - if (!u_left.graph() || !v_left.graph()) { - return false; + if (!u_left.graph() || !v_left.graph()) { + return false; } - return is_equal(*u_left.graph(), u_report, *v_left.graph(), v_report); -} + return is_equal(*u_left.graph(), u_report, *v_left.graph(), v_report); +} } // namespace @@ -212,8 +212,8 @@ bool is_equal(const left_id &u_left, ReportID u_report, * * Note: only roles with a single predecessor vertex are considered for this * transform - it should probably be generalised to work for roles which share - * the same set of predecessor roles as for \ref dedupeLeftfixesVariableLag or - * it should be retired entirely. + * the same set of predecessor roles as for \ref dedupeLeftfixesVariableLag or + * it should be retired entirely. */ bool dedupeLeftfixes(RoseBuildImpl &tbi) { DEBUG_PRINTF("deduping leftfixes\n"); @@ -248,7 +248,7 @@ bool dedupeLeftfixes(RoseBuildImpl &tbi) { for (deque<RoseVertex> &verts : roses | map_values) { DEBUG_PRINTF("group has %zu vertices\n", verts.size()); - unordered_set<left_id> seen; + unordered_set<left_id> seen; for (auto jt = verts.begin(), jte = verts.end(); jt != jte; ++jt) { RoseVertex v = *jt; @@ -260,16 +260,16 @@ bool dedupeLeftfixes(RoseBuildImpl &tbi) { } // Scan the rest of the list for dupes. - for (auto kt = std::next(jt); kt != jte; ++kt) { - if (g[v].left == g[*kt].left - || !is_equal(g[v].left, g[v].left.leftfix_report, - g[*kt].left, g[*kt].left.leftfix_report)) { + for (auto kt = std::next(jt); kt != jte; ++kt) { + if (g[v].left == g[*kt].left + || !is_equal(g[v].left, g[v].left.leftfix_report, + g[*kt].left, g[*kt].left.leftfix_report)) { continue; } // Dupe found. 
DEBUG_PRINTF("rose at vertex %zu is a dupe of %zu\n", - g[*kt].index, g[v].index); + g[*kt].index, g[v].index); assert(g[v].left.lag == g[*kt].left.lag); g[*kt].left = g[v].left; work_done = true; @@ -320,7 +320,7 @@ bool is_equal(const suffix_id &s1, const suffix_id &s2) { void dedupeSuffixes(RoseBuildImpl &tbi) { DEBUG_PRINTF("deduping suffixes\n"); - unordered_map<suffix_id, set<RoseVertex>> suffix_map; + unordered_map<suffix_id, set<RoseVertex>> suffix_map; map<pair<size_t, set<ReportID>>, vector<suffix_id>> part; // Collect suffixes into groups. @@ -387,7 +387,7 @@ template<class EngineRef> class Bouquet { private: list<EngineRef> ordering; // Unique list in insert order. - using BouquetMap = ue2_unordered_map<EngineRef, deque<RoseVertex>>; + using BouquetMap = ue2_unordered_map<EngineRef, deque<RoseVertex>>; BouquetMap bouquet; public: void insert(const EngineRef &h, RoseVertex v) { @@ -485,246 +485,246 @@ static void chunkBouquets(const Bouquet<EngineRef> &in, } } -static -bool stringsCanFinishAtSameSpot(const ue2_literal &u, - ue2_literal::const_iterator v_b, - ue2_literal::const_iterator v_e) { - ue2_literal::const_iterator u_e = u.end(); - ue2_literal::const_iterator u_b = u.begin(); - - while (u_e != u_b && v_e != v_b) { - --u_e; - --v_e; - - if (!overlaps(*u_e, *v_e)) { - return false; - } - } - - return true; -} - +static +bool stringsCanFinishAtSameSpot(const ue2_literal &u, + ue2_literal::const_iterator v_b, + ue2_literal::const_iterator v_e) { + ue2_literal::const_iterator u_e = u.end(); + ue2_literal::const_iterator u_b = u.begin(); + + while (u_e != u_b && v_e != v_b) { + --u_e; + --v_e; + + if (!overlaps(*u_e, *v_e)) { + return false; + } + } + + return true; +} + /** - * Check that if after u has been seen, that it is impossible for the arrival of - * v to require the inspection of an engine earlier than u did. + * Check that if after u has been seen, that it is impossible for the arrival of + * v to require the inspection of an engine earlier than u did. 
+ * + * Let delta be the earliest that v can be seen after u (may be zero) * - * Let delta be the earliest that v can be seen after u (may be zero) - * - * ie, we require u_loc - ulag <= v_loc - vlag (v_loc = u_loc + delta) - * ==> - ulag <= delta - vlag - * ==> vlag - ulag <= delta + * ie, we require u_loc - ulag <= v_loc - vlag (v_loc = u_loc + delta) + * ==> - ulag <= delta - vlag + * ==> vlag - ulag <= delta */ static bool checkPrefix(const rose_literal_id &ul, const u32 ulag, const rose_literal_id &vl, const u32 vlag) { - DEBUG_PRINTF("'%s'-%u '%s'-%u\n", escapeString(ul.s).c_str(), ulag, - escapeString(vl.s).c_str(), vlag); - - if (vl.delay || ul.delay) { - /* engine related literals should not be delayed anyway */ + DEBUG_PRINTF("'%s'-%u '%s'-%u\n", escapeString(ul.s).c_str(), ulag, + escapeString(vl.s).c_str(), vlag); + + if (vl.delay || ul.delay) { + /* engine related literals should not be delayed anyway */ return false; } - if (ulag >= vlag) { - assert(maxOverlap(ul, vl) <= vl.elength() - vlag + ulag); - return true; - } - - size_t min_allowed_delta = vlag - ulag; - DEBUG_PRINTF("min allow distace %zu\n", min_allowed_delta); - - for (size_t i = 0; i < min_allowed_delta; i++) { - if (stringsCanFinishAtSameSpot(ul.s, vl.s.begin(), vl.s.end() - i)) { - DEBUG_PRINTF("v can follow u at a (too close) distance of %zu\n", i); - return false; - } - } - - DEBUG_PRINTF("OK\n"); - return true; -} - -static -bool hasSameEngineType(const RoseVertexProps &u_prop, - const RoseVertexProps &v_prop) { - const left_id u_left = u_prop.left; - const left_id v_left = v_prop.left; - - return !u_left.haig() == !v_left.haig() - && !u_left.dfa() == !v_left.dfa() - && !u_left.castle() == !v_left.castle() - && !u_left.graph() == !v_left.graph(); -} - -/** - * Verifies that merging the leftfix of vertices does not cause conflicts due - * to the literals on the right. - * - * The main concern is that the lags of the literals and overlap between them - * allow the engine check offset to potentially regress. - * - * Parameters are vectors of literals + lag pairs. - * + if (ulag >= vlag) { + assert(maxOverlap(ul, vl) <= vl.elength() - vlag + ulag); + return true; + } + + size_t min_allowed_delta = vlag - ulag; + DEBUG_PRINTF("min allow distace %zu\n", min_allowed_delta); + + for (size_t i = 0; i < min_allowed_delta; i++) { + if (stringsCanFinishAtSameSpot(ul.s, vl.s.begin(), vl.s.end() - i)) { + DEBUG_PRINTF("v can follow u at a (too close) distance of %zu\n", i); + return false; + } + } + + DEBUG_PRINTF("OK\n"); + return true; +} + +static +bool hasSameEngineType(const RoseVertexProps &u_prop, + const RoseVertexProps &v_prop) { + const left_id u_left = u_prop.left; + const left_id v_left = v_prop.left; + + return !u_left.haig() == !v_left.haig() + && !u_left.dfa() == !v_left.dfa() + && !u_left.castle() == !v_left.castle() + && !u_left.graph() == !v_left.graph(); +} + +/** + * Verifies that merging the leftfix of vertices does not cause conflicts due + * to the literals on the right. + * + * The main concern is that the lags of the literals and overlap between them + * allow the engine check offset to potentially regress. + * + * Parameters are vectors of literals + lag pairs. + * * Note: if more constraints of when the leftfixes were going to be checked - * (mandatory lookarounds passing, offset checks), more merges may be allowed. 
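/* Editor's note (not part of this commit): checkPrefix above enforces the
 * inequality derived in its comment: if v can appear no earlier than delta
 * bytes after u, the merged engine's check offsets stay ordered exactly when
 * vlag - ulag <= delta. A tiny numeric sketch of that condition (helper name
 * is illustrative only): */
#include <cassert>
#include <cstdint>

static bool checksStayOrdered(uint64_t u_loc, uint64_t delta, uint32_t ulag,
                              uint32_t vlag) {
    uint64_t v_loc = u_loc + delta;
    // The engine is checked at u_loc - ulag and later at v_loc - vlag; the
    // later literal must not force a check at an earlier offset.
    return v_loc - vlag >= u_loc - ulag; // equivalent to vlag - ulag <= delta
}

int main() {
    assert(checksStayOrdered(100, 3, 1, 4));  // vlag - ulag == delta: fine
    assert(!checksStayOrdered(100, 2, 1, 4)); // delta too small: would regress
    return 0;
}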
- */ -static -bool compatibleLiteralsForMerge( - const vector<pair<const rose_literal_id *, u32>> &ulits, - const vector<pair<const rose_literal_id *, u32>> &vlits) { - assert(!ulits.empty()); - assert(!vlits.empty()); - - // We cannot merge engines that prefix literals in different tables. - if (ulits[0].first->table != vlits[0].first->table) { + * (mandatory lookarounds passing, offset checks), more merges may be allowed. + */ +static +bool compatibleLiteralsForMerge( + const vector<pair<const rose_literal_id *, u32>> &ulits, + const vector<pair<const rose_literal_id *, u32>> &vlits) { + assert(!ulits.empty()); + assert(!vlits.empty()); + + // We cannot merge engines that prefix literals in different tables. + if (ulits[0].first->table != vlits[0].first->table) { DEBUG_PRINTF("literals in different tables\n"); return false; } - // We don't handle delayed cases yet. - for (const auto &ue : ulits) { - const rose_literal_id &ul = *ue.first; - if (ul.delay) { + // We don't handle delayed cases yet. + for (const auto &ue : ulits) { + const rose_literal_id &ul = *ue.first; + if (ul.delay) { return false; } } - for (const auto &ve : vlits) { - const rose_literal_id &vl = *ve.first; - if (vl.delay) { + for (const auto &ve : vlits) { + const rose_literal_id &vl = *ve.first; + if (vl.delay) { return false; } } - /* An engine requires that all accesses to it are ordered by offsets. (ie, - we can not check an engine's state at offset Y, if we have already - checked its status at offset X and X > Y). If we can not establish that + /* An engine requires that all accesses to it are ordered by offsets. (ie, + we can not check an engine's state at offset Y, if we have already + checked its status at offset X and X > Y). If we can not establish that the literals used for triggering will satisfy this property, then it is - not safe to merge the engine. */ - for (const auto &ue : ulits) { - const rose_literal_id &ul = *ue.first; - u32 ulag = ue.second; - - for (const auto &ve : vlits) { - const rose_literal_id &vl = *ve.first; - u32 vlag = ve.second; - - if (!checkPrefix(ul, ulag, vl, vlag) - || !checkPrefix(vl, vlag, ul, ulag)) { - DEBUG_PRINTF("prefix check failed\n"); - return false; - } - } - } - - return true; -} - -/** - * True if this graph has few enough accel states to be implemented as an NFA - * with all of those states actually becoming accel schemes. - */ -static -bool isAccelerableLeftfix(const RoseBuildImpl &build, const NGHolder &g) { - u32 num = countAccelStates(g, &build.rm, build.cc); - DEBUG_PRINTF("graph with %zu vertices has %u accel states\n", - num_vertices(g), num); - return num <= NFA_MAX_ACCEL_STATES; -} - -/** - * In block mode, we want to be a little more selective -- We will only merge - * prefix engines when the literal sets are the same or if the merged graph - * has only grown by a small amount. - */ -static -bool safeBlockModeMerge(const RoseBuildImpl &build, RoseVertex u, - RoseVertex v) { - assert(!build.cc.streaming); - assert(build.isRootSuccessor(u) == build.isRootSuccessor(v)); - - // Always merge infixes if we can (subject to the other criteria in - // mergeableRoseVertices). - if (!build.isRootSuccessor(u)) { - return true; - } - - const RoseGraph &g = build.g; - - // Merge prefixes with identical literal sets (as we'd have to run them - // both when we see those literals anyway). - if (g[u].literals == g[v].literals) { - return true; - } - - // The rest of this function only deals with the case when both vertices - // have graph leftfixes. 
- if (!g[u].left.graph || !g[v].left.graph) { - return false; - } - - const size_t u_count = num_vertices(*g[u].left.graph); - const size_t v_count = num_vertices(*g[v].left.graph); - DEBUG_PRINTF("u prefix has %zu vertices, v prefix has %zu vertices\n", - u_count, v_count); - if (u_count > MAX_BLOCK_PREFIX_MERGE_VERTICES || - v_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) { - DEBUG_PRINTF("prefixes too big already\n"); - return false; - } - - DEBUG_PRINTF("trying merge\n"); - NGHolder h; - cloneHolder(h, *g[v].left.graph); - if (!mergeNfaPair(*g[u].left.graph, h, nullptr, build.cc)) { - DEBUG_PRINTF("couldn't merge\n"); - return false; - } - - const size_t merged_count = num_vertices(h); - DEBUG_PRINTF("merged result has %zu vertices\n", merged_count); - if (merged_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) { - DEBUG_PRINTF("exceeded limit\n"); - return false; - } - - // We want to only perform merges that take advantage of some - // commonality in the two input graphs, so we check that the number of - // vertices has only grown a small amount: somewhere between the sum - // (no commonality) and the max (no growth at all) of the vertex counts - // of the input graphs. - const size_t max_size = u_count + v_count; - const size_t min_size = max(u_count, v_count); - const size_t max_growth = ((max_size - min_size) * 25) / 100; - if (merged_count > min_size + max_growth) { - DEBUG_PRINTF("grew too much\n"); - return false; - } - - // We don't want to squander any chances at accelerating. - if (!isAccelerableLeftfix(build, h) && - (isAccelerableLeftfix(build, *g[u].left.graph) || - isAccelerableLeftfix(build, *g[v].left.graph))) { - DEBUG_PRINTF("would lose accel property\n"); - return false; - } - - DEBUG_PRINTF("safe to merge\n"); - return true; -} - -bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u, - RoseVertex v) { - assert(u != v); - - if (!hasSameEngineType(tbi.g[u], tbi.g[v])) { - return false; - } - - if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, u, v)) { - return false; - } - + not safe to merge the engine. */ + for (const auto &ue : ulits) { + const rose_literal_id &ul = *ue.first; + u32 ulag = ue.second; + + for (const auto &ve : vlits) { + const rose_literal_id &vl = *ve.first; + u32 vlag = ve.second; + + if (!checkPrefix(ul, ulag, vl, vlag) + || !checkPrefix(vl, vlag, ul, ulag)) { + DEBUG_PRINTF("prefix check failed\n"); + return false; + } + } + } + + return true; +} + +/** + * True if this graph has few enough accel states to be implemented as an NFA + * with all of those states actually becoming accel schemes. + */ +static +bool isAccelerableLeftfix(const RoseBuildImpl &build, const NGHolder &g) { + u32 num = countAccelStates(g, &build.rm, build.cc); + DEBUG_PRINTF("graph with %zu vertices has %u accel states\n", + num_vertices(g), num); + return num <= NFA_MAX_ACCEL_STATES; +} + +/** + * In block mode, we want to be a little more selective -- We will only merge + * prefix engines when the literal sets are the same or if the merged graph + * has only grown by a small amount. + */ +static +bool safeBlockModeMerge(const RoseBuildImpl &build, RoseVertex u, + RoseVertex v) { + assert(!build.cc.streaming); + assert(build.isRootSuccessor(u) == build.isRootSuccessor(v)); + + // Always merge infixes if we can (subject to the other criteria in + // mergeableRoseVertices). 
+ if (!build.isRootSuccessor(u)) { + return true; + } + + const RoseGraph &g = build.g; + + // Merge prefixes with identical literal sets (as we'd have to run them + // both when we see those literals anyway). + if (g[u].literals == g[v].literals) { + return true; + } + + // The rest of this function only deals with the case when both vertices + // have graph leftfixes. + if (!g[u].left.graph || !g[v].left.graph) { + return false; + } + + const size_t u_count = num_vertices(*g[u].left.graph); + const size_t v_count = num_vertices(*g[v].left.graph); + DEBUG_PRINTF("u prefix has %zu vertices, v prefix has %zu vertices\n", + u_count, v_count); + if (u_count > MAX_BLOCK_PREFIX_MERGE_VERTICES || + v_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) { + DEBUG_PRINTF("prefixes too big already\n"); + return false; + } + + DEBUG_PRINTF("trying merge\n"); + NGHolder h; + cloneHolder(h, *g[v].left.graph); + if (!mergeNfaPair(*g[u].left.graph, h, nullptr, build.cc)) { + DEBUG_PRINTF("couldn't merge\n"); + return false; + } + + const size_t merged_count = num_vertices(h); + DEBUG_PRINTF("merged result has %zu vertices\n", merged_count); + if (merged_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) { + DEBUG_PRINTF("exceeded limit\n"); + return false; + } + + // We want to only perform merges that take advantage of some + // commonality in the two input graphs, so we check that the number of + // vertices has only grown a small amount: somewhere between the sum + // (no commonality) and the max (no growth at all) of the vertex counts + // of the input graphs. + const size_t max_size = u_count + v_count; + const size_t min_size = max(u_count, v_count); + const size_t max_growth = ((max_size - min_size) * 25) / 100; + if (merged_count > min_size + max_growth) { + DEBUG_PRINTF("grew too much\n"); + return false; + } + + // We don't want to squander any chances at accelerating. 
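/* Editor's note (not part of this commit): the block-mode merge guard above
 * only accepts a prefix merge that demonstrates real sharing: the merged
 * graph may exceed the larger input by at most 25% of the gap between the
 * inputs' combined vertex count and that larger input. Worked numbers
 * (hypothetical): */
#include <algorithm>
#include <cassert>
#include <cstddef>

static size_t mergedVertexLimit(size_t u_count, size_t v_count) {
    size_t max_size = u_count + v_count;          // no commonality at all
    size_t min_size = std::max(u_count, v_count); // perfect overlap
    return min_size + ((max_size - min_size) * 25) / 100;
}

int main() {
    // Prefixes of 20 and 24 vertices: the merge is kept only if the result
    // has at most 29 vertices.
    assert(mergedVertexLimit(20, 24) == 29);
    return 0;
}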
+ if (!isAccelerableLeftfix(build, h) && + (isAccelerableLeftfix(build, *g[u].left.graph) || + isAccelerableLeftfix(build, *g[v].left.graph))) { + DEBUG_PRINTF("would lose accel property\n"); + return false; + } + + DEBUG_PRINTF("safe to merge\n"); + return true; +} + +bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u, + RoseVertex v) { + assert(u != v); + + if (!hasSameEngineType(tbi.g[u], tbi.g[v])) { + return false; + } + + if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, u, v)) { + return false; + } + /* We cannot merge prefixes/vertices if they are successors of different * root vertices */ if (tbi.isRootSuccessor(u)) { @@ -739,105 +739,105 @@ bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u, } } - u32 ulag = tbi.g[u].left.lag; - vector<pair<const rose_literal_id *, u32>> ulits; - ulits.reserve(tbi.g[u].literals.size()); - for (u32 id : tbi.g[u].literals) { - ulits.emplace_back(&tbi.literals.at(id), ulag); - } + u32 ulag = tbi.g[u].left.lag; + vector<pair<const rose_literal_id *, u32>> ulits; + ulits.reserve(tbi.g[u].literals.size()); + for (u32 id : tbi.g[u].literals) { + ulits.emplace_back(&tbi.literals.at(id), ulag); + } - u32 vlag = tbi.g[v].left.lag; - vector<pair<const rose_literal_id *, u32>> vlits; - vlits.reserve(tbi.g[v].literals.size()); - for (u32 id : tbi.g[v].literals) { - vlits.emplace_back(&tbi.literals.at(id), vlag); - } + u32 vlag = tbi.g[v].left.lag; + vector<pair<const rose_literal_id *, u32>> vlits; + vlits.reserve(tbi.g[v].literals.size()); + for (u32 id : tbi.g[v].literals) { + vlits.emplace_back(&tbi.literals.at(id), vlag); + } - if (!compatibleLiteralsForMerge(ulits, vlits)) { - return false; + if (!compatibleLiteralsForMerge(ulits, vlits)) { + return false; } - DEBUG_PRINTF("roses on %zu and %zu are mergeable\n", tbi.g[u].index, - tbi.g[v].index); + DEBUG_PRINTF("roses on %zu and %zu are mergeable\n", tbi.g[u].index, + tbi.g[v].index); return true; } -/* We cannot merge an engine, if a trigger literal and a post literal overlap - * in such a way that engine status needs to be check at a point before the - * engine's current location. - * - * i.e., for a trigger literal u and a pos literal v, - * where delta is the earliest v can appear after t, - * we require that v_loc - v_lag >= u_loc - * ==> u_loc + delta - v_lag >= u_loc - * ==> delta >= v_lag - * - */ +/* We cannot merge an engine, if a trigger literal and a post literal overlap + * in such a way that engine status needs to be check at a point before the + * engine's current location. 
+ * + * i.e., for a trigger literal u and a pos literal v, + * where delta is the earliest v can appear after t, + * we require that v_loc - v_lag >= u_loc + * ==> u_loc + delta - v_lag >= u_loc + * ==> delta >= v_lag + * + */ static -bool checkPredDelay(const rose_literal_id &ul, const rose_literal_id &vl, - u32 vlag) { - DEBUG_PRINTF("%s %s (lag %u)\n", escapeString(ul.s).c_str(), - escapeString(vl.s).c_str(), vlag); - - for (size_t i = 0; i < vlag; i++) { - if (stringsCanFinishAtSameSpot(ul.s, vl.s.begin(), vl.s.end() - i)) { - DEBUG_PRINTF("v can follow u at a (too close) distance of %zu\n", i); - return false; +bool checkPredDelay(const rose_literal_id &ul, const rose_literal_id &vl, + u32 vlag) { + DEBUG_PRINTF("%s %s (lag %u)\n", escapeString(ul.s).c_str(), + escapeString(vl.s).c_str(), vlag); + + for (size_t i = 0; i < vlag; i++) { + if (stringsCanFinishAtSameSpot(ul.s, vl.s.begin(), vl.s.end() - i)) { + DEBUG_PRINTF("v can follow u at a (too close) distance of %zu\n", i); + return false; } } - - DEBUG_PRINTF("OK\n"); + + DEBUG_PRINTF("OK\n"); return true; } -template<typename VertexCont> -static never_inline -bool checkPredDelays(const RoseBuildImpl &build, const VertexCont &v1, - const VertexCont &v2) { - flat_set<RoseVertex> preds; +template<typename VertexCont> +static never_inline +bool checkPredDelays(const RoseBuildImpl &build, const VertexCont &v1, + const VertexCont &v2) { + flat_set<RoseVertex> preds; for (auto v : v1) { - insert(&preds, inv_adjacent_vertices(v, build.g)); - } - - flat_set<u32> pred_lits; - - /* No need to examine delays of a common pred - as it must already have - * survived the delay checks. - * - * This is important when the pred is in the anchored table as - * the literal is no longer available. */ - flat_set<RoseVertex> known_good_preds; - for (auto v : v2) { - insert(&known_good_preds, inv_adjacent_vertices(v, build.g)); - } - + insert(&preds, inv_adjacent_vertices(v, build.g)); + } + + flat_set<u32> pred_lits; + + /* No need to examine delays of a common pred - as it must already have + * survived the delay checks. + * + * This is important when the pred is in the anchored table as + * the literal is no longer available. */ + flat_set<RoseVertex> known_good_preds; + for (auto v : v2) { + insert(&known_good_preds, inv_adjacent_vertices(v, build.g)); + } + for (auto u : preds) { - if (!contains(known_good_preds, u)) { - insert(&pred_lits, build.g[u].literals); - } - } - - vector<const rose_literal_id *> pred_rose_lits; - pred_rose_lits.reserve(pred_lits.size()); - for (const auto &p : pred_lits) { - pred_rose_lits.push_back(&build.literals.at(p)); - } - - for (auto v : v2) { - u32 vlag = build.g[v].left.lag; - if (!vlag) { - continue; - } - - for (const u32 vlit : build.g[v].literals) { - const rose_literal_id &vl = build.literals.at(vlit); - assert(!vl.delay); // this should never have got this far? - for (const auto &ul : pred_rose_lits) { - assert(!ul->delay); // this should never have got this far? 
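To make the inequality in the comment above concrete: delta is the earliest distance at which the post literal v can finish after the trigger literal u, and the engine runs v_lag bytes behind v's matches, so the merge is only safe when delta >= v_lag. A trivial standalone restatement, with illustrative names that are not from the patch:

// Sketch only: the safety condition derived in the comment above.
static bool predDelayOk(unsigned delta, unsigned v_lag) {
    return delta >= v_lag;
}

// Example: with v_lag = 2, v must not be able to finish 0 or 1 bytes after u
// finishes, which is exactly what the loop over i in [0, vlag) in
// checkPredDelay tests.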
- - if (!checkPredDelay(*ul, vl, vlag)) { - return false; - } + if (!contains(known_good_preds, u)) { + insert(&pred_lits, build.g[u].literals); + } + } + + vector<const rose_literal_id *> pred_rose_lits; + pred_rose_lits.reserve(pred_lits.size()); + for (const auto &p : pred_lits) { + pred_rose_lits.push_back(&build.literals.at(p)); + } + + for (auto v : v2) { + u32 vlag = build.g[v].left.lag; + if (!vlag) { + continue; + } + + for (const u32 vlit : build.g[v].literals) { + const rose_literal_id &vl = build.literals.at(vlit); + assert(!vl.delay); // this should never have got this far? + for (const auto &ul : pred_rose_lits) { + assert(!ul->delay); // this should never have got this far? + + if (!checkPredDelay(*ul, vl, vlag)) { + return false; + } } } } @@ -849,65 +849,65 @@ static bool mergeableRoseVertices(const RoseBuildImpl &tbi, const deque<RoseVertex> &verts1, const deque<RoseVertex> &verts2) { - assert(!verts1.empty()); - assert(!verts2.empty()); - - RoseVertex u_front = verts1.front(); - RoseVertex v_front = verts2.front(); - - /* all vertices must have the same engine type: assume all verts in each - * group are already of the same type */ - if (!hasSameEngineType(tbi.g[u_front], tbi.g[v_front])) { - return false; - } - - bool is_prefix = tbi.isRootSuccessor(u_front); - - /* We cannot merge prefixes/vertices if they are successors of different - * root vertices: similarly, assume the grouped vertices are compatible */ - if (is_prefix) { - assert(tbi.isRootSuccessor(v_front)); - set<RoseVertex> u_preds; - set<RoseVertex> v_preds; - insert(&u_preds, inv_adjacent_vertices(u_front, tbi.g)); - insert(&v_preds, inv_adjacent_vertices(v_front, tbi.g)); - - if (u_preds != v_preds) { - return false; - } - } - - vector<pair<const rose_literal_id *, u32>> ulits; /* lit + lag pairs */ - for (auto a : verts1) { - if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, v_front, a)) { - return false; - } - - u32 ulag = tbi.g[a].left.lag; - for (u32 id : tbi.g[a].literals) { - ulits.emplace_back(&tbi.literals.at(id), ulag); - } - } - - vector<pair<const rose_literal_id *, u32>> vlits; - for (auto a : verts2) { - if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, u_front, a)) { - return false; - } - - u32 vlag = tbi.g[a].left.lag; - for (u32 id : tbi.g[a].literals) { - vlits.emplace_back(&tbi.literals.at(id), vlag); - } - } - - if (!compatibleLiteralsForMerge(ulits, vlits)) { - return false; - } - + assert(!verts1.empty()); + assert(!verts2.empty()); + + RoseVertex u_front = verts1.front(); + RoseVertex v_front = verts2.front(); + + /* all vertices must have the same engine type: assume all verts in each + * group are already of the same type */ + if (!hasSameEngineType(tbi.g[u_front], tbi.g[v_front])) { + return false; + } + + bool is_prefix = tbi.isRootSuccessor(u_front); + + /* We cannot merge prefixes/vertices if they are successors of different + * root vertices: similarly, assume the grouped vertices are compatible */ + if (is_prefix) { + assert(tbi.isRootSuccessor(v_front)); + set<RoseVertex> u_preds; + set<RoseVertex> v_preds; + insert(&u_preds, inv_adjacent_vertices(u_front, tbi.g)); + insert(&v_preds, inv_adjacent_vertices(v_front, tbi.g)); + + if (u_preds != v_preds) { + return false; + } + } + + vector<pair<const rose_literal_id *, u32>> ulits; /* lit + lag pairs */ + for (auto a : verts1) { + if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, v_front, a)) { + return false; + } + + u32 ulag = tbi.g[a].left.lag; + for (u32 id : tbi.g[a].literals) { + ulits.emplace_back(&tbi.literals.at(id), 
ulag); + } + } + + vector<pair<const rose_literal_id *, u32>> vlits; + for (auto a : verts2) { + if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, u_front, a)) { + return false; + } + + u32 vlag = tbi.g[a].left.lag; + for (u32 id : tbi.g[a].literals) { + vlits.emplace_back(&tbi.literals.at(id), vlag); + } + } + + if (!compatibleLiteralsForMerge(ulits, vlits)) { + return false; + } + // Check preds are compatible as well. - if (!checkPredDelays(tbi, verts1, verts2) - || !checkPredDelays(tbi, verts2, verts1)) { + if (!checkPredDelays(tbi, verts1, verts2) + || !checkPredDelays(tbi, verts2, verts1)) { return false; } @@ -968,35 +968,35 @@ struct RoseMergeCandidate { } static -bool mergeLeftfixPair(RoseBuildImpl &build, left_id &r1, left_id &r2, - const vector<RoseVertex> &verts1, - const vector<RoseVertex> &verts2) { +bool mergeLeftfixPair(RoseBuildImpl &build, left_id &r1, left_id &r2, + const vector<RoseVertex> &verts1, + const vector<RoseVertex> &verts2) { assert(!verts1.empty() && !verts2.empty()); - DEBUG_PRINTF("merging pair of leftfixes:\n"); - DEBUG_PRINTF(" A:%016zx: tops %s\n", r1.hash(), - as_string_list(all_tops(r1)).c_str()); - DEBUG_PRINTF(" B:%016zx: tops %s\n", r2.hash(), - as_string_list(all_tops(r2)).c_str()); + DEBUG_PRINTF("merging pair of leftfixes:\n"); + DEBUG_PRINTF(" A:%016zx: tops %s\n", r1.hash(), + as_string_list(all_tops(r1)).c_str()); + DEBUG_PRINTF(" B:%016zx: tops %s\n", r2.hash(), + as_string_list(all_tops(r2)).c_str()); + + RoseGraph &g = build.g; - RoseGraph &g = build.g; - if (r1.graph()) { assert(r2.graph()); assert(r1.graph()->kind == r2.graph()->kind); - if (!mergeNfaPair(*r1.graph(), *r2.graph(), nullptr, build.cc)) { + if (!mergeNfaPair(*r1.graph(), *r2.graph(), nullptr, build.cc)) { DEBUG_PRINTF("nfa merge failed\n"); return false; } - /* The graph in r1 has been merged into the graph in r2. Update r1's - * vertices with the new graph ptr. mergeNfaPair() does not alter the - * tops from the input graph so no need to update top values. - * - * It is the responsibility of the caller to ensure that the tops are - * distinct when they have different trigger conditions. - * [Note: mergeLeftfixesVariableLag() should have a common parent set] - */ + /* The graph in r1 has been merged into the graph in r2. Update r1's + * vertices with the new graph ptr. mergeNfaPair() does not alter the + * tops from the input graph so no need to update top values. + * + * It is the responsibility of the caller to ensure that the tops are + * distinct when they have different trigger conditions. + * [Note: mergeLeftfixesVariableLag() should have a common parent set] + */ shared_ptr<NGHolder> &h = g[verts2.front()].left.graph; for (RoseVertex v : verts1) { g[v].left.graph = h; @@ -1005,7 +1005,7 @@ bool mergeLeftfixPair(RoseBuildImpl &build, left_id &r1, left_id &r2, return true; } else if (r1.castle()) { assert(r2.castle()); - assert(build.cc.grey.allowCastle); + assert(build.cc.grey.allowCastle); map<u32, u32> top_map; if (!mergeCastle(*r2.castle(), *r1.castle(), top_map)) { @@ -1029,200 +1029,200 @@ bool mergeLeftfixPair(RoseBuildImpl &build, left_id &r1, left_id &r2, return false; } -/** - * Checks that there is no problem due to the involved vertices if we merge two - * leftfix engines. - * - * This functions takes the vertices on the right of the two engines. - * - * Unlike mergeableRoseVertices(), this does not: - * - check that engines themselves can be merged - * - use heuristics to find out if merging the engines is wise. 
- */ +/** + * Checks that there is no problem due to the involved vertices if we merge two + * leftfix engines. + * + * This functions takes the vertices on the right of the two engines. + * + * Unlike mergeableRoseVertices(), this does not: + * - check that engines themselves can be merged + * - use heuristics to find out if merging the engines is wise. + */ static -bool checkVerticesOkForLeftfixMerge(const RoseBuildImpl &build, - const vector<RoseVertex> &targets_1, - const vector<RoseVertex> &targets_2) { - assert(!targets_1.empty()); - assert(!targets_2.empty()); - - vector<pair<const rose_literal_id *, u32>> ulits; /* lit + lag pairs */ - for (auto a : targets_1) { - u32 ulag = build.g[a].left.lag; - for (u32 id : build.g[a].literals) { - ulits.emplace_back(&build.literals.at(id), ulag); - } - } - - vector<pair<const rose_literal_id *, u32>> vlits; - for (auto a : targets_2) { - u32 vlag = build.g[a].left.lag; - for (u32 id : build.g[a].literals) { - vlits.emplace_back(&build.literals.at(id), vlag); - } - } - - if (!compatibleLiteralsForMerge(ulits, vlits)) { - return false; - } - - // Check preds are compatible as well. - if (!checkPredDelays(build, targets_1, targets_2) - || !checkPredDelays(build, targets_2, targets_1)) { - return false; - } - - DEBUG_PRINTF("vertex sets are mergeable\n"); - return true; -} - -/** - * In block mode, we want to be a little more selective -- we will only merge - * prefix engines when the literal sets are the same or if the merged graph - * has only grown by a small amount. - */ -static -bool goodBlockModeMerge(const RoseBuildImpl &build, - const vector<RoseVertex> &u_verts, const left_id &u_eng, - const vector<RoseVertex> &v_verts, - const left_id &v_eng) { - assert(!build.cc.streaming); - - // Always merge infixes if we can (subject to the other criteria in - // mergeableRoseVertices). - if (!build.isRootSuccessor(u_verts.front())) { - return true; - } - - const RoseGraph &g = build.g; - - flat_set<u32> u_lits; - for (RoseVertex u : u_verts) { - insert(&u_lits, g[u].literals); - } - - flat_set<u32> v_lits; - for (RoseVertex v : v_verts) { - insert(&v_lits, g[v].literals); - } - - // Merge prefixes with identical literal sets (as we'd have to run them - // both when we see those literals anyway). - if (u_lits == v_lits) { - return true; - } - - // The rest of this function only deals with the case when have graph - // leftfixes. 
- if (!u_eng.graph()) { - return false; - } - assert(v_eng.graph()); - const NGHolder &ug = *u_eng.graph(); - const NGHolder &vg = *v_eng.graph(); - - size_t u_count = num_vertices(ug); - size_t v_count = num_vertices(vg); - DEBUG_PRINTF("u prefix has %zu vertices, v prefix has %zu vertices\n", - u_count, v_count); - if (u_count > MAX_BLOCK_PREFIX_MERGE_VERTICES || - v_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) { - DEBUG_PRINTF("prefixes too big already\n"); - return false; - } - - DEBUG_PRINTF("trying merge\n"); - NGHolder h; - cloneHolder(h, vg); - if (!mergeNfaPair(ug, h, nullptr, build.cc)) { - DEBUG_PRINTF("couldn't merge\n"); - return false; - } - - const size_t merged_count = num_vertices(h); - DEBUG_PRINTF("merged result has %zu vertices\n", merged_count); - if (merged_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) { - DEBUG_PRINTF("exceeded limit\n"); - return false; - } - - // We want to only perform merges that take advantage of some - // commonality in the two input graphs, so we check that the number of - // vertices has only grown a small amount: somewhere between the sum - // (no commonality) and the max (no growth at all) of the vertex counts - // of the input graphs. - size_t max_size = u_count + v_count; - size_t min_size = max(u_count, v_count); - size_t max_growth = ((max_size - min_size) * 25) / 100; - if (merged_count > min_size + max_growth) { - DEBUG_PRINTF("grew too much\n"); - return false; - } - - // We don't want to squander any chances at accelerating. - if (!isAccelerableLeftfix(build, h) - && (isAccelerableLeftfix(build, ug) - || isAccelerableLeftfix(build, vg))) { - DEBUG_PRINTF("would lose accel property\n"); - return false; - } - - DEBUG_PRINTF("safe to merge\n"); - return true; -} - -/** - * Merge r1 into r2 if safe and appropriate. Returns true on success. - */ -static -bool mergeLeftVL_tryMergeCandidate(RoseBuildImpl &build, left_id &r1, - const vector<RoseVertex> &targets_1, - left_id &r2, - const vector<RoseVertex> &targets_2) { - if (targets_1.empty() || targets_2.empty()) { - /* one of the engines has already been merged away */ - return false; - } - - assert(!r1.graph() == !r2.graph()); - if (r1.graph()) { - NGHolder *h1 = r1.graph(); - NGHolder *h2 = r2.graph(); - CharReach stop1 = findStopAlphabet(*h1, SOM_NONE); - CharReach stop2 = findStopAlphabet(*h2, SOM_NONE); - CharReach stopboth = stop1 & stop2; - DEBUG_PRINTF("stop1=%zu, stop2=%zu, stopboth=%zu\n", stop1.count(), - stop2.count(), stopboth.count()); - if (stopboth.count() < 10 - && (stop1.count() > 10 || stop2.count() > 10)) { - DEBUG_PRINTF("skip merge, would kill stop alphabet\n"); - return false; - } - size_t maxstop = max(stop1.count(), stop2.count()); - if (maxstop > 200 && stopboth.count() < 200) { - DEBUG_PRINTF("skip merge, would reduce stop alphabet\n"); - return false; - } - } - - /* Rechecking that the targets are compatible, as we may have already - * merged new states into r1 or r2 and we need to verify that this - * candidate is still ok. 
*/ - if (!checkVerticesOkForLeftfixMerge(build, targets_1, targets_2)) { - return false; - } - - if (!build.cc.streaming - && !goodBlockModeMerge(build, targets_1, r1, targets_2, r2)) { - return false; - } - - return mergeLeftfixPair(build, r1, r2, targets_1, targets_2); +bool checkVerticesOkForLeftfixMerge(const RoseBuildImpl &build, + const vector<RoseVertex> &targets_1, + const vector<RoseVertex> &targets_2) { + assert(!targets_1.empty()); + assert(!targets_2.empty()); + + vector<pair<const rose_literal_id *, u32>> ulits; /* lit + lag pairs */ + for (auto a : targets_1) { + u32 ulag = build.g[a].left.lag; + for (u32 id : build.g[a].literals) { + ulits.emplace_back(&build.literals.at(id), ulag); + } + } + + vector<pair<const rose_literal_id *, u32>> vlits; + for (auto a : targets_2) { + u32 vlag = build.g[a].left.lag; + for (u32 id : build.g[a].literals) { + vlits.emplace_back(&build.literals.at(id), vlag); + } + } + + if (!compatibleLiteralsForMerge(ulits, vlits)) { + return false; + } + + // Check preds are compatible as well. + if (!checkPredDelays(build, targets_1, targets_2) + || !checkPredDelays(build, targets_2, targets_1)) { + return false; + } + + DEBUG_PRINTF("vertex sets are mergeable\n"); + return true; +} + +/** + * In block mode, we want to be a little more selective -- we will only merge + * prefix engines when the literal sets are the same or if the merged graph + * has only grown by a small amount. + */ +static +bool goodBlockModeMerge(const RoseBuildImpl &build, + const vector<RoseVertex> &u_verts, const left_id &u_eng, + const vector<RoseVertex> &v_verts, + const left_id &v_eng) { + assert(!build.cc.streaming); + + // Always merge infixes if we can (subject to the other criteria in + // mergeableRoseVertices). + if (!build.isRootSuccessor(u_verts.front())) { + return true; + } + + const RoseGraph &g = build.g; + + flat_set<u32> u_lits; + for (RoseVertex u : u_verts) { + insert(&u_lits, g[u].literals); + } + + flat_set<u32> v_lits; + for (RoseVertex v : v_verts) { + insert(&v_lits, g[v].literals); + } + + // Merge prefixes with identical literal sets (as we'd have to run them + // both when we see those literals anyway). + if (u_lits == v_lits) { + return true; + } + + // The rest of this function only deals with the case when have graph + // leftfixes. + if (!u_eng.graph()) { + return false; + } + assert(v_eng.graph()); + const NGHolder &ug = *u_eng.graph(); + const NGHolder &vg = *v_eng.graph(); + + size_t u_count = num_vertices(ug); + size_t v_count = num_vertices(vg); + DEBUG_PRINTF("u prefix has %zu vertices, v prefix has %zu vertices\n", + u_count, v_count); + if (u_count > MAX_BLOCK_PREFIX_MERGE_VERTICES || + v_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) { + DEBUG_PRINTF("prefixes too big already\n"); + return false; + } + + DEBUG_PRINTF("trying merge\n"); + NGHolder h; + cloneHolder(h, vg); + if (!mergeNfaPair(ug, h, nullptr, build.cc)) { + DEBUG_PRINTF("couldn't merge\n"); + return false; + } + + const size_t merged_count = num_vertices(h); + DEBUG_PRINTF("merged result has %zu vertices\n", merged_count); + if (merged_count > MAX_BLOCK_PREFIX_MERGE_VERTICES) { + DEBUG_PRINTF("exceeded limit\n"); + return false; + } + + // We want to only perform merges that take advantage of some + // commonality in the two input graphs, so we check that the number of + // vertices has only grown a small amount: somewhere between the sum + // (no commonality) and the max (no growth at all) of the vertex counts + // of the input graphs. 
+ size_t max_size = u_count + v_count; + size_t min_size = max(u_count, v_count); + size_t max_growth = ((max_size - min_size) * 25) / 100; + if (merged_count > min_size + max_growth) { + DEBUG_PRINTF("grew too much\n"); + return false; + } + + // We don't want to squander any chances at accelerating. + if (!isAccelerableLeftfix(build, h) + && (isAccelerableLeftfix(build, ug) + || isAccelerableLeftfix(build, vg))) { + DEBUG_PRINTF("would lose accel property\n"); + return false; + } + + DEBUG_PRINTF("safe to merge\n"); + return true; +} + +/** + * Merge r1 into r2 if safe and appropriate. Returns true on success. + */ +static +bool mergeLeftVL_tryMergeCandidate(RoseBuildImpl &build, left_id &r1, + const vector<RoseVertex> &targets_1, + left_id &r2, + const vector<RoseVertex> &targets_2) { + if (targets_1.empty() || targets_2.empty()) { + /* one of the engines has already been merged away */ + return false; + } + + assert(!r1.graph() == !r2.graph()); + if (r1.graph()) { + NGHolder *h1 = r1.graph(); + NGHolder *h2 = r2.graph(); + CharReach stop1 = findStopAlphabet(*h1, SOM_NONE); + CharReach stop2 = findStopAlphabet(*h2, SOM_NONE); + CharReach stopboth = stop1 & stop2; + DEBUG_PRINTF("stop1=%zu, stop2=%zu, stopboth=%zu\n", stop1.count(), + stop2.count(), stopboth.count()); + if (stopboth.count() < 10 + && (stop1.count() > 10 || stop2.count() > 10)) { + DEBUG_PRINTF("skip merge, would kill stop alphabet\n"); + return false; + } + size_t maxstop = max(stop1.count(), stop2.count()); + if (maxstop > 200 && stopboth.count() < 200) { + DEBUG_PRINTF("skip merge, would reduce stop alphabet\n"); + return false; + } + } + + /* Rechecking that the targets are compatible, as we may have already + * merged new states into r1 or r2 and we need to verify that this + * candidate is still ok. */ + if (!checkVerticesOkForLeftfixMerge(build, targets_1, targets_2)) { + return false; + } + + if (!build.cc.streaming + && !goodBlockModeMerge(build, targets_1, r1, targets_2, r2)) { + return false; + } + + return mergeLeftfixPair(build, r1, r2, targets_1, targets_2); } static bool nfaHasNarrowStart(const NGHolder &g) { - if (out_degree(g.startDs, g) > 1) { + if (out_degree(g.startDs, g) > 1) { return false; // unanchored } @@ -1267,91 +1267,91 @@ bool hasReformedStartDotStar(const NGHolder &h, const Grey &grey) { static u32 commonPrefixLength(left_id &r1, left_id &r2) { if (r1.graph() && r2.graph()) { - return commonPrefixLength(*r1.graph(), *r2.graph()); + return commonPrefixLength(*r1.graph(), *r2.graph()); } else if (r1.castle() && r2.castle()) { return min(findMinWidth(*r1.castle()), findMinWidth(*r2.castle())); } return 0; } -namespace { -struct MergeKey { - MergeKey(const left_id &left, flat_set<RoseVertex> parents_in) : - parents(std::move(parents_in)) { - - // We want to distinguish prefixes (but not infixes) on whether they - // have a narrow start or max width. 
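The stop-alphabet guard in mergeLeftVL_tryMergeCandidate above uses two fixed thresholds (10 and 200 characters) to avoid merges that would shrink the stop alphabet (roughly, the characters on which the engine can be left asleep during scanning). A standalone restatement of that guard, as a sketch with illustrative names:

#include <algorithm>
#include <cstddef>

// Sketch only: stop1/stop2 are the input engines' stop-character counts and
// stopboth is the count for their intersection.
static bool stopAlphabetOkForMerge(size_t stop1, size_t stop2,
                                   size_t stopboth) {
    if (stopboth < 10 && (stop1 > 10 || stop2 > 10)) {
        return false; // would kill the stop alphabet
    }
    const size_t maxstop = std::max(stop1, stop2);
    if (maxstop > 200 && stopboth < 200) {
        return false; // would reduce a very large stop alphabet
    }
    return true;
}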
- if (left.graph() && !is_triggered(*left.graph())) { - const NGHolder &h = *left.graph(); - narrowStart = nfaHasNarrowStart(h); - hasMaxWidth = nfaHasFiniteMaxWidth(h); - } else { - narrowStart = false; - hasMaxWidth = false; - } - - if (left.castle()) { - /* castles should have a non-empty reach */ - assert(left.castle()->reach().any()); - castle_cr = left.castle()->reach(); - } else { - assert(left.graph()); - } - } - - bool operator<(const MergeKey &b) const { - const MergeKey &a = *this; - ORDER_CHECK(narrowStart); - ORDER_CHECK(hasMaxWidth); - ORDER_CHECK(castle_cr); - ORDER_CHECK(parents); - return false; - } - - // NOTE: these two bool discriminators are only used for prefixes, not - // infixes. - bool narrowStart; - bool hasMaxWidth; - CharReach castle_cr; /* empty for graphs, reach (non-empty) for castles. */ - - flat_set<RoseVertex> parents; -}; -} - -template <typename T> -static -void chunk(vector<T> in, vector<vector<T>> *out, size_t chunk_size) { - if (in.size() <= chunk_size) { - out->push_back(std::move(in)); - return; - } - - out->push_back(vector<T>()); - out->back().reserve(chunk_size); - for (const auto &t : in) { - if (out->back().size() >= chunk_size) { - out->push_back(vector<T>()); - out->back().reserve(chunk_size); - } - out->back().push_back(std::move(t)); - } -} - -static -insertion_ordered_map<left_id, vector<RoseVertex>> get_eng_verts(RoseGraph &g) { - insertion_ordered_map<left_id, vector<RoseVertex>> eng_verts; - for (auto v : vertices_range(g)) { - const auto &left = g[v].left; - if (!left) { - continue; - } - assert(contains(all_reports(left), left.leftfix_report)); - eng_verts[left].push_back(v); - } - - return eng_verts; -} - +namespace { +struct MergeKey { + MergeKey(const left_id &left, flat_set<RoseVertex> parents_in) : + parents(std::move(parents_in)) { + + // We want to distinguish prefixes (but not infixes) on whether they + // have a narrow start or max width. + if (left.graph() && !is_triggered(*left.graph())) { + const NGHolder &h = *left.graph(); + narrowStart = nfaHasNarrowStart(h); + hasMaxWidth = nfaHasFiniteMaxWidth(h); + } else { + narrowStart = false; + hasMaxWidth = false; + } + + if (left.castle()) { + /* castles should have a non-empty reach */ + assert(left.castle()->reach().any()); + castle_cr = left.castle()->reach(); + } else { + assert(left.graph()); + } + } + + bool operator<(const MergeKey &b) const { + const MergeKey &a = *this; + ORDER_CHECK(narrowStart); + ORDER_CHECK(hasMaxWidth); + ORDER_CHECK(castle_cr); + ORDER_CHECK(parents); + return false; + } + + // NOTE: these two bool discriminators are only used for prefixes, not + // infixes. + bool narrowStart; + bool hasMaxWidth; + CharReach castle_cr; /* empty for graphs, reach (non-empty) for castles. 
*/ + + flat_set<RoseVertex> parents; +}; +} + +template <typename T> +static +void chunk(vector<T> in, vector<vector<T>> *out, size_t chunk_size) { + if (in.size() <= chunk_size) { + out->push_back(std::move(in)); + return; + } + + out->push_back(vector<T>()); + out->back().reserve(chunk_size); + for (const auto &t : in) { + if (out->back().size() >= chunk_size) { + out->push_back(vector<T>()); + out->back().reserve(chunk_size); + } + out->back().push_back(std::move(t)); + } +} + +static +insertion_ordered_map<left_id, vector<RoseVertex>> get_eng_verts(RoseGraph &g) { + insertion_ordered_map<left_id, vector<RoseVertex>> eng_verts; + for (auto v : vertices_range(g)) { + const auto &left = g[v].left; + if (!left) { + continue; + } + assert(contains(all_reports(left), left.leftfix_report)); + eng_verts[left].push_back(v); + } + + return eng_verts; +} + /** * This pass attempts to merge prefix/infix engines which share a common set of * parent vertices. @@ -1363,9 +1363,9 @@ insertion_ordered_map<left_id, vector<RoseVertex>> get_eng_verts(RoseGraph &g) { * the stop alphabet. * * Infixes: - * - It is expected that when this is run all infixes are still at the single - * top stage as we have not yet merged unrelated infixes together. After - * execution, castles may have multiple (but equivalent) tops. + * - It is expected that when this is run all infixes are still at the single + * top stage as we have not yet merged unrelated infixes together. After + * execution, castles may have multiple (but equivalent) tops. * * Prefixes: * - transient prefixes are not considered. @@ -1375,48 +1375,48 @@ insertion_ordered_map<left_id, vector<RoseVertex>> get_eng_verts(RoseGraph &g) { * - merges are not considered in cases where dot star start state will be * reformed to optimise a leading repeat. */ -void mergeLeftfixesVariableLag(RoseBuildImpl &build) { - if (!build.cc.grey.mergeRose) { +void mergeLeftfixesVariableLag(RoseBuildImpl &build) { + if (!build.cc.grey.mergeRose) { return; } - assert(!hasOrphanedTops(build)); + assert(!hasOrphanedTops(build)); - RoseGraph &g = build.g; + RoseGraph &g = build.g; DEBUG_PRINTF("-----\n"); DEBUG_PRINTF("entry\n"); DEBUG_PRINTF("-----\n"); - auto eng_verts = get_eng_verts(g); + auto eng_verts = get_eng_verts(g); - map<MergeKey, vector<left_id>> engine_groups; - for (const auto &e : eng_verts) { - const left_id &left = e.first; - const auto &verts = e.second; + map<MergeKey, vector<left_id>> engine_groups; + for (const auto &e : eng_verts) { + const left_id &left = e.first; + const auto &verts = e.second; // Only non-transient for the moment. - if (contains(build.transient, left)) { + if (contains(build.transient, left)) { continue; } // No forced McClellan or Haig infix merges. 
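The pass described above works group by group: engines are first keyed on properties a merge partner must share, and each group is then split into chunks of at most MERGE_GROUP_SIZE_MAX before candidates are paired. A minimal standalone sketch of that grouping stage, using stand-in types (string keys and int engine ids) rather than the build's real ones:

#include <algorithm>
#include <cstddef>
#include <map>
#include <string>
#include <utility>
#include <vector>

// Sketch only: bucket engines by a shared key, then split each bucket into
// bounded-size chunks; merging is only ever attempted within one chunk.
// chunk_size must be non-zero.
static std::vector<std::vector<int>>
groupAndChunk(const std::vector<std::pair<std::string, int>> &engines,
              size_t chunk_size) {
    std::map<std::string, std::vector<int>> groups; // key -> engine ids
    for (const auto &e : engines) {
        groups[e.first].push_back(e.second);
    }
    std::vector<std::vector<int>> chunks;
    for (const auto &g : groups) {
        const auto &ids = g.second;
        for (size_t i = 0; i < ids.size(); i += chunk_size) {
            const size_t end = std::min(i + chunk_size, ids.size());
            chunks.emplace_back(ids.begin() + i, ids.begin() + end);
        }
    }
    return chunks;
}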
- if (left.dfa() || left.haig()) { + if (left.dfa() || left.haig()) { continue; } - assert(left.graph() || left.castle()); + assert(left.graph() || left.castle()); - if (left.graph()) { - const NGHolder &h = *left.graph(); - /* we should not have merged yet */ - assert(!is_triggered(h) || onlyOneTop(h)); + if (left.graph()) { + const NGHolder &h = *left.graph(); + /* we should not have merged yet */ + assert(!is_triggered(h) || onlyOneTop(h)); - if (hasReformedStartDotStar(h, build.cc.grey)) { + if (hasReformedStartDotStar(h, build.cc.grey)) { continue; // preserve the optimisation of the leading repeat } - } else { - assert(left.castle()); + } else { + assert(left.castle()); - if (!build.cc.grey.allowCastle) { - DEBUG_PRINTF("castle merging disallowed by greybox\n"); + if (!build.cc.grey.allowCastle) { + DEBUG_PRINTF("castle merging disallowed by greybox\n"); continue; } } @@ -1425,20 +1425,20 @@ void mergeLeftfixesVariableLag(RoseBuildImpl &build) { // parents, so that we can merge differently-anchored prefix roses // together. (Prompted by UE-2100) - flat_set<RoseVertex> parents; - for (RoseVertex v : verts) { - insert(&parents, inv_adjacent_vertices_range(v, g)); + flat_set<RoseVertex> parents; + for (RoseVertex v : verts) { + insert(&parents, inv_adjacent_vertices_range(v, g)); } - if (contains(parents, build.anchored_root)) { - parents.erase(build.anchored_root); - parents.insert(build.root); + if (contains(parents, build.anchored_root)) { + parents.erase(build.anchored_root); + parents.insert(build.root); } - assert(!parents.empty()); - + assert(!parents.empty()); + #ifndef _WIN32 - engine_groups[MergeKey(left, parents)].push_back(left); + engine_groups[MergeKey(left, parents)].push_back(left); #else // On windows, when passing MergeKey object into map 'engine_groups', // it will not be copied, but will be freed along with @@ -1452,59 +1452,59 @@ void mergeLeftfixesVariableLag(RoseBuildImpl &build) { #endif } - vector<vector<left_id>> chunks; - for (auto &raw_group : engine_groups | map_values) { - chunk(move(raw_group), &chunks, MERGE_GROUP_SIZE_MAX); - } - engine_groups.clear(); - - DEBUG_PRINTF("chunked roses into %zu groups\n", chunks.size()); - - for (auto &roses : chunks) { - if (roses.size() < 2) { + vector<vector<left_id>> chunks; + for (auto &raw_group : engine_groups | map_values) { + chunk(move(raw_group), &chunks, MERGE_GROUP_SIZE_MAX); + } + engine_groups.clear(); + + DEBUG_PRINTF("chunked roses into %zu groups\n", chunks.size()); + + for (auto &roses : chunks) { + if (roses.size() < 2) { continue; } - // All pairs on the prio queue. - u32 tie_breaker = 0; - priority_queue<RoseMergeCandidate> pq; - for (auto it = roses.begin(), ite = roses.end(); it != ite; ++it) { - left_id r1 = *it; - const vector<RoseVertex> &targets_1 = eng_verts[r1]; - - for (auto jt = next(it); jt != ite; ++jt) { - left_id r2 = *jt; - - /* we should have already split on engine types and reach */ - assert(!r1.castle() == !r2.castle()); - assert(!r1.graph() == !r2.graph()); - assert(!r1.castle() - || r1.castle()->reach() == r2.castle()->reach()); - - const vector<RoseVertex> &targets_2 = eng_verts[r2]; - if (!checkVerticesOkForLeftfixMerge(build, targets_1, - targets_2)) { - continue; // No point queueing unmergeable cases. 
- } - - u32 cpl = commonPrefixLength(r1, r2); - pq.push(RoseMergeCandidate(r1, r2, cpl, tie_breaker++)); - } - } - - DEBUG_PRINTF("merge queue has %zu entries\n", pq.size()); - - while (!pq.empty()) { - left_id r1 = pq.top().r1; - left_id r2 = pq.top().r2; - DEBUG_PRINTF("pq pop h1=%p, h2=%p, cpl=%u, states=%u\n", - r1.graph(), r2.graph(), pq.top().cpl, pq.top().states); - pq.pop(); - vector<RoseVertex> &targets_1 = eng_verts[r1]; - vector<RoseVertex> &targets_2 = eng_verts[r2]; - if (mergeLeftVL_tryMergeCandidate(build, r1, targets_1, r2, - targets_2)) { - insert(&targets_2, targets_2.end(), targets_1); - targets_1.clear(); + // All pairs on the prio queue. + u32 tie_breaker = 0; + priority_queue<RoseMergeCandidate> pq; + for (auto it = roses.begin(), ite = roses.end(); it != ite; ++it) { + left_id r1 = *it; + const vector<RoseVertex> &targets_1 = eng_verts[r1]; + + for (auto jt = next(it); jt != ite; ++jt) { + left_id r2 = *jt; + + /* we should have already split on engine types and reach */ + assert(!r1.castle() == !r2.castle()); + assert(!r1.graph() == !r2.graph()); + assert(!r1.castle() + || r1.castle()->reach() == r2.castle()->reach()); + + const vector<RoseVertex> &targets_2 = eng_verts[r2]; + if (!checkVerticesOkForLeftfixMerge(build, targets_1, + targets_2)) { + continue; // No point queueing unmergeable cases. + } + + u32 cpl = commonPrefixLength(r1, r2); + pq.push(RoseMergeCandidate(r1, r2, cpl, tie_breaker++)); + } + } + + DEBUG_PRINTF("merge queue has %zu entries\n", pq.size()); + + while (!pq.empty()) { + left_id r1 = pq.top().r1; + left_id r2 = pq.top().r2; + DEBUG_PRINTF("pq pop h1=%p, h2=%p, cpl=%u, states=%u\n", + r1.graph(), r2.graph(), pq.top().cpl, pq.top().states); + pq.pop(); + vector<RoseVertex> &targets_1 = eng_verts[r1]; + vector<RoseVertex> &targets_2 = eng_verts[r2]; + if (mergeLeftVL_tryMergeCandidate(build, r1, targets_1, r2, + targets_2)) { + insert(&targets_2, targets_2.end(), targets_1); + targets_1.clear(); } } } @@ -1512,7 +1512,7 @@ void mergeLeftfixesVariableLag(RoseBuildImpl &build) { DEBUG_PRINTF("-----\n"); DEBUG_PRINTF("exit\n"); DEBUG_PRINTF("-----\n"); - assert(!hasOrphanedTops(build)); + assert(!hasOrphanedTops(build)); } namespace { @@ -1521,15 +1521,15 @@ namespace { * Key used to group sets of leftfixes for the dedupeLeftfixesVariableLag path. */ struct DedupeLeftKey { - DedupeLeftKey(const RoseBuildImpl &build, - flat_set<pair<size_t, u32>> preds_in, const left_id &left) - : left_hash(hashLeftfix(left)), preds(move(preds_in)), - transient(contains(build.transient, left)) { + DedupeLeftKey(const RoseBuildImpl &build, + flat_set<pair<size_t, u32>> preds_in, const left_id &left) + : left_hash(hashLeftfix(left)), preds(move(preds_in)), + transient(contains(build.transient, left)) { } bool operator<(const DedupeLeftKey &b) const { - return tie(left_hash, preds, transient) - < tie(b.left_hash, b.preds, b.transient); + return tie(left_hash, preds, transient) + < tie(b.left_hash, b.preds, b.transient); } private: @@ -1538,23 +1538,23 @@ private: size_t left_hash; /** For each in-edge, the pair of (parent index, edge top). */ - flat_set<pair<size_t, u32>> preds; - - /** We don't want to combine transient with non-transient. */ - bool transient; + flat_set<pair<size_t, u32>> preds; + + /** We don't want to combine transient with non-transient. 
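The candidate loop above scores every pair in a chunk (by common prefix length) and attempts merges in best-score-first order, with the surviving engine absorbing the victim's target vertices so that later pairs involving the victim fail fast. A standalone sketch of that scheduling pattern, with stand-in types and caller-supplied scoring and merge callbacks (illustrative, not the build's API):

#include <cstddef>
#include <queue>
#include <vector>

// Sketch only: greedy best-first pairwise merging within one chunk.
struct Candidate {
    size_t a, b;    // indices of the two engines
    unsigned score; // e.g. common prefix length
    bool operator<(const Candidate &o) const { return score < o.score; }
};

template <typename Score, typename TryMerge>
void greedyPairwiseMerge(std::vector<std::vector<int>> &targets, Score score,
                         TryMerge try_merge) {
    std::priority_queue<Candidate> pq;
    for (size_t i = 0; i < targets.size(); i++) {
        for (size_t j = i + 1; j < targets.size(); j++) {
            pq.push(Candidate{i, j, score(i, j)});
        }
    }
    while (!pq.empty()) {
        const Candidate c = pq.top();
        pq.pop();
        if (targets[c.a].empty() || targets[c.b].empty()) {
            continue; // one side was already merged away
        }
        if (try_merge(c.a, c.b)) {
            // the survivor (b) inherits the victim's target vertices
            targets[c.b].insert(targets[c.b].end(), targets[c.a].begin(),
                                targets[c.a].end());
            targets[c.a].clear();
        }
    }
}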
*/ + bool transient; }; } // namespace -static -flat_set<pair<size_t, u32>> get_pred_tops(RoseVertex v, const RoseGraph &g) { - flat_set<pair<size_t, u32>> preds; - for (const auto &e : in_edges_range(v, g)) { - preds.emplace(g[source(e, g)].index, g[e].rose_top); - } - return preds; -} - +static +flat_set<pair<size_t, u32>> get_pred_tops(RoseVertex v, const RoseGraph &g) { + flat_set<pair<size_t, u32>> preds; + for (const auto &e : in_edges_range(v, g)) { + preds.emplace(g[source(e, g)].index, g[e].rose_top); + } + return preds; +} + /** * This is a generalisation of \ref dedupeLeftfixes which relaxes two * restrictions: multiple predecessor roles are allowed and the delay used by @@ -1572,99 +1572,99 @@ flat_set<pair<size_t, u32>> get_pred_tops(RoseVertex v, const RoseGraph &g) { * successor may want to inspect it; the overlap relationships between the * involved literals are examined to ensure that this property holds. * - * Note: this is unable to dedupe when delayed literals are involved unlike - * dedupeLeftfixes. + * Note: this is unable to dedupe when delayed literals are involved unlike + * dedupeLeftfixes. */ -void dedupeLeftfixesVariableLag(RoseBuildImpl &build) { +void dedupeLeftfixesVariableLag(RoseBuildImpl &build) { DEBUG_PRINTF("entry\n"); - RoseGraph &g = build.g; - auto eng_verts = get_eng_verts(g); + RoseGraph &g = build.g; + auto eng_verts = get_eng_verts(g); - map<DedupeLeftKey, vector<left_id>> engine_groups; - for (const auto &e : eng_verts) { - const left_id &left = e.first; - const auto &verts = e.second; + map<DedupeLeftKey, vector<left_id>> engine_groups; + for (const auto &e : eng_verts) { + const left_id &left = e.first; + const auto &verts = e.second; - /* There should only be one report on an engine as no merges have - * happened yet. (aside from eod prefixes) */ - if (all_reports(left).size() != 1) { - assert(any_of_in(adjacent_vertices_range(verts.front(), g), - [&](RoseVertex w) { return g[w].eod_accept; })); + /* There should only be one report on an engine as no merges have + * happened yet. 
(aside from eod prefixes) */ + if (all_reports(left).size() != 1) { + assert(any_of_in(adjacent_vertices_range(verts.front(), g), + [&](RoseVertex w) { return g[w].eod_accept; })); continue; } - if (left.haig()) { - /* TODO: allow deduping of identical haigs */ + if (left.haig()) { + /* TODO: allow deduping of identical haigs */ continue; } - if (left.graph()) { - /* we should not have merged yet */ - assert(!is_triggered(*left.graph()) || onlyOneTop(*left.graph())); - } - - auto preds = get_pred_tops(verts.front(), g); - for (RoseVertex v : verts) { - if (preds != get_pred_tops(v, g)) { - DEBUG_PRINTF("distinct pred sets\n"); - continue; - } - } - engine_groups[DedupeLeftKey(build, move(preds), left)].push_back(left); - } - - /* We don't bother chunking as we expect deduping to be successful if the - * hashes match */ - - for (auto &group : engine_groups | map_values) { - DEBUG_PRINTF("group of %zu roses\n", group.size()); - - if (group.size() < 2) { + if (left.graph()) { + /* we should not have merged yet */ + assert(!is_triggered(*left.graph()) || onlyOneTop(*left.graph())); + } + + auto preds = get_pred_tops(verts.front(), g); + for (RoseVertex v : verts) { + if (preds != get_pred_tops(v, g)) { + DEBUG_PRINTF("distinct pred sets\n"); + continue; + } + } + engine_groups[DedupeLeftKey(build, move(preds), left)].push_back(left); + } + + /* We don't bother chunking as we expect deduping to be successful if the + * hashes match */ + + for (auto &group : engine_groups | map_values) { + DEBUG_PRINTF("group of %zu roses\n", group.size()); + + if (group.size() < 2) { continue; } - for (auto it = group.begin(); it != group.end(); ++it) { + for (auto it = group.begin(); it != group.end(); ++it) { left_id r1 = *it; - vector<RoseVertex> &verts1 = eng_verts[r1]; - assert(!verts1.empty()); /* cleared engines should be behind us */ + vector<RoseVertex> &verts1 = eng_verts[r1]; + assert(!verts1.empty()); /* cleared engines should be behind us */ + + assert(all_reports(r1).size() == 1); + ReportID r1_report = *all_reports(r1).begin(); - assert(all_reports(r1).size() == 1); - ReportID r1_report = *all_reports(r1).begin(); - - for (auto jt = next(it); jt != group.end(); ++jt) { + for (auto jt = next(it); jt != group.end(); ++jt) { left_id r2 = *jt; - vector<RoseVertex> &verts2 = eng_verts[r2]; - assert(!verts2.empty()); - assert(all_reports(r2).size() == 1); - ReportID r2_report = *all_reports(r2).begin(); + vector<RoseVertex> &verts2 = eng_verts[r2]; + assert(!verts2.empty()); + assert(all_reports(r2).size() == 1); + ReportID r2_report = *all_reports(r2).begin(); - if (!is_equal(r1, r1_report, r2, r2_report)) { + if (!is_equal(r1, r1_report, r2, r2_report)) { continue; } - if (!checkVerticesOkForLeftfixMerge(build, verts1, verts2)) { + if (!checkVerticesOkForLeftfixMerge(build, verts1, verts2)) { continue; } DEBUG_PRINTF("%p and %p are dupes\n", r1.graph(), r2.graph()); - // Replace r1 with r2. + // Replace r1 with r2. 
for (auto v : verts1) { DEBUG_PRINTF("replacing report %u with %u on %zu\n", - r2_report, r1_report, g[v].index); + r2_report, r1_report, g[v].index); u32 orig_lag = g[v].left.lag; - g[v].left = g[verts2.front()].left; + g[v].left = g[verts2.front()].left; g[v].left.lag = orig_lag; } - - insert(&verts2, verts2.end(), verts1); - verts1.clear(); - - /* remove stale entry from transient set, if present */ - build.transient.erase(r1); - + + insert(&verts2, verts2.end(), verts1); + verts1.clear(); + + /* remove stale entry from transient set, if present */ + build.transient.erase(r1); + break; } } @@ -1672,7 +1672,7 @@ void dedupeLeftfixesVariableLag(RoseBuildImpl &build) { } static -u32 findUnusedTop(const flat_set<u32> &tops) { +u32 findUnusedTop(const flat_set<u32> &tops) { u32 i = 0; while (contains(tops, i)) { i++; @@ -1688,19 +1688,19 @@ void replaceTops(NGHolder &h, const map<u32, u32> &top_mapping) { if (v == h.startDs) { continue; } - flat_set<u32> new_tops; - for (u32 t : h[e].tops) { - DEBUG_PRINTF("vertex %zu has top %u\n", h[v].index, t); - new_tops.insert(top_mapping.at(t)); - } - h[e].tops = std::move(new_tops); + flat_set<u32> new_tops; + for (u32 t : h[e].tops) { + DEBUG_PRINTF("vertex %zu has top %u\n", h[v].index, t); + new_tops.insert(top_mapping.at(t)); + } + h[e].tops = std::move(new_tops); } } static bool setDistinctTops(NGHolder &h1, const NGHolder &h2, map<u32, u32> &top_mapping) { - flat_set<u32> tops1 = getTops(h1), tops2 = getTops(h2); + flat_set<u32> tops1 = getTops(h1), tops2 = getTops(h2); DEBUG_PRINTF("before: h1 has %zu tops, h2 has %zu tops\n", tops1.size(), tops2.size()); @@ -1738,7 +1738,7 @@ bool setDistinctRoseTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2, } for (auto v : verts1) { - DEBUG_PRINTF("vertex %zu\n", g[v].index); + DEBUG_PRINTF("vertex %zu\n", g[v].index); assert(!g[v].left.haig); assert(!g[v].left.dfa); for (const auto &e : in_edges_range(v, g)) { @@ -1747,7 +1747,7 @@ bool setDistinctRoseTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2, assert(contains(top_mapping, t)); g[e].rose_top = top_mapping[t]; DEBUG_PRINTF("edge (%zu,%zu) went from top %u to %u\n", - g[source(e, g)].index, g[target(e, g)].index, t, + g[source(e, g)].index, g[target(e, g)].index, t, top_mapping[t]); } } @@ -1768,7 +1768,7 @@ bool setDistinctSuffixTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2, } for (auto v : verts1) { - DEBUG_PRINTF("vertex %zu\n", g[v].index); + DEBUG_PRINTF("vertex %zu\n", g[v].index); u32 t = g[v].suffix.top; assert(contains(top_mapping, t)); g[v].suffix.top = top_mapping[t]; @@ -1796,7 +1796,7 @@ void mergeNfaLeftfixes(RoseBuildImpl &tbi, LeftfixBouquet &roses) { // We track the number of accelerable states for each graph in a map and // only recompute them when the graph is modified. 
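Two small helpers appear in the hunk above when distinct tops have to be assigned during a merge: findUnusedTop picks the smallest free top value, and replaceTops rewrites each edge's top set through a remapping table. A standalone restatement, with std::set and std::map standing in for the library's flat containers:

#include <map>
#include <set>

// Sketch only: smallest top value not already in use.
static unsigned findUnusedTopValue(const std::set<unsigned> &tops) {
    unsigned i = 0;
    while (tops.count(i)) {
        i++;
    }
    return i;
}

// Sketch only: rewrite a set of top values through a remapping table, as
// replaceTops does for each edge of the merged holder.
static std::set<unsigned> remapTops(const std::set<unsigned> &tops,
                                    const std::map<unsigned, unsigned> &m) {
    std::set<unsigned> out;
    for (unsigned t : tops) {
        out.insert(m.at(t));
    }
    return out;
}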
- unordered_map<left_id, u32> accel_count; + unordered_map<left_id, u32> accel_count; for (const auto &rose : roses) { assert(rose.graph()->kind == NFA_INFIX); accel_count[rose] = estimatedAccelStates(tbi, *rose.graph()); @@ -1965,109 +1965,109 @@ void mergeSmallLeftfixes(RoseBuildImpl &tbi) { } } -static -void mergeCastleChunk(RoseBuildImpl &build, vector<left_id> &cands, - insertion_ordered_map<left_id, vector<RoseVertex>> &eng_verts) { - /* caller must have already ensured that candidates have the same reach */ - RoseGraph &g = build.g; - DEBUG_PRINTF("%zu castle leftfix merge candidates\n", cands.size()); - - for (auto it = cands.begin(); it != cands.end(); ++it) { - left_id &cand_1 = *it; - vector<RoseVertex> &verts_1 = eng_verts[cand_1]; - if (verts_1.empty()) { - continue; - } - - for (auto jt = next(it); jt != cands.end(); ++jt) { - const left_id &cand_2 = *jt; - vector<RoseVertex> &verts_2 = eng_verts[cand_2]; - if (verts_2.empty()) { - continue; - } - - assert(cand_1.castle()->reach() == cand_2.castle()->reach()); - - if (!checkVerticesOkForLeftfixMerge(build, verts_1, verts_2)) { - DEBUG_PRINTF("not mergeable\n"); - continue; // next cand_2 - } - - DEBUG_PRINTF("castle1=%p (size %zu)\n", cand_1.castle(), - cand_1.castle()->repeats.size()); - DEBUG_PRINTF("castle2=%p (size %zu)\n", cand_2.castle(), - cand_2.castle()->repeats.size()); - - map<u32, u32> top_map; - if (!mergeCastle(*cand_1.castle(), *cand_2.castle(), top_map)) { - DEBUG_PRINTF("couldn't merge\n"); - continue; // next cand_2 - } - - // Update castle2's roses to point to castle1 now. - shared_ptr<CastleProto> winner = g[verts_1.front()].left.castle; - for (auto v : verts_2) { - assert(g[v].left.castle.get() == cand_2.castle()); - g[v].left.castle = winner; - for (const auto &e : in_edges_range(v, g)) { - g[e].rose_top = top_map.at(g[e].rose_top); - } - } - - insert(&verts_1, verts_1.end(), verts_2); - verts_2.clear(); - } - } -} - -/** - * Merges castles with the same reach together regardless of where in the rose - * graph they are. Note: there is no requirement for the castles to have common - * parent or target vertices. - * - * There are no heuristics for reducing block mode merges as castle speed - * mainly depends on the reach being scanned. 
- */ -void mergeCastleLeftfixes(RoseBuildImpl &build) { +static +void mergeCastleChunk(RoseBuildImpl &build, vector<left_id> &cands, + insertion_ordered_map<left_id, vector<RoseVertex>> &eng_verts) { + /* caller must have already ensured that candidates have the same reach */ + RoseGraph &g = build.g; + DEBUG_PRINTF("%zu castle leftfix merge candidates\n", cands.size()); + + for (auto it = cands.begin(); it != cands.end(); ++it) { + left_id &cand_1 = *it; + vector<RoseVertex> &verts_1 = eng_verts[cand_1]; + if (verts_1.empty()) { + continue; + } + + for (auto jt = next(it); jt != cands.end(); ++jt) { + const left_id &cand_2 = *jt; + vector<RoseVertex> &verts_2 = eng_verts[cand_2]; + if (verts_2.empty()) { + continue; + } + + assert(cand_1.castle()->reach() == cand_2.castle()->reach()); + + if (!checkVerticesOkForLeftfixMerge(build, verts_1, verts_2)) { + DEBUG_PRINTF("not mergeable\n"); + continue; // next cand_2 + } + + DEBUG_PRINTF("castle1=%p (size %zu)\n", cand_1.castle(), + cand_1.castle()->repeats.size()); + DEBUG_PRINTF("castle2=%p (size %zu)\n", cand_2.castle(), + cand_2.castle()->repeats.size()); + + map<u32, u32> top_map; + if (!mergeCastle(*cand_1.castle(), *cand_2.castle(), top_map)) { + DEBUG_PRINTF("couldn't merge\n"); + continue; // next cand_2 + } + + // Update castle2's roses to point to castle1 now. + shared_ptr<CastleProto> winner = g[verts_1.front()].left.castle; + for (auto v : verts_2) { + assert(g[v].left.castle.get() == cand_2.castle()); + g[v].left.castle = winner; + for (const auto &e : in_edges_range(v, g)) { + g[e].rose_top = top_map.at(g[e].rose_top); + } + } + + insert(&verts_1, verts_1.end(), verts_2); + verts_2.clear(); + } + } +} + +/** + * Merges castles with the same reach together regardless of where in the rose + * graph they are. Note: there is no requirement for the castles to have common + * parent or target vertices. + * + * There are no heuristics for reducing block mode merges as castle speed + * mainly depends on the reach being scanned. + */ +void mergeCastleLeftfixes(RoseBuildImpl &build) { DEBUG_PRINTF("entry\n"); - if (!build.cc.grey.mergeRose || !build.cc.grey.roseMultiTopRoses - || !build.cc.grey.allowCastle) { + if (!build.cc.grey.mergeRose || !build.cc.grey.roseMultiTopRoses + || !build.cc.grey.allowCastle) { return; } - RoseGraph &g = build.g; + RoseGraph &g = build.g; - insertion_ordered_map<left_id, vector<RoseVertex>> eng_verts; + insertion_ordered_map<left_id, vector<RoseVertex>> eng_verts; for (auto v : vertices_range(g)) { - if (!g[v].left.castle) { + if (!g[v].left.castle) { continue; } - // Handle infixes only. - if (build.isRootSuccessor(v)) { + // Handle infixes only. 
+ if (build.isRootSuccessor(v)) { continue; } - eng_verts[g[v].left].push_back(v); - } + eng_verts[g[v].left].push_back(v); + } - map<CharReach, vector<left_id>> by_reach; - for (const auto &left : eng_verts | map_keys) { - by_reach[left.castle()->reach()].push_back(left); - } + map<CharReach, vector<left_id>> by_reach; + for (const auto &left : eng_verts | map_keys) { + by_reach[left.castle()->reach()].push_back(left); + } - vector<vector<left_id>> chunks; - for (auto &raw_group : by_reach | map_values) { - chunk(move(raw_group), &chunks, MERGE_CASTLE_GROUP_SIZE_MAX); + vector<vector<left_id>> chunks; + for (auto &raw_group : by_reach | map_values) { + chunk(move(raw_group), &chunks, MERGE_CASTLE_GROUP_SIZE_MAX); } - by_reach.clear(); + by_reach.clear(); - DEBUG_PRINTF("chunked castles into %zu groups\n", chunks.size()); + DEBUG_PRINTF("chunked castles into %zu groups\n", chunks.size()); - for (auto &chunk : chunks) { - mergeCastleChunk(build, chunk, eng_verts); + for (auto &chunk : chunks) { + mergeCastleChunk(build, chunk, eng_verts); } } @@ -2081,7 +2081,7 @@ void mergeSuffixes(RoseBuildImpl &tbi, SuffixBouquet &suffixes, // If this isn't an acyclic case, we track the number of accelerable states // for each graph in a map and only recompute them when the graph is // modified. - unordered_map<suffix_id, u32> accel_count; + unordered_map<suffix_id, u32> accel_count; if (!acyclic) { for (const auto &suffix : suffixes) { assert(suffix.graph() && suffix.graph()->kind == NFA_SUFFIX); @@ -2093,11 +2093,11 @@ void mergeSuffixes(RoseBuildImpl &tbi, SuffixBouquet &suffixes, suffix_id s1 = *it; const deque<RoseVertex> &verts1 = suffixes.vertices(s1); assert(s1.graph() && s1.graph()->kind == NFA_SUFFIX); - - // Caller should ensure that we don't propose merges of graphs that are - // already too big. - assert(num_vertices(*s1.graph()) < small_merge_max_vertices(tbi.cc)); - + + // Caller should ensure that we don't propose merges of graphs that are + // already too big. 
+ assert(num_vertices(*s1.graph()) < small_merge_max_vertices(tbi.cc)); + deque<suffix_id> merged; for (auto jt = next(it); jt != suffixes.end(); ++jt) { suffix_id s2 = *jt; @@ -2210,11 +2210,11 @@ void mergeAcyclicSuffixes(RoseBuildImpl &tbi) { assert(!g[v].suffix.haig); - if (num_vertices(*h) >= small_merge_max_vertices(tbi.cc)) { + if (num_vertices(*h) >= small_merge_max_vertices(tbi.cc)) { continue; } - if (!isAcyclic(*h)) { + if (!isAcyclic(*h)) { continue; } @@ -2327,8 +2327,8 @@ map<NGHolder *, NGHolder *> chunkedNfaMerge(RoseBuildImpl &build, batch.push_back(*it); assert((*it)->kind == NFA_OUTFIX); if (batch.size() == MERGE_GROUP_SIZE_MAX || next(it) == ite) { - auto batch_merged = mergeNfaCluster(batch, &build.rm, build.cc); - insert(&merged, batch_merged); + auto batch_merged = mergeNfaCluster(batch, &build.rm, build.cc); + insert(&merged, batch_merged); batch.clear(); } } @@ -2347,9 +2347,9 @@ void mergeOutfixNfas(RoseBuildImpl &tbi, vector<NGHolder *> &nfas) { map<NGHolder *, size_t> nfa_mapping; for (size_t i = 0; i < outfixes.size(); i++) { - auto *holder = outfixes[i].holder(); - if (holder) { - nfa_mapping[holder] = i; + auto *holder = outfixes[i].holder(); + if (holder) { + nfa_mapping[holder] = i; } } @@ -2413,7 +2413,7 @@ private: template<class RawDfa, class MergeFunctor> static void pairwiseDfaMerge(vector<RawDfa *> &dfas, - unordered_map<RawDfa *, size_t> &dfa_mapping, + unordered_map<RawDfa *, size_t> &dfa_mapping, vector<OutfixInfo> &outfixes, MergeFunctor merge_func) { DEBUG_PRINTF("merging group of size %zu\n", dfas.size()); @@ -2441,7 +2441,7 @@ void pairwiseDfaMerge(vector<RawDfa *> &dfas, RawDfa *dfa_ptr = rdfa.get(); dfa_mapping[dfa_ptr] = dfa_mapping[*it]; dfa_mapping.erase(*it); - winner.proto = move(rdfa); + winner.proto = move(rdfa); mergeOutfixInfo(winner, victim); @@ -2455,7 +2455,7 @@ void pairwiseDfaMerge(vector<RawDfa *> &dfas, template<class RawDfa, class MergeFunctor> static void chunkedDfaMerge(vector<RawDfa *> &dfas, - unordered_map<RawDfa *, size_t> &dfa_mapping, + unordered_map<RawDfa *, size_t> &dfa_mapping, vector<OutfixInfo> &outfixes, MergeFunctor merge_func) { DEBUG_PRINTF("begin merge of %zu dfas\n", dfas.size()); @@ -2489,11 +2489,11 @@ void mergeOutfixDfas(RoseBuildImpl &tbi, vector<raw_dfa *> &dfas) { /* key is index into outfix array as iterators, etc may be invalidated by * element addition. */ - unordered_map<raw_dfa *, size_t> dfa_mapping; + unordered_map<raw_dfa *, size_t> dfa_mapping; for (size_t i = 0; i < outfixes.size(); i++) { - auto *rdfa = outfixes[i].rdfa(); - if (rdfa) { - dfa_mapping[rdfa] = i; + auto *rdfa = outfixes[i].rdfa(); + if (rdfa) { + dfa_mapping[rdfa] = i; } } @@ -2514,10 +2514,10 @@ void mergeOutfixCombo(RoseBuildImpl &tbi, const ReportManager &rm, bool seen_dfa = false; u32 nfa_count = 0; for (const auto &outfix : tbi.outfixes) { - if (outfix.holder()) { + if (outfix.holder()) { DEBUG_PRINTF("nfa\n"); nfa_count++; - } else if (outfix.rdfa()) { + } else if (outfix.rdfa()) { DEBUG_PRINTF("dfa\n"); seen_dfa = true; } @@ -2533,32 +2533,32 @@ void mergeOutfixCombo(RoseBuildImpl &tbi, const ReportManager &rm, /* key is index into outfix array as iterators, etc may be invalidated by * element addition. 
*/ size_t new_dfas = 0; - unordered_map<raw_dfa *, size_t> dfa_mapping; + unordered_map<raw_dfa *, size_t> dfa_mapping; vector<raw_dfa *> dfas; for (auto it = tbi.outfixes.begin(); it != tbi.outfixes.end(); ++it) { - auto &outfix = *it; - assert(!outfix.is_dead()); - - if (outfix.rdfa()) { - auto *rdfa = outfix.rdfa(); - dfas.push_back(rdfa); - dfa_mapping[rdfa] = it - tbi.outfixes.begin(); + auto &outfix = *it; + assert(!outfix.is_dead()); + + if (outfix.rdfa()) { + auto *rdfa = outfix.rdfa(); + dfas.push_back(rdfa); + dfa_mapping[rdfa] = it - tbi.outfixes.begin(); continue; } - if (!outfix.holder()) { + if (!outfix.holder()) { continue; } - NGHolder *h = outfix.holder(); + NGHolder *h = outfix.holder(); assert(h->kind == NFA_OUTFIX); auto rdfa = buildMcClellan(*h, &rm, grey); if (rdfa) { // Transform this outfix into a DFA and add it to the merge set. dfa_mapping[rdfa.get()] = it - tbi.outfixes.begin(); dfas.push_back(rdfa.get()); - outfix.proto = move(rdfa); + outfix.proto = move(rdfa); new_dfas++; } } @@ -2584,11 +2584,11 @@ void mergeOutfixHaigs(RoseBuildImpl &tbi, vector<raw_som_dfa *> &dfas, vector<OutfixInfo> &outfixes = tbi.outfixes; - unordered_map<raw_som_dfa *, size_t> dfa_mapping; + unordered_map<raw_som_dfa *, size_t> dfa_mapping; for (size_t i = 0; i < outfixes.size(); i++) { - auto *haig = outfixes[i].haig(); - if (haig) { - dfa_mapping[haig] = i; + auto *haig = outfixes[i].haig(); + if (haig) { + dfa_mapping[haig] = i; } } @@ -2613,13 +2613,13 @@ void mergeOutfixes(RoseBuildImpl &tbi) { vector<raw_dfa *> dfas; vector<raw_som_dfa *> som_dfas; - for (auto &outfix : tbi.outfixes) { - if (outfix.rdfa()) { - dfas.push_back(outfix.rdfa()); - } else if (outfix.holder()) { - nfas.push_back(outfix.holder()); - } else if (outfix.haig()) { - som_dfas.push_back(outfix.haig()); + for (auto &outfix : tbi.outfixes) { + if (outfix.rdfa()) { + dfas.push_back(outfix.rdfa()); + } else if (outfix.holder()) { + nfas.push_back(outfix.holder()); + } else if (outfix.haig()) { + som_dfas.push_back(outfix.haig()); } } @@ -2644,7 +2644,7 @@ u32 allowedSquashDistance(const CharReach &cr, u32 min_width, /* TODO: inspect further back in the pattern */ for (u32 lit_id : g[tv].literals) { - const rose_literal_id &lit = tbi.literals.at(lit_id); + const rose_literal_id &lit = tbi.literals.at(lit_id); if (lit.delay) { return 0; /* TODO: better */ } @@ -2724,7 +2724,7 @@ void mergePuffixes(RoseBuildImpl &tbi) { u32 squashDistance = allowedSquashDistance(repeat.reach, repeat.bounds.min, tbi, v); - Report ir = makeMpvTrigger(event, squashDistance); + Report ir = makeMpvTrigger(event, squashDistance); ReportID id = tbi.rm.getInternalId(ir); DEBUG_PRINTF("puffette event q%u t%u\n", queue, event); @@ -2736,8 +2736,8 @@ void mergePuffixes(RoseBuildImpl &tbi) { static void updateCastleSuffix(RoseGraph &g, const shared_ptr<CastleProto> &m, u32 top, const vector<RoseVertex> &verts) { - DEBUG_PRINTF("merged in as top %u of %p, updating %zu vertices\n", top, - m.get(), verts.size()); + DEBUG_PRINTF("merged in as top %u of %p, updating %zu vertices\n", top, + m.get(), verts.size()); for (auto v : verts) { assert(g[v].suffix.castle); @@ -2747,56 +2747,56 @@ void updateCastleSuffix(RoseGraph &g, const shared_ptr<CastleProto> &m, } static -void mergeCastleSuffixChunk(RoseGraph &g, const vector<CastleProto *> &castles, - const unordered_map<CastleProto *, vector<RoseVertex>> &eng_verts) { +void mergeCastleSuffixChunk(RoseGraph &g, const vector<CastleProto *> &castles, + const unordered_map<CastleProto *, vector<RoseVertex>> 
&eng_verts) { if (castles.size() <= 1) { return; } - DEBUG_PRINTF("merging reach %s, %zu elements\n", - describeClass(castles[0]->reach()).c_str(), castles.size()); + DEBUG_PRINTF("merging reach %s, %zu elements\n", + describeClass(castles[0]->reach()).c_str(), castles.size()); - CastleProto *m = nullptr; + CastleProto *m = nullptr; - for (CastleProto *c : castles) { + for (CastleProto *c : castles) { assert(c->repeats.size() == 1); // Not yet merged. - assert(g[eng_verts.at(c).front()].suffix.castle.get() == c); - if (!m) { - m = c; + assert(g[eng_verts.at(c).front()].suffix.castle.get() == c); + if (!m) { + m = c; continue; } - u32 top = m->merge(c->repeats[0]); - if (top == CastleProto::max_occupancy) { + u32 top = m->merge(c->repeats[0]); + if (top == CastleProto::max_occupancy) { // No room left to merge into 'm'. This one becomes the new 'm'. DEBUG_PRINTF("next mergee\n"); m = c; - continue; + continue; } - updateCastleSuffix(g, g[eng_verts.at(m).front()].suffix.castle, top, - eng_verts.at(c)); - DEBUG_PRINTF("added to %p, top %u\n", m, top); + updateCastleSuffix(g, g[eng_verts.at(m).front()].suffix.castle, top, + eng_verts.at(c)); + DEBUG_PRINTF("added to %p, top %u\n", m, top); } } -void mergeCastleSuffixes(RoseBuildImpl &build) { +void mergeCastleSuffixes(RoseBuildImpl &build) { DEBUG_PRINTF("entry\n"); - if (!build.cc.grey.allowCastle || !build.cc.grey.mergeSuffixes) { + if (!build.cc.grey.allowCastle || !build.cc.grey.mergeSuffixes) { return; } - unordered_map<CastleProto *, vector<RoseVertex>> eng_verts; - map<CharReach, vector<CastleProto *>> by_reach; + unordered_map<CastleProto *, vector<RoseVertex>> eng_verts; + map<CharReach, vector<CastleProto *>> by_reach; - RoseGraph &g = build.g; + RoseGraph &g = build.g; for (auto v : vertices_range(g)) { if (!g[v].suffix.castle) { continue; } - CastleProto *c = g[v].suffix.castle.get(); + CastleProto *c = g[v].suffix.castle.get(); if (c->repeats.size() != 1) { // This code assumes it's the only place merging is being done. @@ -2804,14 +2804,14 @@ void mergeCastleSuffixes(RoseBuildImpl &build) { continue; } - if (!contains(eng_verts, c)) { + if (!contains(eng_verts, c)) { by_reach[c->reach()].push_back(c); } - eng_verts[c].push_back(v); + eng_verts[c].push_back(v); } - for (auto &chunk : by_reach | map_values) { - mergeCastleSuffixChunk(g, chunk, eng_verts); + for (auto &chunk : by_reach | map_values) { + mergeCastleSuffixChunk(g, chunk, eng_verts); } } diff --git a/contrib/libs/hyperscan/src/rose/rose_build_merge.h b/contrib/libs/hyperscan/src/rose/rose_build_merge.h index 216cf50bcc..6de6c7786a 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_merge.h +++ b/contrib/libs/hyperscan/src/rose/rose_build_merge.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -27,8 +27,8 @@ */ /** \file - * \brief Rose Build: functions for reducing the number of engines in a Rose - * graph through merging or deduplicating engines. + * \brief Rose Build: functions for reducing the number of engines in a Rose + * graph through merging or deduplicating engines. 
*/ #ifndef ROSE_BUILD_MERGE_H diff --git a/contrib/libs/hyperscan/src/rose/rose_build_misc.cpp b/contrib/libs/hyperscan/src/rose/rose_build_misc.cpp index 81cfda7ca5..0b0e689c99 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_misc.cpp +++ b/contrib/libs/hyperscan/src/rose/rose_build_misc.cpp @@ -26,17 +26,17 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include "rose_build_misc.h" +#include "rose_build_misc.h" #include "rose_build_impl.h" -#include "rose_build_resources.h" -#include "hwlm/hwlm_literal.h" +#include "rose_build_resources.h" +#include "hwlm/hwlm_literal.h" #include "nfa/castlecompile.h" #include "nfa/goughcompile.h" #include "nfa/mcclellancompile_util.h" #include "nfa/nfa_api.h" #include "nfa/rdfa.h" -#include "nfa/tamaramacompile.h" +#include "nfa/tamaramacompile.h" #include "nfagraph/ng_holder.h" #include "nfagraph/ng_limex.h" #include "nfagraph/ng_reports.h" @@ -67,9 +67,9 @@ namespace ue2 { // just to get it out of the header RoseBuild::~RoseBuild() { } -RoseBuildImpl::RoseBuildImpl(ReportManager &rm_in, - SomSlotManager &ssm_in, - SmallWriteBuild &smwr_in, +RoseBuildImpl::RoseBuildImpl(ReportManager &rm_in, + SomSlotManager &ssm_in, + SmallWriteBuild &smwr_in, const CompileContext &cc_in, const BoundaryReports &boundary_in) : cc(cc_in), @@ -82,7 +82,7 @@ RoseBuildImpl::RoseBuildImpl(ReportManager &rm_in, max_rose_anchored_floating_overlap(0), rm(rm_in), ssm(ssm_in), - smwr(smwr_in), + smwr(smwr_in), boundary(boundary_in), next_nfa_report(0) { // add root vertices to graph @@ -154,12 +154,12 @@ bool isInTable(const RoseBuildImpl &tbi, RoseVertex v, // All literals for a given vertex will be in the same table, so we need // only inspect the first one. - const auto lit_table = tbi.literals.at(*lit_ids.begin()).table; + const auto lit_table = tbi.literals.at(*lit_ids.begin()).table; // Verify that all literals for this vertex are in the same table. 
- assert(all_of_in(lit_ids, [&](u32 lit_id) { - return tbi.literals.at(lit_id).table == lit_table; - })); + assert(all_of_in(lit_ids, [&](u32 lit_id) { + return tbi.literals.at(lit_id).table == lit_table; + })); return lit_table == table; } @@ -186,7 +186,7 @@ bool RoseBuildImpl::hasLiteralInTable(RoseVertex v, bool RoseBuildImpl::hasNoFloatingRoots() const { for (auto v : adjacent_vertices_range(root, g)) { if (isFloating(v)) { - DEBUG_PRINTF("direct floating root %zu\n", g[v].index); + DEBUG_PRINTF("direct floating root %zu\n", g[v].index); return false; } } @@ -194,7 +194,7 @@ bool RoseBuildImpl::hasNoFloatingRoots() const { /* need to check if the anchored_root has any literals which are too deep */ for (auto v : adjacent_vertices_range(anchored_root, g)) { if (isFloating(v)) { - DEBUG_PRINTF("indirect floating root %zu\n", g[v].index); + DEBUG_PRINTF("indirect floating root %zu\n", g[v].index); return false; } } @@ -209,7 +209,7 @@ size_t RoseBuildImpl::maxLiteralLen(RoseVertex v) const { size_t maxlen = 0; for (const auto &lit_id : lit_ids) { - maxlen = max(maxlen, literals.at(lit_id).elength()); + maxlen = max(maxlen, literals.at(lit_id).elength()); } return maxlen; @@ -222,19 +222,19 @@ size_t RoseBuildImpl::minLiteralLen(RoseVertex v) const { size_t minlen = ROSE_BOUND_INF; for (const auto &lit_id : lit_ids) { - minlen = min(minlen, literals.at(lit_id).elength()); + minlen = min(minlen, literals.at(lit_id).elength()); } return minlen; } // RoseBuild factory -unique_ptr<RoseBuild> makeRoseBuilder(ReportManager &rm, - SomSlotManager &ssm, - SmallWriteBuild &smwr, +unique_ptr<RoseBuild> makeRoseBuilder(ReportManager &rm, + SomSlotManager &ssm, + SmallWriteBuild &smwr, const CompileContext &cc, const BoundaryReports &boundary) { - return ue2::make_unique<RoseBuildImpl>(rm, ssm, smwr, cc, boundary); + return ue2::make_unique<RoseBuildImpl>(rm, ssm, smwr, cc, boundary); } bool roseIsPureLiteral(const RoseEngine *t) { @@ -285,30 +285,30 @@ size_t maxOverlap(const rose_literal_id &a, const rose_literal_id &b) { static const rose_literal_id &getOverlapLiteral(const RoseBuildImpl &tbi, u32 literal_id) { - auto it = tbi.anchoredLitSuffix.find(literal_id); + auto it = tbi.anchoredLitSuffix.find(literal_id); if (it != tbi.anchoredLitSuffix.end()) { return it->second; } - return tbi.literals.at(literal_id); -} - -ue2_literal findNonOverlappingTail(const set<ue2_literal> &lits, - const ue2_literal &s) { - size_t max_overlap = 0; - - for (const auto &lit : lits) { - size_t overlap = lit != s ? maxStringOverlap(lit, s) - : maxStringSelfOverlap(s); - max_overlap = max(max_overlap, overlap); - } - - /* find the tail that doesn't overlap */ - ue2_literal tail = s.substr(max_overlap); - DEBUG_PRINTF("%zu overlap, tail: '%s'\n", max_overlap, - dumpString(tail).c_str()); - return tail; -} - + return tbi.literals.at(literal_id); +} + +ue2_literal findNonOverlappingTail(const set<ue2_literal> &lits, + const ue2_literal &s) { + size_t max_overlap = 0; + + for (const auto &lit : lits) { + size_t overlap = lit != s ? 
maxStringOverlap(lit, s) + : maxStringSelfOverlap(s); + max_overlap = max(max_overlap, overlap); + } + + /* find the tail that doesn't overlap */ + ue2_literal tail = s.substr(max_overlap); + DEBUG_PRINTF("%zu overlap, tail: '%s'\n", max_overlap, + dumpString(tail).c_str()); + return tail; +} + size_t RoseBuildImpl::maxLiteralOverlap(RoseVertex u, RoseVertex v) const { size_t overlap = 0; for (auto u_lit_id : g[u].literals) { @@ -324,14 +324,14 @@ size_t RoseBuildImpl::maxLiteralOverlap(RoseVertex u, RoseVertex v) const { void RoseBuildImpl::removeVertices(const vector<RoseVertex> &dead) { for (auto v : dead) { assert(!isAnyStart(v)); - DEBUG_PRINTF("removing vertex %zu\n", g[v].index); + DEBUG_PRINTF("removing vertex %zu\n", g[v].index); for (auto lit_id : g[v].literals) { literal_info[lit_id].vertices.erase(v); } - clear_vertex(v, g); + clear_vertex(v, g); remove_vertex(v, g); } - renumber_vertices(g); + renumber_vertices(g); } // Find the maximum bound on the edges to this vertex's successors ignoring @@ -365,14 +365,14 @@ u32 RoseBuildImpl::calcSuccMaxBound(RoseVertex u) const { u32 RoseBuildImpl::getLiteralId(const ue2_literal &s, u32 delay, rose_literal_table table) { - DEBUG_PRINTF("getting id for %s in table %d\n", dumpString(s).c_str(), - table); + DEBUG_PRINTF("getting id for %s in table %d\n", dumpString(s).c_str(), + table); assert(table != ROSE_ANCHORED); rose_literal_id key(s, table, delay); - auto m = literals.insert(key); - u32 id = m.first; - bool inserted = m.second; + auto m = literals.insert(key); + u32 id = m.first; + bool inserted = m.second; if (inserted) { literal_info.push_back(rose_literal_info()); @@ -452,17 +452,17 @@ rose_literal_id::rose_literal_id(const ue2_literal &s_in, u32 RoseBuildImpl::getLiteralId(const ue2_literal &s, const vector<u8> &msk, const vector<u8> &cmp, u32 delay, rose_literal_table table) { - DEBUG_PRINTF("getting id for %s in table %d\n", dumpString(s).c_str(), - table); + DEBUG_PRINTF("getting id for %s in table %d\n", dumpString(s).c_str(), + table); assert(table != ROSE_ANCHORED); rose_literal_id key(s, msk, cmp, table, delay); /* ue2_literals are always uppercased if nocase and must have an * alpha char */ - auto m = literals.insert(key); - u32 id = m.first; - bool inserted = m.second; + auto m = literals.insert(key); + u32 id = m.first; + bool inserted = m.second; if (inserted) { literal_info.push_back(rose_literal_info()); @@ -481,12 +481,12 @@ u32 RoseBuildImpl::getLiteralId(const ue2_literal &s, const vector<u8> &msk, u32 RoseBuildImpl::getNewLiteralId() { rose_literal_id key(ue2_literal(), ROSE_ANCHORED, 0); - u32 numLiterals = verify_u32(literals.size()); + u32 numLiterals = verify_u32(literals.size()); key.distinctiveness = numLiterals; - auto m = literals.insert(key); - assert(m.second); - u32 id = m.first; + auto m = literals.insert(key); + assert(m.second); + u32 id = m.first; literal_info.push_back(rose_literal_info()); assert(literal_info.size() == id + 1); @@ -504,15 +504,15 @@ bool operator<(const RoseEdgeProps &a, const RoseEdgeProps &b) { } #ifndef NDEBUG -bool roseHasTops(const RoseBuildImpl &build, RoseVertex v) { - const RoseGraph &g = build.g; +bool roseHasTops(const RoseBuildImpl &build, RoseVertex v) { + const RoseGraph &g = build.g; assert(g[v].left); set<u32> graph_tops; - if (!build.isRootSuccessor(v)) { - for (const auto &e : in_edges_range(v, g)) { - graph_tops.insert(g[e].rose_top); - } + if (!build.isRootSuccessor(v)) { + for (const auto &e : in_edges_range(v, g)) { + graph_tops.insert(g[e].rose_top); + } } 
return is_subset_of(graph_tops, all_tops(g[v].left)); @@ -527,40 +527,40 @@ u32 OutfixInfo::get_queue(QueueIndexFactory &qif) { return queue; } -namespace { -class OutfixAllReports : public boost::static_visitor<set<ReportID>> { -public: - set<ReportID> operator()(const boost::blank &) const { - return set<ReportID>(); +namespace { +class OutfixAllReports : public boost::static_visitor<set<ReportID>> { +public: + set<ReportID> operator()(const boost::blank &) const { + return set<ReportID>(); } - - template<class T> - set<ReportID> operator()(const unique_ptr<T> &x) const { - return all_reports(*x); + + template<class T> + set<ReportID> operator()(const unique_ptr<T> &x) const { + return all_reports(*x); } - set<ReportID> operator()(const MpvProto &mpv) const { - set<ReportID> reports; - for (const auto &puff : mpv.puffettes) { - reports.insert(puff.report); - } - for (const auto &puff : mpv.triggered_puffettes) { - reports.insert(puff.report); - } - return reports; + set<ReportID> operator()(const MpvProto &mpv) const { + set<ReportID> reports; + for (const auto &puff : mpv.puffettes) { + reports.insert(puff.report); + } + for (const auto &puff : mpv.triggered_puffettes) { + reports.insert(puff.report); + } + return reports; } -}; -} +}; +} -set<ReportID> all_reports(const OutfixInfo &outfix) { - auto reports = boost::apply_visitor(OutfixAllReports(), outfix.proto); +set<ReportID> all_reports(const OutfixInfo &outfix) { + auto reports = boost::apply_visitor(OutfixAllReports(), outfix.proto); assert(!reports.empty()); return reports; } bool RoseSuffixInfo::operator==(const RoseSuffixInfo &b) const { return top == b.top && graph == b.graph && castle == b.castle && - rdfa == b.rdfa && haig == b.haig && tamarama == b.tamarama; + rdfa == b.rdfa && haig == b.haig && tamarama == b.tamarama; } bool RoseSuffixInfo::operator<(const RoseSuffixInfo &b) const { @@ -570,15 +570,15 @@ bool RoseSuffixInfo::operator<(const RoseSuffixInfo &b) const { ORDER_CHECK(castle); ORDER_CHECK(haig); ORDER_CHECK(rdfa); - ORDER_CHECK(tamarama); + ORDER_CHECK(tamarama); assert(a.dfa_min_width == b.dfa_min_width); assert(a.dfa_max_width == b.dfa_max_width); return false; } -size_t RoseSuffixInfo::hash() const { - return hash_all(top, graph, castle, rdfa, haig, tamarama); -} +size_t RoseSuffixInfo::hash() const { + return hash_all(top, graph, castle, rdfa, haig, tamarama); +} void RoseSuffixInfo::reset(void) { top = 0; @@ -586,16 +586,16 @@ void RoseSuffixInfo::reset(void) { castle.reset(); rdfa.reset(); haig.reset(); - tamarama.reset(); - dfa_min_width = depth(0); + tamarama.reset(); + dfa_min_width = depth(0); dfa_max_width = depth::infinity(); } std::set<ReportID> all_reports(const suffix_id &s) { assert(s.graph() || s.castle() || s.haig() || s.dfa()); - if (s.tamarama()) { - return all_reports(*s.tamarama()); - } else if (s.graph()) { + if (s.tamarama()) { + return all_reports(*s.tamarama()); + } else if (s.graph()) { return all_reports(*s.graph()); } else if (s.castle()) { return all_reports(*s.castle()); @@ -680,9 +680,9 @@ bool has_non_eod_accepts(const suffix_id &s) { set<u32> all_tops(const suffix_id &s) { assert(s.graph() || s.castle() || s.haig() || s.dfa()); if (s.graph()) { - flat_set<u32> tops = getTops(*s.graph()); - assert(!tops.empty()); - return {tops.begin(), tops.end()}; + flat_set<u32> tops = getTops(*s.graph()); + assert(!tops.empty()); + return {tops.begin(), tops.end()}; } if (s.castle()) { @@ -694,7 +694,7 @@ set<u32> all_tops(const suffix_id &s) { } size_t suffix_id::hash() const { - return 
hash_all(g, c, d, h, t); + return hash_all(g, c, d, h, t); } bool isAnchored(const left_id &r) { @@ -702,13 +702,13 @@ bool isAnchored(const left_id &r) { if (r.graph()) { return isAnchored(*r.graph()); } - if (r.dfa()) { - return r.dfa()->start_anchored == DEAD_STATE; - } - if (r.haig()) { - return r.haig()->start_anchored == DEAD_STATE; - } - + if (r.dfa()) { + return r.dfa()->start_anchored == DEAD_STATE; + } + if (r.haig()) { + return r.haig()->start_anchored == DEAD_STATE; + } + // All other types are explicitly anchored. return true; } @@ -738,8 +738,8 @@ depth findMaxWidth(const left_id &r) { set<u32> all_tops(const left_id &r) { assert(r.graph() || r.castle() || r.haig() || r.dfa()); if (r.graph()) { - flat_set<u32> tops = getTops(*r.graph()); - return {tops.begin(), tops.end()}; + flat_set<u32> tops = getTops(*r.graph()); + return {tops.begin(), tops.end()}; } if (r.castle()) { @@ -750,25 +750,25 @@ set<u32> all_tops(const left_id &r) { return {0}; } -set<u32> all_reports(const left_id &left) { - assert(left.graph() || left.castle() || left.haig() || left.dfa()); - if (left.graph()) { - return all_reports(*left.graph()); - } else if (left.castle()) { - return all_reports(*left.castle()); - } else if (left.dfa()) { - return all_reports(*left.dfa()); - } else { - return all_reports(*left.haig()); - } -} - +set<u32> all_reports(const left_id &left) { + assert(left.graph() || left.castle() || left.haig() || left.dfa()); + if (left.graph()) { + return all_reports(*left.graph()); + } else if (left.castle()) { + return all_reports(*left.castle()); + } else if (left.dfa()) { + return all_reports(*left.dfa()); + } else { + return all_reports(*left.haig()); + } +} + u32 num_tops(const left_id &r) { return all_tops(r).size(); } size_t left_id::hash() const { - return hash_all(g, c, d, h); + return hash_all(g, c, d, h); } u64a findMaxOffset(const set<ReportID> &reports, const ReportManager &rm) { @@ -785,19 +785,19 @@ u64a findMaxOffset(const set<ReportID> &reports, const ReportManager &rm) { return maxOffset; } -size_t LeftEngInfo::hash() const { - return hash_all(graph, castle, dfa, haig, tamarama, lag, leftfix_report); -} - +size_t LeftEngInfo::hash() const { + return hash_all(graph, castle, dfa, haig, tamarama, lag, leftfix_report); +} + void LeftEngInfo::reset(void) { graph.reset(); castle.reset(); dfa.reset(); haig.reset(); - tamarama.reset(); + tamarama.reset(); lag = 0; leftfix_report = MO_INVALID_IDX; - dfa_min_width = depth(0); + dfa_min_width = depth(0); dfa_max_width = depth::infinity(); } @@ -809,16 +809,16 @@ LeftEngInfo::operator bool() const { return graph || castle || dfa || haig; } -u32 roseQuality(const RoseResources &res, const RoseEngine *t) { +u32 roseQuality(const RoseResources &res, const RoseEngine *t) { /* Rose is low quality if the atable is a Mcclellan 16 or has multiple DFAs */ - if (res.has_anchored) { - if (res.has_anchored_multiple) { + if (res.has_anchored) { + if (res.has_anchored_multiple) { DEBUG_PRINTF("multiple atable engines\n"); return 0; } - if (res.has_anchored_large) { + if (res.has_anchored_large) { DEBUG_PRINTF("m16 atable engine\n"); return 0; } @@ -827,16 +827,16 @@ u32 roseQuality(const RoseResources &res, const RoseEngine *t) { /* if we always run multiple engines then we are slow */ u32 always_run = 0; - if (res.has_anchored) { + if (res.has_anchored) { always_run++; } - if (t->eagerIterOffset) { - /* eager prefixes are always run */ - always_run++; - } - - if (res.has_floating) { + if (t->eagerIterOffset) { + /* eager prefixes are always run 
*/ + always_run++; + } + + if (res.has_floating) { /* TODO: ignore conditional ftables, or ftables beyond smwr region */ always_run++; } @@ -875,59 +875,59 @@ u32 roseQuality(const RoseResources &res, const RoseEngine *t) { return 1; } -u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id) { - const auto &lit_vertices = build.literal_info.at(lit_id).vertices; - assert(!lit_vertices.empty()); - - u32 min_offset = UINT32_MAX; - for (const auto &v : lit_vertices) { - min_offset = min(min_offset, build.g[v].min_offset); - } - - return min_offset; -} - -u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id) { - const auto &lit_vertices = build.literal_info.at(lit_id).vertices; - assert(!lit_vertices.empty()); - - u32 max_offset = 0; - for (const auto &v : lit_vertices) { - max_offset = max(max_offset, build.g[v].max_offset); - } - - return max_offset; -} - -bool canEagerlyReportAtEod(const RoseBuildImpl &build, const RoseEdge &e) { - const auto &g = build.g; - const auto v = target(e, g); - - if (!build.g[v].eod_accept) { - return false; - } - - // If there's a graph between us and EOD, we shouldn't be eager. - if (build.g[v].left) { - return false; - } - - // Must be exactly at EOD. - if (g[e].minBound != 0 || g[e].maxBound != 0) { - return false; - } - - // In streaming mode, we can only eagerly report EOD for literals in the - // EOD-anchored table, as that's the only time we actually know where EOD - // is. In block mode, we always have this information. - const auto u = source(e, g); - if (build.cc.streaming && !build.isInETable(u)) { - return false; - } - - return true; -} - +u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id) { + const auto &lit_vertices = build.literal_info.at(lit_id).vertices; + assert(!lit_vertices.empty()); + + u32 min_offset = UINT32_MAX; + for (const auto &v : lit_vertices) { + min_offset = min(min_offset, build.g[v].min_offset); + } + + return min_offset; +} + +u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id) { + const auto &lit_vertices = build.literal_info.at(lit_id).vertices; + assert(!lit_vertices.empty()); + + u32 max_offset = 0; + for (const auto &v : lit_vertices) { + max_offset = max(max_offset, build.g[v].max_offset); + } + + return max_offset; +} + +bool canEagerlyReportAtEod(const RoseBuildImpl &build, const RoseEdge &e) { + const auto &g = build.g; + const auto v = target(e, g); + + if (!build.g[v].eod_accept) { + return false; + } + + // If there's a graph between us and EOD, we shouldn't be eager. + if (build.g[v].left) { + return false; + } + + // Must be exactly at EOD. + if (g[e].minBound != 0 || g[e].maxBound != 0) { + return false; + } + + // In streaming mode, we can only eagerly report EOD for literals in the + // EOD-anchored table, as that's the only time we actually know where EOD + // is. In block mode, we always have this information. + const auto u = source(e, g); + if (build.cc.streaming && !build.isInETable(u)) { + return false; + } + + return true; +} + #ifndef NDEBUG /** \brief Returns true if all the graphs (NFA, DFA, Haig, etc) in this Rose * graph are implementable. */ @@ -937,7 +937,7 @@ bool canImplementGraphs(const RoseBuildImpl &tbi) { // First, check the Rose leftfixes. 
for (auto v : vertices_range(g)) { - DEBUG_PRINTF("leftfix: check vertex %zu\n", g[v].index); + DEBUG_PRINTF("leftfix: check vertex %zu\n", g[v].index); if (g[v].left.castle) { DEBUG_PRINTF("castle ok\n"); @@ -953,10 +953,10 @@ bool canImplementGraphs(const RoseBuildImpl &tbi) { } if (g[v].left.graph) { assert(g[v].left.graph->kind - == (tbi.isRootSuccessor(v) ? NFA_PREFIX : NFA_INFIX)); + == (tbi.isRootSuccessor(v) ? NFA_PREFIX : NFA_INFIX)); if (!isImplementableNFA(*g[v].left.graph, nullptr, tbi.cc)) { - DEBUG_PRINTF("nfa prefix %zu failed (%zu vertices)\n", - g[v].index, num_vertices(*g[v].left.graph)); + DEBUG_PRINTF("nfa prefix %zu failed (%zu vertices)\n", + g[v].index, num_vertices(*g[v].left.graph)); return false; } } @@ -965,7 +965,7 @@ bool canImplementGraphs(const RoseBuildImpl &tbi) { // Suffix graphs. for (auto v : vertices_range(g)) { - DEBUG_PRINTF("suffix: check vertex %zu\n", g[v].index); + DEBUG_PRINTF("suffix: check vertex %zu\n", g[v].index); const RoseSuffixInfo &suffix = g[v].suffix; if (suffix.castle) { @@ -983,8 +983,8 @@ bool canImplementGraphs(const RoseBuildImpl &tbi) { if (suffix.graph) { assert(suffix.graph->kind == NFA_SUFFIX); if (!isImplementableNFA(*suffix.graph, &tbi.rm, tbi.cc)) { - DEBUG_PRINTF("nfa suffix %zu failed (%zu vertices)\n", - g[v].index, num_vertices(*suffix.graph)); + DEBUG_PRINTF("nfa suffix %zu failed (%zu vertices)\n", + g[v].index, num_vertices(*suffix.graph)); return false; } } @@ -992,53 +992,53 @@ bool canImplementGraphs(const RoseBuildImpl &tbi) { return true; } - + /** * \brief True if there is an engine with a top that is not triggered by a * vertex in the Rose graph. This is a consistency check used in assertions. */ -bool hasOrphanedTops(const RoseBuildImpl &build) { - const RoseGraph &g = build.g; - +bool hasOrphanedTops(const RoseBuildImpl &build) { + const RoseGraph &g = build.g; + unordered_map<left_id, set<u32>> leftfixes; - unordered_map<suffix_id, set<u32>> suffixes; - - for (auto v : vertices_range(g)) { - if (g[v].left) { + unordered_map<suffix_id, set<u32>> suffixes; + + for (auto v : vertices_range(g)) { + if (g[v].left) { set<u32> &tops = leftfixes[g[v].left]; - if (!build.isRootSuccessor(v)) { - // Tops for infixes come from the in-edges. - for (const auto &e : in_edges_range(v, g)) { - tops.insert(g[e].rose_top); - } - } - } - if (g[v].suffix) { - suffixes[g[v].suffix].insert(g[v].suffix.top); - } - } - + if (!build.isRootSuccessor(v)) { + // Tops for infixes come from the in-edges. 
+ for (const auto &e : in_edges_range(v, g)) { + tops.insert(g[e].rose_top); + } + } + } + if (g[v].suffix) { + suffixes[g[v].suffix].insert(g[v].suffix.top); + } + } + for (const auto &e : leftfixes) { - if (all_tops(e.first) != e.second) { - DEBUG_PRINTF("rose tops (%s) don't match rose graph (%s)\n", - as_string_list(all_tops(e.first)).c_str(), - as_string_list(e.second).c_str()); - return true; - } - } - - for (const auto &e : suffixes) { - if (all_tops(e.first) != e.second) { - DEBUG_PRINTF("suffix tops (%s) don't match rose graph (%s)\n", - as_string_list(all_tops(e.first)).c_str(), - as_string_list(e.second).c_str()); - return true; - } - } - - return false; -} - + if (all_tops(e.first) != e.second) { + DEBUG_PRINTF("rose tops (%s) don't match rose graph (%s)\n", + as_string_list(all_tops(e.first)).c_str(), + as_string_list(e.second).c_str()); + return true; + } + } + + for (const auto &e : suffixes) { + if (all_tops(e.first) != e.second) { + DEBUG_PRINTF("suffix tops (%s) don't match rose graph (%s)\n", + as_string_list(all_tops(e.first)).c_str(), + as_string_list(e.second).c_str()); + return true; + } + } + + return false; +} + #endif // NDEBUG } // namespace ue2 diff --git a/contrib/libs/hyperscan/src/rose/rose_build_misc.h b/contrib/libs/hyperscan/src/rose/rose_build_misc.h index 203feba871..f34b829200 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_misc.h +++ b/contrib/libs/hyperscan/src/rose/rose_build_misc.h @@ -1,46 +1,46 @@ -/* - * Copyright (c) 2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef ROSE_BUILD_MISC_H -#define ROSE_BUILD_MISC_H - -#include "ue2common.h" - -struct RoseEngine; - -namespace ue2 { - -struct RoseResources; - -/* used by heuristics to determine the small write engine. High numbers are - * intended to indicate a lightweight rose. 
*/ -u32 roseQuality(const RoseResources &res, const RoseEngine *rose); - -} - -#endif +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ROSE_BUILD_MISC_H +#define ROSE_BUILD_MISC_H + +#include "ue2common.h" + +struct RoseEngine; + +namespace ue2 { + +struct RoseResources; + +/* used by heuristics to determine the small write engine. High numbers are + * intended to indicate a lightweight rose. */ +u32 roseQuality(const RoseResources &res, const RoseEngine *rose); + +} + +#endif diff --git a/contrib/libs/hyperscan/src/rose/rose_build_program.cpp b/contrib/libs/hyperscan/src/rose/rose_build_program.cpp index 4a6e7506ca..7d1d7ecbb5 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_program.cpp +++ b/contrib/libs/hyperscan/src/rose/rose_build_program.cpp @@ -1,318 +1,318 @@ -/* +/* * Copyright (c) 2016-2020, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include "rose_build_program.h" - -#include "rose_build_engine_blob.h" -#include "rose_build_instructions.h" -#include "rose_build_lookaround.h" -#include "rose_build_resources.h" -#include "nfa/nfa_api_queue.h" -#include "nfa/nfa_build_util.h" -#include "nfa/tamaramacompile.h" -#include "nfagraph/ng_util.h" -#include "util/charreach_util.h" -#include "util/container.h" -#include "util/compile_context.h" -#include "util/compile_error.h" -#include "util/report_manager.h" -#include "util/unordered.h" -#include "util/verify_types.h" - -#include <boost/range/adaptor/map.hpp> - -#include <algorithm> -#include <cstring> - -using namespace std; -using boost::adaptors::map_values; -using boost::adaptors::map_keys; - -namespace ue2 { - -engine_info::engine_info(const NFA *nfa, bool trans) - : type((NFAEngineType)nfa->type), accepts_eod(nfaAcceptsEod(nfa)), - stream_size(nfa->streamStateSize), - scratch_size(nfa->scratchStateSize), - scratch_align(state_alignment(*nfa)), - transient(trans) { - assert(scratch_align); -} - -left_build_info::left_build_info(u32 q, u32 l, u32 t, rose_group sm, - const std::vector<u8> &stops, u32 max_ql, - u8 cm_count, const CharReach &cm_cr) - : queue(q), lag(l), transient(t), squash_mask(sm), stopAlphabet(stops), - max_queuelen(max_ql), countingMiracleCount(cm_count), - countingMiracleReach(cm_cr) { -} - -left_build_info::left_build_info(const vector<vector<LookEntry>> &looks) - : has_lookaround(true), lookaround(looks) { -} - -using OffsetMap = RoseInstruction::OffsetMap; - -static -OffsetMap makeOffsetMap(const RoseProgram &program, u32 *total_len) { - OffsetMap offset_map; - u32 offset = 0; - for (const auto &ri : program) { - offset = ROUNDUP_N(offset, ROSE_INSTR_MIN_ALIGN); - DEBUG_PRINTF("instr %p (opcode %d) -> offset %u\n", ri.get(), - ri->code(), offset); - assert(!contains(offset_map, ri.get())); - offset_map.emplace(ri.get(), offset); - offset += ri->byte_length(); - } - *total_len = offset; - return offset_map; -} - -RoseProgram::RoseProgram() { + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "rose_build_program.h" + +#include "rose_build_engine_blob.h" +#include "rose_build_instructions.h" +#include "rose_build_lookaround.h" +#include "rose_build_resources.h" +#include "nfa/nfa_api_queue.h" +#include "nfa/nfa_build_util.h" +#include "nfa/tamaramacompile.h" +#include "nfagraph/ng_util.h" +#include "util/charreach_util.h" +#include "util/container.h" +#include "util/compile_context.h" +#include "util/compile_error.h" +#include "util/report_manager.h" +#include "util/unordered.h" +#include "util/verify_types.h" + +#include <boost/range/adaptor/map.hpp> + +#include <algorithm> +#include <cstring> + +using namespace std; +using boost::adaptors::map_values; +using boost::adaptors::map_keys; + +namespace ue2 { + +engine_info::engine_info(const NFA *nfa, bool trans) + : type((NFAEngineType)nfa->type), accepts_eod(nfaAcceptsEod(nfa)), + stream_size(nfa->streamStateSize), + scratch_size(nfa->scratchStateSize), + scratch_align(state_alignment(*nfa)), + transient(trans) { + assert(scratch_align); +} + +left_build_info::left_build_info(u32 q, u32 l, u32 t, rose_group sm, + const std::vector<u8> &stops, u32 max_ql, + u8 cm_count, const CharReach &cm_cr) + : queue(q), lag(l), transient(t), squash_mask(sm), stopAlphabet(stops), + max_queuelen(max_ql), countingMiracleCount(cm_count), + countingMiracleReach(cm_cr) { +} + +left_build_info::left_build_info(const vector<vector<LookEntry>> &looks) + : has_lookaround(true), lookaround(looks) { +} + +using OffsetMap = RoseInstruction::OffsetMap; + +static +OffsetMap makeOffsetMap(const RoseProgram &program, u32 *total_len) { + OffsetMap offset_map; + u32 offset = 0; + for (const auto &ri : program) { + offset = ROUNDUP_N(offset, ROSE_INSTR_MIN_ALIGN); + DEBUG_PRINTF("instr %p (opcode %d) -> offset %u\n", ri.get(), + ri->code(), offset); + assert(!contains(offset_map, ri.get())); + offset_map.emplace(ri.get(), offset); + offset += ri->byte_length(); + } + *total_len = offset; + return offset_map; +} + +RoseProgram::RoseProgram() { prog.push_back(std::make_unique<RoseInstrEnd>()); -} - -RoseProgram::~RoseProgram() = default; - -RoseProgram::RoseProgram(RoseProgram &&) = default; -RoseProgram &RoseProgram::operator=(RoseProgram &&) = default; - -bool RoseProgram::empty() const { - assert(!prog.empty()); - assert(prog.back()->code() == ROSE_INSTR_END); - // Empty if we only have one element, the END instruction. 
- return next(prog.begin()) == prog.end(); -} - -const RoseInstruction *RoseProgram::end_instruction() const { - assert(!prog.empty()); - assert(prog.back()->code() == ROSE_INSTR_END); - - return prog.back().get(); -} - -void RoseProgram::update_targets(RoseProgram::iterator it, - RoseProgram::iterator it_end, - const RoseInstruction *old_target, - const RoseInstruction *new_target) { - assert(old_target && new_target && old_target != new_target); - for (; it != it_end; ++it) { - unique_ptr<RoseInstruction> &ri = *it; - assert(ri); - ri->update_target(old_target, new_target); - } -} - -RoseProgram::iterator RoseProgram::insert(RoseProgram::iterator it, - unique_ptr<RoseInstruction> ri) { - assert(!prog.empty()); - assert(it != end()); - assert(prog.back()->code() == ROSE_INSTR_END); - - return prog.insert(it, move(ri)); -} - -RoseProgram::iterator RoseProgram::insert(RoseProgram::iterator it, - RoseProgram &&block) { - assert(!prog.empty()); - assert(it != end()); - assert(prog.back()->code() == ROSE_INSTR_END); - - if (block.empty()) { - return it; - } - - const RoseInstruction *end_ptr = block.end_instruction(); - assert(end_ptr->code() == ROSE_INSTR_END); - block.prog.pop_back(); - - const RoseInstruction *new_target = it->get(); - update_targets(block.prog.begin(), block.prog.end(), end_ptr, new_target); - - // Workaround: container insert() for ranges doesn't return an iterator - // in the version of the STL distributed with gcc 4.8. - auto dist = distance(prog.begin(), it); - prog.insert(it, make_move_iterator(block.prog.begin()), - make_move_iterator(block.prog.end())); - it = prog.begin(); - advance(it, dist); - return it; -} - -RoseProgram::iterator RoseProgram::erase(RoseProgram::iterator first, - RoseProgram::iterator last) { - return prog.erase(first, last); -} - -void RoseProgram::add_before_end(std::unique_ptr<RoseInstruction> ri) { - assert(!prog.empty()); - insert(std::prev(prog.end()), std::move(ri)); -} - -void RoseProgram::add_before_end(RoseProgram &&block) { - assert(!prog.empty()); - assert(prog.back()->code() == ROSE_INSTR_END); - - if (block.empty()) { - return; - } - - insert(prev(prog.end()), move(block)); -} - -void RoseProgram::add_block(RoseProgram &&block) { - assert(!prog.empty()); - assert(prog.back()->code() == ROSE_INSTR_END); - - if (block.empty()) { - return; - } - - // Replace pointers to the current END with pointers to the first - // instruction in the new sequence. 
- const RoseInstruction *end_ptr = end_instruction(); - prog.pop_back(); - update_targets(prog.begin(), prog.end(), end_ptr, - block.prog.front().get()); - prog.insert(prog.end(), make_move_iterator(block.prog.begin()), - make_move_iterator(block.prog.end())); -} - -bytecode_ptr<char> writeProgram(RoseEngineBlob &blob, - const RoseProgram &program) { - u32 total_len = 0; - const auto offset_map = makeOffsetMap(program, &total_len); - DEBUG_PRINTF("%zu instructions, len %u\n", program.size(), total_len); - - auto bytecode = make_zeroed_bytecode_ptr<char>(total_len, - ROSE_INSTR_MIN_ALIGN); - char *ptr = bytecode.get(); - - for (const auto &ri : program) { - assert(contains(offset_map, ri.get())); - const u32 offset = offset_map.at(ri.get()); - ri->write(ptr + offset, blob, offset_map); - } - - return bytecode; -} - -size_t RoseProgramHash::operator()(const RoseProgram &program) const { - size_t v = 0; - for (const auto &ri : program) { - assert(ri); - hash_combine(v, ri->hash()); - } - return v; -} - -bool RoseProgramEquivalence::operator()(const RoseProgram &prog1, - const RoseProgram &prog2) const { - if (prog1.size() != prog2.size()) { - return false; - } - - u32 len_1 = 0, len_2 = 0; - const auto offset_map_1 = makeOffsetMap(prog1, &len_1); - const auto offset_map_2 = makeOffsetMap(prog2, &len_2); - - if (len_1 != len_2) { - return false; - } - - auto is_equiv = [&](const unique_ptr<RoseInstruction> &a, - const unique_ptr<RoseInstruction> &b) { - assert(a && b); - return a->equiv(*b, offset_map_1, offset_map_2); - }; - - return std::equal(prog1.begin(), prog1.end(), prog2.begin(), is_equiv); -} - -/* Removes any CHECK_HANDLED instructions from the given program */ -static -void stripCheckHandledInstruction(RoseProgram &prog) { - for (auto it = prog.begin(); it != prog.end();) { - auto ins = dynamic_cast<const RoseInstrCheckNotHandled *>(it->get()); - if (!ins) { - ++it; - continue; - } - - auto next_it = next(it); - assert(next_it != prog.end()); /* there should always be an end ins */ - auto next_ins = next_it->get(); - - /* update all earlier instructions which point to ins to instead point - * to the next instruction. Only need to look at earlier as we only ever - * jump forward. */ - RoseProgram::update_targets(prog.begin(), it, ins, next_ins); - - /* remove check handled instruction */ - it = prog.erase(it, next_it); - } -} - - +} + +RoseProgram::~RoseProgram() = default; + +RoseProgram::RoseProgram(RoseProgram &&) = default; +RoseProgram &RoseProgram::operator=(RoseProgram &&) = default; + +bool RoseProgram::empty() const { + assert(!prog.empty()); + assert(prog.back()->code() == ROSE_INSTR_END); + // Empty if we only have one element, the END instruction. 
+ return next(prog.begin()) == prog.end(); +} + +const RoseInstruction *RoseProgram::end_instruction() const { + assert(!prog.empty()); + assert(prog.back()->code() == ROSE_INSTR_END); + + return prog.back().get(); +} + +void RoseProgram::update_targets(RoseProgram::iterator it, + RoseProgram::iterator it_end, + const RoseInstruction *old_target, + const RoseInstruction *new_target) { + assert(old_target && new_target && old_target != new_target); + for (; it != it_end; ++it) { + unique_ptr<RoseInstruction> &ri = *it; + assert(ri); + ri->update_target(old_target, new_target); + } +} + +RoseProgram::iterator RoseProgram::insert(RoseProgram::iterator it, + unique_ptr<RoseInstruction> ri) { + assert(!prog.empty()); + assert(it != end()); + assert(prog.back()->code() == ROSE_INSTR_END); + + return prog.insert(it, move(ri)); +} + +RoseProgram::iterator RoseProgram::insert(RoseProgram::iterator it, + RoseProgram &&block) { + assert(!prog.empty()); + assert(it != end()); + assert(prog.back()->code() == ROSE_INSTR_END); + + if (block.empty()) { + return it; + } + + const RoseInstruction *end_ptr = block.end_instruction(); + assert(end_ptr->code() == ROSE_INSTR_END); + block.prog.pop_back(); + + const RoseInstruction *new_target = it->get(); + update_targets(block.prog.begin(), block.prog.end(), end_ptr, new_target); + + // Workaround: container insert() for ranges doesn't return an iterator + // in the version of the STL distributed with gcc 4.8. + auto dist = distance(prog.begin(), it); + prog.insert(it, make_move_iterator(block.prog.begin()), + make_move_iterator(block.prog.end())); + it = prog.begin(); + advance(it, dist); + return it; +} + +RoseProgram::iterator RoseProgram::erase(RoseProgram::iterator first, + RoseProgram::iterator last) { + return prog.erase(first, last); +} + +void RoseProgram::add_before_end(std::unique_ptr<RoseInstruction> ri) { + assert(!prog.empty()); + insert(std::prev(prog.end()), std::move(ri)); +} + +void RoseProgram::add_before_end(RoseProgram &&block) { + assert(!prog.empty()); + assert(prog.back()->code() == ROSE_INSTR_END); + + if (block.empty()) { + return; + } + + insert(prev(prog.end()), move(block)); +} + +void RoseProgram::add_block(RoseProgram &&block) { + assert(!prog.empty()); + assert(prog.back()->code() == ROSE_INSTR_END); + + if (block.empty()) { + return; + } + + // Replace pointers to the current END with pointers to the first + // instruction in the new sequence. 
+ const RoseInstruction *end_ptr = end_instruction(); + prog.pop_back(); + update_targets(prog.begin(), prog.end(), end_ptr, + block.prog.front().get()); + prog.insert(prog.end(), make_move_iterator(block.prog.begin()), + make_move_iterator(block.prog.end())); +} + +bytecode_ptr<char> writeProgram(RoseEngineBlob &blob, + const RoseProgram &program) { + u32 total_len = 0; + const auto offset_map = makeOffsetMap(program, &total_len); + DEBUG_PRINTF("%zu instructions, len %u\n", program.size(), total_len); + + auto bytecode = make_zeroed_bytecode_ptr<char>(total_len, + ROSE_INSTR_MIN_ALIGN); + char *ptr = bytecode.get(); + + for (const auto &ri : program) { + assert(contains(offset_map, ri.get())); + const u32 offset = offset_map.at(ri.get()); + ri->write(ptr + offset, blob, offset_map); + } + + return bytecode; +} + +size_t RoseProgramHash::operator()(const RoseProgram &program) const { + size_t v = 0; + for (const auto &ri : program) { + assert(ri); + hash_combine(v, ri->hash()); + } + return v; +} + +bool RoseProgramEquivalence::operator()(const RoseProgram &prog1, + const RoseProgram &prog2) const { + if (prog1.size() != prog2.size()) { + return false; + } + + u32 len_1 = 0, len_2 = 0; + const auto offset_map_1 = makeOffsetMap(prog1, &len_1); + const auto offset_map_2 = makeOffsetMap(prog2, &len_2); + + if (len_1 != len_2) { + return false; + } + + auto is_equiv = [&](const unique_ptr<RoseInstruction> &a, + const unique_ptr<RoseInstruction> &b) { + assert(a && b); + return a->equiv(*b, offset_map_1, offset_map_2); + }; + + return std::equal(prog1.begin(), prog1.end(), prog2.begin(), is_equiv); +} + +/* Removes any CHECK_HANDLED instructions from the given program */ +static +void stripCheckHandledInstruction(RoseProgram &prog) { + for (auto it = prog.begin(); it != prog.end();) { + auto ins = dynamic_cast<const RoseInstrCheckNotHandled *>(it->get()); + if (!ins) { + ++it; + continue; + } + + auto next_it = next(it); + assert(next_it != prog.end()); /* there should always be an end ins */ + auto next_ins = next_it->get(); + + /* update all earlier instructions which point to ins to instead point + * to the next instruction. Only need to look at earlier as we only ever + * jump forward. 
*/ + RoseProgram::update_targets(prog.begin(), it, ins, next_ins); + + /* remove check handled instruction */ + it = prog.erase(it, next_it); + } +} + + /** Returns true if the program may read the interpreter's work_done flag */ -static -bool reads_work_done_flag(const RoseProgram &prog) { - for (const auto &ri : prog) { - if (dynamic_cast<const RoseInstrSquashGroups *>(ri.get())) { - return true; - } - } - return false; -} - -void addEnginesEodProgram(u32 eodNfaIterOffset, RoseProgram &program) { - if (!eodNfaIterOffset) { - return; - } - - RoseProgram block; +static +bool reads_work_done_flag(const RoseProgram &prog) { + for (const auto &ri : prog) { + if (dynamic_cast<const RoseInstrSquashGroups *>(ri.get())) { + return true; + } + } + return false; +} + +void addEnginesEodProgram(u32 eodNfaIterOffset, RoseProgram &program) { + if (!eodNfaIterOffset) { + return; + } + + RoseProgram block; block.add_before_end(std::make_unique<RoseInstrEnginesEod>(eodNfaIterOffset)); - program.add_block(move(block)); -} - -void addSuffixesEodProgram(RoseProgram &program) { - RoseProgram block; + program.add_block(move(block)); +} + +void addSuffixesEodProgram(RoseProgram &program) { + RoseProgram block; block.add_before_end(std::make_unique<RoseInstrSuffixesEod>()); - program.add_block(move(block)); -} - -void addMatcherEodProgram(RoseProgram &program) { - RoseProgram block; + program.add_block(move(block)); +} + +void addMatcherEodProgram(RoseProgram &program) { + RoseProgram block; block.add_before_end(std::make_unique<RoseInstrMatcherEod>()); - program.add_block(move(block)); -} - + program.add_block(move(block)); +} + void addFlushCombinationProgram(RoseProgram &program) { program.add_before_end(std::make_unique<RoseInstrFlushCombination>()); } @@ -321,190 +321,190 @@ void addLastFlushCombinationProgram(RoseProgram &program) { program.add_before_end(std::make_unique<RoseInstrLastFlushCombination>()); } -static -void makeRoleCheckLeftfix(const RoseBuildImpl &build, - const map<RoseVertex, left_build_info> &leftfix_info, - RoseVertex v, RoseProgram &program) { - auto it = leftfix_info.find(v); - if (it == end(leftfix_info)) { - return; - } - const left_build_info &lni = it->second; - if (lni.has_lookaround) { - return; // Leftfix completely implemented by lookaround. - } - - assert(!build.cc.streaming || - build.g[v].left.lag <= MAX_STORED_LEFTFIX_LAG); - - bool is_prefix = build.isRootSuccessor(v); - const auto *end_inst = program.end_instruction(); - - unique_ptr<RoseInstruction> ri; - if (is_prefix) { - ri = std::make_unique<RoseInstrCheckPrefix>(lni.queue, build.g[v].left.lag, - build.g[v].left.leftfix_report, - end_inst); - } else { - ri = std::make_unique<RoseInstrCheckInfix>(lni.queue, build.g[v].left.lag, - build.g[v].left.leftfix_report, - end_inst); - } - program.add_before_end(move(ri)); -} - -static -void makeAnchoredLiteralDelay(const RoseBuildImpl &build, - const ProgramBuild &prog_build, u32 lit_id, - RoseProgram &program) { - // Only relevant for literals in the anchored table. - const rose_literal_id &lit = build.literals.at(lit_id); - if (lit.table != ROSE_ANCHORED) { - return; - } - - // If this literal match cannot occur after floatingMinLiteralMatchOffset, - // we do not need this check. 
- bool all_too_early = true; - rose_group groups = 0; - - const auto &lit_vertices = build.literal_info.at(lit_id).vertices; - for (RoseVertex v : lit_vertices) { - if (build.g[v].max_offset > prog_build.floatingMinLiteralMatchOffset) { - all_too_early = false; - } - groups |= build.g[v].groups; - } - - if (all_too_early) { - return; - } - - assert(contains(prog_build.anchored_programs, lit_id)); - u32 anch_id = prog_build.anchored_programs.at(lit_id); - - const auto *end_inst = program.end_instruction(); - auto ri = std::make_unique<RoseInstrAnchoredDelay>(groups, anch_id, end_inst); - program.add_before_end(move(ri)); -} - -static -void makeDedupe(const ReportManager &rm, const Report &report, - RoseProgram &program) { - const auto *end_inst = program.end_instruction(); - auto ri = - std::make_unique<RoseInstrDedupe>(report.quashSom, rm.getDkey(report), - report.offsetAdjust, end_inst); - program.add_before_end(move(ri)); -} - -static -void makeDedupeSom(const ReportManager &rm, const Report &report, - RoseProgram &program) { - const auto *end_inst = program.end_instruction(); - auto ri = std::make_unique<RoseInstrDedupeSom>(report.quashSom, - rm.getDkey(report), - report.offsetAdjust, end_inst); - program.add_before_end(move(ri)); -} - -static -void makeCatchup(const ReportManager &rm, bool needs_catchup, - const flat_set<ReportID> &reports, RoseProgram &program) { - if (!needs_catchup) { - return; - } - - // Everything except the INTERNAL_ROSE_CHAIN report needs catchup to run - // before reports are triggered. - - auto report_needs_catchup = [&](const ReportID &id) { - const Report &report = rm.getReport(id); - return report.type != INTERNAL_ROSE_CHAIN; - }; - - if (!any_of(begin(reports), end(reports), report_needs_catchup)) { - DEBUG_PRINTF("none of the given reports needs catchup\n"); - return; - } - +static +void makeRoleCheckLeftfix(const RoseBuildImpl &build, + const map<RoseVertex, left_build_info> &leftfix_info, + RoseVertex v, RoseProgram &program) { + auto it = leftfix_info.find(v); + if (it == end(leftfix_info)) { + return; + } + const left_build_info &lni = it->second; + if (lni.has_lookaround) { + return; // Leftfix completely implemented by lookaround. + } + + assert(!build.cc.streaming || + build.g[v].left.lag <= MAX_STORED_LEFTFIX_LAG); + + bool is_prefix = build.isRootSuccessor(v); + const auto *end_inst = program.end_instruction(); + + unique_ptr<RoseInstruction> ri; + if (is_prefix) { + ri = std::make_unique<RoseInstrCheckPrefix>(lni.queue, build.g[v].left.lag, + build.g[v].left.leftfix_report, + end_inst); + } else { + ri = std::make_unique<RoseInstrCheckInfix>(lni.queue, build.g[v].left.lag, + build.g[v].left.leftfix_report, + end_inst); + } + program.add_before_end(move(ri)); +} + +static +void makeAnchoredLiteralDelay(const RoseBuildImpl &build, + const ProgramBuild &prog_build, u32 lit_id, + RoseProgram &program) { + // Only relevant for literals in the anchored table. + const rose_literal_id &lit = build.literals.at(lit_id); + if (lit.table != ROSE_ANCHORED) { + return; + } + + // If this literal match cannot occur after floatingMinLiteralMatchOffset, + // we do not need this check. 
+ bool all_too_early = true; + rose_group groups = 0; + + const auto &lit_vertices = build.literal_info.at(lit_id).vertices; + for (RoseVertex v : lit_vertices) { + if (build.g[v].max_offset > prog_build.floatingMinLiteralMatchOffset) { + all_too_early = false; + } + groups |= build.g[v].groups; + } + + if (all_too_early) { + return; + } + + assert(contains(prog_build.anchored_programs, lit_id)); + u32 anch_id = prog_build.anchored_programs.at(lit_id); + + const auto *end_inst = program.end_instruction(); + auto ri = std::make_unique<RoseInstrAnchoredDelay>(groups, anch_id, end_inst); + program.add_before_end(move(ri)); +} + +static +void makeDedupe(const ReportManager &rm, const Report &report, + RoseProgram &program) { + const auto *end_inst = program.end_instruction(); + auto ri = + std::make_unique<RoseInstrDedupe>(report.quashSom, rm.getDkey(report), + report.offsetAdjust, end_inst); + program.add_before_end(move(ri)); +} + +static +void makeDedupeSom(const ReportManager &rm, const Report &report, + RoseProgram &program) { + const auto *end_inst = program.end_instruction(); + auto ri = std::make_unique<RoseInstrDedupeSom>(report.quashSom, + rm.getDkey(report), + report.offsetAdjust, end_inst); + program.add_before_end(move(ri)); +} + +static +void makeCatchup(const ReportManager &rm, bool needs_catchup, + const flat_set<ReportID> &reports, RoseProgram &program) { + if (!needs_catchup) { + return; + } + + // Everything except the INTERNAL_ROSE_CHAIN report needs catchup to run + // before reports are triggered. + + auto report_needs_catchup = [&](const ReportID &id) { + const Report &report = rm.getReport(id); + return report.type != INTERNAL_ROSE_CHAIN; + }; + + if (!any_of(begin(reports), end(reports), report_needs_catchup)) { + DEBUG_PRINTF("none of the given reports needs catchup\n"); + return; + } + program.add_before_end(std::make_unique<RoseInstrCatchUp>()); -} - -static -void writeSomOperation(const Report &report, som_operation *op) { - assert(op); - - memset(op, 0, sizeof(*op)); - - switch (report.type) { - case EXTERNAL_CALLBACK_SOM_REL: - op->type = SOM_EXTERNAL_CALLBACK_REL; - break; - case INTERNAL_SOM_LOC_SET: - op->type = SOM_INTERNAL_LOC_SET; - break; - case INTERNAL_SOM_LOC_SET_IF_UNSET: - op->type = SOM_INTERNAL_LOC_SET_IF_UNSET; - break; - case INTERNAL_SOM_LOC_SET_IF_WRITABLE: - op->type = SOM_INTERNAL_LOC_SET_IF_WRITABLE; - break; - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA: - op->type = SOM_INTERNAL_LOC_SET_REV_NFA; - break; - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET: - op->type = SOM_INTERNAL_LOC_SET_REV_NFA_IF_UNSET; - break; - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE: - op->type = SOM_INTERNAL_LOC_SET_REV_NFA_IF_WRITABLE; - break; - case INTERNAL_SOM_LOC_COPY: - op->type = SOM_INTERNAL_LOC_COPY; - break; - case INTERNAL_SOM_LOC_COPY_IF_WRITABLE: - op->type = SOM_INTERNAL_LOC_COPY_IF_WRITABLE; - break; - case INTERNAL_SOM_LOC_MAKE_WRITABLE: - op->type = SOM_INTERNAL_LOC_MAKE_WRITABLE; - break; - case EXTERNAL_CALLBACK_SOM_STORED: - op->type = SOM_EXTERNAL_CALLBACK_STORED; - break; - case EXTERNAL_CALLBACK_SOM_ABS: - op->type = SOM_EXTERNAL_CALLBACK_ABS; - break; - case EXTERNAL_CALLBACK_SOM_REV_NFA: - op->type = SOM_EXTERNAL_CALLBACK_REV_NFA; - break; - case INTERNAL_SOM_LOC_SET_FROM: - op->type = SOM_INTERNAL_LOC_SET_FROM; - break; - case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE: - op->type = SOM_INTERNAL_LOC_SET_FROM_IF_WRITABLE; - break; - default: - // This report doesn't correspond to a SOM operation. 
- assert(0); - throw CompileError("Unable to generate bytecode."); - } - - op->onmatch = report.onmatch; - - switch (report.type) { - case EXTERNAL_CALLBACK_SOM_REV_NFA: - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA: - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET: - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE: - op->aux.revNfaIndex = report.revNfaIndex; - break; - default: - op->aux.somDistance = report.somDistance; - break; - } -} - -static +} + +static +void writeSomOperation(const Report &report, som_operation *op) { + assert(op); + + memset(op, 0, sizeof(*op)); + + switch (report.type) { + case EXTERNAL_CALLBACK_SOM_REL: + op->type = SOM_EXTERNAL_CALLBACK_REL; + break; + case INTERNAL_SOM_LOC_SET: + op->type = SOM_INTERNAL_LOC_SET; + break; + case INTERNAL_SOM_LOC_SET_IF_UNSET: + op->type = SOM_INTERNAL_LOC_SET_IF_UNSET; + break; + case INTERNAL_SOM_LOC_SET_IF_WRITABLE: + op->type = SOM_INTERNAL_LOC_SET_IF_WRITABLE; + break; + case INTERNAL_SOM_LOC_SET_SOM_REV_NFA: + op->type = SOM_INTERNAL_LOC_SET_REV_NFA; + break; + case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET: + op->type = SOM_INTERNAL_LOC_SET_REV_NFA_IF_UNSET; + break; + case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE: + op->type = SOM_INTERNAL_LOC_SET_REV_NFA_IF_WRITABLE; + break; + case INTERNAL_SOM_LOC_COPY: + op->type = SOM_INTERNAL_LOC_COPY; + break; + case INTERNAL_SOM_LOC_COPY_IF_WRITABLE: + op->type = SOM_INTERNAL_LOC_COPY_IF_WRITABLE; + break; + case INTERNAL_SOM_LOC_MAKE_WRITABLE: + op->type = SOM_INTERNAL_LOC_MAKE_WRITABLE; + break; + case EXTERNAL_CALLBACK_SOM_STORED: + op->type = SOM_EXTERNAL_CALLBACK_STORED; + break; + case EXTERNAL_CALLBACK_SOM_ABS: + op->type = SOM_EXTERNAL_CALLBACK_ABS; + break; + case EXTERNAL_CALLBACK_SOM_REV_NFA: + op->type = SOM_EXTERNAL_CALLBACK_REV_NFA; + break; + case INTERNAL_SOM_LOC_SET_FROM: + op->type = SOM_INTERNAL_LOC_SET_FROM; + break; + case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE: + op->type = SOM_INTERNAL_LOC_SET_FROM_IF_WRITABLE; + break; + default: + // This report doesn't correspond to a SOM operation. + assert(0); + throw CompileError("Unable to generate bytecode."); + } + + op->onmatch = report.onmatch; + + switch (report.type) { + case EXTERNAL_CALLBACK_SOM_REV_NFA: + case INTERNAL_SOM_LOC_SET_SOM_REV_NFA: + case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET: + case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE: + op->aux.revNfaIndex = report.revNfaIndex; + break; + default: + op->aux.somDistance = report.somDistance; + break; + } +} + +static void addLogicalSetRequired(const Report &report, ReportManager &rm, RoseProgram &program) { if (report.lkey == INVALID_LKEY) { @@ -522,60 +522,60 @@ void addLogicalSetRequired(const Report &report, ReportManager &rm, } static -void makeReport(const RoseBuildImpl &build, const ReportID id, - const bool has_som, RoseProgram &program) { - assert(id < build.rm.numReports()); - const Report &report = build.rm.getReport(id); - - RoseProgram report_block; - const RoseInstruction *end_inst = report_block.end_instruction(); - - // Handle min/max offset checks. - if (report.minOffset > 0 || report.maxOffset < MAX_OFFSET) { - auto ri = std::make_unique<RoseInstrCheckBounds>(report.minOffset, - report.maxOffset, end_inst); - report_block.add_before_end(move(ri)); - } - - // If this report has an exhaustion key, we can check it in the program - // rather than waiting until we're in the callback adaptor. 
- if (report.ekey != INVALID_EKEY) { - auto ri = std::make_unique<RoseInstrCheckExhausted>(report.ekey, end_inst); - report_block.add_before_end(move(ri)); - } - - // External SOM reports that aren't passthrough need their SOM value - // calculated. - if (isExternalSomReport(report) && - report.type != EXTERNAL_CALLBACK_SOM_PASS) { - auto ri = std::make_unique<RoseInstrSomFromReport>(); - writeSomOperation(report, &ri->som); - report_block.add_before_end(move(ri)); - } - - // Min length constraint. - if (report.minLength > 0) { - assert(build.hasSom); - auto ri = std::make_unique<RoseInstrCheckMinLength>( - report.offsetAdjust, report.minLength, end_inst); - report_block.add_before_end(move(ri)); - } - - if (report.quashSom) { +void makeReport(const RoseBuildImpl &build, const ReportID id, + const bool has_som, RoseProgram &program) { + assert(id < build.rm.numReports()); + const Report &report = build.rm.getReport(id); + + RoseProgram report_block; + const RoseInstruction *end_inst = report_block.end_instruction(); + + // Handle min/max offset checks. + if (report.minOffset > 0 || report.maxOffset < MAX_OFFSET) { + auto ri = std::make_unique<RoseInstrCheckBounds>(report.minOffset, + report.maxOffset, end_inst); + report_block.add_before_end(move(ri)); + } + + // If this report has an exhaustion key, we can check it in the program + // rather than waiting until we're in the callback adaptor. + if (report.ekey != INVALID_EKEY) { + auto ri = std::make_unique<RoseInstrCheckExhausted>(report.ekey, end_inst); + report_block.add_before_end(move(ri)); + } + + // External SOM reports that aren't passthrough need their SOM value + // calculated. + if (isExternalSomReport(report) && + report.type != EXTERNAL_CALLBACK_SOM_PASS) { + auto ri = std::make_unique<RoseInstrSomFromReport>(); + writeSomOperation(report, &ri->som); + report_block.add_before_end(move(ri)); + } + + // Min length constraint. + if (report.minLength > 0) { + assert(build.hasSom); + auto ri = std::make_unique<RoseInstrCheckMinLength>( + report.offsetAdjust, report.minLength, end_inst); + report_block.add_before_end(move(ri)); + } + + if (report.quashSom) { report_block.add_before_end(std::make_unique<RoseInstrSomZero>()); - } - - switch (report.type) { - case EXTERNAL_CALLBACK: + } + + switch (report.type) { + case EXTERNAL_CALLBACK: if (build.rm.numCkeys()) { addFlushCombinationProgram(report_block); } - if (!has_som) { - // Dedupe is only necessary if this report has a dkey, or if there - // are SOM reports to catch up. - bool needs_dedupe = build.rm.getDkey(report) != ~0U || build.hasSom; - if (report.ekey == INVALID_EKEY) { - if (needs_dedupe) { + if (!has_som) { + // Dedupe is only necessary if this report has a dkey, or if there + // are SOM reports to catch up. 
+ bool needs_dedupe = build.rm.getDkey(report) != ~0U || build.hasSom; + if (report.ekey == INVALID_EKEY) { + if (needs_dedupe) { if (!report.quiet) { report_block.add_before_end( std::make_unique<RoseInstrDedupeAndReport>( @@ -584,17 +584,17 @@ void makeReport(const RoseBuildImpl &build, const ReportID id, } else { makeDedupe(build.rm, report, report_block); } - } else { + } else { if (!report.quiet) { report_block.add_before_end( std::make_unique<RoseInstrReport>( report.onmatch, report.offsetAdjust)); } - } - } else { - if (needs_dedupe) { - makeDedupe(build.rm, report, report_block); - } + } + } else { + if (needs_dedupe) { + makeDedupe(build.rm, report, report_block); + } if (!report.quiet) { report_block.add_before_end( std::make_unique<RoseInstrReportExhaust>( @@ -603,15 +603,15 @@ void makeReport(const RoseBuildImpl &build, const ReportID id, report_block.add_before_end( std::make_unique<RoseInstrSetExhaust>(report.ekey)); } - } - } else { // has_som - makeDedupeSom(build.rm, report, report_block); - if (report.ekey == INVALID_EKEY) { + } + } else { // has_som + makeDedupeSom(build.rm, report, report_block); + if (report.ekey == INVALID_EKEY) { if (!report.quiet) { report_block.add_before_end(std::make_unique<RoseInstrReportSom>( report.onmatch, report.offsetAdjust)); } - } else { + } else { if (!report.quiet) { report_block.add_before_end( std::make_unique<RoseInstrReportSomExhaust>( @@ -620,53 +620,53 @@ void makeReport(const RoseBuildImpl &build, const ReportID id, report_block.add_before_end( std::make_unique<RoseInstrSetExhaust>(report.ekey)); } - } - } + } + } addLogicalSetRequired(report, build.rm, report_block); - break; - case INTERNAL_SOM_LOC_SET: - case INTERNAL_SOM_LOC_SET_IF_UNSET: - case INTERNAL_SOM_LOC_SET_IF_WRITABLE: - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA: - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET: - case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE: - case INTERNAL_SOM_LOC_COPY: - case INTERNAL_SOM_LOC_COPY_IF_WRITABLE: - case INTERNAL_SOM_LOC_MAKE_WRITABLE: - case INTERNAL_SOM_LOC_SET_FROM: - case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE: + break; + case INTERNAL_SOM_LOC_SET: + case INTERNAL_SOM_LOC_SET_IF_UNSET: + case INTERNAL_SOM_LOC_SET_IF_WRITABLE: + case INTERNAL_SOM_LOC_SET_SOM_REV_NFA: + case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_UNSET: + case INTERNAL_SOM_LOC_SET_SOM_REV_NFA_IF_WRITABLE: + case INTERNAL_SOM_LOC_COPY: + case INTERNAL_SOM_LOC_COPY_IF_WRITABLE: + case INTERNAL_SOM_LOC_MAKE_WRITABLE: + case INTERNAL_SOM_LOC_SET_FROM: + case INTERNAL_SOM_LOC_SET_FROM_IF_WRITABLE: if (build.rm.numCkeys()) { addFlushCombinationProgram(report_block); } - if (has_som) { - auto ri = std::make_unique<RoseInstrReportSomAware>(); - writeSomOperation(report, &ri->som); - report_block.add_before_end(move(ri)); - } else { - auto ri = std::make_unique<RoseInstrReportSomInt>(); - writeSomOperation(report, &ri->som); - report_block.add_before_end(move(ri)); - } - break; - case INTERNAL_ROSE_CHAIN: { + if (has_som) { + auto ri = std::make_unique<RoseInstrReportSomAware>(); + writeSomOperation(report, &ri->som); + report_block.add_before_end(move(ri)); + } else { + auto ri = std::make_unique<RoseInstrReportSomInt>(); + writeSomOperation(report, &ri->som); + report_block.add_before_end(move(ri)); + } + break; + case INTERNAL_ROSE_CHAIN: { report_block.add_before_end(std::make_unique<RoseInstrReportChain>( - report.onmatch, report.topSquashDistance)); - break; - } - case EXTERNAL_CALLBACK_SOM_REL: - case EXTERNAL_CALLBACK_SOM_STORED: - case EXTERNAL_CALLBACK_SOM_ABS: - 
case EXTERNAL_CALLBACK_SOM_REV_NFA: + report.onmatch, report.topSquashDistance)); + break; + } + case EXTERNAL_CALLBACK_SOM_REL: + case EXTERNAL_CALLBACK_SOM_STORED: + case EXTERNAL_CALLBACK_SOM_ABS: + case EXTERNAL_CALLBACK_SOM_REV_NFA: if (build.rm.numCkeys()) { addFlushCombinationProgram(report_block); } - makeDedupeSom(build.rm, report, report_block); - if (report.ekey == INVALID_EKEY) { + makeDedupeSom(build.rm, report, report_block); + if (report.ekey == INVALID_EKEY) { if (!report.quiet) { report_block.add_before_end(std::make_unique<RoseInstrReportSom>( report.onmatch, report.offsetAdjust)); } - } else { + } else { if (!report.quiet) { report_block.add_before_end( std::make_unique<RoseInstrReportSomExhaust>( @@ -675,20 +675,20 @@ void makeReport(const RoseBuildImpl &build, const ReportID id, report_block.add_before_end( std::make_unique<RoseInstrSetExhaust>(report.ekey)); } - } + } addLogicalSetRequired(report, build.rm, report_block); - break; - case EXTERNAL_CALLBACK_SOM_PASS: + break; + case EXTERNAL_CALLBACK_SOM_PASS: if (build.rm.numCkeys()) { addFlushCombinationProgram(report_block); } - makeDedupeSom(build.rm, report, report_block); - if (report.ekey == INVALID_EKEY) { + makeDedupeSom(build.rm, report, report_block); + if (report.ekey == INVALID_EKEY) { if (!report.quiet) { report_block.add_before_end(std::make_unique<RoseInstrReportSom>( report.onmatch, report.offsetAdjust)); } - } else { + } else { if (!report.quiet) { report_block.add_before_end( std::make_unique<RoseInstrReportSomExhaust>( @@ -697,370 +697,370 @@ void makeReport(const RoseBuildImpl &build, const ReportID id, report_block.add_before_end( std::make_unique<RoseInstrSetExhaust>(report.ekey)); } - } + } addLogicalSetRequired(report, build.rm, report_block); - break; - - default: - assert(0); - throw CompileError("Unable to generate bytecode."); - } - - program.add_block(move(report_block)); -} - -static -void makeRoleReports(const RoseBuildImpl &build, - const std::map<RoseVertex, left_build_info> &leftfix_info, - bool needs_catchup, RoseVertex v, RoseProgram &program) { - const auto &g = build.g; - - bool report_som = false; - if (g[v].left.tracksSom()) { - /* we are a suffaig - need to update role to provide som to the - * suffix. */ - assert(contains(leftfix_info, v)); - const left_build_info &lni = leftfix_info.at(v); - program.add_before_end( - std::make_unique<RoseInstrSomLeftfix>(lni.queue, g[v].left.lag)); - report_som = true; - } else if (g[v].som_adjust) { - program.add_before_end( - std::make_unique<RoseInstrSomAdjust>(g[v].som_adjust)); - report_som = true; - } - - makeCatchup(build.rm, needs_catchup, g[v].reports, program); - - RoseProgram report_block; - for (ReportID id : g[v].reports) { - makeReport(build, id, report_som, report_block); - } - program.add_before_end(move(report_block)); -} - -static -void makeRoleSetState(const unordered_map<RoseVertex, u32> &roleStateIndices, - RoseVertex v, RoseProgram &program) { - // We only need this instruction if a state index has been assigned to this - // vertex. 
- auto it = roleStateIndices.find(v); - if (it == end(roleStateIndices)) { - return; - } + break; + + default: + assert(0); + throw CompileError("Unable to generate bytecode."); + } + + program.add_block(move(report_block)); +} + +static +void makeRoleReports(const RoseBuildImpl &build, + const std::map<RoseVertex, left_build_info> &leftfix_info, + bool needs_catchup, RoseVertex v, RoseProgram &program) { + const auto &g = build.g; + + bool report_som = false; + if (g[v].left.tracksSom()) { + /* we are a suffaig - need to update role to provide som to the + * suffix. */ + assert(contains(leftfix_info, v)); + const left_build_info &lni = leftfix_info.at(v); + program.add_before_end( + std::make_unique<RoseInstrSomLeftfix>(lni.queue, g[v].left.lag)); + report_som = true; + } else if (g[v].som_adjust) { + program.add_before_end( + std::make_unique<RoseInstrSomAdjust>(g[v].som_adjust)); + report_som = true; + } + + makeCatchup(build.rm, needs_catchup, g[v].reports, program); + + RoseProgram report_block; + for (ReportID id : g[v].reports) { + makeReport(build, id, report_som, report_block); + } + program.add_before_end(move(report_block)); +} + +static +void makeRoleSetState(const unordered_map<RoseVertex, u32> &roleStateIndices, + RoseVertex v, RoseProgram &program) { + // We only need this instruction if a state index has been assigned to this + // vertex. + auto it = roleStateIndices.find(v); + if (it == end(roleStateIndices)) { + return; + } program.add_before_end(std::make_unique<RoseInstrSetState>(it->second)); -} - -static -void makePushDelayedInstructions(const RoseLiteralMap &literals, - ProgramBuild &prog_build, - const flat_set<u32> &delayed_ids, - RoseProgram &program) { - vector<RoseInstrPushDelayed> delay_instructions; - - for (const auto &delayed_lit_id : delayed_ids) { - DEBUG_PRINTF("delayed lit id %u\n", delayed_lit_id); - assert(contains(prog_build.delay_programs, delayed_lit_id)); - u32 delay_id = prog_build.delay_programs.at(delayed_lit_id); - const auto &delay_lit = literals.at(delayed_lit_id); - delay_instructions.emplace_back(verify_u8(delay_lit.delay), delay_id); - } - - sort_and_unique(delay_instructions, [](const RoseInstrPushDelayed &a, - const RoseInstrPushDelayed &b) { - return tie(a.delay, a.index) < tie(b.delay, b.index); - }); - - for (const auto &ri : delay_instructions) { +} + +static +void makePushDelayedInstructions(const RoseLiteralMap &literals, + ProgramBuild &prog_build, + const flat_set<u32> &delayed_ids, + RoseProgram &program) { + vector<RoseInstrPushDelayed> delay_instructions; + + for (const auto &delayed_lit_id : delayed_ids) { + DEBUG_PRINTF("delayed lit id %u\n", delayed_lit_id); + assert(contains(prog_build.delay_programs, delayed_lit_id)); + u32 delay_id = prog_build.delay_programs.at(delayed_lit_id); + const auto &delay_lit = literals.at(delayed_lit_id); + delay_instructions.emplace_back(verify_u8(delay_lit.delay), delay_id); + } + + sort_and_unique(delay_instructions, [](const RoseInstrPushDelayed &a, + const RoseInstrPushDelayed &b) { + return tie(a.delay, a.index) < tie(b.delay, b.index); + }); + + for (const auto &ri : delay_instructions) { program.add_before_end(std::make_unique<RoseInstrPushDelayed>(ri)); - } -} - -static -void makeCheckLiteralInstruction(const rose_literal_id &lit, - size_t longLitLengthThreshold, - RoseProgram &program, - const CompileContext &cc) { - assert(longLitLengthThreshold > 0); - - DEBUG_PRINTF("lit=%s, long lit threshold %zu\n", dumpString(lit.s).c_str(), - longLitLengthThreshold); - - if (lit.s.length() <= 
ROSE_SHORT_LITERAL_LEN_MAX) { - DEBUG_PRINTF("lit short enough to not need confirm\n"); - return; - } - - // Check resource limits as well. - if (lit.s.length() > cc.grey.limitLiteralLength) { - throw ResourceLimitError(); - } - - if (lit.s.length() <= longLitLengthThreshold) { - DEBUG_PRINTF("is a medium-length literal\n"); - const auto *end_inst = program.end_instruction(); - unique_ptr<RoseInstruction> ri; - if (lit.s.any_nocase()) { - ri = std::make_unique<RoseInstrCheckMedLitNocase>(lit.s.get_string(), - end_inst); - } else { - ri = std::make_unique<RoseInstrCheckMedLit>(lit.s.get_string(), - end_inst); - } - program.add_before_end(move(ri)); - return; - } - - // Long literal support should only really be used for the floating table - // in streaming mode. - assert(lit.table == ROSE_FLOATING && cc.streaming); - - DEBUG_PRINTF("is a long literal\n"); - - const auto *end_inst = program.end_instruction(); - unique_ptr<RoseInstruction> ri; - if (lit.s.any_nocase()) { - ri = std::make_unique<RoseInstrCheckLongLitNocase>(lit.s.get_string(), - end_inst); - } else { - ri = std::make_unique<RoseInstrCheckLongLit>(lit.s.get_string(), end_inst); - } - program.add_before_end(move(ri)); -} - -static -void makeRoleCheckNotHandled(ProgramBuild &prog_build, RoseVertex v, - RoseProgram &program) { - u32 handled_key; - if (contains(prog_build.handledKeys, v)) { - handled_key = prog_build.handledKeys.at(v); - } else { - handled_key = verify_u32(prog_build.handledKeys.size()); - prog_build.handledKeys.emplace(v, handled_key); - } - - const auto *end_inst = program.end_instruction(); - auto ri = std::make_unique<RoseInstrCheckNotHandled>(handled_key, end_inst); - program.add_before_end(move(ri)); -} - -static -void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v, - const RoseEdge &e, RoseProgram &program) { - const RoseGraph &g = build.g; - const RoseVertex u = source(e, g); - - // We know that we can trust the anchored table (DFA) to always deliver us - // literals at the correct offset. - if (build.isAnchored(v)) { - DEBUG_PRINTF("literal in anchored table, skipping bounds check\n"); - return; - } - - // Use the minimum literal length. - u32 lit_length = g[v].eod_accept ? 0 : verify_u32(build.minLiteralLen(v)); - - u64a min_bound = g[e].minBound + lit_length; - u64a max_bound = g[e].maxBound == ROSE_BOUND_INF - ? ROSE_BOUND_INF - : g[e].maxBound + lit_length; - - if (g[e].history == ROSE_ROLE_HISTORY_ANCH) { - assert(g[u].fixedOffset()); - // Make offsets absolute. - min_bound += g[u].max_offset; - if (max_bound != ROSE_BOUND_INF) { - max_bound += g[u].max_offset; - } - } - - assert(max_bound <= ROSE_BOUND_INF); - assert(min_bound <= max_bound); - - // CHECK_BOUNDS instruction uses 64-bit bounds, so we can use MAX_OFFSET - // (max value of a u64a) to represent ROSE_BOUND_INF. - if (max_bound == ROSE_BOUND_INF) { - max_bound = MAX_OFFSET; - } - - // This instruction should be doing _something_ -- bounds should be tighter - // than just {length, inf}. - assert(min_bound > lit_length || max_bound < MAX_OFFSET); - - const auto *end_inst = program.end_instruction(); - program.add_before_end( - std::make_unique<RoseInstrCheckBounds>(min_bound, max_bound, end_inst)); -} - -static -void makeRoleGroups(const RoseGraph &g, ProgramBuild &prog_build, - RoseVertex v, RoseProgram &program) { - rose_group groups = g[v].groups; - if (!groups) { - return; - } - - // The set of "already on" groups as we process this vertex is the - // intersection of the groups set by our predecessors. 
- assert(in_degree(v, g) > 0); - rose_group already_on = ~rose_group{0}; - for (const auto &u : inv_adjacent_vertices_range(v, g)) { - already_on &= prog_build.vertex_group_map.at(u); - } - - DEBUG_PRINTF("already_on=0x%llx\n", already_on); - DEBUG_PRINTF("squashable=0x%llx\n", prog_build.squashable_groups); - DEBUG_PRINTF("groups=0x%llx\n", groups); - - already_on &= ~prog_build.squashable_groups; - DEBUG_PRINTF("squashed already_on=0x%llx\n", already_on); - - // We don't *have* to mask off the groups that we know are already on, but - // this will make bugs more apparent. - groups &= ~already_on; - - if (!groups) { - DEBUG_PRINTF("no new groups to set, skipping\n"); - return; - } - + } +} + +static +void makeCheckLiteralInstruction(const rose_literal_id &lit, + size_t longLitLengthThreshold, + RoseProgram &program, + const CompileContext &cc) { + assert(longLitLengthThreshold > 0); + + DEBUG_PRINTF("lit=%s, long lit threshold %zu\n", dumpString(lit.s).c_str(), + longLitLengthThreshold); + + if (lit.s.length() <= ROSE_SHORT_LITERAL_LEN_MAX) { + DEBUG_PRINTF("lit short enough to not need confirm\n"); + return; + } + + // Check resource limits as well. + if (lit.s.length() > cc.grey.limitLiteralLength) { + throw ResourceLimitError(); + } + + if (lit.s.length() <= longLitLengthThreshold) { + DEBUG_PRINTF("is a medium-length literal\n"); + const auto *end_inst = program.end_instruction(); + unique_ptr<RoseInstruction> ri; + if (lit.s.any_nocase()) { + ri = std::make_unique<RoseInstrCheckMedLitNocase>(lit.s.get_string(), + end_inst); + } else { + ri = std::make_unique<RoseInstrCheckMedLit>(lit.s.get_string(), + end_inst); + } + program.add_before_end(move(ri)); + return; + } + + // Long literal support should only really be used for the floating table + // in streaming mode. + assert(lit.table == ROSE_FLOATING && cc.streaming); + + DEBUG_PRINTF("is a long literal\n"); + + const auto *end_inst = program.end_instruction(); + unique_ptr<RoseInstruction> ri; + if (lit.s.any_nocase()) { + ri = std::make_unique<RoseInstrCheckLongLitNocase>(lit.s.get_string(), + end_inst); + } else { + ri = std::make_unique<RoseInstrCheckLongLit>(lit.s.get_string(), end_inst); + } + program.add_before_end(move(ri)); +} + +static +void makeRoleCheckNotHandled(ProgramBuild &prog_build, RoseVertex v, + RoseProgram &program) { + u32 handled_key; + if (contains(prog_build.handledKeys, v)) { + handled_key = prog_build.handledKeys.at(v); + } else { + handled_key = verify_u32(prog_build.handledKeys.size()); + prog_build.handledKeys.emplace(v, handled_key); + } + + const auto *end_inst = program.end_instruction(); + auto ri = std::make_unique<RoseInstrCheckNotHandled>(handled_key, end_inst); + program.add_before_end(move(ri)); +} + +static +void makeRoleCheckBounds(const RoseBuildImpl &build, RoseVertex v, + const RoseEdge &e, RoseProgram &program) { + const RoseGraph &g = build.g; + const RoseVertex u = source(e, g); + + // We know that we can trust the anchored table (DFA) to always deliver us + // literals at the correct offset. + if (build.isAnchored(v)) { + DEBUG_PRINTF("literal in anchored table, skipping bounds check\n"); + return; + } + + // Use the minimum literal length. + u32 lit_length = g[v].eod_accept ? 0 : verify_u32(build.minLiteralLen(v)); + + u64a min_bound = g[e].minBound + lit_length; + u64a max_bound = g[e].maxBound == ROSE_BOUND_INF + ? ROSE_BOUND_INF + : g[e].maxBound + lit_length; + + if (g[e].history == ROSE_ROLE_HISTORY_ANCH) { + assert(g[u].fixedOffset()); + // Make offsets absolute. 
+ min_bound += g[u].max_offset; + if (max_bound != ROSE_BOUND_INF) { + max_bound += g[u].max_offset; + } + } + + assert(max_bound <= ROSE_BOUND_INF); + assert(min_bound <= max_bound); + + // CHECK_BOUNDS instruction uses 64-bit bounds, so we can use MAX_OFFSET + // (max value of a u64a) to represent ROSE_BOUND_INF. + if (max_bound == ROSE_BOUND_INF) { + max_bound = MAX_OFFSET; + } + + // This instruction should be doing _something_ -- bounds should be tighter + // than just {length, inf}. + assert(min_bound > lit_length || max_bound < MAX_OFFSET); + + const auto *end_inst = program.end_instruction(); + program.add_before_end( + std::make_unique<RoseInstrCheckBounds>(min_bound, max_bound, end_inst)); +} + +static +void makeRoleGroups(const RoseGraph &g, ProgramBuild &prog_build, + RoseVertex v, RoseProgram &program) { + rose_group groups = g[v].groups; + if (!groups) { + return; + } + + // The set of "already on" groups as we process this vertex is the + // intersection of the groups set by our predecessors. + assert(in_degree(v, g) > 0); + rose_group already_on = ~rose_group{0}; + for (const auto &u : inv_adjacent_vertices_range(v, g)) { + already_on &= prog_build.vertex_group_map.at(u); + } + + DEBUG_PRINTF("already_on=0x%llx\n", already_on); + DEBUG_PRINTF("squashable=0x%llx\n", prog_build.squashable_groups); + DEBUG_PRINTF("groups=0x%llx\n", groups); + + already_on &= ~prog_build.squashable_groups; + DEBUG_PRINTF("squashed already_on=0x%llx\n", already_on); + + // We don't *have* to mask off the groups that we know are already on, but + // this will make bugs more apparent. + groups &= ~already_on; + + if (!groups) { + DEBUG_PRINTF("no new groups to set, skipping\n"); + return; + } + program.add_before_end(std::make_unique<RoseInstrSetGroups>(groups)); -} - -static -bool checkReachMask(const CharReach &cr, u8 &andmask, u8 &cmpmask) { - size_t reach_size = cr.count(); - assert(reach_size > 0); - // check whether entry_size is some power of 2. 
- if ((reach_size - 1) & reach_size) { - return false; - } - make_and_cmp_mask(cr, &andmask, &cmpmask); - if ((1 << popcount32((u8)(~andmask))) ^ reach_size) { - return false; - } - return true; -} - -static -bool checkReachWithFlip(const CharReach &cr, u8 &andmask, - u8 &cmpmask, u8 &flip) { - if (checkReachMask(cr, andmask, cmpmask)) { - flip = 0; - return true; - } - if (checkReachMask(~cr, andmask, cmpmask)) { - flip = 1; - return true; - } - return false; -} - -static -bool makeRoleByte(const vector<LookEntry> &look, RoseProgram &program) { - if (look.size() == 1) { - const auto &entry = look[0]; - u8 andmask_u8, cmpmask_u8; - u8 flip; - if (!checkReachWithFlip(entry.reach, andmask_u8, cmpmask_u8, flip)) { - return false; - } - s32 checkbyte_offset = verify_s32(entry.offset); - DEBUG_PRINTF("CHECK BYTE offset=%d\n", checkbyte_offset); - const auto *end_inst = program.end_instruction(); - auto ri = std::make_unique<RoseInstrCheckByte>(andmask_u8, cmpmask_u8, flip, - checkbyte_offset, end_inst); - program.add_before_end(move(ri)); - return true; - } - return false; -} - -static -bool makeRoleMask(const vector<LookEntry> &look, RoseProgram &program) { - if (look.back().offset < look.front().offset + 8) { - s32 base_offset = verify_s32(look.front().offset); - u64a and_mask = 0; - u64a cmp_mask = 0; - u64a neg_mask = 0; - for (const auto &entry : look) { - u8 andmask_u8, cmpmask_u8, flip; - if (!checkReachWithFlip(entry.reach, andmask_u8, - cmpmask_u8, flip)) { - return false; - } - DEBUG_PRINTF("entry offset %d\n", entry.offset); - u32 shift = (entry.offset - base_offset) << 3; - and_mask |= (u64a)andmask_u8 << shift; - cmp_mask |= (u64a)cmpmask_u8 << shift; - if (flip) { - neg_mask |= 0xffLLU << shift; - } - } - DEBUG_PRINTF("CHECK MASK and_mask=%llx cmp_mask=%llx\n", - and_mask, cmp_mask); - const auto *end_inst = program.end_instruction(); - auto ri = std::make_unique<RoseInstrCheckMask>(and_mask, cmp_mask, neg_mask, - base_offset, end_inst); - program.add_before_end(move(ri)); - return true; - } - return false; -} - -static UNUSED -string convertMaskstoString(u8 *p, int byte_len) { - string s; - for (int i = 0; i < byte_len; i++) { - u8 hi = *p >> 4; - u8 lo = *p & 0xf; - s += (char)(hi + (hi < 10 ? 48 : 87)); - s += (char)(lo + (lo < 10 ? 
48 : 87)); - p++; - } - return s; -} - -static -bool makeRoleMask32(const vector<LookEntry> &look, - RoseProgram &program) { - if (look.back().offset >= look.front().offset + 32) { - return false; - } - s32 base_offset = verify_s32(look.front().offset); - array<u8, 32> and_mask, cmp_mask; - and_mask.fill(0); - cmp_mask.fill(0); - u32 neg_mask = 0; - for (const auto &entry : look) { - u8 andmask_u8, cmpmask_u8, flip; - if (!checkReachWithFlip(entry.reach, andmask_u8, - cmpmask_u8, flip)) { - return false; - } - u32 shift = entry.offset - base_offset; - assert(shift < 32); - and_mask[shift] = andmask_u8; - cmp_mask[shift] = cmpmask_u8; - if (flip) { - neg_mask |= 1 << shift; - } - } - - DEBUG_PRINTF("and_mask %s\n", - convertMaskstoString(and_mask.data(), 32).c_str()); - DEBUG_PRINTF("cmp_mask %s\n", - convertMaskstoString(cmp_mask.data(), 32).c_str()); - DEBUG_PRINTF("neg_mask %08x\n", neg_mask); - DEBUG_PRINTF("base_offset %d\n", base_offset); - - const auto *end_inst = program.end_instruction(); - auto ri = std::make_unique<RoseInstrCheckMask32>(and_mask, cmp_mask, neg_mask, - base_offset, end_inst); - program.add_before_end(move(ri)); - return true; -} - +} + +static +bool checkReachMask(const CharReach &cr, u8 &andmask, u8 &cmpmask) { + size_t reach_size = cr.count(); + assert(reach_size > 0); + // check whether entry_size is some power of 2. + if ((reach_size - 1) & reach_size) { + return false; + } + make_and_cmp_mask(cr, &andmask, &cmpmask); + if ((1 << popcount32((u8)(~andmask))) ^ reach_size) { + return false; + } + return true; +} + +static +bool checkReachWithFlip(const CharReach &cr, u8 &andmask, + u8 &cmpmask, u8 &flip) { + if (checkReachMask(cr, andmask, cmpmask)) { + flip = 0; + return true; + } + if (checkReachMask(~cr, andmask, cmpmask)) { + flip = 1; + return true; + } + return false; +} + +static +bool makeRoleByte(const vector<LookEntry> &look, RoseProgram &program) { + if (look.size() == 1) { + const auto &entry = look[0]; + u8 andmask_u8, cmpmask_u8; + u8 flip; + if (!checkReachWithFlip(entry.reach, andmask_u8, cmpmask_u8, flip)) { + return false; + } + s32 checkbyte_offset = verify_s32(entry.offset); + DEBUG_PRINTF("CHECK BYTE offset=%d\n", checkbyte_offset); + const auto *end_inst = program.end_instruction(); + auto ri = std::make_unique<RoseInstrCheckByte>(andmask_u8, cmpmask_u8, flip, + checkbyte_offset, end_inst); + program.add_before_end(move(ri)); + return true; + } + return false; +} + +static +bool makeRoleMask(const vector<LookEntry> &look, RoseProgram &program) { + if (look.back().offset < look.front().offset + 8) { + s32 base_offset = verify_s32(look.front().offset); + u64a and_mask = 0; + u64a cmp_mask = 0; + u64a neg_mask = 0; + for (const auto &entry : look) { + u8 andmask_u8, cmpmask_u8, flip; + if (!checkReachWithFlip(entry.reach, andmask_u8, + cmpmask_u8, flip)) { + return false; + } + DEBUG_PRINTF("entry offset %d\n", entry.offset); + u32 shift = (entry.offset - base_offset) << 3; + and_mask |= (u64a)andmask_u8 << shift; + cmp_mask |= (u64a)cmpmask_u8 << shift; + if (flip) { + neg_mask |= 0xffLLU << shift; + } + } + DEBUG_PRINTF("CHECK MASK and_mask=%llx cmp_mask=%llx\n", + and_mask, cmp_mask); + const auto *end_inst = program.end_instruction(); + auto ri = std::make_unique<RoseInstrCheckMask>(and_mask, cmp_mask, neg_mask, + base_offset, end_inst); + program.add_before_end(move(ri)); + return true; + } + return false; +} + +static UNUSED +string convertMaskstoString(u8 *p, int byte_len) { + string s; + for (int i = 0; i < byte_len; i++) { + u8 hi = 
*p >> 4; + u8 lo = *p & 0xf; + s += (char)(hi + (hi < 10 ? 48 : 87)); + s += (char)(lo + (lo < 10 ? 48 : 87)); + p++; + } + return s; +} + +static +bool makeRoleMask32(const vector<LookEntry> &look, + RoseProgram &program) { + if (look.back().offset >= look.front().offset + 32) { + return false; + } + s32 base_offset = verify_s32(look.front().offset); + array<u8, 32> and_mask, cmp_mask; + and_mask.fill(0); + cmp_mask.fill(0); + u32 neg_mask = 0; + for (const auto &entry : look) { + u8 andmask_u8, cmpmask_u8, flip; + if (!checkReachWithFlip(entry.reach, andmask_u8, + cmpmask_u8, flip)) { + return false; + } + u32 shift = entry.offset - base_offset; + assert(shift < 32); + and_mask[shift] = andmask_u8; + cmp_mask[shift] = cmpmask_u8; + if (flip) { + neg_mask |= 1 << shift; + } + } + + DEBUG_PRINTF("and_mask %s\n", + convertMaskstoString(and_mask.data(), 32).c_str()); + DEBUG_PRINTF("cmp_mask %s\n", + convertMaskstoString(cmp_mask.data(), 32).c_str()); + DEBUG_PRINTF("neg_mask %08x\n", neg_mask); + DEBUG_PRINTF("base_offset %d\n", base_offset); + + const auto *end_inst = program.end_instruction(); + auto ri = std::make_unique<RoseInstrCheckMask32>(and_mask, cmp_mask, neg_mask, + base_offset, end_inst); + program.add_before_end(move(ri)); + return true; +} + static bool makeRoleMask64(const vector<LookEntry> &look, RoseProgram &program, const target_t &target) { @@ -1104,202 +1104,202 @@ bool makeRoleMask64(const vector<LookEntry> &look, return true; } -// Sorting by the size of every bucket. -// Used in map<u32, vector<s8>, cmpNibble>. -struct cmpNibble { - bool operator()(const u32 data1, const u32 data2) const{ - u32 size1 = popcount32(data1 >> 16) * popcount32(data1 << 16); - u32 size2 = popcount32(data2 >> 16) * popcount32(data2 << 16); - return std::tie(size1, data1) < std::tie(size2, data2); - } -}; - -// Insert all pairs of bucket and offset into buckets. -static really_inline -void getAllBuckets(const vector<LookEntry> &look, - map<u32, vector<s8>, cmpNibble> &buckets, u64a &neg_mask) { - s32 base_offset = verify_s32(look.front().offset); - for (const auto &entry : look) { - CharReach cr = entry.reach; - // Flip heavy character classes to save buckets. - if (cr.count() > 128 ) { - cr.flip(); - } else { - neg_mask ^= 1ULL << (entry.offset - base_offset); - } - - map <u16, u16> lo2hi; - // We treat Ascii Table as a 16x16 grid. - // Push every row in cr into lo2hi and mark the row number. - for (size_t i = cr.find_first(); i != CharReach::npos;) { - u8 it_hi = i >> 4; - u16 low_encode = 0; - while (i != CharReach::npos && (i >> 4) == it_hi) { - low_encode |= 1 << (i & 0xf); - i = cr.find_next(i); - } - lo2hi[low_encode] |= 1 << it_hi; - } - for (const auto &it : lo2hi) { - u32 hi_lo = (it.second << 16) | it.first; - buckets[hi_lo].push_back(entry.offset); - } - } -} - -// Once we have a new bucket, we'll try to combine it with all old buckets. -static really_inline -void nibUpdate(map<u32, u16> &nib, u32 hi_lo) { - u16 hi = hi_lo >> 16; - u16 lo = hi_lo & 0xffff; - for (const auto pairs : nib) { - u32 old = pairs.first; - if ((old >> 16) == hi || (old & 0xffff) == lo) { - if (!nib[old | hi_lo]) { - nib[old | hi_lo] = nib[old] | nib[hi_lo]; - } - } - } -} - -static really_inline -void nibMaskUpdate(array<u8, 32> &mask, u32 data, u8 bit_index) { - for (u8 index = 0; data > 0; data >>= 1, index++) { - if (data & 1) { - // 0 ~ 7 bucket in first 16 bytes, - // 8 ~ 15 bucket in second 16 bytes. 
- if (bit_index >= 8) { - mask[index + 16] |= 1 << (bit_index - 8); - } else { - mask[index] |= 1 << bit_index; - } - } - } -} - -static -bool getShuftiMasks(const vector<LookEntry> &look, array<u8, 32> &hi_mask, - array<u8, 32> &lo_mask, u8 *bucket_select_hi, - u8 *bucket_select_lo, u64a &neg_mask, - u8 &bit_idx, size_t len) { - map<u32, u16> nib; // map every bucket to its bucket number. - map<u32, vector<s8>, cmpNibble> bucket2offsets; - s32 base_offset = look.front().offset; - - bit_idx = 0; - neg_mask = ~0ULL; - - getAllBuckets(look, bucket2offsets, neg_mask); - - for (const auto &it : bucket2offsets) { - u32 hi_lo = it.first; - // New bucket. - if (!nib[hi_lo]) { - if ((bit_idx >= 8 && len == 64) || bit_idx >= 16) { - return false; - } - nib[hi_lo] = 1 << bit_idx; - - nibUpdate(nib, hi_lo); - nibMaskUpdate(hi_mask, hi_lo >> 16, bit_idx); - nibMaskUpdate(lo_mask, hi_lo & 0xffff, bit_idx); - bit_idx++; - } - - DEBUG_PRINTF("hi_lo %x bucket %x\n", hi_lo, nib[hi_lo]); - - // Update bucket_select_mask. - u8 nib_hi = nib[hi_lo] >> 8; - u8 nib_lo = nib[hi_lo] & 0xff; - for (const auto offset : it.second) { - bucket_select_hi[offset - base_offset] |= nib_hi; - bucket_select_lo[offset - base_offset] |= nib_lo; - } - } - return true; -} - -static -unique_ptr<RoseInstruction> -makeCheckShufti16x8(u32 offset_range, u8 bucket_idx, - const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask, - const array<u8, 32> &bucket_select_mask, - u32 neg_mask, s32 base_offset, - const RoseInstruction *end_inst) { - if (offset_range > 16 || bucket_idx > 8) { - return nullptr; - } - array<u8, 32> nib_mask; - array<u8, 16> bucket_select_mask_16; - copy(lo_mask.begin(), lo_mask.begin() + 16, nib_mask.begin()); - copy(hi_mask.begin(), hi_mask.begin() + 16, nib_mask.begin() + 16); - copy(bucket_select_mask.begin(), bucket_select_mask.begin() + 16, - bucket_select_mask_16.begin()); - return std::make_unique<RoseInstrCheckShufti16x8> - (nib_mask, bucket_select_mask_16, - neg_mask & 0xffff, base_offset, end_inst); -} - -static -unique_ptr<RoseInstruction> -makeCheckShufti32x8(u32 offset_range, u8 bucket_idx, - const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask, - const array<u8, 32> &bucket_select_mask, - u32 neg_mask, s32 base_offset, - const RoseInstruction *end_inst) { - if (offset_range > 32 || bucket_idx > 8) { - return nullptr; - } - - array<u8, 16> hi_mask_16; - array<u8, 16> lo_mask_16; - copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_16.begin()); - copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_16.begin()); - return std::make_unique<RoseInstrCheckShufti32x8> - (hi_mask_16, lo_mask_16, bucket_select_mask, - neg_mask, base_offset, end_inst); -} - -static -unique_ptr<RoseInstruction> -makeCheckShufti16x16(u32 offset_range, u8 bucket_idx, - const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask, - const array<u8, 32> &bucket_select_mask_lo, - const array<u8, 32> &bucket_select_mask_hi, - u32 neg_mask, s32 base_offset, - const RoseInstruction *end_inst) { - if (offset_range > 16 || bucket_idx > 16) { - return nullptr; - } - - array<u8, 32> bucket_select_mask_32; - copy(bucket_select_mask_lo.begin(), bucket_select_mask_lo.begin() + 16, - bucket_select_mask_32.begin()); - copy(bucket_select_mask_hi.begin(), bucket_select_mask_hi.begin() + 16, - bucket_select_mask_32.begin() + 16); - return std::make_unique<RoseInstrCheckShufti16x16> - (hi_mask, lo_mask, bucket_select_mask_32, - neg_mask & 0xffff, base_offset, end_inst); -} - -static -unique_ptr<RoseInstruction> -makeCheckShufti32x16(u32 
offset_range, u8 bucket_idx, - const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask, - const array<u8, 32> &bucket_select_mask_lo, - const array<u8, 32> &bucket_select_mask_hi, - u32 neg_mask, s32 base_offset, - const RoseInstruction *end_inst) { - if (offset_range > 32 || bucket_idx > 16) { - return nullptr; - } - - return std::make_unique<RoseInstrCheckShufti32x16> - (hi_mask, lo_mask, bucket_select_mask_hi, - bucket_select_mask_lo, neg_mask, base_offset, end_inst); -} - -static +// Sorting by the size of every bucket. +// Used in map<u32, vector<s8>, cmpNibble>. +struct cmpNibble { + bool operator()(const u32 data1, const u32 data2) const{ + u32 size1 = popcount32(data1 >> 16) * popcount32(data1 << 16); + u32 size2 = popcount32(data2 >> 16) * popcount32(data2 << 16); + return std::tie(size1, data1) < std::tie(size2, data2); + } +}; + +// Insert all pairs of bucket and offset into buckets. +static really_inline +void getAllBuckets(const vector<LookEntry> &look, + map<u32, vector<s8>, cmpNibble> &buckets, u64a &neg_mask) { + s32 base_offset = verify_s32(look.front().offset); + for (const auto &entry : look) { + CharReach cr = entry.reach; + // Flip heavy character classes to save buckets. + if (cr.count() > 128 ) { + cr.flip(); + } else { + neg_mask ^= 1ULL << (entry.offset - base_offset); + } + + map <u16, u16> lo2hi; + // We treat Ascii Table as a 16x16 grid. + // Push every row in cr into lo2hi and mark the row number. + for (size_t i = cr.find_first(); i != CharReach::npos;) { + u8 it_hi = i >> 4; + u16 low_encode = 0; + while (i != CharReach::npos && (i >> 4) == it_hi) { + low_encode |= 1 << (i & 0xf); + i = cr.find_next(i); + } + lo2hi[low_encode] |= 1 << it_hi; + } + for (const auto &it : lo2hi) { + u32 hi_lo = (it.second << 16) | it.first; + buckets[hi_lo].push_back(entry.offset); + } + } +} + +// Once we have a new bucket, we'll try to combine it with all old buckets. +static really_inline +void nibUpdate(map<u32, u16> &nib, u32 hi_lo) { + u16 hi = hi_lo >> 16; + u16 lo = hi_lo & 0xffff; + for (const auto pairs : nib) { + u32 old = pairs.first; + if ((old >> 16) == hi || (old & 0xffff) == lo) { + if (!nib[old | hi_lo]) { + nib[old | hi_lo] = nib[old] | nib[hi_lo]; + } + } + } +} + +static really_inline +void nibMaskUpdate(array<u8, 32> &mask, u32 data, u8 bit_index) { + for (u8 index = 0; data > 0; data >>= 1, index++) { + if (data & 1) { + // 0 ~ 7 bucket in first 16 bytes, + // 8 ~ 15 bucket in second 16 bytes. + if (bit_index >= 8) { + mask[index + 16] |= 1 << (bit_index - 8); + } else { + mask[index] |= 1 << bit_index; + } + } + } +} + +static +bool getShuftiMasks(const vector<LookEntry> &look, array<u8, 32> &hi_mask, + array<u8, 32> &lo_mask, u8 *bucket_select_hi, + u8 *bucket_select_lo, u64a &neg_mask, + u8 &bit_idx, size_t len) { + map<u32, u16> nib; // map every bucket to its bucket number. + map<u32, vector<s8>, cmpNibble> bucket2offsets; + s32 base_offset = look.front().offset; + + bit_idx = 0; + neg_mask = ~0ULL; + + getAllBuckets(look, bucket2offsets, neg_mask); + + for (const auto &it : bucket2offsets) { + u32 hi_lo = it.first; + // New bucket. + if (!nib[hi_lo]) { + if ((bit_idx >= 8 && len == 64) || bit_idx >= 16) { + return false; + } + nib[hi_lo] = 1 << bit_idx; + + nibUpdate(nib, hi_lo); + nibMaskUpdate(hi_mask, hi_lo >> 16, bit_idx); + nibMaskUpdate(lo_mask, hi_lo & 0xffff, bit_idx); + bit_idx++; + } + + DEBUG_PRINTF("hi_lo %x bucket %x\n", hi_lo, nib[hi_lo]); + + // Update bucket_select_mask. 
+ u8 nib_hi = nib[hi_lo] >> 8; + u8 nib_lo = nib[hi_lo] & 0xff; + for (const auto offset : it.second) { + bucket_select_hi[offset - base_offset] |= nib_hi; + bucket_select_lo[offset - base_offset] |= nib_lo; + } + } + return true; +} + +static +unique_ptr<RoseInstruction> +makeCheckShufti16x8(u32 offset_range, u8 bucket_idx, + const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask, + const array<u8, 32> &bucket_select_mask, + u32 neg_mask, s32 base_offset, + const RoseInstruction *end_inst) { + if (offset_range > 16 || bucket_idx > 8) { + return nullptr; + } + array<u8, 32> nib_mask; + array<u8, 16> bucket_select_mask_16; + copy(lo_mask.begin(), lo_mask.begin() + 16, nib_mask.begin()); + copy(hi_mask.begin(), hi_mask.begin() + 16, nib_mask.begin() + 16); + copy(bucket_select_mask.begin(), bucket_select_mask.begin() + 16, + bucket_select_mask_16.begin()); + return std::make_unique<RoseInstrCheckShufti16x8> + (nib_mask, bucket_select_mask_16, + neg_mask & 0xffff, base_offset, end_inst); +} + +static +unique_ptr<RoseInstruction> +makeCheckShufti32x8(u32 offset_range, u8 bucket_idx, + const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask, + const array<u8, 32> &bucket_select_mask, + u32 neg_mask, s32 base_offset, + const RoseInstruction *end_inst) { + if (offset_range > 32 || bucket_idx > 8) { + return nullptr; + } + + array<u8, 16> hi_mask_16; + array<u8, 16> lo_mask_16; + copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_16.begin()); + copy(lo_mask.begin(), lo_mask.begin() + 16, lo_mask_16.begin()); + return std::make_unique<RoseInstrCheckShufti32x8> + (hi_mask_16, lo_mask_16, bucket_select_mask, + neg_mask, base_offset, end_inst); +} + +static +unique_ptr<RoseInstruction> +makeCheckShufti16x16(u32 offset_range, u8 bucket_idx, + const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask, + const array<u8, 32> &bucket_select_mask_lo, + const array<u8, 32> &bucket_select_mask_hi, + u32 neg_mask, s32 base_offset, + const RoseInstruction *end_inst) { + if (offset_range > 16 || bucket_idx > 16) { + return nullptr; + } + + array<u8, 32> bucket_select_mask_32; + copy(bucket_select_mask_lo.begin(), bucket_select_mask_lo.begin() + 16, + bucket_select_mask_32.begin()); + copy(bucket_select_mask_hi.begin(), bucket_select_mask_hi.begin() + 16, + bucket_select_mask_32.begin() + 16); + return std::make_unique<RoseInstrCheckShufti16x16> + (hi_mask, lo_mask, bucket_select_mask_32, + neg_mask & 0xffff, base_offset, end_inst); +} + +static +unique_ptr<RoseInstruction> +makeCheckShufti32x16(u32 offset_range, u8 bucket_idx, + const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask, + const array<u8, 32> &bucket_select_mask_lo, + const array<u8, 32> &bucket_select_mask_hi, + u32 neg_mask, s32 base_offset, + const RoseInstruction *end_inst) { + if (offset_range > 32 || bucket_idx > 16) { + return nullptr; + } + + return std::make_unique<RoseInstrCheckShufti32x16> + (hi_mask, lo_mask, bucket_select_mask_hi, + bucket_select_mask_lo, neg_mask, base_offset, end_inst); +} + +static unique_ptr<RoseInstruction> makeCheckShufti64x8(u32 offset_range, u8 bucket_idx, const array<u8, 32> &hi_mask, const array<u8, 32> &lo_mask, @@ -1309,7 +1309,7 @@ makeCheckShufti64x8(u32 offset_range, u8 bucket_idx, if (offset_range > 64 || bucket_idx > 8) { return nullptr; } - + array<u8, 64> hi_mask_64; array<u8, 64> lo_mask_64; copy(hi_mask.begin(), hi_mask.begin() + 16, hi_mask_64.begin()); @@ -1375,26 +1375,26 @@ bool makeRoleShufti(const vector<LookEntry> &look, RoseProgram &program, } else { offset_limit = 32; } - s32 
base_offset = verify_s32(look.front().offset); + s32 base_offset = verify_s32(look.front().offset); if (look.back().offset >= base_offset + offset_limit) { - return false; - } - - u8 bucket_idx = 0; // number of buckets - u64a neg_mask_64; - array<u8, 32> hi_mask; - array<u8, 32> lo_mask; + return false; + } + + u8 bucket_idx = 0; // number of buckets + u64a neg_mask_64; + array<u8, 32> hi_mask; + array<u8, 32> lo_mask; array<u8, 64> bucket_select_hi_64; // for AVX512 array<u8, 64> bucket_select_lo_64; // for AVX512 - array<u8, 32> bucket_select_hi; - array<u8, 32> bucket_select_lo; - hi_mask.fill(0); - lo_mask.fill(0); + array<u8, 32> bucket_select_hi; + array<u8, 32> bucket_select_lo; + hi_mask.fill(0); + lo_mask.fill(0); bucket_select_hi_64.fill(0); bucket_select_lo_64.fill(0); - bucket_select_hi.fill(0); // will not be used in 16x8 and 32x8. - bucket_select_lo.fill(0); - + bucket_select_hi.fill(0); // will not be used in 16x8 and 32x8. + bucket_select_lo.fill(0); + if (target.has_avx512()) { if (!getShuftiMasks(look, hi_mask, lo_mask, bucket_select_hi_64.data(), bucket_select_lo_64.data(), neg_mask_64, bucket_idx, @@ -1416,30 +1416,30 @@ bool makeRoleShufti(const vector<LookEntry> &look, RoseProgram &program, 32)) { return false; } - } - - u32 neg_mask = (u32)neg_mask_64; - - DEBUG_PRINTF("hi_mask %s\n", - convertMaskstoString(hi_mask.data(), 32).c_str()); - DEBUG_PRINTF("lo_mask %s\n", - convertMaskstoString(lo_mask.data(), 32).c_str()); - DEBUG_PRINTF("bucket_select_hi %s\n", - convertMaskstoString(bucket_select_hi.data(), 32).c_str()); - DEBUG_PRINTF("bucket_select_lo %s\n", - convertMaskstoString(bucket_select_lo.data(), 32).c_str()); - - const auto *end_inst = program.end_instruction(); - s32 offset_range = look.back().offset - base_offset + 1; - - auto ri = makeCheckShufti16x8(offset_range, bucket_idx, hi_mask, lo_mask, - bucket_select_lo, neg_mask, base_offset, - end_inst); - if (!ri) { - ri = makeCheckShufti32x8(offset_range, bucket_idx, hi_mask, lo_mask, - bucket_select_lo, neg_mask, base_offset, - end_inst); - } + } + + u32 neg_mask = (u32)neg_mask_64; + + DEBUG_PRINTF("hi_mask %s\n", + convertMaskstoString(hi_mask.data(), 32).c_str()); + DEBUG_PRINTF("lo_mask %s\n", + convertMaskstoString(lo_mask.data(), 32).c_str()); + DEBUG_PRINTF("bucket_select_hi %s\n", + convertMaskstoString(bucket_select_hi.data(), 32).c_str()); + DEBUG_PRINTF("bucket_select_lo %s\n", + convertMaskstoString(bucket_select_lo.data(), 32).c_str()); + + const auto *end_inst = program.end_instruction(); + s32 offset_range = look.back().offset - base_offset + 1; + + auto ri = makeCheckShufti16x8(offset_range, bucket_idx, hi_mask, lo_mask, + bucket_select_lo, neg_mask, base_offset, + end_inst); + if (!ri) { + ri = makeCheckShufti32x8(offset_range, bucket_idx, hi_mask, lo_mask, + bucket_select_lo, neg_mask, base_offset, + end_inst); + } if (target.has_avx512()) { if (!ri) { ri = makeCheckShufti64x8(offset_range, bucket_idx, hi_mask, lo_mask, @@ -1447,16 +1447,16 @@ bool makeRoleShufti(const vector<LookEntry> &look, RoseProgram &program, base_offset, end_inst); } } - if (!ri) { - ri = makeCheckShufti16x16(offset_range, bucket_idx, hi_mask, lo_mask, - bucket_select_lo, bucket_select_hi, - neg_mask, base_offset, end_inst); - } - if (!ri) { - ri = makeCheckShufti32x16(offset_range, bucket_idx, hi_mask, lo_mask, - bucket_select_lo, bucket_select_hi, - neg_mask, base_offset, end_inst); - } + if (!ri) { + ri = makeCheckShufti16x16(offset_range, bucket_idx, hi_mask, lo_mask, + bucket_select_lo, bucket_select_hi, + 
neg_mask, base_offset, end_inst); + } + if (!ri) { + ri = makeCheckShufti32x16(offset_range, bucket_idx, hi_mask, lo_mask, + bucket_select_lo, bucket_select_hi, + neg_mask, base_offset, end_inst); + } if (target.has_avx512()) { if (!ri) { ri = makeCheckShufti64x16(offset_range, bucket_idx, hi_mask, lo_mask, @@ -1464,1137 +1464,1137 @@ bool makeRoleShufti(const vector<LookEntry> &look, RoseProgram &program, neg_mask_64, base_offset, end_inst); } } - assert(ri); - program.add_before_end(move(ri)); - - return true; -} - -/** - * Builds a lookaround instruction, or an appropriate specialization if one is - * available. - */ -static -void makeLookaroundInstruction(const vector<LookEntry> &look, + assert(ri); + program.add_before_end(move(ri)); + + return true; +} + +/** + * Builds a lookaround instruction, or an appropriate specialization if one is + * available. + */ +static +void makeLookaroundInstruction(const vector<LookEntry> &look, RoseProgram &program, const target_t &target) { - assert(!look.empty()); - - if (makeRoleByte(look, program)) { - return; - } - - if (look.size() == 1) { - s8 offset = look.begin()->offset; - const CharReach &reach = look.begin()->reach; - auto ri = std::make_unique<RoseInstrCheckSingleLookaround>(offset, reach, - program.end_instruction()); - program.add_before_end(move(ri)); - return; - } - - if (makeRoleMask(look, program)) { - return; - } - - if (makeRoleMask32(look, program)) { - return; - } - + assert(!look.empty()); + + if (makeRoleByte(look, program)) { + return; + } + + if (look.size() == 1) { + s8 offset = look.begin()->offset; + const CharReach &reach = look.begin()->reach; + auto ri = std::make_unique<RoseInstrCheckSingleLookaround>(offset, reach, + program.end_instruction()); + program.add_before_end(move(ri)); + return; + } + + if (makeRoleMask(look, program)) { + return; + } + + if (makeRoleMask32(look, program)) { + return; + } + if (makeRoleMask64(look, program, target)) { - return; - } - + return; + } + if (makeRoleShufti(look, program, target)) { return; } - auto ri = std::make_unique<RoseInstrCheckLookaround>(look, - program.end_instruction()); - program.add_before_end(move(ri)); -} - -static -void makeCheckLitMaskInstruction(const RoseBuildImpl &build, u32 lit_id, - RoseProgram &program) { - const auto &info = build.literal_info.at(lit_id); - if (!info.requires_benefits) { - return; - } - - vector<LookEntry> look; - - const auto &lit = build.literals.at(lit_id); - const ue2_literal &s = lit.s; - const auto &msk = lit.msk; - - DEBUG_PRINTF("building mask for lit %u: %s\n", lit_id, - dumpString(s).c_str()); - - assert(s.length() <= MAX_MASK2_WIDTH); - - // Note: the literal matcher will confirm the HWLM mask in lit.msk, so we - // do not include those entries in the lookaround. - auto it = s.begin(); - for (s32 i = 0 - s.length(), i_end = 0 - msk.size(); i < i_end; ++i, ++it) { - if (!it->nocase) { - look.emplace_back(verify_s8(i), *it); - } - } - - if (look.empty()) { - return; // all caseful chars handled by HWLM mask. 
- } - + auto ri = std::make_unique<RoseInstrCheckLookaround>(look, + program.end_instruction()); + program.add_before_end(move(ri)); +} + +static +void makeCheckLitMaskInstruction(const RoseBuildImpl &build, u32 lit_id, + RoseProgram &program) { + const auto &info = build.literal_info.at(lit_id); + if (!info.requires_benefits) { + return; + } + + vector<LookEntry> look; + + const auto &lit = build.literals.at(lit_id); + const ue2_literal &s = lit.s; + const auto &msk = lit.msk; + + DEBUG_PRINTF("building mask for lit %u: %s\n", lit_id, + dumpString(s).c_str()); + + assert(s.length() <= MAX_MASK2_WIDTH); + + // Note: the literal matcher will confirm the HWLM mask in lit.msk, so we + // do not include those entries in the lookaround. + auto it = s.begin(); + for (s32 i = 0 - s.length(), i_end = 0 - msk.size(); i < i_end; ++i, ++it) { + if (!it->nocase) { + look.emplace_back(verify_s8(i), *it); + } + } + + if (look.empty()) { + return; // all caseful chars handled by HWLM mask. + } + makeLookaroundInstruction(look, program, build.cc.target_info); -} - -static -void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, u32 lit_id, - const vector<RoseEdge> &lit_edges, - u32 floatingMinLiteralMatchOffset, - RoseProgram &prog) { - if (lit_edges.empty()) { - return; - } - - if (floatingMinLiteralMatchOffset == 0) { - return; - } - - RoseVertex v = target(lit_edges.front(), build.g); - if (!build.isFloating(v)) { - return; - } - - const auto &lit = build.literals.at(lit_id); - size_t min_len = lit.elength(); - u32 min_offset = findMinOffset(build, lit_id); - DEBUG_PRINTF("has min_len=%zu, min_offset=%u, global min is %u\n", min_len, - min_offset, floatingMinLiteralMatchOffset); - - // If we can't match before the min offset, we don't need the check. - if (min_len >= floatingMinLiteralMatchOffset) { - DEBUG_PRINTF("no need for check, min is %u\n", - floatingMinLiteralMatchOffset); - return; - } - - assert(min_offset >= floatingMinLiteralMatchOffset); - assert(min_offset < UINT32_MAX); - - DEBUG_PRINTF("adding lit early check, min_offset=%u\n", min_offset); - const auto *end = prog.end_instruction(); +} + +static +void makeCheckLitEarlyInstruction(const RoseBuildImpl &build, u32 lit_id, + const vector<RoseEdge> &lit_edges, + u32 floatingMinLiteralMatchOffset, + RoseProgram &prog) { + if (lit_edges.empty()) { + return; + } + + if (floatingMinLiteralMatchOffset == 0) { + return; + } + + RoseVertex v = target(lit_edges.front(), build.g); + if (!build.isFloating(v)) { + return; + } + + const auto &lit = build.literals.at(lit_id); + size_t min_len = lit.elength(); + u32 min_offset = findMinOffset(build, lit_id); + DEBUG_PRINTF("has min_len=%zu, min_offset=%u, global min is %u\n", min_len, + min_offset, floatingMinLiteralMatchOffset); + + // If we can't match before the min offset, we don't need the check. 
+ if (min_len >= floatingMinLiteralMatchOffset) { + DEBUG_PRINTF("no need for check, min is %u\n", + floatingMinLiteralMatchOffset); + return; + } + + assert(min_offset >= floatingMinLiteralMatchOffset); + assert(min_offset < UINT32_MAX); + + DEBUG_PRINTF("adding lit early check, min_offset=%u\n", min_offset); + const auto *end = prog.end_instruction(); prog.add_before_end(std::make_unique<RoseInstrCheckLitEarly>(min_offset, end)); -} - -static -void makeGroupCheckInstruction(const RoseBuildImpl &build, u32 lit_id, - RoseProgram &prog) { - const auto &info = build.literal_info.at(lit_id); - - if (!info.group_mask) { - return; - } +} + +static +void makeGroupCheckInstruction(const RoseBuildImpl &build, u32 lit_id, + RoseProgram &prog) { + const auto &info = build.literal_info.at(lit_id); + + if (!info.group_mask) { + return; + } prog.add_before_end(std::make_unique<RoseInstrCheckGroups>(info.group_mask)); -} - -static -bool hasDelayedLiteral(const RoseBuildImpl &build, - const vector<RoseEdge> &lit_edges) { - auto is_delayed = [&build](u32 lit_id) { return build.isDelayed(lit_id); }; - for (const auto &e : lit_edges) { - auto v = target(e, build.g); - const auto &lits = build.g[v].literals; - if (any_of(begin(lits), end(lits), is_delayed)) { - return true; - } - } - return false; -} - -static -RoseProgram makeLitInitialProgram(const RoseBuildImpl &build, - ProgramBuild &prog_build, u32 lit_id, - const vector<RoseEdge> &lit_edges, - bool is_anchored_replay_program) { - RoseProgram program; - - // Check long literal info. - if (!build.isDelayed(lit_id)) { - makeCheckLiteralInstruction(build.literals.at(lit_id), - prog_build.longLitLengthThreshold, - program, build.cc); - } - - // Check lit mask. - makeCheckLitMaskInstruction(build, lit_id, program); - - // Check literal groups. This is an optimisation that we only perform for - // delayed literals, as their groups may be switched off; ordinarily, we - // can trust the HWLM matcher. - if (hasDelayedLiteral(build, lit_edges)) { - makeGroupCheckInstruction(build, lit_id, program); - } - - // Add instructions for pushing delayed matches, if there are any. - makePushDelayedInstructions(build.literals, prog_build, - build.literal_info.at(lit_id).delayed_ids, - program); - - // Add pre-check for early literals in the floating table. 
- makeCheckLitEarlyInstruction(build, lit_id, lit_edges, - prog_build.floatingMinLiteralMatchOffset, - program); - - /* Check if we are able to deliever matches from the anchored table now */ - if (!is_anchored_replay_program) { - makeAnchoredLiteralDelay(build, prog_build, lit_id, program); - } - - return program; -} - -static -bool makeRoleMultipathShufti(const vector<vector<LookEntry>> &multi_look, - RoseProgram &program) { - if (multi_look.empty()) { - return false; - } - - // find the base offset - assert(!multi_look[0].empty()); - s32 base_offset = multi_look[0].front().offset; - s32 last_start = base_offset; - s32 end_offset = multi_look[0].back().offset; - size_t multi_len = 0; - - for (const auto &look : multi_look) { - assert(look.size() > 0); - multi_len += look.size(); - - LIMIT_TO_AT_MOST(&base_offset, look.front().offset); - ENSURE_AT_LEAST(&last_start, look.front().offset); - ENSURE_AT_LEAST(&end_offset, look.back().offset); - } - - assert(last_start < 0); - - if (end_offset - base_offset >= MULTIPATH_MAX_LEN) { - return false; - } - - if (multi_len <= 16) { - multi_len = 16; - } else if (multi_len <= 32) { - multi_len = 32; - } else if (multi_len <= 64) { - multi_len = 64; - } else { - DEBUG_PRINTF("too long for multi-path\n"); - return false; - } - - vector<LookEntry> linear_look; - array<u8, 64> data_select_mask; - data_select_mask.fill(0); - u64a hi_bits_mask = 0; - u64a lo_bits_mask = 0; - - for (const auto &look : multi_look) { - assert(linear_look.size() < 64); - lo_bits_mask |= 1LLU << linear_look.size(); - for (const auto &entry : look) { - assert(entry.offset - base_offset < MULTIPATH_MAX_LEN); - data_select_mask[linear_look.size()] = - verify_u8(entry.offset - base_offset); - linear_look.emplace_back(verify_s8(linear_look.size()), entry.reach); - } - hi_bits_mask |= 1LLU << (linear_look.size() - 1); - } - - u8 bit_index = 0; // number of buckets - u64a neg_mask; - array<u8, 32> hi_mask; - array<u8, 32> lo_mask; - array<u8, 64> bucket_select_hi; - array<u8, 64> bucket_select_lo; - hi_mask.fill(0); - lo_mask.fill(0); - bucket_select_hi.fill(0); - bucket_select_lo.fill(0); - - if (!getShuftiMasks(linear_look, hi_mask, lo_mask, bucket_select_hi.data(), - bucket_select_lo.data(), neg_mask, bit_index, - multi_len)) { - return false; - } - - DEBUG_PRINTF("hi_mask %s\n", - convertMaskstoString(hi_mask.data(), 16).c_str()); - DEBUG_PRINTF("lo_mask %s\n", - convertMaskstoString(lo_mask.data(), 16).c_str()); - DEBUG_PRINTF("bucket_select_hi %s\n", - convertMaskstoString(bucket_select_hi.data(), 64).c_str()); - DEBUG_PRINTF("bucket_select_lo %s\n", - convertMaskstoString(bucket_select_lo.data(), 64).c_str()); - DEBUG_PRINTF("data_select_mask %s\n", - convertMaskstoString(data_select_mask.data(), 64).c_str()); - DEBUG_PRINTF("hi_bits_mask %llx\n", hi_bits_mask); - DEBUG_PRINTF("lo_bits_mask %llx\n", lo_bits_mask); - DEBUG_PRINTF("neg_mask %llx\n", neg_mask); - DEBUG_PRINTF("base_offset %d\n", base_offset); - DEBUG_PRINTF("last_start %d\n", last_start); - - // Since we don't have 16x16 now, just call 32x16 instead. 
- if (bit_index > 8) { - assert(multi_len <= 32); - multi_len = 32; - } - - const auto *end_inst = program.end_instruction(); - assert(multi_len == 16 || multi_len == 32 || multi_len == 64); - if (multi_len == 16) { - neg_mask &= 0xffff; - assert(!(hi_bits_mask & ~0xffffULL)); - assert(!(lo_bits_mask & ~0xffffULL)); - assert(bit_index <=8); - array<u8, 32> nib_mask; - copy(begin(lo_mask), begin(lo_mask) + 16, nib_mask.begin()); - copy(begin(hi_mask), begin(hi_mask) + 16, nib_mask.begin() + 16); - - auto ri = std::make_unique<RoseInstrCheckMultipathShufti16x8> - (nib_mask, bucket_select_lo, data_select_mask, hi_bits_mask, - lo_bits_mask, neg_mask, base_offset, last_start, end_inst); - program.add_before_end(move(ri)); - } else if (multi_len == 32) { - neg_mask &= 0xffffffff; - assert(!(hi_bits_mask & ~0xffffffffULL)); - assert(!(lo_bits_mask & ~0xffffffffULL)); - if (bit_index <= 8) { - auto ri = std::make_unique<RoseInstrCheckMultipathShufti32x8> - (hi_mask, lo_mask, bucket_select_lo, data_select_mask, - hi_bits_mask, lo_bits_mask, neg_mask, base_offset, - last_start, end_inst); - program.add_before_end(move(ri)); - } else { - auto ri = std::make_unique<RoseInstrCheckMultipathShufti32x16> - (hi_mask, lo_mask, bucket_select_hi, bucket_select_lo, - data_select_mask, hi_bits_mask, lo_bits_mask, neg_mask, - base_offset, last_start, end_inst); - program.add_before_end(move(ri)); - } - } else { - auto ri = std::make_unique<RoseInstrCheckMultipathShufti64> - (hi_mask, lo_mask, bucket_select_lo, data_select_mask, - hi_bits_mask, lo_bits_mask, neg_mask, base_offset, - last_start, end_inst); - program.add_before_end(move(ri)); - } - return true; -} - -static -void makeRoleMultipathLookaround(const vector<vector<LookEntry>> &multi_look, - RoseProgram &program) { - assert(!multi_look.empty()); - assert(multi_look.size() <= MAX_LOOKAROUND_PATHS); - vector<vector<LookEntry>> ordered_look; - set<s32> look_offset; - - assert(!multi_look[0].empty()); - s32 last_start = multi_look[0][0].offset; - - // build offset table. 
- for (const auto &look : multi_look) { - assert(look.size() > 0); - last_start = max(last_start, (s32)look.begin()->offset); - - for (const auto &t : look) { - look_offset.insert(t.offset); - } - } - - array<u8, MULTIPATH_MAX_LEN> start_mask; - if (multi_look.size() < MAX_LOOKAROUND_PATHS) { - start_mask.fill((1 << multi_look.size()) - 1); - } else { - start_mask.fill(0xff); - } - - u32 path_idx = 0; - for (const auto &look : multi_look) { - for (const auto &t : look) { - assert(t.offset >= (int)*look_offset.begin()); - size_t update_offset = t.offset - *look_offset.begin() + 1; - if (update_offset < start_mask.size()) { - start_mask[update_offset] &= ~(1 << path_idx); - } - } - path_idx++; - } - - for (u32 i = 1; i < MULTIPATH_MAX_LEN; i++) { - start_mask[i] &= start_mask[i - 1]; - DEBUG_PRINTF("start_mask[%u] = %x\n", i, start_mask[i]); - } - - assert(look_offset.size() <= MULTIPATH_MAX_LEN); - - assert(last_start < 0); - - for (const auto &offset : look_offset) { - vector<LookEntry> multi_entry; - multi_entry.resize(MAX_LOOKAROUND_PATHS); - - for (size_t i = 0; i < multi_look.size(); i++) { - for (const auto &t : multi_look[i]) { - if (t.offset == offset) { - multi_entry[i] = t; - } - } - } - ordered_look.emplace_back(multi_entry); - } - - auto ri = std::make_unique<RoseInstrMultipathLookaround>(move(ordered_look), - last_start, start_mask, - program.end_instruction()); - program.add_before_end(move(ri)); -} - -static -void makeRoleLookaround(const RoseBuildImpl &build, - const map<RoseVertex, left_build_info> &leftfix_info, - RoseVertex v, RoseProgram &program) { - if (!build.cc.grey.roseLookaroundMasks) { - return; - } - - vector<vector<LookEntry>> looks; - - // Lookaround from leftfix (mandatory). - if (contains(leftfix_info, v) && leftfix_info.at(v).has_lookaround) { - DEBUG_PRINTF("using leftfix lookaround\n"); - looks = leftfix_info.at(v).lookaround; - } - - // We may be able to find more lookaround info (advisory) and merge it - // in. - if (looks.size() <= 1) { - vector<LookEntry> look; - vector<LookEntry> look_more; - if (!looks.empty()) { - look = move(looks.front()); - } - findLookaroundMasks(build, v, look_more); - mergeLookaround(look, look_more); - if (!look.empty()) { +} + +static +bool hasDelayedLiteral(const RoseBuildImpl &build, + const vector<RoseEdge> &lit_edges) { + auto is_delayed = [&build](u32 lit_id) { return build.isDelayed(lit_id); }; + for (const auto &e : lit_edges) { + auto v = target(e, build.g); + const auto &lits = build.g[v].literals; + if (any_of(begin(lits), end(lits), is_delayed)) { + return true; + } + } + return false; +} + +static +RoseProgram makeLitInitialProgram(const RoseBuildImpl &build, + ProgramBuild &prog_build, u32 lit_id, + const vector<RoseEdge> &lit_edges, + bool is_anchored_replay_program) { + RoseProgram program; + + // Check long literal info. + if (!build.isDelayed(lit_id)) { + makeCheckLiteralInstruction(build.literals.at(lit_id), + prog_build.longLitLengthThreshold, + program, build.cc); + } + + // Check lit mask. + makeCheckLitMaskInstruction(build, lit_id, program); + + // Check literal groups. This is an optimisation that we only perform for + // delayed literals, as their groups may be switched off; ordinarily, we + // can trust the HWLM matcher. + if (hasDelayedLiteral(build, lit_edges)) { + makeGroupCheckInstruction(build, lit_id, program); + } + + // Add instructions for pushing delayed matches, if there are any. 
+ makePushDelayedInstructions(build.literals, prog_build, + build.literal_info.at(lit_id).delayed_ids, + program); + + // Add pre-check for early literals in the floating table. + makeCheckLitEarlyInstruction(build, lit_id, lit_edges, + prog_build.floatingMinLiteralMatchOffset, + program); + + /* Check if we are able to deliever matches from the anchored table now */ + if (!is_anchored_replay_program) { + makeAnchoredLiteralDelay(build, prog_build, lit_id, program); + } + + return program; +} + +static +bool makeRoleMultipathShufti(const vector<vector<LookEntry>> &multi_look, + RoseProgram &program) { + if (multi_look.empty()) { + return false; + } + + // find the base offset + assert(!multi_look[0].empty()); + s32 base_offset = multi_look[0].front().offset; + s32 last_start = base_offset; + s32 end_offset = multi_look[0].back().offset; + size_t multi_len = 0; + + for (const auto &look : multi_look) { + assert(look.size() > 0); + multi_len += look.size(); + + LIMIT_TO_AT_MOST(&base_offset, look.front().offset); + ENSURE_AT_LEAST(&last_start, look.front().offset); + ENSURE_AT_LEAST(&end_offset, look.back().offset); + } + + assert(last_start < 0); + + if (end_offset - base_offset >= MULTIPATH_MAX_LEN) { + return false; + } + + if (multi_len <= 16) { + multi_len = 16; + } else if (multi_len <= 32) { + multi_len = 32; + } else if (multi_len <= 64) { + multi_len = 64; + } else { + DEBUG_PRINTF("too long for multi-path\n"); + return false; + } + + vector<LookEntry> linear_look; + array<u8, 64> data_select_mask; + data_select_mask.fill(0); + u64a hi_bits_mask = 0; + u64a lo_bits_mask = 0; + + for (const auto &look : multi_look) { + assert(linear_look.size() < 64); + lo_bits_mask |= 1LLU << linear_look.size(); + for (const auto &entry : look) { + assert(entry.offset - base_offset < MULTIPATH_MAX_LEN); + data_select_mask[linear_look.size()] = + verify_u8(entry.offset - base_offset); + linear_look.emplace_back(verify_s8(linear_look.size()), entry.reach); + } + hi_bits_mask |= 1LLU << (linear_look.size() - 1); + } + + u8 bit_index = 0; // number of buckets + u64a neg_mask; + array<u8, 32> hi_mask; + array<u8, 32> lo_mask; + array<u8, 64> bucket_select_hi; + array<u8, 64> bucket_select_lo; + hi_mask.fill(0); + lo_mask.fill(0); + bucket_select_hi.fill(0); + bucket_select_lo.fill(0); + + if (!getShuftiMasks(linear_look, hi_mask, lo_mask, bucket_select_hi.data(), + bucket_select_lo.data(), neg_mask, bit_index, + multi_len)) { + return false; + } + + DEBUG_PRINTF("hi_mask %s\n", + convertMaskstoString(hi_mask.data(), 16).c_str()); + DEBUG_PRINTF("lo_mask %s\n", + convertMaskstoString(lo_mask.data(), 16).c_str()); + DEBUG_PRINTF("bucket_select_hi %s\n", + convertMaskstoString(bucket_select_hi.data(), 64).c_str()); + DEBUG_PRINTF("bucket_select_lo %s\n", + convertMaskstoString(bucket_select_lo.data(), 64).c_str()); + DEBUG_PRINTF("data_select_mask %s\n", + convertMaskstoString(data_select_mask.data(), 64).c_str()); + DEBUG_PRINTF("hi_bits_mask %llx\n", hi_bits_mask); + DEBUG_PRINTF("lo_bits_mask %llx\n", lo_bits_mask); + DEBUG_PRINTF("neg_mask %llx\n", neg_mask); + DEBUG_PRINTF("base_offset %d\n", base_offset); + DEBUG_PRINTF("last_start %d\n", last_start); + + // Since we don't have 16x16 now, just call 32x16 instead. 
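As a rough scalar illustration of the shufti-style nibble lookup that these masks feed (the real CHECK_MULTIPATH_SHUFTI instructions operate on SIMD vectors and several paths at once; the bucket assignment and mask values below are hypothetical, not ones produced by getShuftiMasks):

    #include <array>
    #include <cstdint>
    #include <cstdio>

    // A byte hits bucket k iff bit k is set in both nibble tables: the low
    // nibble indexes lo_mask, the high nibble indexes hi_mask, and the two
    // results are ANDed together.
    static bool shufti_hit(const std::array<uint8_t, 16> &lo_mask,
                           const std::array<uint8_t, 16> &hi_mask,
                           uint8_t byte) {
        return (lo_mask[byte & 0xf] & hi_mask[byte >> 4]) != 0;
    }

    int main() {
        // Hypothetical single-bucket masks (bit 0) accepting 'a'..'c'
        // (0x61..0x63): low nibbles 1..3, high nibble 6.
        std::array<uint8_t, 16> lo_mask{}, hi_mask{};
        lo_mask[0x1] = lo_mask[0x2] = lo_mask[0x3] = 0x01;
        hi_mask[0x6] = 0x01;

        for (uint8_t c : {uint8_t('a'), uint8_t('c'), uint8_t('d'), uint8_t('A')}) {
            std::printf("%c -> %s\n", c,
                        shufti_hit(lo_mask, hi_mask, c) ? "hit" : "miss");
        }
        return 0;
    }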
+ if (bit_index > 8) { + assert(multi_len <= 32); + multi_len = 32; + } + + const auto *end_inst = program.end_instruction(); + assert(multi_len == 16 || multi_len == 32 || multi_len == 64); + if (multi_len == 16) { + neg_mask &= 0xffff; + assert(!(hi_bits_mask & ~0xffffULL)); + assert(!(lo_bits_mask & ~0xffffULL)); + assert(bit_index <=8); + array<u8, 32> nib_mask; + copy(begin(lo_mask), begin(lo_mask) + 16, nib_mask.begin()); + copy(begin(hi_mask), begin(hi_mask) + 16, nib_mask.begin() + 16); + + auto ri = std::make_unique<RoseInstrCheckMultipathShufti16x8> + (nib_mask, bucket_select_lo, data_select_mask, hi_bits_mask, + lo_bits_mask, neg_mask, base_offset, last_start, end_inst); + program.add_before_end(move(ri)); + } else if (multi_len == 32) { + neg_mask &= 0xffffffff; + assert(!(hi_bits_mask & ~0xffffffffULL)); + assert(!(lo_bits_mask & ~0xffffffffULL)); + if (bit_index <= 8) { + auto ri = std::make_unique<RoseInstrCheckMultipathShufti32x8> + (hi_mask, lo_mask, bucket_select_lo, data_select_mask, + hi_bits_mask, lo_bits_mask, neg_mask, base_offset, + last_start, end_inst); + program.add_before_end(move(ri)); + } else { + auto ri = std::make_unique<RoseInstrCheckMultipathShufti32x16> + (hi_mask, lo_mask, bucket_select_hi, bucket_select_lo, + data_select_mask, hi_bits_mask, lo_bits_mask, neg_mask, + base_offset, last_start, end_inst); + program.add_before_end(move(ri)); + } + } else { + auto ri = std::make_unique<RoseInstrCheckMultipathShufti64> + (hi_mask, lo_mask, bucket_select_lo, data_select_mask, + hi_bits_mask, lo_bits_mask, neg_mask, base_offset, + last_start, end_inst); + program.add_before_end(move(ri)); + } + return true; +} + +static +void makeRoleMultipathLookaround(const vector<vector<LookEntry>> &multi_look, + RoseProgram &program) { + assert(!multi_look.empty()); + assert(multi_look.size() <= MAX_LOOKAROUND_PATHS); + vector<vector<LookEntry>> ordered_look; + set<s32> look_offset; + + assert(!multi_look[0].empty()); + s32 last_start = multi_look[0][0].offset; + + // build offset table. 
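For intuition, a scalar sketch of what a multipath lookaround ultimately asks: does at least one path of (relative offset, allowed byte set) entries match around the literal? The offsets, reaches and buffer below are hypothetical; the engine answers the same question with the masks and offset table built here.

    #include <cstdint>
    #include <cstdio>
    #include <functional>
    #include <string>
    #include <vector>

    struct Look {
        int offset;                          // relative to the current match offset
        std::function<bool(uint8_t)> reach;  // allowed bytes at that offset
    };

    static bool pathMatches(const std::vector<Look> &path, const std::string &buf,
                            long match_offset) {
        for (const auto &l : path) {
            long idx = match_offset + l.offset;
            if (idx < 0 || idx >= (long)buf.size() ||
                !l.reach((uint8_t)buf[idx])) {
                return false;  // this path dies here
            }
        }
        return true;
    }

    static bool multipathMatches(const std::vector<std::vector<Look>> &paths,
                                 const std::string &buf, long match_offset) {
        for (const auto &p : paths) {
            if (pathMatches(p, buf, match_offset)) {
                return true;  // any surviving path is enough
            }
        }
        return false;
    }

    int main() {
        auto digit = [](uint8_t c) { return c >= '0' && c <= '9'; };
        auto dash = [](uint8_t c) { return c == '-'; };
        // Two hypothetical paths inspecting the two bytes before the match.
        std::vector<std::vector<Look>> paths = {
            {{-2, digit}, {-1, digit}},
            {{-2, digit}, {-1, dash}},
        };
        std::printf("%d\n", multipathMatches(paths, "ab12-xyz", 5)); // path 2 hits
        return 0;
    }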
+ for (const auto &look : multi_look) { + assert(look.size() > 0); + last_start = max(last_start, (s32)look.begin()->offset); + + for (const auto &t : look) { + look_offset.insert(t.offset); + } + } + + array<u8, MULTIPATH_MAX_LEN> start_mask; + if (multi_look.size() < MAX_LOOKAROUND_PATHS) { + start_mask.fill((1 << multi_look.size()) - 1); + } else { + start_mask.fill(0xff); + } + + u32 path_idx = 0; + for (const auto &look : multi_look) { + for (const auto &t : look) { + assert(t.offset >= (int)*look_offset.begin()); + size_t update_offset = t.offset - *look_offset.begin() + 1; + if (update_offset < start_mask.size()) { + start_mask[update_offset] &= ~(1 << path_idx); + } + } + path_idx++; + } + + for (u32 i = 1; i < MULTIPATH_MAX_LEN; i++) { + start_mask[i] &= start_mask[i - 1]; + DEBUG_PRINTF("start_mask[%u] = %x\n", i, start_mask[i]); + } + + assert(look_offset.size() <= MULTIPATH_MAX_LEN); + + assert(last_start < 0); + + for (const auto &offset : look_offset) { + vector<LookEntry> multi_entry; + multi_entry.resize(MAX_LOOKAROUND_PATHS); + + for (size_t i = 0; i < multi_look.size(); i++) { + for (const auto &t : multi_look[i]) { + if (t.offset == offset) { + multi_entry[i] = t; + } + } + } + ordered_look.emplace_back(multi_entry); + } + + auto ri = std::make_unique<RoseInstrMultipathLookaround>(move(ordered_look), + last_start, start_mask, + program.end_instruction()); + program.add_before_end(move(ri)); +} + +static +void makeRoleLookaround(const RoseBuildImpl &build, + const map<RoseVertex, left_build_info> &leftfix_info, + RoseVertex v, RoseProgram &program) { + if (!build.cc.grey.roseLookaroundMasks) { + return; + } + + vector<vector<LookEntry>> looks; + + // Lookaround from leftfix (mandatory). + if (contains(leftfix_info, v) && leftfix_info.at(v).has_lookaround) { + DEBUG_PRINTF("using leftfix lookaround\n"); + looks = leftfix_info.at(v).lookaround; + } + + // We may be able to find more lookaround info (advisory) and merge it + // in. + if (looks.size() <= 1) { + vector<LookEntry> look; + vector<LookEntry> look_more; + if (!looks.empty()) { + look = move(looks.front()); + } + findLookaroundMasks(build, v, look_more); + mergeLookaround(look, look_more); + if (!look.empty()) { makeLookaroundInstruction(look, program, build.cc.target_info); - } - return; - } - - if (!makeRoleMultipathShufti(looks, program)) { - assert(looks.size() <= 8); - makeRoleMultipathLookaround(looks, program); - } -} - -static -void makeRoleSuffix(const RoseBuildImpl &build, - const map<suffix_id, u32> &suffixes, - const map<u32, engine_info> &engine_info_by_queue, - RoseVertex v, RoseProgram &prog) { - const auto &g = build.g; - if (!g[v].suffix) { - return; - } - assert(contains(suffixes, g[v].suffix)); - u32 queue = suffixes.at(g[v].suffix); - u32 event; - assert(contains(engine_info_by_queue, queue)); - const auto eng_info = engine_info_by_queue.at(queue); - if (isContainerType(eng_info.type)) { - auto tamaProto = g[v].suffix.tamarama.get(); - assert(tamaProto); - event = (u32)MQE_TOP_FIRST + - tamaProto->top_remap.at(make_pair(g[v].index, - g[v].suffix.top)); - assert(event < MQE_INVALID); - } else if (isMultiTopType(eng_info.type)) { - assert(!g[v].suffix.haig); - event = (u32)MQE_TOP_FIRST + g[v].suffix.top; - assert(event < MQE_INVALID); - } else { - // DFAs/Puffs have no MQE_TOP_N support, so they get a classic TOP - // event. 
- assert(!g[v].suffix.graph || onlyOneTop(*g[v].suffix.graph)); - event = MQE_TOP; - } - + } + return; + } + + if (!makeRoleMultipathShufti(looks, program)) { + assert(looks.size() <= 8); + makeRoleMultipathLookaround(looks, program); + } +} + +static +void makeRoleSuffix(const RoseBuildImpl &build, + const map<suffix_id, u32> &suffixes, + const map<u32, engine_info> &engine_info_by_queue, + RoseVertex v, RoseProgram &prog) { + const auto &g = build.g; + if (!g[v].suffix) { + return; + } + assert(contains(suffixes, g[v].suffix)); + u32 queue = suffixes.at(g[v].suffix); + u32 event; + assert(contains(engine_info_by_queue, queue)); + const auto eng_info = engine_info_by_queue.at(queue); + if (isContainerType(eng_info.type)) { + auto tamaProto = g[v].suffix.tamarama.get(); + assert(tamaProto); + event = (u32)MQE_TOP_FIRST + + tamaProto->top_remap.at(make_pair(g[v].index, + g[v].suffix.top)); + assert(event < MQE_INVALID); + } else if (isMultiTopType(eng_info.type)) { + assert(!g[v].suffix.haig); + event = (u32)MQE_TOP_FIRST + g[v].suffix.top; + assert(event < MQE_INVALID); + } else { + // DFAs/Puffs have no MQE_TOP_N support, so they get a classic TOP + // event. + assert(!g[v].suffix.graph || onlyOneTop(*g[v].suffix.graph)); + event = MQE_TOP; + } + prog.add_before_end(std::make_unique<RoseInstrTriggerSuffix>(queue, event)); -} - -static -void addInfixTriggerInstructions(vector<TriggerInfo> triggers, - RoseProgram &prog) { - // Order, de-dupe and add instructions to the end of program. - sort_and_unique(triggers, [](const TriggerInfo &a, const TriggerInfo &b) { - return tie(a.cancel, a.queue, a.event) < - tie(b.cancel, b.queue, b.event); - }); - for (const auto &ti : triggers) { - prog.add_before_end( - std::make_unique<RoseInstrTriggerInfix>(ti.cancel, ti.queue, ti.event)); - } -} - -static -void makeRoleInfixTriggers(const RoseBuildImpl &build, - const map<RoseVertex, left_build_info> &leftfix_info, - const map<u32, engine_info> &engine_info_by_queue, - RoseVertex u, RoseProgram &program) { - const auto &g = build.g; - - vector<TriggerInfo> triggers; - - for (const auto &e : out_edges_range(u, g)) { - RoseVertex v = target(e, g); - if (!g[v].left) { - continue; - } - - assert(contains(leftfix_info, v)); - const left_build_info &lbi = leftfix_info.at(v); - if (lbi.has_lookaround) { - continue; - } - - assert(contains(engine_info_by_queue, lbi.queue)); - const auto &eng_info = engine_info_by_queue.at(lbi.queue); - - // DFAs have no TOP_N support, so they get a classic MQE_TOP event. - u32 top; - if (isContainerType(eng_info.type)) { - auto tamaProto = g[v].left.tamarama.get(); - assert(tamaProto); - top = MQE_TOP_FIRST + tamaProto->top_remap.at( - make_pair(g[v].index, g[e].rose_top)); - assert(top < MQE_INVALID); - } else if (!isMultiTopType(eng_info.type)) { - assert(num_tops(g[v].left) == 1); - top = MQE_TOP; - } else { - top = MQE_TOP_FIRST + g[e].rose_top; - assert(top < MQE_INVALID); - } - - triggers.emplace_back(g[e].rose_cancel_prev_top, lbi.queue, top); - } - - addInfixTriggerInstructions(move(triggers), program); -} - - -/** - * \brief True if the given vertex is a role that can only be switched on at - * EOD. - */ -static -bool onlyAtEod(const RoseBuildImpl &tbi, RoseVertex v) { - const RoseGraph &g = tbi.g; - - // All such roles have only (0,0) edges to vertices with the eod_accept - // property, and no other effects (suffixes, ordinary reports, etc, etc). 
- - if (isLeafNode(v, g) || !g[v].reports.empty() || g[v].suffix) { - return false; - } - - for (const auto &e : out_edges_range(v, g)) { - RoseVertex w = target(e, g); - if (!g[w].eod_accept) { - return false; - } - assert(!g[w].reports.empty()); - assert(g[w].literals.empty()); - - if (g[e].minBound || g[e].maxBound) { - return false; - } - } - - /* There is no pointing enforcing this check at runtime if - * this role is only fired by the eod event literal */ - if (tbi.eod_event_literal_id != MO_INVALID_IDX && - g[v].literals.size() == 1 && - *g[v].literals.begin() == tbi.eod_event_literal_id) { - return false; - } - - return true; -} - -static -void addCheckOnlyEodInstruction(RoseProgram &prog) { - DEBUG_PRINTF("only at eod\n"); - const auto *end_inst = prog.end_instruction(); +} + +static +void addInfixTriggerInstructions(vector<TriggerInfo> triggers, + RoseProgram &prog) { + // Order, de-dupe and add instructions to the end of program. + sort_and_unique(triggers, [](const TriggerInfo &a, const TriggerInfo &b) { + return tie(a.cancel, a.queue, a.event) < + tie(b.cancel, b.queue, b.event); + }); + for (const auto &ti : triggers) { + prog.add_before_end( + std::make_unique<RoseInstrTriggerInfix>(ti.cancel, ti.queue, ti.event)); + } +} + +static +void makeRoleInfixTriggers(const RoseBuildImpl &build, + const map<RoseVertex, left_build_info> &leftfix_info, + const map<u32, engine_info> &engine_info_by_queue, + RoseVertex u, RoseProgram &program) { + const auto &g = build.g; + + vector<TriggerInfo> triggers; + + for (const auto &e : out_edges_range(u, g)) { + RoseVertex v = target(e, g); + if (!g[v].left) { + continue; + } + + assert(contains(leftfix_info, v)); + const left_build_info &lbi = leftfix_info.at(v); + if (lbi.has_lookaround) { + continue; + } + + assert(contains(engine_info_by_queue, lbi.queue)); + const auto &eng_info = engine_info_by_queue.at(lbi.queue); + + // DFAs have no TOP_N support, so they get a classic MQE_TOP event. + u32 top; + if (isContainerType(eng_info.type)) { + auto tamaProto = g[v].left.tamarama.get(); + assert(tamaProto); + top = MQE_TOP_FIRST + tamaProto->top_remap.at( + make_pair(g[v].index, g[e].rose_top)); + assert(top < MQE_INVALID); + } else if (!isMultiTopType(eng_info.type)) { + assert(num_tops(g[v].left) == 1); + top = MQE_TOP; + } else { + top = MQE_TOP_FIRST + g[e].rose_top; + assert(top < MQE_INVALID); + } + + triggers.emplace_back(g[e].rose_cancel_prev_top, lbi.queue, top); + } + + addInfixTriggerInstructions(move(triggers), program); +} + + +/** + * \brief True if the given vertex is a role that can only be switched on at + * EOD. + */ +static +bool onlyAtEod(const RoseBuildImpl &tbi, RoseVertex v) { + const RoseGraph &g = tbi.g; + + // All such roles have only (0,0) edges to vertices with the eod_accept + // property, and no other effects (suffixes, ordinary reports, etc, etc). 
+ + if (isLeafNode(v, g) || !g[v].reports.empty() || g[v].suffix) { + return false; + } + + for (const auto &e : out_edges_range(v, g)) { + RoseVertex w = target(e, g); + if (!g[w].eod_accept) { + return false; + } + assert(!g[w].reports.empty()); + assert(g[w].literals.empty()); + + if (g[e].minBound || g[e].maxBound) { + return false; + } + } + + /* There is no pointing enforcing this check at runtime if + * this role is only fired by the eod event literal */ + if (tbi.eod_event_literal_id != MO_INVALID_IDX && + g[v].literals.size() == 1 && + *g[v].literals.begin() == tbi.eod_event_literal_id) { + return false; + } + + return true; +} + +static +void addCheckOnlyEodInstruction(RoseProgram &prog) { + DEBUG_PRINTF("only at eod\n"); + const auto *end_inst = prog.end_instruction(); prog.add_before_end(std::make_unique<RoseInstrCheckOnlyEod>(end_inst)); -} - -static -void makeRoleEagerEodReports(const RoseBuildImpl &build, - const map<RoseVertex, left_build_info> &leftfix_info, - bool needs_catchup, RoseVertex v, - RoseProgram &program) { - RoseProgram eod_program; - - for (const auto &e : out_edges_range(v, build.g)) { - if (canEagerlyReportAtEod(build, e)) { - RoseProgram block; - makeRoleReports(build, leftfix_info, needs_catchup, - target(e, build.g), block); - eod_program.add_block(move(block)); - } - } - - if (eod_program.empty()) { - return; - } - - if (!onlyAtEod(build, v)) { - // The rest of our program wasn't EOD anchored, so we need to guard - // these reports with a check. - addCheckOnlyEodInstruction(program); - } - - program.add_before_end(move(eod_program)); -} - +} + +static +void makeRoleEagerEodReports(const RoseBuildImpl &build, + const map<RoseVertex, left_build_info> &leftfix_info, + bool needs_catchup, RoseVertex v, + RoseProgram &program) { + RoseProgram eod_program; + + for (const auto &e : out_edges_range(v, build.g)) { + if (canEagerlyReportAtEod(build, e)) { + RoseProgram block; + makeRoleReports(build, leftfix_info, needs_catchup, + target(e, build.g), block); + eod_program.add_block(move(block)); + } + } + + if (eod_program.empty()) { + return; + } + + if (!onlyAtEod(build, v)) { + // The rest of our program wasn't EOD anchored, so we need to guard + // these reports with a check. + addCheckOnlyEodInstruction(program); + } + + program.add_before_end(move(eod_program)); +} + /** Makes a program for a role/vertex given a specific pred/in_edge. */ -static -RoseProgram makeRoleProgram(const RoseBuildImpl &build, - const map<RoseVertex, left_build_info> &leftfix_info, - const map<suffix_id, u32> &suffixes, - const map<u32, engine_info> &engine_info_by_queue, - const unordered_map<RoseVertex, u32> &roleStateIndices, - ProgramBuild &prog_build, const RoseEdge &e) { - const RoseGraph &g = build.g; - auto v = target(e, g); - - RoseProgram program; - - // First, add program instructions that enforce preconditions without - // effects. - - if (onlyAtEod(build, v)) { - addCheckOnlyEodInstruction(program); - } - - if (g[e].history == ROSE_ROLE_HISTORY_ANCH) { - makeRoleCheckBounds(build, v, e, program); - } - - // This role program may be triggered by different predecessors, with - // different offset bounds. We must ensure we put this check/set operation - // after the bounds check to deal with this case. 
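The not-handled check guards against a role body running more than once when several predecessor programs fire for the same literal match. A minimal sketch of that idea, with hypothetical key and offset types standing in for the engine's per-scan handled-keys state:

    #include <cstdio>
    #include <set>
    #include <utility>

    // Returns true (and records the key) only the first time this role key is
    // seen at this offset; later attempts at the same offset are skipped.
    static bool check_not_handled(std::set<std::pair<unsigned long, unsigned>> &handled,
                                  unsigned long offset, unsigned key) {
        return handled.insert({offset, key}).second;
    }

    int main() {
        std::set<std::pair<unsigned long, unsigned>> handled;
        // Two predecessor programs trigger the same role (key 7) at offset 12.
        std::printf("%d\n", check_not_handled(handled, 12, 7)); // 1: run the role
        std::printf("%d\n", check_not_handled(handled, 12, 7)); // 0: already done
        return 0;
    }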
- if (in_degree(v, g) > 1) { - assert(!build.isRootSuccessor(v)); - makeRoleCheckNotHandled(prog_build, v, program); - } - - makeRoleLookaround(build, leftfix_info, v, program); - makeRoleCheckLeftfix(build, leftfix_info, v, program); - - // Next, we can add program instructions that have effects. This must be - // done as a series of blocks, as some of them (like reports) are - // escapable. - - RoseProgram effects_block; - - RoseProgram reports_block; - makeRoleReports(build, leftfix_info, prog_build.needs_catchup, v, - reports_block); - effects_block.add_block(move(reports_block)); - - RoseProgram infix_block; - makeRoleInfixTriggers(build, leftfix_info, engine_info_by_queue, v, - infix_block); - effects_block.add_block(move(infix_block)); - - // Note: SET_GROUPS instruction must be after infix triggers, as an infix - // going dead may switch off groups. - RoseProgram groups_block; - makeRoleGroups(build.g, prog_build, v, groups_block); - effects_block.add_block(move(groups_block)); - - RoseProgram suffix_block; - makeRoleSuffix(build, suffixes, engine_info_by_queue, v, suffix_block); - effects_block.add_block(move(suffix_block)); - - RoseProgram state_block; - makeRoleSetState(roleStateIndices, v, state_block); - effects_block.add_block(move(state_block)); - - // Note: EOD eager reports may generate a CHECK_ONLY_EOD instruction (if - // the program doesn't have one already). - RoseProgram eod_block; - makeRoleEagerEodReports(build, leftfix_info, prog_build.needs_catchup, v, - eod_block); - effects_block.add_block(move(eod_block)); - - /* a 'ghost role' may do nothing if we know that its groups are already set - * - in this case we can avoid producing a program at all. */ - if (effects_block.empty()) { - return {}; - } - - program.add_before_end(move(effects_block)); - return program; -} - -static -void makeGroupSquashInstruction(const RoseBuildImpl &build, u32 lit_id, - RoseProgram &prog) { - const auto &info = build.literal_info.at(lit_id); - if (!info.squash_group) { - return; - } - - DEBUG_PRINTF("squashes 0x%llx\n", info.group_mask); - assert(info.group_mask); - /* Note: group_mask is negated. */ +static +RoseProgram makeRoleProgram(const RoseBuildImpl &build, + const map<RoseVertex, left_build_info> &leftfix_info, + const map<suffix_id, u32> &suffixes, + const map<u32, engine_info> &engine_info_by_queue, + const unordered_map<RoseVertex, u32> &roleStateIndices, + ProgramBuild &prog_build, const RoseEdge &e) { + const RoseGraph &g = build.g; + auto v = target(e, g); + + RoseProgram program; + + // First, add program instructions that enforce preconditions without + // effects. + + if (onlyAtEod(build, v)) { + addCheckOnlyEodInstruction(program); + } + + if (g[e].history == ROSE_ROLE_HISTORY_ANCH) { + makeRoleCheckBounds(build, v, e, program); + } + + // This role program may be triggered by different predecessors, with + // different offset bounds. We must ensure we put this check/set operation + // after the bounds check to deal with this case. + if (in_degree(v, g) > 1) { + assert(!build.isRootSuccessor(v)); + makeRoleCheckNotHandled(prog_build, v, program); + } + + makeRoleLookaround(build, leftfix_info, v, program); + makeRoleCheckLeftfix(build, leftfix_info, v, program); + + // Next, we can add program instructions that have effects. This must be + // done as a series of blocks, as some of them (like reports) are + // escapable. 
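The "series of blocks" structure can be pictured with a toy program container (assumed names, not the real RoseProgram): each block ends in END, a failed check jumps to that END, and appending a block replaces the END so that earlier escapes fall through into the new block.

    #include <cstdio>
    #include <string>
    #include <vector>

    // Each program is a list of opcodes ending in "END".
    struct Program {
        std::vector<std::string> prog{"END"};

        // Insert before the terminating END; checks that jump to END still do.
        void add_before_end(std::string op) {
            prog.insert(prog.end() - 1, std::move(op));
        }

        // Append a block in place of our END: anything that used to jump to
        // END now falls through into the appended block instead of stopping.
        void add_block(Program &&block) {
            prog.pop_back();
            prog.insert(prog.end(), block.prog.begin(), block.prog.end());
        }
    };

    int main() {
        Program role;
        role.add_before_end("CHECK_BOUNDS"); // may jump to END on failure
        role.add_before_end("REPORT");

        Program unconditional;
        unconditional.add_before_end("SQUASH_GROUPS");

        // The squash must run even if the role's checks bail out early.
        role.add_block(std::move(unconditional));

        for (const auto &op : role.prog) {
            std::printf("%s\n", op.c_str());
        }
        return 0;
    }

This mirrors how the unconditional group-squash block is appended after the role programs in makeLiteralProgram: it runs whether or not the preceding checks passed.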
+ + RoseProgram effects_block; + + RoseProgram reports_block; + makeRoleReports(build, leftfix_info, prog_build.needs_catchup, v, + reports_block); + effects_block.add_block(move(reports_block)); + + RoseProgram infix_block; + makeRoleInfixTriggers(build, leftfix_info, engine_info_by_queue, v, + infix_block); + effects_block.add_block(move(infix_block)); + + // Note: SET_GROUPS instruction must be after infix triggers, as an infix + // going dead may switch off groups. + RoseProgram groups_block; + makeRoleGroups(build.g, prog_build, v, groups_block); + effects_block.add_block(move(groups_block)); + + RoseProgram suffix_block; + makeRoleSuffix(build, suffixes, engine_info_by_queue, v, suffix_block); + effects_block.add_block(move(suffix_block)); + + RoseProgram state_block; + makeRoleSetState(roleStateIndices, v, state_block); + effects_block.add_block(move(state_block)); + + // Note: EOD eager reports may generate a CHECK_ONLY_EOD instruction (if + // the program doesn't have one already). + RoseProgram eod_block; + makeRoleEagerEodReports(build, leftfix_info, prog_build.needs_catchup, v, + eod_block); + effects_block.add_block(move(eod_block)); + + /* a 'ghost role' may do nothing if we know that its groups are already set + * - in this case we can avoid producing a program at all. */ + if (effects_block.empty()) { + return {}; + } + + program.add_before_end(move(effects_block)); + return program; +} + +static +void makeGroupSquashInstruction(const RoseBuildImpl &build, u32 lit_id, + RoseProgram &prog) { + const auto &info = build.literal_info.at(lit_id); + if (!info.squash_group) { + return; + } + + DEBUG_PRINTF("squashes 0x%llx\n", info.group_mask); + assert(info.group_mask); + /* Note: group_mask is negated. */ prog.add_before_end(std::make_unique<RoseInstrSquashGroups>(~info.group_mask)); -} - -namespace { -struct ProgKey { - ProgKey(const RoseProgram &p) : prog(&p) {} - - bool operator==(const ProgKey &b) const { - return RoseProgramEquivalence()(*prog, *b.prog); - } - - size_t hash() const { - return RoseProgramHash()(*prog); - } -private: - const RoseProgram *prog; -}; -} - -RoseProgram assembleProgramBlocks(vector<RoseProgram> &&blocks_in) { - DEBUG_PRINTF("%zu blocks before dedupe\n", blocks_in.size()); - - vector<RoseProgram> blocks; - blocks.reserve(blocks_in.size()); /* to ensure stable reference for seen */ - - ue2_unordered_set<ProgKey> seen; - for (auto &block : blocks_in) { - if (contains(seen, block)) { - continue; - } - - blocks.push_back(move(block)); - seen.emplace(blocks.back()); - } - - DEBUG_PRINTF("%zu blocks after dedupe\n", blocks.size()); - - RoseProgram prog; - for (auto &block : blocks) { - /* If we have multiple blocks from different literals and any of them - * squash groups, we will have to add a CLEAR_WORK_DONE instruction to - * each literal program block to clear the work_done flags so that it's - * only set if a state has been. 
*/ - if (!prog.empty() && reads_work_done_flag(block)) { - RoseProgram clear_block; +} + +namespace { +struct ProgKey { + ProgKey(const RoseProgram &p) : prog(&p) {} + + bool operator==(const ProgKey &b) const { + return RoseProgramEquivalence()(*prog, *b.prog); + } + + size_t hash() const { + return RoseProgramHash()(*prog); + } +private: + const RoseProgram *prog; +}; +} + +RoseProgram assembleProgramBlocks(vector<RoseProgram> &&blocks_in) { + DEBUG_PRINTF("%zu blocks before dedupe\n", blocks_in.size()); + + vector<RoseProgram> blocks; + blocks.reserve(blocks_in.size()); /* to ensure stable reference for seen */ + + ue2_unordered_set<ProgKey> seen; + for (auto &block : blocks_in) { + if (contains(seen, block)) { + continue; + } + + blocks.push_back(move(block)); + seen.emplace(blocks.back()); + } + + DEBUG_PRINTF("%zu blocks after dedupe\n", blocks.size()); + + RoseProgram prog; + for (auto &block : blocks) { + /* If we have multiple blocks from different literals and any of them + * squash groups, we will have to add a CLEAR_WORK_DONE instruction to + * each literal program block to clear the work_done flags so that it's + * only set if a state has been. */ + if (!prog.empty() && reads_work_done_flag(block)) { + RoseProgram clear_block; clear_block.add_before_end(std::make_unique<RoseInstrClearWorkDone>()); - prog.add_block(move(clear_block)); - } - - prog.add_block(move(block)); - } - - return prog; -} - -RoseProgram makeLiteralProgram(const RoseBuildImpl &build, - const map<RoseVertex, left_build_info> &leftfix_info, - const map<suffix_id, u32> &suffixes, - const map<u32, engine_info> &engine_info_by_queue, - const unordered_map<RoseVertex, u32> &roleStateIndices, - ProgramBuild &prog_build, u32 lit_id, - const vector<RoseEdge> &lit_edges, - bool is_anchored_replay_program) { - const auto &g = build.g; - - DEBUG_PRINTF("lit id=%u, %zu lit edges\n", lit_id, lit_edges.size()); - - // Construct initial program up front, as its early checks must be able - // to jump to end and terminate processing for this literal. - auto lit_program = makeLitInitialProgram(build, prog_build, lit_id, - lit_edges, - is_anchored_replay_program); - - RoseProgram role_programs; - - // Predecessor state id -> program block. - map<u32, RoseProgram> pred_blocks; - - // Construct sparse iter sub-programs. - for (const auto &e : lit_edges) { - const auto &u = source(e, g); - if (build.isAnyStart(u)) { - continue; // Root roles are not handled with sparse iterator. - } - DEBUG_PRINTF("sparse iter edge (%zu,%zu)\n", g[u].index, - g[target(e, g)].index); - assert(contains(roleStateIndices, u)); - u32 pred_state = roleStateIndices.at(u); - auto role_prog = makeRoleProgram(build, leftfix_info, suffixes, - engine_info_by_queue, roleStateIndices, - prog_build, e); - if (!role_prog.empty()) { - pred_blocks[pred_state].add_block(move(role_prog)); - } - } - - // Add blocks to deal with non-root edges (triggered by sparse iterator or - // mmbit_isset checks). - addPredBlocks(pred_blocks, roleStateIndices.size(), role_programs); - - // Add blocks to handle root roles. 
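The ProgKey machinery above amounts to order-preserving dedupe through a hash set of lightweight keys that reference the stored blocks, which is why the output vector is reserved up front. A sketch with strings standing in for program blocks and their hash/equivalence functors:

    #include <cstdio>
    #include <functional>
    #include <string>
    #include <unordered_set>
    #include <vector>

    struct BlockKey {
        const std::string *block;  // points at an element owned by the vector
        bool operator==(const BlockKey &o) const { return *block == *o.block; }
    };

    struct BlockKeyHash {
        size_t operator()(const BlockKey &k) const {
            return std::hash<std::string>()(*k.block);
        }
    };

    static std::vector<std::string> dedupe(std::vector<std::string> &&blocks_in) {
        std::vector<std::string> blocks;
        blocks.reserve(blocks_in.size());  // keep references to elements stable
        std::unordered_set<BlockKey, BlockKeyHash> seen;
        for (auto &b : blocks_in) {
            if (seen.count(BlockKey{&b})) {
                continue;  // equivalent block already kept
            }
            blocks.push_back(std::move(b));
            seen.insert(BlockKey{&blocks.back()});
        }
        return blocks;
    }

    int main() {
        auto out = dedupe({"check+report", "push_delayed", "check+report"});
        for (const auto &b : out) {
            std::printf("%s\n", b.c_str());
        }
        return 0;
    }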
- for (const auto &e : lit_edges) { - const auto &u = source(e, g); - if (!build.isAnyStart(u)) { - continue; - } - DEBUG_PRINTF("root edge (%zu,%zu)\n", g[u].index, - g[target(e, g)].index); - auto role_prog = makeRoleProgram(build, leftfix_info, suffixes, - engine_info_by_queue, roleStateIndices, - prog_build, e); - role_programs.add_block(move(role_prog)); - } - - if (lit_id == build.eod_event_literal_id) { + prog.add_block(move(clear_block)); + } + + prog.add_block(move(block)); + } + + return prog; +} + +RoseProgram makeLiteralProgram(const RoseBuildImpl &build, + const map<RoseVertex, left_build_info> &leftfix_info, + const map<suffix_id, u32> &suffixes, + const map<u32, engine_info> &engine_info_by_queue, + const unordered_map<RoseVertex, u32> &roleStateIndices, + ProgramBuild &prog_build, u32 lit_id, + const vector<RoseEdge> &lit_edges, + bool is_anchored_replay_program) { + const auto &g = build.g; + + DEBUG_PRINTF("lit id=%u, %zu lit edges\n", lit_id, lit_edges.size()); + + // Construct initial program up front, as its early checks must be able + // to jump to end and terminate processing for this literal. + auto lit_program = makeLitInitialProgram(build, prog_build, lit_id, + lit_edges, + is_anchored_replay_program); + + RoseProgram role_programs; + + // Predecessor state id -> program block. + map<u32, RoseProgram> pred_blocks; + + // Construct sparse iter sub-programs. + for (const auto &e : lit_edges) { + const auto &u = source(e, g); + if (build.isAnyStart(u)) { + continue; // Root roles are not handled with sparse iterator. + } + DEBUG_PRINTF("sparse iter edge (%zu,%zu)\n", g[u].index, + g[target(e, g)].index); + assert(contains(roleStateIndices, u)); + u32 pred_state = roleStateIndices.at(u); + auto role_prog = makeRoleProgram(build, leftfix_info, suffixes, + engine_info_by_queue, roleStateIndices, + prog_build, e); + if (!role_prog.empty()) { + pred_blocks[pred_state].add_block(move(role_prog)); + } + } + + // Add blocks to deal with non-root edges (triggered by sparse iterator or + // mmbit_isset checks). + addPredBlocks(pred_blocks, roleStateIndices.size(), role_programs); + + // Add blocks to handle root roles. + for (const auto &e : lit_edges) { + const auto &u = source(e, g); + if (!build.isAnyStart(u)) { + continue; + } + DEBUG_PRINTF("root edge (%zu,%zu)\n", g[u].index, + g[target(e, g)].index); + auto role_prog = makeRoleProgram(build, leftfix_info, suffixes, + engine_info_by_queue, roleStateIndices, + prog_build, e); + role_programs.add_block(move(role_prog)); + } + + if (lit_id == build.eod_event_literal_id) { /* Note: does not require the lit initial program */ - assert(build.eod_event_literal_id != MO_INVALID_IDX); - return role_programs; - } - - /* Instructions to run even if a role program bails out */ - RoseProgram unconditional_block; - - // Literal may squash groups. - makeGroupSquashInstruction(build, lit_id, unconditional_block); - - role_programs.add_block(move(unconditional_block)); - lit_program.add_before_end(move(role_programs)); - - return lit_program; -} - -RoseProgram makeDelayRebuildProgram(const RoseBuildImpl &build, - ProgramBuild &prog_build, - const vector<u32> &lit_ids) { - assert(!lit_ids.empty()); - assert(build.cc.streaming); - - vector<RoseProgram> blocks; - - for (const auto &lit_id : lit_ids) { - DEBUG_PRINTF("lit_id=%u\n", lit_id); - const auto &info = build.literal_info.at(lit_id); - if (info.delayed_ids.empty()) { - continue; // No delayed IDs, no work to do. 
- } - - RoseProgram prog; - if (!build.isDelayed(lit_id)) { - makeCheckLiteralInstruction(build.literals.at(lit_id), - prog_build.longLitLengthThreshold, prog, - build.cc); - } - - makeCheckLitMaskInstruction(build, lit_id, prog); - makePushDelayedInstructions(build.literals, prog_build, - build.literal_info.at(lit_id).delayed_ids, - prog); - blocks.push_back(move(prog)); - } - - return assembleProgramBlocks(move(blocks)); -} - -RoseProgram makeEodAnchorProgram(const RoseBuildImpl &build, - ProgramBuild &prog_build, const RoseEdge &e, - const bool multiple_preds) { - const RoseGraph &g = build.g; - const RoseVertex v = target(e, g); - - RoseProgram program; - - if (g[e].history == ROSE_ROLE_HISTORY_ANCH) { - makeRoleCheckBounds(build, v, e, program); - } - - if (multiple_preds) { - // Only necessary when there is more than one pred. - makeRoleCheckNotHandled(prog_build, v, program); - } - - makeCatchup(build.rm, prog_build.needs_catchup, g[v].reports, program); - - const bool has_som = false; - RoseProgram report_block; - for (const auto &id : g[v].reports) { - makeReport(build, id, has_som, report_block); - } - program.add_before_end(move(report_block)); - - return program; -} - -static -void makeCatchupMpv(const ReportManager &rm, bool needs_mpv_catchup, - ReportID id, RoseProgram &program) { - if (!needs_mpv_catchup) { - return; - } - - const Report &report = rm.getReport(id); - if (report.type == INTERNAL_ROSE_CHAIN) { - return; - } - + assert(build.eod_event_literal_id != MO_INVALID_IDX); + return role_programs; + } + + /* Instructions to run even if a role program bails out */ + RoseProgram unconditional_block; + + // Literal may squash groups. + makeGroupSquashInstruction(build, lit_id, unconditional_block); + + role_programs.add_block(move(unconditional_block)); + lit_program.add_before_end(move(role_programs)); + + return lit_program; +} + +RoseProgram makeDelayRebuildProgram(const RoseBuildImpl &build, + ProgramBuild &prog_build, + const vector<u32> &lit_ids) { + assert(!lit_ids.empty()); + assert(build.cc.streaming); + + vector<RoseProgram> blocks; + + for (const auto &lit_id : lit_ids) { + DEBUG_PRINTF("lit_id=%u\n", lit_id); + const auto &info = build.literal_info.at(lit_id); + if (info.delayed_ids.empty()) { + continue; // No delayed IDs, no work to do. + } + + RoseProgram prog; + if (!build.isDelayed(lit_id)) { + makeCheckLiteralInstruction(build.literals.at(lit_id), + prog_build.longLitLengthThreshold, prog, + build.cc); + } + + makeCheckLitMaskInstruction(build, lit_id, prog); + makePushDelayedInstructions(build.literals, prog_build, + build.literal_info.at(lit_id).delayed_ids, + prog); + blocks.push_back(move(prog)); + } + + return assembleProgramBlocks(move(blocks)); +} + +RoseProgram makeEodAnchorProgram(const RoseBuildImpl &build, + ProgramBuild &prog_build, const RoseEdge &e, + const bool multiple_preds) { + const RoseGraph &g = build.g; + const RoseVertex v = target(e, g); + + RoseProgram program; + + if (g[e].history == ROSE_ROLE_HISTORY_ANCH) { + makeRoleCheckBounds(build, v, e, program); + } + + if (multiple_preds) { + // Only necessary when there is more than one pred. 
+ makeRoleCheckNotHandled(prog_build, v, program); + } + + makeCatchup(build.rm, prog_build.needs_catchup, g[v].reports, program); + + const bool has_som = false; + RoseProgram report_block; + for (const auto &id : g[v].reports) { + makeReport(build, id, has_som, report_block); + } + program.add_before_end(move(report_block)); + + return program; +} + +static +void makeCatchupMpv(const ReportManager &rm, bool needs_mpv_catchup, + ReportID id, RoseProgram &program) { + if (!needs_mpv_catchup) { + return; + } + + const Report &report = rm.getReport(id); + if (report.type == INTERNAL_ROSE_CHAIN) { + return; + } + program.add_before_end(std::make_unique<RoseInstrCatchUpMpv>()); -} - -RoseProgram makeReportProgram(const RoseBuildImpl &build, - bool needs_mpv_catchup, ReportID id) { - RoseProgram prog; - - makeCatchupMpv(build.rm, needs_mpv_catchup, id, prog); - - const bool has_som = false; - makeReport(build, id, has_som, prog); - - return prog; -} - -RoseProgram makeBoundaryProgram(const RoseBuildImpl &build, - const set<ReportID> &reports) { - // Note: no CATCHUP instruction is necessary in the boundary case, as we - // should always be caught up (and may not even have the resources in - // scratch to support it). - - const bool has_som = false; - RoseProgram prog; - for (const auto &id : reports) { - makeReport(build, id, has_som, prog); - } - - return prog; -} - -void addIncludedJumpProgram(RoseProgram &program, u32 child_offset, - u8 squash) { - RoseProgram block; +} + +RoseProgram makeReportProgram(const RoseBuildImpl &build, + bool needs_mpv_catchup, ReportID id) { + RoseProgram prog; + + makeCatchupMpv(build.rm, needs_mpv_catchup, id, prog); + + const bool has_som = false; + makeReport(build, id, has_som, prog); + + return prog; +} + +RoseProgram makeBoundaryProgram(const RoseBuildImpl &build, + const set<ReportID> &reports) { + // Note: no CATCHUP instruction is necessary in the boundary case, as we + // should always be caught up (and may not even have the resources in + // scratch to support it). + + const bool has_som = false; + RoseProgram prog; + for (const auto &id : reports) { + makeReport(build, id, has_som, prog); + } + + return prog; +} + +void addIncludedJumpProgram(RoseProgram &program, u32 child_offset, + u8 squash) { + RoseProgram block; block.add_before_end(std::make_unique<RoseInstrIncludedJump>(child_offset, - squash)); - program.add_block(move(block)); -} - -static -void addPredBlockSingle(u32 pred_state, RoseProgram &pred_block, - RoseProgram &program) { - // Prepend an instruction to check the pred state is on. 
- const auto *end_inst = pred_block.end_instruction(); - pred_block.insert(begin(pred_block), - std::make_unique<RoseInstrCheckState>(pred_state, end_inst)); - program.add_block(move(pred_block)); -} - -static -void addPredBlocksAny(map<u32, RoseProgram> &pred_blocks, u32 num_states, - RoseProgram &program) { - RoseProgram sparse_program; - - vector<u32> keys; - for (const u32 &key : pred_blocks | map_keys) { - keys.push_back(key); - } - - const RoseInstruction *end_inst = sparse_program.end_instruction(); - auto ri = std::make_unique<RoseInstrSparseIterAny>(num_states, keys, end_inst); - sparse_program.add_before_end(move(ri)); - - RoseProgram &block = pred_blocks.begin()->second; - - /* we no longer need the check handled instruction as all the pred-role - * blocks are being collapsed together */ - stripCheckHandledInstruction(block); - - sparse_program.add_before_end(move(block)); - program.add_block(move(sparse_program)); -} - -static -void addPredBlocksMulti(map<u32, RoseProgram> &pred_blocks, - u32 num_states, RoseProgram &program) { - assert(!pred_blocks.empty()); - - RoseProgram sparse_program; - const RoseInstruction *end_inst = sparse_program.end_instruction(); - vector<pair<u32, const RoseInstruction *>> jump_table; - - // BEGIN instruction. - auto ri_begin = std::make_unique<RoseInstrSparseIterBegin>(num_states, end_inst); - RoseInstrSparseIterBegin *begin_inst = ri_begin.get(); - sparse_program.add_before_end(move(ri_begin)); - - // NEXT instructions, one per pred program. - u32 prev_key = pred_blocks.begin()->first; - for (auto it = next(begin(pred_blocks)); it != end(pred_blocks); ++it) { - auto ri = std::make_unique<RoseInstrSparseIterNext>(prev_key, begin_inst, - end_inst); - sparse_program.add_before_end(move(ri)); - prev_key = it->first; - } - - // Splice in each pred program after its BEGIN/NEXT. - auto out_it = begin(sparse_program); - for (auto &m : pred_blocks) { - u32 key = m.first; - RoseProgram &flat_prog = m.second; - assert(!flat_prog.empty()); - const size_t block_len = flat_prog.size() - 1; // without INSTR_END. - - assert(dynamic_cast<const RoseInstrSparseIterBegin *>(out_it->get()) || - dynamic_cast<const RoseInstrSparseIterNext *>(out_it->get())); - out_it = sparse_program.insert(++out_it, move(flat_prog)); - - // Jump table target for this key is the beginning of the block we just - // spliced in. - jump_table.emplace_back(key, out_it->get()); - - assert(distance(begin(sparse_program), out_it) + block_len <= - sparse_program.size()); - advance(out_it, block_len); - } - - // Write the jump table back into the SPARSE_ITER_BEGIN instruction. - begin_inst->jump_table = move(jump_table); - - program.add_block(move(sparse_program)); -} - -void addPredBlocks(map<u32, RoseProgram> &pred_blocks, u32 num_states, - RoseProgram &program) { - // Trim empty blocks, if any exist. - for (auto it = pred_blocks.begin(); it != pred_blocks.end();) { - if (it->second.empty()) { - it = pred_blocks.erase(it); - } else { - ++it; - } - } - - const size_t num_preds = pred_blocks.size(); - if (num_preds == 0) { - return; - } - - if (num_preds == 1) { - const auto head = pred_blocks.begin(); - addPredBlockSingle(head->first, head->second, program); - return; - } - - // First, see if all our blocks are equivalent, in which case we can - // collapse them down into one. 
- const auto &blocks = pred_blocks | map_values; - if (all_of(begin(blocks), end(blocks), [&](const RoseProgram &block) { - return RoseProgramEquivalence()(*begin(blocks), block); - })) { - DEBUG_PRINTF("all blocks equiv\n"); - addPredBlocksAny(pred_blocks, num_states, program); - return; - } - - addPredBlocksMulti(pred_blocks, num_states, program); -} - -void applyFinalSpecialisation(RoseProgram &program) { - assert(!program.empty()); - assert(program.back().code() == ROSE_INSTR_END); - if (program.size() < 2) { - return; - } - - /* Replace the second-to-last instruction (before END) with a one-shot - * specialisation if available. */ - auto it = next(program.rbegin()); - if (auto *ri = dynamic_cast<const RoseInstrReport *>(it->get())) { - DEBUG_PRINTF("replacing REPORT with FINAL_REPORT\n"); - program.replace(it, std::make_unique<RoseInstrFinalReport>( - ri->onmatch, ri->offset_adjust)); - } -} - -void recordLongLiterals(vector<ue2_case_string> &longLiterals, - const RoseProgram &program) { - for (const auto &ri : program) { - if (const auto *ri_check = - dynamic_cast<const RoseInstrCheckLongLit *>(ri.get())) { - DEBUG_PRINTF("found CHECK_LONG_LIT for string '%s'\n", - escapeString(ri_check->literal).c_str()); - longLiterals.emplace_back(ri_check->literal, false); - continue; - } - if (const auto *ri_check = - dynamic_cast<const RoseInstrCheckLongLitNocase *>(ri.get())) { - DEBUG_PRINTF("found CHECK_LONG_LIT_NOCASE for string '%s'\n", - escapeString(ri_check->literal).c_str()); - longLiterals.emplace_back(ri_check->literal, true); - } - } -} - -void recordResources(RoseResources &resources, const RoseProgram &program) { - for (const auto &ri : program) { - switch (ri->code()) { - case ROSE_INSTR_TRIGGER_SUFFIX: - resources.has_suffixes = true; - break; - case ROSE_INSTR_TRIGGER_INFIX: - case ROSE_INSTR_CHECK_INFIX: - case ROSE_INSTR_CHECK_PREFIX: - case ROSE_INSTR_SOM_LEFTFIX: - resources.has_leftfixes = true; - break; - case ROSE_INSTR_SET_STATE: - case ROSE_INSTR_CHECK_STATE: - case ROSE_INSTR_SPARSE_ITER_BEGIN: - case ROSE_INSTR_SPARSE_ITER_NEXT: - resources.has_states = true; - break; - case ROSE_INSTR_CHECK_GROUPS: - resources.checks_groups = true; - break; - case ROSE_INSTR_PUSH_DELAYED: - resources.has_lit_delay = true; - break; - case ROSE_INSTR_CHECK_LONG_LIT: - case ROSE_INSTR_CHECK_LONG_LIT_NOCASE: - resources.has_lit_check = true; - break; - default: - break; - } - } -} - -} // namespace ue2 + squash)); + program.add_block(move(block)); +} + +static +void addPredBlockSingle(u32 pred_state, RoseProgram &pred_block, + RoseProgram &program) { + // Prepend an instruction to check the pred state is on. 
+ const auto *end_inst = pred_block.end_instruction(); + pred_block.insert(begin(pred_block), + std::make_unique<RoseInstrCheckState>(pred_state, end_inst)); + program.add_block(move(pred_block)); +} + +static +void addPredBlocksAny(map<u32, RoseProgram> &pred_blocks, u32 num_states, + RoseProgram &program) { + RoseProgram sparse_program; + + vector<u32> keys; + for (const u32 &key : pred_blocks | map_keys) { + keys.push_back(key); + } + + const RoseInstruction *end_inst = sparse_program.end_instruction(); + auto ri = std::make_unique<RoseInstrSparseIterAny>(num_states, keys, end_inst); + sparse_program.add_before_end(move(ri)); + + RoseProgram &block = pred_blocks.begin()->second; + + /* we no longer need the check handled instruction as all the pred-role + * blocks are being collapsed together */ + stripCheckHandledInstruction(block); + + sparse_program.add_before_end(move(block)); + program.add_block(move(sparse_program)); +} + +static +void addPredBlocksMulti(map<u32, RoseProgram> &pred_blocks, + u32 num_states, RoseProgram &program) { + assert(!pred_blocks.empty()); + + RoseProgram sparse_program; + const RoseInstruction *end_inst = sparse_program.end_instruction(); + vector<pair<u32, const RoseInstruction *>> jump_table; + + // BEGIN instruction. + auto ri_begin = std::make_unique<RoseInstrSparseIterBegin>(num_states, end_inst); + RoseInstrSparseIterBegin *begin_inst = ri_begin.get(); + sparse_program.add_before_end(move(ri_begin)); + + // NEXT instructions, one per pred program. + u32 prev_key = pred_blocks.begin()->first; + for (auto it = next(begin(pred_blocks)); it != end(pred_blocks); ++it) { + auto ri = std::make_unique<RoseInstrSparseIterNext>(prev_key, begin_inst, + end_inst); + sparse_program.add_before_end(move(ri)); + prev_key = it->first; + } + + // Splice in each pred program after its BEGIN/NEXT. + auto out_it = begin(sparse_program); + for (auto &m : pred_blocks) { + u32 key = m.first; + RoseProgram &flat_prog = m.second; + assert(!flat_prog.empty()); + const size_t block_len = flat_prog.size() - 1; // without INSTR_END. + + assert(dynamic_cast<const RoseInstrSparseIterBegin *>(out_it->get()) || + dynamic_cast<const RoseInstrSparseIterNext *>(out_it->get())); + out_it = sparse_program.insert(++out_it, move(flat_prog)); + + // Jump table target for this key is the beginning of the block we just + // spliced in. + jump_table.emplace_back(key, out_it->get()); + + assert(distance(begin(sparse_program), out_it) + block_len <= + sparse_program.size()); + advance(out_it, block_len); + } + + // Write the jump table back into the SPARSE_ITER_BEGIN instruction. + begin_inst->jump_table = move(jump_table); + + program.add_block(move(sparse_program)); +} + +void addPredBlocks(map<u32, RoseProgram> &pred_blocks, u32 num_states, + RoseProgram &program) { + // Trim empty blocks, if any exist. + for (auto it = pred_blocks.begin(); it != pred_blocks.end();) { + if (it->second.empty()) { + it = pred_blocks.erase(it); + } else { + ++it; + } + } + + const size_t num_preds = pred_blocks.size(); + if (num_preds == 0) { + return; + } + + if (num_preds == 1) { + const auto head = pred_blocks.begin(); + addPredBlockSingle(head->first, head->second, program); + return; + } + + // First, see if all our blocks are equivalent, in which case we can + // collapse them down into one. 
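Roughly, the pred blocks are lowered with one of three strategies, chosen as sketched below (hypothetical types; strings stand in for program blocks and RoseProgramEquivalence): a lone predecessor gets a plain CHECK_STATE, identical blocks collapse behind SPARSE_ITER_ANY, and differing blocks get the SPARSE_ITER_BEGIN/NEXT jump table.

    #include <algorithm>
    #include <cstdio>
    #include <map>
    #include <string>

    enum class Strategy { None, SingleCheckState, IterAny, IterJumpTable };

    static Strategy choose(const std::map<int, std::string> &pred_blocks) {
        if (pred_blocks.empty()) {
            return Strategy::None;
        }
        if (pred_blocks.size() == 1) {
            return Strategy::SingleCheckState;       // CHECK_STATE + block
        }
        const std::string &first = pred_blocks.begin()->second;
        bool all_equiv = std::all_of(pred_blocks.begin(), pred_blocks.end(),
                                     [&](const auto &kv) { return kv.second == first; });
        return all_equiv ? Strategy::IterAny         // SPARSE_ITER_ANY + one block
                         : Strategy::IterJumpTable;  // SPARSE_ITER_BEGIN/NEXT
    }

    int main() {
        std::map<int, std::string> a{{3, "report"}};
        std::map<int, std::string> b{{1, "report"}, {7, "report"}};
        std::map<int, std::string> c{{1, "report"}, {7, "trigger"}};
        std::printf("%d %d %d\n", (int)choose(a), (int)choose(b), (int)choose(c));
        return 0;
    }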
+ const auto &blocks = pred_blocks | map_values; + if (all_of(begin(blocks), end(blocks), [&](const RoseProgram &block) { + return RoseProgramEquivalence()(*begin(blocks), block); + })) { + DEBUG_PRINTF("all blocks equiv\n"); + addPredBlocksAny(pred_blocks, num_states, program); + return; + } + + addPredBlocksMulti(pred_blocks, num_states, program); +} + +void applyFinalSpecialisation(RoseProgram &program) { + assert(!program.empty()); + assert(program.back().code() == ROSE_INSTR_END); + if (program.size() < 2) { + return; + } + + /* Replace the second-to-last instruction (before END) with a one-shot + * specialisation if available. */ + auto it = next(program.rbegin()); + if (auto *ri = dynamic_cast<const RoseInstrReport *>(it->get())) { + DEBUG_PRINTF("replacing REPORT with FINAL_REPORT\n"); + program.replace(it, std::make_unique<RoseInstrFinalReport>( + ri->onmatch, ri->offset_adjust)); + } +} + +void recordLongLiterals(vector<ue2_case_string> &longLiterals, + const RoseProgram &program) { + for (const auto &ri : program) { + if (const auto *ri_check = + dynamic_cast<const RoseInstrCheckLongLit *>(ri.get())) { + DEBUG_PRINTF("found CHECK_LONG_LIT for string '%s'\n", + escapeString(ri_check->literal).c_str()); + longLiterals.emplace_back(ri_check->literal, false); + continue; + } + if (const auto *ri_check = + dynamic_cast<const RoseInstrCheckLongLitNocase *>(ri.get())) { + DEBUG_PRINTF("found CHECK_LONG_LIT_NOCASE for string '%s'\n", + escapeString(ri_check->literal).c_str()); + longLiterals.emplace_back(ri_check->literal, true); + } + } +} + +void recordResources(RoseResources &resources, const RoseProgram &program) { + for (const auto &ri : program) { + switch (ri->code()) { + case ROSE_INSTR_TRIGGER_SUFFIX: + resources.has_suffixes = true; + break; + case ROSE_INSTR_TRIGGER_INFIX: + case ROSE_INSTR_CHECK_INFIX: + case ROSE_INSTR_CHECK_PREFIX: + case ROSE_INSTR_SOM_LEFTFIX: + resources.has_leftfixes = true; + break; + case ROSE_INSTR_SET_STATE: + case ROSE_INSTR_CHECK_STATE: + case ROSE_INSTR_SPARSE_ITER_BEGIN: + case ROSE_INSTR_SPARSE_ITER_NEXT: + resources.has_states = true; + break; + case ROSE_INSTR_CHECK_GROUPS: + resources.checks_groups = true; + break; + case ROSE_INSTR_PUSH_DELAYED: + resources.has_lit_delay = true; + break; + case ROSE_INSTR_CHECK_LONG_LIT: + case ROSE_INSTR_CHECK_LONG_LIT_NOCASE: + resources.has_lit_check = true; + break; + default: + break; + } + } +} + +} // namespace ue2 diff --git a/contrib/libs/hyperscan/src/rose/rose_build_program.h b/contrib/libs/hyperscan/src/rose/rose_build_program.h index eb4adae29f..7d781f3191 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_program.h +++ b/contrib/libs/hyperscan/src/rose/rose_build_program.h @@ -1,290 +1,290 @@ -/* +/* * Copyright (c) 2016-2019, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef ROSE_BUILD_PROGRAM_H -#define ROSE_BUILD_PROGRAM_H - -#include "rose_build_impl.h" -#include "rose_program.h" -#include "util/bytecode_ptr.h" -#include "util/hash.h" -#include "util/make_unique.h" - -#include <unordered_map> -#include <vector> - -#include <boost/range/adaptor/map.hpp> - -namespace ue2 { - -struct LookEntry; -class RoseEngineBlob; -class RoseInstruction; -struct RoseResources; - -/** - * \brief Container for a list of program instructions. - */ -class RoseProgram { -private: - std::vector<std::unique_ptr<RoseInstruction>> prog; - -public: - RoseProgram(); - ~RoseProgram(); - RoseProgram(const RoseProgram &) = delete; - RoseProgram(RoseProgram &&); - RoseProgram &operator=(const RoseProgram &) = delete; - RoseProgram &operator=(RoseProgram &&); - - bool empty() const; - - size_t size() const { return prog.size(); } - - const RoseInstruction &back() const { return *prog.back(); } - const RoseInstruction &front() const { return *prog.front(); } - - using iterator = decltype(prog)::iterator; - iterator begin() { return prog.begin(); } - iterator end() { return prog.end(); } - - using const_iterator = decltype(prog)::const_iterator; - const_iterator begin() const { return prog.begin(); } - const_iterator end() const { return prog.end(); } - - using reverse_iterator = decltype(prog)::reverse_iterator; - reverse_iterator rbegin() { return prog.rbegin(); } - reverse_iterator rend() { return prog.rend(); } - - using const_reverse_iterator = decltype(prog)::const_reverse_iterator; - const_reverse_iterator rbegin() const { return prog.rbegin(); } - const_reverse_iterator rend() const { return prog.rend(); } - - /** \brief Retrieve a pointer to the terminating ROSE_INSTR_END. */ - const RoseInstruction *end_instruction() const; - - static void update_targets(iterator it, iterator it_end, - const RoseInstruction *old_target, - const RoseInstruction *new_target); - - iterator insert(iterator it, std::unique_ptr<RoseInstruction> ri); - - iterator insert(iterator it, RoseProgram &&block); - - /* Note: takes iterator rather than const_iterator to support toolchains - * with pre-C++11 standard libraries (i.e., gcc-4.8). */ - iterator erase(iterator first, iterator last); - - /** - * \brief Adds this instruction to the program just before the terminating - * ROSE_INSTR_END. - */ - void add_before_end(std::unique_ptr<RoseInstruction> ri); - - /** - * \brief Adds this block to the program just before the terminating - * ROSE_INSTR_END. - * - * Any existing instruction that was jumping to end continues to do so. - */ - void add_before_end(RoseProgram &&block); - /** - * \brief Append this program block, replacing our current ROSE_INSTR_END. 
- * - * Any existing instruction that was jumping to end, now leads to the newly - * added block. - */ - void add_block(RoseProgram &&block); - - /** - * \brief Replace the instruction pointed to by the given iterator. - */ - template<class Iter> - void replace(Iter it, std::unique_ptr<RoseInstruction> ri) { - assert(!prog.empty()); - - const RoseInstruction *old_ptr = it->get(); - *it = move(ri); - update_targets(prog.begin(), prog.end(), old_ptr, it->get()); - } -}; - -bytecode_ptr<char> writeProgram(RoseEngineBlob &blob, - const RoseProgram &program); - -class RoseProgramHash { -public: - size_t operator()(const RoseProgram &program) const; -}; - -class RoseProgramEquivalence { -public: - bool operator()(const RoseProgram &prog1, const RoseProgram &prog2) const; -}; - -/** \brief Data only used during construction of various programs (literal, - * anchored, delay, etc). */ -struct ProgramBuild : noncopyable { - explicit ProgramBuild(u32 fMinLitOffset, size_t longLitThresh, - bool catchup) - : floatingMinLiteralMatchOffset(fMinLitOffset), - longLitLengthThreshold(longLitThresh), needs_catchup(catchup) { - } - - /** \brief Minimum offset of a match from the floating table. */ - const u32 floatingMinLiteralMatchOffset; - - /** \brief Long literal length threshold, used in streaming mode. */ - const size_t longLitLengthThreshold; - - /** \brief True if reports need CATCH_UP instructions to catch up suffixes, - * outfixes etc. */ - const bool needs_catchup; - - /** \brief Mapping from vertex to key, for vertices with a - * CHECK_NOT_HANDLED instruction. */ - std::unordered_map<RoseVertex, u32> handledKeys; - - /** \brief Mapping from Rose literal ID to anchored program index. */ - std::map<u32, u32> anchored_programs; - - /** \brief Mapping from Rose literal ID to delayed program index. */ - std::map<u32, u32> delay_programs; - - /** \brief Mapping from every vertex to the groups that must be on for that - * vertex to be reached. */ - std::unordered_map<RoseVertex, rose_group> vertex_group_map; - - /** \brief Global bitmap of groups that can be squashed. */ - rose_group squashable_groups = 0; -}; - -void addEnginesEodProgram(u32 eodNfaIterOffset, RoseProgram &program); -void addSuffixesEodProgram(RoseProgram &program); -void addMatcherEodProgram(RoseProgram &program); + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ROSE_BUILD_PROGRAM_H +#define ROSE_BUILD_PROGRAM_H + +#include "rose_build_impl.h" +#include "rose_program.h" +#include "util/bytecode_ptr.h" +#include "util/hash.h" +#include "util/make_unique.h" + +#include <unordered_map> +#include <vector> + +#include <boost/range/adaptor/map.hpp> + +namespace ue2 { + +struct LookEntry; +class RoseEngineBlob; +class RoseInstruction; +struct RoseResources; + +/** + * \brief Container for a list of program instructions. + */ +class RoseProgram { +private: + std::vector<std::unique_ptr<RoseInstruction>> prog; + +public: + RoseProgram(); + ~RoseProgram(); + RoseProgram(const RoseProgram &) = delete; + RoseProgram(RoseProgram &&); + RoseProgram &operator=(const RoseProgram &) = delete; + RoseProgram &operator=(RoseProgram &&); + + bool empty() const; + + size_t size() const { return prog.size(); } + + const RoseInstruction &back() const { return *prog.back(); } + const RoseInstruction &front() const { return *prog.front(); } + + using iterator = decltype(prog)::iterator; + iterator begin() { return prog.begin(); } + iterator end() { return prog.end(); } + + using const_iterator = decltype(prog)::const_iterator; + const_iterator begin() const { return prog.begin(); } + const_iterator end() const { return prog.end(); } + + using reverse_iterator = decltype(prog)::reverse_iterator; + reverse_iterator rbegin() { return prog.rbegin(); } + reverse_iterator rend() { return prog.rend(); } + + using const_reverse_iterator = decltype(prog)::const_reverse_iterator; + const_reverse_iterator rbegin() const { return prog.rbegin(); } + const_reverse_iterator rend() const { return prog.rend(); } + + /** \brief Retrieve a pointer to the terminating ROSE_INSTR_END. */ + const RoseInstruction *end_instruction() const; + + static void update_targets(iterator it, iterator it_end, + const RoseInstruction *old_target, + const RoseInstruction *new_target); + + iterator insert(iterator it, std::unique_ptr<RoseInstruction> ri); + + iterator insert(iterator it, RoseProgram &&block); + + /* Note: takes iterator rather than const_iterator to support toolchains + * with pre-C++11 standard libraries (i.e., gcc-4.8). */ + iterator erase(iterator first, iterator last); + + /** + * \brief Adds this instruction to the program just before the terminating + * ROSE_INSTR_END. + */ + void add_before_end(std::unique_ptr<RoseInstruction> ri); + + /** + * \brief Adds this block to the program just before the terminating + * ROSE_INSTR_END. + * + * Any existing instruction that was jumping to end continues to do so. + */ + void add_before_end(RoseProgram &&block); + /** + * \brief Append this program block, replacing our current ROSE_INSTR_END. + * + * Any existing instruction that was jumping to end, now leads to the newly + * added block. + */ + void add_block(RoseProgram &&block); + + /** + * \brief Replace the instruction pointed to by the given iterator. 
+ */ + template<class Iter> + void replace(Iter it, std::unique_ptr<RoseInstruction> ri) { + assert(!prog.empty()); + + const RoseInstruction *old_ptr = it->get(); + *it = move(ri); + update_targets(prog.begin(), prog.end(), old_ptr, it->get()); + } +}; + +bytecode_ptr<char> writeProgram(RoseEngineBlob &blob, + const RoseProgram &program); + +class RoseProgramHash { +public: + size_t operator()(const RoseProgram &program) const; +}; + +class RoseProgramEquivalence { +public: + bool operator()(const RoseProgram &prog1, const RoseProgram &prog2) const; +}; + +/** \brief Data only used during construction of various programs (literal, + * anchored, delay, etc). */ +struct ProgramBuild : noncopyable { + explicit ProgramBuild(u32 fMinLitOffset, size_t longLitThresh, + bool catchup) + : floatingMinLiteralMatchOffset(fMinLitOffset), + longLitLengthThreshold(longLitThresh), needs_catchup(catchup) { + } + + /** \brief Minimum offset of a match from the floating table. */ + const u32 floatingMinLiteralMatchOffset; + + /** \brief Long literal length threshold, used in streaming mode. */ + const size_t longLitLengthThreshold; + + /** \brief True if reports need CATCH_UP instructions to catch up suffixes, + * outfixes etc. */ + const bool needs_catchup; + + /** \brief Mapping from vertex to key, for vertices with a + * CHECK_NOT_HANDLED instruction. */ + std::unordered_map<RoseVertex, u32> handledKeys; + + /** \brief Mapping from Rose literal ID to anchored program index. */ + std::map<u32, u32> anchored_programs; + + /** \brief Mapping from Rose literal ID to delayed program index. */ + std::map<u32, u32> delay_programs; + + /** \brief Mapping from every vertex to the groups that must be on for that + * vertex to be reached. */ + std::unordered_map<RoseVertex, rose_group> vertex_group_map; + + /** \brief Global bitmap of groups that can be squashed. */ + rose_group squashable_groups = 0; +}; + +void addEnginesEodProgram(u32 eodNfaIterOffset, RoseProgram &program); +void addSuffixesEodProgram(RoseProgram &program); +void addMatcherEodProgram(RoseProgram &program); void addFlushCombinationProgram(RoseProgram &program); void addLastFlushCombinationProgram(RoseProgram &program); - -static constexpr u32 INVALID_QUEUE = ~0U; - -struct left_build_info { - // Constructor for an engine implementation. - left_build_info(u32 q, u32 l, u32 t, rose_group sm, - const std::vector<u8> &stops, u32 max_ql, u8 cm_count, - const CharReach &cm_cr); - - // Constructor for a lookaround implementation. - explicit left_build_info(const std::vector<std::vector<LookEntry>> &looks); - - u32 queue = INVALID_QUEUE; /* uniquely idents the left_build_info */ - u32 lag = 0; - u32 transient = 0; - rose_group squash_mask = ~rose_group{0}; - std::vector<u8> stopAlphabet; - u32 max_queuelen = 0; - u8 countingMiracleCount = 0; - CharReach countingMiracleReach; - u32 countingMiracleOffset = 0; /* populated later when laying out bytecode */ - bool has_lookaround = false; - - // alternative implementation to the NFA - std::vector<std::vector<LookEntry>> lookaround; -}; - -/** - * \brief Provides a brief summary of properties of an NFA that has already been - * finalised and stored in the blob. 
- */ -struct engine_info { - engine_info(const NFA *nfa, bool trans); - - enum NFAEngineType type; - bool accepts_eod; - u32 stream_size; - u32 scratch_size; - u32 scratch_align; - bool transient; -}; - -/** - * \brief Consumes list of program blocks corresponding to different literals, - * checks them for duplicates and then concatenates them into one program. - * - * Note: if a block will squash groups, a CLEAR_WORK_DONE instruction is - * inserted to prevent the work_done flag being contaminated by early blocks. - */ -RoseProgram assembleProgramBlocks(std::vector<RoseProgram> &&blocks); - -RoseProgram makeLiteralProgram(const RoseBuildImpl &build, - const std::map<RoseVertex, left_build_info> &leftfix_info, - const std::map<suffix_id, u32> &suffixes, - const std::map<u32, engine_info> &engine_info_by_queue, - const std::unordered_map<RoseVertex, u32> &roleStateIndices, - ProgramBuild &prog_build, u32 lit_id, - const std::vector<RoseEdge> &lit_edges, - bool is_anchored_replay_program); - -RoseProgram makeDelayRebuildProgram(const RoseBuildImpl &build, - ProgramBuild &prog_build, - const std::vector<u32> &lit_ids); - -RoseProgram makeEodAnchorProgram(const RoseBuildImpl &build, - ProgramBuild &prog_build, const RoseEdge &e, - const bool multiple_preds); - -RoseProgram makeReportProgram(const RoseBuildImpl &build, - bool needs_mpv_catchup, ReportID id); - -RoseProgram makeBoundaryProgram(const RoseBuildImpl &build, - const std::set<ReportID> &reports); - -struct TriggerInfo { - TriggerInfo(bool c, u32 q, u32 e) : cancel(c), queue(q), event(e) {} - bool cancel; - u32 queue; - u32 event; - - bool operator==(const TriggerInfo &b) const { - return cancel == b.cancel && queue == b.queue && event == b.event; - } -}; - -void addPredBlocks(std::map<u32, RoseProgram> &pred_blocks, u32 num_states, - RoseProgram &program); - -void applyFinalSpecialisation(RoseProgram &program); - -void recordLongLiterals(std::vector<ue2_case_string> &longLiterals, - const RoseProgram &program); - -void recordResources(RoseResources &resources, const RoseProgram &program); - -void addIncludedJumpProgram(RoseProgram &program, u32 child_offset, u8 squash); -} // namespace ue2 - -#endif // ROSE_BUILD_PROGRAM_H + +static constexpr u32 INVALID_QUEUE = ~0U; + +struct left_build_info { + // Constructor for an engine implementation. + left_build_info(u32 q, u32 l, u32 t, rose_group sm, + const std::vector<u8> &stops, u32 max_ql, u8 cm_count, + const CharReach &cm_cr); + + // Constructor for a lookaround implementation. + explicit left_build_info(const std::vector<std::vector<LookEntry>> &looks); + + u32 queue = INVALID_QUEUE; /* uniquely idents the left_build_info */ + u32 lag = 0; + u32 transient = 0; + rose_group squash_mask = ~rose_group{0}; + std::vector<u8> stopAlphabet; + u32 max_queuelen = 0; + u8 countingMiracleCount = 0; + CharReach countingMiracleReach; + u32 countingMiracleOffset = 0; /* populated later when laying out bytecode */ + bool has_lookaround = false; + + // alternative implementation to the NFA + std::vector<std::vector<LookEntry>> lookaround; +}; + +/** + * \brief Provides a brief summary of properties of an NFA that has already been + * finalised and stored in the blob. 
+ */ +struct engine_info { + engine_info(const NFA *nfa, bool trans); + + enum NFAEngineType type; + bool accepts_eod; + u32 stream_size; + u32 scratch_size; + u32 scratch_align; + bool transient; +}; + +/** + * \brief Consumes list of program blocks corresponding to different literals, + * checks them for duplicates and then concatenates them into one program. + * + * Note: if a block will squash groups, a CLEAR_WORK_DONE instruction is + * inserted to prevent the work_done flag being contaminated by early blocks. + */ +RoseProgram assembleProgramBlocks(std::vector<RoseProgram> &&blocks); + +RoseProgram makeLiteralProgram(const RoseBuildImpl &build, + const std::map<RoseVertex, left_build_info> &leftfix_info, + const std::map<suffix_id, u32> &suffixes, + const std::map<u32, engine_info> &engine_info_by_queue, + const std::unordered_map<RoseVertex, u32> &roleStateIndices, + ProgramBuild &prog_build, u32 lit_id, + const std::vector<RoseEdge> &lit_edges, + bool is_anchored_replay_program); + +RoseProgram makeDelayRebuildProgram(const RoseBuildImpl &build, + ProgramBuild &prog_build, + const std::vector<u32> &lit_ids); + +RoseProgram makeEodAnchorProgram(const RoseBuildImpl &build, + ProgramBuild &prog_build, const RoseEdge &e, + const bool multiple_preds); + +RoseProgram makeReportProgram(const RoseBuildImpl &build, + bool needs_mpv_catchup, ReportID id); + +RoseProgram makeBoundaryProgram(const RoseBuildImpl &build, + const std::set<ReportID> &reports); + +struct TriggerInfo { + TriggerInfo(bool c, u32 q, u32 e) : cancel(c), queue(q), event(e) {} + bool cancel; + u32 queue; + u32 event; + + bool operator==(const TriggerInfo &b) const { + return cancel == b.cancel && queue == b.queue && event == b.event; + } +}; + +void addPredBlocks(std::map<u32, RoseProgram> &pred_blocks, u32 num_states, + RoseProgram &program); + +void applyFinalSpecialisation(RoseProgram &program); + +void recordLongLiterals(std::vector<ue2_case_string> &longLiterals, + const RoseProgram &program); + +void recordResources(RoseResources &resources, const RoseProgram &program); + +void addIncludedJumpProgram(RoseProgram &program, u32 child_offset, u8 squash); +} // namespace ue2 + +#endif // ROSE_BUILD_PROGRAM_H diff --git a/contrib/libs/hyperscan/src/rose/rose_build_resources.h b/contrib/libs/hyperscan/src/rose/rose_build_resources.h index 0488a8b3ba..4fa102f3ee 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_resources.h +++ b/contrib/libs/hyperscan/src/rose/rose_build_resources.h @@ -1,59 +1,59 @@ -/* - * Copyright (c) 2017, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef ROSE_BUILD_RESOURCES_H -#define ROSE_BUILD_RESOURCES_H - -namespace ue2 { - -/** - * \brief Structure tracking which resources are used by this Rose instance at - * runtime. - * - * We use this to control how much initialisation we need to do at the - * beginning of a stream/block at runtime. - */ -struct RoseResources { - bool has_outfixes = false; - bool has_suffixes = false; - bool has_leftfixes = false; - bool has_literals = false; - bool has_states = false; - bool checks_groups = false; - bool has_lit_delay = false; - bool has_lit_check = false; // long literal support - bool has_anchored = false; - bool has_anchored_multiple = false; /* multiple anchored dfas */ - bool has_anchored_large = false; /* mcclellan 16 anchored dfa */ - bool has_floating = false; - bool has_eod = false; -}; - -} - -#endif +/* + * Copyright (c) 2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef ROSE_BUILD_RESOURCES_H +#define ROSE_BUILD_RESOURCES_H + +namespace ue2 { + +/** + * \brief Structure tracking which resources are used by this Rose instance at + * runtime. + * + * We use this to control how much initialisation we need to do at the + * beginning of a stream/block at runtime. 
+ */ +struct RoseResources { + bool has_outfixes = false; + bool has_suffixes = false; + bool has_leftfixes = false; + bool has_literals = false; + bool has_states = false; + bool checks_groups = false; + bool has_lit_delay = false; + bool has_lit_check = false; // long literal support + bool has_anchored = false; + bool has_anchored_multiple = false; /* multiple anchored dfas */ + bool has_anchored_large = false; /* mcclellan 16 anchored dfa */ + bool has_floating = false; + bool has_eod = false; +}; + +} + +#endif diff --git a/contrib/libs/hyperscan/src/rose/rose_build_role_aliasing.cpp b/contrib/libs/hyperscan/src/rose/rose_build_role_aliasing.cpp index 475f3f49c0..359550e118 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_role_aliasing.cpp +++ b/contrib/libs/hyperscan/src/rose/rose_build_role_aliasing.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -45,10 +45,10 @@ #include "util/bitutils.h" #include "util/compile_context.h" #include "util/container.h" -#include "util/flat_containers.h" +#include "util/flat_containers.h" #include "util/graph.h" #include "util/graph_range.h" -#include "util/hash.h" +#include "util/hash.h" #include "util/order_check.h" #include <algorithm> @@ -62,8 +62,8 @@ using boost::adaptors::map_values; namespace ue2 { -static constexpr size_t MERGE_GROUP_SIZE_MAX = 200; - +static constexpr size_t MERGE_GROUP_SIZE_MAX = 200; + namespace { // Used for checking edge sets (both in- and out-) against each other. struct EdgeAndVertex { @@ -113,14 +113,14 @@ struct AliasInEdge : EdgeAndVertex { class CandidateSet { public: - using key_type = RoseVertex; - using iterator = set<RoseVertex>::iterator; - using const_iterator = set<RoseVertex>::const_iterator; + using key_type = RoseVertex; + using iterator = set<RoseVertex>::iterator; + using const_iterator = set<RoseVertex>::const_iterator; iterator begin() { return main_cont.begin(); } iterator end() { return main_cont.end(); } - const_iterator begin() const { return main_cont.begin(); } - const_iterator end() const { return main_cont.end(); } + const_iterator begin() const { return main_cont.begin(); } + const_iterator end() const { return main_cont.end(); } bool contains(RoseVertex a) const { return hash_cont.find(a) != hash_cont.end(); @@ -154,36 +154,36 @@ public: private: /* if a vertex is worth storing, it is worth storing twice */ - set<RoseVertex> main_cont; /* deterministic iterator */ - unordered_set<RoseVertex> hash_cont; /* member checks */ + set<RoseVertex> main_cont; /* deterministic iterator */ + unordered_set<RoseVertex> hash_cont; /* member checks */ }; -struct RoseAliasingInfo { - RoseAliasingInfo(const RoseBuildImpl &build) { - const auto &g = build.g; +struct RoseAliasingInfo { + RoseAliasingInfo(const RoseBuildImpl &build) { + const auto &g = build.g; - // Populate reverse leftfix map. - for (auto v : vertices_range(g)) { - if (g[v].left) { - rev_leftfix[g[v].left].insert(v); - } - } + // Populate reverse leftfix map. + for (auto v : vertices_range(g)) { + if (g[v].left) { + rev_leftfix[g[v].left].insert(v); + } + } - // Populate reverse ghost vertex map. - for (const auto &m : build.ghost) { - rev_ghost[m.second].insert(m.first); + // Populate reverse ghost vertex map. 
+ for (const auto &m : build.ghost) { + rev_ghost[m.second].insert(m.first); } } - /** \brief Mapping from leftfix to vertices. */ - unordered_map<left_id, set<RoseVertex>> rev_leftfix; - - /** \brief Mapping from undelayed ghost to delayed vertices. */ - unordered_map<RoseVertex, set<RoseVertex>> rev_ghost; -}; - -} // namespace - + /** \brief Mapping from leftfix to vertices. */ + unordered_map<left_id, set<RoseVertex>> rev_leftfix; + + /** \brief Mapping from undelayed ghost to delayed vertices. */ + unordered_map<RoseVertex, set<RoseVertex>> rev_ghost; +}; + +} // namespace + // Check successor set: must lead to the same vertices via edges with the // same properties. static @@ -259,8 +259,8 @@ bool samePredecessors(RoseVertex a, RoseVertex b, const RoseGraph &g) { } for (const auto &e_a : in_edges_range(a, g)) { - RoseEdge e = edge(source(e_a, g), b, g); - if (!e || g[e].rose_top != g[e_a].rose_top) { + RoseEdge e = edge(source(e_a, g), b, g); + if (!e || g[e].rose_top != g[e_a].rose_top) { DEBUG_PRINTF("bad tops\n"); return false; } @@ -271,10 +271,10 @@ bool samePredecessors(RoseVertex a, RoseVertex b, const RoseGraph &g) { } static -bool hasCommonSuccWithBadBounds(RoseVertex a, RoseVertex b, - const RoseGraph &g) { +bool hasCommonSuccWithBadBounds(RoseVertex a, RoseVertex b, + const RoseGraph &g) { for (const auto &e_a : out_edges_range(a, g)) { - if (RoseEdge e = edge(b, target(e_a, g), g)) { + if (RoseEdge e = edge(b, target(e_a, g), g)) { if (g[e_a].maxBound < g[e].minBound || g[e].maxBound < g[e_a].minBound) { return true; @@ -290,10 +290,10 @@ bool hasCommonSuccWithBadBounds(RoseVertex a, RoseVertex b, } static -bool hasCommonPredWithBadBounds(RoseVertex a, RoseVertex b, - const RoseGraph &g) { +bool hasCommonPredWithBadBounds(RoseVertex a, RoseVertex b, + const RoseGraph &g) { for (const auto &e_a : in_edges_range(a, g)) { - if (RoseEdge e = edge(source(e_a, g), b, g)) { + if (RoseEdge e = edge(source(e_a, g), b, g)) { if (g[e_a].maxBound < g[e].minBound || g[e].maxBound < g[e_a].minBound) { return true; @@ -314,24 +314,24 @@ bool hasCommonPredWithBadBounds(RoseVertex a, RoseVertex b, } static -bool canMergeLiterals(RoseVertex a, RoseVertex b, const RoseBuildImpl &build) { - const auto &lits_a = build.g[a].literals; - const auto &lits_b = build.g[b].literals; +bool canMergeLiterals(RoseVertex a, RoseVertex b, const RoseBuildImpl &build) { + const auto &lits_a = build.g[a].literals; + const auto &lits_b = build.g[b].literals; assert(!lits_a.empty() && !lits_b.empty()); // If both vertices have only pseudo-dotstar in-edges, we can merge // literals of different lengths and can avoid the check below. - if (build.hasOnlyPseudoStarInEdges(a) && - build.hasOnlyPseudoStarInEdges(b)) { + if (build.hasOnlyPseudoStarInEdges(a) && + build.hasOnlyPseudoStarInEdges(b)) { DEBUG_PRINTF("both have pseudo-dotstar in-edges\n"); return true; } // Otherwise, all the literals involved must have the same length. 
for (u32 a_id : lits_a) { - const rose_literal_id &la = build.literals.at(a_id); + const rose_literal_id &la = build.literals.at(a_id); for (u32 b_id : lits_b) { - const rose_literal_id &lb = build.literals.at(b_id); + const rose_literal_id &lb = build.literals.at(b_id); if (la.elength() != lb.elength()) { DEBUG_PRINTF("bad merge %zu!=%zu '%s', '%s'\n", la.elength(), @@ -345,56 +345,56 @@ bool canMergeLiterals(RoseVertex a, RoseVertex b, const RoseBuildImpl &build) { } static -bool isAliasingCandidate(RoseVertex v, const RoseBuildImpl &build) { - const RoseVertexProps &props = build.g[v]; +bool isAliasingCandidate(RoseVertex v, const RoseBuildImpl &build) { + const RoseVertexProps &props = build.g[v]; // Must have literals. if (props.literals.empty()) { return false; } - assert(*props.literals.begin() != MO_INVALID_IDX); - return true; -} - -static -bool sameGhostProperties(const RoseBuildImpl &build, - const RoseAliasingInfo &rai, RoseVertex a, - RoseVertex b) { - // If these are ghost mapping keys, then they must map to the same vertex. - if (contains(build.ghost, a) || contains(build.ghost, b)) { - DEBUG_PRINTF("checking ghost key compat\n"); - if (!contains(build.ghost, a) || !contains(build.ghost, b)) { - DEBUG_PRINTF("missing ghost mapping\n"); - return false; - } - if (build.ghost.at(a) != build.ghost.at(b)) { - DEBUG_PRINTF("diff ghost mapping\n"); - return false; - } - DEBUG_PRINTF("ghost mappings ok\n"); - return true; - } - - // If they are ghost vertices, then they must have the same literals. - if (contains(rai.rev_ghost, a) || contains(rai.rev_ghost, b)) { - if (!contains(rai.rev_ghost, a) || !contains(rai.rev_ghost, b)) { - DEBUG_PRINTF("missing ghost reverse mapping\n"); - return false; - } - return build.g[a].literals == build.g[b].literals; - } + assert(*props.literals.begin() != MO_INVALID_IDX); + return true; +} + +static +bool sameGhostProperties(const RoseBuildImpl &build, + const RoseAliasingInfo &rai, RoseVertex a, + RoseVertex b) { + // If these are ghost mapping keys, then they must map to the same vertex. + if (contains(build.ghost, a) || contains(build.ghost, b)) { + DEBUG_PRINTF("checking ghost key compat\n"); + if (!contains(build.ghost, a) || !contains(build.ghost, b)) { + DEBUG_PRINTF("missing ghost mapping\n"); + return false; + } + if (build.ghost.at(a) != build.ghost.at(b)) { + DEBUG_PRINTF("diff ghost mapping\n"); + return false; + } + DEBUG_PRINTF("ghost mappings ok\n"); + return true; + } + + // If they are ghost vertices, then they must have the same literals. 
+ if (contains(rai.rev_ghost, a) || contains(rai.rev_ghost, b)) { + if (!contains(rai.rev_ghost, a) || !contains(rai.rev_ghost, b)) { + DEBUG_PRINTF("missing ghost reverse mapping\n"); + return false; + } + return build.g[a].literals == build.g[b].literals; + } return true; } static -bool sameRoleProperties(const RoseBuildImpl &build, const RoseAliasingInfo &rai, - RoseVertex a, RoseVertex b) { +bool sameRoleProperties(const RoseBuildImpl &build, const RoseAliasingInfo &rai, + RoseVertex a, RoseVertex b) { const RoseGraph &g = build.g; const RoseVertexProps &aprops = g[a], &bprops = g[b]; - if (aprops.eod_accept != bprops.eod_accept) { + if (aprops.eod_accept != bprops.eod_accept) { return false; } @@ -415,17 +415,17 @@ bool sameRoleProperties(const RoseBuildImpl &build, const RoseAliasingInfo &rai, return false; } - if (!sameGhostProperties(build, rai, a, b)) { - return false; - } - + if (!sameGhostProperties(build, rai, a, b)) { + return false; + } + /* "roses are mergeable" check are handled elsewhere */ return true; } -/* Checks compatibility of role properties if we require that two roles are - * right equiv. */ +/* Checks compatibility of role properties if we require that two roles are + * right equiv. */ static bool sameRightRoleProperties(const RoseBuildImpl &build, RoseVertex a, RoseVertex b) { @@ -462,11 +462,11 @@ void mergeEdgeAdd(RoseVertex u, RoseVertex v, const RoseEdge &from_edge, const RoseEdgeProps &from_props = g[from_edge]; if (!to_edge) { - DEBUG_PRINTF("adding edge [%zu,%zu]\n", g[u].index, g[v].index); + DEBUG_PRINTF("adding edge [%zu,%zu]\n", g[u].index, g[v].index); add_edge(u, v, from_props, g); } else { // union of the two edges. - DEBUG_PRINTF("updating edge [%zu,%zu]\n", g[u].index, g[v].index); + DEBUG_PRINTF("updating edge [%zu,%zu]\n", g[u].index, g[v].index); RoseEdgeProps &to_props = g[*to_edge]; to_props.minBound = min(to_props.minBound, from_props.minBound); to_props.maxBound = max(to_props.maxBound, from_props.maxBound); @@ -484,7 +484,7 @@ void mergeEdges(RoseVertex a, RoseVertex b, RoseGraph &g) { // Cache b's in-edges so we can look them up by source quickly. for (const auto &e : in_edges_range(b, g)) { RoseVertex u = source(e, g); - b_edges.emplace(u, e); + b_edges.emplace(u, e); } // Add a's in-edges to b, merging them in where b already has the new edge. @@ -503,7 +503,7 @@ void mergeEdges(RoseVertex a, RoseVertex b, RoseGraph &g) { b_edges.clear(); for (const auto &e : out_edges_range(b, g)) { RoseVertex v = target(e, g); - b_edges.emplace(v, e); + b_edges.emplace(v, e); } // Add a's out-edges to b, merging them in where b already has the new edge. 
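The hunks above leave the merge-compatibility rules unchanged: two roles that share a successor (or predecessor) can only be aliased if the bound ranges on the corresponding edges overlap, and when a merge does go ahead the surviving edge keeps the union of the two ranges. A minimal, self-contained sketch of that rule, using an illustrative EdgeBounds struct rather than the real RoseEdgeProps type:

    #include <algorithm>
    #include <cstdint>

    struct EdgeBounds {
        uint32_t minBound;
        uint32_t maxBound;
    };

    // Two edges into a common successor block a merge when their permitted
    // distance ranges cannot overlap (cf. hasCommonSuccWithBadBounds above).
    static bool boundsIncompatible(const EdgeBounds &a, const EdgeBounds &b) {
        return a.maxBound < b.minBound || b.maxBound < a.minBound;
    }

    // When the merge is accepted, the surviving edge takes the union of the
    // two ranges (cf. the min/max update in mergeEdgeAdd above).
    static EdgeBounds unionBounds(const EdgeBounds &a, const EdgeBounds &b) {
        return {std::min(a.minBound, b.minBound),
                std::max(a.maxBound, b.maxBound)};
    }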
@@ -523,11 +523,11 @@ void mergeEdges(RoseVertex a, RoseVertex b, RoseGraph &g) { } static -void mergeLiteralSets(RoseVertex a, RoseVertex b, RoseBuildImpl &build) { - RoseGraph &g = build.g; +void mergeLiteralSets(RoseVertex a, RoseVertex b, RoseBuildImpl &build) { + RoseGraph &g = build.g; const auto &a_literals = g[a].literals; for (u32 lit_id : a_literals) { - auto &lit_vertices = build.literal_info[lit_id].vertices; + auto &lit_vertices = build.literal_info[lit_id].vertices; lit_vertices.erase(a); lit_vertices.insert(b); } @@ -536,131 +536,131 @@ void mergeLiteralSets(RoseVertex a, RoseVertex b, RoseBuildImpl &build) { } static -void updateAliasingInfo(RoseBuildImpl &build, RoseAliasingInfo &rai, - RoseVertex a, RoseVertex b) { - if (build.g[a].left) { - const left_id left(build.g[a].left); - assert(contains(rai.rev_leftfix[left], a)); - rai.rev_leftfix[left].erase(a); - } - if (contains(build.ghost, a)) { - auto ghost = build.ghost.at(a); - assert(contains(build.ghost, b) && ghost == build.ghost.at(b)); - build.ghost.erase(a); - rai.rev_ghost[ghost].erase(a); - } - - if (contains(rai.rev_ghost, a)) { - for (const auto &v : rai.rev_ghost[a]) { - build.ghost[v] = b; - rai.rev_ghost[b].insert(v); - } - rai.rev_ghost.erase(a); - } -} - -/** \brief Common role merge code used by variants below. */ -static -void mergeCommon(RoseBuildImpl &build, RoseAliasingInfo &rai, RoseVertex a, - RoseVertex b) { - RoseGraph &g = build.g; - +void updateAliasingInfo(RoseBuildImpl &build, RoseAliasingInfo &rai, + RoseVertex a, RoseVertex b) { + if (build.g[a].left) { + const left_id left(build.g[a].left); + assert(contains(rai.rev_leftfix[left], a)); + rai.rev_leftfix[left].erase(a); + } + if (contains(build.ghost, a)) { + auto ghost = build.ghost.at(a); + assert(contains(build.ghost, b) && ghost == build.ghost.at(b)); + build.ghost.erase(a); + rai.rev_ghost[ghost].erase(a); + } + + if (contains(rai.rev_ghost, a)) { + for (const auto &v : rai.rev_ghost[a]) { + build.ghost[v] = b; + rai.rev_ghost[b].insert(v); + } + rai.rev_ghost.erase(a); + } +} + +/** \brief Common role merge code used by variants below. */ +static +void mergeCommon(RoseBuildImpl &build, RoseAliasingInfo &rai, RoseVertex a, + RoseVertex b) { + RoseGraph &g = build.g; + assert(g[a].eod_accept == g[b].eod_accept); assert(g[a].left == g[b].left); - assert(!g[a].suffix || g[a].suffix == g[b].suffix); + assert(!g[a].suffix || g[a].suffix == g[b].suffix); // In some situations (ghost roles etc), we can have different groups. assert(!g[a].groups && !g[b].groups); /* current structure means groups * haven't been assigned yet */ g[b].groups |= g[a].groups; - mergeLiteralSets(a, b, build); - updateAliasingInfo(build, rai, a, b); - - // Our min and max_offsets should be sane. - assert(g[b].min_offset <= g[b].max_offset); - - // Safety check: we should not have created through a merge a vertex that - // has an out-edge with ANCH history but is not fixed-offset. - assert(!hasAnchHistorySucc(g, b) || g[b].fixedOffset()); -} - -/** \brief Merge role 'a' into 'b', left merge path. */ -static -void mergeVerticesLeft(RoseVertex a, RoseVertex b, RoseBuildImpl &build, - RoseAliasingInfo &rai) { - RoseGraph &g = build.g; - DEBUG_PRINTF("merging vertex %zu into %zu\n", g[a].index, g[b].index); - - insert(&g[b].reports, g[a].reports); - - // Since it is a left merge (identical LHS) we should pick the tighter - // bound. 
- g[b].min_offset = max(g[a].min_offset, g[b].min_offset); - g[b].max_offset = min(g[a].max_offset, g[b].max_offset); - + mergeLiteralSets(a, b, build); + updateAliasingInfo(build, rai, a, b); + + // Our min and max_offsets should be sane. + assert(g[b].min_offset <= g[b].max_offset); + + // Safety check: we should not have created through a merge a vertex that + // has an out-edge with ANCH history but is not fixed-offset. + assert(!hasAnchHistorySucc(g, b) || g[b].fixedOffset()); +} + +/** \brief Merge role 'a' into 'b', left merge path. */ +static +void mergeVerticesLeft(RoseVertex a, RoseVertex b, RoseBuildImpl &build, + RoseAliasingInfo &rai) { + RoseGraph &g = build.g; + DEBUG_PRINTF("merging vertex %zu into %zu\n", g[a].index, g[b].index); + + insert(&g[b].reports, g[a].reports); + + // Since it is a left merge (identical LHS) we should pick the tighter + // bound. + g[b].min_offset = max(g[a].min_offset, g[b].min_offset); + g[b].max_offset = min(g[a].max_offset, g[b].max_offset); + if (!g[b].suffix) { g[b].suffix = g[a].suffix; } mergeEdges(a, b, g); - mergeCommon(build, rai, a, b); + mergeCommon(build, rai, a, b); +} + +/** \brief Merge role 'a' into 'b', right merge path. */ +static +void mergeVerticesRight(RoseVertex a, RoseVertex b, RoseBuildImpl &build, + RoseAliasingInfo &rai) { + RoseGraph &g = build.g; + DEBUG_PRINTF("merging vertex %zu into %zu\n", g[a].index, g[b].index); + + insert(&g[b].reports, g[a].reports); + g[b].min_offset = min(g[a].min_offset, g[b].min_offset); + g[b].max_offset = max(g[a].max_offset, g[b].max_offset); + + mergeEdges(a, b, g); + mergeCommon(build, rai, a, b); } -/** \brief Merge role 'a' into 'b', right merge path. */ -static -void mergeVerticesRight(RoseVertex a, RoseVertex b, RoseBuildImpl &build, - RoseAliasingInfo &rai) { - RoseGraph &g = build.g; - DEBUG_PRINTF("merging vertex %zu into %zu\n", g[a].index, g[b].index); - - insert(&g[b].reports, g[a].reports); - g[b].min_offset = min(g[a].min_offset, g[b].min_offset); - g[b].max_offset = max(g[a].max_offset, g[b].max_offset); - - mergeEdges(a, b, g); - mergeCommon(build, rai, a, b); -} - /** * Faster version of \ref mergeVertices for diamond merges, for which we know * that the in- and out-edge sets, reports and suffixes are identical. */ static -void mergeVerticesDiamond(RoseVertex a, RoseVertex b, RoseBuildImpl &build, - RoseAliasingInfo &rai) { - RoseGraph &g = build.g; - DEBUG_PRINTF("merging vertex %zu into %zu\n", g[a].index, g[b].index); +void mergeVerticesDiamond(RoseVertex a, RoseVertex b, RoseBuildImpl &build, + RoseAliasingInfo &rai) { + RoseGraph &g = build.g; + DEBUG_PRINTF("merging vertex %zu into %zu\n", g[a].index, g[b].index); - // For a diamond merge, most properties are already the same (with the - // notable exception of the literal set). + // For a diamond merge, most properties are already the same (with the + // notable exception of the literal set). 
assert(g[a].reports == g[b].reports); assert(g[a].suffix == g[b].suffix); g[b].min_offset = min(g[a].min_offset, g[b].min_offset); g[b].max_offset = max(g[a].max_offset, g[b].max_offset); - mergeCommon(build, rai, a, b); + mergeCommon(build, rai, a, b); } static never_inline -void findCandidates(const RoseBuildImpl &build, CandidateSet *candidates) { - for (auto v : vertices_range(build.g)) { - if (isAliasingCandidate(v, build)) { - DEBUG_PRINTF("candidate %zu\n", build.g[v].index); - DEBUG_PRINTF("lits: %u\n", *build.g[v].literals.begin()); +void findCandidates(const RoseBuildImpl &build, CandidateSet *candidates) { + for (auto v : vertices_range(build.g)) { + if (isAliasingCandidate(v, build)) { + DEBUG_PRINTF("candidate %zu\n", build.g[v].index); + DEBUG_PRINTF("lits: %u\n", *build.g[v].literals.begin()); candidates->insert(v); } } - assert(candidates->size() <= num_vertices(build.g)); + assert(candidates->size() <= num_vertices(build.g)); DEBUG_PRINTF("found %zu/%zu candidates\n", candidates->size(), - num_vertices(build.g)); + num_vertices(build.g)); } static RoseVertex pickPred(const RoseVertex v, const RoseGraph &g, - const RoseBuildImpl &build) { + const RoseBuildImpl &build) { RoseGraph::in_edge_iterator ei, ee; tie(ei, ee) = in_edges(v, g); if (ei == ee) { @@ -671,7 +671,7 @@ RoseVertex pickPred(const RoseVertex v, const RoseGraph &g, // Avoid roots if we have other options, since it doesn't matter to the // merge pass which predecessor we pick. RoseVertex u = source(*ei, g); - while (build.isAnyStart(u) && ++ei != ee) { + while (build.isAnyStart(u) && ++ei != ee) { u = source(*ei, g); } return u; @@ -700,7 +700,7 @@ bool hasCommonPredWithDiffRoses(RoseVertex a, RoseVertex b, const bool equal_roses = hasEqualLeftfixes(a, b, g); for (const auto &e_a : in_edges_range(a, g)) { - if (RoseEdge e = edge(source(e_a, g), b, g)) { + if (RoseEdge e = edge(source(e_a, g), b, g)) { DEBUG_PRINTF("common pred, e_r=%d r_t %u,%u\n", (int)equal_roses, g[e].rose_top, g[e_a].rose_top); if (!equal_roses) { @@ -718,13 +718,13 @@ bool hasCommonPredWithDiffRoses(RoseVertex a, RoseVertex b, } static -void pruneReportIfUnused(const RoseBuildImpl &build, shared_ptr<NGHolder> h, +void pruneReportIfUnused(const RoseBuildImpl &build, shared_ptr<NGHolder> h, const set<RoseVertex> &verts, ReportID report) { DEBUG_PRINTF("trying to prune %u from %p (v %zu)\n", report, h.get(), verts.size()); for (RoseVertex v : verts) { - if (build.g[v].left.graph == h && - build.g[v].left.leftfix_report == report) { + if (build.g[v].left.graph == h && + build.g[v].left.leftfix_report == report) { DEBUG_PRINTF("report %u still in use\n", report); return; } @@ -736,12 +736,12 @@ void pruneReportIfUnused(const RoseBuildImpl &build, shared_ptr<NGHolder> h, // unimplementable. DEBUG_PRINTF("report %u has been merged away, pruning\n", report); - assert(h->kind == (build.isRootSuccessor(*verts.begin()) ? NFA_PREFIX - : NFA_INFIX)); + assert(h->kind == (build.isRootSuccessor(*verts.begin()) ? NFA_PREFIX + : NFA_INFIX)); unique_ptr<NGHolder> h_new = cloneHolder(*h); pruneReport(*h_new, report); - if (isImplementableNFA(*h_new, nullptr, build.cc)) { + if (isImplementableNFA(*h_new, nullptr, build.cc)) { clear_graph(*h); cloneHolder(*h, *h_new); } else { @@ -772,13 +772,13 @@ void pruneCastle(CastleProto &castle, ReportID report) { /** \brief Set all reports to the given one. 
*/ static void setReports(CastleProto &castle, ReportID report) { - castle.report_map.clear(); - for (auto &e : castle.repeats) { - u32 top = e.first; - auto &repeat = e.second; + castle.report_map.clear(); + for (auto &e : castle.repeats) { + u32 top = e.first; + auto &repeat = e.second; repeat.reports.clear(); repeat.reports.insert(report); - castle.report_map[report].insert(top); + castle.report_map[report].insert(top); } } @@ -792,7 +792,7 @@ void updateEdgeTops(RoseGraph &g, RoseVertex v, const map<u32, u32> &top_map) { static void pruneUnusedTops(CastleProto &castle, const RoseGraph &g, const set<RoseVertex> &verts) { - unordered_set<u32> used_tops; + unordered_set<u32> used_tops; for (auto v : verts) { assert(g[v].left.castle.get() == &castle); @@ -817,13 +817,13 @@ void pruneUnusedTops(CastleProto &castle, const RoseGraph &g, static void pruneUnusedTops(NGHolder &h, const RoseGraph &g, const set<RoseVertex> &verts) { - if (!is_triggered(h)) { - DEBUG_PRINTF("not triggered, no tops\n"); - return; - } - assert(isCorrectlyTopped(h)); - DEBUG_PRINTF("pruning unused tops\n"); - flat_set<u32> used_tops; + if (!is_triggered(h)) { + DEBUG_PRINTF("not triggered, no tops\n"); + return; + } + assert(isCorrectlyTopped(h)); + DEBUG_PRINTF("pruning unused tops\n"); + flat_set<u32> used_tops; for (auto v : verts) { assert(g[v].left.graph.get() == &h); @@ -839,13 +839,13 @@ void pruneUnusedTops(NGHolder &h, const RoseGraph &g, if (v == h.startDs) { continue; // stylised edge, leave it alone. } - flat_set<u32> pruned_tops; - auto pt_inserter = inserter(pruned_tops, pruned_tops.end()); - set_intersection(h[e].tops.begin(), h[e].tops.end(), - used_tops.begin(), used_tops.end(), pt_inserter); - h[e].tops = std::move(pruned_tops); - if (h[e].tops.empty()) { - DEBUG_PRINTF("edge (start,%zu) has only unused tops\n", h[v].index); + flat_set<u32> pruned_tops; + auto pt_inserter = inserter(pruned_tops, pruned_tops.end()); + set_intersection(h[e].tops.begin(), h[e].tops.end(), + used_tops.begin(), used_tops.end(), pt_inserter); + h[e].tops = std::move(pruned_tops); + if (h[e].tops.empty()) { + DEBUG_PRINTF("edge (start,%zu) has only unused tops\n", h[v].index); dead.push_back(e); } } @@ -860,9 +860,9 @@ void pruneUnusedTops(NGHolder &h, const RoseGraph &g, } static -bool mergeSameCastle(RoseBuildImpl &build, RoseVertex a, RoseVertex b, - RoseAliasingInfo &rai) { - RoseGraph &g = build.g; +bool mergeSameCastle(RoseBuildImpl &build, RoseVertex a, RoseVertex b, + RoseAliasingInfo &rai) { + RoseGraph &g = build.g; LeftEngInfo &a_left = g[a].left; LeftEngInfo &b_left = g[b].left; CastleProto &castle = *a_left.castle; @@ -885,7 +885,7 @@ bool mergeSameCastle(RoseBuildImpl &build, RoseVertex a, RoseVertex b, return false; } - const ReportID new_report = build.getNewNfaReport(); + const ReportID new_report = build.getNewNfaReport(); map<u32, u32> a_top_map, b_top_map; for (const auto &c : castle.repeats) { @@ -907,9 +907,9 @@ bool mergeSameCastle(RoseBuildImpl &build, RoseVertex a, RoseVertex b, } } - assert(contains(rai.rev_leftfix[b_left], b)); - rai.rev_leftfix[b_left].erase(b); - rai.rev_leftfix[a_left].insert(b); + assert(contains(rai.rev_leftfix[b_left], b)); + rai.rev_leftfix[b_left].erase(b); + rai.rev_leftfix[a_left].insert(b); a_left.leftfix_report = new_report; b_left.leftfix_report = new_report; @@ -918,15 +918,15 @@ bool mergeSameCastle(RoseBuildImpl &build, RoseVertex a, RoseVertex b, updateEdgeTops(g, a, a_top_map); updateEdgeTops(g, b, b_top_map); - pruneUnusedTops(castle, g, 
rai.rev_leftfix[a_left]); + pruneUnusedTops(castle, g, rai.rev_leftfix[a_left]); return true; } static -bool attemptRoseCastleMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, +bool attemptRoseCastleMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, RoseVertex b, bool trivialCasesOnly, - RoseAliasingInfo &rai) { - RoseGraph &g = build.g; + RoseAliasingInfo &rai) { + RoseGraph &g = build.g; LeftEngInfo &a_left = g[a].left; LeftEngInfo &b_left = g[b].left; left_id a_left_id(a_left); @@ -944,28 +944,28 @@ bool attemptRoseCastleMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, if (&a_castle == &b_castle) { DEBUG_PRINTF("castles are the same\n"); - return mergeSameCastle(build, a, b, rai); + return mergeSameCastle(build, a, b, rai); } if (is_equal(a_castle, a_left.leftfix_report, b_castle, b_left.leftfix_report)) { DEBUG_PRINTF("castles are equiv with respect to reports\n"); - if (rai.rev_leftfix[a_left_id].size() == 1) { + if (rai.rev_leftfix[a_left_id].size() == 1) { /* nobody else is using a_castle */ - rai.rev_leftfix[b_left_id].erase(b); - rai.rev_leftfix[a_left_id].insert(b); - pruneUnusedTops(b_castle, g, rai.rev_leftfix[b_left_id]); + rai.rev_leftfix[b_left_id].erase(b); + rai.rev_leftfix[a_left_id].insert(b); + pruneUnusedTops(b_castle, g, rai.rev_leftfix[b_left_id]); b_left.castle = a_left.castle; b_left.leftfix_report = a_left.leftfix_report; DEBUG_PRINTF("OK -> only user of a_castle\n"); return true; } - if (rai.rev_leftfix[b_left_id].size() == 1) { + if (rai.rev_leftfix[b_left_id].size() == 1) { /* nobody else is using b_castle */ - rai.rev_leftfix[a_left_id].erase(a); - rai.rev_leftfix[b_left_id].insert(a); - pruneUnusedTops(a_castle, g, rai.rev_leftfix[a_left_id]); + rai.rev_leftfix[a_left_id].erase(a); + rai.rev_leftfix[b_left_id].insert(a); + pruneUnusedTops(a_castle, g, rai.rev_leftfix[a_left_id]); a_left.castle = b_left.castle; a_left.leftfix_report = b_left.leftfix_report; DEBUG_PRINTF("OK -> only user of b_castle\n"); @@ -974,32 +974,32 @@ bool attemptRoseCastleMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, if (preds_same) { /* preds are the same anyway in diamond/left merges just need to - * check that all the literals in rev_leftfix[b_h] can handle a_h */ - for (auto v : rai.rev_leftfix[b_left_id]) { - if (!mergeableRoseVertices(build, a, v)) { + * check that all the literals in rev_leftfix[b_h] can handle a_h */ + for (auto v : rai.rev_leftfix[b_left_id]) { + if (!mergeableRoseVertices(build, a, v)) { goto literal_mismatch_1; } } - rai.rev_leftfix[a_left_id].erase(a); - rai.rev_leftfix[b_left_id].insert(a); - pruneUnusedTops(a_castle, g, rai.rev_leftfix[a_left_id]); + rai.rev_leftfix[a_left_id].erase(a); + rai.rev_leftfix[b_left_id].insert(a); + pruneUnusedTops(a_castle, g, rai.rev_leftfix[a_left_id]); a_left.castle = b_left.castle; a_left.leftfix_report = b_left.leftfix_report; DEBUG_PRINTF("OK -> same preds ???\n"); return true; literal_mismatch_1: /* preds are the same anyway in diamond/left merges just need to - * check that all the literals in rev_leftfix[a_h] can handle b_h */ - for (auto v : rai.rev_leftfix[a_left_id]) { - if (!mergeableRoseVertices(build, v, b)) { + * check that all the literals in rev_leftfix[a_h] can handle b_h */ + for (auto v : rai.rev_leftfix[a_left_id]) { + if (!mergeableRoseVertices(build, v, b)) { goto literal_mismatch_2; } } - rai.rev_leftfix[b_left_id].erase(b); - rai.rev_leftfix[a_left_id].insert(b); - pruneUnusedTops(b_castle, g, rai.rev_leftfix[b_left_id]); + rai.rev_leftfix[b_left_id].erase(b); 
+ rai.rev_leftfix[a_left_id].insert(b); + pruneUnusedTops(b_castle, g, rai.rev_leftfix[b_left_id]); b_left.castle = a_left.castle; b_left.leftfix_report = a_left.leftfix_report; DEBUG_PRINTF("OK -> same preds ???\n"); @@ -1010,15 +1010,15 @@ bool attemptRoseCastleMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, /* we need to create a new graph as there may be other people * using b_left and it would be bad if a's preds started triggering it */ - ReportID new_report = build.getNewNfaReport(); + ReportID new_report = build.getNewNfaReport(); shared_ptr<CastleProto> new_castle = make_shared<CastleProto>(a_castle); pruneCastle(*new_castle, a_left.leftfix_report); setReports(*new_castle, new_report); - rai.rev_leftfix[a_left_id].erase(a); - rai.rev_leftfix[b_left_id].erase(b); - pruneUnusedTops(*a_left.castle, g, rai.rev_leftfix[a_left_id]); - pruneUnusedTops(*b_left.castle, g, rai.rev_leftfix[b_left_id]); + rai.rev_leftfix[a_left_id].erase(a); + rai.rev_leftfix[b_left_id].erase(b); + pruneUnusedTops(*a_left.castle, g, rai.rev_leftfix[a_left_id]); + pruneUnusedTops(*b_left.castle, g, rai.rev_leftfix[b_left_id]); a_left.leftfix_report = new_report; b_left.leftfix_report = new_report; @@ -1026,9 +1026,9 @@ bool attemptRoseCastleMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, b_left.castle = new_castle; assert(a_left == b_left); - rai.rev_leftfix[a_left].insert(a); - rai.rev_leftfix[a_left].insert(b); - pruneUnusedTops(*new_castle, g, rai.rev_leftfix[a_left]); + rai.rev_leftfix[a_left].insert(a); + rai.rev_leftfix[a_left].insert(b); + pruneUnusedTops(*new_castle, g, rai.rev_leftfix[a_left]); return true; } @@ -1040,27 +1040,27 @@ bool attemptRoseCastleMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, // Only infixes. Prefixes require special care when doing non-trivial // merges. - if (!build.isNonRootSuccessor(a) || !build.isNonRootSuccessor(b)) { + if (!build.isNonRootSuccessor(a) || !build.isNonRootSuccessor(b)) { return false; } - set<RoseVertex> &b_verts = rai.rev_leftfix[b_left_id]; + set<RoseVertex> &b_verts = rai.rev_leftfix[b_left_id]; set<RoseVertex> aa; aa.insert(a); - if (!mergeableRoseVertices(build, aa, b_verts)) { + if (!mergeableRoseVertices(build, aa, b_verts)) { DEBUG_PRINTF("vertices not mergeable\n"); return false; } - if (!build.cc.grey.roseMultiTopRoses || !build.cc.grey.allowCastle) { + if (!build.cc.grey.roseMultiTopRoses || !build.cc.grey.allowCastle) { return false; } DEBUG_PRINTF("merging into new castle\n"); // Clone new castle with a's repeats in it, set to a new report. - ReportID new_report = build.getNewNfaReport(); + ReportID new_report = build.getNewNfaReport(); shared_ptr<CastleProto> m_castle = make_shared<CastleProto>(a_castle); pruneCastle(*m_castle, a_left.leftfix_report); setReports(*m_castle, new_report); @@ -1079,7 +1079,7 @@ bool attemptRoseCastleMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, // We should be protected from merging common preds with tops leading // to completely different repeats by earlier checks, but just in // case... 
- if (RoseEdge a_edge = edge(source(e, g), a, g)) { + if (RoseEdge a_edge = edge(source(e, g), a, g)) { u32 a_top = g[a_edge].rose_top; const PureRepeat &a_pr = m_castle->repeats[a_top]; // new report if (pr != a_pr) { @@ -1101,10 +1101,10 @@ bool attemptRoseCastleMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, DEBUG_PRINTF("merged into castle containing %zu repeats\n", m_castle->repeats.size()); - rai.rev_leftfix[a_left_id].erase(a); - rai.rev_leftfix[b_left_id].erase(b); - pruneUnusedTops(*a_left.castle, g, rai.rev_leftfix[a_left_id]); - pruneUnusedTops(*b_left.castle, g, rai.rev_leftfix[b_left_id]); + rai.rev_leftfix[a_left_id].erase(a); + rai.rev_leftfix[b_left_id].erase(b); + pruneUnusedTops(*a_left.castle, g, rai.rev_leftfix[a_left_id]); + pruneUnusedTops(*b_left.castle, g, rai.rev_leftfix[b_left_id]); a_left.castle = m_castle; a_left.leftfix_report = new_report; @@ -1112,17 +1112,17 @@ bool attemptRoseCastleMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, b_left.leftfix_report = new_report; assert(a_left == b_left); - rai.rev_leftfix[a_left].insert(a); - rai.rev_leftfix[a_left].insert(b); - pruneUnusedTops(*m_castle, g, rai.rev_leftfix[a_left]); + rai.rev_leftfix[a_left].insert(a); + rai.rev_leftfix[a_left].insert(b); + pruneUnusedTops(*m_castle, g, rai.rev_leftfix[a_left]); return true; } static -bool attemptRoseGraphMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, +bool attemptRoseGraphMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, RoseVertex b, bool trivialCasesOnly, - RoseAliasingInfo &rai) { - RoseGraph &g = build.g; + RoseAliasingInfo &rai) { + RoseGraph &g = build.g; LeftEngInfo &a_left = g[a].left; LeftEngInfo &b_left = g[b].left; left_id a_left_id(a_left); @@ -1130,8 +1130,8 @@ bool attemptRoseGraphMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, shared_ptr<NGHolder> a_h = a_left.graph; shared_ptr<NGHolder> b_h = b_left.graph; assert(a_h && b_h); - assert(isImplementableNFA(*a_h, nullptr, build.cc)); - assert(isImplementableNFA(*b_h, nullptr, build.cc)); + assert(isImplementableNFA(*a_h, nullptr, build.cc)); + assert(isImplementableNFA(*b_h, nullptr, build.cc)); // If we only differ in reports, this is a very easy merge. Just use b's // report for both. 
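The surrounding hunks keep the overall shape of the leftfix merge: if both roles already point at the very same engine object, only their report IDs need reconciling; if the two engines are behaviourally equal, one role can simply adopt the other's engine; only in the remaining case is a fresh engine cloned and given a new report. A rough sketch of that decision cascade, with simplified stand-in types rather than the real CastleProto/NGHolder interfaces:

    #include <memory>

    struct Engine { /* stand-in for CastleProto or NGHolder */ };

    struct Role {
        std::shared_ptr<Engine> engine;
        unsigned report;
    };

    enum class MergeKind { SharedEngine, EquivalentEngines, NeedsClone };

    // Cheapest case: both roles already hold the same engine object, so only
    // the report IDs need reconciling. Next cheapest: the engines are
    // behaviourally equal, so one role adopts the other's engine. Otherwise a
    // fresh engine has to be cloned for the pair (cf. attemptRoseCastleMerge
    // and attemptRoseGraphMerge above).
    static MergeKind classifyMerge(const Role &a, const Role &b,
                                   bool behaviourallyEqual) {
        if (a.engine == b.engine) {
            return MergeKind::SharedEngine;
        }
        if (behaviourallyEqual) {
            return MergeKind::EquivalentEngines;
        }
        return MergeKind::NeedsClone;
    }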
@@ -1141,74 +1141,74 @@ bool attemptRoseGraphMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, DEBUG_PRINTF("OK -> same actual holder\n"); ReportID a_oldreport = a_left.leftfix_report; ReportID b_oldreport = b_left.leftfix_report; - ReportID new_report = build.getNewNfaReport(); + ReportID new_report = build.getNewNfaReport(); duplicateReport(*a_h, a_left.leftfix_report, new_report); duplicateReport(*b_h, b_left.leftfix_report, new_report); a_left.leftfix_report = new_report; b_left.leftfix_report = new_report; - pruneReportIfUnused(build, b_h, rai.rev_leftfix[b_left_id], - a_oldreport); - pruneReportIfUnused(build, b_h, rai.rev_leftfix[b_left_id], - b_oldreport); - pruneUnusedTops(*b_h, g, rai.rev_leftfix[b_left_id]); + pruneReportIfUnused(build, b_h, rai.rev_leftfix[b_left_id], + a_oldreport); + pruneReportIfUnused(build, b_h, rai.rev_leftfix[b_left_id], + b_oldreport); + pruneUnusedTops(*b_h, g, rai.rev_leftfix[b_left_id]); assert(a_left == b_left); return true; } /* if it is the same graph, it is also fairly easy */ if (is_equal(*a_h, a_left.leftfix_report, *b_h, b_left.leftfix_report)) { - if (rai.rev_leftfix[a_left_id].size() == 1) { + if (rai.rev_leftfix[a_left_id].size() == 1) { /* nobody else is using a_h */ - rai.rev_leftfix[b_left_id].erase(b); - rai.rev_leftfix[a_left_id].insert(b); + rai.rev_leftfix[b_left_id].erase(b); + rai.rev_leftfix[a_left_id].insert(b); b_left.graph = a_h; b_left.leftfix_report = a_left.leftfix_report; - pruneUnusedTops(*b_h, g, rai.rev_leftfix[b_left_id]); + pruneUnusedTops(*b_h, g, rai.rev_leftfix[b_left_id]); DEBUG_PRINTF("OK -> only user of a_h\n"); return true; } - if (rai.rev_leftfix[b_left_id].size() == 1) { + if (rai.rev_leftfix[b_left_id].size() == 1) { /* nobody else is using b_h */ - rai.rev_leftfix[a_left_id].erase(a); - rai.rev_leftfix[b_left_id].insert(a); + rai.rev_leftfix[a_left_id].erase(a); + rai.rev_leftfix[b_left_id].insert(a); a_left.graph = b_h; a_left.leftfix_report = b_left.leftfix_report; - pruneUnusedTops(*a_h, g, rai.rev_leftfix[a_left_id]); + pruneUnusedTops(*a_h, g, rai.rev_leftfix[a_left_id]); DEBUG_PRINTF("OK -> only user of b_h\n"); return true; } if (preds_same) { /* preds are the same anyway in diamond/left merges just need to - * check that all the literals in rev_leftfix[b_h] can handle a_h */ - for (auto v : rai.rev_leftfix[b_left_id]) { - if (!mergeableRoseVertices(build, a, v)) { + * check that all the literals in rev_leftfix[b_h] can handle a_h */ + for (auto v : rai.rev_leftfix[b_left_id]) { + if (!mergeableRoseVertices(build, a, v)) { goto literal_mismatch_1; } } - rai.rev_leftfix[a_left_id].erase(a); - rai.rev_leftfix[b_left_id].insert(a); + rai.rev_leftfix[a_left_id].erase(a); + rai.rev_leftfix[b_left_id].insert(a); a_left.graph = b_h; a_left.leftfix_report = b_left.leftfix_report; - pruneUnusedTops(*a_h, g, rai.rev_leftfix[a_left_id]); + pruneUnusedTops(*a_h, g, rai.rev_leftfix[a_left_id]); DEBUG_PRINTF("OK -> same preds ???\n"); return true; literal_mismatch_1: /* preds are the same anyway in diamond/left merges just need to - * check that all the literals in rev_leftfix[a_h] can handle b_h */ - for (auto v : rai.rev_leftfix[a_left_id]) { - if (!mergeableRoseVertices(build, v, b)) { + * check that all the literals in rev_leftfix[a_h] can handle b_h */ + for (auto v : rai.rev_leftfix[a_left_id]) { + if (!mergeableRoseVertices(build, v, b)) { goto literal_mismatch_2; } } - rai.rev_leftfix[b_left_id].erase(b); - rai.rev_leftfix[a_left_id].insert(b); + rai.rev_leftfix[b_left_id].erase(b); + 
rai.rev_leftfix[a_left_id].insert(b); b_left.graph = a_h; b_left.leftfix_report = a_left.leftfix_report; - pruneUnusedTops(*b_h, g, rai.rev_leftfix[b_left_id]); + pruneUnusedTops(*b_h, g, rai.rev_leftfix[b_left_id]); DEBUG_PRINTF("OK -> same preds ???\n"); return true; literal_mismatch_2:; @@ -1217,29 +1217,29 @@ bool attemptRoseGraphMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, /* we need to create a new graph as there may be other people * using b_left and it would be bad if a's preds started triggering it */ - ReportID new_report = build.getNewNfaReport(); + ReportID new_report = build.getNewNfaReport(); shared_ptr<NGHolder> new_graph = cloneHolder(*b_h); duplicateReport(*new_graph, b_left.leftfix_report, new_report); - pruneAllOtherReports(*new_graph, new_report); - - if (!isImplementableNFA(*new_graph, nullptr, build.cc)) { - DEBUG_PRINTF("new graph not implementable\n"); - return false; - } - - rai.rev_leftfix[a_left_id].erase(a); - rai.rev_leftfix[b_left_id].erase(b); - pruneUnusedTops(*a_h, g, rai.rev_leftfix[a_left_id]); - pruneUnusedTops(*b_h, g, rai.rev_leftfix[b_left_id]); - + pruneAllOtherReports(*new_graph, new_report); + + if (!isImplementableNFA(*new_graph, nullptr, build.cc)) { + DEBUG_PRINTF("new graph not implementable\n"); + return false; + } + + rai.rev_leftfix[a_left_id].erase(a); + rai.rev_leftfix[b_left_id].erase(b); + pruneUnusedTops(*a_h, g, rai.rev_leftfix[a_left_id]); + pruneUnusedTops(*b_h, g, rai.rev_leftfix[b_left_id]); + a_left.leftfix_report = new_report; b_left.leftfix_report = new_report; a_left.graph = new_graph; b_left.graph = new_graph; - rai.rev_leftfix[a_left].insert(a); - rai.rev_leftfix[a_left].insert(b); - pruneUnusedTops(*new_graph, g, rai.rev_leftfix[a_left]); + rai.rev_leftfix[a_left].insert(a); + rai.rev_leftfix[a_left].insert(b); + pruneUnusedTops(*new_graph, g, rai.rev_leftfix[a_left]); return true; } @@ -1251,23 +1251,23 @@ bool attemptRoseGraphMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, // Only infixes. Prefixes require special care when doing non-trivial // merges. 
- if (!build.isNonRootSuccessor(a) || !build.isNonRootSuccessor(b)) { + if (!build.isNonRootSuccessor(a) || !build.isNonRootSuccessor(b)) { return false; } DEBUG_PRINTF("attempting merge of roses on vertices %zu and %zu\n", - g[a].index, g[b].index); + g[a].index, g[b].index); - set<RoseVertex> &b_verts = rai.rev_leftfix[b_left]; + set<RoseVertex> &b_verts = rai.rev_leftfix[b_left]; set<RoseVertex> aa; aa.insert(a); - if (!mergeableRoseVertices(build, aa, b_verts)) { + if (!mergeableRoseVertices(build, aa, b_verts)) { DEBUG_PRINTF("vertices not mergeable\n"); return false; } - if (!build.cc.grey.roseMultiTopRoses) { + if (!build.cc.grey.roseMultiTopRoses) { return false; } @@ -1277,10 +1277,10 @@ bool attemptRoseGraphMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, /* We need to allocate a new report id because */ ReportID a_oldreport = a_left.leftfix_report; ReportID b_oldreport = b_left.leftfix_report; - ReportID new_report = build.getNewNfaReport(); + ReportID new_report = build.getNewNfaReport(); duplicateReport(*b_h, b_left.leftfix_report, new_report); b_left.leftfix_report = new_report; - pruneReportIfUnused(build, b_h, rai.rev_leftfix[b_left_id], b_oldreport); + pruneReportIfUnused(build, b_h, rai.rev_leftfix[b_left_id], b_oldreport); NGHolder victim; cloneHolder(victim, *a_h); @@ -1296,22 +1296,22 @@ bool attemptRoseGraphMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, DEBUG_PRINTF("winner %zu states\n", num_vertices(*b_h)); if (!setDistinctRoseTops(g, victim, *b_h, deque<RoseVertex>(1, a))) { - assert(roseHasTops(build, a)); - assert(roseHasTops(build, b)); + assert(roseHasTops(build, a)); + assert(roseHasTops(build, b)); return false; } assert(victim.kind == b_h->kind); assert(!generates_callbacks(*b_h)); - if (!mergeNfaPair(victim, *b_h, nullptr, build.cc)) { + if (!mergeNfaPair(victim, *b_h, nullptr, build.cc)) { DEBUG_PRINTF("merge failed\n"); // Restore in-edge properties. for (const auto &e : in_edges_range(a, g)) { g[e] = a_props[source(e, g)]; } - assert(roseHasTops(build, a)); - assert(roseHasTops(build, b)); + assert(roseHasTops(build, a)); + assert(roseHasTops(build, b)); return false; } @@ -1321,22 +1321,22 @@ bool attemptRoseGraphMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, a_left.graph = b_h; a_left.leftfix_report = new_report; - assert(contains(rai.rev_leftfix[a_left_id], a)); - assert(contains(rai.rev_leftfix[b_left_id], b)); - rai.rev_leftfix[a_left_id].erase(a); - rai.rev_leftfix[b_left_id].insert(a); + assert(contains(rai.rev_leftfix[a_left_id], a)); + assert(contains(rai.rev_leftfix[b_left_id], b)); + rai.rev_leftfix[a_left_id].erase(a); + rai.rev_leftfix[b_left_id].insert(a); - pruneUnusedTops(*a_h, g, rai.rev_leftfix[a_left_id]); - pruneUnusedTops(*b_h, g, rai.rev_leftfix[b_left_id]); + pruneUnusedTops(*a_h, g, rai.rev_leftfix[a_left_id]); + pruneUnusedTops(*b_h, g, rai.rev_leftfix[b_left_id]); // Prune A's report from its old prefix if it was only used by A. 
- pruneReportIfUnused(build, a_h, rai.rev_leftfix[a_left_id], a_oldreport); + pruneReportIfUnused(build, a_h, rai.rev_leftfix[a_left_id], a_oldreport); - reduceImplementableGraph(*b_h, SOM_NONE, nullptr, build.cc); + reduceImplementableGraph(*b_h, SOM_NONE, nullptr, build.cc); - assert(roseHasTops(build, a)); - assert(roseHasTops(build, b)); - assert(isImplementableNFA(*b_h, nullptr, build.cc)); + assert(roseHasTops(build, a)); + assert(roseHasTops(build, b)); + assert(isImplementableNFA(*b_h, nullptr, build.cc)); return true; } @@ -1344,14 +1344,14 @@ bool attemptRoseGraphMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, // the two LeftEngInfo structures to be the same. Returns false if the merge // is not possible. static -bool attemptRoseMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, - RoseVertex b, bool trivialCasesOnly, - RoseAliasingInfo &rai) { +bool attemptRoseMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, + RoseVertex b, bool trivialCasesOnly, + RoseAliasingInfo &rai) { DEBUG_PRINTF("attempting rose merge, vertices a=%zu, b=%zu\n", - build.g[a].index, build.g[b].index); + build.g[a].index, build.g[b].index); assert(a != b); - RoseGraph &g = build.g; + RoseGraph &g = build.g; LeftEngInfo &a_left = g[a].left; LeftEngInfo &b_left = g[b].left; @@ -1375,8 +1375,8 @@ bool attemptRoseMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, } // Only non-transients for the moment. - if (contains(build.transient, a_left_id) || - contains(build.transient, b_left_id)) { + if (contains(build.transient, a_left_id) || + contains(build.transient, b_left_id)) { return false; } @@ -1386,117 +1386,117 @@ bool attemptRoseMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a, return false; } - assert(roseHasTops(build, a)); - assert(roseHasTops(build, b)); + assert(roseHasTops(build, a)); + assert(roseHasTops(build, b)); if (a_left_id.graph() && b_left_id.graph()) { - return attemptRoseGraphMerge(build, preds_same, a, b, trivialCasesOnly, - rai); + return attemptRoseGraphMerge(build, preds_same, a, b, trivialCasesOnly, + rai); } if (a_left_id.castle() && b_left_id.castle()) { - return attemptRoseCastleMerge(build, preds_same, a, b, trivialCasesOnly, - rai); + return attemptRoseCastleMerge(build, preds_same, a, b, trivialCasesOnly, + rai); } return false; } -/** - * \brief Buckets that only contain one vertex are never going to lead to a - * merge. - */ +/** + * \brief Buckets that only contain one vertex are never going to lead to a + * merge. 
+ */ static -void removeSingletonBuckets(vector<vector<RoseVertex>> &buckets) { - auto it = remove_if( - begin(buckets), end(buckets), - [](const vector<RoseVertex> &bucket) { return bucket.size() < 2; }); - if (it != end(buckets)) { - DEBUG_PRINTF("deleting %zu singleton buckets\n", - distance(it, end(buckets))); - buckets.erase(it, end(buckets)); +void removeSingletonBuckets(vector<vector<RoseVertex>> &buckets) { + auto it = remove_if( + begin(buckets), end(buckets), + [](const vector<RoseVertex> &bucket) { return bucket.size() < 2; }); + if (it != end(buckets)) { + DEBUG_PRINTF("deleting %zu singleton buckets\n", + distance(it, end(buckets))); + buckets.erase(it, end(buckets)); } } static -void buildInvBucketMap(const vector<vector<RoseVertex>> &buckets, - unordered_map<RoseVertex, size_t> &inv) { - inv.clear(); - for (size_t i = 0; i < buckets.size(); i++) { - for (auto v : buckets[i]) { - assert(!contains(inv, v)); - inv.emplace(v, i); - } - } -} - -/** - * \brief Generic splitter that will use the given split function to partition - * the vector of buckets, then remove buckets with <= 1 entry. - */ -template <class SplitFunction> -void splitAndFilterBuckets(vector<vector<RoseVertex>> &buckets, - const SplitFunction &make_split_key) { - if (buckets.empty()) { - return; - } - +void buildInvBucketMap(const vector<vector<RoseVertex>> &buckets, + unordered_map<RoseVertex, size_t> &inv) { + inv.clear(); + for (size_t i = 0; i < buckets.size(); i++) { + for (auto v : buckets[i]) { + assert(!contains(inv, v)); + inv.emplace(v, i); + } + } +} + +/** + * \brief Generic splitter that will use the given split function to partition + * the vector of buckets, then remove buckets with <= 1 entry. + */ +template <class SplitFunction> +void splitAndFilterBuckets(vector<vector<RoseVertex>> &buckets, + const SplitFunction &make_split_key) { + if (buckets.empty()) { + return; + } + vector<vector<RoseVertex>> out; - // Mapping from split key value to new bucket index. - using key_type = decltype(make_split_key(RoseGraph::null_vertex())); - unordered_map<key_type, size_t> dest_map; - dest_map.reserve(buckets.front().size()); - + // Mapping from split key value to new bucket index. + using key_type = decltype(make_split_key(RoseGraph::null_vertex())); + unordered_map<key_type, size_t> dest_map; + dest_map.reserve(buckets.front().size()); + for (const auto &bucket : buckets) { assert(!bucket.empty()); - dest_map.clear(); + dest_map.clear(); for (RoseVertex v : bucket) { - auto p = dest_map.emplace(make_split_key(v), out.size()); - if (p.second) { // New key, add a bucket. - out.emplace_back(); + auto p = dest_map.emplace(make_split_key(v), out.size()); + if (p.second) { // New key, add a bucket. + out.emplace_back(); } - auto out_bucket = p.first->second; + auto out_bucket = p.first->second; out[out_bucket].push_back(v); } } - if (out.size() == buckets.size()) { - return; // No new buckets created. - } - - buckets = std::move(out); - removeSingletonBuckets(buckets); + if (out.size() == buckets.size()) { + return; // No new buckets created. + } + + buckets = std::move(out); + removeSingletonBuckets(buckets); +} + +static +void splitByReportSuffixBehaviour(const RoseGraph &g, + vector<vector<RoseVertex>> &buckets) { + // Split by report set and suffix info. 
+ auto make_split_key = [&g](RoseVertex v) { + return hash_all(g[v].reports, g[v].suffix); + }; + splitAndFilterBuckets(buckets, make_split_key); +} + +static +void splitByLiteralTable(const RoseBuildImpl &build, + vector<vector<RoseVertex>> &buckets) { + const RoseGraph &g = build.g; + + // Split by literal table. + auto make_split_key = [&](RoseVertex v) { + const auto &lits = g[v].literals; + assert(!lits.empty()); + auto table = build.literals.at(*lits.begin()).table; + return std::underlying_type<decltype(table)>::type(table); + }; + splitAndFilterBuckets(buckets, make_split_key); } static -void splitByReportSuffixBehaviour(const RoseGraph &g, - vector<vector<RoseVertex>> &buckets) { - // Split by report set and suffix info. - auto make_split_key = [&g](RoseVertex v) { - return hash_all(g[v].reports, g[v].suffix); - }; - splitAndFilterBuckets(buckets, make_split_key); -} - -static -void splitByLiteralTable(const RoseBuildImpl &build, - vector<vector<RoseVertex>> &buckets) { - const RoseGraph &g = build.g; - - // Split by literal table. - auto make_split_key = [&](RoseVertex v) { - const auto &lits = g[v].literals; - assert(!lits.empty()); - auto table = build.literals.at(*lits.begin()).table; - return std::underlying_type<decltype(table)>::type(table); - }; - splitAndFilterBuckets(buckets, make_split_key); -} - -static void splitByNeighbour(const RoseGraph &g, vector<vector<RoseVertex>> &buckets, - unordered_map<RoseVertex, size_t> &inv, bool succ) { + unordered_map<RoseVertex, size_t> &inv, bool succ) { vector<vector<RoseVertex>> extras; map<size_t, vector<RoseVertex>> neighbours_by_bucket; set<RoseVertex> picked; @@ -1552,63 +1552,63 @@ void splitByNeighbour(const RoseGraph &g, vector<vector<RoseVertex>> &buckets, } insert(&buckets, buckets.end(), extras); } - - removeSingletonBuckets(buckets); - buildInvBucketMap(buckets, inv); + + removeSingletonBuckets(buckets); + buildInvBucketMap(buckets, inv); } static -vector<vector<RoseVertex>> -splitDiamondMergeBuckets(CandidateSet &candidates, const RoseBuildImpl &build) { +vector<vector<RoseVertex>> +splitDiamondMergeBuckets(CandidateSet &candidates, const RoseBuildImpl &build) { const RoseGraph &g = build.g; vector<vector<RoseVertex>> buckets(1); - buckets[0].reserve(candidates.size()); - insert(&buckets[0], buckets[0].end(), candidates); - - DEBUG_PRINTF("at start, %zu candidates in 1 bucket\n", candidates.size()); - - splitByReportSuffixBehaviour(g, buckets); - DEBUG_PRINTF("split by report/suffix, %zu buckets\n", buckets.size()); - if (buckets.empty()) { - return buckets; - } - - splitByLiteralTable(build, buckets); - DEBUG_PRINTF("split by lit table, %zu buckets\n", buckets.size()); - if (buckets.empty()) { - return buckets; - } - - // Neighbour splits require inverse map. - unordered_map<RoseVertex, size_t> inv; - buildInvBucketMap(buckets, inv); - + buckets[0].reserve(candidates.size()); + insert(&buckets[0], buckets[0].end(), candidates); + + DEBUG_PRINTF("at start, %zu candidates in 1 bucket\n", candidates.size()); + + splitByReportSuffixBehaviour(g, buckets); + DEBUG_PRINTF("split by report/suffix, %zu buckets\n", buckets.size()); + if (buckets.empty()) { + return buckets; + } + + splitByLiteralTable(build, buckets); + DEBUG_PRINTF("split by lit table, %zu buckets\n", buckets.size()); + if (buckets.empty()) { + return buckets; + } + + // Neighbour splits require inverse map. 
+ unordered_map<RoseVertex, size_t> inv; + buildInvBucketMap(buckets, inv); + splitByNeighbour(g, buckets, inv, true); - DEBUG_PRINTF("split by successor, %zu buckets\n", buckets.size()); - if (buckets.empty()) { - return buckets; - } - + DEBUG_PRINTF("split by successor, %zu buckets\n", buckets.size()); + if (buckets.empty()) { + return buckets; + } + splitByNeighbour(g, buckets, inv, false); - DEBUG_PRINTF("split by predecessor, %zu buckets\n", buckets.size()); + DEBUG_PRINTF("split by predecessor, %zu buckets\n", buckets.size()); return buckets; } - + static never_inline -void diamondMergePass(CandidateSet &candidates, RoseBuildImpl &build, +void diamondMergePass(CandidateSet &candidates, RoseBuildImpl &build, vector<RoseVertex> *dead, bool mergeRoses, - RoseAliasingInfo &rai) { + RoseAliasingInfo &rai) { DEBUG_PRINTF("begin\n"); - RoseGraph &g = build.g; + RoseGraph &g = build.g; if (candidates.empty()) { return; } /* Vertices may only be diamond merged with others in the same bucket */ - auto cand_buckets = splitDiamondMergeBuckets(candidates, build); + auto cand_buckets = splitDiamondMergeBuckets(candidates, build); for (const vector<RoseVertex> &siblings : cand_buckets) { for (auto it = siblings.begin(); it != siblings.end();) { @@ -1617,12 +1617,12 @@ void diamondMergePass(CandidateSet &candidates, RoseBuildImpl &build, assert(contains(candidates, a)); - DEBUG_PRINTF("trying to merge %zu into somebody\n", g[a].index); + DEBUG_PRINTF("trying to merge %zu into somebody\n", g[a].index); for (auto jt = it; jt != siblings.end(); ++jt) { RoseVertex b = *jt; assert(contains(candidates, b)); - if (!sameRoleProperties(build, rai, a, b)) { + if (!sameRoleProperties(build, rai, a, b)) { DEBUG_PRINTF("diff role prop\n"); continue; } @@ -1633,23 +1633,23 @@ void diamondMergePass(CandidateSet &candidates, RoseBuildImpl &build, * so we still have to checks successors and predecessors. */ if (!sameSuccessors(a, b, g) - || !sameRightRoleProperties(build, a, b) + || !sameRightRoleProperties(build, a, b) || !samePredecessors(a, b, g)) { DEBUG_PRINTF("not diamond\n"); continue; } - if (!canMergeLiterals(a, b, build)) { + if (!canMergeLiterals(a, b, build)) { DEBUG_PRINTF("incompatible lits\n"); continue; } - if (!attemptRoseMerge(build, true, a, b, !mergeRoses, rai)) { + if (!attemptRoseMerge(build, true, a, b, !mergeRoses, rai)) { DEBUG_PRINTF("rose fail\n"); continue; } - mergeVerticesDiamond(a, b, build, rai); + mergeVerticesDiamond(a, b, build, rai); dead->push_back(a); candidates.erase(a); break; // next a @@ -1665,7 +1665,7 @@ vector<RoseVertex>::iterator findLeftMergeSibling( vector<RoseVertex>::iterator it, const vector<RoseVertex>::iterator &end, const RoseVertex a, const RoseBuildImpl &build, - const RoseAliasingInfo &rai, + const RoseAliasingInfo &rai, const CandidateSet &candidates) { const RoseGraph &g = build.g; @@ -1679,7 +1679,7 @@ vector<RoseVertex>::iterator findLeftMergeSibling( continue; } - if (!sameRoleProperties(build, rai, a, b)) { + if (!sameRoleProperties(build, rai, a, b)) { continue; } @@ -1708,66 +1708,66 @@ vector<RoseVertex>::iterator findLeftMergeSibling( return end; } -static -void getLeftMergeSiblings(const RoseBuildImpl &build, RoseVertex a, - vector<RoseVertex> &siblings) { - // We have to find a sibling to merge `a' with, and we select between - // two approaches to minimize the number of vertices we have to - // examine; which we use depends on the shape of the graph. 
- - const RoseGraph &g = build.g; - assert(!g[a].literals.empty()); - u32 lit_id = *g[a].literals.begin(); - const auto &verts = build.literal_info.at(lit_id).vertices; - RoseVertex pred = pickPred(a, g, build); - - siblings.clear(); - - if (pred == RoseGraph::null_vertex() || build.isAnyStart(pred) || - out_degree(pred, g) > verts.size()) { - // Select sibling from amongst the vertices that share a literal. - insert(&siblings, siblings.end(), verts); - } else { - // Select sibling from amongst the vertices that share a - // predecessor. - insert(&siblings, siblings.end(), adjacent_vertices(pred, g)); - } -} - +static +void getLeftMergeSiblings(const RoseBuildImpl &build, RoseVertex a, + vector<RoseVertex> &siblings) { + // We have to find a sibling to merge `a' with, and we select between + // two approaches to minimize the number of vertices we have to + // examine; which we use depends on the shape of the graph. + + const RoseGraph &g = build.g; + assert(!g[a].literals.empty()); + u32 lit_id = *g[a].literals.begin(); + const auto &verts = build.literal_info.at(lit_id).vertices; + RoseVertex pred = pickPred(a, g, build); + + siblings.clear(); + + if (pred == RoseGraph::null_vertex() || build.isAnyStart(pred) || + out_degree(pred, g) > verts.size()) { + // Select sibling from amongst the vertices that share a literal. + insert(&siblings, siblings.end(), verts); + } else { + // Select sibling from amongst the vertices that share a + // predecessor. + insert(&siblings, siblings.end(), adjacent_vertices(pred, g)); + } +} + static never_inline -void leftMergePass(CandidateSet &candidates, RoseBuildImpl &build, - vector<RoseVertex> *dead, RoseAliasingInfo &rai) { +void leftMergePass(CandidateSet &candidates, RoseBuildImpl &build, + vector<RoseVertex> *dead, RoseAliasingInfo &rai) { DEBUG_PRINTF("begin (%zu)\n", candidates.size()); vector<RoseVertex> siblings; - auto it = candidates.begin(); + auto it = candidates.begin(); while (it != candidates.end()) { RoseVertex a = *it; CandidateSet::iterator ait = it; ++it; - getLeftMergeSiblings(build, a, siblings); + getLeftMergeSiblings(build, a, siblings); - auto jt = siblings.begin(); - while (jt != siblings.end()) { - jt = findLeftMergeSibling(jt, siblings.end(), a, build, rai, - candidates); - if (jt == siblings.end()) { - break; - } - RoseVertex b = *jt; - if (attemptRoseMerge(build, true, a, b, false, rai)) { - mergeVerticesLeft(a, b, build, rai); - dead->push_back(a); - candidates.erase(ait); - break; // consider next a - } - ++jt; + auto jt = siblings.begin(); + while (jt != siblings.end()) { + jt = findLeftMergeSibling(jt, siblings.end(), a, build, rai, + candidates); + if (jt == siblings.end()) { + break; + } + RoseVertex b = *jt; + if (attemptRoseMerge(build, true, a, b, false, rai)) { + mergeVerticesLeft(a, b, build, rai); + dead->push_back(a); + candidates.erase(ait); + break; // consider next a + } + ++jt; } } DEBUG_PRINTF("%zu candidates remaining\n", candidates.size()); - assert(!hasOrphanedTops(build)); + assert(!hasOrphanedTops(build)); } // Can't merge vertices with different root predecessors. 
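The merge passes in the hunks above (diamond, left and right merges) all lean on the same helper pattern: keep candidate vertices in buckets, repeatedly re-partition each bucket by a key function (report/suffix hash, literal table, neighbourhood), and drop any bucket that can no longer yield a merge. Below is a minimal standalone sketch of that split-and-filter idea only — it is not the Hyperscan code; plain ints stand in for RoseVertex and a parity key stands in for the real split keys.

    // Illustrative sketch of split-and-filter bucketing (assumed simplified types).
    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <unordered_map>
    #include <vector>

    using Vertex = int;
    using Buckets = std::vector<std::vector<Vertex>>;

    // Buckets with fewer than two vertices can never produce a merge; drop them.
    static void removeSingletons(Buckets &buckets) {
        buckets.erase(std::remove_if(buckets.begin(), buckets.end(),
                                     [](const std::vector<Vertex> &b) {
                                         return b.size() < 2;
                                     }),
                      buckets.end());
    }

    // Refine the partition: split every bucket by the value of key(v), then
    // filter out singleton buckets. Vertices with different keys are never
    // merge candidates for each other, so this only removes impossible pairs.
    template <class KeyFn>
    static void splitAndFilter(Buckets &buckets, KeyFn key) {
        Buckets out;
        for (const auto &bucket : buckets) {
            std::unordered_map<decltype(key(Vertex{})), std::size_t> dest; // key -> bucket index in out
            for (Vertex v : bucket) {
                auto [it, added] = dest.emplace(key(v), out.size());
                if (added) {
                    out.emplace_back(); // first time we see this key: new bucket
                }
                out[it->second].push_back(v);
            }
        }
        buckets = std::move(out);
        removeSingletons(buckets);
    }

    int main() {
        // One initial bucket of candidates, split by parity as a stand-in for
        // splitting by report/suffix, literal table or neighbourhood.
        Buckets buckets{{1, 2, 3, 4, 5, 7}};
        splitAndFilter(buckets, [](Vertex v) { return v % 2; });
        for (const auto &b : buckets) {
            for (Vertex v : b) {
                std::cout << v << ' ';
            }
            std::cout << '\n';
        }
        return 0;
    }

In the real passes this refinement is applied several times in sequence, and only vertices that remain in the same bucket after every split are ever tested pairwise for a merge, which keeps the quadratic pairwise checks confined to small groups.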
@@ -1776,12 +1776,12 @@ bool safeRootPreds(RoseVertex a, RoseVertex b, const RoseGraph &g) { set<RoseVertex> a_roots, b_roots; for (auto u : inv_adjacent_vertices_range(a, g)) { - if (!in_degree(u, g)) { + if (!in_degree(u, g)) { a_roots.insert(u); } } for (auto u : inv_adjacent_vertices_range(b, g)) { - if (!in_degree(u, g)) { + if (!in_degree(u, g)) { b_roots.insert(u); } } @@ -1797,7 +1797,7 @@ vector<RoseVertex>::const_iterator findRightMergeSibling( vector<RoseVertex>::const_iterator it, const vector<RoseVertex>::const_iterator &end, const RoseVertex a, const RoseBuildImpl &build, - const RoseAliasingInfo &rai, + const RoseAliasingInfo &rai, const CandidateSet &candidates) { const RoseGraph &g = build.g; @@ -1811,7 +1811,7 @@ vector<RoseVertex>::const_iterator findRightMergeSibling( continue; } - if (!sameRoleProperties(build, rai, a, b)) { + if (!sameRoleProperties(build, rai, a, b)) { continue; } @@ -1849,85 +1849,85 @@ vector<RoseVertex>::const_iterator findRightMergeSibling( } static -void splitByRightProps(const RoseGraph &g, - vector<vector<RoseVertex>> &buckets) { - // Successor vector used in make_split_key. We declare it here so we can - // reuse storage. - vector<RoseVertex> succ; - - // Split by {successors, literals, reports}. - auto make_split_key = [&](RoseVertex v) { - succ.clear(); - insert(&succ, succ.end(), adjacent_vertices(v, g)); - sort(succ.begin(), succ.end()); - return hash_all(g[v].literals, g[v].reports, succ); - }; - splitAndFilterBuckets(buckets, make_split_key); +void splitByRightProps(const RoseGraph &g, + vector<vector<RoseVertex>> &buckets) { + // Successor vector used in make_split_key. We declare it here so we can + // reuse storage. + vector<RoseVertex> succ; + + // Split by {successors, literals, reports}. + auto make_split_key = [&](RoseVertex v) { + succ.clear(); + insert(&succ, succ.end(), adjacent_vertices(v, g)); + sort(succ.begin(), succ.end()); + return hash_all(g[v].literals, g[v].reports, succ); + }; + splitAndFilterBuckets(buckets, make_split_key); } static never_inline -vector<vector<RoseVertex>> -splitRightMergeBuckets(const CandidateSet &candidates, - const RoseBuildImpl &build) { - const RoseGraph &g = build.g; +vector<vector<RoseVertex>> +splitRightMergeBuckets(const CandidateSet &candidates, + const RoseBuildImpl &build) { + const RoseGraph &g = build.g; - vector<vector<RoseVertex>> buckets(1); - buckets[0].reserve(candidates.size()); - insert(&buckets[0], buckets[0].end(), candidates); + vector<vector<RoseVertex>> buckets(1); + buckets[0].reserve(candidates.size()); + insert(&buckets[0], buckets[0].end(), candidates); - DEBUG_PRINTF("at start, %zu candidates in 1 bucket\n", candidates.size()); + DEBUG_PRINTF("at start, %zu candidates in 1 bucket\n", candidates.size()); - splitByReportSuffixBehaviour(g, buckets); - DEBUG_PRINTF("split by report/suffix, %zu buckets\n", buckets.size()); - if (buckets.empty()) { - return buckets; + splitByReportSuffixBehaviour(g, buckets); + DEBUG_PRINTF("split by report/suffix, %zu buckets\n", buckets.size()); + if (buckets.empty()) { + return buckets; } - splitByRightProps(g, buckets); - DEBUG_PRINTF("split by right-merge properties, %zu buckets\n", - buckets.size()); - if (buckets.empty()) { - return buckets; + splitByRightProps(g, buckets); + DEBUG_PRINTF("split by right-merge properties, %zu buckets\n", + buckets.size()); + if (buckets.empty()) { + return buckets; } - return buckets; + return buckets; } static never_inline -void rightMergePass(CandidateSet &candidates, RoseBuildImpl &build, +void 
rightMergePass(CandidateSet &candidates, RoseBuildImpl &build, vector<RoseVertex> *dead, bool mergeRoses, - RoseAliasingInfo &rai) { + RoseAliasingInfo &rai) { DEBUG_PRINTF("begin\n"); - if (candidates.empty()) { - return; - } - - auto buckets = splitRightMergeBuckets(candidates, build); - - for (const auto &bucket : buckets) { - assert(!bucket.empty()); - for (auto it = bucket.begin(); it != bucket.end(); it++) { - RoseVertex a = *it; - for (auto jt = bucket.begin(); jt != bucket.end(); jt++) { - jt = findRightMergeSibling(jt, bucket.end(), a, build, rai, - candidates); - if (jt == bucket.end()) { - break; - } - RoseVertex b = *jt; - if (attemptRoseMerge(build, false, a, b, !mergeRoses, rai)) { - mergeVerticesRight(a, b, build, rai); - dead->push_back(a); - candidates.erase(a); - break; // consider next a - } + if (candidates.empty()) { + return; + } + + auto buckets = splitRightMergeBuckets(candidates, build); + + for (const auto &bucket : buckets) { + assert(!bucket.empty()); + for (auto it = bucket.begin(); it != bucket.end(); it++) { + RoseVertex a = *it; + for (auto jt = bucket.begin(); jt != bucket.end(); jt++) { + jt = findRightMergeSibling(jt, bucket.end(), a, build, rai, + candidates); + if (jt == bucket.end()) { + break; + } + RoseVertex b = *jt; + if (attemptRoseMerge(build, false, a, b, !mergeRoses, rai)) { + mergeVerticesRight(a, b, build, rai); + dead->push_back(a); + candidates.erase(a); + break; // consider next a + } } } } DEBUG_PRINTF("%zu candidates remaining\n", candidates.size()); - assert(!hasOrphanedTops(build)); + assert(!hasOrphanedTops(build)); } /** @@ -1942,7 +1942,7 @@ bool hasNoDiamondSiblings(const RoseGraph &g, RoseVertex v) { if (has_successor(v, g)) { bool only_succ = true; for (const auto &w : adjacent_vertices_range(v, g)) { - if (in_degree(w, g) > 1) { + if (in_degree(w, g) > 1) { only_succ = false; break; } @@ -1958,7 +1958,7 @@ bool hasNoDiamondSiblings(const RoseGraph &g, RoseVertex v) { bool only_pred = true; for (const auto &u : inv_adjacent_vertices_range(v, g)) { - if (out_degree(u, g) > 1) { + if (out_degree(u, g) > 1) { only_pred = false; break; } @@ -1993,8 +1993,8 @@ void filterDiamondCandidates(RoseGraph &g, CandidateSet &candidates) { void aliasRoles(RoseBuildImpl &build, bool mergeRoses) { const CompileContext &cc = build.cc; RoseGraph &g = build.g; - assert(!hasOrphanedTops(build)); - assert(canImplementGraphs(build)); + assert(!hasOrphanedTops(build)); + assert(canImplementGraphs(build)); if (!cc.grey.roseRoleAliasing || !cc.grey.roseGraphReduction) { return; @@ -2002,11 +2002,11 @@ void aliasRoles(RoseBuildImpl &build, bool mergeRoses) { DEBUG_PRINTF("doing role aliasing mr=%d\n", (int)mergeRoses); - RoseAliasingInfo rai(build); - + RoseAliasingInfo rai(build); + mergeRoses &= cc.grey.mergeRose & cc.grey.roseMergeRosesDuringAliasing; - CandidateSet candidates; + CandidateSet candidates; findCandidates(build, &candidates); DEBUG_PRINTF("candidates %zu\n", candidates.size()); @@ -2015,8 +2015,8 @@ void aliasRoles(RoseBuildImpl &build, bool mergeRoses) { size_t old_dead_size = 0; do { old_dead_size = dead.size(); - leftMergePass(candidates, build, &dead, rai); - rightMergePass(candidates, build, &dead, mergeRoses, rai); + leftMergePass(candidates, build, &dead, rai); + rightMergePass(candidates, build, &dead, mergeRoses, rai); } while (old_dead_size != dead.size()); /* Diamond merge passes cannot create extra merges as they require the same @@ -2024,312 +2024,312 @@ void aliasRoles(RoseBuildImpl &build, bool mergeRoses) { * to a merge to 
different pred/succ before a diamond merge, it will still * be afterwards. */ filterDiamondCandidates(g, candidates); - diamondMergePass(candidates, build, &dead, mergeRoses, rai); + diamondMergePass(candidates, build, &dead, mergeRoses, rai); DEBUG_PRINTF("killed %zu vertices\n", dead.size()); build.removeVertices(dead); - assert(!hasOrphanedTops(build)); - assert(canImplementGraphs(build)); + assert(!hasOrphanedTops(build)); + assert(canImplementGraphs(build)); +} + +namespace { +struct DupeLeafKey { + explicit DupeLeafKey(const RoseVertexProps &litv) + : literals(litv.literals), reports(litv.reports), + eod_accept(litv.eod_accept), suffix(litv.suffix), left(litv.left), + som_adjust(litv.som_adjust) { + DEBUG_PRINTF("eod_accept %d\n", (int)eod_accept); + DEBUG_PRINTF("report %u\n", left.leftfix_report); + DEBUG_PRINTF("lag %u\n", left.lag); + } + + bool operator<(const DupeLeafKey &b) const { + const DupeLeafKey &a = *this; + ORDER_CHECK(literals); + ORDER_CHECK(eod_accept); + ORDER_CHECK(suffix); + ORDER_CHECK(reports); + ORDER_CHECK(som_adjust); + ORDER_CHECK(left.leftfix_report); + ORDER_CHECK(left.lag); + return false; + } + + flat_set<u32> literals; + flat_set<ReportID> reports; + bool eod_accept; + suffix_id suffix; + LeftEngInfo left; + u32 som_adjust; +}; + +struct UncalcLeafKey { + UncalcLeafKey(const RoseGraph &g, RoseVertex v) + : literals(g[v].literals), rose(g[v].left) { + for (const auto &e : in_edges_range(v, g)) { + RoseVertex u = source(e, g); + preds.insert(make_pair(u, g[e])); + } + } + + bool operator<(const UncalcLeafKey &b) const { + const UncalcLeafKey &a = *this; + ORDER_CHECK(literals); + ORDER_CHECK(preds); + ORDER_CHECK(rose); + return false; + } + + flat_set<u32> literals; + flat_set<pair<RoseVertex, RoseEdgeProps>> preds; + LeftEngInfo rose; +}; +} // namespace + +/** + * This function merges leaf vertices with the same literals and report + * id/suffix. The leaf vertices of the graph are inspected and a mapping of + * leaf vertex properties to vertices is built. If the same set of leaf + * properties has already been seen when we inspect a vertex, we attempt to + * merge the vertex in with the previously seen vertex. This process can fail + * if the vertices share a common predecessor vertex but have a differing, + * incompatible relationship (different bounds or infix) with the predecessor. + * + * This takes place after \ref dedupeSuffixes to increase effectiveness as the + * same suffix is required for a merge to occur. + * + * TODO: work if this is a subset of role aliasing (and if it can be eliminated) + * or clearly document cases that would not be covered by role aliasing. + */ +void mergeDupeLeaves(RoseBuildImpl &build) { + map<DupeLeafKey, RoseVertex> leaves; + vector<RoseVertex> changed; + + RoseGraph &g = build.g; + for (auto v : vertices_range(g)) { + if (in_degree(v, g) == 0) { + assert(build.isAnyStart(v)); + continue; + } + + DEBUG_PRINTF("inspecting vertex index=%zu in_degree %zu " + "out_degree %zu\n", g[v].index, in_degree(v, g), + out_degree(v, g)); + + // Vertex must be a reporting leaf node + if (g[v].reports.empty() || !isLeafNode(v, g)) { + continue; + } + + // At the moment, we ignore all successors of root or anchored_root, + // since many parts of our runtime assume that these have in-degree 1. 
+ if (build.isRootSuccessor(v)) { + continue; + } + + DupeLeafKey dupe(g[v]); + if (leaves.find(dupe) == leaves.end()) { + leaves.insert(make_pair(dupe, v)); + continue; + } + + RoseVertex t = leaves.find(dupe)->second; + DEBUG_PRINTF("found two leaf dupe roles, index=%zu,%zu\n", g[v].index, + g[t].index); + + vector<RoseEdge> deadEdges; + for (const auto &e : in_edges_range(v, g)) { + RoseVertex u = source(e, g); + DEBUG_PRINTF("u index=%zu\n", g[u].index); + if (RoseEdge et = edge(u, t, g)) { + if (g[et].minBound <= g[e].minBound + && g[et].maxBound >= g[e].maxBound) { + DEBUG_PRINTF("remove more constrained edge\n"); + deadEdges.push_back(e); + } + } else { + DEBUG_PRINTF("rehome edge: add %zu->%zu\n", g[u].index, + g[t].index); + add_edge(u, t, g[e], g); + deadEdges.push_back(e); + } + } + + if (!deadEdges.empty()) { + for (auto &e : deadEdges) { + remove_edge(e, g); + } + changed.push_back(v); + g[t].min_offset = min(g[t].min_offset, g[v].min_offset); + g[t].max_offset = max(g[t].max_offset, g[v].max_offset); + } + } + DEBUG_PRINTF("find loop done\n"); + + // Remove any vertices that now have no in-edges. + size_t countRemovals = 0; + for (size_t i = 0; i < changed.size(); i++) { + RoseVertex v = changed[i]; + if (in_degree(v, g) == 0) { + DEBUG_PRINTF("remove vertex\n"); + if (!build.isVirtualVertex(v)) { + for (u32 lit_id : g[v].literals) { + build.literal_info[lit_id].vertices.erase(v); + } + } + remove_vertex(v, g); + countRemovals++; + } + } + + // if we've removed anything, we need to renumber vertices + if (countRemovals) { + renumber_vertices(g); + DEBUG_PRINTF("removed %zu vertices.\n", countRemovals); + } +} + +/** Merges the suffixes on the (identical) vertices in \a vcluster, used by + * \ref uncalcLeaves. */ +static +void mergeCluster(RoseGraph &g, const ReportManager &rm, + const vector<RoseVertex> &vcluster, + vector<RoseVertex> &dead, const CompileContext &cc) { + if (vcluster.size() <= 1) { + return; // No merge to perform. + } + + // Note that we batch merges up fairly crudely for performance reasons. 
+ vector<RoseVertex>::const_iterator it = vcluster.begin(), it2; + while (it != vcluster.end()) { + vector<NGHolder *> cluster; + map<NGHolder *, RoseVertex> rev; + + for (it2 = it; + it2 != vcluster.end() && cluster.size() < MERGE_GROUP_SIZE_MAX; + ++it2) { + RoseVertex v = *it2; + NGHolder *h = g[v].suffix.graph.get(); + assert(!g[v].suffix.haig); /* should not be here if haig */ + rev[h] = v; + cluster.push_back(h); + } + it = it2; + + DEBUG_PRINTF("merging cluster %zu\n", cluster.size()); + auto merged = mergeNfaCluster(cluster, &rm, cc); + DEBUG_PRINTF("done\n"); + + for (const auto &m : merged) { + NGHolder *h_victim = m.first; // mergee + NGHolder *h_winner = m.second; + RoseVertex victim = rev[h_victim]; + RoseVertex winner = rev[h_winner]; + + LIMIT_TO_AT_MOST(&g[winner].min_offset, g[victim].min_offset); + ENSURE_AT_LEAST(&g[winner].max_offset, g[victim].max_offset); + insert(&g[winner].reports, g[victim].reports); + + dead.push_back(victim); + } + } +} + +static +void findUncalcLeavesCandidates(RoseBuildImpl &build, + map<UncalcLeafKey, vector<RoseVertex> > &clusters, + deque<UncalcLeafKey> &ordered) { + const RoseGraph &g = build.g; + + vector<RoseVertex> suffix_vertices; // vertices with suffix graphs + unordered_map<const NGHolder *, u32> fcount; // ref count per graph + + for (auto v : vertices_range(g)) { + if (g[v].suffix) { + if (!g[v].suffix.graph) { + continue; /* cannot uncalc (haig/mcclellan); TODO */ + } + + assert(g[v].suffix.graph->kind == NFA_SUFFIX); + + // Ref count all suffixes, as we don't want to merge a suffix + // that happens to be shared with a non-leaf vertex somewhere. + DEBUG_PRINTF("vertex %zu has suffix %p\n", g[v].index, + g[v].suffix.graph.get()); + fcount[g[v].suffix.graph.get()]++; + + // Vertex must be a reporting pseudo accept + if (!isLeafNode(v, g)) { + continue; + } + + suffix_vertices.push_back(v); + } + } + + for (auto v : suffix_vertices) { + if (in_degree(v, g) == 0) { + assert(build.isAnyStart(v)); + continue; + } + + const NGHolder *h = g[v].suffix.graph.get(); + assert(h); + DEBUG_PRINTF("suffix %p\n", h); + + // We can't easily merge suffixes shared with other vertices, and + // creating a unique copy to do so may just mean we end up tracking + // more NFAs. Better to leave shared suffixes alone. + if (fcount[h] != 1) { + DEBUG_PRINTF("skipping shared suffix\n"); + continue; + } + + UncalcLeafKey key(g, v); + vector<RoseVertex> &vec = clusters[key]; + if (vec.empty()) { + + ordered.push_back(key); + } + vec.push_back(v); + } + + DEBUG_PRINTF("find loop done\n"); +} + +/** + * This function attempts to combine identical roles (same literals, same + * predecessors, etc) with different suffixes into a single role which + * activates a larger suffix. The leaf vertices of the graph with a suffix are + * grouped into clusters which have members triggered by identical roles. The + * \ref mergeNfaCluster function (from ng_uncalc_components) is then utilised + * to build a set of larger (and still implementable) suffixes. The graph is + * then updated to point to the new suffixes and any unneeded roles are + * removed. + * + * Note: suffixes which are shared amongst multiple roles are not considered + * for this pass as the individual suffixes would have to continue to exist for + * the other roles to trigger resulting in the transformation not producing any + * savings. 
+ * + * Note: as \ref mergeNfaCluster is slow when the cluster sizes are large, + * clusters of more than \ref MERGE_GROUP_SIZE_MAX roles are split into smaller + * chunks for processing. + */ +void uncalcLeaves(RoseBuildImpl &build) { + DEBUG_PRINTF("uncalcing\n"); + + map<UncalcLeafKey, vector<RoseVertex> > clusters; + deque<UncalcLeafKey> ordered; + findUncalcLeavesCandidates(build, clusters, ordered); + + vector<RoseVertex> dead; + + for (const auto &key : ordered) { + DEBUG_PRINTF("cluster of size %zu\n", clusters[key].size()); + mergeCluster(build.g, build.rm, clusters[key], dead, build.cc); + } + build.removeVertices(dead); } -namespace { -struct DupeLeafKey { - explicit DupeLeafKey(const RoseVertexProps &litv) - : literals(litv.literals), reports(litv.reports), - eod_accept(litv.eod_accept), suffix(litv.suffix), left(litv.left), - som_adjust(litv.som_adjust) { - DEBUG_PRINTF("eod_accept %d\n", (int)eod_accept); - DEBUG_PRINTF("report %u\n", left.leftfix_report); - DEBUG_PRINTF("lag %u\n", left.lag); - } - - bool operator<(const DupeLeafKey &b) const { - const DupeLeafKey &a = *this; - ORDER_CHECK(literals); - ORDER_CHECK(eod_accept); - ORDER_CHECK(suffix); - ORDER_CHECK(reports); - ORDER_CHECK(som_adjust); - ORDER_CHECK(left.leftfix_report); - ORDER_CHECK(left.lag); - return false; - } - - flat_set<u32> literals; - flat_set<ReportID> reports; - bool eod_accept; - suffix_id suffix; - LeftEngInfo left; - u32 som_adjust; -}; - -struct UncalcLeafKey { - UncalcLeafKey(const RoseGraph &g, RoseVertex v) - : literals(g[v].literals), rose(g[v].left) { - for (const auto &e : in_edges_range(v, g)) { - RoseVertex u = source(e, g); - preds.insert(make_pair(u, g[e])); - } - } - - bool operator<(const UncalcLeafKey &b) const { - const UncalcLeafKey &a = *this; - ORDER_CHECK(literals); - ORDER_CHECK(preds); - ORDER_CHECK(rose); - return false; - } - - flat_set<u32> literals; - flat_set<pair<RoseVertex, RoseEdgeProps>> preds; - LeftEngInfo rose; -}; -} // namespace - -/** - * This function merges leaf vertices with the same literals and report - * id/suffix. The leaf vertices of the graph are inspected and a mapping of - * leaf vertex properties to vertices is built. If the same set of leaf - * properties has already been seen when we inspect a vertex, we attempt to - * merge the vertex in with the previously seen vertex. This process can fail - * if the vertices share a common predecessor vertex but have a differing, - * incompatible relationship (different bounds or infix) with the predecessor. - * - * This takes place after \ref dedupeSuffixes to increase effectiveness as the - * same suffix is required for a merge to occur. - * - * TODO: work if this is a subset of role aliasing (and if it can be eliminated) - * or clearly document cases that would not be covered by role aliasing. - */ -void mergeDupeLeaves(RoseBuildImpl &build) { - map<DupeLeafKey, RoseVertex> leaves; - vector<RoseVertex> changed; - - RoseGraph &g = build.g; - for (auto v : vertices_range(g)) { - if (in_degree(v, g) == 0) { - assert(build.isAnyStart(v)); - continue; - } - - DEBUG_PRINTF("inspecting vertex index=%zu in_degree %zu " - "out_degree %zu\n", g[v].index, in_degree(v, g), - out_degree(v, g)); - - // Vertex must be a reporting leaf node - if (g[v].reports.empty() || !isLeafNode(v, g)) { - continue; - } - - // At the moment, we ignore all successors of root or anchored_root, - // since many parts of our runtime assume that these have in-degree 1. 
- if (build.isRootSuccessor(v)) { - continue; - } - - DupeLeafKey dupe(g[v]); - if (leaves.find(dupe) == leaves.end()) { - leaves.insert(make_pair(dupe, v)); - continue; - } - - RoseVertex t = leaves.find(dupe)->second; - DEBUG_PRINTF("found two leaf dupe roles, index=%zu,%zu\n", g[v].index, - g[t].index); - - vector<RoseEdge> deadEdges; - for (const auto &e : in_edges_range(v, g)) { - RoseVertex u = source(e, g); - DEBUG_PRINTF("u index=%zu\n", g[u].index); - if (RoseEdge et = edge(u, t, g)) { - if (g[et].minBound <= g[e].minBound - && g[et].maxBound >= g[e].maxBound) { - DEBUG_PRINTF("remove more constrained edge\n"); - deadEdges.push_back(e); - } - } else { - DEBUG_PRINTF("rehome edge: add %zu->%zu\n", g[u].index, - g[t].index); - add_edge(u, t, g[e], g); - deadEdges.push_back(e); - } - } - - if (!deadEdges.empty()) { - for (auto &e : deadEdges) { - remove_edge(e, g); - } - changed.push_back(v); - g[t].min_offset = min(g[t].min_offset, g[v].min_offset); - g[t].max_offset = max(g[t].max_offset, g[v].max_offset); - } - } - DEBUG_PRINTF("find loop done\n"); - - // Remove any vertices that now have no in-edges. - size_t countRemovals = 0; - for (size_t i = 0; i < changed.size(); i++) { - RoseVertex v = changed[i]; - if (in_degree(v, g) == 0) { - DEBUG_PRINTF("remove vertex\n"); - if (!build.isVirtualVertex(v)) { - for (u32 lit_id : g[v].literals) { - build.literal_info[lit_id].vertices.erase(v); - } - } - remove_vertex(v, g); - countRemovals++; - } - } - - // if we've removed anything, we need to renumber vertices - if (countRemovals) { - renumber_vertices(g); - DEBUG_PRINTF("removed %zu vertices.\n", countRemovals); - } -} - -/** Merges the suffixes on the (identical) vertices in \a vcluster, used by - * \ref uncalcLeaves. */ -static -void mergeCluster(RoseGraph &g, const ReportManager &rm, - const vector<RoseVertex> &vcluster, - vector<RoseVertex> &dead, const CompileContext &cc) { - if (vcluster.size() <= 1) { - return; // No merge to perform. - } - - // Note that we batch merges up fairly crudely for performance reasons. 
- vector<RoseVertex>::const_iterator it = vcluster.begin(), it2; - while (it != vcluster.end()) { - vector<NGHolder *> cluster; - map<NGHolder *, RoseVertex> rev; - - for (it2 = it; - it2 != vcluster.end() && cluster.size() < MERGE_GROUP_SIZE_MAX; - ++it2) { - RoseVertex v = *it2; - NGHolder *h = g[v].suffix.graph.get(); - assert(!g[v].suffix.haig); /* should not be here if haig */ - rev[h] = v; - cluster.push_back(h); - } - it = it2; - - DEBUG_PRINTF("merging cluster %zu\n", cluster.size()); - auto merged = mergeNfaCluster(cluster, &rm, cc); - DEBUG_PRINTF("done\n"); - - for (const auto &m : merged) { - NGHolder *h_victim = m.first; // mergee - NGHolder *h_winner = m.second; - RoseVertex victim = rev[h_victim]; - RoseVertex winner = rev[h_winner]; - - LIMIT_TO_AT_MOST(&g[winner].min_offset, g[victim].min_offset); - ENSURE_AT_LEAST(&g[winner].max_offset, g[victim].max_offset); - insert(&g[winner].reports, g[victim].reports); - - dead.push_back(victim); - } - } -} - -static -void findUncalcLeavesCandidates(RoseBuildImpl &build, - map<UncalcLeafKey, vector<RoseVertex> > &clusters, - deque<UncalcLeafKey> &ordered) { - const RoseGraph &g = build.g; - - vector<RoseVertex> suffix_vertices; // vertices with suffix graphs - unordered_map<const NGHolder *, u32> fcount; // ref count per graph - - for (auto v : vertices_range(g)) { - if (g[v].suffix) { - if (!g[v].suffix.graph) { - continue; /* cannot uncalc (haig/mcclellan); TODO */ - } - - assert(g[v].suffix.graph->kind == NFA_SUFFIX); - - // Ref count all suffixes, as we don't want to merge a suffix - // that happens to be shared with a non-leaf vertex somewhere. - DEBUG_PRINTF("vertex %zu has suffix %p\n", g[v].index, - g[v].suffix.graph.get()); - fcount[g[v].suffix.graph.get()]++; - - // Vertex must be a reporting pseudo accept - if (!isLeafNode(v, g)) { - continue; - } - - suffix_vertices.push_back(v); - } - } - - for (auto v : suffix_vertices) { - if (in_degree(v, g) == 0) { - assert(build.isAnyStart(v)); - continue; - } - - const NGHolder *h = g[v].suffix.graph.get(); - assert(h); - DEBUG_PRINTF("suffix %p\n", h); - - // We can't easily merge suffixes shared with other vertices, and - // creating a unique copy to do so may just mean we end up tracking - // more NFAs. Better to leave shared suffixes alone. - if (fcount[h] != 1) { - DEBUG_PRINTF("skipping shared suffix\n"); - continue; - } - - UncalcLeafKey key(g, v); - vector<RoseVertex> &vec = clusters[key]; - if (vec.empty()) { - - ordered.push_back(key); - } - vec.push_back(v); - } - - DEBUG_PRINTF("find loop done\n"); -} - -/** - * This function attempts to combine identical roles (same literals, same - * predecessors, etc) with different suffixes into a single role which - * activates a larger suffix. The leaf vertices of the graph with a suffix are - * grouped into clusters which have members triggered by identical roles. The - * \ref mergeNfaCluster function (from ng_uncalc_components) is then utilised - * to build a set of larger (and still implementable) suffixes. The graph is - * then updated to point to the new suffixes and any unneeded roles are - * removed. - * - * Note: suffixes which are shared amongst multiple roles are not considered - * for this pass as the individual suffixes would have to continue to exist for - * the other roles to trigger resulting in the transformation not producing any - * savings. 
- * - * Note: as \ref mergeNfaCluster is slow when the cluster sizes are large, - * clusters of more than \ref MERGE_GROUP_SIZE_MAX roles are split into smaller - * chunks for processing. - */ -void uncalcLeaves(RoseBuildImpl &build) { - DEBUG_PRINTF("uncalcing\n"); - - map<UncalcLeafKey, vector<RoseVertex> > clusters; - deque<UncalcLeafKey> ordered; - findUncalcLeavesCandidates(build, clusters, ordered); - - vector<RoseVertex> dead; - - for (const auto &key : ordered) { - DEBUG_PRINTF("cluster of size %zu\n", clusters[key].size()); - mergeCluster(build.g, build.rm, clusters[key], dead, build.cc); - } - build.removeVertices(dead); -} - } // namespace ue2 diff --git a/contrib/libs/hyperscan/src/rose/rose_build_role_aliasing.h b/contrib/libs/hyperscan/src/rose/rose_build_role_aliasing.h index c8e71f144c..4655f10d52 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_role_aliasing.h +++ b/contrib/libs/hyperscan/src/rose/rose_build_role_aliasing.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,23 +26,23 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#ifndef ROSE_BUILD_ROLE_ALIASING_H -#define ROSE_BUILD_ROLE_ALIASING_H +#ifndef ROSE_BUILD_ROLE_ALIASING_H +#define ROSE_BUILD_ROLE_ALIASING_H + +/** \file + * \brief Rose Build: functions for reducing the size of the Rose graph + * through merging roles (RoseVertices) together. + */ -/** \file - * \brief Rose Build: functions for reducing the size of the Rose graph - * through merging roles (RoseVertices) together. - */ - namespace ue2 { class RoseBuildImpl; void aliasRoles(RoseBuildImpl &build, bool mergeRoses); -void mergeDupeLeaves(RoseBuildImpl &build); -void uncalcLeaves(RoseBuildImpl &build); - +void mergeDupeLeaves(RoseBuildImpl &build); +void uncalcLeaves(RoseBuildImpl &build); + } // namespace ue2 #endif diff --git a/contrib/libs/hyperscan/src/rose/rose_build_scatter.cpp b/contrib/libs/hyperscan/src/rose/rose_build_scatter.cpp index 2fb923def0..87085ae9a8 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_scatter.cpp +++ b/contrib/libs/hyperscan/src/rose/rose_build_scatter.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -63,24 +63,24 @@ void merge_in(scatter_plan_raw *out, const scatter_plan_raw &in) { insert(&out->p_u8, out->p_u8.end(), in.p_u8); } -scatter_plan_raw buildStateScatterPlan(u32 role_state_offset, - u32 role_state_count, u32 left_array_count, u32 left_prefix_count, - const RoseStateOffsets &stateOffsets, bool streaming, - u32 leaf_array_count, u32 outfix_begin, u32 outfix_end) { - scatter_plan_raw out; - +scatter_plan_raw buildStateScatterPlan(u32 role_state_offset, + u32 role_state_count, u32 left_array_count, u32 left_prefix_count, + const RoseStateOffsets &stateOffsets, bool streaming, + u32 leaf_array_count, u32 outfix_begin, u32 outfix_end) { + scatter_plan_raw out; + /* init role array */ scatter_plan_raw spr_role; mmbBuildClearPlan(role_state_count, &spr_role); rebase(&spr_role, role_state_offset); - merge_in(&out, spr_role); + merge_in(&out, spr_role); /* init rose array: turn on prefixes */ u32 rose_array_offset = stateOffsets.activeLeftArray; scatter_plan_raw spr_rose; 
mmbBuildInitRangePlan(left_array_count, 0, left_prefix_count, &spr_rose); rebase(&spr_rose, rose_array_offset); - merge_in(&out, spr_rose); + merge_in(&out, spr_rose); /* suffix/outfix array */ scatter_plan_raw spr_leaf; @@ -91,9 +91,9 @@ scatter_plan_raw buildStateScatterPlan(u32 role_state_offset, mmbBuildClearPlan(leaf_array_count, &spr_leaf); } rebase(&spr_leaf, stateOffsets.activeLeafArray); - merge_in(&out, spr_leaf); - - return out; + merge_in(&out, spr_leaf); + + return out; } u32 aux_size(const scatter_plan_raw &raw) { diff --git a/contrib/libs/hyperscan/src/rose/rose_build_scatter.h b/contrib/libs/hyperscan/src/rose/rose_build_scatter.h index da44b2cf54..67a82b9937 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_scatter.h +++ b/contrib/libs/hyperscan/src/rose/rose_build_scatter.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -45,10 +45,10 @@ struct scatter_plan_raw { std::vector<scatter_unit_u8> p_u8; }; -scatter_plan_raw buildStateScatterPlan(u32 role_state_offset, - u32 role_state_count, u32 left_array_count, u32 left_prefix_count, - const RoseStateOffsets &stateOffsets, bool streaming, - u32 leaf_array_count, u32 outfix_begin, u32 outfix_end); +scatter_plan_raw buildStateScatterPlan(u32 role_state_offset, + u32 role_state_count, u32 left_array_count, u32 left_prefix_count, + const RoseStateOffsets &stateOffsets, bool streaming, + u32 leaf_array_count, u32 outfix_begin, u32 outfix_end); u32 aux_size(const scatter_plan_raw &raw); diff --git a/contrib/libs/hyperscan/src/rose/rose_build_util.h b/contrib/libs/hyperscan/src/rose/rose_build_util.h index c2c964e54a..81bb68459b 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_util.h +++ b/contrib/libs/hyperscan/src/rose/rose_build_util.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -36,8 +36,8 @@ namespace ue2 { -/** Max allowed width for transient graphs in block mode */ -#define ROSE_BLOCK_TRANSIENT_MAX_WIDTH 255U +/** Max allowed width for transient graphs in block mode */ +#define ROSE_BLOCK_TRANSIENT_MAX_WIDTH 255U /** * \brief Add two Rose depths together, coping correctly with infinity at diff --git a/contrib/libs/hyperscan/src/rose/rose_build_width.cpp b/contrib/libs/hyperscan/src/rose/rose_build_width.cpp index 4f11a2fcf8..182b62ee6f 100644 --- a/contrib/libs/hyperscan/src/rose/rose_build_width.cpp +++ b/contrib/libs/hyperscan/src/rose/rose_build_width.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -77,20 +77,20 @@ u32 findMinWidth(const RoseBuildImpl &tbi, enum rose_literal_table table) { u32 minWidth = ROSE_BOUND_INF; for (auto v : reachable) { if (g[v].eod_accept) { - DEBUG_PRINTF("skipping %zu - not a real vertex\n", g[v].index); + DEBUG_PRINTF("skipping %zu - not a real vertex\n", g[v].index); continue; } const u32 w = g[v].min_offset; if (!g[v].reports.empty()) { - DEBUG_PRINTF("%zu can fire report at offset %u\n", g[v].index, w); + DEBUG_PRINTF("%zu can 
fire report at offset %u\n", g[v].index, w); minWidth = min(minWidth, w); } if (is_end_anchored(g, v)) { - DEBUG_PRINTF("%zu can fire eod report at offset %u\n", g[v].index, - w); + DEBUG_PRINTF("%zu can fire eod report at offset %u\n", g[v].index, + w); minWidth = min(minWidth, w); } @@ -99,7 +99,7 @@ u32 findMinWidth(const RoseBuildImpl &tbi, enum rose_literal_table table) { assert(suffix_width.is_reachable()); DEBUG_PRINTF("%zu has suffix with top %u (width %s), can fire " "report at %u\n", - g[v].index, g[v].suffix.top, suffix_width.str().c_str(), + g[v].index, g[v].suffix.top, suffix_width.str().c_str(), w + suffix_width); minWidth = min(minWidth, w + suffix_width); } @@ -204,10 +204,10 @@ u32 findMaxBAWidth(const RoseBuildImpl &tbi, enum rose_literal_table table) { // Everyone's anchored, so the max width can be taken from the max // max_offset on our vertices (so long as all accepts are ACCEPT_EOD). for (auto v : reachable) { - DEBUG_PRINTF("inspecting vert %zu\n", g[v].index); + DEBUG_PRINTF("inspecting vert %zu\n", g[v].index); if (g[v].eod_accept) { - DEBUG_PRINTF("skipping %zu - not a real vertex\n", g[v].index); + DEBUG_PRINTF("skipping %zu - not a real vertex\n", g[v].index); continue; } diff --git a/contrib/libs/hyperscan/src/rose/rose_common.h b/contrib/libs/hyperscan/src/rose/rose_common.h index ad18d5364a..34678b8fcc 100644 --- a/contrib/libs/hyperscan/src/rose/rose_common.h +++ b/contrib/libs/hyperscan/src/rose/rose_common.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -41,16 +41,16 @@ /** \brief Length in bytes of a reach bitvector, used by the lookaround code. */ #define REACH_BITVECTOR_LEN 32 -/** \brief Length in bytes of a reach bitvector for multi-path lookaround. */ -#define MULTI_REACH_BITVECTOR_LEN 256 - -/** - * \brief The max offset from the leftmost byte to the rightmost byte in - * multi-path lookaround. - */ -#define MULTIPATH_MAX_LEN 16 - -/** \brief Value used to represent an invalid Rose program offset. */ -#define ROSE_INVALID_PROG_OFFSET 0 - +/** \brief Length in bytes of a reach bitvector for multi-path lookaround. */ +#define MULTI_REACH_BITVECTOR_LEN 256 + +/** + * \brief The max offset from the leftmost byte to the rightmost byte in + * multi-path lookaround. + */ +#define MULTIPATH_MAX_LEN 16 + +/** \brief Value used to represent an invalid Rose program offset. */ +#define ROSE_INVALID_PROG_OFFSET 0 + #endif // ROSE_COMMON_H diff --git a/contrib/libs/hyperscan/src/rose/rose_graph.h b/contrib/libs/hyperscan/src/rose/rose_graph.h index 1cf59ca0c9..b5bf1985d8 100644 --- a/contrib/libs/hyperscan/src/rose/rose_graph.h +++ b/contrib/libs/hyperscan/src/rose/rose_graph.h @@ -39,11 +39,11 @@ #include "ue2common.h" #include "rose_build.h" -#include "rose_internal.h" +#include "rose_internal.h" #include "nfa/nfa_internal.h" // for MO_INVALID_IDX #include "util/depth.h" -#include "util/flat_containers.h" -#include "util/ue2_graph.h" +#include "util/flat_containers.h" +#include "util/ue2_graph.h" #include <memory> #include <set> @@ -53,7 +53,7 @@ namespace ue2 { struct CastleProto; struct raw_dfa; struct raw_som_dfa; -struct TamaProto; +struct TamaProto; /** \brief Table type for a literal. 
*/ enum rose_literal_table { @@ -64,14 +64,14 @@ enum rose_literal_table { ROSE_EVENT //!< "literal-like" events, such as EOD }; -/** \brief Edge history types. */ -enum RoseRoleHistory { - ROSE_ROLE_HISTORY_NONE, //!< no special history - ROSE_ROLE_HISTORY_ANCH, //!< previous role is at a fixed offset - ROSE_ROLE_HISTORY_LAST_BYTE, //!< previous role can only match at EOD - ROSE_ROLE_HISTORY_INVALID //!< history not yet assigned -}; - +/** \brief Edge history types. */ +enum RoseRoleHistory { + ROSE_ROLE_HISTORY_NONE, //!< no special history + ROSE_ROLE_HISTORY_ANCH, //!< previous role is at a fixed offset + ROSE_ROLE_HISTORY_LAST_BYTE, //!< previous role can only match at EOD + ROSE_ROLE_HISTORY_INVALID //!< history not yet assigned +}; + #include "util/order_check.h" /** \brief Provides information about the (pre|in)fix engine to the left of a @@ -81,10 +81,10 @@ struct LeftEngInfo { std::shared_ptr<CastleProto> castle; std::shared_ptr<raw_dfa> dfa; std::shared_ptr<raw_som_dfa> haig; - std::shared_ptr<TamaProto> tamarama; + std::shared_ptr<TamaProto> tamarama; u32 lag = 0U; ReportID leftfix_report = MO_INVALID_IDX; - depth dfa_min_width{0}; + depth dfa_min_width{0}; depth dfa_max_width = depth::infinity(); bool operator==(const LeftEngInfo &other) const { @@ -92,7 +92,7 @@ struct LeftEngInfo { && other.castle == castle && other.dfa == dfa && other.haig == haig - && other.tamarama == tamarama + && other.tamarama == tamarama && other.lag == lag && other.leftfix_report == leftfix_report; } @@ -105,12 +105,12 @@ struct LeftEngInfo { ORDER_CHECK(castle); ORDER_CHECK(dfa); ORDER_CHECK(haig); - ORDER_CHECK(tamarama); + ORDER_CHECK(tamarama); ORDER_CHECK(lag); ORDER_CHECK(leftfix_report); return false; } - size_t hash() const; + size_t hash() const; void reset(void); explicit operator bool() const; bool tracksSom() const { return !!haig; } @@ -124,14 +124,14 @@ struct RoseSuffixInfo { std::shared_ptr<CastleProto> castle; std::shared_ptr<raw_som_dfa> haig; std::shared_ptr<raw_dfa> rdfa; - std::shared_ptr<TamaProto> tamarama; - depth dfa_min_width{0}; + std::shared_ptr<TamaProto> tamarama; + depth dfa_min_width{0}; depth dfa_max_width = depth::infinity(); bool operator==(const RoseSuffixInfo &b) const; bool operator!=(const RoseSuffixInfo &b) const { return !(*this == b); } bool operator<(const RoseSuffixInfo &b) const; - size_t hash() const; + size_t hash() const; void reset(void); explicit operator bool() const { return graph || castle || haig || rdfa || tamarama; } }; @@ -139,7 +139,7 @@ struct RoseSuffixInfo { /** \brief Properties attached to each Rose graph vertex. */ struct RoseVertexProps { /** \brief Unique dense vertex index. Used for BGL algorithms. */ - size_t index = ~size_t{0}; + size_t index = ~size_t{0}; /** \brief IDs of literals in the Rose literal map. */ flat_set<u32> literals; @@ -183,9 +183,9 @@ struct RoseVertexProps { /** \brief Properties attached to each Rose graph edge. */ /* bounds are distance from end of prev to start of the next */ struct RoseEdgeProps { - /** \brief Unique dense vertex index. Used for BGL algorithms. */ - size_t index = ~size_t{0}; - + /** \brief Unique dense vertex index. Used for BGL algorithms. */ + size_t index = ~size_t{0}; + /** * \brief Minimum distance from the end of the source role's match to the * start of the target role's match. @@ -219,9 +219,9 @@ bool operator<(const RoseEdgeProps &a, const RoseEdgeProps &b); /** * \brief Core Rose graph structure. 
*/ -struct RoseGraph : public ue2_graph<RoseGraph, RoseVertexProps, RoseEdgeProps> { - friend class RoseBuildImpl; /* to allow index renumbering */ -}; +struct RoseGraph : public ue2_graph<RoseGraph, RoseVertexProps, RoseEdgeProps> { + friend class RoseBuildImpl; /* to allow index renumbering */ +}; using RoseVertex = RoseGraph::vertex_descriptor; using RoseEdge = RoseGraph::edge_descriptor; diff --git a/contrib/libs/hyperscan/src/rose/rose_in_graph.h b/contrib/libs/hyperscan/src/rose/rose_in_graph.h index 1cf7b22045..da0ea08da1 100644 --- a/contrib/libs/hyperscan/src/rose/rose_in_graph.h +++ b/contrib/libs/hyperscan/src/rose/rose_in_graph.h @@ -45,8 +45,8 @@ #include "ue2common.h" #include "rose/rose_common.h" -#include "util/flat_containers.h" -#include "util/ue2_graph.h" +#include "util/flat_containers.h" +#include "util/ue2_graph.h" #include "util/ue2string.h" #include <memory> @@ -55,7 +55,7 @@ namespace ue2 { class NGHolder; struct raw_som_dfa; -struct raw_dfa; +struct raw_dfa; enum RoseInVertexType { RIV_LITERAL, @@ -105,12 +105,12 @@ public: ROSE_BOUND_INF); } - /* for when there is a suffix graph which handles the reports */ - static RoseInVertexProps makeAcceptEod() { - return RoseInVertexProps(RIV_ACCEPT_EOD, ue2_literal(), 0, - ROSE_BOUND_INF); - } - + /* for when there is a suffix graph which handles the reports */ + static RoseInVertexProps makeAcceptEod() { + return RoseInVertexProps(RIV_ACCEPT_EOD, ue2_literal(), 0, + ROSE_BOUND_INF); + } + static RoseInVertexProps makeStart(bool anchored) { DEBUG_PRINTF("making %s\n", anchored ? "anchored start" : "start"); if (anchored) { @@ -167,12 +167,12 @@ struct RoseInEdgeProps { /** \brief Maximum bound on 'dot' repeat between literals. */ u32 maxBound; - /** \brief Graph on edge. Graph is end to (end - lag). */ + /** \brief Graph on edge. Graph is end to (end - lag). */ std::shared_ptr<NGHolder> graph; - /** \brief DFA version of graph, if we have already determinised. */ - std::shared_ptr<raw_dfa> dfa; - + /** \brief DFA version of graph, if we have already determinised. */ + std::shared_ptr<raw_dfa> dfa; + /** \brief Haig version of graph, if required. */ std::shared_ptr<raw_som_dfa> haig; @@ -183,11 +183,11 @@ struct RoseInEdgeProps { u32 graph_lag; /** \brief Unique edge index. */ - size_t index = 0; -}; + size_t index = 0; +}; -struct RoseInGraph - : public ue2_graph<RoseInGraph, RoseInVertexProps, RoseInEdgeProps> { +struct RoseInGraph + : public ue2_graph<RoseInGraph, RoseInVertexProps, RoseInEdgeProps> { }; typedef RoseInGraph::vertex_descriptor RoseInVertex; typedef RoseInGraph::edge_descriptor RoseInEdge; diff --git a/contrib/libs/hyperscan/src/rose/rose_in_util.cpp b/contrib/libs/hyperscan/src/rose/rose_in_util.cpp index e24c9fa08c..cb531017e3 100644 --- a/contrib/libs/hyperscan/src/rose/rose_in_util.cpp +++ b/contrib/libs/hyperscan/src/rose/rose_in_util.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2017, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -51,11 +51,11 @@ namespace ue2 { * at the front and all the predecessors of a vertex occur earlier in the list * than the vertex. 
*/ vector<RoseInVertex> topo_order(const RoseInGraph &g) { - assert(hasCorrectlyNumberedVertices(g)); + assert(hasCorrectlyNumberedVertices(g)); vector<RoseInVertex> v_order; - v_order.reserve(num_vertices(g)); + v_order.reserve(num_vertices(g)); - boost::topological_sort(g, back_inserter(v_order)); + boost::topological_sort(g, back_inserter(v_order)); reverse(v_order.begin(), v_order.end()); /* put starts at the front */ @@ -92,8 +92,8 @@ private: } unique_ptr<RoseInGraph> cloneRoseGraph(const RoseInGraph &ig) { - assert(hasCorrectlyNumberedVertices(ig)); - unique_ptr<RoseInGraph> out = std::make_unique<RoseInGraph>(); + assert(hasCorrectlyNumberedVertices(ig)); + unique_ptr<RoseInGraph> out = std::make_unique<RoseInGraph>(); unordered_map<const NGHolder *, shared_ptr<NGHolder>> graph_map; unordered_map<const raw_som_dfa *, shared_ptr<raw_som_dfa>> haig_map; @@ -109,7 +109,7 @@ unique_ptr<RoseInGraph> cloneRoseGraph(const RoseInGraph &ig) { } copy_graph(ig, *out, - boost::edge_copy(RoseEdgeCopier(ig, *out, graph_map, haig_map))); + boost::edge_copy(RoseEdgeCopier(ig, *out, graph_map, haig_map))); return out; } diff --git a/contrib/libs/hyperscan/src/rose/rose_in_util.h b/contrib/libs/hyperscan/src/rose/rose_in_util.h index f3e248e724..1f3c4ef78a 100644 --- a/contrib/libs/hyperscan/src/rose/rose_in_util.h +++ b/contrib/libs/hyperscan/src/rose/rose_in_util.h @@ -46,11 +46,11 @@ void calcVertexOffsets(RoseInGraph &ig); enum nfa_kind whatRoseIsThis(const RoseInGraph &in, const RoseInEdge &e); void pruneUseless(RoseInGraph &g); -inline -bool is_any_accept(RoseInVertex v, const RoseInGraph &g) { - return g[v].type == RIV_ACCEPT || g[v].type == RIV_ACCEPT_EOD; +inline +bool is_any_accept(RoseInVertex v, const RoseInGraph &g) { + return g[v].type == RIV_ACCEPT || g[v].type == RIV_ACCEPT_EOD; +} + } -} - #endif diff --git a/contrib/libs/hyperscan/src/rose/rose_internal.h b/contrib/libs/hyperscan/src/rose/rose_internal.h index ac2d6e7288..7bd6779c3d 100644 --- a/contrib/libs/hyperscan/src/rose/rose_internal.h +++ b/contrib/libs/hyperscan/src/rose/rose_internal.h @@ -68,15 +68,15 @@ typedef u64a rose_group; /* Rose Literal Sources * - * Rose currently gets events (mainly roseProcessMatch calls) from a number of - * sources: + * Rose currently gets events (mainly roseProcessMatch calls) from a number of + * sources: * 1) The floating table * 2) The anchored table * 3) Delayed literals - * 4) Suffix NFAs - * 5) Literal masks - * 5) End anchored table - * 6) Prefix / Infix nfas + * 4) Suffix NFAs + * 5) Literal masks + * 5) End anchored table + * 6) Prefix / Infix nfas * * Care is required to ensure that events appear to come into Rose in order * (or sufficiently ordered for Rose to cope). Generally the progress of the @@ -99,7 +99,7 @@ typedef u64a rose_group; * NFA queues are run to the current point (floating or delayed literal) as * appropriate. * - * Literal Masks: + * Literal Masks: * These are triggered from either floating literals or delayed literals and * inspect the data behind them. Matches are raised at the same location as the * trigger literal so there are no ordering issues. 
Masks are always pure @@ -144,7 +144,7 @@ struct LeftNfaInfo { u32 stopTable; // stop table index, or ROSE_OFFSET_INVALID u8 transient; /**< 0 if not transient, else max width of transient prefix */ char infix; /* TODO: make flags */ - char eager; /**< nfa should be run eagerly to first match or death */ + char eager; /**< nfa should be run eagerly to first match or death */ char eod_check; /**< nfa is used by the event eod literal */ u32 countingMiracleOffset; /** if not 0, offset to RoseCountingMiracle. */ rose_group squash_mask; /* & mask applied when rose nfa dies */ @@ -170,11 +170,11 @@ struct NfaInfo { #define OWB_ZOMBIE_ALWAYS_YES 128 /* nfa will always answer yes to any rose * prefix checks */ -/* offset of the status flags in the stream state. */ -#define ROSE_STATE_OFFSET_STATUS_FLAGS 0 +/* offset of the status flags in the stream state. */ +#define ROSE_STATE_OFFSET_STATUS_FLAGS 0 -/* offset of role mmbit in stream state (just after the status flag byte). */ -#define ROSE_STATE_OFFSET_ROLE_MMBIT sizeof(u8) +/* offset of role mmbit in stream state (just after the status flag byte). */ +#define ROSE_STATE_OFFSET_ROLE_MMBIT sizeof(u8) /** * \brief Rose state offsets. @@ -184,23 +184,23 @@ struct NfaInfo { * * State not covered by this structure includes: * - * -# the first byte, containing the status bitmask + * -# the first byte, containing the status bitmask * -# the role state multibit */ struct RoseStateOffsets { /** History buffer. * - * Max size of history is RoseEngine::historyRequired. */ + * Max size of history is RoseEngine::historyRequired. */ u32 history; - /** Exhausted multibit. + /** Exhausted multibit. * - * entry per exhaustible key (used by Highlander mode). If a bit is set, + * entry per exhaustible key (used by Highlander mode). If a bit is set, * reports with that ekey should not be delivered to the user. */ u32 exhausted; /** size in bytes of exhausted multibit */ - u32 exhausted_size; + u32 exhausted_size; /** Logical multibit. * @@ -221,13 +221,13 @@ struct RoseStateOffsets { /** Multibit for active suffix/outfix engines. */ u32 activeLeafArray; - /** Size of multibit for active suffix/outfix engines in bytes. */ - u32 activeLeafArray_size; - - /** Multibit for active leftfix (prefix/infix) engines. */ + /** Size of multibit for active suffix/outfix engines in bytes. */ + u32 activeLeafArray_size; + + /** Multibit for active leftfix (prefix/infix) engines. */ u32 activeLeftArray; - /** Size of multibit for active leftfix (prefix/infix) engines in bytes. */ + /** Size of multibit for active leftfix (prefix/infix) engines in bytes. */ u32 activeLeftArray_size; /** Table of lag information (stored as one byte per engine) for active @@ -243,12 +243,12 @@ struct RoseStateOffsets { /** Size of packed Rose groups value, in bytes. */ u32 groups_size; - /** State for long literal support. */ - u32 longLitState; + /** State for long literal support. */ + u32 longLitState; + + /** Size of the long literal state. */ + u32 longLitState_size; - /** Size of the long literal state. */ - u32 longLitState_size; - /** Packed SOM location slots. */ u32 somLocation; @@ -258,29 +258,29 @@ struct RoseStateOffsets { /** Multibit guarding SOM location slots. */ u32 somWritable; - /** Size of each of the somValid and somWritable multibits, in bytes. */ - u32 somMultibit_size; - - /** Begin of the region where NFA engine state is stored. - * The NFA state region extends to end. */ - u32 nfaStateBegin; - + /** Size of each of the somValid and somWritable multibits, in bytes. 
*/ + u32 somMultibit_size; + + /** Begin of the region where NFA engine state is stored. + * The NFA state region extends to end. */ + u32 nfaStateBegin; + /** Total size of Rose state, in bytes. */ u32 end; }; struct RoseBoundaryReports { - /** \brief 0 if no reports list, otherwise offset of program to run to - * deliver reports at EOD. */ - u32 reportEodOffset; - - /** \brief 0 if no reports list, otherwise offset of program to run to - * deliver reports at offset 0. */ - u32 reportZeroOffset; - - /** \brief 0 if no reports list, otherwise offset of program to run to - * deliver reports if EOD is at offset 0. Superset of other programs. */ - u32 reportZeroEodOffset; + /** \brief 0 if no reports list, otherwise offset of program to run to + * deliver reports at EOD. */ + u32 reportEodOffset; + + /** \brief 0 if no reports list, otherwise offset of program to run to + * deliver reports at offset 0. */ + u32 reportZeroOffset; + + /** \brief 0 if no reports list, otherwise offset of program to run to + * deliver reports if EOD is at offset 0. Superset of other programs. */ + u32 reportZeroEodOffset; }; /* NFA Queue Assignment @@ -310,19 +310,19 @@ struct RoseBoundaryReports { #define ROSE_RUNTIME_PURE_LITERAL 1 #define ROSE_RUNTIME_SINGLE_OUTFIX 2 -/** - * \brief Runtime structure header for Rose. - * - * Runtime structure header for Rose. - * In memory, we follow this with: - * -# the "engine blob" - * -# anchored 'literal' matcher table - * -# floating literal matcher table - * -# eod-anchored literal matcher table - * -# small block table - * -# array of NFA offsets, one per queue - * -# array of state offsets, one per queue (+) - * +/** + * \brief Runtime structure header for Rose. + * + * Runtime structure header for Rose. + * In memory, we follow this with: + * -# the "engine blob" + * -# anchored 'literal' matcher table + * -# floating literal matcher table + * -# eod-anchored literal matcher table + * -# small block table + * -# array of NFA offsets, one per queue + * -# array of state offsets, one per queue (+) + * * (+) stateOffset array note: Offsets in the array are either into the stream * state (normal case) or into the tstate region of scratch (for transient rose * nfas). Rose nfa info table can distinguish the cases. 
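The layout described in the comment above is reached purely by byte offsets from the start of the RoseEngine header: each *_offset field names a sub-table inside the same single allocation, and several of these fields (for example eodProgramOffset and eagerIterOffset below) use zero to mean that the table or program is absent. The following is a minimal, self-contained sketch of that convention only; toy_engine and get_table are hypothetical stand-ins, not the real structures.

/* Sketch of the "single allocation + byte offsets" layout convention.
 * A header is followed in memory by its sub-tables; each *_offset field is a
 * byte distance from the start of the header, with 0 meaning "not present". */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct toy_engine {
    unsigned table_a_offset; /* byte offset of sub-table A, or 0 if absent */
    unsigned table_b_offset; /* byte offset of sub-table B, or 0 if absent */
    unsigned size;           /* total size of the allocation in bytes */
};

/* Resolve an offset field to a pointer: arithmetic from the engine base,
 * NULL when the offset is zero. */
static const char *get_table(const struct toy_engine *t, unsigned offset) {
    return offset ? (const char *)t + offset : NULL;
}

int main(void) {
    unsigned a_off = sizeof(struct toy_engine);
    unsigned b_off = a_off + 8;
    unsigned total = b_off + 8;

    struct toy_engine *t = calloc(1, total);
    if (!t) {
        return 1;
    }
    t->table_a_offset = a_off;
    t->table_b_offset = b_off;
    t->size = total;
    memcpy((char *)t + a_off, "tableA", 7);
    memcpy((char *)t + b_off, "tableB", 7);

    printf("A: %s\nB: %s\n", get_table(t, t->table_a_offset),
           get_table(t, t->table_b_offset));
    free(t);
    return 0;
}

Accessors further down this header, such as getALiteralMatcher(), appear to resolve their sub-tables in the same way: add the stored offset to the engine base pointer and cast to the sub-table type.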
@@ -350,11 +350,11 @@ struct RoseEngine { u32 logicalTreeOffset; /**< offset to mapping from lkey to LogicalOp */ u32 combInfoMapOffset; /**< offset to mapping from ckey to combInfo */ u32 dkeyCount; /**< number of dedupe keys */ - u32 dkeyLogSize; /**< size of fatbit for storing dkey log (bytes) */ + u32 dkeyLogSize; /**< size of fatbit for storing dkey log (bytes) */ u32 invDkeyOffset; /**< offset to table mapping from dkeys to the external * report ids */ u32 somLocationCount; /**< number of som locations required */ - u32 somLocationFatbitSize; /**< size of SOM location fatbit (bytes) */ + u32 somLocationFatbitSize; /**< size of SOM location fatbit (bytes) */ u32 rolesWithStateCount; // number of roles with entries in state bitset u32 stateSize; /* size of the state bitset * WARNING: not the size of the rose state */ @@ -366,9 +366,9 @@ struct RoseEngine { u32 amatcherOffset; // offset of the anchored literal matcher (bytes) u32 ematcherOffset; // offset of the eod-anchored literal matcher (bytes) u32 fmatcherOffset; // offset of the floating literal matcher (bytes) - u32 drmatcherOffset; // offset of the delayed rebuild table (bytes) + u32 drmatcherOffset; // offset of the delayed rebuild table (bytes) u32 sbmatcherOffset; // offset of the small-block literal matcher (bytes) - u32 longLitTableOffset; // offset of the long literal table + u32 longLitTableOffset; // offset of the long literal table u32 amatcherMinWidth; /**< minimum number of bytes required for a pattern * involved with the anchored table to produce a full * match. */ @@ -384,48 +384,48 @@ struct RoseEngine { u32 fmatcherMaxBiAnchoredWidth; /**< maximum number of bytes that can still * produce a match for a pattern involved * with the anchored table. */ - - /** - * \brief Offset of u32 array of program offsets for reports used by - * output-exposed engines. - */ - u32 reportProgramOffset; - - /** - * \brief Number of programs for reports used by output-exposed engines. - */ - u32 reportProgramCount; - - /** - * \brief Offset of u32 array of program offsets for delayed replay of - * literals. - */ - u32 delayProgramOffset; - - /** - * \brief Offset of u32 array of program offsets for anchored literals. - */ - u32 anchoredProgramOffset; - + + /** + * \brief Offset of u32 array of program offsets for reports used by + * output-exposed engines. + */ + u32 reportProgramOffset; + + /** + * \brief Number of programs for reports used by output-exposed engines. + */ + u32 reportProgramCount; + + /** + * \brief Offset of u32 array of program offsets for delayed replay of + * literals. + */ + u32 delayProgramOffset; + + /** + * \brief Offset of u32 array of program offsets for anchored literals. + */ + u32 anchoredProgramOffset; + u32 activeArrayCount; //number of nfas tracked in the active array u32 activeLeftCount; //number of nfas tracked in the active rose array u32 queueCount; /**< number of nfa queues */ - u32 activeQueueArraySize; //!< size of fatbit for active queues (bytes) - - u32 eagerIterOffset; /**< offset to sparse iter for eager prefixes or 0 if - * none */ - - /** \brief Number of keys used by CHECK_SET_HANDLED instructions in role - * programs. */ - u32 handledKeyCount; - - /** \brief Size of the handled keys fatbit in scratch (bytes). 
*/ - u32 handledKeyFatbitSize; - + u32 activeQueueArraySize; //!< size of fatbit for active queues (bytes) + + u32 eagerIterOffset; /**< offset to sparse iter for eager prefixes or 0 if + * none */ + + /** \brief Number of keys used by CHECK_SET_HANDLED instructions in role + * programs. */ + u32 handledKeyCount; + + /** \brief Size of the handled keys fatbit in scratch (bytes). */ + u32 handledKeyFatbitSize; + u32 leftOffset; u32 roseCount; - u32 eodProgramOffset; //!< EOD program, otherwise 0. + u32 eodProgramOffset; //!< EOD program, otherwise 0. u32 flushCombProgramOffset; /**< FlushCombination program, otherwise 0 */ u32 lastFlushCombProgramOffset; /**< LastFlushCombination program, * otherwise 0 */ @@ -453,12 +453,12 @@ struct RoseEngine { * table */ u32 nfaInfoOffset; /* offset to the nfa info offset array */ rose_group initialGroups; - rose_group floating_group_mask; /* groups that are used by the ftable */ + rose_group floating_group_mask; /* groups that are used by the ftable */ u32 size; // (bytes) u32 delay_count; /* number of delayed literal ids. */ - u32 delay_fatbit_size; //!< size of each delay fatbit in scratch (bytes) + u32 delay_fatbit_size; //!< size of each delay fatbit in scratch (bytes) u32 anchored_count; /* number of anchored literal ids */ - u32 anchored_fatbit_size; //!< size of each anch fatbit in scratch (bytes) + u32 anchored_fatbit_size; //!< size of each anch fatbit in scratch (bytes) u32 maxFloatingDelayedMatch; /* max offset that a delayed literal can * usefully be reported */ u32 delayRebuildLength; /* length of the history region which needs to be @@ -477,7 +477,7 @@ struct RoseEngine { u32 ematcherRegionSize; /* max region size to pass to ematcher */ u32 somRevCount; /**< number of som reverse nfas */ u32 somRevOffsetOffset; /**< offset to array of offsets to som rev nfas */ - u32 longLitStreamState; // size in bytes + u32 longLitStreamState; // size in bytes struct scatter_full_plan state_init; }; @@ -488,72 +488,72 @@ struct ALIGN_CL_DIRECTIVE anchored_matcher_info { u32 anchoredMinDistance; /* start of region to run anchored table over */ }; -/** - * \brief Long literal subtable for a particular mode (caseful or nocase). - */ -struct RoseLongLitSubtable { - /** - * \brief Offset of the hash table (relative to RoseLongLitTable base). - * - * Offset is zero if no such table exists. - */ - u32 hashOffset; - - /** - * \brief Offset of the bloom filter (relative to RoseLongLitTable base). - * - * Offset is zero if no such table exists. - */ - u32 bloomOffset; - - /** \brief lg2 of the size of the hash table. */ - u8 hashBits; - - /** \brief Size of the bloom filter in bits. */ - u8 bloomBits; - - /** \brief Number of bits of packed stream state used. */ - u8 streamStateBits; -}; - -/** - * \brief Long literal table header. - */ -struct RoseLongLitTable { - /** - * \brief Total size of the whole table (including strings, bloom filters, - * hash tables). - */ - u32 size; - - /** \brief Caseful sub-table (hash table and bloom filter). */ - struct RoseLongLitSubtable caseful; - - /** \brief Caseless sub-table (hash table and bloom filter). */ - struct RoseLongLitSubtable nocase; - - /** \brief Total size of packed stream state in bytes. */ - u8 streamStateBytes; - - /** \brief Max length of literal prefixes. */ - u8 maxLen; -}; - -/** - * \brief One of these structures per hash table entry in our long literal - * table. - */ -struct RoseLongLitHashEntry { - /** - * \brief Offset of the literal string itself, relative to - * RoseLongLitTable base. 
Zero if this bucket is empty. - */ - u32 str_offset; - - /** \brief Length of the literal string. */ - u32 str_len; -}; - +/** + * \brief Long literal subtable for a particular mode (caseful or nocase). + */ +struct RoseLongLitSubtable { + /** + * \brief Offset of the hash table (relative to RoseLongLitTable base). + * + * Offset is zero if no such table exists. + */ + u32 hashOffset; + + /** + * \brief Offset of the bloom filter (relative to RoseLongLitTable base). + * + * Offset is zero if no such table exists. + */ + u32 bloomOffset; + + /** \brief lg2 of the size of the hash table. */ + u8 hashBits; + + /** \brief Size of the bloom filter in bits. */ + u8 bloomBits; + + /** \brief Number of bits of packed stream state used. */ + u8 streamStateBits; +}; + +/** + * \brief Long literal table header. + */ +struct RoseLongLitTable { + /** + * \brief Total size of the whole table (including strings, bloom filters, + * hash tables). + */ + u32 size; + + /** \brief Caseful sub-table (hash table and bloom filter). */ + struct RoseLongLitSubtable caseful; + + /** \brief Caseless sub-table (hash table and bloom filter). */ + struct RoseLongLitSubtable nocase; + + /** \brief Total size of packed stream state in bytes. */ + u8 streamStateBytes; + + /** \brief Max length of literal prefixes. */ + u8 maxLen; +}; + +/** + * \brief One of these structures per hash table entry in our long literal + * table. + */ +struct RoseLongLitHashEntry { + /** + * \brief Offset of the literal string itself, relative to + * RoseLongLitTable base. Zero if this bucket is empty. + */ + u32 str_offset; + + /** \brief Length of the literal string. */ + u32 str_len; +}; + static really_inline const struct anchored_matcher_info *getALiteralMatcher( const struct RoseEngine *t) { diff --git a/contrib/libs/hyperscan/src/rose/rose_program.h b/contrib/libs/hyperscan/src/rose/rose_program.h index 056b30828d..7e21303cb7 100644 --- a/contrib/libs/hyperscan/src/rose/rose_program.h +++ b/contrib/libs/hyperscan/src/rose/rose_program.h @@ -1,188 +1,188 @@ -/* +/* * Copyright (c) 2015-2020, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Rose data structures to do with role programs. - */ - -#ifndef ROSE_ROSE_PROGRAM_H -#define ROSE_ROSE_PROGRAM_H - -#include "som/som_operation.h" -#include "rose_internal.h" -#include "ue2common.h" -#include "util/simd_types.h" - -/** \brief Minimum alignment for each instruction in memory. */ -#define ROSE_INSTR_MIN_ALIGN 8U - -/** \brief Role program instruction opcodes. */ -enum RoseInstructionCode { - ROSE_INSTR_END, //!< End of program. - ROSE_INSTR_ANCHORED_DELAY, //!< Delay until after anchored matcher. - ROSE_INSTR_CHECK_LIT_EARLY, //!< Skip matches before floating min offset. - ROSE_INSTR_CHECK_GROUPS, //!< Check that literal groups are on. - ROSE_INSTR_CHECK_ONLY_EOD, //!< Role matches only at EOD. - ROSE_INSTR_CHECK_BOUNDS, //!< Bounds on distance from offset 0. - ROSE_INSTR_CHECK_NOT_HANDLED, //!< Test & set role in "handled". - ROSE_INSTR_CHECK_SINGLE_LOOKAROUND, //!< Single lookaround check. - ROSE_INSTR_CHECK_LOOKAROUND, //!< Lookaround check. - ROSE_INSTR_CHECK_MASK, //!< 8-bytes mask check. - ROSE_INSTR_CHECK_MASK_32, //!< 32-bytes and/cmp/neg mask check. - ROSE_INSTR_CHECK_BYTE, //!< Single Byte check. - ROSE_INSTR_CHECK_SHUFTI_16x8, //!< Check 16-byte data by 8-bucket shufti. - ROSE_INSTR_CHECK_SHUFTI_32x8, //!< Check 32-byte data by 8-bucket shufti. - ROSE_INSTR_CHECK_SHUFTI_16x16, //!< Check 16-byte data by 16-bucket shufti. - ROSE_INSTR_CHECK_SHUFTI_32x16, //!< Check 32-byte data by 16-bucket shufti. - ROSE_INSTR_CHECK_INFIX, //!< Infix engine must be in accept state. - ROSE_INSTR_CHECK_PREFIX, //!< Prefix engine must be in accept state. - ROSE_INSTR_PUSH_DELAYED, //!< Push delayed literal matches. - ROSE_INSTR_DUMMY_NOP, //!< NOP. Should not exist in build programs. - ROSE_INSTR_CATCH_UP, //!< Catch up engines, anchored matches. - ROSE_INSTR_CATCH_UP_MPV, //!< Catch up the MPV. - ROSE_INSTR_SOM_ADJUST, //!< Set SOM from a distance to EOM. - ROSE_INSTR_SOM_LEFTFIX, //!< Acquire SOM from a leftfix engine. - ROSE_INSTR_SOM_FROM_REPORT, //!< Acquire SOM from a som_operation. - ROSE_INSTR_SOM_ZERO, //!< Set SOM to zero. - ROSE_INSTR_TRIGGER_INFIX, //!< Trigger an infix engine. - ROSE_INSTR_TRIGGER_SUFFIX, //!< Trigger a suffix engine. - ROSE_INSTR_DEDUPE, //!< Run deduplication for report. - ROSE_INSTR_DEDUPE_SOM, //!< Run deduplication for SOM report. - ROSE_INSTR_REPORT_CHAIN, //!< Fire a chained report (MPV). - ROSE_INSTR_REPORT_SOM_INT, //!< Manipulate SOM only. - ROSE_INSTR_REPORT_SOM_AWARE, //!< Manipulate SOM from SOM-aware source. - - /** \brief Fire a report. */ - ROSE_INSTR_REPORT, - - /** \brief Fire an exhaustible report. */ - ROSE_INSTR_REPORT_EXHAUST, - - /** \brief Fire a SOM report. */ - ROSE_INSTR_REPORT_SOM, - - /** \brief Fire an exhaustible SOM report. */ - ROSE_INSTR_REPORT_SOM_EXHAUST, - - /** \brief Super-instruction combining DEDUPE and REPORT. */ - ROSE_INSTR_DEDUPE_AND_REPORT, - - /** - * \brief Fire a report and stop program execution. 
This is a - * specialisation intended for short, frequently-executed programs. - */ - ROSE_INSTR_FINAL_REPORT, - - ROSE_INSTR_CHECK_EXHAUSTED, //!< Check if an ekey has already been set. - ROSE_INSTR_CHECK_MIN_LENGTH, //!< Check (EOM - SOM) against min length. - ROSE_INSTR_SET_STATE, //!< Switch a state index on. - ROSE_INSTR_SET_GROUPS, //!< Set some literal group bits. - ROSE_INSTR_SQUASH_GROUPS, //!< Conditionally turn off some groups. - ROSE_INSTR_CHECK_STATE, //!< Test a single bit in the state multibit. - ROSE_INSTR_SPARSE_ITER_BEGIN, //!< Begin running a sparse iter over states. - ROSE_INSTR_SPARSE_ITER_NEXT, //!< Continue running sparse iter over states. - ROSE_INSTR_SPARSE_ITER_ANY, //!< Test for any bit in the sparse iterator. - - /** \brief Check outfixes and suffixes for EOD and fire reports if so. */ - ROSE_INSTR_ENGINES_EOD, - - /** \brief Catch up and check active suffixes for EOD and fire reports if - * so. */ - ROSE_INSTR_SUFFIXES_EOD, - - /** \brief Run the EOD-anchored HWLM literal matcher. */ - ROSE_INSTR_MATCHER_EOD, - - /** - * \brief Confirm a case-sensitive literal at the current offset. In - * streaming mode, this makes use of the long literal table. - */ - ROSE_INSTR_CHECK_LONG_LIT, - - /** - * \brief Confirm a case-insensitive literal at the current offset. In - * streaming mode, this makes use of the long literal table. - */ - ROSE_INSTR_CHECK_LONG_LIT_NOCASE, - - /** - * \brief Confirm a case-sensitive "medium length" literal at the current - * offset. In streaming mode, this will check history if needed. - */ - ROSE_INSTR_CHECK_MED_LIT, - - /** - * \brief Confirm a case-insensitive "medium length" literal at the current - * offset. In streaming mode, this will check history if needed. - */ - ROSE_INSTR_CHECK_MED_LIT_NOCASE, - - /** - * \brief Clear the "work done" flag used by the SQUASH_GROUPS instruction. - */ - ROSE_INSTR_CLEAR_WORK_DONE, - - /** \brief Check lookaround if it has multiple paths. */ - ROSE_INSTR_MULTIPATH_LOOKAROUND, - - /** - * \brief Use shufti to check lookaround with multiple paths. The total - * length of the paths is 16 bytes at most and shufti has 8 buckets. - * All paths can be at most 16 bytes long. - */ - ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_16x8, - - /** - * \brief Use shufti to check lookaround with multiple paths. The total - * length of the paths is 32 bytes at most and shufti has 8 buckets. - * All paths can be at most 16 bytes long. - */ - ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_32x8, - - /** - * \brief Use shufti to check lookaround with multiple paths. The total - * length of the paths is 32 bytes at most and shufti has 16 buckets. - * All paths can be at most 16 bytes long. - */ - ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_32x16, - - /** - * \brief Use shufti to check multiple paths lookaround. The total - * length of the paths is 64 bytes at most and shufti has 8 buckets. - * All paths can be at most 16 bytes long. - */ - ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_64, - - /** - * \brief Jump to the program of included literal. - */ - ROSE_INSTR_INCLUDED_JUMP, - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Rose data structures to do with role programs. + */ + +#ifndef ROSE_ROSE_PROGRAM_H +#define ROSE_ROSE_PROGRAM_H + +#include "som/som_operation.h" +#include "rose_internal.h" +#include "ue2common.h" +#include "util/simd_types.h" + +/** \brief Minimum alignment for each instruction in memory. */ +#define ROSE_INSTR_MIN_ALIGN 8U + +/** \brief Role program instruction opcodes. */ +enum RoseInstructionCode { + ROSE_INSTR_END, //!< End of program. + ROSE_INSTR_ANCHORED_DELAY, //!< Delay until after anchored matcher. + ROSE_INSTR_CHECK_LIT_EARLY, //!< Skip matches before floating min offset. + ROSE_INSTR_CHECK_GROUPS, //!< Check that literal groups are on. + ROSE_INSTR_CHECK_ONLY_EOD, //!< Role matches only at EOD. + ROSE_INSTR_CHECK_BOUNDS, //!< Bounds on distance from offset 0. + ROSE_INSTR_CHECK_NOT_HANDLED, //!< Test & set role in "handled". + ROSE_INSTR_CHECK_SINGLE_LOOKAROUND, //!< Single lookaround check. + ROSE_INSTR_CHECK_LOOKAROUND, //!< Lookaround check. + ROSE_INSTR_CHECK_MASK, //!< 8-bytes mask check. + ROSE_INSTR_CHECK_MASK_32, //!< 32-bytes and/cmp/neg mask check. + ROSE_INSTR_CHECK_BYTE, //!< Single Byte check. + ROSE_INSTR_CHECK_SHUFTI_16x8, //!< Check 16-byte data by 8-bucket shufti. + ROSE_INSTR_CHECK_SHUFTI_32x8, //!< Check 32-byte data by 8-bucket shufti. + ROSE_INSTR_CHECK_SHUFTI_16x16, //!< Check 16-byte data by 16-bucket shufti. + ROSE_INSTR_CHECK_SHUFTI_32x16, //!< Check 32-byte data by 16-bucket shufti. + ROSE_INSTR_CHECK_INFIX, //!< Infix engine must be in accept state. + ROSE_INSTR_CHECK_PREFIX, //!< Prefix engine must be in accept state. + ROSE_INSTR_PUSH_DELAYED, //!< Push delayed literal matches. + ROSE_INSTR_DUMMY_NOP, //!< NOP. Should not exist in build programs. + ROSE_INSTR_CATCH_UP, //!< Catch up engines, anchored matches. + ROSE_INSTR_CATCH_UP_MPV, //!< Catch up the MPV. + ROSE_INSTR_SOM_ADJUST, //!< Set SOM from a distance to EOM. + ROSE_INSTR_SOM_LEFTFIX, //!< Acquire SOM from a leftfix engine. + ROSE_INSTR_SOM_FROM_REPORT, //!< Acquire SOM from a som_operation. + ROSE_INSTR_SOM_ZERO, //!< Set SOM to zero. + ROSE_INSTR_TRIGGER_INFIX, //!< Trigger an infix engine. + ROSE_INSTR_TRIGGER_SUFFIX, //!< Trigger a suffix engine. + ROSE_INSTR_DEDUPE, //!< Run deduplication for report. 
+ ROSE_INSTR_DEDUPE_SOM, //!< Run deduplication for SOM report. + ROSE_INSTR_REPORT_CHAIN, //!< Fire a chained report (MPV). + ROSE_INSTR_REPORT_SOM_INT, //!< Manipulate SOM only. + ROSE_INSTR_REPORT_SOM_AWARE, //!< Manipulate SOM from SOM-aware source. + + /** \brief Fire a report. */ + ROSE_INSTR_REPORT, + + /** \brief Fire an exhaustible report. */ + ROSE_INSTR_REPORT_EXHAUST, + + /** \brief Fire a SOM report. */ + ROSE_INSTR_REPORT_SOM, + + /** \brief Fire an exhaustible SOM report. */ + ROSE_INSTR_REPORT_SOM_EXHAUST, + + /** \brief Super-instruction combining DEDUPE and REPORT. */ + ROSE_INSTR_DEDUPE_AND_REPORT, + + /** + * \brief Fire a report and stop program execution. This is a + * specialisation intended for short, frequently-executed programs. + */ + ROSE_INSTR_FINAL_REPORT, + + ROSE_INSTR_CHECK_EXHAUSTED, //!< Check if an ekey has already been set. + ROSE_INSTR_CHECK_MIN_LENGTH, //!< Check (EOM - SOM) against min length. + ROSE_INSTR_SET_STATE, //!< Switch a state index on. + ROSE_INSTR_SET_GROUPS, //!< Set some literal group bits. + ROSE_INSTR_SQUASH_GROUPS, //!< Conditionally turn off some groups. + ROSE_INSTR_CHECK_STATE, //!< Test a single bit in the state multibit. + ROSE_INSTR_SPARSE_ITER_BEGIN, //!< Begin running a sparse iter over states. + ROSE_INSTR_SPARSE_ITER_NEXT, //!< Continue running sparse iter over states. + ROSE_INSTR_SPARSE_ITER_ANY, //!< Test for any bit in the sparse iterator. + + /** \brief Check outfixes and suffixes for EOD and fire reports if so. */ + ROSE_INSTR_ENGINES_EOD, + + /** \brief Catch up and check active suffixes for EOD and fire reports if + * so. */ + ROSE_INSTR_SUFFIXES_EOD, + + /** \brief Run the EOD-anchored HWLM literal matcher. */ + ROSE_INSTR_MATCHER_EOD, + + /** + * \brief Confirm a case-sensitive literal at the current offset. In + * streaming mode, this makes use of the long literal table. + */ + ROSE_INSTR_CHECK_LONG_LIT, + + /** + * \brief Confirm a case-insensitive literal at the current offset. In + * streaming mode, this makes use of the long literal table. + */ + ROSE_INSTR_CHECK_LONG_LIT_NOCASE, + + /** + * \brief Confirm a case-sensitive "medium length" literal at the current + * offset. In streaming mode, this will check history if needed. + */ + ROSE_INSTR_CHECK_MED_LIT, + + /** + * \brief Confirm a case-insensitive "medium length" literal at the current + * offset. In streaming mode, this will check history if needed. + */ + ROSE_INSTR_CHECK_MED_LIT_NOCASE, + + /** + * \brief Clear the "work done" flag used by the SQUASH_GROUPS instruction. + */ + ROSE_INSTR_CLEAR_WORK_DONE, + + /** \brief Check lookaround if it has multiple paths. */ + ROSE_INSTR_MULTIPATH_LOOKAROUND, + + /** + * \brief Use shufti to check lookaround with multiple paths. The total + * length of the paths is 16 bytes at most and shufti has 8 buckets. + * All paths can be at most 16 bytes long. + */ + ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_16x8, + + /** + * \brief Use shufti to check lookaround with multiple paths. The total + * length of the paths is 32 bytes at most and shufti has 8 buckets. + * All paths can be at most 16 bytes long. + */ + ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_32x8, + + /** + * \brief Use shufti to check lookaround with multiple paths. The total + * length of the paths is 32 bytes at most and shufti has 16 buckets. + * All paths can be at most 16 bytes long. + */ + ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_32x16, + + /** + * \brief Use shufti to check multiple paths lookaround. 
The total + * length of the paths is 64 bytes at most and shufti has 8 buckets. + * All paths can be at most 16 bytes long. + */ + ROSE_INSTR_CHECK_MULTIPATH_SHUFTI_64, + + /** + * \brief Jump to the program of included literal. + */ + ROSE_INSTR_INCLUDED_JUMP, + /** * \brief Set matching status of a sub-expression. */ @@ -213,82 +213,82 @@ enum RoseInstructionCode { ROSE_INSTR_CHECK_MASK_64, //!< 64-bytes and/cmp/neg mask check. LAST_ROSE_INSTRUCTION = ROSE_INSTR_CHECK_MASK_64 //!< Sentinel. -}; - -struct ROSE_STRUCT_END { - u8 code; //!< From enum RoseInstructionCode. -}; - -struct ROSE_STRUCT_ANCHORED_DELAY { - u8 code; //!< From enum RoseInstructionCode. - rose_group groups; //!< Bitmask. - u32 anch_id; //!< Program to restart after the delay. - u32 done_jump; //!< Jump forward this many bytes if we have to delay. -}; - -struct ROSE_STRUCT_CHECK_LIT_EARLY { - u8 code; //!< From enum RoseInstructionCode. - u32 min_offset; //!< Minimum offset for this literal. - u32 fail_jump; //!< Jump forward this many bytes on failure. -}; - -/** Note: check failure will halt program. */ -struct ROSE_STRUCT_CHECK_GROUPS { - u8 code; //!< From enum RoseInstructionCode. - rose_group groups; //!< Bitmask. -}; - -struct ROSE_STRUCT_CHECK_ONLY_EOD { - u8 code; //!< From enum RoseInstructionCode. - u32 fail_jump; //!< Jump forward this many bytes on failure. -}; - -struct ROSE_STRUCT_CHECK_BOUNDS { - u8 code; //!< From enum RoseInstructionCode. - u64a min_bound; //!< Min distance from zero. - u64a max_bound; //!< Max distance from zero. - u32 fail_jump; //!< Jump forward this many bytes on failure. -}; - -struct ROSE_STRUCT_CHECK_NOT_HANDLED { - u8 code; //!< From enum RoseInstructionCode. - u32 key; //!< Key in the "handled_roles" fatbit in scratch. - u32 fail_jump; //!< Jump forward this many bytes if we have seen key before. -}; - -struct ROSE_STRUCT_CHECK_SINGLE_LOOKAROUND { - u8 code; //!< From enum RoseInstructionCode. - s8 offset; //!< The offset of the byte to examine. - u32 reach_index; //!< Index for lookaround reach bitvectors. - u32 fail_jump; //!< Jump forward this many bytes on failure. -}; - -struct ROSE_STRUCT_CHECK_LOOKAROUND { - u8 code; //!< From enum RoseInstructionCode. - u32 look_index; //!< Offset in bytecode of lookaround offset list. - u32 reach_index; //!< Offset in bytecode of lookaround reach bitvectors. - u32 count; //!< The count of lookaround entries in one instruction. - u32 fail_jump; //!< Jump forward this many bytes on failure. -}; - -struct ROSE_STRUCT_CHECK_MASK { - u8 code; //!< From enum roseInstructionCode. - u64a and_mask; //!< 8-byte and mask. - u64a cmp_mask; //!< 8-byte cmp mask. - u64a neg_mask; //!< 8-byte negation mask. - s32 offset; //!< Relative offset of the first byte. - u32 fail_jump; //!< Jump forward this many bytes on failure. -}; - -struct ROSE_STRUCT_CHECK_MASK_32 { - u8 code; //!< From enum RoseInstructionCode. - u8 and_mask[32]; //!< 32-byte and mask. - u8 cmp_mask[32]; //!< 32-byte cmp mask. - u32 neg_mask; //!< negation mask with 32 bits. - s32 offset; //!< Relative offset of the first byte. - u32 fail_jump; //!< Jump forward this many bytes on failure. -}; - +}; + +struct ROSE_STRUCT_END { + u8 code; //!< From enum RoseInstructionCode. +}; + +struct ROSE_STRUCT_ANCHORED_DELAY { + u8 code; //!< From enum RoseInstructionCode. + rose_group groups; //!< Bitmask. + u32 anch_id; //!< Program to restart after the delay. + u32 done_jump; //!< Jump forward this many bytes if we have to delay. 
+}; + +struct ROSE_STRUCT_CHECK_LIT_EARLY { + u8 code; //!< From enum RoseInstructionCode. + u32 min_offset; //!< Minimum offset for this literal. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +/** Note: check failure will halt program. */ +struct ROSE_STRUCT_CHECK_GROUPS { + u8 code; //!< From enum RoseInstructionCode. + rose_group groups; //!< Bitmask. +}; + +struct ROSE_STRUCT_CHECK_ONLY_EOD { + u8 code; //!< From enum RoseInstructionCode. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_STRUCT_CHECK_BOUNDS { + u8 code; //!< From enum RoseInstructionCode. + u64a min_bound; //!< Min distance from zero. + u64a max_bound; //!< Max distance from zero. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_STRUCT_CHECK_NOT_HANDLED { + u8 code; //!< From enum RoseInstructionCode. + u32 key; //!< Key in the "handled_roles" fatbit in scratch. + u32 fail_jump; //!< Jump forward this many bytes if we have seen key before. +}; + +struct ROSE_STRUCT_CHECK_SINGLE_LOOKAROUND { + u8 code; //!< From enum RoseInstructionCode. + s8 offset; //!< The offset of the byte to examine. + u32 reach_index; //!< Index for lookaround reach bitvectors. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_STRUCT_CHECK_LOOKAROUND { + u8 code; //!< From enum RoseInstructionCode. + u32 look_index; //!< Offset in bytecode of lookaround offset list. + u32 reach_index; //!< Offset in bytecode of lookaround reach bitvectors. + u32 count; //!< The count of lookaround entries in one instruction. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_STRUCT_CHECK_MASK { + u8 code; //!< From enum roseInstructionCode. + u64a and_mask; //!< 8-byte and mask. + u64a cmp_mask; //!< 8-byte cmp mask. + u64a neg_mask; //!< 8-byte negation mask. + s32 offset; //!< Relative offset of the first byte. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_STRUCT_CHECK_MASK_32 { + u8 code; //!< From enum RoseInstructionCode. + u8 and_mask[32]; //!< 32-byte and mask. + u8 cmp_mask[32]; //!< 32-byte cmp mask. + u32 neg_mask; //!< negation mask with 32 bits. + s32 offset; //!< Relative offset of the first byte. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + struct ROSE_STRUCT_CHECK_MASK_64 { u8 code; //!< From enum RoseInstructionCode. u8 and_mask[64]; //!< 64-byte and mask. @@ -298,57 +298,57 @@ struct ROSE_STRUCT_CHECK_MASK_64 { u32 fail_jump; //!< Jump forward this many bytes on failure. }; -struct ROSE_STRUCT_CHECK_BYTE { - u8 code; //!< From enum RoseInstructionCode. - u8 and_mask; //!< 8-bits and mask. - u8 cmp_mask; //!< 8-bits cmp mask. - u8 negation; //!< Flag about negation. - s32 offset; //!< The relative offset. - u32 fail_jump; //!< Jump forward this many bytes on failure. -}; - -// Since m128 and m256 could be missaligned in the bytecode, -// we'll use u8[16] and u8[32] instead in all rose_check_shufti structures. -struct ROSE_STRUCT_CHECK_SHUFTI_16x8 { - u8 code; //!< From enum RoseInstructionCode. - u8 nib_mask[32]; //!< High 16 and low 16 bits nibble mask in shufti. - u8 bucket_select_mask[16]; //!< Mask for bucket assigning. - u32 neg_mask; //!< Negation mask in low 16 bits. - s32 offset; //!< Relative offset of the first byte. - u32 fail_jump; //!< Jump forward this many bytes on failure. -}; - -struct ROSE_STRUCT_CHECK_SHUFTI_32x8 { - u8 code; //!< From enum RoseInstructionCode. - u8 hi_mask[16]; //!< High nibble mask in shufti. 
- u8 lo_mask[16]; //!< Low nibble mask in shufti. - u8 bucket_select_mask[32]; //!< Mask for bucket assigning. - u32 neg_mask; //!< 32 bits negation mask. - s32 offset; //!< Relative offset of the first byte. - u32 fail_jump; //!< Jump forward this many bytes on failure. -}; - -struct ROSE_STRUCT_CHECK_SHUFTI_16x16 { - u8 code; //!< From enum RoseInstructionCode. - u8 hi_mask[32]; //!< High nibble mask in shufti. - u8 lo_mask[32]; //!< Low nibble mask in shufti. - u8 bucket_select_mask[32]; //!< Mask for bucket assigning. - u32 neg_mask; //!< Negation mask in low 16 bits. - s32 offset; //!< Relative offset of the first byte. - u32 fail_jump; //!< Jump forward this many bytes on failure. -}; - -struct ROSE_STRUCT_CHECK_SHUFTI_32x16 { - u8 code; //!< From enum RoseInstructionCode. - u8 hi_mask[32]; //!< High nibble mask in shufti. - u8 lo_mask[32]; //!< Low nibble mask in shufti. - u8 bucket_select_mask_hi[32]; //!< Bucket mask for high 8 buckets. - u8 bucket_select_mask_lo[32]; //!< Bucket mask for low 8 buckets. - u32 neg_mask; //!< 32 bits negation mask. - s32 offset; //!< Relative offset of the first byte. - u32 fail_jump; //!< Jump forward this many bytes on failure. -}; - +struct ROSE_STRUCT_CHECK_BYTE { + u8 code; //!< From enum RoseInstructionCode. + u8 and_mask; //!< 8-bits and mask. + u8 cmp_mask; //!< 8-bits cmp mask. + u8 negation; //!< Flag about negation. + s32 offset; //!< The relative offset. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +// Since m128 and m256 could be missaligned in the bytecode, +// we'll use u8[16] and u8[32] instead in all rose_check_shufti structures. +struct ROSE_STRUCT_CHECK_SHUFTI_16x8 { + u8 code; //!< From enum RoseInstructionCode. + u8 nib_mask[32]; //!< High 16 and low 16 bits nibble mask in shufti. + u8 bucket_select_mask[16]; //!< Mask for bucket assigning. + u32 neg_mask; //!< Negation mask in low 16 bits. + s32 offset; //!< Relative offset of the first byte. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_STRUCT_CHECK_SHUFTI_32x8 { + u8 code; //!< From enum RoseInstructionCode. + u8 hi_mask[16]; //!< High nibble mask in shufti. + u8 lo_mask[16]; //!< Low nibble mask in shufti. + u8 bucket_select_mask[32]; //!< Mask for bucket assigning. + u32 neg_mask; //!< 32 bits negation mask. + s32 offset; //!< Relative offset of the first byte. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_STRUCT_CHECK_SHUFTI_16x16 { + u8 code; //!< From enum RoseInstructionCode. + u8 hi_mask[32]; //!< High nibble mask in shufti. + u8 lo_mask[32]; //!< Low nibble mask in shufti. + u8 bucket_select_mask[32]; //!< Mask for bucket assigning. + u32 neg_mask; //!< Negation mask in low 16 bits. + s32 offset; //!< Relative offset of the first byte. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_STRUCT_CHECK_SHUFTI_32x16 { + u8 code; //!< From enum RoseInstructionCode. + u8 hi_mask[32]; //!< High nibble mask in shufti. + u8 lo_mask[32]; //!< Low nibble mask in shufti. + u8 bucket_select_mask_hi[32]; //!< Bucket mask for high 8 buckets. + u8 bucket_select_mask_lo[32]; //!< Bucket mask for low 8 buckets. + u32 neg_mask; //!< 32 bits negation mask. + s32 offset; //!< Relative offset of the first byte. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + struct ROSE_STRUCT_CHECK_SHUFTI_64x8 { u8 code; //!< From enum RoseInstructionCode. u8 hi_mask[64]; //!< High nibble mask in shufti. 
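The hi_mask/lo_mask fields in the CHECK_SHUFTI_* structures above encode a shufti-style byte classifier: each 16-entry nibble table maps a nibble value to a bitmap of buckets, and a byte falls into bucket b when bit b is set in both the entry for its low nibble and the entry for its high nibble. The engine evaluates this with SIMD shuffles, and the per-position bucket_select_mask and neg_mask fields then refine which positions must or must not hit. The following scalar sketch shows only the nibble lookup, with hypothetical names and the selection/negation details omitted.

/* Scalar illustration of shufti-style nibble classification.
 * Each table entry is a bitmap of buckets; a byte is in bucket b when bit b
 * is set for both its low and high nibble. */
#include <stdio.h>

typedef unsigned char u8;

static u8 shufti_buckets(const u8 lo_mask[16], const u8 hi_mask[16], u8 c) {
    return lo_mask[c & 0xf] & hi_mask[c >> 4];
}

int main(void) {
    /* Example: place bytes 'a' (0x61) and 'A' (0x41) in bucket 0. */
    u8 lo[16] = {0}, hi[16] = {0};
    lo[0x1] |= 1; /* low nibble shared by 'a' and 'A' */
    hi[0x6] |= 1; /* high nibble of 'a' */
    hi[0x4] |= 1; /* high nibble of 'A' */

    const char *s = "aAb";
    for (int i = 0; s[i]; i++) {
        printf("'%c' -> buckets 0x%02x\n", s[i],
               shufti_buckets(lo, hi, (u8)s[i]));
    }
    return 0;
}

Because the test is a conjunction over the two nibbles, a single bucket covering both 'a' and 'B' necessarily also admits 'A' and 'b'; this over-approximation is inherent to the construction.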
@@ -372,331 +372,331 @@ struct ROSE_STRUCT_CHECK_SHUFTI_64x16 { u32 fail_jump; //!< Jump forward this many bytes on failure. }; -struct ROSE_STRUCT_CHECK_INFIX { - u8 code; //!< From enum RoseInstructionCode. - u32 queue; //!< Queue of leftfix to check. - u32 lag; //!< Lag of leftfix for this case. - ReportID report; //!< ReportID of leftfix to check. - u32 fail_jump; //!< Jump forward this many bytes on failure. -}; - -struct ROSE_STRUCT_CHECK_PREFIX { - u8 code; //!< From enum RoseInstructionCode. - u32 queue; //!< Queue of leftfix to check. - u32 lag; //!< Lag of leftfix for this case. - ReportID report; //!< ReportID of leftfix to check. - u32 fail_jump; //!< Jump forward this many bytes on failure. -}; - -struct ROSE_STRUCT_PUSH_DELAYED { - u8 code; //!< From enum RoseInstructionCode. - u8 delay; // Number of bytes to delay. - u32 index; // Delay literal index (relative to first delay lit). -}; - -struct ROSE_STRUCT_DUMMY_NOP { - u8 code; //!< From enum RoseInstructionCode. -}; - -struct ROSE_STRUCT_CATCH_UP { - u8 code; //!< From enum RoseInstructionCode. -}; - -struct ROSE_STRUCT_CATCH_UP_MPV { - u8 code; //!< From enum RoseInstructionCode. -}; - -struct ROSE_STRUCT_SOM_ADJUST { - u8 code; //!< From enum RoseInstructionCode. - u32 distance; //!< Distance to EOM. -}; - -struct ROSE_STRUCT_SOM_LEFTFIX { - u8 code; //!< From enum RoseInstructionCode. - u32 queue; //!< Queue index of leftfix providing SOM. - u32 lag; //!< Lag of leftfix for this case. -}; - -struct ROSE_STRUCT_SOM_FROM_REPORT { - u8 code; //!< From enum RoseInstructionCode. - struct som_operation som; -}; - -struct ROSE_STRUCT_SOM_ZERO { - u8 code; //!< From enum RoseInstructionCode. -}; - -struct ROSE_STRUCT_TRIGGER_INFIX { - u8 code; //!< From enum RoseInstructionCode. - u8 cancel; //!< Cancels previous top event. - u32 queue; //!< Queue index of infix. - u32 event; //!< Queue event, from MQE_*. -}; - -struct ROSE_STRUCT_TRIGGER_SUFFIX { - u8 code; //!< From enum RoseInstructionCode. - u32 queue; //!< Queue index of suffix. - u32 event; //!< Queue event, from MQE_*. -}; - -struct ROSE_STRUCT_DEDUPE { - u8 code; //!< From enum RoseInstructionCode. - u8 quash_som; //!< Force SOM to zero for this report. - u32 dkey; //!< Dedupe key. - s32 offset_adjust; //!< Offset adjustment to apply to end offset. - u32 fail_jump; //!< Jump forward this many bytes on failure. -}; - -struct ROSE_STRUCT_DEDUPE_SOM { - u8 code; //!< From enum RoseInstructionCode. - u8 quash_som; //!< Force SOM to zero for this report. - u32 dkey; //!< Dedupe key. - s32 offset_adjust; //!< Offset adjustment to apply to end offset. - u32 fail_jump; //!< Jump forward this many bytes on failure. -}; - -struct ROSE_STRUCT_REPORT_CHAIN { - u8 code; //!< From enum RoseInstructionCode. - u32 event; //!< Queue event, from MQE_*. Must be a top. - - /** - * \brief Number of bytes behind us that we are allowed to squash - * identical top events on the queue. - */ - u64a top_squash_distance; -}; - -struct ROSE_STRUCT_REPORT_SOM_INT { - u8 code; //!< From enum RoseInstructionCode. - struct som_operation som; -}; - -struct ROSE_STRUCT_REPORT_SOM_AWARE { - u8 code; //!< From enum RoseInstructionCode. - struct som_operation som; -}; - -struct ROSE_STRUCT_REPORT { - u8 code; //!< From enum RoseInstructionCode. - ReportID onmatch; //!< Report ID to deliver to user. - s32 offset_adjust; //!< Offset adjustment to apply to end offset. -}; - -struct ROSE_STRUCT_REPORT_EXHAUST { - u8 code; //!< From enum RoseInstructionCode. - ReportID onmatch; //!< Report ID to deliver to user. 
- s32 offset_adjust; //!< Offset adjustment to apply to end offset. - u32 ekey; //!< Exhaustion key. -}; - -struct ROSE_STRUCT_REPORT_SOM { - u8 code; //!< From enum RoseInstructionCode. - ReportID onmatch; //!< Report ID to deliver to user. - s32 offset_adjust; //!< Offset adjustment to apply to end offset. -}; - -struct ROSE_STRUCT_REPORT_SOM_EXHAUST { - u8 code; //!< From enum RoseInstructionCode. - ReportID onmatch; //!< Report ID to deliver to user. - s32 offset_adjust; //!< Offset adjustment to apply to end offset. - u32 ekey; //!< Exhaustion key. -}; - -struct ROSE_STRUCT_DEDUPE_AND_REPORT { - u8 code; //!< From enum RoseInstructionCode. - u8 quash_som; //!< Force SOM to zero for this report. - u32 dkey; //!< Dedupe key. - ReportID onmatch; //!< Report ID to deliver to user. - s32 offset_adjust; //!< Offset adjustment to apply to end offset. - u32 fail_jump; //!< Jump forward this many bytes on failure. -}; - -struct ROSE_STRUCT_FINAL_REPORT { - u8 code; //!< From enum RoseInstructionCode. - ReportID onmatch; //!< Report ID to deliver to user. - s32 offset_adjust; //!< Offset adjustment to apply to end offset. -}; - -struct ROSE_STRUCT_CHECK_EXHAUSTED { - u8 code; //!< From enum RoseInstructionCode. - u32 ekey; //!< Exhaustion key to check. - u32 fail_jump; //!< Jump forward this many bytes on failure. -}; - -struct ROSE_STRUCT_CHECK_MIN_LENGTH { - u8 code; //!< From enum RoseInstructionCode. - s32 end_adj; //!< Offset adjustment to add to EOM first. - u64a min_length; //!< Minimum distance from SOM to EOM. - u32 fail_jump; //!< Jump forward this many bytes on failure. -}; - -struct ROSE_STRUCT_SET_STATE { - u8 code; //!< From enum RoseInstructionCode. - u32 index; //!< State index in multibit. -}; - -struct ROSE_STRUCT_SET_GROUPS { - u8 code; //!< From enum RoseInstructionCode. - rose_group groups; //!< Bitmask to OR into groups. -}; - -struct ROSE_STRUCT_SQUASH_GROUPS { - u8 code; //!< From enum RoseInstructionCode. - rose_group groups; //!< Bitmask to AND into groups. -}; - -struct ROSE_STRUCT_CHECK_STATE { - u8 code; //!< From enum RoseInstructionCode. - u32 index; //!< State index in the role multibit. - u32 fail_jump; //!< Jump forward this many bytes on failure. -}; - -/** - * Note that the offsets in the jump table are always relative to the start of - * the program, not the current instruction. - */ -struct ROSE_STRUCT_SPARSE_ITER_BEGIN { - u8 code; //!< From enum RoseInstructionCode. - u32 iter_offset; //!< Offset of mmbit_sparse_iter structure. - u32 jump_table; //!< Offset of jump table indexed by sparse iterator. - u32 fail_jump; //!< Jump forward this many bytes on failure. -}; - -/** - * Note that the offsets in the jump table are always relative to the start of - * the program, not the current instruction. - */ -struct ROSE_STRUCT_SPARSE_ITER_NEXT { - u8 code; //!< From enum RoseInstructionCode. - u32 iter_offset; //!< Offset of mmbit_sparse_iter structure. - u32 jump_table; //!< Offset of jump table indexed by sparse iterator. - u32 state; // Current state index. - u32 fail_jump; //!< Jump forward this many bytes on failure. -}; - -struct ROSE_STRUCT_SPARSE_ITER_ANY { - u8 code; //!< From enum RoseInstructionCode. - u32 iter_offset; //!< Offset of mmbit_sparse_iter structure. - u32 fail_jump; //!< Jump forward this many bytes on failure. -}; - -struct ROSE_STRUCT_ENGINES_EOD { - u8 code; //!< From enum RoseInstructionCode. - u32 iter_offset; //!< Offset of mmbit_sparse_iter structure. 
-}; - -struct ROSE_STRUCT_SUFFIXES_EOD { - u8 code; //!< From enum RoseInstructionCode. -}; - -struct ROSE_STRUCT_MATCHER_EOD { - u8 code; //!< From enum RoseInstructionCode. -}; - -struct ROSE_STRUCT_CHECK_LONG_LIT { - u8 code; //!< From enum RoseInstructionCode. - u32 lit_offset; //!< Offset of literal string. - u32 lit_length; //!< Length of literal string. - u32 fail_jump; //!< Jump forward this many bytes on failure. -}; - -struct ROSE_STRUCT_CHECK_LONG_LIT_NOCASE { - u8 code; //!< From enum RoseInstructionCode. - u32 lit_offset; //!< Offset of literal string. - u32 lit_length; //!< Length of literal string. - u32 fail_jump; //!< Jump forward this many bytes on failure. -}; - -struct ROSE_STRUCT_CHECK_MED_LIT { - u8 code; //!< From enum RoseInstructionCode. - u32 lit_offset; //!< Offset of literal string. - u32 lit_length; //!< Length of literal string. - u32 fail_jump; //!< Jump forward this many bytes on failure. -}; - -struct ROSE_STRUCT_CHECK_MED_LIT_NOCASE { - u8 code; //!< From enum RoseInstructionCode. - u32 lit_offset; //!< Offset of literal string. - u32 lit_length; //!< Length of literal string. - u32 fail_jump; //!< Jump forward this many bytes on failure. -}; - -struct ROSE_STRUCT_CLEAR_WORK_DONE { - u8 code; //!< From enum RoseInstructionCode. -}; - -struct ROSE_STRUCT_MULTIPATH_LOOKAROUND { - u8 code; //!< From enum RoseInstructionCode. - u32 look_index; //!< Offset in bytecode of lookaround offset list. - u32 reach_index; //!< Offset in bytecode of lookaround reach bitvectors. - u32 count; //!< The lookaround byte numbers for each path. - s32 last_start; //!< The latest start offset among 8 paths. - u8 start_mask[MULTIPATH_MAX_LEN]; /*!< Used to initialize path if left-most - * data is missed. */ - u32 fail_jump; //!< Jump forward this many bytes on failure. -}; - -struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_16x8 { - u8 code; //!< From enum RoseInstructionCode. - u8 nib_mask[2 * sizeof(m128)]; //!< High and low nibble mask in shufti. - u8 bucket_select_mask[sizeof(m128)]; //!< Mask for bucket assigning. - u8 data_select_mask[sizeof(m128)]; //!< Shuffle mask for data ordering. - u32 hi_bits_mask; //!< High-bits used in multi-path validation. - u32 lo_bits_mask; //!< Low-bits used in multi-path validation. - u32 neg_mask; //!< 64 bits negation mask. - s32 base_offset; //!< Relative offset of the first byte. - s32 last_start; //!< The latest start offset among 8 paths. - u32 fail_jump; //!< Jump forward this many bytes on failure. -}; - -struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x8 { - u8 code; //!< From enum RoseInstructionCode. - u8 hi_mask[sizeof(m128)]; //!< High nibble mask in shufti. - u8 lo_mask[sizeof(m128)]; //!< Low nibble mask in shufti. - u8 bucket_select_mask[sizeof(m256)]; //!< Mask for bucket assigning. - u8 data_select_mask[sizeof(m256)]; //!< Shuffle mask for data ordering. - u32 hi_bits_mask; //!< High-bits used in multi-path validation. - u32 lo_bits_mask; //!< Low-bits used in multi-path validation. - u32 neg_mask; //!< 64 bits negation mask. - s32 base_offset; //!< Relative offset of the first byte. - s32 last_start; //!< The latest start offset among 8 paths. - u32 fail_jump; //!< Jump forward this many bytes on failure. -}; - -struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x16 { - u8 code; //!< From enum RoseInstructionCode. - u8 hi_mask[sizeof(m256)]; //!< High nibble mask in shufti. - u8 lo_mask[sizeof(m256)]; //!< Low nibble mask in shufti. - u8 bucket_select_mask_hi[sizeof(m256)]; //!< Mask for bucket assigning. 
- u8 bucket_select_mask_lo[sizeof(m256)]; //!< Mask for bucket assigning. - u8 data_select_mask[sizeof(m256)]; //!< Shuffle mask for data ordering. - u32 hi_bits_mask; //!< High-bits used in multi-path validation. - u32 lo_bits_mask; //!< Low-bits used in multi-path validation. - u32 neg_mask; //!< 64 bits negation mask. - s32 base_offset; //!< Relative offset of the first byte. - s32 last_start; //!< The latest start offset among 8 paths. - u32 fail_jump; //!< Jump forward this many bytes on failure. -}; - -struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_64 { - u8 code; //!< From enum RoseInstructionCode. - u8 hi_mask[sizeof(m128)]; //!< High nibble mask in shufti. - u8 lo_mask[sizeof(m128)]; //!< Low nibble mask in shufti. - u8 bucket_select_mask[2 * sizeof(m256)]; //!< Mask for bucket assigning. - u8 data_select_mask[2 * sizeof(m256)]; //!< Shuffle mask for data ordering. - u64a hi_bits_mask; //!< High-bits used in multi-path validation. - u64a lo_bits_mask; //!< Low-bits used in multi-path validation. - u64a neg_mask; //!< 64 bits negation mask. - s32 base_offset; //!< Relative offset of the first byte. - s32 last_start; //!< The latest start offset among 8 paths. - u32 fail_jump; //!< Jump forward this many bytes on failure. -}; - -struct ROSE_STRUCT_INCLUDED_JUMP { - u8 code; //!< From enum RoseInstructionCode. - u8 squash; //!< FDR confirm squash mask for included literal. - u32 child_offset; //!< Program offset of included literal. -}; +struct ROSE_STRUCT_CHECK_INFIX { + u8 code; //!< From enum RoseInstructionCode. + u32 queue; //!< Queue of leftfix to check. + u32 lag; //!< Lag of leftfix for this case. + ReportID report; //!< ReportID of leftfix to check. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_STRUCT_CHECK_PREFIX { + u8 code; //!< From enum RoseInstructionCode. + u32 queue; //!< Queue of leftfix to check. + u32 lag; //!< Lag of leftfix for this case. + ReportID report; //!< ReportID of leftfix to check. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_STRUCT_PUSH_DELAYED { + u8 code; //!< From enum RoseInstructionCode. + u8 delay; // Number of bytes to delay. + u32 index; // Delay literal index (relative to first delay lit). +}; + +struct ROSE_STRUCT_DUMMY_NOP { + u8 code; //!< From enum RoseInstructionCode. +}; + +struct ROSE_STRUCT_CATCH_UP { + u8 code; //!< From enum RoseInstructionCode. +}; + +struct ROSE_STRUCT_CATCH_UP_MPV { + u8 code; //!< From enum RoseInstructionCode. +}; + +struct ROSE_STRUCT_SOM_ADJUST { + u8 code; //!< From enum RoseInstructionCode. + u32 distance; //!< Distance to EOM. +}; + +struct ROSE_STRUCT_SOM_LEFTFIX { + u8 code; //!< From enum RoseInstructionCode. + u32 queue; //!< Queue index of leftfix providing SOM. + u32 lag; //!< Lag of leftfix for this case. +}; + +struct ROSE_STRUCT_SOM_FROM_REPORT { + u8 code; //!< From enum RoseInstructionCode. + struct som_operation som; +}; + +struct ROSE_STRUCT_SOM_ZERO { + u8 code; //!< From enum RoseInstructionCode. +}; + +struct ROSE_STRUCT_TRIGGER_INFIX { + u8 code; //!< From enum RoseInstructionCode. + u8 cancel; //!< Cancels previous top event. + u32 queue; //!< Queue index of infix. + u32 event; //!< Queue event, from MQE_*. +}; + +struct ROSE_STRUCT_TRIGGER_SUFFIX { + u8 code; //!< From enum RoseInstructionCode. + u32 queue; //!< Queue index of suffix. + u32 event; //!< Queue event, from MQE_*. +}; + +struct ROSE_STRUCT_DEDUPE { + u8 code; //!< From enum RoseInstructionCode. + u8 quash_som; //!< Force SOM to zero for this report. 
+ u32 dkey; //!< Dedupe key. + s32 offset_adjust; //!< Offset adjustment to apply to end offset. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_STRUCT_DEDUPE_SOM { + u8 code; //!< From enum RoseInstructionCode. + u8 quash_som; //!< Force SOM to zero for this report. + u32 dkey; //!< Dedupe key. + s32 offset_adjust; //!< Offset adjustment to apply to end offset. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_STRUCT_REPORT_CHAIN { + u8 code; //!< From enum RoseInstructionCode. + u32 event; //!< Queue event, from MQE_*. Must be a top. + + /** + * \brief Number of bytes behind us that we are allowed to squash + * identical top events on the queue. + */ + u64a top_squash_distance; +}; + +struct ROSE_STRUCT_REPORT_SOM_INT { + u8 code; //!< From enum RoseInstructionCode. + struct som_operation som; +}; + +struct ROSE_STRUCT_REPORT_SOM_AWARE { + u8 code; //!< From enum RoseInstructionCode. + struct som_operation som; +}; + +struct ROSE_STRUCT_REPORT { + u8 code; //!< From enum RoseInstructionCode. + ReportID onmatch; //!< Report ID to deliver to user. + s32 offset_adjust; //!< Offset adjustment to apply to end offset. +}; + +struct ROSE_STRUCT_REPORT_EXHAUST { + u8 code; //!< From enum RoseInstructionCode. + ReportID onmatch; //!< Report ID to deliver to user. + s32 offset_adjust; //!< Offset adjustment to apply to end offset. + u32 ekey; //!< Exhaustion key. +}; + +struct ROSE_STRUCT_REPORT_SOM { + u8 code; //!< From enum RoseInstructionCode. + ReportID onmatch; //!< Report ID to deliver to user. + s32 offset_adjust; //!< Offset adjustment to apply to end offset. +}; + +struct ROSE_STRUCT_REPORT_SOM_EXHAUST { + u8 code; //!< From enum RoseInstructionCode. + ReportID onmatch; //!< Report ID to deliver to user. + s32 offset_adjust; //!< Offset adjustment to apply to end offset. + u32 ekey; //!< Exhaustion key. +}; + +struct ROSE_STRUCT_DEDUPE_AND_REPORT { + u8 code; //!< From enum RoseInstructionCode. + u8 quash_som; //!< Force SOM to zero for this report. + u32 dkey; //!< Dedupe key. + ReportID onmatch; //!< Report ID to deliver to user. + s32 offset_adjust; //!< Offset adjustment to apply to end offset. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_STRUCT_FINAL_REPORT { + u8 code; //!< From enum RoseInstructionCode. + ReportID onmatch; //!< Report ID to deliver to user. + s32 offset_adjust; //!< Offset adjustment to apply to end offset. +}; + +struct ROSE_STRUCT_CHECK_EXHAUSTED { + u8 code; //!< From enum RoseInstructionCode. + u32 ekey; //!< Exhaustion key to check. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_STRUCT_CHECK_MIN_LENGTH { + u8 code; //!< From enum RoseInstructionCode. + s32 end_adj; //!< Offset adjustment to add to EOM first. + u64a min_length; //!< Minimum distance from SOM to EOM. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_STRUCT_SET_STATE { + u8 code; //!< From enum RoseInstructionCode. + u32 index; //!< State index in multibit. +}; + +struct ROSE_STRUCT_SET_GROUPS { + u8 code; //!< From enum RoseInstructionCode. + rose_group groups; //!< Bitmask to OR into groups. +}; + +struct ROSE_STRUCT_SQUASH_GROUPS { + u8 code; //!< From enum RoseInstructionCode. + rose_group groups; //!< Bitmask to AND into groups. +}; + +struct ROSE_STRUCT_CHECK_STATE { + u8 code; //!< From enum RoseInstructionCode. + u32 index; //!< State index in the role multibit. 
+ u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +/** + * Note that the offsets in the jump table are always relative to the start of + * the program, not the current instruction. + */ +struct ROSE_STRUCT_SPARSE_ITER_BEGIN { + u8 code; //!< From enum RoseInstructionCode. + u32 iter_offset; //!< Offset of mmbit_sparse_iter structure. + u32 jump_table; //!< Offset of jump table indexed by sparse iterator. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +/** + * Note that the offsets in the jump table are always relative to the start of + * the program, not the current instruction. + */ +struct ROSE_STRUCT_SPARSE_ITER_NEXT { + u8 code; //!< From enum RoseInstructionCode. + u32 iter_offset; //!< Offset of mmbit_sparse_iter structure. + u32 jump_table; //!< Offset of jump table indexed by sparse iterator. + u32 state; // Current state index. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_STRUCT_SPARSE_ITER_ANY { + u8 code; //!< From enum RoseInstructionCode. + u32 iter_offset; //!< Offset of mmbit_sparse_iter structure. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_STRUCT_ENGINES_EOD { + u8 code; //!< From enum RoseInstructionCode. + u32 iter_offset; //!< Offset of mmbit_sparse_iter structure. +}; + +struct ROSE_STRUCT_SUFFIXES_EOD { + u8 code; //!< From enum RoseInstructionCode. +}; + +struct ROSE_STRUCT_MATCHER_EOD { + u8 code; //!< From enum RoseInstructionCode. +}; + +struct ROSE_STRUCT_CHECK_LONG_LIT { + u8 code; //!< From enum RoseInstructionCode. + u32 lit_offset; //!< Offset of literal string. + u32 lit_length; //!< Length of literal string. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_STRUCT_CHECK_LONG_LIT_NOCASE { + u8 code; //!< From enum RoseInstructionCode. + u32 lit_offset; //!< Offset of literal string. + u32 lit_length; //!< Length of literal string. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_STRUCT_CHECK_MED_LIT { + u8 code; //!< From enum RoseInstructionCode. + u32 lit_offset; //!< Offset of literal string. + u32 lit_length; //!< Length of literal string. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_STRUCT_CHECK_MED_LIT_NOCASE { + u8 code; //!< From enum RoseInstructionCode. + u32 lit_offset; //!< Offset of literal string. + u32 lit_length; //!< Length of literal string. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_STRUCT_CLEAR_WORK_DONE { + u8 code; //!< From enum RoseInstructionCode. +}; + +struct ROSE_STRUCT_MULTIPATH_LOOKAROUND { + u8 code; //!< From enum RoseInstructionCode. + u32 look_index; //!< Offset in bytecode of lookaround offset list. + u32 reach_index; //!< Offset in bytecode of lookaround reach bitvectors. + u32 count; //!< The lookaround byte numbers for each path. + s32 last_start; //!< The latest start offset among 8 paths. + u8 start_mask[MULTIPATH_MAX_LEN]; /*!< Used to initialize path if left-most + * data is missed. */ + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_16x8 { + u8 code; //!< From enum RoseInstructionCode. + u8 nib_mask[2 * sizeof(m128)]; //!< High and low nibble mask in shufti. + u8 bucket_select_mask[sizeof(m128)]; //!< Mask for bucket assigning. + u8 data_select_mask[sizeof(m128)]; //!< Shuffle mask for data ordering. + u32 hi_bits_mask; //!< High-bits used in multi-path validation. 
+ u32 lo_bits_mask; //!< Low-bits used in multi-path validation. + u32 neg_mask; //!< 64 bits negation mask. + s32 base_offset; //!< Relative offset of the first byte. + s32 last_start; //!< The latest start offset among 8 paths. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x8 { + u8 code; //!< From enum RoseInstructionCode. + u8 hi_mask[sizeof(m128)]; //!< High nibble mask in shufti. + u8 lo_mask[sizeof(m128)]; //!< Low nibble mask in shufti. + u8 bucket_select_mask[sizeof(m256)]; //!< Mask for bucket assigning. + u8 data_select_mask[sizeof(m256)]; //!< Shuffle mask for data ordering. + u32 hi_bits_mask; //!< High-bits used in multi-path validation. + u32 lo_bits_mask; //!< Low-bits used in multi-path validation. + u32 neg_mask; //!< 64 bits negation mask. + s32 base_offset; //!< Relative offset of the first byte. + s32 last_start; //!< The latest start offset among 8 paths. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_32x16 { + u8 code; //!< From enum RoseInstructionCode. + u8 hi_mask[sizeof(m256)]; //!< High nibble mask in shufti. + u8 lo_mask[sizeof(m256)]; //!< Low nibble mask in shufti. + u8 bucket_select_mask_hi[sizeof(m256)]; //!< Mask for bucket assigning. + u8 bucket_select_mask_lo[sizeof(m256)]; //!< Mask for bucket assigning. + u8 data_select_mask[sizeof(m256)]; //!< Shuffle mask for data ordering. + u32 hi_bits_mask; //!< High-bits used in multi-path validation. + u32 lo_bits_mask; //!< Low-bits used in multi-path validation. + u32 neg_mask; //!< 64 bits negation mask. + s32 base_offset; //!< Relative offset of the first byte. + s32 last_start; //!< The latest start offset among 8 paths. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_STRUCT_CHECK_MULTIPATH_SHUFTI_64 { + u8 code; //!< From enum RoseInstructionCode. + u8 hi_mask[sizeof(m128)]; //!< High nibble mask in shufti. + u8 lo_mask[sizeof(m128)]; //!< Low nibble mask in shufti. + u8 bucket_select_mask[2 * sizeof(m256)]; //!< Mask for bucket assigning. + u8 data_select_mask[2 * sizeof(m256)]; //!< Shuffle mask for data ordering. + u64a hi_bits_mask; //!< High-bits used in multi-path validation. + u64a lo_bits_mask; //!< Low-bits used in multi-path validation. + u64a neg_mask; //!< 64 bits negation mask. + s32 base_offset; //!< Relative offset of the first byte. + s32 last_start; //!< The latest start offset among 8 paths. + u32 fail_jump; //!< Jump forward this many bytes on failure. +}; + +struct ROSE_STRUCT_INCLUDED_JUMP { + u8 code; //!< From enum RoseInstructionCode. + u8 squash; //!< FDR confirm squash mask for included literal. + u32 child_offset; //!< Program offset of included literal. +}; struct ROSE_STRUCT_SET_LOGICAL { u8 code; //!< From enum RoseInstructionCode. @@ -721,4 +721,4 @@ struct ROSE_STRUCT_SET_EXHAUST { struct ROSE_STRUCT_LAST_FLUSH_COMBINATION { u8 code; //!< From enum RoseInstructionCode. 
}; -#endif // ROSE_ROSE_PROGRAM_H +#endif // ROSE_ROSE_PROGRAM_H diff --git a/contrib/libs/hyperscan/src/rose/rose_types.h b/contrib/libs/hyperscan/src/rose/rose_types.h index ee64cddd6b..9dcef1cef0 100644 --- a/contrib/libs/hyperscan/src/rose/rose_types.h +++ b/contrib/libs/hyperscan/src/rose/rose_types.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2016, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -26,46 +26,46 @@ * POSSIBILITY OF SUCH DAMAGE. */ -/** \file - * \brief Rose runtime types (callbacks, etc). - */ - +/** \file + * \brief Rose runtime types (callbacks, etc). + */ + #ifndef ROSE_TYPES_H #define ROSE_TYPES_H #include "ue2common.h" -struct hs_scratch; +struct hs_scratch; + +/** + * \brief Continue without checking for exhaustion. + * + * \ref RoseCallback return value indicating that execution should continue and + * that it is not necessary to check if all reports have been exhausted. + */ +#define ROSE_CONTINUE_MATCHING_NO_EXHAUST 2 -/** - * \brief Continue without checking for exhaustion. - * - * \ref RoseCallback return value indicating that execution should continue and - * that it is not necessary to check if all reports have been exhausted. - */ -#define ROSE_CONTINUE_MATCHING_NO_EXHAUST 2 - -/** - * \brief The type for a Rose callback. - * - * \return - * - \ref MO_HALT_MATCHING if matching should terminate; - * - \ref MO_CONTINUE_MATCHING if matching should continue; - * - \ref ROSE_CONTINUE_MATCHING_NO_EXHAUST if matching should continue and no - * exhaustion is possible. - */ -typedef int (*RoseCallback)(u64a offset, ReportID id, - struct hs_scratch *scratch); - -/** - * \brief The type for a Rose callback which also tracks start of match. - * - * Behaves just like \ref RoseCallback except that it is provided with both a - * start and an end offset. - * - * \see RoseCallback - */ +/** + * \brief The type for a Rose callback. + * + * \return + * - \ref MO_HALT_MATCHING if matching should terminate; + * - \ref MO_CONTINUE_MATCHING if matching should continue; + * - \ref ROSE_CONTINUE_MATCHING_NO_EXHAUST if matching should continue and no + * exhaustion is possible. + */ +typedef int (*RoseCallback)(u64a offset, ReportID id, + struct hs_scratch *scratch); + +/** + * \brief The type for a Rose callback which also tracks start of match. + * + * Behaves just like \ref RoseCallback except that it is provided with both a + * start and an end offset. 
+ * + * \see RoseCallback + */ typedef int (*RoseCallbackSom)(u64a from_offset, u64a to_offset, ReportID id, - struct hs_scratch *scratch); + struct hs_scratch *scratch); #endif diff --git a/contrib/libs/hyperscan/src/rose/runtime.h b/contrib/libs/hyperscan/src/rose/runtime.h index 5a57222df2..5fbb2b7416 100644 --- a/contrib/libs/hyperscan/src/rose/runtime.h +++ b/contrib/libs/hyperscan/src/rose/runtime.h @@ -33,33 +33,33 @@ #ifndef ROSE_RUNTIME_H #define ROSE_RUNTIME_H -#include "rose_internal.h" +#include "rose_internal.h" #include "scratch.h" #include "util/partial_store.h" /* * ROSE STATE LAYOUT: - * - * - runtime status byte (halt status, delay rebuild dirty, etc) - * - rose state multibit - * - active leaf array (multibit) - * - active leftfix array (multibit) - * - leftfix lag table - * - anchored matcher state - * - literal groups - * - history buffer - * - exhausted bitvector - * - som slots, som multibit arrays - * - nfa stream state (for each nfa) + * + * - runtime status byte (halt status, delay rebuild dirty, etc) + * - rose state multibit + * - active leaf array (multibit) + * - active leftfix array (multibit) + * - leftfix lag table + * - anchored matcher state + * - literal groups + * - history buffer + * - exhausted bitvector + * - som slots, som multibit arrays + * - nfa stream state (for each nfa) */ #define rose_inline really_inline -/* Maximum offset that we will eagerly run prefixes to. Beyond this point, eager - * prefixes are always run in exactly the same way as normal prefixes. */ -#define EAGER_STOP_OFFSET 64 +/* Maximum offset that we will eagerly run prefixes to. Beyond this point, eager + * prefixes are always run in exactly the same way as normal prefixes. */ +#define EAGER_STOP_OFFSET 64 + - static really_inline const void *getByOffset(const struct RoseEngine *t, u32 offset) { assert(offset < t->size); @@ -67,49 +67,49 @@ const void *getByOffset(const struct RoseEngine *t, u32 offset) { } static really_inline -void *getRoleState(char *state) { - return state + ROSE_STATE_OFFSET_ROLE_MMBIT; +void *getRoleState(char *state) { + return state + ROSE_STATE_OFFSET_ROLE_MMBIT; } /** \brief Fetch the active array for suffix nfas. */ static really_inline -u8 *getActiveLeafArray(const struct RoseEngine *t, char *state) { - return (u8 *)(state + t->stateOffsets.activeLeafArray); +u8 *getActiveLeafArray(const struct RoseEngine *t, char *state) { + return (u8 *)(state + t->stateOffsets.activeLeafArray); } /** \brief Fetch the active array for rose nfas. 
*/ static really_inline -u8 *getActiveLeftArray(const struct RoseEngine *t, char *state) { - return (u8 *)(state + t->stateOffsets.activeLeftArray); +u8 *getActiveLeftArray(const struct RoseEngine *t, char *state) { + return (u8 *)(state + t->stateOffsets.activeLeftArray); } static really_inline -rose_group loadGroups(const struct RoseEngine *t, const char *state) { +rose_group loadGroups(const struct RoseEngine *t, const char *state) { return partial_load_u64a(state + t->stateOffsets.groups, t->stateOffsets.groups_size); } static really_inline -void storeGroups(const struct RoseEngine *t, char *state, rose_group groups) { +void storeGroups(const struct RoseEngine *t, char *state, rose_group groups) { partial_store_u64a(state + t->stateOffsets.groups, groups, t->stateOffsets.groups_size); } static really_inline -u8 *getLongLitState(const struct RoseEngine *t, char *state) { - return (u8 *)(state + t->stateOffsets.longLitState); +u8 *getLongLitState(const struct RoseEngine *t, char *state) { + return (u8 *)(state + t->stateOffsets.longLitState); } static really_inline -u8 *getLeftfixLagTable(const struct RoseEngine *t, char *state) { - return (u8 *)(state + t->stateOffsets.leftfixLagTable); +u8 *getLeftfixLagTable(const struct RoseEngine *t, char *state) { + return (u8 *)(state + t->stateOffsets.leftfixLagTable); } static really_inline -const u8 *getLeftfixLagTableConst(const struct RoseEngine *t, - const char *state) { - return (const u8 *)(state + t->stateOffsets.leftfixLagTable); +const u8 *getLeftfixLagTableConst(const struct RoseEngine *t, + const char *state) { + return (const u8 *)(state + t->stateOffsets.leftfixLagTable); } static really_inline diff --git a/contrib/libs/hyperscan/src/rose/stream.c b/contrib/libs/hyperscan/src/rose/stream.c index b9c0c4b758..26268dd574 100644 --- a/contrib/libs/hyperscan/src/rose/stream.c +++ b/contrib/libs/hyperscan/src/rose/stream.c @@ -31,10 +31,10 @@ #include "infix.h" #include "match.h" #include "miracle.h" -#include "program_runtime.h" -#include "rose.h" -#include "rose_internal.h" -#include "stream_long_lit.h" +#include "program_runtime.h" +#include "rose.h" +#include "rose_internal.h" +#include "stream_long_lit.h" #include "hwlm/hwlm.h" #include "nfa/mcclellan.h" #include "nfa/nfa_api.h" @@ -46,7 +46,7 @@ static rose_inline void runAnchoredTableStream(const struct RoseEngine *t, const void *atable, size_t alen, u64a offset, struct hs_scratch *scratch) { - char *state_base = scratch->core_info.state + t->stateOffsets.anchorState; + char *state_base = scratch->core_info.state + t->stateOffsets.anchorState; const struct anchored_matcher_info *curr = atable; do { @@ -77,7 +77,7 @@ void runAnchoredTableStream(const struct RoseEngine *t, const void *atable, goto next_nfa; } } else { - if (!unaligned_load_u16(state)) { + if (!unaligned_load_u16(state)) { goto next_nfa; } } @@ -86,11 +86,11 @@ void runAnchoredTableStream(const struct RoseEngine *t, const void *atable, if (nfa->type == MCCLELLAN_NFA_8) { nfaExecMcClellan8_SimpStream(nfa, state, scratch->core_info.buf, start, adj, alen, roseAnchoredCallback, - scratch); + scratch); } else { nfaExecMcClellan16_SimpStream(nfa, state, scratch->core_info.buf, - start, adj, alen, - roseAnchoredCallback, scratch); + start, adj, alen, + roseAnchoredCallback, scratch); } next_nfa: @@ -129,7 +129,7 @@ enum MiracleAction { }; static really_inline -enum MiracleAction roseScanForMiracles(const struct RoseEngine *t, char *state, +enum MiracleAction roseScanForMiracles(const struct RoseEngine *t, char *state, struct 
hs_scratch *scratch, u32 qi, const struct LeftNfaInfo *left, const struct NFA *nfa) { @@ -178,7 +178,7 @@ found_miracle: nfaQueueInitState(q->nfa, q); } else { if (miracle_loc > end_loc - t->historyRequired) { - char *streamState = state + getNfaInfoByQueue(t, qi)->stateOffset; + char *streamState = state + getNfaInfoByQueue(t, qi)->stateOffset; u64a offset = ci->buf_offset + miracle_loc; u8 key = offset ? getByteBefore(ci, miracle_loc) : 0; DEBUG_PRINTF("init state, key=0x%02x, offset=%llu\n", key, offset); @@ -193,7 +193,7 @@ found_miracle: miracle_loc); if (!q_active) { fatbit_set(scratch->aqa, qCount, qi); - initRoseQueue(t, qi, left, scratch); + initRoseQueue(t, qi, left, scratch); } q->cur = q->end = 0; pushQueueAt(q, 0, MQE_START, miracle_loc); @@ -206,7 +206,7 @@ found_miracle: static really_inline -char roseCatchUpLeftfix(const struct RoseEngine *t, char *state, +char roseCatchUpLeftfix(const struct RoseEngine *t, char *state, struct hs_scratch *scratch, u32 qi, const struct LeftNfaInfo *left) { assert(!left->transient); // active roses only @@ -239,7 +239,7 @@ char roseCatchUpLeftfix(const struct RoseEngine *t, char *state, } if (!fatbit_set(scratch->aqa, qCount, qi)) { - initRoseQueue(t, qi, left, scratch); + initRoseQueue(t, qi, left, scratch); s32 sp; if (ci->buf_offset) { @@ -294,7 +294,7 @@ char roseCatchUpLeftfix(const struct RoseEngine *t, char *state, DEBUG_PRINTF("infix died of old age\n"); return 0; } - reduceInfixQueue(q, last_loc, left->maxQueueLen, q->nfa->maxWidth); + reduceInfixQueue(q, last_loc, left->maxQueueLen, q->nfa->maxWidth); } DEBUG_PRINTF("end scan at %lld\n", last_loc); @@ -324,7 +324,7 @@ char roseCatchUpLeftfix(const struct RoseEngine *t, char *state, } static rose_inline -void roseCatchUpLeftfixes(const struct RoseEngine *t, char *state, +void roseCatchUpLeftfixes(const struct RoseEngine *t, char *state, struct hs_scratch *scratch) { if (!t->activeLeftIterOffset) { // No sparse iter, no non-transient roses. @@ -344,12 +344,12 @@ void roseCatchUpLeftfixes(const struct RoseEngine *t, char *state, const struct LeftNfaInfo *left_table = getLeftTable(t); const struct mmbit_sparse_iter *it = getActiveLeftIter(t); - struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES]; - + struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES]; + u32 idx = 0; - u32 ri = mmbit_sparse_iter_begin(ara, arCount, &idx, it, si_state); + u32 ri = mmbit_sparse_iter_begin(ara, arCount, &idx, it, si_state); for (; ri != MMB_INVALID; - ri = mmbit_sparse_iter_next(ara, arCount, ri, &idx, it, si_state)) { + ri = mmbit_sparse_iter_next(ara, arCount, ri, &idx, it, si_state)) { const struct LeftNfaInfo *left = left_table + ri; u32 qi = ri + t->leftfixBeginQueue; DEBUG_PRINTF("leftfix %u of %u, maxLag=%u, infix=%d\n", ri, arCount, @@ -366,7 +366,7 @@ void roseCatchUpLeftfixes(const struct RoseEngine *t, char *state, // Saves out stream state for all our active suffix NFAs. 
static rose_inline -void roseSaveNfaStreamState(const struct RoseEngine *t, char *state, +void roseSaveNfaStreamState(const struct RoseEngine *t, char *state, struct hs_scratch *scratch) { struct mq *queues = scratch->queues; u8 *aa = getActiveLeafArray(t, state); @@ -394,165 +394,165 @@ void roseSaveNfaStreamState(const struct RoseEngine *t, char *state, } static rose_inline -void ensureStreamNeatAndTidy(const struct RoseEngine *t, char *state, +void ensureStreamNeatAndTidy(const struct RoseEngine *t, char *state, struct hs_scratch *scratch, size_t length, - u64a offset) { + u64a offset) { struct RoseContext *tctxt = &scratch->tctxt; - if (roseCatchUpTo(t, scratch, length + scratch->core_info.buf_offset) == - HWLM_TERMINATE_MATCHING) { + if (roseCatchUpTo(t, scratch, length + scratch->core_info.buf_offset) == + HWLM_TERMINATE_MATCHING) { return; /* dead; no need to clean up state. */ } roseSaveNfaStreamState(t, state, scratch); roseCatchUpLeftfixes(t, state, scratch); - roseFlushLastByteHistory(t, scratch, offset + length); + roseFlushLastByteHistory(t, scratch, offset + length); tctxt->lastEndOffset = offset + length; storeGroups(t, state, tctxt->groups); - storeLongLiteralState(t, state, scratch); + storeLongLiteralState(t, state, scratch); } static really_inline -void do_rebuild(const struct RoseEngine *t, struct hs_scratch *scratch) { - assert(t->drmatcherOffset); +void do_rebuild(const struct RoseEngine *t, struct hs_scratch *scratch) { + assert(t->drmatcherOffset); assert(!can_stop_matching(scratch)); - - const struct HWLM *hwlm = getByOffset(t, t->drmatcherOffset); + + const struct HWLM *hwlm = getByOffset(t, t->drmatcherOffset); size_t len = MIN(scratch->core_info.hlen, t->delayRebuildLength); const u8 *buf = scratch->core_info.hbuf + scratch->core_info.hlen - len; DEBUG_PRINTF("BEGIN FLOATING REBUILD over %zu bytes\n", len); - scratch->core_info.status &= ~STATUS_DELAY_DIRTY; - - hwlmExec(hwlm, buf, len, 0, roseDelayRebuildCallback, scratch, + scratch->core_info.status &= ~STATUS_DELAY_DIRTY; + + hwlmExec(hwlm, buf, len, 0, roseDelayRebuildCallback, scratch, scratch->tctxt.groups); assert(!can_stop_matching(scratch)); } -static rose_inline -void runEagerPrefixesStream(const struct RoseEngine *t, - struct hs_scratch *scratch) { - if (!t->eagerIterOffset - || scratch->core_info.buf_offset >= EAGER_STOP_OFFSET) { - return; - } - - char *state = scratch->core_info.state; - u8 *ara = getActiveLeftArray(t, state); /* indexed by offsets into - * left_table */ - const u32 arCount = t->activeLeftCount; - const u32 qCount = t->queueCount; - const struct LeftNfaInfo *left_table = getLeftTable(t); - const struct mmbit_sparse_iter *it = getByOffset(t, t->eagerIterOffset); - - struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES]; - - u32 idx = 0; - u32 ri = mmbit_sparse_iter_begin(ara, arCount, &idx, it, si_state); - for (; ri != MMB_INVALID; - ri = mmbit_sparse_iter_next(ara, arCount, ri, &idx, it, si_state)) { - const struct LeftNfaInfo *left = left_table + ri; - u32 qi = ri + t->leftfixBeginQueue; - DEBUG_PRINTF("leftfix %u of %u, maxLag=%u\n", ri, arCount, left->maxLag); - - assert(!fatbit_isset(scratch->aqa, qCount, qi)); - assert(left->eager); - assert(!left->infix); - - struct mq *q = scratch->queues + qi; - const struct NFA *nfa = getNfaByQueue(t, qi); - s64a loc = MIN(scratch->core_info.len, - EAGER_STOP_OFFSET - scratch->core_info.buf_offset); - - fatbit_set(scratch->aqa, qCount, qi); - initRoseQueue(t, qi, left, scratch); - - if (scratch->core_info.buf_offset) { - s64a sp = 
left->transient ? -(s64a)scratch->core_info.hlen - : -(s64a)loadRoseDelay(t, state, left); - pushQueueAt(q, 0, MQE_START, sp); - if (scratch->core_info.buf_offset + sp > 0) { - loadStreamState(nfa, q, sp); - /* if the leftfix fix is currently in a match state, we cannot - * advance it. */ - if (nfaInAnyAcceptState(nfa, q)) { - continue; - } - pushQueueAt(q, 1, MQE_END, loc); - } else { - pushQueueAt(q, 1, MQE_TOP, sp); - pushQueueAt(q, 2, MQE_END, loc); - nfaQueueInitState(q->nfa, q); - } - } else { - pushQueueAt(q, 0, MQE_START, 0); - pushQueueAt(q, 1, MQE_TOP, 0); - pushQueueAt(q, 2, MQE_END, loc); - nfaQueueInitState(nfa, q); - } - - char alive = nfaQueueExecToMatch(q->nfa, q, loc); - - if (!alive) { - DEBUG_PRINTF("queue %u dead, squashing\n", qi); - mmbit_unset(ara, arCount, ri); - fatbit_unset(scratch->aqa, qCount, qi); - scratch->tctxt.groups &= left->squash_mask; - } else if (q->cur == q->end) { - assert(alive != MO_MATCHES_PENDING); - /* unlike in block mode we cannot squash groups if there is no match - * in this block as we need the groups on for later stream writes */ - /* TODO: investigate possibility of a method to suppress groups for - * a single stream block. */ - DEBUG_PRINTF("queue %u finished, nfa lives\n", qi); - q->cur = q->end = 0; - pushQueueAt(q, 0, MQE_START, loc); - } else { - assert(alive == MO_MATCHES_PENDING); - DEBUG_PRINTF("queue %u unfinished, nfa lives\n", qi); - q->end--; /* remove end item */ - } - } -} - -static really_inline -int can_never_match(const struct RoseEngine *t, char *state, - struct hs_scratch *scratch, size_t length, u64a offset) { - struct RoseContext *tctxt = &scratch->tctxt; - - if (tctxt->groups) { - DEBUG_PRINTF("still has active groups\n"); - return 0; - } - - if (offset + length <= t->anchoredDistance) { /* not < as may have eod */ - DEBUG_PRINTF("still in anchored region\n"); - return 0; - } - - if (t->lastByteHistoryIterOffset) { /* last byte history is hard */ - DEBUG_PRINTF("last byte history\n"); - return 0; - } - - if (mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)) { - DEBUG_PRINTF("active leaf\n"); - return 0; - } - - return 1; -} - -void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) { - DEBUG_PRINTF("OH HAI [%llu, %llu)\n", scratch->core_info.buf_offset, - scratch->core_info.buf_offset + (u64a)scratch->core_info.len); +static rose_inline +void runEagerPrefixesStream(const struct RoseEngine *t, + struct hs_scratch *scratch) { + if (!t->eagerIterOffset + || scratch->core_info.buf_offset >= EAGER_STOP_OFFSET) { + return; + } + + char *state = scratch->core_info.state; + u8 *ara = getActiveLeftArray(t, state); /* indexed by offsets into + * left_table */ + const u32 arCount = t->activeLeftCount; + const u32 qCount = t->queueCount; + const struct LeftNfaInfo *left_table = getLeftTable(t); + const struct mmbit_sparse_iter *it = getByOffset(t, t->eagerIterOffset); + + struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES]; + + u32 idx = 0; + u32 ri = mmbit_sparse_iter_begin(ara, arCount, &idx, it, si_state); + for (; ri != MMB_INVALID; + ri = mmbit_sparse_iter_next(ara, arCount, ri, &idx, it, si_state)) { + const struct LeftNfaInfo *left = left_table + ri; + u32 qi = ri + t->leftfixBeginQueue; + DEBUG_PRINTF("leftfix %u of %u, maxLag=%u\n", ri, arCount, left->maxLag); + + assert(!fatbit_isset(scratch->aqa, qCount, qi)); + assert(left->eager); + assert(!left->infix); + + struct mq *q = scratch->queues + qi; + const struct NFA *nfa = getNfaByQueue(t, qi); + s64a loc = 
MIN(scratch->core_info.len, + EAGER_STOP_OFFSET - scratch->core_info.buf_offset); + + fatbit_set(scratch->aqa, qCount, qi); + initRoseQueue(t, qi, left, scratch); + + if (scratch->core_info.buf_offset) { + s64a sp = left->transient ? -(s64a)scratch->core_info.hlen + : -(s64a)loadRoseDelay(t, state, left); + pushQueueAt(q, 0, MQE_START, sp); + if (scratch->core_info.buf_offset + sp > 0) { + loadStreamState(nfa, q, sp); + /* if the leftfix fix is currently in a match state, we cannot + * advance it. */ + if (nfaInAnyAcceptState(nfa, q)) { + continue; + } + pushQueueAt(q, 1, MQE_END, loc); + } else { + pushQueueAt(q, 1, MQE_TOP, sp); + pushQueueAt(q, 2, MQE_END, loc); + nfaQueueInitState(q->nfa, q); + } + } else { + pushQueueAt(q, 0, MQE_START, 0); + pushQueueAt(q, 1, MQE_TOP, 0); + pushQueueAt(q, 2, MQE_END, loc); + nfaQueueInitState(nfa, q); + } + + char alive = nfaQueueExecToMatch(q->nfa, q, loc); + + if (!alive) { + DEBUG_PRINTF("queue %u dead, squashing\n", qi); + mmbit_unset(ara, arCount, ri); + fatbit_unset(scratch->aqa, qCount, qi); + scratch->tctxt.groups &= left->squash_mask; + } else if (q->cur == q->end) { + assert(alive != MO_MATCHES_PENDING); + /* unlike in block mode we cannot squash groups if there is no match + * in this block as we need the groups on for later stream writes */ + /* TODO: investigate possibility of a method to suppress groups for + * a single stream block. */ + DEBUG_PRINTF("queue %u finished, nfa lives\n", qi); + q->cur = q->end = 0; + pushQueueAt(q, 0, MQE_START, loc); + } else { + assert(alive == MO_MATCHES_PENDING); + DEBUG_PRINTF("queue %u unfinished, nfa lives\n", qi); + q->end--; /* remove end item */ + } + } +} + +static really_inline +int can_never_match(const struct RoseEngine *t, char *state, + struct hs_scratch *scratch, size_t length, u64a offset) { + struct RoseContext *tctxt = &scratch->tctxt; + + if (tctxt->groups) { + DEBUG_PRINTF("still has active groups\n"); + return 0; + } + + if (offset + length <= t->anchoredDistance) { /* not < as may have eod */ + DEBUG_PRINTF("still in anchored region\n"); + return 0; + } + + if (t->lastByteHistoryIterOffset) { /* last byte history is hard */ + DEBUG_PRINTF("last byte history\n"); + return 0; + } + + if (mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)) { + DEBUG_PRINTF("active leaf\n"); + return 0; + } + + return 1; +} + +void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) { + DEBUG_PRINTF("OH HAI [%llu, %llu)\n", scratch->core_info.buf_offset, + scratch->core_info.buf_offset + (u64a)scratch->core_info.len); assert(t); assert(scratch->core_info.hbuf); assert(scratch->core_info.buf); - // We should not have been called if we've already been told to terminate - // matching. - assert(!told_to_stop_matching(scratch)); - + // We should not have been called if we've already been told to terminate + // matching. 
+ assert(!told_to_stop_matching(scratch)); + assert(mmbit_sparse_iter_state_size(t->rolesWithStateCount) < MAX_SPARSE_ITER_STATES); @@ -568,7 +568,7 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) { return; } - char *state = scratch->core_info.state; + char *state = scratch->core_info.state; struct RoseContext *tctxt = &scratch->tctxt; tctxt->mpv_inactive = 0; @@ -583,8 +583,8 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) { tctxt->minNonMpvMatchOffset = offset; tctxt->next_mpv_offset = 0; - DEBUG_PRINTF("BEGIN: history len=%zu, buffer len=%zu groups=%016llx\n", - scratch->core_info.hlen, scratch->core_info.len, tctxt->groups); + DEBUG_PRINTF("BEGIN: history len=%zu, buffer len=%zu groups=%016llx\n", + scratch->core_info.hlen, scratch->core_info.len, tctxt->groups); fatbit_clear(scratch->aqa); scratch->al_log_sum = 0; @@ -594,7 +594,7 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) { streamInitSufPQ(t, state, scratch); } - runEagerPrefixesStream(t, scratch); + runEagerPrefixesStream(t, scratch); u32 alen = t->anchoredDistance > offset ? MIN(length + offset, t->anchoredDistance) - offset : 0; @@ -611,13 +611,13 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) { const struct HWLM *ftable = getFLiteralMatcher(t); if (ftable) { - // Load in long literal table state and set up "fake history" buffers - // (ll_buf, etc, used by the CHECK_LONG_LIT instruction). Note that this - // must be done here in order to ensure that it happens before any path - // that leads to storeLongLiteralState(), which relies on these buffers. - loadLongLiteralState(t, state, scratch); - - if (t->noFloatingRoots && !roseHasInFlightMatches(t, state, scratch)) { + // Load in long literal table state and set up "fake history" buffers + // (ll_buf, etc, used by the CHECK_LONG_LIT instruction). Note that this + // must be done here in order to ensure that it happens before any path + // that leads to storeLongLiteralState(), which relies on these buffers. 
+ loadLongLiteralState(t, state, scratch); + + if (t->noFloatingRoots && !roseHasInFlightMatches(t, state, scratch)) { DEBUG_PRINTF("skip FLOATING: no inflight matches\n"); goto flush_delay_and_exit; } @@ -630,18 +630,18 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) { size_t hlength = scratch->core_info.hlen; - char rebuild = hlength && - (scratch->core_info.status & STATUS_DELAY_DIRTY) && - (t->maxFloatingDelayedMatch == ROSE_BOUND_INF || - offset < t->maxFloatingDelayedMatch); + char rebuild = hlength && + (scratch->core_info.status & STATUS_DELAY_DIRTY) && + (t->maxFloatingDelayedMatch == ROSE_BOUND_INF || + offset < t->maxFloatingDelayedMatch); DEBUG_PRINTF("**rebuild %hhd status %hhu mfdm %u, offset %llu\n", - rebuild, scratch->core_info.status, - t->maxFloatingDelayedMatch, offset); + rebuild, scratch->core_info.status, + t->maxFloatingDelayedMatch, offset); + + if (rebuild) { /* rebuild floating delayed match stuff */ + do_rebuild(t, scratch); + } - if (rebuild) { /* rebuild floating delayed match stuff */ - do_rebuild(t, scratch); - } - if (!flen) { goto flush_delay_and_exit; } @@ -660,93 +660,93 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) { DEBUG_PRINTF("start=%zu\n", start); DEBUG_PRINTF("BEGIN FLOATING (over %zu/%zu)\n", flen, length); - hwlmExecStreaming(ftable, flen, start, roseFloatingCallback, scratch, - tctxt->groups & t->floating_group_mask); + hwlmExecStreaming(ftable, flen, start, roseFloatingCallback, scratch, + tctxt->groups & t->floating_group_mask); } flush_delay_and_exit: DEBUG_PRINTF("flushing floating\n"); - if (cleanUpDelayed(t, scratch, length, offset) == HWLM_TERMINATE_MATCHING) { + if (cleanUpDelayed(t, scratch, length, offset) == HWLM_TERMINATE_MATCHING) { return; } exit: DEBUG_PRINTF("CLEAN UP TIME\n"); if (!can_stop_matching(scratch)) { - ensureStreamNeatAndTidy(t, state, scratch, length, offset); + ensureStreamNeatAndTidy(t, state, scratch, length, offset); + } + + if (!told_to_stop_matching(scratch) + && can_never_match(t, state, scratch, length, offset)) { + DEBUG_PRINTF("PATTERN SET IS EXHAUSTED\n"); + scratch->core_info.status = STATUS_EXHAUSTED; + return; } - - if (!told_to_stop_matching(scratch) - && can_never_match(t, state, scratch, length, offset)) { - DEBUG_PRINTF("PATTERN SET IS EXHAUSTED\n"); - scratch->core_info.status = STATUS_EXHAUSTED; - return; - } - - DEBUG_PRINTF("DONE STREAMING SCAN, status = %u\n", - scratch->core_info.status); + + DEBUG_PRINTF("DONE STREAMING SCAN, status = %u\n", + scratch->core_info.status); return; } - -static rose_inline -void roseStreamInitEod(const struct RoseEngine *t, u64a offset, - struct hs_scratch *scratch) { - struct RoseContext *tctxt = &scratch->tctxt; - /* TODO: diff groups for eod */ - tctxt->groups = loadGroups(t, scratch->core_info.state); - tctxt->lit_offset_adjust = scratch->core_info.buf_offset - - scratch->core_info.hlen - + 1; // index after last byte - tctxt->delayLastEndOffset = offset; - tctxt->lastEndOffset = offset; - tctxt->filledDelayedSlots = 0; - tctxt->lastMatchOffset = 0; + +static rose_inline +void roseStreamInitEod(const struct RoseEngine *t, u64a offset, + struct hs_scratch *scratch) { + struct RoseContext *tctxt = &scratch->tctxt; + /* TODO: diff groups for eod */ + tctxt->groups = loadGroups(t, scratch->core_info.state); + tctxt->lit_offset_adjust = scratch->core_info.buf_offset + - scratch->core_info.hlen + + 1; // index after last byte + tctxt->delayLastEndOffset = offset; + tctxt->lastEndOffset = offset; + 
tctxt->filledDelayedSlots = 0; + tctxt->lastMatchOffset = 0; tctxt->lastCombMatchOffset = offset; /* DO NOT set 0 here! */ - tctxt->minMatchOffset = offset; - tctxt->minNonMpvMatchOffset = offset; - tctxt->next_mpv_offset = offset; - - scratch->catchup_pq.qm_size = 0; - scratch->al_log_sum = 0; /* clear the anchored logs */ - - fatbit_clear(scratch->aqa); -} - -void roseStreamEodExec(const struct RoseEngine *t, u64a offset, - struct hs_scratch *scratch) { - assert(scratch); - assert(t->requiresEodCheck); - DEBUG_PRINTF("ci buf %p/%zu his %p/%zu\n", scratch->core_info.buf, - scratch->core_info.len, scratch->core_info.hbuf, - scratch->core_info.hlen); - - // We should not have been called if we've already been told to terminate - // matching. - assert(!told_to_stop_matching(scratch)); - - if (t->maxBiAnchoredWidth != ROSE_BOUND_INF - && offset > t->maxBiAnchoredWidth) { - DEBUG_PRINTF("bailing, we are beyond max width\n"); - /* also some of the history/state may be stale */ - return; - } - - if (!t->eodProgramOffset) { - DEBUG_PRINTF("no eod program\n"); - return; - } - - roseStreamInitEod(t, offset, scratch); - - DEBUG_PRINTF("running eod program at %u\n", t->eodProgramOffset); - - // There should be no pending delayed literals. - assert(!scratch->tctxt.filledDelayedSlots); - - const u64a som = 0; - const u8 flags = ROSE_PROG_FLAG_SKIP_MPV_CATCHUP; - - // Note: we ignore the result, as this is the last thing to ever happen on - // a scan. - roseRunProgram(t, scratch, t->eodProgramOffset, som, offset, flags); -} + tctxt->minMatchOffset = offset; + tctxt->minNonMpvMatchOffset = offset; + tctxt->next_mpv_offset = offset; + + scratch->catchup_pq.qm_size = 0; + scratch->al_log_sum = 0; /* clear the anchored logs */ + + fatbit_clear(scratch->aqa); +} + +void roseStreamEodExec(const struct RoseEngine *t, u64a offset, + struct hs_scratch *scratch) { + assert(scratch); + assert(t->requiresEodCheck); + DEBUG_PRINTF("ci buf %p/%zu his %p/%zu\n", scratch->core_info.buf, + scratch->core_info.len, scratch->core_info.hbuf, + scratch->core_info.hlen); + + // We should not have been called if we've already been told to terminate + // matching. + assert(!told_to_stop_matching(scratch)); + + if (t->maxBiAnchoredWidth != ROSE_BOUND_INF + && offset > t->maxBiAnchoredWidth) { + DEBUG_PRINTF("bailing, we are beyond max width\n"); + /* also some of the history/state may be stale */ + return; + } + + if (!t->eodProgramOffset) { + DEBUG_PRINTF("no eod program\n"); + return; + } + + roseStreamInitEod(t, offset, scratch); + + DEBUG_PRINTF("running eod program at %u\n", t->eodProgramOffset); + + // There should be no pending delayed literals. + assert(!scratch->tctxt.filledDelayedSlots); + + const u64a som = 0; + const u8 flags = ROSE_PROG_FLAG_SKIP_MPV_CATCHUP; + + // Note: we ignore the result, as this is the last thing to ever happen on + // a scan. 
+ roseRunProgram(t, scratch, t->eodProgramOffset, som, offset, flags); +} diff --git a/contrib/libs/hyperscan/src/rose/stream_long_lit.h b/contrib/libs/hyperscan/src/rose/stream_long_lit.h index 34bc00e286..df9b57f4e2 100644 --- a/contrib/libs/hyperscan/src/rose/stream_long_lit.h +++ b/contrib/libs/hyperscan/src/rose/stream_long_lit.h @@ -1,372 +1,372 @@ -/* - * Copyright (c) 2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef STREAM_LONG_LIT_H -#define STREAM_LONG_LIT_H - -#include "rose.h" -#include "rose_common.h" -#include "rose_internal.h" -#include "stream_long_lit_hash.h" +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef STREAM_LONG_LIT_H +#define STREAM_LONG_LIT_H + +#include "rose.h" +#include "rose_common.h" +#include "rose_internal.h" +#include "stream_long_lit_hash.h" #include "util/compare.h" -#include "util/copybytes.h" - -static really_inline -const struct RoseLongLitHashEntry * -getHashTableBase(const struct RoseLongLitTable *ll_table, - const struct RoseLongLitSubtable *ll_sub) { - assert(ll_sub->hashOffset); - return (const struct RoseLongLitHashEntry *)((const char *)ll_table + - ll_sub->hashOffset); -} - -// Reads from stream state and unpacks values into stream state table. -static really_inline -void loadLongLitStreamState(const struct RoseLongLitTable *ll_table, - const u8 *ll_state, u32 *state_case, - u32 *state_nocase) { - assert(ll_table); - assert(ll_state); - assert(state_case && state_nocase); - - u8 ss_bytes = ll_table->streamStateBytes; - u8 ssb = ll_table->caseful.streamStateBits; - UNUSED u8 ssb_nc = ll_table->nocase.streamStateBits; - assert(ss_bytes == (ssb + ssb_nc + 7) / 8); - -#if defined(ARCH_32_BIT) - // On 32-bit hosts, we may be able to avoid having to do any u64a - // manipulation at all. - if (ss_bytes <= 4) { - u32 ssb_mask = (1U << ssb) - 1; - u32 streamVal = partial_load_u32(ll_state, ss_bytes); - *state_case = (u32)(streamVal & ssb_mask); - *state_nocase = (u32)(streamVal >> ssb); - return; - } -#endif - - u64a ssb_mask = (1ULL << ssb) - 1; - u64a streamVal = partial_load_u64a(ll_state, ss_bytes); - *state_case = (u32)(streamVal & ssb_mask); - *state_nocase = (u32)(streamVal >> ssb); -} - -static rose_inline -void loadLongLiteralStateMode(struct hs_scratch *scratch, - const struct RoseLongLitTable *ll_table, - const struct RoseLongLitSubtable *ll_sub, - const u32 state, const char nocase) { - if (!state) { - DEBUG_PRINTF("no state for %s\n", nocase ? "caseless" : "caseful"); - return; - } - - const struct RoseLongLitHashEntry *tab = getHashTableBase(ll_table, ll_sub); - const struct RoseLongLitHashEntry *ent = tab + state - 1; - - assert(ent->str_offset + ent->str_len <= ll_table->size); - const u8 *found_buf = (const u8 *)ll_table + ent->str_offset; - size_t found_sz = ent->str_len; - - struct RoseContext *tctxt = &scratch->tctxt; - if (nocase) { - tctxt->ll_buf_nocase = found_buf; - tctxt->ll_len_nocase = found_sz; - } else { - tctxt->ll_buf = found_buf; - tctxt->ll_len = found_sz; - } -} - -static rose_inline -void loadLongLiteralState(const struct RoseEngine *t, char *state, - struct hs_scratch *scratch) { - if (!t->longLitTableOffset) { - return; - } - - // If we don't have any long literals in play, these values must point to - // the real history buffer so that CHECK_LONG_LIT instructions examine the - // history buffer. 
- scratch->tctxt.ll_buf = scratch->core_info.hbuf; - scratch->tctxt.ll_len = scratch->core_info.hlen; - scratch->tctxt.ll_buf_nocase = scratch->core_info.hbuf; - scratch->tctxt.ll_len_nocase = scratch->core_info.hlen; - - if (!scratch->core_info.hlen) { - return; - } - - const struct RoseLongLitTable *ll_table = - getByOffset(t, t->longLitTableOffset); - const u8 *ll_state = getLongLitState(t, state); - - u32 state_case; - u32 state_nocase; - loadLongLitStreamState(ll_table, ll_state, &state_case, &state_nocase); - - DEBUG_PRINTF("loaded {%u, %u}\n", state_case, state_nocase); - - loadLongLiteralStateMode(scratch, ll_table, &ll_table->caseful, - state_case, 0); - loadLongLiteralStateMode(scratch, ll_table, &ll_table->nocase, - state_nocase, 1); -} - -static rose_inline -char confirmLongLiteral(const struct RoseLongLitTable *ll_table, - const struct hs_scratch *scratch, - const struct RoseLongLitHashEntry *ent, - const char nocase) { - assert(ent->str_offset + ent->str_len <= ll_table->size); - const u8 *s = (const u8 *)ll_table + ent->str_offset; - size_t len = ent->str_len; - const u8 *buf = scratch->core_info.buf; - const size_t buf_len = scratch->core_info.len; - - if (len > buf_len) { - const struct RoseContext *tctxt = &scratch->tctxt; - const u8 *hist = nocase ? tctxt->ll_buf_nocase : tctxt->ll_buf; - size_t hist_len = nocase ? tctxt->ll_len_nocase : tctxt->ll_len; - - if (len > buf_len + hist_len) { - return 0; // Break out - not enough total history - } - - size_t overhang = len - buf_len; - assert(overhang <= hist_len); - - if (cmpForward(hist + hist_len - overhang, s, overhang, nocase)) { - return 0; - } - s += overhang; - len -= overhang; - } - - // if we got here, we don't need history or we compared ok out of history - assert(len <= buf_len); - - if (cmpForward(buf + buf_len - len, s, len, nocase)) { - return 0; - } - - return 1; -} - -static rose_inline -const u8 *prepScanBuffer(const struct core_info *ci, - const struct RoseLongLitTable *ll_table, u8 *tempbuf) { - const u8 hash_len = ll_table->maxLen; - assert(hash_len >= LONG_LIT_HASH_LEN); - - // Our hash function operates over LONG_LIT_HASH_LEN bytes, starting from - // location (end of buffer - hash_len). If this block can be satisfied - // entirely from either the current buffer or the history buffer, we pass - // in the pointer directly; otherwise we must make a copy. - - const u8 *base; - - if (hash_len > ci->len) { - size_t overhang = hash_len - ci->len; - if (overhang >= LONG_LIT_HASH_LEN) { - // Can read enough to hash from inside the history buffer. - assert(overhang <= ci->hlen); - base = ci->hbuf + ci->hlen - overhang; - } else { - // Copy: first chunk from history buffer. - assert(overhang <= ci->hlen); +#include "util/copybytes.h" + +static really_inline +const struct RoseLongLitHashEntry * +getHashTableBase(const struct RoseLongLitTable *ll_table, + const struct RoseLongLitSubtable *ll_sub) { + assert(ll_sub->hashOffset); + return (const struct RoseLongLitHashEntry *)((const char *)ll_table + + ll_sub->hashOffset); +} + +// Reads from stream state and unpacks values into stream state table. 
+static really_inline +void loadLongLitStreamState(const struct RoseLongLitTable *ll_table, + const u8 *ll_state, u32 *state_case, + u32 *state_nocase) { + assert(ll_table); + assert(ll_state); + assert(state_case && state_nocase); + + u8 ss_bytes = ll_table->streamStateBytes; + u8 ssb = ll_table->caseful.streamStateBits; + UNUSED u8 ssb_nc = ll_table->nocase.streamStateBits; + assert(ss_bytes == (ssb + ssb_nc + 7) / 8); + +#if defined(ARCH_32_BIT) + // On 32-bit hosts, we may be able to avoid having to do any u64a + // manipulation at all. + if (ss_bytes <= 4) { + u32 ssb_mask = (1U << ssb) - 1; + u32 streamVal = partial_load_u32(ll_state, ss_bytes); + *state_case = (u32)(streamVal & ssb_mask); + *state_nocase = (u32)(streamVal >> ssb); + return; + } +#endif + + u64a ssb_mask = (1ULL << ssb) - 1; + u64a streamVal = partial_load_u64a(ll_state, ss_bytes); + *state_case = (u32)(streamVal & ssb_mask); + *state_nocase = (u32)(streamVal >> ssb); +} + +static rose_inline +void loadLongLiteralStateMode(struct hs_scratch *scratch, + const struct RoseLongLitTable *ll_table, + const struct RoseLongLitSubtable *ll_sub, + const u32 state, const char nocase) { + if (!state) { + DEBUG_PRINTF("no state for %s\n", nocase ? "caseless" : "caseful"); + return; + } + + const struct RoseLongLitHashEntry *tab = getHashTableBase(ll_table, ll_sub); + const struct RoseLongLitHashEntry *ent = tab + state - 1; + + assert(ent->str_offset + ent->str_len <= ll_table->size); + const u8 *found_buf = (const u8 *)ll_table + ent->str_offset; + size_t found_sz = ent->str_len; + + struct RoseContext *tctxt = &scratch->tctxt; + if (nocase) { + tctxt->ll_buf_nocase = found_buf; + tctxt->ll_len_nocase = found_sz; + } else { + tctxt->ll_buf = found_buf; + tctxt->ll_len = found_sz; + } +} + +static rose_inline +void loadLongLiteralState(const struct RoseEngine *t, char *state, + struct hs_scratch *scratch) { + if (!t->longLitTableOffset) { + return; + } + + // If we don't have any long literals in play, these values must point to + // the real history buffer so that CHECK_LONG_LIT instructions examine the + // history buffer. + scratch->tctxt.ll_buf = scratch->core_info.hbuf; + scratch->tctxt.ll_len = scratch->core_info.hlen; + scratch->tctxt.ll_buf_nocase = scratch->core_info.hbuf; + scratch->tctxt.ll_len_nocase = scratch->core_info.hlen; + + if (!scratch->core_info.hlen) { + return; + } + + const struct RoseLongLitTable *ll_table = + getByOffset(t, t->longLitTableOffset); + const u8 *ll_state = getLongLitState(t, state); + + u32 state_case; + u32 state_nocase; + loadLongLitStreamState(ll_table, ll_state, &state_case, &state_nocase); + + DEBUG_PRINTF("loaded {%u, %u}\n", state_case, state_nocase); + + loadLongLiteralStateMode(scratch, ll_table, &ll_table->caseful, + state_case, 0); + loadLongLiteralStateMode(scratch, ll_table, &ll_table->nocase, + state_nocase, 1); +} + +static rose_inline +char confirmLongLiteral(const struct RoseLongLitTable *ll_table, + const struct hs_scratch *scratch, + const struct RoseLongLitHashEntry *ent, + const char nocase) { + assert(ent->str_offset + ent->str_len <= ll_table->size); + const u8 *s = (const u8 *)ll_table + ent->str_offset; + size_t len = ent->str_len; + const u8 *buf = scratch->core_info.buf; + const size_t buf_len = scratch->core_info.len; + + if (len > buf_len) { + const struct RoseContext *tctxt = &scratch->tctxt; + const u8 *hist = nocase ? tctxt->ll_buf_nocase : tctxt->ll_buf; + size_t hist_len = nocase ? 
tctxt->ll_len_nocase : tctxt->ll_len; + + if (len > buf_len + hist_len) { + return 0; // Break out - not enough total history + } + + size_t overhang = len - buf_len; + assert(overhang <= hist_len); + + if (cmpForward(hist + hist_len - overhang, s, overhang, nocase)) { + return 0; + } + s += overhang; + len -= overhang; + } + + // if we got here, we don't need history or we compared ok out of history + assert(len <= buf_len); + + if (cmpForward(buf + buf_len - len, s, len, nocase)) { + return 0; + } + + return 1; +} + +static rose_inline +const u8 *prepScanBuffer(const struct core_info *ci, + const struct RoseLongLitTable *ll_table, u8 *tempbuf) { + const u8 hash_len = ll_table->maxLen; + assert(hash_len >= LONG_LIT_HASH_LEN); + + // Our hash function operates over LONG_LIT_HASH_LEN bytes, starting from + // location (end of buffer - hash_len). If this block can be satisfied + // entirely from either the current buffer or the history buffer, we pass + // in the pointer directly; otherwise we must make a copy. + + const u8 *base; + + if (hash_len > ci->len) { + size_t overhang = hash_len - ci->len; + if (overhang >= LONG_LIT_HASH_LEN) { + // Can read enough to hash from inside the history buffer. + assert(overhang <= ci->hlen); + base = ci->hbuf + ci->hlen - overhang; + } else { + // Copy: first chunk from history buffer. + assert(overhang <= ci->hlen); copy_upto_64_bytes(tempbuf, ci->hbuf + ci->hlen - overhang, - overhang); - // Copy: second chunk from current buffer. - size_t copy_buf_len = LONG_LIT_HASH_LEN - overhang; - assert(copy_buf_len <= ci->len); + overhang); + // Copy: second chunk from current buffer. + size_t copy_buf_len = LONG_LIT_HASH_LEN - overhang; + assert(copy_buf_len <= ci->len); copy_upto_64_bytes(tempbuf + overhang, ci->buf, copy_buf_len); - // Read from our temporary buffer for the hash. - base = tempbuf; - } - } else { - // Can read enough to hash from inside the current buffer. - base = ci->buf + ci->len - hash_len; - } - - return base; -} - -#ifndef NDEBUG -// Defensive checking (used in assert) that these table values don't overflow -// the range available. -static really_inline -char streamingTableOverflow(u32 state_case, u32 state_nocase, u8 ssb, - u8 ssb_nc) { - u32 ssb_mask = (1ULL << (ssb)) - 1; - if (state_case & ~ssb_mask) { - return 1; - } - u32 ssb_nc_mask = (1ULL << (ssb_nc)) - 1; - if (state_nocase & ~ssb_nc_mask) { - return 1; - } - return 0; -} -#endif - -// Reads from stream state table and packs values into stream state. -static rose_inline -void storeLongLitStreamState(const struct RoseLongLitTable *ll_table, - u8 *ll_state, u32 state_case, u32 state_nocase) { - assert(ll_table); - assert(ll_state); - - u8 ss_bytes = ll_table->streamStateBytes; - u8 ssb = ll_table->caseful.streamStateBits; - UNUSED u8 ssb_nc = ll_table->nocase.streamStateBits; - assert(ss_bytes == ROUNDUP_N(ssb + ssb_nc, 8) / 8); - assert(!streamingTableOverflow(state_case, state_nocase, ssb, ssb_nc)); - -#if defined(ARCH_32_BIT) - // On 32-bit hosts, we may be able to avoid having to do any u64a - // manipulation at all. 
- if (ss_bytes <= 4) { - u32 stagingStreamState = state_case; - stagingStreamState |= (state_nocase << ssb); - partial_store_u32(ll_state, stagingStreamState, ss_bytes); - return; - } -#endif - - u64a stagingStreamState = (u64a)state_case; - stagingStreamState |= (u64a)state_nocase << ssb; - partial_store_u64a(ll_state, stagingStreamState, ss_bytes); -} - -static really_inline -char has_bit(const u8 *data, u32 bit) { - return (data[bit / 8] >> (bit % 8)) & 1; -} - -static rose_inline -char bloomHasKey(const u8 *bloom, u32 bloom_mask, u32 hash) { - return has_bit(bloom, hash & bloom_mask); -} - -static rose_inline -char checkBloomFilter(const struct RoseLongLitTable *ll_table, - const struct RoseLongLitSubtable *ll_sub, - const u8 *scan_buf, char nocase) { - assert(ll_sub->bloomBits); - - const u8 *bloom = (const u8 *)ll_table + ll_sub->bloomOffset; - const u32 bloom_mask = (1U << ll_sub->bloomBits) - 1; - - char v = 1; - v &= bloomHasKey(bloom, bloom_mask, bloomHash_1(scan_buf, nocase)); - v &= bloomHasKey(bloom, bloom_mask, bloomHash_2(scan_buf, nocase)); - v &= bloomHasKey(bloom, bloom_mask, bloomHash_3(scan_buf, nocase)); - return v; -} - -/** - * \brief Look for a hit in the hash table. - * - * Returns zero if not found, otherwise returns (bucket + 1). - */ -static rose_inline -u32 checkHashTable(const struct RoseLongLitTable *ll_table, - const struct RoseLongLitSubtable *ll_sub, const u8 *scan_buf, - const struct hs_scratch *scratch, char nocase) { - const u32 nbits = ll_sub->hashBits; - assert(nbits && nbits < 32); - const u32 num_entries = 1U << nbits; - - const struct RoseLongLitHashEntry *tab = getHashTableBase(ll_table, ll_sub); - - u32 hash = hashLongLiteral(scan_buf, LONG_LIT_HASH_LEN, nocase); - u32 bucket = hash & ((1U << nbits) - 1); - - while (tab[bucket].str_offset != 0) { - DEBUG_PRINTF("checking bucket %u\n", bucket); - if (confirmLongLiteral(ll_table, scratch, &tab[bucket], nocase)) { - DEBUG_PRINTF("found hit for bucket %u\n", bucket); - return bucket + 1; - } - - if (++bucket == num_entries) { - bucket = 0; - } - } - - return 0; -} - -static rose_inline -void storeLongLiteralState(const struct RoseEngine *t, char *state, - struct hs_scratch *scratch) { - if (!t->longLitTableOffset) { - DEBUG_PRINTF("no table\n"); - return; - } - - struct core_info *ci = &scratch->core_info; - const struct RoseLongLitTable *ll_table = - getByOffset(t, t->longLitTableOffset); - assert(ll_table->maxLen); - - DEBUG_PRINTF("maxLen=%u, len=%zu, hlen=%zu\n", ll_table->maxLen, ci->len, - ci->hlen); - - u32 state_case = 0; - u32 state_nocase = 0; - - // If we don't have enough history, we don't need to do anything. - if (ll_table->maxLen <= ci->len + ci->hlen) { - u8 tempbuf[LONG_LIT_HASH_LEN]; - const u8 *scan_buf = prepScanBuffer(ci, ll_table, tempbuf); - - if (ll_table->caseful.hashBits && - checkBloomFilter(ll_table, &ll_table->caseful, scan_buf, 0)) { - state_case = checkHashTable(ll_table, &ll_table->caseful, scan_buf, - scratch, 0); - } - - if (ll_table->nocase.hashBits && - checkBloomFilter(ll_table, &ll_table->nocase, scan_buf, 1)) { - state_nocase = checkHashTable(ll_table, &ll_table->nocase, scan_buf, - scratch, 1); - } - } else { - DEBUG_PRINTF("not enough history (%zu bytes)\n", ci->len + ci->hlen); - } - - DEBUG_PRINTF("store {%u, %u}\n", state_case, state_nocase); - - u8 *ll_state = getLongLitState(t, state); - storeLongLitStreamState(ll_table, ll_state, state_case, state_nocase); -} - -#endif // STREAM_LONG_LIT_H + // Read from our temporary buffer for the hash. 
+ base = tempbuf; + } + } else { + // Can read enough to hash from inside the current buffer. + base = ci->buf + ci->len - hash_len; + } + + return base; +} + +#ifndef NDEBUG +// Defensive checking (used in assert) that these table values don't overflow +// the range available. +static really_inline +char streamingTableOverflow(u32 state_case, u32 state_nocase, u8 ssb, + u8 ssb_nc) { + u32 ssb_mask = (1ULL << (ssb)) - 1; + if (state_case & ~ssb_mask) { + return 1; + } + u32 ssb_nc_mask = (1ULL << (ssb_nc)) - 1; + if (state_nocase & ~ssb_nc_mask) { + return 1; + } + return 0; +} +#endif + +// Reads from stream state table and packs values into stream state. +static rose_inline +void storeLongLitStreamState(const struct RoseLongLitTable *ll_table, + u8 *ll_state, u32 state_case, u32 state_nocase) { + assert(ll_table); + assert(ll_state); + + u8 ss_bytes = ll_table->streamStateBytes; + u8 ssb = ll_table->caseful.streamStateBits; + UNUSED u8 ssb_nc = ll_table->nocase.streamStateBits; + assert(ss_bytes == ROUNDUP_N(ssb + ssb_nc, 8) / 8); + assert(!streamingTableOverflow(state_case, state_nocase, ssb, ssb_nc)); + +#if defined(ARCH_32_BIT) + // On 32-bit hosts, we may be able to avoid having to do any u64a + // manipulation at all. + if (ss_bytes <= 4) { + u32 stagingStreamState = state_case; + stagingStreamState |= (state_nocase << ssb); + partial_store_u32(ll_state, stagingStreamState, ss_bytes); + return; + } +#endif + + u64a stagingStreamState = (u64a)state_case; + stagingStreamState |= (u64a)state_nocase << ssb; + partial_store_u64a(ll_state, stagingStreamState, ss_bytes); +} + +static really_inline +char has_bit(const u8 *data, u32 bit) { + return (data[bit / 8] >> (bit % 8)) & 1; +} + +static rose_inline +char bloomHasKey(const u8 *bloom, u32 bloom_mask, u32 hash) { + return has_bit(bloom, hash & bloom_mask); +} + +static rose_inline +char checkBloomFilter(const struct RoseLongLitTable *ll_table, + const struct RoseLongLitSubtable *ll_sub, + const u8 *scan_buf, char nocase) { + assert(ll_sub->bloomBits); + + const u8 *bloom = (const u8 *)ll_table + ll_sub->bloomOffset; + const u32 bloom_mask = (1U << ll_sub->bloomBits) - 1; + + char v = 1; + v &= bloomHasKey(bloom, bloom_mask, bloomHash_1(scan_buf, nocase)); + v &= bloomHasKey(bloom, bloom_mask, bloomHash_2(scan_buf, nocase)); + v &= bloomHasKey(bloom, bloom_mask, bloomHash_3(scan_buf, nocase)); + return v; +} + +/** + * \brief Look for a hit in the hash table. + * + * Returns zero if not found, otherwise returns (bucket + 1). 
+ */ +static rose_inline +u32 checkHashTable(const struct RoseLongLitTable *ll_table, + const struct RoseLongLitSubtable *ll_sub, const u8 *scan_buf, + const struct hs_scratch *scratch, char nocase) { + const u32 nbits = ll_sub->hashBits; + assert(nbits && nbits < 32); + const u32 num_entries = 1U << nbits; + + const struct RoseLongLitHashEntry *tab = getHashTableBase(ll_table, ll_sub); + + u32 hash = hashLongLiteral(scan_buf, LONG_LIT_HASH_LEN, nocase); + u32 bucket = hash & ((1U << nbits) - 1); + + while (tab[bucket].str_offset != 0) { + DEBUG_PRINTF("checking bucket %u\n", bucket); + if (confirmLongLiteral(ll_table, scratch, &tab[bucket], nocase)) { + DEBUG_PRINTF("found hit for bucket %u\n", bucket); + return bucket + 1; + } + + if (++bucket == num_entries) { + bucket = 0; + } + } + + return 0; +} + +static rose_inline +void storeLongLiteralState(const struct RoseEngine *t, char *state, + struct hs_scratch *scratch) { + if (!t->longLitTableOffset) { + DEBUG_PRINTF("no table\n"); + return; + } + + struct core_info *ci = &scratch->core_info; + const struct RoseLongLitTable *ll_table = + getByOffset(t, t->longLitTableOffset); + assert(ll_table->maxLen); + + DEBUG_PRINTF("maxLen=%u, len=%zu, hlen=%zu\n", ll_table->maxLen, ci->len, + ci->hlen); + + u32 state_case = 0; + u32 state_nocase = 0; + + // If we don't have enough history, we don't need to do anything. + if (ll_table->maxLen <= ci->len + ci->hlen) { + u8 tempbuf[LONG_LIT_HASH_LEN]; + const u8 *scan_buf = prepScanBuffer(ci, ll_table, tempbuf); + + if (ll_table->caseful.hashBits && + checkBloomFilter(ll_table, &ll_table->caseful, scan_buf, 0)) { + state_case = checkHashTable(ll_table, &ll_table->caseful, scan_buf, + scratch, 0); + } + + if (ll_table->nocase.hashBits && + checkBloomFilter(ll_table, &ll_table->nocase, scan_buf, 1)) { + state_nocase = checkHashTable(ll_table, &ll_table->nocase, scan_buf, + scratch, 1); + } + } else { + DEBUG_PRINTF("not enough history (%zu bytes)\n", ci->len + ci->hlen); + } + + DEBUG_PRINTF("store {%u, %u}\n", state_case, state_nocase); + + u8 *ll_state = getLongLitState(t, state); + storeLongLitStreamState(ll_table, ll_state, state_case, state_nocase); +} + +#endif // STREAM_LONG_LIT_H diff --git a/contrib/libs/hyperscan/src/rose/stream_long_lit_hash.h b/contrib/libs/hyperscan/src/rose/stream_long_lit_hash.h index ec476edfbe..041f05e609 100644 --- a/contrib/libs/hyperscan/src/rose/stream_long_lit_hash.h +++ b/contrib/libs/hyperscan/src/rose/stream_long_lit_hash.h @@ -1,105 +1,105 @@ -/* - * Copyright (c) 2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
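The checkHashTable() routine shown above is an open-addressed table probe with linear probing: the literal hash picks a starting bucket, occupied buckets (str_offset != 0) are scanned in order with wraparound, and the search stops at the first empty bucket. A minimal standalone sketch of that probe loop, under simplified assumptions (the entry layout and the confirm callback here are illustrative, not the engine's real RoseLongLitHashEntry or confirmLongLiteral):

#include <stdint.h>

struct lit_entry {
    uint32_t str_offset;   /* 0 marks an empty bucket */
    uint32_t str_len;
};

/* Returns (bucket + 1) on a confirmed hit, 0 otherwise. */
static uint32_t probe_literal_table(const struct lit_entry *tab, uint32_t nbits,
                                    uint32_t hash,
                                    int (*confirm)(const struct lit_entry *)) {
    const uint32_t num_entries = 1U << nbits;
    uint32_t bucket = hash & (num_entries - 1);
    while (tab[bucket].str_offset != 0) {    /* stop at the first empty slot */
        if (confirm(&tab[bucket])) {
            return bucket + 1;               /* non-zero result encodes the bucket */
        }
        if (++bucket == num_entries) {       /* wrap around the table */
            bucket = 0;
        }
    }
    return 0;
}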
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef STREAM_LONG_LIT_HASH_H -#define STREAM_LONG_LIT_HASH_H - -#include "ue2common.h" -#include "util/bitutils.h" -#include "util/unaligned.h" - -/** \brief Length of the buffer operated on by \ref hashLongLiteral(). */ -#define LONG_LIT_HASH_LEN 24 - -/** \brief Multiplier used by al the hash functions below. */ -#define HASH_MULTIPLIER 0x0b4e0ef37bc32127ULL - -/** \brief Hash function used for long literal table in streaming mode. */ -static really_inline -u32 hashLongLiteral(const u8 *ptr, UNUSED size_t len, char nocase) { - // We unconditionally hash LONG_LIT_HASH_LEN bytes; all use cases of this - // hash are for strings longer than this. - assert(len >= 24); - - u64a v1 = unaligned_load_u64a(ptr); - u64a v2 = unaligned_load_u64a(ptr + 8); - u64a v3 = unaligned_load_u64a(ptr + 16); - if (nocase) { - v1 &= OCTO_CASE_CLEAR; - v2 &= OCTO_CASE_CLEAR; - v3 &= OCTO_CASE_CLEAR; - } - v1 *= HASH_MULTIPLIER; - v2 *= HASH_MULTIPLIER * HASH_MULTIPLIER; - v3 *= HASH_MULTIPLIER * HASH_MULTIPLIER * HASH_MULTIPLIER; - v1 >>= 32; - v2 >>= 32; - v3 >>= 32; - return v1 ^ v2 ^ v3; -} - -/** - * \brief Internal, used by the bloom filter hash functions below. Hashes 16 - * bytes beginning at (ptr + offset). - */ -static really_inline -u32 bloomHash_i(const u8 *ptr, u32 offset, u64a multiplier, char nocase) { - assert(offset + 16 <= LONG_LIT_HASH_LEN); - - u64a v = unaligned_load_u64a(ptr + offset); - if (nocase) { - v &= OCTO_CASE_CLEAR; - } - v *= multiplier; - return v >> 32; -} - -/* - * We ensure that we see every byte the first LONG_LIT_HASH_LEN bytes of input - * data (using at least one of the following functions). - */ - -static really_inline -u32 bloomHash_1(const u8 *ptr, char nocase) { - const u64a multiplier = HASH_MULTIPLIER; - return bloomHash_i(ptr, 0, multiplier, nocase); -} - -static really_inline -u32 bloomHash_2(const u8 *ptr, char nocase) { - const u64a multiplier = HASH_MULTIPLIER * HASH_MULTIPLIER; - return bloomHash_i(ptr, 4, multiplier, nocase); -} - -static really_inline -u32 bloomHash_3(const u8 *ptr, char nocase) { - const u64a multiplier = HASH_MULTIPLIER * HASH_MULTIPLIER * HASH_MULTIPLIER; - return bloomHash_i(ptr, 8, multiplier, nocase); -} - -#endif // STREAM_LONG_LIT_HASH_H +/* + * Copyright (c) 2016, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef STREAM_LONG_LIT_HASH_H +#define STREAM_LONG_LIT_HASH_H + +#include "ue2common.h" +#include "util/bitutils.h" +#include "util/unaligned.h" + +/** \brief Length of the buffer operated on by \ref hashLongLiteral(). */ +#define LONG_LIT_HASH_LEN 24 + +/** \brief Multiplier used by al the hash functions below. */ +#define HASH_MULTIPLIER 0x0b4e0ef37bc32127ULL + +/** \brief Hash function used for long literal table in streaming mode. */ +static really_inline +u32 hashLongLiteral(const u8 *ptr, UNUSED size_t len, char nocase) { + // We unconditionally hash LONG_LIT_HASH_LEN bytes; all use cases of this + // hash are for strings longer than this. + assert(len >= 24); + + u64a v1 = unaligned_load_u64a(ptr); + u64a v2 = unaligned_load_u64a(ptr + 8); + u64a v3 = unaligned_load_u64a(ptr + 16); + if (nocase) { + v1 &= OCTO_CASE_CLEAR; + v2 &= OCTO_CASE_CLEAR; + v3 &= OCTO_CASE_CLEAR; + } + v1 *= HASH_MULTIPLIER; + v2 *= HASH_MULTIPLIER * HASH_MULTIPLIER; + v3 *= HASH_MULTIPLIER * HASH_MULTIPLIER * HASH_MULTIPLIER; + v1 >>= 32; + v2 >>= 32; + v3 >>= 32; + return v1 ^ v2 ^ v3; +} + +/** + * \brief Internal, used by the bloom filter hash functions below. Hashes 16 + * bytes beginning at (ptr + offset). + */ +static really_inline +u32 bloomHash_i(const u8 *ptr, u32 offset, u64a multiplier, char nocase) { + assert(offset + 16 <= LONG_LIT_HASH_LEN); + + u64a v = unaligned_load_u64a(ptr + offset); + if (nocase) { + v &= OCTO_CASE_CLEAR; + } + v *= multiplier; + return v >> 32; +} + +/* + * We ensure that we see every byte the first LONG_LIT_HASH_LEN bytes of input + * data (using at least one of the following functions). 
+ */ + +static really_inline +u32 bloomHash_1(const u8 *ptr, char nocase) { + const u64a multiplier = HASH_MULTIPLIER; + return bloomHash_i(ptr, 0, multiplier, nocase); +} + +static really_inline +u32 bloomHash_2(const u8 *ptr, char nocase) { + const u64a multiplier = HASH_MULTIPLIER * HASH_MULTIPLIER; + return bloomHash_i(ptr, 4, multiplier, nocase); +} + +static really_inline +u32 bloomHash_3(const u8 *ptr, char nocase) { + const u64a multiplier = HASH_MULTIPLIER * HASH_MULTIPLIER * HASH_MULTIPLIER; + return bloomHash_i(ptr, 8, multiplier, nocase); +} + +#endif // STREAM_LONG_LIT_HASH_H diff --git a/contrib/libs/hyperscan/src/rose/validate_mask.h b/contrib/libs/hyperscan/src/rose/validate_mask.h index 5f0a273502..8191db52f8 100644 --- a/contrib/libs/hyperscan/src/rose/validate_mask.h +++ b/contrib/libs/hyperscan/src/rose/validate_mask.h @@ -1,46 +1,46 @@ -/* +/* * Copyright (c) 2016-2020, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef VALIDATE_MASK_H -#define VALIDATE_MASK_H - -#include "ue2common.h" -#include "util/simd_utils.h" - -#if defined(DEBUG) -static -void validateMask32Print(const u8 *mask) { - int i; - for (i = 0; i < 32; i++) { - printf("%02x", mask[i]); - } - printf("\n"); -} + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
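Every hash in stream_long_lit_hash.h above follows the same multiplicative recipe: load eight unaligned bytes, optionally clear the ASCII case bit of each byte for caseless matching, multiply by an odd 64-bit constant (or its square or cube, so the three words of hashLongLiteral() mix differently), and keep the high 32 bits of the product. A standalone sketch of one such step, assuming only standard C (the multiplier is the one above; the case-clear mask simply drops bit 5 of every byte, which is the role the engine's OCTO_CASE_CLEAR constant plays):

#include <stdint.h>
#include <string.h>

#define SKETCH_MULTIPLIER 0x0b4e0ef37bc32127ULL
#define SKETCH_CASE_CLEAR 0xdfdfdfdfdfdfdfdfULL  /* clear bit 5 (ASCII case) in each byte */

static uint32_t hash8(const uint8_t *p, int nocase) {
    uint64_t v;
    memcpy(&v, p, sizeof(v));        /* unaligned-safe eight-byte load */
    if (nocase) {
        v &= SKETCH_CASE_CLEAR;      /* 'a'..'z' now hash like 'A'..'Z' */
    }
    v *= SKETCH_MULTIPLIER;          /* odd constant mixes low input bits upward */
    return (uint32_t)(v >> 32);      /* keep the well-mixed high half */
}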
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef VALIDATE_MASK_H +#define VALIDATE_MASK_H + +#include "ue2common.h" +#include "util/simd_utils.h" + +#if defined(DEBUG) +static +void validateMask32Print(const u8 *mask) { + int i; + for (i = 0; i < 32; i++) { + printf("%02x", mask[i]); + } + printf("\n"); +} #ifdef HAVE_AVX512 static @@ -51,81 +51,81 @@ void validateMask64Print(const u8 *mask) { } printf("\n"); } -#endif #endif - -// check positive bytes in cmp_result. -// return one if the check passed, zero otherwise. -static really_inline -int posValidateMask(const u64a cmp_result, const u64a pos_mask) { - return !(cmp_result & pos_mask); -} - -/* - * check negative bytes in cmp_result. - * return one if any byte in cmp_result is not 0, zero otherwise. - * check lowest 7 bits and highest bit of every byte respectively. - */ -static really_inline -int negValidateMask(const u64a cmp_result, const u64a neg_mask) { - const u64a count_mask = 0x7f7f7f7f7f7f7f7f; - // check lowest 7 bits of every byte. - // the highest bit should be 1 if check passed. - u64a check_low = (cmp_result & count_mask) + count_mask; - // check the highest bit of every byte. - // combine the highest bit and 0x7f to 0xff if check passes. - // flip all 0xff to 0x00 and 0x7f to 0x80. - u64a check_all = ~(check_low | cmp_result | count_mask); - return !(check_all & neg_mask); -} - -static really_inline -int validateMask(u64a data, u64a valid_data_mask, u64a and_mask, - u64a cmp_mask, u64a neg_mask) { - // skip some byte where valid_data_mask is 0x00 there. 
- and_mask &= valid_data_mask; - cmp_mask &= valid_data_mask; - neg_mask &= valid_data_mask; - u64a cmp_result = (data & and_mask) ^ cmp_mask; - /* do the positive check first since it's cheaper */ - if (posValidateMask(cmp_result, ~neg_mask) - && negValidateMask(cmp_result, neg_mask)) { - return 1; - } else { - DEBUG_PRINTF("data %llx valid_data_mask(vdm) %llx\n", - data, valid_data_mask); - DEBUG_PRINTF("and_mask & vdm %llx cmp_mask & vdm %llx\n", and_mask, - cmp_mask); - DEBUG_PRINTF("cmp_result %llx neg_mask & vdm %llx\n", - cmp_result, neg_mask); - return 0; - } -} - -static really_inline -int validateMask32(const m256 data, const u32 valid_data_mask, - const m256 and_mask, const m256 cmp_mask, - const u32 neg_mask) { - m256 cmp_result_256 = eq256(and256(data, and_mask), cmp_mask); - u32 cmp_result = ~movemask256(cmp_result_256); -#ifdef DEBUG - DEBUG_PRINTF("data\n"); - validateMask32Print((const u8 *)&data); - DEBUG_PRINTF("cmp_result\n"); - validateMask32Print((const u8 *)&cmp_result_256); -#endif - DEBUG_PRINTF("cmp_result %08x neg_mask %08x\n", cmp_result, neg_mask); - DEBUG_PRINTF("valid_data_mask %08x\n", valid_data_mask); - - if ((cmp_result & valid_data_mask) == (neg_mask & valid_data_mask)) { - DEBUG_PRINTF("checkCompareResult32 passed\n"); - return 1; - } else { - DEBUG_PRINTF("checkCompareResult32 failed\n"); - return 0; - } -} - +#endif + +// check positive bytes in cmp_result. +// return one if the check passed, zero otherwise. +static really_inline +int posValidateMask(const u64a cmp_result, const u64a pos_mask) { + return !(cmp_result & pos_mask); +} + +/* + * check negative bytes in cmp_result. + * return one if any byte in cmp_result is not 0, zero otherwise. + * check lowest 7 bits and highest bit of every byte respectively. + */ +static really_inline +int negValidateMask(const u64a cmp_result, const u64a neg_mask) { + const u64a count_mask = 0x7f7f7f7f7f7f7f7f; + // check lowest 7 bits of every byte. + // the highest bit should be 1 if check passed. + u64a check_low = (cmp_result & count_mask) + count_mask; + // check the highest bit of every byte. + // combine the highest bit and 0x7f to 0xff if check passes. + // flip all 0xff to 0x00 and 0x7f to 0x80. + u64a check_all = ~(check_low | cmp_result | count_mask); + return !(check_all & neg_mask); +} + +static really_inline +int validateMask(u64a data, u64a valid_data_mask, u64a and_mask, + u64a cmp_mask, u64a neg_mask) { + // skip some byte where valid_data_mask is 0x00 there. 
+ and_mask &= valid_data_mask; + cmp_mask &= valid_data_mask; + neg_mask &= valid_data_mask; + u64a cmp_result = (data & and_mask) ^ cmp_mask; + /* do the positive check first since it's cheaper */ + if (posValidateMask(cmp_result, ~neg_mask) + && negValidateMask(cmp_result, neg_mask)) { + return 1; + } else { + DEBUG_PRINTF("data %llx valid_data_mask(vdm) %llx\n", + data, valid_data_mask); + DEBUG_PRINTF("and_mask & vdm %llx cmp_mask & vdm %llx\n", and_mask, + cmp_mask); + DEBUG_PRINTF("cmp_result %llx neg_mask & vdm %llx\n", + cmp_result, neg_mask); + return 0; + } +} + +static really_inline +int validateMask32(const m256 data, const u32 valid_data_mask, + const m256 and_mask, const m256 cmp_mask, + const u32 neg_mask) { + m256 cmp_result_256 = eq256(and256(data, and_mask), cmp_mask); + u32 cmp_result = ~movemask256(cmp_result_256); +#ifdef DEBUG + DEBUG_PRINTF("data\n"); + validateMask32Print((const u8 *)&data); + DEBUG_PRINTF("cmp_result\n"); + validateMask32Print((const u8 *)&cmp_result_256); +#endif + DEBUG_PRINTF("cmp_result %08x neg_mask %08x\n", cmp_result, neg_mask); + DEBUG_PRINTF("valid_data_mask %08x\n", valid_data_mask); + + if ((cmp_result & valid_data_mask) == (neg_mask & valid_data_mask)) { + DEBUG_PRINTF("checkCompareResult32 passed\n"); + return 1; + } else { + DEBUG_PRINTF("checkCompareResult32 failed\n"); + return 0; + } +} + #ifdef HAVE_AVX512 static really_inline int validateMask64(const m512 data, const u64a valid_data_mask, @@ -137,7 +137,7 @@ int validateMask64(const m512 data, const u64a valid_data_mask, validateMask64Print((const u8 *)&data); DEBUG_PRINTF("cmp_result\n"); validateMask64Print((const u8 *)&cmp_result); -#endif +#endif DEBUG_PRINTF("cmp_result %016llx neg_mask %016llx\n", cmp_result, neg_mask); DEBUG_PRINTF("valid_data_mask %016llx\n", valid_data_mask); diff --git a/contrib/libs/hyperscan/src/rose/validate_shufti.h b/contrib/libs/hyperscan/src/rose/validate_shufti.h index f936140b11..351df36a76 100644 --- a/contrib/libs/hyperscan/src/rose/validate_shufti.h +++ b/contrib/libs/hyperscan/src/rose/validate_shufti.h @@ -1,182 +1,182 @@ -/* +/* * Copyright (c) 2016-2020, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
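negValidateMask() above relies on a SWAR trick for spotting all-zero bytes in a word without a loop: adding 0x7f to the low seven bits of each byte sets that byte's top bit exactly when those seven bits are non-zero, OR-ing the original value back in catches a set top bit, and the final complement leaves a set bit only in bytes that were 0x00. A small self-contained illustration of the same per-byte zero test (the names here are illustrative, not the engine's):

#include <assert.h>
#include <stdint.h>

/* Returns a word holding 0x80 in every byte of x that is zero, 0x00 elsewhere. */
static uint64_t zero_byte_flags(uint64_t x) {
    const uint64_t low7 = 0x7f7f7f7f7f7f7f7fULL;
    uint64_t carry = (x & low7) + low7;  /* top bit set iff the low 7 bits are non-zero */
    return ~(carry | x | low7);          /* only all-zero bytes survive the complement */
}

int main(void) {
    assert(zero_byte_flags(0x0012003400005600ULL) == 0x8000800080800080ULL);
    assert(zero_byte_flags(~0ULL) == 0);
    return 0;
}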
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef VALIDATE_SHUFTI_H -#define VALIDATE_SHUFTI_H - -#include "ue2common.h" -#include "util/simd_utils.h" - -#if defined(DEBUG) -static -void dumpMask(const void *mask, int len) { - const u8 *c = (const u8 *)mask; - for (int i = 0; i < len; i++) { - printf("%02x", c[i]); - } - printf("\n"); -} -#endif - -static really_inline -int validateShuftiMask16x16(const m256 data, const m256 hi_mask, - const m256 lo_mask, const m256 and_mask, - const u32 neg_mask, const u32 valid_data_mask) { - m256 low4bits = set32x8(0xf); - m256 c_lo = pshufb_m256(lo_mask, and256(data, low4bits)); - m256 c_hi = pshufb_m256(hi_mask, - rshift64_m256(andnot256(low4bits, data), 4)); - m256 t = and256(c_lo, c_hi); - u32 nresult = movemask256(eq256(and256(t, and_mask), zeroes256())); -#ifdef DEBUG - DEBUG_PRINTF("data\n"); - dumpMask(&data, 32); - DEBUG_PRINTF("hi_mask\n"); - dumpMask(&hi_mask, 32); - DEBUG_PRINTF("lo_mask\n"); - dumpMask(&lo_mask, 32); - DEBUG_PRINTF("c_lo\n"); - dumpMask(&c_lo, 32); - DEBUG_PRINTF("c_hi\n"); - dumpMask(&c_hi, 32); - DEBUG_PRINTF("and_mask\n"); - dumpMask(&and_mask, 32); - DEBUG_PRINTF("nresult %x\n", nresult); - DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); -#endif - u32 cmp_result = (((nresult >> 16) & nresult) ^ neg_mask) & valid_data_mask; - return !cmp_result; -} - -static really_inline -int validateShuftiMask16x8(const m128 data, const m256 nib_mask, - const m128 and_mask, const u32 neg_mask, - const u32 valid_data_mask) { - m256 data_m256 = combine2x128(rshift64_m128(data, 4), data); - m256 low4bits = set32x8(0xf); - m256 c_nib = pshufb_m256(nib_mask, and256(data_m256, low4bits)); - m128 t = and128(movdq_hi(c_nib), movdq_lo(c_nib)); - m128 nresult = eq128(and128(t, and_mask), zeroes128()); -#ifdef DEBUG - DEBUG_PRINTF("data\n"); - dumpMask(&data_m256, 32); - DEBUG_PRINTF("nib_mask\n"); - dumpMask(&nib_mask, 32); - DEBUG_PRINTF("c_nib\n"); - dumpMask(&c_nib, 32); - DEBUG_PRINTF("nresult\n"); - dumpMask(&nresult, 16); - DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); -#endif - u32 cmp_result = (movemask128(nresult) ^ neg_mask) & valid_data_mask; - return !cmp_result; -} - -static really_inline -int validateShuftiMask32x8(const m256 data, const m256 hi_mask, - const m256 lo_mask, const m256 and_mask, - const u32 neg_mask, const u32 valid_data_mask) { - m256 low4bits = set32x8(0xf); - m256 c_lo = pshufb_m256(lo_mask, and256(data, low4bits)); - m256 c_hi = pshufb_m256(hi_mask, - rshift64_m256(andnot256(low4bits, data), 4)); - m256 t = and256(c_lo, c_hi); - m256 nresult = eq256(and256(t, and_mask), zeroes256()); -#ifdef DEBUG - DEBUG_PRINTF("data\n"); - dumpMask(&data, 32); - DEBUG_PRINTF("hi_mask\n"); - dumpMask(&hi_mask, 32); - DEBUG_PRINTF("lo_mask\n"); - dumpMask(&lo_mask, 32); - DEBUG_PRINTF("c_lo\n"); - dumpMask(&c_lo, 32); - DEBUG_PRINTF("c_hi\n"); - dumpMask(&c_hi, 32); - DEBUG_PRINTF("nresult\n"); - dumpMask(&nresult, 32); - DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); -#endif - u32 cmp_result = 
(movemask256(nresult) ^ neg_mask) & valid_data_mask; - return !cmp_result; -} - -static really_inline -int validateShuftiMask32x16(const m256 data, - const m256 hi_mask_1, const m256 hi_mask_2, - const m256 lo_mask_1, const m256 lo_mask_2, - const m256 bucket_mask_hi, - const m256 bucket_mask_lo, const u32 neg_mask, - const u32 valid_data_mask) { - m256 low4bits = set32x8(0xf); - m256 data_lo = and256(data, low4bits); - m256 data_hi = and256(rshift64_m256(data, 4), low4bits); - m256 c_lo_1 = pshufb_m256(lo_mask_1, data_lo); - m256 c_lo_2 = pshufb_m256(lo_mask_2, data_lo); - m256 c_hi_1 = pshufb_m256(hi_mask_1, data_hi); - m256 c_hi_2 = pshufb_m256(hi_mask_2, data_hi); - m256 t1 = and256(c_lo_1, c_hi_1); - m256 t2 = and256(c_lo_2, c_hi_2); - m256 result = or256(and256(t1, bucket_mask_lo), and256(t2, bucket_mask_hi)); - u32 nresult = movemask256(eq256(result, zeroes256())); -#ifdef DEBUG - DEBUG_PRINTF("data\n"); - dumpMask(&data, 32); - DEBUG_PRINTF("data_lo\n"); - dumpMask(&data_lo, 32); - DEBUG_PRINTF("data_hi\n"); - dumpMask(&data_hi, 32); - DEBUG_PRINTF("hi_mask_1\n"); - dumpMask(&hi_mask_1, 16); - DEBUG_PRINTF("hi_mask_2\n"); - dumpMask(&hi_mask_2, 16); - DEBUG_PRINTF("lo_mask_1\n"); - dumpMask(&lo_mask_1, 16); - DEBUG_PRINTF("lo_mask_2\n"); - dumpMask(&lo_mask_2, 16); - DEBUG_PRINTF("c_lo_1\n"); - dumpMask(&c_lo_1, 32); - DEBUG_PRINTF("c_lo_2\n"); - dumpMask(&c_lo_2, 32); - DEBUG_PRINTF("c_hi_1\n"); - dumpMask(&c_hi_1, 32); - DEBUG_PRINTF("c_hi_2\n"); - dumpMask(&c_hi_2, 32); - DEBUG_PRINTF("result\n"); - dumpMask(&result, 32); - DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); -#endif - u32 cmp_result = (nresult ^ neg_mask) & valid_data_mask; - return !cmp_result; -} - + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef VALIDATE_SHUFTI_H +#define VALIDATE_SHUFTI_H + +#include "ue2common.h" +#include "util/simd_utils.h" + +#if defined(DEBUG) +static +void dumpMask(const void *mask, int len) { + const u8 *c = (const u8 *)mask; + for (int i = 0; i < len; i++) { + printf("%02x", c[i]); + } + printf("\n"); +} +#endif + +static really_inline +int validateShuftiMask16x16(const m256 data, const m256 hi_mask, + const m256 lo_mask, const m256 and_mask, + const u32 neg_mask, const u32 valid_data_mask) { + m256 low4bits = set32x8(0xf); + m256 c_lo = pshufb_m256(lo_mask, and256(data, low4bits)); + m256 c_hi = pshufb_m256(hi_mask, + rshift64_m256(andnot256(low4bits, data), 4)); + m256 t = and256(c_lo, c_hi); + u32 nresult = movemask256(eq256(and256(t, and_mask), zeroes256())); +#ifdef DEBUG + DEBUG_PRINTF("data\n"); + dumpMask(&data, 32); + DEBUG_PRINTF("hi_mask\n"); + dumpMask(&hi_mask, 32); + DEBUG_PRINTF("lo_mask\n"); + dumpMask(&lo_mask, 32); + DEBUG_PRINTF("c_lo\n"); + dumpMask(&c_lo, 32); + DEBUG_PRINTF("c_hi\n"); + dumpMask(&c_hi, 32); + DEBUG_PRINTF("and_mask\n"); + dumpMask(&and_mask, 32); + DEBUG_PRINTF("nresult %x\n", nresult); + DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); +#endif + u32 cmp_result = (((nresult >> 16) & nresult) ^ neg_mask) & valid_data_mask; + return !cmp_result; +} + +static really_inline +int validateShuftiMask16x8(const m128 data, const m256 nib_mask, + const m128 and_mask, const u32 neg_mask, + const u32 valid_data_mask) { + m256 data_m256 = combine2x128(rshift64_m128(data, 4), data); + m256 low4bits = set32x8(0xf); + m256 c_nib = pshufb_m256(nib_mask, and256(data_m256, low4bits)); + m128 t = and128(movdq_hi(c_nib), movdq_lo(c_nib)); + m128 nresult = eq128(and128(t, and_mask), zeroes128()); +#ifdef DEBUG + DEBUG_PRINTF("data\n"); + dumpMask(&data_m256, 32); + DEBUG_PRINTF("nib_mask\n"); + dumpMask(&nib_mask, 32); + DEBUG_PRINTF("c_nib\n"); + dumpMask(&c_nib, 32); + DEBUG_PRINTF("nresult\n"); + dumpMask(&nresult, 16); + DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); +#endif + u32 cmp_result = (movemask128(nresult) ^ neg_mask) & valid_data_mask; + return !cmp_result; +} + +static really_inline +int validateShuftiMask32x8(const m256 data, const m256 hi_mask, + const m256 lo_mask, const m256 and_mask, + const u32 neg_mask, const u32 valid_data_mask) { + m256 low4bits = set32x8(0xf); + m256 c_lo = pshufb_m256(lo_mask, and256(data, low4bits)); + m256 c_hi = pshufb_m256(hi_mask, + rshift64_m256(andnot256(low4bits, data), 4)); + m256 t = and256(c_lo, c_hi); + m256 nresult = eq256(and256(t, and_mask), zeroes256()); +#ifdef DEBUG + DEBUG_PRINTF("data\n"); + dumpMask(&data, 32); + DEBUG_PRINTF("hi_mask\n"); + dumpMask(&hi_mask, 32); + DEBUG_PRINTF("lo_mask\n"); + dumpMask(&lo_mask, 32); + DEBUG_PRINTF("c_lo\n"); + dumpMask(&c_lo, 32); + DEBUG_PRINTF("c_hi\n"); + dumpMask(&c_hi, 32); + DEBUG_PRINTF("nresult\n"); + dumpMask(&nresult, 32); + DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); +#endif + u32 cmp_result = (movemask256(nresult) ^ neg_mask) & valid_data_mask; + return !cmp_result; +} + +static really_inline +int validateShuftiMask32x16(const m256 data, + const m256 hi_mask_1, const m256 hi_mask_2, + const m256 lo_mask_1, const m256 lo_mask_2, + const m256 bucket_mask_hi, + const m256 bucket_mask_lo, const u32 neg_mask, + const u32 valid_data_mask) { + m256 low4bits = set32x8(0xf); + m256 data_lo = and256(data, low4bits); + m256 data_hi = and256(rshift64_m256(data, 4), low4bits); + m256 c_lo_1 = pshufb_m256(lo_mask_1, data_lo); + m256 c_lo_2 = 
pshufb_m256(lo_mask_2, data_lo); + m256 c_hi_1 = pshufb_m256(hi_mask_1, data_hi); + m256 c_hi_2 = pshufb_m256(hi_mask_2, data_hi); + m256 t1 = and256(c_lo_1, c_hi_1); + m256 t2 = and256(c_lo_2, c_hi_2); + m256 result = or256(and256(t1, bucket_mask_lo), and256(t2, bucket_mask_hi)); + u32 nresult = movemask256(eq256(result, zeroes256())); +#ifdef DEBUG + DEBUG_PRINTF("data\n"); + dumpMask(&data, 32); + DEBUG_PRINTF("data_lo\n"); + dumpMask(&data_lo, 32); + DEBUG_PRINTF("data_hi\n"); + dumpMask(&data_hi, 32); + DEBUG_PRINTF("hi_mask_1\n"); + dumpMask(&hi_mask_1, 16); + DEBUG_PRINTF("hi_mask_2\n"); + dumpMask(&hi_mask_2, 16); + DEBUG_PRINTF("lo_mask_1\n"); + dumpMask(&lo_mask_1, 16); + DEBUG_PRINTF("lo_mask_2\n"); + dumpMask(&lo_mask_2, 16); + DEBUG_PRINTF("c_lo_1\n"); + dumpMask(&c_lo_1, 32); + DEBUG_PRINTF("c_lo_2\n"); + dumpMask(&c_lo_2, 32); + DEBUG_PRINTF("c_hi_1\n"); + dumpMask(&c_hi_1, 32); + DEBUG_PRINTF("c_hi_2\n"); + dumpMask(&c_hi_2, 32); + DEBUG_PRINTF("result\n"); + dumpMask(&result, 32); + DEBUG_PRINTF("valid_data_mask %x\n", valid_data_mask); +#endif + u32 cmp_result = (nresult ^ neg_mask) & valid_data_mask; + return !cmp_result; +} + #ifdef HAVE_AVX512 -static really_inline +static really_inline int validateShuftiMask64x8(const m512 data, const m512 hi_mask, const m512 lo_mask, const m512 and_mask, const u64a neg_mask, const u64a valid_data_mask) { @@ -254,119 +254,119 @@ int validateShuftiMask64x16(const m512 data, #endif static really_inline -int checkMultipath32(u32 data, u32 hi_bits, u32 lo_bits) { - u32 t = ~(data | hi_bits); - t += lo_bits; - t &= (~data) & hi_bits; - DEBUG_PRINTF("t %x\n", t); - return !!t; -} - -static really_inline -int checkMultipath64(u64a data, u64a hi_bits, u64a lo_bits) { - u64a t = ~(data | hi_bits); - t += lo_bits; - t &= (~data) & hi_bits; - DEBUG_PRINTF("t %llx\n", t); - return !!t; -} - -static really_inline -int validateMultipathShuftiMask16x8(const m128 data, - const m256 nib_mask, - const m128 bucket_select_mask, - const u32 hi_bits, const u32 lo_bits, - const u32 neg_mask, - const u32 valid_path_mask) { - m256 data_256 = combine2x128(rshift64_m128(data, 4), data); - m256 low4bits = set32x8(0xf); - m256 c_nib = pshufb_m256(nib_mask, and256(data_256, low4bits)); - m128 t = and128(movdq_hi(c_nib), movdq_lo(c_nib)); - m128 result = and128(t, bucket_select_mask); - u32 nresult = movemask128(eq128(result, zeroes128())); - u32 cmp_result = (nresult ^ neg_mask) | valid_path_mask; - - DEBUG_PRINTF("cmp_result %x\n", cmp_result); - - return checkMultipath32(cmp_result, hi_bits, lo_bits); -} - -static really_inline -int validateMultipathShuftiMask32x8(const m256 data, - const m256 hi_mask, const m256 lo_mask, - const m256 bucket_select_mask, - const u32 hi_bits, const u32 lo_bits, - const u32 neg_mask, - const u32 valid_path_mask) { - m256 low4bits = set32x8(0xf); - m256 data_lo = and256(data, low4bits); - m256 data_hi = and256(rshift64_m256(data, 4), low4bits); - m256 c_lo = pshufb_m256(lo_mask, data_lo); - m256 c_hi = pshufb_m256(hi_mask, data_hi); - m256 c = and256(c_lo, c_hi); - m256 result = and256(c, bucket_select_mask); - u32 nresult = movemask256(eq256(result, zeroes256())); - u32 cmp_result = (nresult ^ neg_mask) | valid_path_mask; - - DEBUG_PRINTF("cmp_result %x\n", cmp_result); - - return checkMultipath32(cmp_result, hi_bits, lo_bits); -} - -static really_inline -int validateMultipathShuftiMask32x16(const m256 data, - const m256 hi_mask_1, const m256 hi_mask_2, - const m256 lo_mask_1, const m256 lo_mask_2, - const m256 
bucket_select_mask_hi, - const m256 bucket_select_mask_lo, - const u32 hi_bits, const u32 lo_bits, - const u32 neg_mask, - const u32 valid_path_mask) { - m256 low4bits = set32x8(0xf); - m256 data_lo = and256(data, low4bits); - m256 data_hi = and256(rshift64_m256(data, 4), low4bits); - m256 c_lo_1 = pshufb_m256(lo_mask_1, data_lo); - m256 c_lo_2 = pshufb_m256(lo_mask_2, data_lo); - m256 c_hi_1 = pshufb_m256(hi_mask_1, data_hi); - m256 c_hi_2 = pshufb_m256(hi_mask_2, data_hi); - m256 t1 = and256(c_lo_1, c_hi_1); - m256 t2 = and256(c_lo_2, c_hi_2); - m256 result = or256(and256(t1, bucket_select_mask_lo), - and256(t2, bucket_select_mask_hi)); - u32 nresult = movemask256(eq256(result, zeroes256())); - u32 cmp_result = (nresult ^ neg_mask) | valid_path_mask; - - DEBUG_PRINTF("cmp_result %x\n", cmp_result); - - return checkMultipath32(cmp_result, hi_bits, lo_bits); -} - -static really_inline -int validateMultipathShuftiMask64(const m256 data_1, const m256 data_2, - const m256 hi_mask, const m256 lo_mask, - const m256 bucket_select_mask_1, - const m256 bucket_select_mask_2, - const u64a hi_bits, const u64a lo_bits, - const u64a neg_mask, - const u64a valid_path_mask) { - m256 low4bits = set32x8(0xf); - m256 c_lo_1 = pshufb_m256(lo_mask, and256(data_1, low4bits)); - m256 c_lo_2 = pshufb_m256(lo_mask, and256(data_2, low4bits)); - m256 c_hi_1 = pshufb_m256(hi_mask, - rshift64_m256(andnot256(low4bits, data_1), 4)); - m256 c_hi_2 = pshufb_m256(hi_mask, - rshift64_m256(andnot256(low4bits, data_2), 4)); - m256 t1 = and256(c_lo_1, c_hi_1); - m256 t2 = and256(c_lo_2, c_hi_2); - m256 nresult_1 = eq256(and256(t1, bucket_select_mask_1), zeroes256()); - m256 nresult_2 = eq256(and256(t2, bucket_select_mask_2), zeroes256()); - u64a nresult = (u64a)movemask256(nresult_1) | - (u64a)movemask256(nresult_2) << 32; - u64a cmp_result = (nresult ^ neg_mask) | valid_path_mask; - - DEBUG_PRINTF("cmp_result %llx\n", cmp_result); - - return checkMultipath64(cmp_result, hi_bits, lo_bits); -} - -#endif +int checkMultipath32(u32 data, u32 hi_bits, u32 lo_bits) { + u32 t = ~(data | hi_bits); + t += lo_bits; + t &= (~data) & hi_bits; + DEBUG_PRINTF("t %x\n", t); + return !!t; +} + +static really_inline +int checkMultipath64(u64a data, u64a hi_bits, u64a lo_bits) { + u64a t = ~(data | hi_bits); + t += lo_bits; + t &= (~data) & hi_bits; + DEBUG_PRINTF("t %llx\n", t); + return !!t; +} + +static really_inline +int validateMultipathShuftiMask16x8(const m128 data, + const m256 nib_mask, + const m128 bucket_select_mask, + const u32 hi_bits, const u32 lo_bits, + const u32 neg_mask, + const u32 valid_path_mask) { + m256 data_256 = combine2x128(rshift64_m128(data, 4), data); + m256 low4bits = set32x8(0xf); + m256 c_nib = pshufb_m256(nib_mask, and256(data_256, low4bits)); + m128 t = and128(movdq_hi(c_nib), movdq_lo(c_nib)); + m128 result = and128(t, bucket_select_mask); + u32 nresult = movemask128(eq128(result, zeroes128())); + u32 cmp_result = (nresult ^ neg_mask) | valid_path_mask; + + DEBUG_PRINTF("cmp_result %x\n", cmp_result); + + return checkMultipath32(cmp_result, hi_bits, lo_bits); +} + +static really_inline +int validateMultipathShuftiMask32x8(const m256 data, + const m256 hi_mask, const m256 lo_mask, + const m256 bucket_select_mask, + const u32 hi_bits, const u32 lo_bits, + const u32 neg_mask, + const u32 valid_path_mask) { + m256 low4bits = set32x8(0xf); + m256 data_lo = and256(data, low4bits); + m256 data_hi = and256(rshift64_m256(data, 4), low4bits); + m256 c_lo = pshufb_m256(lo_mask, data_lo); + m256 c_hi = 
pshufb_m256(hi_mask, data_hi); + m256 c = and256(c_lo, c_hi); + m256 result = and256(c, bucket_select_mask); + u32 nresult = movemask256(eq256(result, zeroes256())); + u32 cmp_result = (nresult ^ neg_mask) | valid_path_mask; + + DEBUG_PRINTF("cmp_result %x\n", cmp_result); + + return checkMultipath32(cmp_result, hi_bits, lo_bits); +} + +static really_inline +int validateMultipathShuftiMask32x16(const m256 data, + const m256 hi_mask_1, const m256 hi_mask_2, + const m256 lo_mask_1, const m256 lo_mask_2, + const m256 bucket_select_mask_hi, + const m256 bucket_select_mask_lo, + const u32 hi_bits, const u32 lo_bits, + const u32 neg_mask, + const u32 valid_path_mask) { + m256 low4bits = set32x8(0xf); + m256 data_lo = and256(data, low4bits); + m256 data_hi = and256(rshift64_m256(data, 4), low4bits); + m256 c_lo_1 = pshufb_m256(lo_mask_1, data_lo); + m256 c_lo_2 = pshufb_m256(lo_mask_2, data_lo); + m256 c_hi_1 = pshufb_m256(hi_mask_1, data_hi); + m256 c_hi_2 = pshufb_m256(hi_mask_2, data_hi); + m256 t1 = and256(c_lo_1, c_hi_1); + m256 t2 = and256(c_lo_2, c_hi_2); + m256 result = or256(and256(t1, bucket_select_mask_lo), + and256(t2, bucket_select_mask_hi)); + u32 nresult = movemask256(eq256(result, zeroes256())); + u32 cmp_result = (nresult ^ neg_mask) | valid_path_mask; + + DEBUG_PRINTF("cmp_result %x\n", cmp_result); + + return checkMultipath32(cmp_result, hi_bits, lo_bits); +} + +static really_inline +int validateMultipathShuftiMask64(const m256 data_1, const m256 data_2, + const m256 hi_mask, const m256 lo_mask, + const m256 bucket_select_mask_1, + const m256 bucket_select_mask_2, + const u64a hi_bits, const u64a lo_bits, + const u64a neg_mask, + const u64a valid_path_mask) { + m256 low4bits = set32x8(0xf); + m256 c_lo_1 = pshufb_m256(lo_mask, and256(data_1, low4bits)); + m256 c_lo_2 = pshufb_m256(lo_mask, and256(data_2, low4bits)); + m256 c_hi_1 = pshufb_m256(hi_mask, + rshift64_m256(andnot256(low4bits, data_1), 4)); + m256 c_hi_2 = pshufb_m256(hi_mask, + rshift64_m256(andnot256(low4bits, data_2), 4)); + m256 t1 = and256(c_lo_1, c_hi_1); + m256 t2 = and256(c_lo_2, c_hi_2); + m256 nresult_1 = eq256(and256(t1, bucket_select_mask_1), zeroes256()); + m256 nresult_2 = eq256(and256(t2, bucket_select_mask_2), zeroes256()); + u64a nresult = (u64a)movemask256(nresult_1) | + (u64a)movemask256(nresult_2) << 32; + u64a cmp_result = (nresult ^ neg_mask) | valid_path_mask; + + DEBUG_PRINTF("cmp_result %llx\n", cmp_result); + + return checkMultipath64(cmp_result, hi_bits, lo_bits); +} + +#endif |
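All of the validateShuftiMask*() and validateMultipathShuftiMask*() variants in validate_shufti.h rest on the same shufti idea: a byte is classified by using its low nibble to index one 16-entry bucket table and its high nibble to index another, and the byte belongs to a bucket (character class) exactly when both lookups carry that bucket's bit; pshufb merely performs many of these lookups per instruction. A scalar sketch of the per-byte classification, using made-up single-bucket tables for the class [0-9] (bit 0 is the bucket):

#include <assert.h>
#include <stdint.h>

/* A byte is in the class iff both nibble lookups have the bucket bit set. */
static const uint8_t lo_table[16] = {1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0}; /* low nibble 0..9 */
static const uint8_t hi_table[16] = {0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0}; /* high nibble 3   */

static int in_class(uint8_t c) {
    return hi_table[c >> 4] & lo_table[c & 0xf];
}

int main(void) {
    assert(in_class('5'));
    assert(!in_class(':'));  /* 0x3a: high nibble matches, low nibble does not */
    assert(!in_class('E'));  /* 0x45: low nibble matches, high nibble does not */
    return 0;
}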