author | Ivan Blinkov <ivan@blinkov.ru> | 2022-02-10 16:47:11 +0300
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:47:11 +0300
commit | 5b283123c882433dafbaf6b338adeea16c1a0ea0 (patch)
tree | 339adc63bce23800021202ae4a8328a843dc447a /contrib/libs/hyperscan/src/rose/block.c
parent | 1aeb9a455974457866f78722ad98114bafc84e8a (diff)
download | ydb-5b283123c882433dafbaf6b338adeea16c1a0ea0.tar.gz
Restoring authorship annotation for Ivan Blinkov <ivan@blinkov.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/hyperscan/src/rose/block.c')
-rw-r--r-- | contrib/libs/hyperscan/src/rose/block.c | 460
1 file changed, 230 insertions, 230 deletions
diff --git a/contrib/libs/hyperscan/src/rose/block.c b/contrib/libs/hyperscan/src/rose/block.c
index 9148ecb8ff..b3f424cb73 100644
--- a/contrib/libs/hyperscan/src/rose/block.c
+++ b/contrib/libs/hyperscan/src/rose/block.c
@@ -29,9 +29,9 @@
 #include "catchup.h"
 #include "init.h"
 #include "match.h"
-#include "program_runtime.h"
-#include "rose.h"
-#include "rose_common.h"
+#include "program_runtime.h"
+#include "rose.h"
+#include "rose_common.h"
 #include "nfa/nfa_api.h"
 #include "nfa/nfa_internal.h"
 #include "nfa/nfa_rev_api.h"
@@ -62,11 +62,11 @@ void runAnchoredTableBlock(const struct RoseEngine *t, const void *atable,
         if (nfa->type == MCCLELLAN_NFA_8) {
             nfaExecMcClellan8_B(nfa, curr->anchoredMinDistance,
                                 local_buffer, local_alen,
-                                roseAnchoredCallback, scratch);
+                                roseAnchoredCallback, scratch);
         } else {
             nfaExecMcClellan16_B(nfa, curr->anchoredMinDistance,
                                  local_buffer, local_alen,
-                                 roseAnchoredCallback, scratch);
+                                 roseAnchoredCallback, scratch);
         }
     }
 
@@ -79,12 +79,12 @@ void runAnchoredTableBlock(const struct RoseEngine *t, const void *atable,
 }
 
 static really_inline
-void init_state_for_block(const struct RoseEngine *t, char *state) {
+void init_state_for_block(const struct RoseEngine *t, char *state) {
     assert(t);
     assert(state);
 
-    DEBUG_PRINTF("init for Rose %p with %u state indices\n", t,
-                 t->rolesWithStateCount);
+    DEBUG_PRINTF("init for Rose %p with %u state indices\n", t,
+                 t->rolesWithStateCount);
 
     // Rose is guaranteed 8-aligned state
     assert(ISALIGNED_N(state, 8));
@@ -94,7 +94,7 @@ void init_state_for_block(const struct RoseEngine *t, char *state) {
 
 static really_inline
 void init_outfixes_for_block(const struct RoseEngine *t,
-                             struct hs_scratch *scratch, char *state,
+                             struct hs_scratch *scratch, char *state,
                              char is_small_block) {
 
     /* active leaf array has been cleared by the init scatter */
@@ -114,7 +114,7 @@ void init_outfixes_for_block(const struct RoseEngine *t,
             fatbit_set(scratch->aqa, qCount, 0);
 
             struct mq *q = scratch->queues;
-            initQueue(q, 0, t, scratch);
+            initQueue(q, 0, t, scratch);
             q->length = len; /* adjust for rev_accel */
             nfaQueueInitState(nfa, q);
             pushQueueAt(q, 0, MQE_START, 0);
@@ -134,7 +134,7 @@ void init_outfixes_for_block(const struct RoseEngine *t,
 
 static really_inline
 void init_for_block(const struct RoseEngine *t, struct hs_scratch *scratch,
-                    char *state, char is_small_block) {
+                    char *state, char is_small_block) {
     init_state_for_block(t, state);
 
     struct RoseContext *tctxt = &scratch->tctxt;
@@ -159,211 +159,211 @@ void init_for_block(const struct RoseEngine *t, struct hs_scratch *scratch,
     init_outfixes_for_block(t, scratch, state, is_small_block);
 }
 
-static rose_inline
-void roseBlockEodExec(const struct RoseEngine *t, u64a offset,
-                      struct hs_scratch *scratch) {
-    assert(t->requiresEodCheck);
-    assert(t->maxBiAnchoredWidth == ROSE_BOUND_INF
-           || offset <= t->maxBiAnchoredWidth);
-
-    assert(!can_stop_matching(scratch));
-    assert(t->eodProgramOffset);
-
-    // Ensure that history is correct before we look for EOD matches.
-    roseFlushLastByteHistory(t, scratch, offset);
-    scratch->tctxt.lastEndOffset = offset;
-
-    DEBUG_PRINTF("running eod program at %u\n", t->eodProgramOffset);
-
-    // There should be no pending delayed literals.
-    assert(!scratch->tctxt.filledDelayedSlots);
-
-    const u64a som = 0;
-    const u8 flags = ROSE_PROG_FLAG_SKIP_MPV_CATCHUP;
-
-    // Note: we ignore the result, as this is the last thing to ever happen on
-    // a scan.
-    roseRunProgram(t, scratch, t->eodProgramOffset, som, offset, flags);
-}
-
-/**
- * \brief Run the anchored matcher, if any. Returns non-zero if matching should
- * halt.
- */
-static rose_inline
-int roseBlockAnchored(const struct RoseEngine *t, struct hs_scratch *scratch) {
-    const void *atable = getALiteralMatcher(t);
-    if (!atable) {
-        DEBUG_PRINTF("no anchored table\n");
-        return 0;
-    }
-
-    const size_t length = scratch->core_info.len;
-
-    if (t->amatcherMaxBiAnchoredWidth != ROSE_BOUND_INF &&
-        length > t->amatcherMaxBiAnchoredWidth) {
-        return 0;
-    }
-
-    if (length < t->amatcherMinWidth) {
-        return 0;
-    }
-
-    runAnchoredTableBlock(t, atable, scratch);
-
-    return can_stop_matching(scratch);
-}
-
-/**
- * \brief Run the floating matcher, if any. Returns non-zero if matching should
- * halt.
- */
-static rose_inline
-int roseBlockFloating(const struct RoseEngine *t, struct hs_scratch *scratch) {
-    const struct HWLM *ftable = getFLiteralMatcher(t);
-    if (!ftable) {
-        return 0;
-    }
-
-    const size_t length = scratch->core_info.len;
-    char *state = scratch->core_info.state;
-    struct RoseContext *tctxt = &scratch->tctxt;
-
-    DEBUG_PRINTF("ftable fd=%u fmd %u\n", t->floatingDistance,
-                 t->floatingMinDistance);
-    if (t->noFloatingRoots && !roseHasInFlightMatches(t, state, scratch)) {
-        DEBUG_PRINTF("skip FLOATING: no inflight matches\n");
-        return 0;
-    }
-
-    if (t->fmatcherMaxBiAnchoredWidth != ROSE_BOUND_INF &&
-        length > t->fmatcherMaxBiAnchoredWidth) {
-        return 0;
-    }
-
-    if (length < t->fmatcherMinWidth) {
-        return 0;
-    }
-
-    const u8 *buffer = scratch->core_info.buf;
-    size_t flen = length;
-    if (t->floatingDistance != ROSE_BOUND_INF) {
-        flen = MIN(t->floatingDistance, length);
-    }
-    if (flen <= t->floatingMinDistance) {
-        return 0;
-    }
-
-    DEBUG_PRINTF("BEGIN FLOATING (over %zu/%zu)\n", flen, length);
-    DEBUG_PRINTF("-- %016llx\n", tctxt->groups);
-    hwlmExec(ftable, buffer, flen, t->floatingMinDistance, roseFloatingCallback,
-             scratch, tctxt->groups & t->floating_group_mask);
-
-    return can_stop_matching(scratch);
-}
-
-static rose_inline
-void runEagerPrefixesBlock(const struct RoseEngine *t,
-                           struct hs_scratch *scratch) {
-    if (!t->eagerIterOffset) {
-        return;
-    }
-
-    char *state = scratch->core_info.state;
-    u8 *ara = getActiveLeftArray(t, state); /* indexed by offsets into
-                                             * left_table */
-    const u32 arCount = t->activeLeftCount;
-    const u32 qCount = t->queueCount;
-    const struct LeftNfaInfo *left_table = getLeftTable(t);
-    const struct mmbit_sparse_iter *it = getByOffset(t, t->eagerIterOffset);
-
-    struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES];
-
-    u32 idx = 0;
-    u32 ri = mmbit_sparse_iter_begin(ara, arCount, &idx, it, si_state);
-    for (; ri != MMB_INVALID;
-           ri = mmbit_sparse_iter_next(ara, arCount, ri, &idx, it, si_state)) {
-        const struct LeftNfaInfo *left = left_table + ri;
-        u32 qi = ri + t->leftfixBeginQueue;
-        DEBUG_PRINTF("leftfix %u/%u, maxLag=%u\n", ri, arCount, left->maxLag);
-
-        assert(!fatbit_isset(scratch->aqa, qCount, qi));
-        assert(left->eager);
-        assert(!left->infix);
-
-        struct mq *q = scratch->queues + qi;
-        const struct NFA *nfa = getNfaByQueue(t, qi);
-
-        if (scratch->core_info.len < nfa->minWidth) {
-            /* we know that there is not enough data for this to ever match, so
-             * we can immediately squash/ */
-            mmbit_unset(ara, arCount, ri);
-            scratch->tctxt.groups &= left->squash_mask;
-        }
-
-        s64a loc = MIN(scratch->core_info.len, EAGER_STOP_OFFSET);
-
-        fatbit_set(scratch->aqa, qCount, qi);
-        initRoseQueue(t, qi, left, scratch);
-
-        pushQueueAt(q, 0, MQE_START, 0);
-        pushQueueAt(q, 1, MQE_TOP, 0);
-        pushQueueAt(q, 2, MQE_END, loc);
-        nfaQueueInitState(nfa, q);
-
-        char alive = nfaQueueExecToMatch(q->nfa, q, loc);
-
-        if (!alive) {
-            DEBUG_PRINTF("queue %u dead, squashing\n", qi);
-            mmbit_unset(ara, arCount, ri);
-            fatbit_unset(scratch->aqa, qCount, qi);
-            scratch->tctxt.groups &= left->squash_mask;
-        } else if (q->cur == q->end) {
-            assert(alive != MO_MATCHES_PENDING);
-            if (loc == (s64a)scratch->core_info.len) {
-                /* We know that the prefix does not match in the block so we
-                 * can squash the groups anyway even though it did not die */
-                /* TODO: if we knew the minimum lag the leftfix is checked at we
-                 * could make this check tighter */
-                DEBUG_PRINTF("queue %u has no match in block, squashing\n", qi);
-                mmbit_unset(ara, arCount, ri);
-                fatbit_unset(scratch->aqa, qCount, qi);
-                scratch->tctxt.groups &= left->squash_mask;
-            } else {
-                DEBUG_PRINTF("queue %u finished, nfa lives\n", qi);
-                q->cur = q->end = 0;
-                pushQueueAt(q, 0, MQE_START, loc);
-            }
-        } else {
-            assert(alive == MO_MATCHES_PENDING);
-            DEBUG_PRINTF("queue %u unfinished, nfa lives\n", qi);
-            q->end--; /* remove end item */
-        }
-    }
-}
-
-void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
+static rose_inline
+void roseBlockEodExec(const struct RoseEngine *t, u64a offset,
+                      struct hs_scratch *scratch) {
+    assert(t->requiresEodCheck);
+    assert(t->maxBiAnchoredWidth == ROSE_BOUND_INF
+           || offset <= t->maxBiAnchoredWidth);
+
+    assert(!can_stop_matching(scratch));
+    assert(t->eodProgramOffset);
+
+    // Ensure that history is correct before we look for EOD matches.
+    roseFlushLastByteHistory(t, scratch, offset);
+    scratch->tctxt.lastEndOffset = offset;
+
+    DEBUG_PRINTF("running eod program at %u\n", t->eodProgramOffset);
+
+    // There should be no pending delayed literals.
+    assert(!scratch->tctxt.filledDelayedSlots);
+
+    const u64a som = 0;
+    const u8 flags = ROSE_PROG_FLAG_SKIP_MPV_CATCHUP;
+
+    // Note: we ignore the result, as this is the last thing to ever happen on
+    // a scan.
+    roseRunProgram(t, scratch, t->eodProgramOffset, som, offset, flags);
+}
+
+/**
+ * \brief Run the anchored matcher, if any. Returns non-zero if matching should
+ * halt.
+ */
+static rose_inline
+int roseBlockAnchored(const struct RoseEngine *t, struct hs_scratch *scratch) {
+    const void *atable = getALiteralMatcher(t);
+    if (!atable) {
+        DEBUG_PRINTF("no anchored table\n");
+        return 0;
+    }
+
+    const size_t length = scratch->core_info.len;
+
+    if (t->amatcherMaxBiAnchoredWidth != ROSE_BOUND_INF &&
+        length > t->amatcherMaxBiAnchoredWidth) {
+        return 0;
+    }
+
+    if (length < t->amatcherMinWidth) {
+        return 0;
+    }
+
+    runAnchoredTableBlock(t, atable, scratch);
+
+    return can_stop_matching(scratch);
+}
+
+/**
+ * \brief Run the floating matcher, if any. Returns non-zero if matching should
+ * halt.
+ */
+static rose_inline
+int roseBlockFloating(const struct RoseEngine *t, struct hs_scratch *scratch) {
+    const struct HWLM *ftable = getFLiteralMatcher(t);
+    if (!ftable) {
+        return 0;
+    }
+
+    const size_t length = scratch->core_info.len;
+    char *state = scratch->core_info.state;
+    struct RoseContext *tctxt = &scratch->tctxt;
+
+    DEBUG_PRINTF("ftable fd=%u fmd %u\n", t->floatingDistance,
+                 t->floatingMinDistance);
+    if (t->noFloatingRoots && !roseHasInFlightMatches(t, state, scratch)) {
+        DEBUG_PRINTF("skip FLOATING: no inflight matches\n");
+        return 0;
+    }
+
+    if (t->fmatcherMaxBiAnchoredWidth != ROSE_BOUND_INF &&
+        length > t->fmatcherMaxBiAnchoredWidth) {
+        return 0;
+    }
+
+    if (length < t->fmatcherMinWidth) {
+        return 0;
+    }
+
+    const u8 *buffer = scratch->core_info.buf;
+    size_t flen = length;
+    if (t->floatingDistance != ROSE_BOUND_INF) {
+        flen = MIN(t->floatingDistance, length);
+    }
+    if (flen <= t->floatingMinDistance) {
+        return 0;
+    }
+
+    DEBUG_PRINTF("BEGIN FLOATING (over %zu/%zu)\n", flen, length);
+    DEBUG_PRINTF("-- %016llx\n", tctxt->groups);
+    hwlmExec(ftable, buffer, flen, t->floatingMinDistance, roseFloatingCallback,
+             scratch, tctxt->groups & t->floating_group_mask);
+
+    return can_stop_matching(scratch);
+}
+
+static rose_inline
+void runEagerPrefixesBlock(const struct RoseEngine *t,
+                           struct hs_scratch *scratch) {
+    if (!t->eagerIterOffset) {
+        return;
+    }
+
+    char *state = scratch->core_info.state;
+    u8 *ara = getActiveLeftArray(t, state); /* indexed by offsets into
+                                             * left_table */
+    const u32 arCount = t->activeLeftCount;
+    const u32 qCount = t->queueCount;
+    const struct LeftNfaInfo *left_table = getLeftTable(t);
+    const struct mmbit_sparse_iter *it = getByOffset(t, t->eagerIterOffset);
+
+    struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES];
+
+    u32 idx = 0;
+    u32 ri = mmbit_sparse_iter_begin(ara, arCount, &idx, it, si_state);
+    for (; ri != MMB_INVALID;
+           ri = mmbit_sparse_iter_next(ara, arCount, ri, &idx, it, si_state)) {
+        const struct LeftNfaInfo *left = left_table + ri;
+        u32 qi = ri + t->leftfixBeginQueue;
+        DEBUG_PRINTF("leftfix %u/%u, maxLag=%u\n", ri, arCount, left->maxLag);
+
+        assert(!fatbit_isset(scratch->aqa, qCount, qi));
+        assert(left->eager);
+        assert(!left->infix);
+
+        struct mq *q = scratch->queues + qi;
+        const struct NFA *nfa = getNfaByQueue(t, qi);
+
+        if (scratch->core_info.len < nfa->minWidth) {
+            /* we know that there is not enough data for this to ever match, so
+             * we can immediately squash/ */
+            mmbit_unset(ara, arCount, ri);
+            scratch->tctxt.groups &= left->squash_mask;
+        }
+
+        s64a loc = MIN(scratch->core_info.len, EAGER_STOP_OFFSET);
+
+        fatbit_set(scratch->aqa, qCount, qi);
+        initRoseQueue(t, qi, left, scratch);
+
+        pushQueueAt(q, 0, MQE_START, 0);
+        pushQueueAt(q, 1, MQE_TOP, 0);
+        pushQueueAt(q, 2, MQE_END, loc);
+        nfaQueueInitState(nfa, q);
+
+        char alive = nfaQueueExecToMatch(q->nfa, q, loc);
+
+        if (!alive) {
+            DEBUG_PRINTF("queue %u dead, squashing\n", qi);
+            mmbit_unset(ara, arCount, ri);
+            fatbit_unset(scratch->aqa, qCount, qi);
+            scratch->tctxt.groups &= left->squash_mask;
+        } else if (q->cur == q->end) {
+            assert(alive != MO_MATCHES_PENDING);
+            if (loc == (s64a)scratch->core_info.len) {
+                /* We know that the prefix does not match in the block so we
+                 * can squash the groups anyway even though it did not die */
+                /* TODO: if we knew the minimum lag the leftfix is checked at we
+                 * could make this check tighter */
+                DEBUG_PRINTF("queue %u has no match in block, squashing\n", qi);
+                mmbit_unset(ara, arCount, ri);
+                fatbit_unset(scratch->aqa, qCount, qi);
+                scratch->tctxt.groups &= left->squash_mask;
+            } else {
+                DEBUG_PRINTF("queue %u finished, nfa lives\n", qi);
+                q->cur = q->end = 0;
+                pushQueueAt(q, 0, MQE_START, loc);
+            }
+        } else {
+            assert(alive == MO_MATCHES_PENDING);
+            DEBUG_PRINTF("queue %u unfinished, nfa lives\n", qi);
+            q->end--; /* remove end item */
+        }
+    }
+}
+
+void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
     assert(t);
     assert(scratch);
     assert(scratch->core_info.buf);
     assert(mmbit_sparse_iter_state_size(t->rolesWithStateCount)
            < MAX_SPARSE_ITER_STATES);
 
-    // We should not have been called if we've already been told to terminate
-    // matching.
-    assert(!told_to_stop_matching(scratch));
-
-    // If this block is shorter than our minimum width, then no pattern in this
-    // RoseEngine could match.
-    /* minWidth checks should have already been performed by the caller */
-    assert(scratch->core_info.len >= t->minWidth);
-
-    // Similarly, we may have a maximum width (for engines constructed entirely
-    // of bi-anchored patterns).
-    /* This check is now handled by the interpreter */
-    assert(t->maxBiAnchoredWidth == ROSE_BOUND_INF
-           || scratch->core_info.len <= t->maxBiAnchoredWidth);
-
+    // We should not have been called if we've already been told to terminate
+    // matching.
+    assert(!told_to_stop_matching(scratch));
+
+    // If this block is shorter than our minimum width, then no pattern in this
+    // RoseEngine could match.
+    /* minWidth checks should have already been performed by the caller */
+    assert(scratch->core_info.len >= t->minWidth);
+
+    // Similarly, we may have a maximum width (for engines constructed entirely
+    // of bi-anchored patterns).
+    /* This check is now handled by the interpreter */
+    assert(t->maxBiAnchoredWidth == ROSE_BOUND_INF
+           || scratch->core_info.len <= t->maxBiAnchoredWidth);
+
     const size_t length = scratch->core_info.len;
 
     // We have optimizations for small block scans: we run a single coalesced
@@ -373,9 +373,9 @@ void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
     const char is_small_block =
         (length < ROSE_SMALL_BLOCK_LEN && t->sbmatcherOffset);
 
-    char *state = scratch->core_info.state;
+    char *state = scratch->core_info.state;
 
-    init_for_block(t, scratch, state, is_small_block);
+    init_for_block(t, scratch, state, is_small_block);
 
     struct RoseContext *tctxt = &scratch->tctxt;
 
@@ -388,35 +388,35 @@ void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
         DEBUG_PRINTF("BEGIN SMALL BLOCK (over %zu/%zu)\n", sblen, length);
         DEBUG_PRINTF("-- %016llx\n", tctxt->groups);
         hwlmExec(sbtable, scratch->core_info.buf, sblen, 0, roseCallback,
-                 scratch, tctxt->groups);
-    } else {
-        runEagerPrefixesBlock(t, scratch);
+                 scratch, tctxt->groups);
+    } else {
+        runEagerPrefixesBlock(t, scratch);
 
-        if (roseBlockAnchored(t, scratch)) {
-            return;
+        if (roseBlockAnchored(t, scratch)) {
+            return;
         }
-        if (roseBlockFloating(t, scratch)) {
-            return;
+        if (roseBlockFloating(t, scratch)) {
+            return;
         }
-    }
+    }
 
-    if (cleanUpDelayed(t, scratch, length, 0) == HWLM_TERMINATE_MATCHING) {
-        return;
+    if (cleanUpDelayed(t, scratch, length, 0) == HWLM_TERMINATE_MATCHING) {
+        return;
     }
 
-    assert(!can_stop_matching(scratch));
+    assert(!can_stop_matching(scratch));
 
-    roseCatchUpTo(t, scratch, length);
+    roseCatchUpTo(t, scratch, length);
 
-    if (!t->requiresEodCheck || !t->eodProgramOffset) {
-        DEBUG_PRINTF("no eod check required\n");
-        return;
+    if (!t->requiresEodCheck || !t->eodProgramOffset) {
+        DEBUG_PRINTF("no eod check required\n");
+        return;
     }
 
-    if (can_stop_matching(scratch)) {
-        DEBUG_PRINTF("bailing, already halted\n");
+    if (can_stop_matching(scratch)) {
+        DEBUG_PRINTF("bailing, already halted\n");
         return;
     }
 
-    roseBlockEodExec(t, length, scratch);
+    roseBlockEodExec(t, length, scratch);
 }