path: root/contrib/libs/hyperscan/src/rose
author     bnagaev <bnagaev@yandex-team.ru>  2022-02-10 16:47:04 +0300
committer  Daniil Cherednik <dcherednik@yandex-team.ru>  2022-02-10 16:47:04 +0300
commit     c74559fb88da8adac0d9186cfa55a6b13c47695f (patch)
tree       b83306b6e37edeea782e9eed673d89286c4fef35 /contrib/libs/hyperscan/src/rose
parent     d6449ba66291ff0c0d352c82e6eb3efb4c8a7e8d (diff)
Restoring authorship annotation for <bnagaev@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/hyperscan/src/rose')
-rw-r--r--  contrib/libs/hyperscan/src/rose/block.c  380
-rw-r--r--  contrib/libs/hyperscan/src/rose/catchup.c  1510
-rw-r--r--  contrib/libs/hyperscan/src/rose/catchup.h  234
-rw-r--r--  contrib/libs/hyperscan/src/rose/counting_miracle.h  514
-rw-r--r--  contrib/libs/hyperscan/src/rose/infix.h  274
-rw-r--r--  contrib/libs/hyperscan/src/rose/init.c  162
-rw-r--r--  contrib/libs/hyperscan/src/rose/init.h  88
-rw-r--r--  contrib/libs/hyperscan/src/rose/match.c  782
-rw-r--r--  contrib/libs/hyperscan/src/rose/match.h  450
-rw-r--r--  contrib/libs/hyperscan/src/rose/miracle.h  276
-rw-r--r--  contrib/libs/hyperscan/src/rose/rose.h  84
-rw-r--r--  contrib/libs/hyperscan/src/rose/rose_build.h  258
-rw-r--r--  contrib/libs/hyperscan/src/rose/rose_build_add.cpp  3438
-rw-r--r--  contrib/libs/hyperscan/src/rose/rose_build_add_internal.h  86
-rw-r--r--  contrib/libs/hyperscan/src/rose/rose_build_add_mask.cpp  1462
-rw-r--r--  contrib/libs/hyperscan/src/rose/rose_build_anchored.cpp  1510
-rw-r--r--  contrib/libs/hyperscan/src/rose/rose_build_anchored.h  108
-rw-r--r--  contrib/libs/hyperscan/src/rose/rose_build_bytecode.cpp  3010
-rw-r--r--  contrib/libs/hyperscan/src/rose/rose_build_compile.cpp  2756
-rw-r--r--  contrib/libs/hyperscan/src/rose/rose_build_convert.cpp  1508
-rw-r--r--  contrib/libs/hyperscan/src/rose/rose_build_convert.h  82
-rw-r--r--  contrib/libs/hyperscan/src/rose/rose_build_dump.h  104
-rw-r--r--  contrib/libs/hyperscan/src/rose/rose_build_impl.h  948
-rw-r--r--  contrib/libs/hyperscan/src/rose/rose_build_infix.cpp  612
-rw-r--r--  contrib/libs/hyperscan/src/rose/rose_build_infix.h  104
-rw-r--r--  contrib/libs/hyperscan/src/rose/rose_build_lookaround.cpp  1176
-rw-r--r--  contrib/libs/hyperscan/src/rose/rose_build_lookaround.h  142
-rw-r--r--  contrib/libs/hyperscan/src/rose/rose_build_merge.cpp  3616
-rw-r--r--  contrib/libs/hyperscan/src/rose/rose_build_merge.h  134
-rw-r--r--  contrib/libs/hyperscan/src/rose/rose_build_misc.cpp  1610
-rw-r--r--  contrib/libs/hyperscan/src/rose/rose_build_role_aliasing.cpp  2922
-rw-r--r--  contrib/libs/hyperscan/src/rose/rose_build_role_aliasing.h  74
-rw-r--r--  contrib/libs/hyperscan/src/rose/rose_build_scatter.cpp  238
-rw-r--r--  contrib/libs/hyperscan/src/rose/rose_build_scatter.h  110
-rw-r--r--  contrib/libs/hyperscan/src/rose/rose_build_util.h  118
-rw-r--r--  contrib/libs/hyperscan/src/rose/rose_build_width.cpp  494
-rw-r--r--  contrib/libs/hyperscan/src/rose/rose_build_width.h  132
-rw-r--r--  contrib/libs/hyperscan/src/rose/rose_common.h  86
-rw-r--r--  contrib/libs/hyperscan/src/rose/rose_graph.h  400
-rw-r--r--  contrib/libs/hyperscan/src/rose/rose_in_dump.h  98
-rw-r--r--  contrib/libs/hyperscan/src/rose/rose_in_graph.h  342
-rw-r--r--  contrib/libs/hyperscan/src/rose/rose_in_util.cpp  488
-rw-r--r--  contrib/libs/hyperscan/src/rose/rose_in_util.h  102
-rw-r--r--  contrib/libs/hyperscan/src/rose/rose_internal.h  930
-rw-r--r--  contrib/libs/hyperscan/src/rose/rose_types.h  72
-rw-r--r--  contrib/libs/hyperscan/src/rose/runtime.h  238
-rw-r--r--  contrib/libs/hyperscan/src/rose/stream.c  994
47 files changed, 17628 insertions, 17628 deletions
diff --git a/contrib/libs/hyperscan/src/rose/block.c b/contrib/libs/hyperscan/src/rose/block.c
index 7c8b43aed9..b3f424cb73 100644
--- a/contrib/libs/hyperscan/src/rose/block.c
+++ b/contrib/libs/hyperscan/src/rose/block.c
@@ -1,164 +1,164 @@
-/*
+/*
* Copyright (c) 2015-2019, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "catchup.h"
-#include "init.h"
-#include "match.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "catchup.h"
+#include "init.h"
+#include "match.h"
#include "program_runtime.h"
#include "rose.h"
#include "rose_common.h"
-#include "nfa/nfa_api.h"
-#include "nfa/nfa_internal.h"
-#include "nfa/nfa_rev_api.h"
-#include "nfa/mcclellan.h"
-#include "util/fatbit.h"
-
-static rose_inline
-void runAnchoredTableBlock(const struct RoseEngine *t, const void *atable,
- struct hs_scratch *scratch) {
- const u8 *buffer = scratch->core_info.buf;
- size_t length = scratch->core_info.len;
- size_t alen = MIN(length, t->anchoredDistance);
- const struct anchored_matcher_info *curr = atable;
-
- DEBUG_PRINTF("BEGIN ANCHORED (over %zu/%zu)\n", alen, length);
-
- do {
- const struct NFA *nfa
- = (const struct NFA *)((const char *)curr + sizeof(*curr));
-
- assert(t->anchoredDistance > curr->anchoredMinDistance);
- if (length >= curr->anchoredMinDistance) {
- size_t local_alen = alen - curr->anchoredMinDistance;
- const u8 *local_buffer = buffer + curr->anchoredMinDistance;
-
- DEBUG_PRINTF("--anchored nfa (+%u)\n", curr->anchoredMinDistance);
- assert(isMcClellanType(nfa->type));
- if (nfa->type == MCCLELLAN_NFA_8) {
- nfaExecMcClellan8_B(nfa, curr->anchoredMinDistance,
- local_buffer, local_alen,
+#include "nfa/nfa_api.h"
+#include "nfa/nfa_internal.h"
+#include "nfa/nfa_rev_api.h"
+#include "nfa/mcclellan.h"
+#include "util/fatbit.h"
+
+static rose_inline
+void runAnchoredTableBlock(const struct RoseEngine *t, const void *atable,
+ struct hs_scratch *scratch) {
+ const u8 *buffer = scratch->core_info.buf;
+ size_t length = scratch->core_info.len;
+ size_t alen = MIN(length, t->anchoredDistance);
+ const struct anchored_matcher_info *curr = atable;
+
+ DEBUG_PRINTF("BEGIN ANCHORED (over %zu/%zu)\n", alen, length);
+
+ do {
+ const struct NFA *nfa
+ = (const struct NFA *)((const char *)curr + sizeof(*curr));
+
+ assert(t->anchoredDistance > curr->anchoredMinDistance);
+ if (length >= curr->anchoredMinDistance) {
+ size_t local_alen = alen - curr->anchoredMinDistance;
+ const u8 *local_buffer = buffer + curr->anchoredMinDistance;
+
+ DEBUG_PRINTF("--anchored nfa (+%u)\n", curr->anchoredMinDistance);
+ assert(isMcClellanType(nfa->type));
+ if (nfa->type == MCCLELLAN_NFA_8) {
+ nfaExecMcClellan8_B(nfa, curr->anchoredMinDistance,
+ local_buffer, local_alen,
roseAnchoredCallback, scratch);
- } else {
- nfaExecMcClellan16_B(nfa, curr->anchoredMinDistance,
- local_buffer, local_alen,
+ } else {
+ nfaExecMcClellan16_B(nfa, curr->anchoredMinDistance,
+ local_buffer, local_alen,
roseAnchoredCallback, scratch);
- }
- }
-
- if (!curr->next_offset) {
- break;
- }
-
- curr = (const void *)((const char *)curr + curr->next_offset);
- } while (1);
-}
-
-static really_inline
+ }
+ }
+
+ if (!curr->next_offset) {
+ break;
+ }
+
+ curr = (const void *)((const char *)curr + curr->next_offset);
+ } while (1);
+}
+
+static really_inline
void init_state_for_block(const struct RoseEngine *t, char *state) {
- assert(t);
- assert(state);
-
+ assert(t);
+ assert(state);
+
DEBUG_PRINTF("init for Rose %p with %u state indices\n", t,
t->rolesWithStateCount);
-
- // Rose is guaranteed 8-aligned state
- assert(ISALIGNED_N(state, 8));
-
- init_state(t, state);
-}
-
-static really_inline
-void init_outfixes_for_block(const struct RoseEngine *t,
+
+ // Rose is guaranteed 8-aligned state
+ assert(ISALIGNED_N(state, 8));
+
+ init_state(t, state);
+}
+
+static really_inline
+void init_outfixes_for_block(const struct RoseEngine *t,
struct hs_scratch *scratch, char *state,
- char is_small_block) {
- /* active leaf array has been cleared by the init scatter */
-
- if (t->initMpvNfa != MO_INVALID_IDX) {
- assert(t->initMpvNfa == 0);
- const struct NFA *nfa = getNfaByQueue(t, 0);
- DEBUG_PRINTF("testing minwidth %u > len %zu\n", nfa->minWidth,
- scratch->core_info.len);
- size_t len = nfaRevAccelCheck(nfa, scratch->core_info.buf,
- scratch->core_info.len);
- if (len) {
- u8 *activeArray = getActiveLeafArray(t, state);
- const u32 activeArraySize = t->activeArrayCount;
- const u32 qCount = t->queueCount;
-
- mmbit_set(activeArray, activeArraySize, 0);
- fatbit_set(scratch->aqa, qCount, 0);
-
- struct mq *q = scratch->queues;
+ char is_small_block) {
+ /* active leaf array has been cleared by the init scatter */
+
+ if (t->initMpvNfa != MO_INVALID_IDX) {
+ assert(t->initMpvNfa == 0);
+ const struct NFA *nfa = getNfaByQueue(t, 0);
+ DEBUG_PRINTF("testing minwidth %u > len %zu\n", nfa->minWidth,
+ scratch->core_info.len);
+ size_t len = nfaRevAccelCheck(nfa, scratch->core_info.buf,
+ scratch->core_info.len);
+ if (len) {
+ u8 *activeArray = getActiveLeafArray(t, state);
+ const u32 activeArraySize = t->activeArrayCount;
+ const u32 qCount = t->queueCount;
+
+ mmbit_set(activeArray, activeArraySize, 0);
+ fatbit_set(scratch->aqa, qCount, 0);
+
+ struct mq *q = scratch->queues;
initQueue(q, 0, t, scratch);
- q->length = len; /* adjust for rev_accel */
- nfaQueueInitState(nfa, q);
- pushQueueAt(q, 0, MQE_START, 0);
- pushQueueAt(q, 1, MQE_TOP, 0);
- }
- }
-
- if (is_small_block && !t->hasOutfixesInSmallBlock) {
- DEBUG_PRINTF("all outfixes in small block table\n");
- return;
- }
-
- if (t->outfixBeginQueue != t->outfixEndQueue) {
- blockInitSufPQ(t, state, scratch, is_small_block);
- }
-}
-
-static really_inline
-void init_for_block(const struct RoseEngine *t, struct hs_scratch *scratch,
+ q->length = len; /* adjust for rev_accel */
+ nfaQueueInitState(nfa, q);
+ pushQueueAt(q, 0, MQE_START, 0);
+ pushQueueAt(q, 1, MQE_TOP, 0);
+ }
+ }
+
+ if (is_small_block && !t->hasOutfixesInSmallBlock) {
+ DEBUG_PRINTF("all outfixes in small block table\n");
+ return;
+ }
+
+ if (t->outfixBeginQueue != t->outfixEndQueue) {
+ blockInitSufPQ(t, state, scratch, is_small_block);
+ }
+}
+
+static really_inline
+void init_for_block(const struct RoseEngine *t, struct hs_scratch *scratch,
char *state, char is_small_block) {
- init_state_for_block(t, state);
-
- struct RoseContext *tctxt = &scratch->tctxt;
-
- tctxt->groups = t->initialGroups;
- tctxt->lit_offset_adjust = 1; // index after last byte
- tctxt->delayLastEndOffset = 0;
- tctxt->lastEndOffset = 0;
- tctxt->filledDelayedSlots = 0;
- tctxt->lastMatchOffset = 0;
+ init_state_for_block(t, state);
+
+ struct RoseContext *tctxt = &scratch->tctxt;
+
+ tctxt->groups = t->initialGroups;
+ tctxt->lit_offset_adjust = 1; // index after last byte
+ tctxt->delayLastEndOffset = 0;
+ tctxt->lastEndOffset = 0;
+ tctxt->filledDelayedSlots = 0;
+ tctxt->lastMatchOffset = 0;
tctxt->lastCombMatchOffset = 0;
- tctxt->minMatchOffset = 0;
- tctxt->minNonMpvMatchOffset = 0;
- tctxt->next_mpv_offset = 0;
-
- scratch->al_log_sum = 0;
-
- fatbit_clear(scratch->aqa);
-
- scratch->catchup_pq.qm_size = 0;
-
- init_outfixes_for_block(t, scratch, state, is_small_block);
-}
-
+ tctxt->minMatchOffset = 0;
+ tctxt->minNonMpvMatchOffset = 0;
+ tctxt->next_mpv_offset = 0;
+
+ scratch->al_log_sum = 0;
+
+ fatbit_clear(scratch->aqa);
+
+ scratch->catchup_pq.qm_size = 0;
+
+ init_outfixes_for_block(t, scratch, state, is_small_block);
+}
+
static rose_inline
void roseBlockEodExec(const struct RoseEngine *t, u64a offset,
struct hs_scratch *scratch) {
@@ -343,12 +343,12 @@ void runEagerPrefixesBlock(const struct RoseEngine *t,
}
void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
- assert(t);
- assert(scratch);
- assert(scratch->core_info.buf);
- assert(mmbit_sparse_iter_state_size(t->rolesWithStateCount)
- < MAX_SPARSE_ITER_STATES);
-
+ assert(t);
+ assert(scratch);
+ assert(scratch->core_info.buf);
+ assert(mmbit_sparse_iter_state_size(t->rolesWithStateCount)
+ < MAX_SPARSE_ITER_STATES);
+
// We should not have been called if we've already been told to terminate
// matching.
assert(!told_to_stop_matching(scratch));
@@ -364,59 +364,59 @@ void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
assert(t->maxBiAnchoredWidth == ROSE_BOUND_INF
|| scratch->core_info.len <= t->maxBiAnchoredWidth);
- const size_t length = scratch->core_info.len;
-
- // We have optimizations for small block scans: we run a single coalesced
- // HWLM scan instead of running the anchored and floating matchers. Some
- // outfixes are disabled as well (for SEP scans of single-byte literals,
- // which are also run in the HWLM scan).
- const char is_small_block =
- (length < ROSE_SMALL_BLOCK_LEN && t->sbmatcherOffset);
-
+ const size_t length = scratch->core_info.len;
+
+ // We have optimizations for small block scans: we run a single coalesced
+ // HWLM scan instead of running the anchored and floating matchers. Some
+ // outfixes are disabled as well (for SEP scans of single-byte literals,
+ // which are also run in the HWLM scan).
+ const char is_small_block =
+ (length < ROSE_SMALL_BLOCK_LEN && t->sbmatcherOffset);
+
char *state = scratch->core_info.state;
-
+
init_for_block(t, scratch, state, is_small_block);
-
- struct RoseContext *tctxt = &scratch->tctxt;
-
- if (is_small_block) {
- const void *sbtable = getSBLiteralMatcher(t);
- assert(sbtable);
-
- size_t sblen = MIN(length, t->smallBlockDistance);
-
- DEBUG_PRINTF("BEGIN SMALL BLOCK (over %zu/%zu)\n", sblen, length);
- DEBUG_PRINTF("-- %016llx\n", tctxt->groups);
- hwlmExec(sbtable, scratch->core_info.buf, sblen, 0, roseCallback,
+
+ struct RoseContext *tctxt = &scratch->tctxt;
+
+ if (is_small_block) {
+ const void *sbtable = getSBLiteralMatcher(t);
+ assert(sbtable);
+
+ size_t sblen = MIN(length, t->smallBlockDistance);
+
+ DEBUG_PRINTF("BEGIN SMALL BLOCK (over %zu/%zu)\n", sblen, length);
+ DEBUG_PRINTF("-- %016llx\n", tctxt->groups);
+ hwlmExec(sbtable, scratch->core_info.buf, sblen, 0, roseCallback,
scratch, tctxt->groups);
} else {
runEagerPrefixesBlock(t, scratch);
-
+
if (roseBlockAnchored(t, scratch)) {
return;
- }
+ }
if (roseBlockFloating(t, scratch)) {
return;
- }
+ }
}
-
+
if (cleanUpDelayed(t, scratch, length, 0) == HWLM_TERMINATE_MATCHING) {
return;
- }
-
+ }
+
assert(!can_stop_matching(scratch));
-
+
roseCatchUpTo(t, scratch, length);
-
+
if (!t->requiresEodCheck || !t->eodProgramOffset) {
DEBUG_PRINTF("no eod check required\n");
return;
- }
-
+ }
+
if (can_stop_matching(scratch)) {
DEBUG_PRINTF("bailing, already halted\n");
- return;
- }
-
+ return;
+ }
+
roseBlockEodExec(t, length, scratch);
-}
+}
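
The block.c hunks above are pure whitespace churn: every removed line is re-added identical except for trailing blanks, consistent with the authorship-restoration commit message. For orientation, the following is a minimal, hypothetical sketch of the control flow that roseBlockExec() implements. All types and helpers below are stand-ins for the real calls (hwlmExec(), roseBlockAnchored(), roseBlockFloating(), cleanUpDelayed(), roseCatchUpTo() and the EOD program); eager-prefix scanning and the stop-matching checks are omitted, and ROSE_SMALL_BLOCK_LEN is given an illustrative value only.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

enum rv { CONTINUE_MATCHING, TERMINATE_MATCHING };

/* stub scan phases standing in for the real matcher calls */
static enum rv small_block_scan(void) { puts("coalesced HWLM scan"); return CONTINUE_MATCHING; }
static enum rv anchored_scan(void)    { puts("anchored table");      return CONTINUE_MATCHING; }
static enum rv floating_scan(void)    { puts("floating table");      return CONTINUE_MATCHING; }
static enum rv flush_delayed(void)    { puts("delayed literals");    return CONTINUE_MATCHING; }
static void catch_up(void)            { puts("catch up outfixes/suffixes"); }
static void eod_program(void)         { puts("EOD program"); }

#define ROSE_SMALL_BLOCK_LEN 32 /* assumption: illustrative value, not the real constant */

static void block_exec(size_t len, bool have_sb_matcher, bool need_eod) {
    /* small blocks: one coalesced literal scan replaces the anchored and
     * floating matchers (mirrors the is_small_block branch above) */
    if (len < ROSE_SMALL_BLOCK_LEN && have_sb_matcher) {
        if (small_block_scan() == TERMINATE_MATCHING) return;
    } else {
        if (anchored_scan() == TERMINATE_MATCHING) return;
        if (floating_scan() == TERMINATE_MATCHING) return;
    }
    if (flush_delayed() == TERMINATE_MATCHING) return;
    catch_up();                 /* report pending engine matches up to len */
    if (need_eod) eod_program();
}

int main(void) {
    block_exec(16, true, false);  /* small-block path */
    block_exec(4096, true, true); /* general path with EOD check */
    return 0;
}

Compiled standalone, the first call exercises the single coalesced HWLM pass and the second the general anchored-then-floating path.
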
diff --git a/contrib/libs/hyperscan/src/rose/catchup.c b/contrib/libs/hyperscan/src/rose/catchup.c
index 14e0094dff..7a6648da98 100644
--- a/contrib/libs/hyperscan/src/rose/catchup.c
+++ b/contrib/libs/hyperscan/src/rose/catchup.c
@@ -1,76 +1,76 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/**
* \file
* \brief Rose runtime: code for catching up output-exposed engines.
*/
-#include "catchup.h"
-#include "match.h"
+#include "catchup.h"
+#include "match.h"
#include "program_runtime.h"
-#include "rose.h"
-#include "nfa/nfa_rev_api.h"
-#include "nfa/mpv.h"
-#include "som/som_runtime.h"
-#include "util/fatbit.h"
+#include "rose.h"
+#include "nfa/nfa_rev_api.h"
+#include "nfa/mpv.h"
+#include "som/som_runtime.h"
+#include "util/fatbit.h"
#include "report.h"
-
-typedef struct queue_match PQ_T;
-#define PQ_COMP(pqc_items, a, b) ((pqc_items)[a].loc < (pqc_items)[b].loc)
-#define PQ_COMP_B(pqc_items, a, b_fixed) ((pqc_items)[a].loc < (b_fixed).loc)
-
-#include "util/pqueue.h"
-
-static really_inline
+
+typedef struct queue_match PQ_T;
+#define PQ_COMP(pqc_items, a, b) ((pqc_items)[a].loc < (pqc_items)[b].loc)
+#define PQ_COMP_B(pqc_items, a, b_fixed) ((pqc_items)[a].loc < (b_fixed).loc)
+
+#include "util/pqueue.h"
+
+static really_inline
int roseNfaRunProgram(const struct RoseEngine *rose, struct hs_scratch *scratch,
u64a som, u64a offset, ReportID id, const char from_mpv) {
const u32 program = id;
u8 flags = ROSE_PROG_FLAG_IN_CATCHUP;
if (from_mpv) {
flags |= ROSE_PROG_FLAG_FROM_MPV;
- }
-
+ }
+
roseRunProgram(rose, scratch, program, som, offset, flags);
return can_stop_matching(scratch) ? MO_HALT_MATCHING : MO_CONTINUE_MATCHING;
-}
-
+}
+
static rose_inline
char roseSuffixInfoIsExhausted(const struct RoseEngine *rose,
const struct NfaInfo *info,
const char *exhausted) {
if (!info->ekeyListOffset) {
- return 0;
- }
-
+ return 0;
+ }
+
DEBUG_PRINTF("check exhaustion -> start at %u\n", info->ekeyListOffset);
-
+
/* INVALID_EKEY terminated list */
const u32 *ekeys = getByOffset(rose, info->ekeyListOffset);
while (*ekeys != INVALID_EKEY) {
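
The PQ_T typedef and PQ_COMP macros in this hunk parameterise util/pqueue.h so that the catch-up queue is an array-backed binary min-heap of (queue, loc) pairs ordered by match location: the root is always the pending match with the smallest location. The following toy reimplementation (illustrative only, not the real pqueue.h) shows the sift-up insert that such a comparator implies.

#include <stddef.h>
#include <stdio.h>

struct queue_match { unsigned queue; size_t loc; };

static void swap_qm(struct queue_match *a, struct queue_match *b) {
    struct queue_match t = *a; *a = *b; *b = t;
}

/* append m at index n, then sift it up; keeps h[0] at the smallest .loc */
static void heap_insert(struct queue_match *h, size_t n, struct queue_match m) {
    size_t i = n;
    h[i] = m;
    while (i && h[(i - 1) / 2].loc > h[i].loc) { /* the PQ_COMP test, on .loc */
        swap_qm(&h[(i - 1) / 2], &h[i]);
        i = (i - 1) / 2;
    }
}

int main(void) {
    struct queue_match h[8];
    size_t n = 0;
    heap_insert(h, n++, (struct queue_match){ .queue = 3, .loc = 40 });
    heap_insert(h, n++, (struct queue_match){ .queue = 1, .loc = 10 });
    heap_insert(h, n++, (struct queue_match){ .queue = 2, .loc = 25 });
    /* the root is the earliest pending match across all active engines */
    printf("next: q%u at loc %zu\n", h[0].queue, h[0].loc); /* q1 at 10 */
    return 0;
}
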
@@ -80,367 +80,367 @@ char roseSuffixInfoIsExhausted(const struct RoseEngine *rose,
return 0;
}
++ekeys;
- }
-
+ }
+
DEBUG_PRINTF("all ekeys exhausted -> dead\n");
return 1;
-}
-
+}
+
static really_inline
char roseSuffixIsExhausted(const struct RoseEngine *rose, u32 qi,
const char *exhausted) {
DEBUG_PRINTF("check queue %u\n", qi);
const struct NfaInfo *info = getNfaInfoByQueue(rose, qi);
return roseSuffixInfoIsExhausted(rose, info, exhausted);
-}
-
-static really_inline
+}
+
+static really_inline
void deactivateQueue(const struct RoseEngine *t, u8 *aa, u32 qi,
struct hs_scratch *scratch) {
u32 aaCount = t->activeArrayCount;
u32 qCount = t->queueCount;
-
- /* this is sailing close to the wind with regards to invalidating an
- * iteration. We are saved by the fact that unsetting does not clear the
- * summary bits -> the block under the gun remains valid
- */
- DEBUG_PRINTF("killing off zombie queue %u\n", qi);
- mmbit_unset(aa, aaCount, qi);
- fatbit_unset(scratch->aqa, qCount, qi);
-}
-
-static really_inline
-void ensureQueueActive(const struct RoseEngine *t, u32 qi, u32 qCount,
- struct mq *q, struct hs_scratch *scratch) {
- if (!fatbit_set(scratch->aqa, qCount, qi)) {
- DEBUG_PRINTF("initing %u\n", qi);
+
+ /* this is sailing close to the wind with regards to invalidating an
+ * iteration. We are saved by the fact that unsetting does not clear the
+ * summary bits -> the block under the gun remains valid
+ */
+ DEBUG_PRINTF("killing off zombie queue %u\n", qi);
+ mmbit_unset(aa, aaCount, qi);
+ fatbit_unset(scratch->aqa, qCount, qi);
+}
+
+static really_inline
+void ensureQueueActive(const struct RoseEngine *t, u32 qi, u32 qCount,
+ struct mq *q, struct hs_scratch *scratch) {
+ if (!fatbit_set(scratch->aqa, qCount, qi)) {
+ DEBUG_PRINTF("initing %u\n", qi);
initQueue(q, qi, t, scratch);
- loadStreamState(q->nfa, q, 0);
- pushQueueAt(q, 0, MQE_START, 0);
- }
-}
-
-static really_inline
-void pq_replace_top_with(struct catchup_pq *pq,
- UNUSED struct hs_scratch *scratch, u32 queue,
- s64a loc) {
- DEBUG_PRINTF("inserting q%u in pq at %lld\n", queue, loc);
- struct queue_match temp = {
- .queue = queue,
- .loc = (size_t)loc
- };
-
- assert(loc > 0);
- assert(pq->qm_size);
- assert(loc <= (s64a)scratch->core_info.len);
- pq_replace_top(pq->qm, pq->qm_size, temp);
-}
-
-static really_inline
-void pq_insert_with(struct catchup_pq *pq,
- UNUSED struct hs_scratch *scratch, u32 queue, s64a loc) {
- DEBUG_PRINTF("inserting q%u in pq at %lld\n", queue, loc);
- struct queue_match temp = {
- .queue = queue,
- .loc = (size_t)loc
- };
-
- assert(loc > 0);
- assert(loc <= (s64a)scratch->core_info.len);
- pq_insert(pq->qm, pq->qm_size, temp);
- ++pq->qm_size;
-}
-
-static really_inline
-void pq_pop_nice(struct catchup_pq *pq) {
- pq_pop(pq->qm, pq->qm_size);
- pq->qm_size--;
-}
-
-static really_inline
-s64a pq_top_loc(struct catchup_pq *pq) {
- assert(pq->qm_size);
- return (s64a)pq_top(pq->qm)->loc;
-}
-
-/* requires that we are the top item on the pq */
-static really_inline
+ loadStreamState(q->nfa, q, 0);
+ pushQueueAt(q, 0, MQE_START, 0);
+ }
+}
+
+static really_inline
+void pq_replace_top_with(struct catchup_pq *pq,
+ UNUSED struct hs_scratch *scratch, u32 queue,
+ s64a loc) {
+ DEBUG_PRINTF("inserting q%u in pq at %lld\n", queue, loc);
+ struct queue_match temp = {
+ .queue = queue,
+ .loc = (size_t)loc
+ };
+
+ assert(loc > 0);
+ assert(pq->qm_size);
+ assert(loc <= (s64a)scratch->core_info.len);
+ pq_replace_top(pq->qm, pq->qm_size, temp);
+}
+
+static really_inline
+void pq_insert_with(struct catchup_pq *pq,
+ UNUSED struct hs_scratch *scratch, u32 queue, s64a loc) {
+ DEBUG_PRINTF("inserting q%u in pq at %lld\n", queue, loc);
+ struct queue_match temp = {
+ .queue = queue,
+ .loc = (size_t)loc
+ };
+
+ assert(loc > 0);
+ assert(loc <= (s64a)scratch->core_info.len);
+ pq_insert(pq->qm, pq->qm_size, temp);
+ ++pq->qm_size;
+}
+
+static really_inline
+void pq_pop_nice(struct catchup_pq *pq) {
+ pq_pop(pq->qm, pq->qm_size);
+ pq->qm_size--;
+}
+
+static really_inline
+s64a pq_top_loc(struct catchup_pq *pq) {
+ assert(pq->qm_size);
+ return (s64a)pq_top(pq->qm)->loc;
+}
+
+/* requires that we are the top item on the pq */
+static really_inline
hwlmcb_rv_t runExistingNfaToNextMatch(const struct RoseEngine *t, u32 qi,
struct mq *q, s64a loc,
- struct hs_scratch *scratch, u8 *aa,
- char report_curr) {
- assert(pq_top(scratch->catchup_pq.qm)->queue == qi);
- assert(scratch->catchup_pq.qm_size);
- assert(!q->report_current);
- if (report_curr) {
- DEBUG_PRINTF("need to report matches\n");
- q->report_current = 1;
- }
-
- DEBUG_PRINTF("running queue from %u:%lld to %lld\n", q->cur, q_cur_loc(q),
- loc);
-
- assert(q_cur_loc(q) <= loc);
-
- char alive = nfaQueueExecToMatch(q->nfa, q, loc);
-
- /* exit via gift shop */
- if (alive == MO_MATCHES_PENDING) {
- /* we have pending matches */
- assert(q_cur_loc(q) + scratch->core_info.buf_offset
- >= scratch->tctxt.minMatchOffset);
- pq_replace_top_with(&scratch->catchup_pq, scratch, qi, q_cur_loc(q));
- return HWLM_CONTINUE_MATCHING;
- } else if (!alive) {
- if (report_curr && can_stop_matching(scratch)) {
- DEBUG_PRINTF("bailing\n");
- return HWLM_TERMINATE_MATCHING;
- }
-
+ struct hs_scratch *scratch, u8 *aa,
+ char report_curr) {
+ assert(pq_top(scratch->catchup_pq.qm)->queue == qi);
+ assert(scratch->catchup_pq.qm_size);
+ assert(!q->report_current);
+ if (report_curr) {
+ DEBUG_PRINTF("need to report matches\n");
+ q->report_current = 1;
+ }
+
+ DEBUG_PRINTF("running queue from %u:%lld to %lld\n", q->cur, q_cur_loc(q),
+ loc);
+
+ assert(q_cur_loc(q) <= loc);
+
+ char alive = nfaQueueExecToMatch(q->nfa, q, loc);
+
+ /* exit via gift shop */
+ if (alive == MO_MATCHES_PENDING) {
+ /* we have pending matches */
+ assert(q_cur_loc(q) + scratch->core_info.buf_offset
+ >= scratch->tctxt.minMatchOffset);
+ pq_replace_top_with(&scratch->catchup_pq, scratch, qi, q_cur_loc(q));
+ return HWLM_CONTINUE_MATCHING;
+ } else if (!alive) {
+ if (report_curr && can_stop_matching(scratch)) {
+ DEBUG_PRINTF("bailing\n");
+ return HWLM_TERMINATE_MATCHING;
+ }
+
deactivateQueue(t, aa, qi, scratch);
- } else if (q->cur == q->end) {
- DEBUG_PRINTF("queue %u finished, nfa lives\n", qi);
- q->cur = q->end = 0;
- pushQueueAt(q, 0, MQE_START, loc);
- } else {
- DEBUG_PRINTF("queue %u unfinished, nfa lives\n", qi);
- u32 i = 0;
- while (q->cur < q->end) {
- q->items[i] = q->items[q->cur++];
- DEBUG_PRINTF("q[%u] = %u:%lld\n", i, q->items[i].type,
- q->items[i].location);
- assert(q->items[i].type != MQE_END);
- i++;
- }
- q->cur = 0;
- q->end = i;
- }
-
- pq_pop_nice(&scratch->catchup_pq);
-
- return HWLM_CONTINUE_MATCHING;
-}
-
-static really_inline
+ } else if (q->cur == q->end) {
+ DEBUG_PRINTF("queue %u finished, nfa lives\n", qi);
+ q->cur = q->end = 0;
+ pushQueueAt(q, 0, MQE_START, loc);
+ } else {
+ DEBUG_PRINTF("queue %u unfinished, nfa lives\n", qi);
+ u32 i = 0;
+ while (q->cur < q->end) {
+ q->items[i] = q->items[q->cur++];
+ DEBUG_PRINTF("q[%u] = %u:%lld\n", i, q->items[i].type,
+ q->items[i].location);
+ assert(q->items[i].type != MQE_END);
+ i++;
+ }
+ q->cur = 0;
+ q->end = i;
+ }
+
+ pq_pop_nice(&scratch->catchup_pq);
+
+ return HWLM_CONTINUE_MATCHING;
+}
+
+static really_inline
hwlmcb_rv_t runNewNfaToNextMatch(const struct RoseEngine *t, u32 qi,
struct mq *q, s64a loc,
- struct hs_scratch *scratch, u8 *aa,
- s64a report_ok_loc) {
- assert(!q->report_current);
- DEBUG_PRINTF("running queue from %u:%lld to %lld\n", q->cur, q_cur_loc(q),
- loc);
- DEBUG_PRINTF("min match offset %llu\n", scratch->tctxt.minMatchOffset);
-
- char alive = 1;
-
-restart:
- alive = nfaQueueExecToMatch(q->nfa, q, loc);
-
- if (alive == MO_MATCHES_PENDING) {
- DEBUG_PRINTF("we have pending matches at %lld\n", q_cur_loc(q));
- s64a qcl = q_cur_loc(q);
-
- if (qcl == report_ok_loc) {
- assert(q->cur != q->end); /* the queue shouldn't be empty if there
- * are pending matches. */
- q->report_current = 1;
- DEBUG_PRINTF("restarting...\n");
- goto restart;
- }
- assert(qcl + scratch->core_info.buf_offset
- >= scratch->tctxt.minMatchOffset);
- pq_insert_with(&scratch->catchup_pq, scratch, qi, qcl);
- } else if (!alive) {
- if (can_stop_matching(scratch)) {
- DEBUG_PRINTF("bailing\n");
- return HWLM_TERMINATE_MATCHING;
- }
-
+ struct hs_scratch *scratch, u8 *aa,
+ s64a report_ok_loc) {
+ assert(!q->report_current);
+ DEBUG_PRINTF("running queue from %u:%lld to %lld\n", q->cur, q_cur_loc(q),
+ loc);
+ DEBUG_PRINTF("min match offset %llu\n", scratch->tctxt.minMatchOffset);
+
+ char alive = 1;
+
+restart:
+ alive = nfaQueueExecToMatch(q->nfa, q, loc);
+
+ if (alive == MO_MATCHES_PENDING) {
+ DEBUG_PRINTF("we have pending matches at %lld\n", q_cur_loc(q));
+ s64a qcl = q_cur_loc(q);
+
+ if (qcl == report_ok_loc) {
+ assert(q->cur != q->end); /* the queue shouldn't be empty if there
+ * are pending matches. */
+ q->report_current = 1;
+ DEBUG_PRINTF("restarting...\n");
+ goto restart;
+ }
+ assert(qcl + scratch->core_info.buf_offset
+ >= scratch->tctxt.minMatchOffset);
+ pq_insert_with(&scratch->catchup_pq, scratch, qi, qcl);
+ } else if (!alive) {
+ if (can_stop_matching(scratch)) {
+ DEBUG_PRINTF("bailing\n");
+ return HWLM_TERMINATE_MATCHING;
+ }
+
deactivateQueue(t, aa, qi, scratch);
- } else if (q->cur == q->end) {
- DEBUG_PRINTF("queue %u finished, nfa lives\n", qi);
- q->cur = q->end = 0;
- pushQueueAt(q, 0, MQE_START, loc);
- } else {
- DEBUG_PRINTF("queue %u unfinished, nfa lives\n", qi);
- u32 i = 0;
- while (q->cur < q->end) {
- q->items[i] = q->items[q->cur++];
- DEBUG_PRINTF("q[%u] = %u:%lld\n", i, q->items[i].type,
- q->items[i].location);
- assert(q->items[i].type != MQE_END);
- i++;
- }
- q->cur = 0;
- q->end = i;
- }
-
- return HWLM_CONTINUE_MATCHING;
-}
-
-/* for use by mpv (chained) only */
+ } else if (q->cur == q->end) {
+ DEBUG_PRINTF("queue %u finished, nfa lives\n", qi);
+ q->cur = q->end = 0;
+ pushQueueAt(q, 0, MQE_START, loc);
+ } else {
+ DEBUG_PRINTF("queue %u unfinished, nfa lives\n", qi);
+ u32 i = 0;
+ while (q->cur < q->end) {
+ q->items[i] = q->items[q->cur++];
+ DEBUG_PRINTF("q[%u] = %u:%lld\n", i, q->items[i].type,
+ q->items[i].location);
+ assert(q->items[i].type != MQE_END);
+ i++;
+ }
+ q->cur = 0;
+ q->end = i;
+ }
+
+ return HWLM_CONTINUE_MATCHING;
+}
+
+/* for use by mpv (chained) only */
static
int roseNfaFinalBlastAdaptor(u64a start, u64a end, ReportID id, void *context) {
struct hs_scratch *scratch = context;
assert(scratch && scratch->magic == SCRATCH_MAGIC);
const struct RoseEngine *t = scratch->core_info.rose;
-
+
DEBUG_PRINTF("id=%u matched at [%llu,%llu]\n", id, start, end);
-
+
int cb_rv = roseNfaRunProgram(t, scratch, start, end, id, 1);
- if (cb_rv == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- } else if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) {
- return MO_CONTINUE_MATCHING;
- } else {
- assert(cb_rv == MO_CONTINUE_MATCHING);
+ if (cb_rv == MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING;
+ } else if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) {
+ return MO_CONTINUE_MATCHING;
+ } else {
+ assert(cb_rv == MO_CONTINUE_MATCHING);
return !roseSuffixIsExhausted(t, 0,
scratch->core_info.exhaustionVector);
- }
-}
-
-static really_inline
-void ensureEnd(struct mq *q, UNUSED u32 qi, s64a final_loc) {
- DEBUG_PRINTF("ensure MQE_END %lld for queue %u\n", final_loc, qi);
- if (final_loc >= q_last_loc(q)) {
- /* TODO: ensure situation does not arise */
- assert(q_last_type(q) != MQE_END);
- pushQueueNoMerge(q, MQE_END, final_loc);
- }
-}
-
-static really_inline
-hwlmcb_rv_t add_to_queue(const struct RoseEngine *t, struct mq *queues,
- u32 qCount, u8 *aa, struct hs_scratch *scratch,
- s64a loc, u32 qi, s64a report_ok_loc) {
- struct mq *q = queues + qi;
- const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
-
- if (roseSuffixInfoIsExhausted(t, info,
- scratch->core_info.exhaustionVector)) {
+ }
+}
+
+static really_inline
+void ensureEnd(struct mq *q, UNUSED u32 qi, s64a final_loc) {
+ DEBUG_PRINTF("ensure MQE_END %lld for queue %u\n", final_loc, qi);
+ if (final_loc >= q_last_loc(q)) {
+ /* TODO: ensure situation does not arise */
+ assert(q_last_type(q) != MQE_END);
+ pushQueueNoMerge(q, MQE_END, final_loc);
+ }
+}
+
+static really_inline
+hwlmcb_rv_t add_to_queue(const struct RoseEngine *t, struct mq *queues,
+ u32 qCount, u8 *aa, struct hs_scratch *scratch,
+ s64a loc, u32 qi, s64a report_ok_loc) {
+ struct mq *q = queues + qi;
+ const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
+
+ if (roseSuffixInfoIsExhausted(t, info,
+ scratch->core_info.exhaustionVector)) {
deactivateQueue(t, aa, qi, scratch);
- return HWLM_CONTINUE_MATCHING;
- }
-
- ensureQueueActive(t, qi, qCount, q, scratch);
-
- if (unlikely(loc < q_cur_loc(q))) {
- DEBUG_PRINTF("err loc %lld < location %lld\n", loc, q_cur_loc(q));
- return HWLM_CONTINUE_MATCHING;
- }
-
- ensureEnd(q, qi, loc);
-
+ return HWLM_CONTINUE_MATCHING;
+ }
+
+ ensureQueueActive(t, qi, qCount, q, scratch);
+
+ if (unlikely(loc < q_cur_loc(q))) {
+ DEBUG_PRINTF("err loc %lld < location %lld\n", loc, q_cur_loc(q));
+ return HWLM_CONTINUE_MATCHING;
+ }
+
+ ensureEnd(q, qi, loc);
+
return runNewNfaToNextMatch(t, qi, q, loc, scratch, aa, report_ok_loc);
-}
-
-static really_inline
-s64a findSecondPlace(struct catchup_pq *pq, s64a loc_limit) {
- assert(pq->qm_size); /* we are still on the pq and we are first place */
-
- /* we know (*cough* encapsulation) that second place will either be in
- * pq->qm[1] or pq->qm[2] (we are pq->qm[0]) */
- switch (pq->qm_size) {
- case 0:
- case 1:
- return (s64a)loc_limit;
- case 2:
- return MIN((s64a)pq->qm[1].loc, loc_limit);
- default:;
- size_t best = MIN(pq->qm[1].loc, pq->qm[2].loc);
- return MIN((s64a)best, loc_limit);
- }
-}
-
+}
+
+static really_inline
+s64a findSecondPlace(struct catchup_pq *pq, s64a loc_limit) {
+ assert(pq->qm_size); /* we are still on the pq and we are first place */
+
+ /* we know (*cough* encapsulation) that second place will either be in
+ * pq->qm[1] or pq->qm[2] (we are pq->qm[0]) */
+ switch (pq->qm_size) {
+ case 0:
+ case 1:
+ return (s64a)loc_limit;
+ case 2:
+ return MIN((s64a)pq->qm[1].loc, loc_limit);
+ default:;
+ size_t best = MIN(pq->qm[1].loc, pq->qm[2].loc);
+ return MIN((s64a)best, loc_limit);
+ }
+}
+
hwlmcb_rv_t roseCatchUpMPV_i(const struct RoseEngine *t, s64a loc,
- struct hs_scratch *scratch) {
+ struct hs_scratch *scratch) {
char *state = scratch->core_info.state;
- struct mq *queues = scratch->queues;
- u8 *aa = getActiveLeafArray(t, state);
- UNUSED u32 aaCount = t->activeArrayCount;
- u32 qCount = t->queueCount;
-
- /* find first match of each pending nfa */
- DEBUG_PRINTF("aa=%p, aaCount=%u\n", aa, aaCount);
-
- assert(t->outfixBeginQueue == 1);
-
- u32 qi = 0;
- assert(mmbit_isset(aa, aaCount, 0)); /* caller should have already bailed */
-
- DEBUG_PRINTF("catching up qi=%u to loc %lld\n", qi, loc);
-
- struct mq *q = queues + qi;
- const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
- u64a mpv_exec_end = scratch->core_info.buf_offset + loc;
- u64a next_pos_match_loc = 0;
-
- if (roseSuffixInfoIsExhausted(t, info,
- scratch->core_info.exhaustionVector)) {
+ struct mq *queues = scratch->queues;
+ u8 *aa = getActiveLeafArray(t, state);
+ UNUSED u32 aaCount = t->activeArrayCount;
+ u32 qCount = t->queueCount;
+
+ /* find first match of each pending nfa */
+ DEBUG_PRINTF("aa=%p, aaCount=%u\n", aa, aaCount);
+
+ assert(t->outfixBeginQueue == 1);
+
+ u32 qi = 0;
+ assert(mmbit_isset(aa, aaCount, 0)); /* caller should have already bailed */
+
+ DEBUG_PRINTF("catching up qi=%u to loc %lld\n", qi, loc);
+
+ struct mq *q = queues + qi;
+ const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
+ u64a mpv_exec_end = scratch->core_info.buf_offset + loc;
+ u64a next_pos_match_loc = 0;
+
+ if (roseSuffixInfoIsExhausted(t, info,
+ scratch->core_info.exhaustionVector)) {
deactivateQueue(t, aa, qi, scratch);
- goto done;
- }
-
- ensureQueueActive(t, qi, qCount, q, scratch);
-
- if (unlikely(loc < q_cur_loc(q))) {
- DEBUG_PRINTF("err loc %lld < location %lld\n", loc, q_cur_loc(q));
- goto done;
- }
-
- ensureEnd(q, qi, loc);
-
- assert(!q->report_current);
-
+ goto done;
+ }
+
+ ensureQueueActive(t, qi, qCount, q, scratch);
+
+ if (unlikely(loc < q_cur_loc(q))) {
+ DEBUG_PRINTF("err loc %lld < location %lld\n", loc, q_cur_loc(q));
+ goto done;
+ }
+
+ ensureEnd(q, qi, loc);
+
+ assert(!q->report_current);
+
q->cb = roseNfaFinalBlastAdaptor;
-
- DEBUG_PRINTF("queue %u blasting, %u/%u [%lld/%lld]\n",
- qi, q->cur, q->end, q->items[q->cur].location, loc);
-
- scratch->tctxt.mpv_inactive = 0;
-
- /* we know it is going to be an mpv, skip the indirection */
+
+ DEBUG_PRINTF("queue %u blasting, %u/%u [%lld/%lld]\n",
+ qi, q->cur, q->end, q->items[q->cur].location, loc);
+
+ scratch->tctxt.mpv_inactive = 0;
+
+ /* we know it is going to be an mpv, skip the indirection */
next_pos_match_loc = nfaExecMpv_QueueExecRaw(q->nfa, q, loc);
- assert(!q->report_current);
-
- if (!next_pos_match_loc) { /* 0 means dead */
- DEBUG_PRINTF("mpv is pining for the fjords\n");
- if (can_stop_matching(scratch)) {
+ assert(!q->report_current);
+
+ if (!next_pos_match_loc) { /* 0 means dead */
+ DEBUG_PRINTF("mpv is pining for the fjords\n");
+ if (can_stop_matching(scratch)) {
deactivateQueue(t, aa, qi, scratch);
- return HWLM_TERMINATE_MATCHING;
- }
-
- next_pos_match_loc = scratch->core_info.len;
- scratch->tctxt.mpv_inactive = 1;
- }
-
- if (q->cur == q->end) {
- DEBUG_PRINTF("queue %u finished, nfa lives [%lld]\n", qi, loc);
- q->cur = 0;
- q->end = 0;
- pushQueueAt(q, 0, MQE_START, loc);
- } else {
- DEBUG_PRINTF("queue %u not finished, nfa lives [%lld]\n", qi, loc);
- }
-
-done:
+ return HWLM_TERMINATE_MATCHING;
+ }
+
+ next_pos_match_loc = scratch->core_info.len;
+ scratch->tctxt.mpv_inactive = 1;
+ }
+
+ if (q->cur == q->end) {
+ DEBUG_PRINTF("queue %u finished, nfa lives [%lld]\n", qi, loc);
+ q->cur = 0;
+ q->end = 0;
+ pushQueueAt(q, 0, MQE_START, loc);
+ } else {
+ DEBUG_PRINTF("queue %u not finished, nfa lives [%lld]\n", qi, loc);
+ }
+
+done:
if (t->flushCombProgramOffset) {
if (roseRunFlushCombProgram(t, scratch, mpv_exec_end)
== HWLM_TERMINATE_MATCHING) {
return HWLM_TERMINATE_MATCHING;
}
}
- updateMinMatchOffsetFromMpv(&scratch->tctxt, mpv_exec_end);
- scratch->tctxt.next_mpv_offset
- = MAX(next_pos_match_loc + scratch->core_info.buf_offset,
- mpv_exec_end + 1);
-
- DEBUG_PRINTF("next match loc %lld (off %llu)\n", next_pos_match_loc,
- scratch->tctxt.next_mpv_offset);
- return can_stop_matching(scratch) ? HWLM_TERMINATE_MATCHING
- : HWLM_CONTINUE_MATCHING;
-}
-
+ updateMinMatchOffsetFromMpv(&scratch->tctxt, mpv_exec_end);
+ scratch->tctxt.next_mpv_offset
+ = MAX(next_pos_match_loc + scratch->core_info.buf_offset,
+ mpv_exec_end + 1);
+
+ DEBUG_PRINTF("next match loc %lld (off %llu)\n", next_pos_match_loc,
+ scratch->tctxt.next_mpv_offset);
+ return can_stop_matching(scratch) ? HWLM_TERMINATE_MATCHING
+ : HWLM_CONTINUE_MATCHING;
+}
+
static really_inline
char in_mpv(const struct RoseEngine *rose, const struct hs_scratch *scratch) {
const struct RoseContext *tctxt = &scratch->tctxt;
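
findSecondPlace() in the hunk above exploits the same array-backed heap layout: the root sits at qm[0] and its children at qm[1] and qm[2], so while the root's engine is being run, the next-earliest pending match can only be at index 1 or 2, clamped to loc_limit. A standalone sketch with illustrative types:

#include <stddef.h>
#include <stdio.h>

struct queue_match { unsigned queue; size_t loc; };

static size_t min_sz(size_t a, size_t b) { return a < b ? a : b; }

/* second-smallest location in a binary min-heap of n items, clamped */
static size_t second_place(const struct queue_match *qm, size_t n,
                           size_t loc_limit) {
    switch (n) {
    case 0:
    case 1:
        return loc_limit;                     /* no other pending match */
    case 2:
        return min_sz(qm[1].loc, loc_limit);  /* root has a single child */
    default:
        return min_sz(min_sz(qm[1].loc, qm[2].loc), loc_limit);
    }
}

int main(void) {
    struct queue_match qm[] = { {1, 10}, {2, 25}, {3, 40} }; /* a valid min-heap */
    printf("second place: %zu\n", second_place(qm, 3, 100)); /* prints 25 */
    return 0;
}
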
@@ -448,453 +448,453 @@ char in_mpv(const struct RoseEngine *rose, const struct hs_scratch *scratch) {
if (tctxt->curr_qi < rose->outfixBeginQueue) {
assert(getNfaByQueue(rose, tctxt->curr_qi)->type == MPV_NFA);
return 1;
- }
+ }
return 0;
-}
-
+}
+
static
int roseNfaBlastAdaptor(u64a start, u64a end, ReportID id, void *context) {
struct hs_scratch *scratch = context;
assert(scratch && scratch->magic == SCRATCH_MAGIC);
const struct RoseEngine *t = scratch->core_info.rose;
-
+
DEBUG_PRINTF("id=%u matched at [%llu,%llu]\n", id, start, end);
-
+
const char from_mpv = in_mpv(t, scratch);
int cb_rv = roseNfaRunProgram(t, scratch, start, end, id, from_mpv);
- if (cb_rv == MO_HALT_MATCHING) {
- return MO_HALT_MATCHING;
- } else if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) {
- return MO_CONTINUE_MATCHING;
- } else {
- assert(cb_rv == MO_CONTINUE_MATCHING);
+ if (cb_rv == MO_HALT_MATCHING) {
+ return MO_HALT_MATCHING;
+ } else if (cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) {
+ return MO_CONTINUE_MATCHING;
+ } else {
+ assert(cb_rv == MO_CONTINUE_MATCHING);
return !roseSuffixIsExhausted(t, scratch->tctxt.curr_qi,
scratch->core_info.exhaustionVector);
- }
-}
-
+ }
+}
+
int roseNfaAdaptor(u64a start, u64a end, ReportID id, void *context) {
struct hs_scratch *scratch = context;
assert(scratch && scratch->magic == SCRATCH_MAGIC);
-
+
DEBUG_PRINTF("id=%u matched at [%llu,%llu]\n", id, start, end);
-
- /* must be a external report as haig cannot directly participate in chain */
+
+ /* must be a external report as haig cannot directly participate in chain */
return roseNfaRunProgram(scratch->core_info.rose, scratch, start, end, id,
0);
-}
-
-static really_inline
+}
+
+static really_inline
char blast_queue(struct hs_scratch *scratch, struct mq *q, u32 qi, s64a to_loc,
char report_current) {
scratch->tctxt.curr_qi = qi;
q->cb = roseNfaBlastAdaptor;
- q->report_current = report_current;
- DEBUG_PRINTF("queue %u blasting, %u/%u [%lld/%lld]\n", qi, q->cur, q->end,
- q_cur_loc(q), to_loc);
- char alive = nfaQueueExec(q->nfa, q, to_loc);
+ q->report_current = report_current;
+ DEBUG_PRINTF("queue %u blasting, %u/%u [%lld/%lld]\n", qi, q->cur, q->end,
+ q_cur_loc(q), to_loc);
+ char alive = nfaQueueExec(q->nfa, q, to_loc);
q->cb = roseNfaAdaptor;
- assert(!q->report_current);
-
- return alive;
-}
-
-static really_inline
-hwlmcb_rv_t buildSufPQ_final(const struct RoseEngine *t, s64a report_ok_loc,
- s64a second_place_loc, s64a final_loc,
- struct hs_scratch *scratch, u8 *aa, u32 a_qi) {
- struct mq *q = scratch->queues + a_qi;
- const struct NfaInfo *info = getNfaInfoByQueue(t, a_qi);
- DEBUG_PRINTF("blasting qi=%u to %lld [final %lld]\n", a_qi, second_place_loc,
- final_loc);
-
- if (roseSuffixInfoIsExhausted(t, info,
- scratch->core_info.exhaustionVector)) {
+ assert(!q->report_current);
+
+ return alive;
+}
+
+static really_inline
+hwlmcb_rv_t buildSufPQ_final(const struct RoseEngine *t, s64a report_ok_loc,
+ s64a second_place_loc, s64a final_loc,
+ struct hs_scratch *scratch, u8 *aa, u32 a_qi) {
+ struct mq *q = scratch->queues + a_qi;
+ const struct NfaInfo *info = getNfaInfoByQueue(t, a_qi);
+ DEBUG_PRINTF("blasting qi=%u to %lld [final %lld]\n", a_qi, second_place_loc,
+ final_loc);
+
+ if (roseSuffixInfoIsExhausted(t, info,
+ scratch->core_info.exhaustionVector)) {
deactivateQueue(t, aa, a_qi, scratch);
- return HWLM_CONTINUE_MATCHING;
- }
-
- ensureQueueActive(t, a_qi, t->queueCount, q, scratch);
-
- if (unlikely(final_loc < q_cur_loc(q))) {
- DEBUG_PRINTF("err loc %lld < location %lld\n", final_loc, q_cur_loc(q));
- return HWLM_CONTINUE_MATCHING;
- }
-
- ensureEnd(q, a_qi, final_loc);
-
+ return HWLM_CONTINUE_MATCHING;
+ }
+
+ ensureQueueActive(t, a_qi, t->queueCount, q, scratch);
+
+ if (unlikely(final_loc < q_cur_loc(q))) {
+ DEBUG_PRINTF("err loc %lld < location %lld\n", final_loc, q_cur_loc(q));
+ return HWLM_CONTINUE_MATCHING;
+ }
+
+ ensureEnd(q, a_qi, final_loc);
+
char alive = blast_queue(scratch, q, a_qi, second_place_loc, 0);
-
+
/* We have three possible outcomes:
- * (1) the nfa died
- * (2) we completed the queue (implies that second_place_loc == final_loc)
- * (3) the queue ran to second_place_loc and stopped. In this case we need
- * to find the next match location.
- */
-
- if (!alive) {
- if (can_stop_matching(scratch)) {
- DEBUG_PRINTF("roseCatchUpNfas done as bailing\n");
- return HWLM_TERMINATE_MATCHING;
- }
-
+ * (1) the nfa died
+ * (2) we completed the queue (implies that second_place_loc == final_loc)
+ * (3) the queue ran to second_place_loc and stopped. In this case we need
+ * to find the next match location.
+ */
+
+ if (!alive) {
+ if (can_stop_matching(scratch)) {
+ DEBUG_PRINTF("roseCatchUpNfas done as bailing\n");
+ return HWLM_TERMINATE_MATCHING;
+ }
+
deactivateQueue(t, aa, a_qi, scratch);
- } else if (q->cur == q->end) {
- DEBUG_PRINTF("queue %u finished, nfa lives [%lld]\n", a_qi, final_loc);
-
- assert(second_place_loc == final_loc);
-
- q->cur = q->end = 0;
- pushQueueAt(q, 0, MQE_START, final_loc);
- } else {
- DEBUG_PRINTF("queue %u not finished, %u/%u [%lld/%lld]\n", a_qi, q->cur,
- q->end, q_cur_loc(q), final_loc);
- DEBUG_PRINTF("finding next match location\n");
-
- assert(second_place_loc < final_loc);
- assert(q_cur_loc(q) >= second_place_loc);
-
+ } else if (q->cur == q->end) {
+ DEBUG_PRINTF("queue %u finished, nfa lives [%lld]\n", a_qi, final_loc);
+
+ assert(second_place_loc == final_loc);
+
+ q->cur = q->end = 0;
+ pushQueueAt(q, 0, MQE_START, final_loc);
+ } else {
+ DEBUG_PRINTF("queue %u not finished, %u/%u [%lld/%lld]\n", a_qi, q->cur,
+ q->end, q_cur_loc(q), final_loc);
+ DEBUG_PRINTF("finding next match location\n");
+
+ assert(second_place_loc < final_loc);
+ assert(q_cur_loc(q) >= second_place_loc);
+
if (runNewNfaToNextMatch(t, a_qi, q, final_loc, scratch, aa,
report_ok_loc) == HWLM_TERMINATE_MATCHING) {
- DEBUG_PRINTF("roseCatchUpNfas done\n");
- return HWLM_TERMINATE_MATCHING;
- }
- }
-
- return HWLM_CONTINUE_MATCHING;
-}
-
+ DEBUG_PRINTF("roseCatchUpNfas done\n");
+ return HWLM_TERMINATE_MATCHING;
+ }
+ }
+
+ return HWLM_CONTINUE_MATCHING;
+}
+
void streamInitSufPQ(const struct RoseEngine *t, char *state,
- struct hs_scratch *scratch) {
- assert(scratch->catchup_pq.qm_size == 0);
- assert(t->outfixBeginQueue != t->outfixEndQueue);
-
- DEBUG_PRINTF("initSufPQ: outfixes [%u,%u)\n", t->outfixBeginQueue,
- t->outfixEndQueue);
-
- u32 qCount = t->queueCount;
- u8 *aa = getActiveLeafArray(t, state);
- u32 aaCount = t->activeArrayCount;
- struct mq *queues = scratch->queues;
- size_t length = scratch->core_info.len;
-
- u32 qi = mmbit_iterate_bounded(aa, aaCount, t->outfixBeginQueue,
- t->outfixEndQueue);
- for (; qi < t->outfixEndQueue;) {
- DEBUG_PRINTF("adding qi=%u\n", qi);
- struct mq *q = queues + qi;
-
- ensureQueueActive(t, qi, qCount, q, scratch);
- ensureEnd(q, qi, length);
-
- char alive = nfaQueueExecToMatch(q->nfa, q, length);
-
- if (alive == MO_MATCHES_PENDING) {
- DEBUG_PRINTF("we have pending matches at %lld\n", q_cur_loc(q));
- s64a qcl = q_cur_loc(q);
-
- pq_insert_with(&scratch->catchup_pq, scratch, qi, qcl);
- } else if (!alive) {
+ struct hs_scratch *scratch) {
+ assert(scratch->catchup_pq.qm_size == 0);
+ assert(t->outfixBeginQueue != t->outfixEndQueue);
+
+ DEBUG_PRINTF("initSufPQ: outfixes [%u,%u)\n", t->outfixBeginQueue,
+ t->outfixEndQueue);
+
+ u32 qCount = t->queueCount;
+ u8 *aa = getActiveLeafArray(t, state);
+ u32 aaCount = t->activeArrayCount;
+ struct mq *queues = scratch->queues;
+ size_t length = scratch->core_info.len;
+
+ u32 qi = mmbit_iterate_bounded(aa, aaCount, t->outfixBeginQueue,
+ t->outfixEndQueue);
+ for (; qi < t->outfixEndQueue;) {
+ DEBUG_PRINTF("adding qi=%u\n", qi);
+ struct mq *q = queues + qi;
+
+ ensureQueueActive(t, qi, qCount, q, scratch);
+ ensureEnd(q, qi, length);
+
+ char alive = nfaQueueExecToMatch(q->nfa, q, length);
+
+ if (alive == MO_MATCHES_PENDING) {
+ DEBUG_PRINTF("we have pending matches at %lld\n", q_cur_loc(q));
+ s64a qcl = q_cur_loc(q);
+
+ pq_insert_with(&scratch->catchup_pq, scratch, qi, qcl);
+ } else if (!alive) {
deactivateQueue(t, aa, qi, scratch);
- } else {
- assert(q->cur == q->end);
- /* TODO: can this be simplified? the nfa will never produce any
- * matches for this block. */
- DEBUG_PRINTF("queue %u finished, nfa lives\n", qi);
- q->cur = q->end = 0;
- pushQueueAt(q, 0, MQE_START, length);
- }
-
- qi = mmbit_iterate_bounded(aa, aaCount, qi + 1, t->outfixEndQueue);
- }
-}
-
+ } else {
+ assert(q->cur == q->end);
+ /* TODO: can this be simplified? the nfa will never produce any
+ * matches for this block. */
+ DEBUG_PRINTF("queue %u finished, nfa lives\n", qi);
+ q->cur = q->end = 0;
+ pushQueueAt(q, 0, MQE_START, length);
+ }
+
+ qi = mmbit_iterate_bounded(aa, aaCount, qi + 1, t->outfixEndQueue);
+ }
+}
+
void blockInitSufPQ(const struct RoseEngine *t, char *state,
- struct hs_scratch *scratch, char is_small_block) {
- DEBUG_PRINTF("initSufPQ: outfixes [%u,%u)\n", t->outfixBeginQueue,
- t->outfixEndQueue);
-
- assert(scratch->catchup_pq.qm_size == 0);
- assert(t->outfixBeginQueue != t->outfixEndQueue);
-
- struct mq *queues = scratch->queues;
- u8 *aa = getActiveLeafArray(t, state);
- struct fatbit *aqa = scratch->aqa;
- u32 aaCount = t->activeArrayCount;
- u32 qCount = t->queueCount;
- size_t length = scratch->core_info.len;
-
- for (u32 qi = t->outfixBeginQueue; qi < t->outfixEndQueue; qi++) {
- const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
-
- if (is_small_block && info->in_sbmatcher) {
- DEBUG_PRINTF("skip outfix %u as it's in the SB matcher\n", qi);
- continue;
- }
-
- const struct NFA *nfa = getNfaByInfo(t, info);
- DEBUG_PRINTF("testing minwidth %u > len %zu\n", nfa->minWidth,
- length);
- size_t len = nfaRevAccelCheck(nfa, scratch->core_info.buf, length);
- if (!len) {
- continue;
- }
- mmbit_set(aa, aaCount, qi);
- fatbit_set(aqa, qCount, qi);
- struct mq *q = queues + qi;
+ struct hs_scratch *scratch, char is_small_block) {
+ DEBUG_PRINTF("initSufPQ: outfixes [%u,%u)\n", t->outfixBeginQueue,
+ t->outfixEndQueue);
+
+ assert(scratch->catchup_pq.qm_size == 0);
+ assert(t->outfixBeginQueue != t->outfixEndQueue);
+
+ struct mq *queues = scratch->queues;
+ u8 *aa = getActiveLeafArray(t, state);
+ struct fatbit *aqa = scratch->aqa;
+ u32 aaCount = t->activeArrayCount;
+ u32 qCount = t->queueCount;
+ size_t length = scratch->core_info.len;
+
+ for (u32 qi = t->outfixBeginQueue; qi < t->outfixEndQueue; qi++) {
+ const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
+
+ if (is_small_block && info->in_sbmatcher) {
+ DEBUG_PRINTF("skip outfix %u as it's in the SB matcher\n", qi);
+ continue;
+ }
+
+ const struct NFA *nfa = getNfaByInfo(t, info);
+ DEBUG_PRINTF("testing minwidth %u > len %zu\n", nfa->minWidth,
+ length);
+ size_t len = nfaRevAccelCheck(nfa, scratch->core_info.buf, length);
+ if (!len) {
+ continue;
+ }
+ mmbit_set(aa, aaCount, qi);
+ fatbit_set(aqa, qCount, qi);
+ struct mq *q = queues + qi;
initQueue(q, qi, t, scratch);
- q->length = len; /* adjust for rev_accel */
- nfaQueueInitState(nfa, q);
- pushQueueAt(q, 0, MQE_START, 0);
- pushQueueAt(q, 1, MQE_TOP, 0);
- pushQueueAt(q, 2, MQE_END, length);
-
- DEBUG_PRINTF("adding qi=%u to pq\n", qi);
-
- char alive = nfaQueueExecToMatch(q->nfa, q, length);
-
- if (alive == MO_MATCHES_PENDING) {
- DEBUG_PRINTF("we have pending matches at %lld\n", q_cur_loc(q));
- s64a qcl = q_cur_loc(q);
-
- pq_insert_with(&scratch->catchup_pq, scratch, qi, qcl);
- } else if (!alive) {
+ q->length = len; /* adjust for rev_accel */
+ nfaQueueInitState(nfa, q);
+ pushQueueAt(q, 0, MQE_START, 0);
+ pushQueueAt(q, 1, MQE_TOP, 0);
+ pushQueueAt(q, 2, MQE_END, length);
+
+ DEBUG_PRINTF("adding qi=%u to pq\n", qi);
+
+ char alive = nfaQueueExecToMatch(q->nfa, q, length);
+
+ if (alive == MO_MATCHES_PENDING) {
+ DEBUG_PRINTF("we have pending matches at %lld\n", q_cur_loc(q));
+ s64a qcl = q_cur_loc(q);
+
+ pq_insert_with(&scratch->catchup_pq, scratch, qi, qcl);
+ } else if (!alive) {
deactivateQueue(t, aa, qi, scratch);
- } else {
- assert(q->cur == q->end);
- /* TODO: can this be simplified? the nfa will never produce any
- * matches for this block. */
- DEBUG_PRINTF("queue %u finished, nfa lives\n", qi);
- q->cur = q->end = 0;
- pushQueueAt(q, 0, MQE_START, length);
- }
- }
-}
-
-/**
- * safe_loc is ???
- */
-static rose_inline
+ } else {
+ assert(q->cur == q->end);
+ /* TODO: can this be simplified? the nfa will never produce any
+ * matches for this block. */
+ DEBUG_PRINTF("queue %u finished, nfa lives\n", qi);
+ q->cur = q->end = 0;
+ pushQueueAt(q, 0, MQE_START, length);
+ }
+ }
+}
+
+/**
+ * safe_loc is ???
+ */
+static rose_inline
hwlmcb_rv_t buildSufPQ(const struct RoseEngine *t, char *state, s64a safe_loc,
- s64a final_loc, struct hs_scratch *scratch) {
- assert(scratch->catchup_pq.qm_size <= t->outfixEndQueue);
-
- struct RoseContext *tctxt = &scratch->tctxt;
- assert(t->activeArrayCount);
-
- assert(scratch->core_info.buf_offset + final_loc
- > tctxt->minNonMpvMatchOffset);
- DEBUG_PRINTF("buildSufPQ final loc %lld (safe %lld)\n", final_loc,
- safe_loc);
- assert(safe_loc <= final_loc);
-
- u8 *aa = getActiveLeafArray(t, state);
- u32 aaCount = t->activeArrayCount;
-
- /* find first match of each pending nfa */
- DEBUG_PRINTF("aa=%p, aaCount=%u\n", aa, aaCount);
-
- /* Note: mpv MUST not participate in the main priority queue as
- * they may have events pushed on during this process which may be before
- * the catch up point. Outfixes are remain in the pq between catchup events
- * as they never have any incoming events to worry about.
- */
- if (aaCount == t->outfixEndQueue) {
- return HWLM_CONTINUE_MATCHING;
- }
-
- DEBUG_PRINTF("mib %u/%u\n", t->outfixBeginQueue, aaCount);
-
- u32 a_qi = mmbit_iterate_bounded(aa, aaCount, t->outfixEndQueue, aaCount);
-
- if (a_qi == MMB_INVALID) {
- return HWLM_CONTINUE_MATCHING;
- }
-
- s64a report_ok_loc = tctxt->minNonMpvMatchOffset + 1
- - scratch->core_info.buf_offset;
-
+ s64a final_loc, struct hs_scratch *scratch) {
+ assert(scratch->catchup_pq.qm_size <= t->outfixEndQueue);
+
+ struct RoseContext *tctxt = &scratch->tctxt;
+ assert(t->activeArrayCount);
+
+ assert(scratch->core_info.buf_offset + final_loc
+ > tctxt->minNonMpvMatchOffset);
+ DEBUG_PRINTF("buildSufPQ final loc %lld (safe %lld)\n", final_loc,
+ safe_loc);
+ assert(safe_loc <= final_loc);
+
+ u8 *aa = getActiveLeafArray(t, state);
+ u32 aaCount = t->activeArrayCount;
+
+ /* find first match of each pending nfa */
+ DEBUG_PRINTF("aa=%p, aaCount=%u\n", aa, aaCount);
+
+ /* Note: mpv MUST not participate in the main priority queue as
+ * they may have events pushed on during this process which may be before
+ * the catch up point. Outfixes are remain in the pq between catchup events
+ * as they never have any incoming events to worry about.
+ */
+ if (aaCount == t->outfixEndQueue) {
+ return HWLM_CONTINUE_MATCHING;
+ }
+
+ DEBUG_PRINTF("mib %u/%u\n", t->outfixBeginQueue, aaCount);
+
+ u32 a_qi = mmbit_iterate_bounded(aa, aaCount, t->outfixEndQueue, aaCount);
+
+ if (a_qi == MMB_INVALID) {
+ return HWLM_CONTINUE_MATCHING;
+ }
+
+ s64a report_ok_loc = tctxt->minNonMpvMatchOffset + 1
+ - scratch->core_info.buf_offset;
+
hwlmcb_rv_t rv = roseCatchUpMPV(t, report_ok_loc, scratch);
- if (rv != HWLM_CONTINUE_MATCHING) {
+ if (rv != HWLM_CONTINUE_MATCHING) {
DEBUG_PRINTF("terminating...\n");
- return rv;
- }
-
- while (a_qi != MMB_INVALID) {
- DEBUG_PRINTF("catching up qi=%u to %lld\n", a_qi, final_loc);
- u32 n_qi = mmbit_iterate(aa, aaCount, a_qi);
-
- s64a second_place_loc
- = scratch->catchup_pq.qm_size ? pq_top_loc(&scratch->catchup_pq)
- : safe_loc;
- second_place_loc = MIN(second_place_loc, safe_loc);
+ return rv;
+ }
+
+ while (a_qi != MMB_INVALID) {
+ DEBUG_PRINTF("catching up qi=%u to %lld\n", a_qi, final_loc);
+ u32 n_qi = mmbit_iterate(aa, aaCount, a_qi);
+
+ s64a second_place_loc
+ = scratch->catchup_pq.qm_size ? pq_top_loc(&scratch->catchup_pq)
+ : safe_loc;
+ second_place_loc = MIN(second_place_loc, safe_loc);
if (n_qi == MMB_INVALID && report_ok_loc <= second_place_loc) {
- if (buildSufPQ_final(t, report_ok_loc, second_place_loc, final_loc,
- scratch, aa, a_qi)
- == HWLM_TERMINATE_MATCHING) {
- return HWLM_TERMINATE_MATCHING;
- }
- break;
- }
-
- if (add_to_queue(t, scratch->queues, t->queueCount, aa, scratch,
- final_loc, a_qi, report_ok_loc)
- == HWLM_TERMINATE_MATCHING) {
- DEBUG_PRINTF("roseCatchUpNfas done\n");
- return HWLM_TERMINATE_MATCHING;
- }
-
- a_qi = n_qi;
- }
-
- DEBUG_PRINTF("PQ BUILD %u items\n", scratch->catchup_pq.qm_size);
- return HWLM_CONTINUE_MATCHING;
-}
-
-static never_inline
+ if (buildSufPQ_final(t, report_ok_loc, second_place_loc, final_loc,
+ scratch, aa, a_qi)
+ == HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+ break;
+ }
+
+ if (add_to_queue(t, scratch->queues, t->queueCount, aa, scratch,
+ final_loc, a_qi, report_ok_loc)
+ == HWLM_TERMINATE_MATCHING) {
+ DEBUG_PRINTF("roseCatchUpNfas done\n");
+ return HWLM_TERMINATE_MATCHING;
+ }
+
+ a_qi = n_qi;
+ }
+
+ DEBUG_PRINTF("PQ BUILD %u items\n", scratch->catchup_pq.qm_size);
+ return HWLM_CONTINUE_MATCHING;
+}
+
+static never_inline
hwlmcb_rv_t roseCatchUpNfas(const struct RoseEngine *t, s64a loc,
- s64a final_loc, struct hs_scratch *scratch) {
- assert(t->activeArrayCount);
-
+ s64a final_loc, struct hs_scratch *scratch) {
+ assert(t->activeArrayCount);
+
DEBUG_PRINTF("roseCatchUpNfas offset=%llu + %lld/%lld\n",
scratch->core_info.buf_offset, loc, final_loc);
- DEBUG_PRINTF("min non mpv match offset %llu\n",
- scratch->tctxt.minNonMpvMatchOffset);
-
+ DEBUG_PRINTF("min non mpv match offset %llu\n",
+ scratch->tctxt.minNonMpvMatchOffset);
+
struct RoseContext *tctxt = &scratch->tctxt;
assert(scratch->core_info.buf_offset + loc >= tctxt->minNonMpvMatchOffset);
char *state = scratch->core_info.state;
- struct mq *queues = scratch->queues;
- u8 *aa = getActiveLeafArray(t, state);
-
-    /* fire off the earliest nfa match and catch up anchored matches to that point */
- while (scratch->catchup_pq.qm_size) {
- s64a match_loc = pq_top_loc(&scratch->catchup_pq);
- u32 qi = pq_top(scratch->catchup_pq.qm)->queue;
-
- DEBUG_PRINTF("winrar q%u@%lld loc %lld\n", qi, match_loc, loc);
- assert(match_loc + scratch->core_info.buf_offset
- >= scratch->tctxt.minNonMpvMatchOffset);
-
- if (match_loc > loc) {
- /* we have processed all the matches at or before rose's current
- * location; only things remaining on the pq should be outfixes. */
- DEBUG_PRINTF("saving for later\n");
- goto exit;
- }
-
- /* catch up char matches to this point */
+ struct mq *queues = scratch->queues;
+ u8 *aa = getActiveLeafArray(t, state);
+
+    /* fire off the earliest nfa match and catch up anchored matches to that point */
+ while (scratch->catchup_pq.qm_size) {
+ s64a match_loc = pq_top_loc(&scratch->catchup_pq);
+ u32 qi = pq_top(scratch->catchup_pq.qm)->queue;
+
+ DEBUG_PRINTF("winrar q%u@%lld loc %lld\n", qi, match_loc, loc);
+ assert(match_loc + scratch->core_info.buf_offset
+ >= scratch->tctxt.minNonMpvMatchOffset);
+
+ if (match_loc > loc) {
+ /* we have processed all the matches at or before rose's current
+ * location; only things remaining on the pq should be outfixes. */
+ DEBUG_PRINTF("saving for later\n");
+ goto exit;
+ }
+
+ /* catch up char matches to this point */
if (roseCatchUpMPV(t, match_loc, scratch)
- == HWLM_TERMINATE_MATCHING) {
- DEBUG_PRINTF("roseCatchUpNfas done\n");
- return HWLM_TERMINATE_MATCHING;
- }
-
- assert(match_loc + scratch->core_info.buf_offset
- >= scratch->tctxt.minNonMpvMatchOffset);
-
- struct mq *q = queues + qi;
-
- /* outfixes must be advanced all the way as they persist in the pq
- * between catchup events */
- s64a q_final_loc = qi >= t->outfixEndQueue ? final_loc
- : (s64a)scratch->core_info.len;
-
-        /* fire nfa matches, and find the next place this nfa matches */
- DEBUG_PRINTF("reporting matches %u@%llu [q->cur %u/%u]\n", qi,
- match_loc, q->cur, q->end);
-
-        /* we then need to catch this nfa up to the next earliest nfa match.
-         * These matches can be fired directly from the callback. The callback
-         * needs to ensure that the anchored matches remain in sync, though. */
- s64a second_place_loc = findSecondPlace(&scratch->catchup_pq, loc);
- DEBUG_PRINTF("second place %lld loc %lld\n", second_place_loc, loc);
-
- if (second_place_loc == q_cur_loc(q)) {
+ == HWLM_TERMINATE_MATCHING) {
+ DEBUG_PRINTF("roseCatchUpNfas done\n");
+ return HWLM_TERMINATE_MATCHING;
+ }
+
+ assert(match_loc + scratch->core_info.buf_offset
+ >= scratch->tctxt.minNonMpvMatchOffset);
+
+ struct mq *q = queues + qi;
+
+ /* outfixes must be advanced all the way as they persist in the pq
+ * between catchup events */
+ s64a q_final_loc = qi >= t->outfixEndQueue ? final_loc
+ : (s64a)scratch->core_info.len;
+
+        /* fire nfa matches, and find the next place this nfa matches */
+ DEBUG_PRINTF("reporting matches %u@%llu [q->cur %u/%u]\n", qi,
+ match_loc, q->cur, q->end);
+
+        /* we then need to catch this nfa up to the next earliest nfa match.
+         * These matches can be fired directly from the callback. The callback
+         * needs to ensure that the anchored matches remain in sync, though. */
+ s64a second_place_loc = findSecondPlace(&scratch->catchup_pq, loc);
+ DEBUG_PRINTF("second place %lld loc %lld\n", second_place_loc, loc);
+
+ if (second_place_loc == q_cur_loc(q)) {
if (runExistingNfaToNextMatch(t, qi, q, q_final_loc, scratch, aa, 1)
- == HWLM_TERMINATE_MATCHING) {
- return HWLM_TERMINATE_MATCHING;
- }
- continue;
- }
-
+ == HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+ continue;
+ }
+
char alive = blast_queue(scratch, q, qi, second_place_loc, 1);
-
- if (!alive) {
- if (can_stop_matching(scratch)) {
- DEBUG_PRINTF("roseCatchUpNfas done as bailing\n");
- return HWLM_TERMINATE_MATCHING;
- }
-
+
+ if (!alive) {
+ if (can_stop_matching(scratch)) {
+ DEBUG_PRINTF("roseCatchUpNfas done as bailing\n");
+ return HWLM_TERMINATE_MATCHING;
+ }
+
deactivateQueue(t, aa, qi, scratch);
- pq_pop_nice(&scratch->catchup_pq);
- } else if (q->cur == q->end) {
- DEBUG_PRINTF("queue %u finished, nfa lives [%lld]\n", qi, loc);
- q->cur = q->end = 0;
- pushQueueAt(q, 0, MQE_START, loc);
- pq_pop_nice(&scratch->catchup_pq);
- } else if (second_place_loc == q_final_loc) {
- DEBUG_PRINTF("queue %u on hold\n", qi);
- pq_pop_nice(&scratch->catchup_pq);
- break;
- } else {
- DEBUG_PRINTF("queue %u not finished, %u/%u [%lld/%lld]\n",
- qi, q->cur, q->end, q->items[q->cur].location, loc);
+ pq_pop_nice(&scratch->catchup_pq);
+ } else if (q->cur == q->end) {
+ DEBUG_PRINTF("queue %u finished, nfa lives [%lld]\n", qi, loc);
+ q->cur = q->end = 0;
+ pushQueueAt(q, 0, MQE_START, loc);
+ pq_pop_nice(&scratch->catchup_pq);
+ } else if (second_place_loc == q_final_loc) {
+ DEBUG_PRINTF("queue %u on hold\n", qi);
+ pq_pop_nice(&scratch->catchup_pq);
+ break;
+ } else {
+ DEBUG_PRINTF("queue %u not finished, %u/%u [%lld/%lld]\n",
+ qi, q->cur, q->end, q->items[q->cur].location, loc);
runExistingNfaToNextMatch(t, qi, q, q_final_loc, scratch, aa, 0);
- }
- }
-exit:;
- tctxt->minNonMpvMatchOffset = scratch->core_info.buf_offset + loc;
- DEBUG_PRINTF("roseCatchUpNfas done\n");
- return HWLM_CONTINUE_MATCHING;
-}
-
-hwlmcb_rv_t roseCatchUpAll(s64a loc, struct hs_scratch *scratch) {
- /* just need suf/outfixes and mpv */
- DEBUG_PRINTF("loc %lld mnmmo %llu mmo %llu\n", loc,
- scratch->tctxt.minNonMpvMatchOffset,
- scratch->tctxt.minMatchOffset);
- assert(scratch->core_info.buf_offset + loc
- > scratch->tctxt.minNonMpvMatchOffset);
-
+ }
+ }
+exit:;
+ tctxt->minNonMpvMatchOffset = scratch->core_info.buf_offset + loc;
+ DEBUG_PRINTF("roseCatchUpNfas done\n");
+ return HWLM_CONTINUE_MATCHING;
+}
+
+hwlmcb_rv_t roseCatchUpAll(s64a loc, struct hs_scratch *scratch) {
+ /* just need suf/outfixes and mpv */
+ DEBUG_PRINTF("loc %lld mnmmo %llu mmo %llu\n", loc,
+ scratch->tctxt.minNonMpvMatchOffset,
+ scratch->tctxt.minMatchOffset);
+ assert(scratch->core_info.buf_offset + loc
+ > scratch->tctxt.minNonMpvMatchOffset);
+
const struct RoseEngine *t = scratch->core_info.rose;
char *state = scratch->core_info.state;
hwlmcb_rv_t rv = buildSufPQ(t, state, loc, loc, scratch);
- if (rv != HWLM_CONTINUE_MATCHING) {
- return rv;
- }
-
+ if (rv != HWLM_CONTINUE_MATCHING) {
+ return rv;
+ }
+
rv = roseCatchUpNfas(t, loc, loc, scratch);
- if (rv != HWLM_CONTINUE_MATCHING) {
- return rv;
- }
-
+ if (rv != HWLM_CONTINUE_MATCHING) {
+ return rv;
+ }
+
rv = roseCatchUpMPV(t, loc, scratch);
- assert(rv != HWLM_CONTINUE_MATCHING
+ assert(rv != HWLM_CONTINUE_MATCHING
|| scratch->catchup_pq.qm_size <= t->outfixEndQueue);
assert(!can_stop_matching(scratch) || rv == HWLM_TERMINATE_MATCHING);
- return rv;
-}
-
-hwlmcb_rv_t roseCatchUpSuf(s64a loc, struct hs_scratch *scratch) {
- /* just need suf/outfixes. mpv will be caught up only to last reported
- * external match */
- assert(scratch->core_info.buf_offset + loc
- > scratch->tctxt.minNonMpvMatchOffset);
-
+ return rv;
+}
+
+hwlmcb_rv_t roseCatchUpSuf(s64a loc, struct hs_scratch *scratch) {
+ /* just need suf/outfixes. mpv will be caught up only to last reported
+ * external match */
+ assert(scratch->core_info.buf_offset + loc
+ > scratch->tctxt.minNonMpvMatchOffset);
+
const struct RoseEngine *t = scratch->core_info.rose;
char *state = scratch->core_info.state;
hwlmcb_rv_t rv = buildSufPQ(t, state, loc, loc, scratch);
- if (rv != HWLM_CONTINUE_MATCHING) {
- return rv;
- }
-
+ if (rv != HWLM_CONTINUE_MATCHING) {
+ return rv;
+ }
+
rv = roseCatchUpNfas(t, loc, loc, scratch);
assert(rv != HWLM_CONTINUE_MATCHING ||
scratch->catchup_pq.qm_size <= t->outfixEndQueue);
-
- return rv;
-}
+
+ return rv;
+}
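The heart of the catchup machinery above is a min-priority queue of engines
keyed by each engine's earliest pending match location: pop the head, report
its match, advance that engine to its next match, reinsert. The "second
place" trick caps how far the head engine is run so that no other engine's
match can be skipped over. Below is a minimal self-contained sketch of that
loop; the types and names (toy_engine, catch_up_to, and so on) are invented
stand-ins, not the hyperscan API, and a linear scan stands in for the heap.

/* Minimal sketch of the catchup priority-queue loop described above.
 * All names here are illustrative, not part of the hyperscan API. */
#include <stdio.h>

#define NUM_ENGINES 3
#define NO_MATCH -1

/* Toy stand-in for an NFA queue: a list of precomputed match locations. */
struct toy_engine {
    const long *matches; /* ascending match locations, NO_MATCH-terminated */
    int cursor;          /* next unreported match */
};

static long next_match_loc(const struct toy_engine *e) {
    return e->matches[e->cursor];
}

/* Report every match at or before 'loc', earliest first across engines. */
static void catch_up_to(struct toy_engine *engines, int n, long loc) {
    for (;;) {
        int best = -1;
        long best_loc = NO_MATCH;
        /* Linear scan stands in for the real min-priority-queue. */
        for (int i = 0; i < n; i++) {
            long m = next_match_loc(&engines[i]);
            if (m != NO_MATCH && (best == -1 || m < best_loc)) {
                best = i;
                best_loc = m;
            }
        }
        if (best == -1 || best_loc > loc) {
            break; /* all remaining matches are past the catchup point */
        }
        printf("engine %d matches at %ld\n", best, best_loc);
        engines[best].cursor++; /* advance to this engine's next match */
    }
}

int main(void) {
    const long m0[] = {2, 9, NO_MATCH};
    const long m1[] = {4, 5, NO_MATCH};
    const long m2[] = {7, NO_MATCH};
    struct toy_engine engines[NUM_ENGINES] = {
        {m0, 0}, {m1, 0}, {m2, 0},
    };
    catch_up_to(engines, NUM_ENGINES, 6); /* reports 2, 4, 5 in order */
    return 0;
}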
diff --git a/contrib/libs/hyperscan/src/rose/catchup.h b/contrib/libs/hyperscan/src/rose/catchup.h
index 8274bb839e..8188d5af01 100644
--- a/contrib/libs/hyperscan/src/rose/catchup.h
+++ b/contrib/libs/hyperscan/src/rose/catchup.h
@@ -1,31 +1,31 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/**
* \file
* \brief Rose runtime: code for catching up output-exposed engines.
@@ -45,58 +45,58 @@
* exists.
*/
-#ifndef ROSE_CATCHUP_H
-#define ROSE_CATCHUP_H
-
-#include "hwlm/hwlm.h"
-#include "runtime.h"
-#include "scratch.h"
+#ifndef ROSE_CATCHUP_H
+#define ROSE_CATCHUP_H
+
+#include "hwlm/hwlm.h"
+#include "runtime.h"
+#include "scratch.h"
#include "rose.h"
-#include "rose_common.h"
-#include "rose_internal.h"
-#include "ue2common.h"
-#include "util/multibit.h"
-
-hwlmcb_rv_t roseCatchUpAll(s64a loc, struct hs_scratch *scratch);
-
+#include "rose_common.h"
+#include "rose_internal.h"
+#include "ue2common.h"
+#include "util/multibit.h"
+
+hwlmcb_rv_t roseCatchUpAll(s64a loc, struct hs_scratch *scratch);
+
/* will only catch mpv up to last reported external match */
-hwlmcb_rv_t roseCatchUpSuf(s64a loc, struct hs_scratch *scratch);
-
+hwlmcb_rv_t roseCatchUpSuf(s64a loc, struct hs_scratch *scratch);
+
hwlmcb_rv_t roseCatchUpMPV_i(const struct RoseEngine *t, s64a loc,
- struct hs_scratch *scratch);
-
+ struct hs_scratch *scratch);
+
void blockInitSufPQ(const struct RoseEngine *t, char *state,
- struct hs_scratch *scratch, char is_small_block);
+ struct hs_scratch *scratch, char is_small_block);
void streamInitSufPQ(const struct RoseEngine *t, char *state,
- struct hs_scratch *scratch);
-
-static really_inline
+ struct hs_scratch *scratch);
+
+static really_inline
int canSkipCatchUpMPV(const struct RoseEngine *t, struct hs_scratch *scratch,
u64a cur_offset) {
- if (!has_chained_nfas(t)) {
+ if (!has_chained_nfas(t)) {
return 1;
- }
-
- /* note: we may have to run at less than tctxt.minMatchOffset as we may
- * have a full queue of postponed events that we need to flush */
- if (cur_offset < scratch->tctxt.next_mpv_offset) {
+ }
+
+ /* note: we may have to run at less than tctxt.minMatchOffset as we may
+ * have a full queue of postponed events that we need to flush */
+ if (cur_offset < scratch->tctxt.next_mpv_offset) {
DEBUG_PRINTF("skipping cur_offset %llu min %llu, mpv %llu\n",
- cur_offset, scratch->tctxt.minMatchOffset,
- scratch->tctxt.next_mpv_offset);
+ cur_offset, scratch->tctxt.minMatchOffset,
+ scratch->tctxt.next_mpv_offset);
return 1;
- }
-
- assert(t->activeArrayCount);
-
+ }
+
+ assert(t->activeArrayCount);
+
DEBUG_PRINTF("cur offset offset: %llu\n", cur_offset);
- DEBUG_PRINTF("min match offset %llu\n", scratch->tctxt.minMatchOffset);
-
- assert(t->outfixBeginQueue == 1); /* if it exists mpv is queue 0 */
-
+ DEBUG_PRINTF("min match offset %llu\n", scratch->tctxt.minMatchOffset);
+
+ assert(t->outfixBeginQueue == 1); /* if it exists mpv is queue 0 */
+
const u8 *aa = getActiveLeafArray(t, scratch->core_info.state);
return !mmbit_isset(aa, t->activeArrayCount, 0);
}
-
+
/** \brief Catches up the MPV. */
static really_inline
hwlmcb_rv_t roseCatchUpMPV(const struct RoseEngine *t, s64a loc,
@@ -114,36 +114,36 @@ hwlmcb_rv_t roseCatchUpMPV(const struct RoseEngine *t, s64a loc,
}
updateMinMatchOffsetFromMpv(&scratch->tctxt, cur_offset);
return HWLM_CONTINUE_MATCHING;
- }
-
- /* Note: chained tails MUST not participate in the priority queue as
- * they may have events pushed on during this process which may be before
- * the catch up point */
-
+ }
+
+ /* Note: chained tails MUST not participate in the priority queue as
+ * they may have events pushed on during this process which may be before
+ * the catch up point */
+
return roseCatchUpMPV_i(t, loc, scratch);
-}
-
+}
+
/** \brief Catches up NFAs and the MPV. */
-static rose_inline
+static rose_inline
hwlmcb_rv_t roseCatchUpTo(const struct RoseEngine *t,
struct hs_scratch *scratch, u64a end) {
- /* no need to catch up if we are at the same offset as last time */
- if (end <= scratch->tctxt.minMatchOffset) {
- /* we must already be up to date */
- DEBUG_PRINTF("skip\n");
- return HWLM_CONTINUE_MATCHING;
- }
-
+ /* no need to catch up if we are at the same offset as last time */
+ if (end <= scratch->tctxt.minMatchOffset) {
+ /* we must already be up to date */
+ DEBUG_PRINTF("skip\n");
+ return HWLM_CONTINUE_MATCHING;
+ }
+
char *state = scratch->core_info.state;
- s64a loc = end - scratch->core_info.buf_offset;
-
- if (end <= scratch->tctxt.minNonMpvMatchOffset) {
- /* only need to catch up the mpv */
+ s64a loc = end - scratch->core_info.buf_offset;
+
+ if (end <= scratch->tctxt.minNonMpvMatchOffset) {
+ /* only need to catch up the mpv */
return roseCatchUpMPV(t, loc, scratch);
- }
-
- assert(scratch->tctxt.minMatchOffset >= scratch->core_info.buf_offset);
- hwlmcb_rv_t rv;
+ }
+
+ assert(scratch->tctxt.minMatchOffset >= scratch->core_info.buf_offset);
+ hwlmcb_rv_t rv;
if (!t->activeArrayCount
|| !mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)) {
if (t->flushCombProgramOffset) {
@@ -154,18 +154,18 @@ hwlmcb_rv_t roseCatchUpTo(const struct RoseEngine *t,
}
updateMinMatchOffset(&scratch->tctxt, end);
rv = HWLM_CONTINUE_MATCHING;
- } else {
+ } else {
rv = roseCatchUpAll(loc, scratch);
- }
-
- assert(rv != HWLM_CONTINUE_MATCHING
- || scratch->tctxt.minMatchOffset == end);
- assert(rv != HWLM_CONTINUE_MATCHING
- || scratch->tctxt.minNonMpvMatchOffset == end);
+ }
+
+ assert(rv != HWLM_CONTINUE_MATCHING
+ || scratch->tctxt.minMatchOffset == end);
+ assert(rv != HWLM_CONTINUE_MATCHING
+ || scratch->tctxt.minNonMpvMatchOffset == end);
assert(!can_stop_matching(scratch) || rv == HWLM_TERMINATE_MATCHING);
- return rv;
-}
-
+ return rv;
+}
+
/**
* \brief Catches up anything which may add triggers on the MPV (suffixes and
* outfixes).
@@ -173,35 +173,35 @@ hwlmcb_rv_t roseCatchUpTo(const struct RoseEngine *t,
* The MPV will be run only to intersperse matches in the output match stream
* if external matches are raised.
*/
-static rose_inline
+static rose_inline
hwlmcb_rv_t roseCatchUpMpvFeeders(const struct RoseEngine *t,
struct hs_scratch *scratch, u64a end) {
- /* no need to catch up if we are at the same offset as last time */
- if (end <= scratch->tctxt.minNonMpvMatchOffset) {
- /* we must already be up to date */
- DEBUG_PRINTF("skip\n");
- return HWLM_CONTINUE_MATCHING;
- }
-
- s64a loc = end - scratch->core_info.buf_offset;
-
- assert(t->activeArrayCount); /* mpv is in active array */
- assert(scratch->tctxt.minMatchOffset >= scratch->core_info.buf_offset);
-
+ /* no need to catch up if we are at the same offset as last time */
+ if (end <= scratch->tctxt.minNonMpvMatchOffset) {
+ /* we must already be up to date */
+ DEBUG_PRINTF("skip\n");
+ return HWLM_CONTINUE_MATCHING;
+ }
+
+ s64a loc = end - scratch->core_info.buf_offset;
+
+ assert(t->activeArrayCount); /* mpv is in active array */
+ assert(scratch->tctxt.minMatchOffset >= scratch->core_info.buf_offset);
+
if (!t->mpvTriggeredByLeaf) {
/* no need to check as they never put triggers onto the mpv */
return HWLM_CONTINUE_MATCHING;
}
-
+
/* sadly, this branch rarely gets taken as the mpv itself is usually
* alive. */
char *state = scratch->core_info.state;
if (!mmbit_any(getActiveLeafArray(t, state), t->activeArrayCount)) {
scratch->tctxt.minNonMpvMatchOffset = end;
return HWLM_CONTINUE_MATCHING;
- }
+ }
return roseCatchUpSuf(loc, scratch);
-}
-
-#endif
+}
+
+#endif
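The guards at the top of roseCatchUpTo turn on two watermarks:
minMatchOffset (everything, including the mpv, is caught up to here) and
minNonMpvMatchOffset (suffixes/outfixes are caught up to here). A small
sketch of that decision, with an invented context struct and invented names:

/* Sketch of the two-watermark skip logic; field and function names here
 * are illustrative only. */
#include <assert.h>
#include <stdio.h>

struct toy_ctx {
    unsigned long long min_match_offset;         /* everything caught up */
    unsigned long long min_non_mpv_match_offset; /* sufs/outfixes caught up */
};

enum action { SKIP, MPV_ONLY, FULL_CATCHUP };

static enum action plan_catchup(const struct toy_ctx *ctx,
                                unsigned long long end) {
    if (end <= ctx->min_match_offset) {
        return SKIP; /* already up to date at this offset */
    }
    if (end <= ctx->min_non_mpv_match_offset) {
        return MPV_ONLY; /* only the chained MPV engine can be behind */
    }
    return FULL_CATCHUP;
}

int main(void) {
    struct toy_ctx ctx = {10, 20};
    assert(plan_catchup(&ctx, 5) == SKIP);
    assert(plan_catchup(&ctx, 15) == MPV_ONLY);
    assert(plan_catchup(&ctx, 25) == FULL_CATCHUP);
    printf("watermark guards behave as expected\n");
    return 0;
}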
diff --git a/contrib/libs/hyperscan/src/rose/counting_miracle.h b/contrib/libs/hyperscan/src/rose/counting_miracle.h
index 17ab965eae..976208b738 100644
--- a/contrib/libs/hyperscan/src/rose/counting_miracle.h
+++ b/contrib/libs/hyperscan/src/rose/counting_miracle.h
@@ -1,263 +1,263 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef ROSE_COUNTING_MIRACLE_H
-#define ROSE_COUNTING_MIRACLE_H
-
-#include "ue2common.h"
-#include "runtime.h"
-#include "rose_internal.h"
-#include "nfa/nfa_api_queue.h"
-#include "util/simd_utils.h"
-
-/** \brief Maximum number of bytes to scan when looking for a "counting miracle"
- * stop character. */
-#define COUNTING_MIRACLE_LEN_MAX 256
-
-static really_inline
-char roseCountingMiracleScan(u8 c, const u8 *d, const u8 *d_end,
- u32 target_count, u32 *count_inout,
- const u8 **d_out) {
- assert(d <= d_end);
-
- u32 count = *count_inout;
-
- m128 chars = set16x8(c);
-
- for (; d + 16 <= d_end; d_end -= 16) {
- m128 data = loadu128(d_end - 16);
- u32 z1 = movemask128(eq128(chars, data));
- count += popcount32(z1);
-
- if (count >= target_count) {
- *d_out = d_end - 16;
- *count_inout = count;
- return 1;
- }
- }
-
- if (d != d_end) {
- char temp[sizeof(m128)];
- assert(d + sizeof(temp) > d_end);
- memset(temp, c + 1, sizeof(temp));
- memcpy(temp, d, d_end - d);
- m128 data = loadu128(temp);
- u32 z1 = movemask128(eq128(chars, data));
- count += popcount32(z1);
-
- if (count >= target_count) {
- *d_out = d;
- *count_inout = count;
- return 1;
- }
- }
-
- *count_inout = count;
- return 0;
-}
-
-#define GET_LO_4(chars) and128(chars, low4bits)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ROSE_COUNTING_MIRACLE_H
+#define ROSE_COUNTING_MIRACLE_H
+
+#include "ue2common.h"
+#include "runtime.h"
+#include "rose_internal.h"
+#include "nfa/nfa_api_queue.h"
+#include "util/simd_utils.h"
+
+/** \brief Maximum number of bytes to scan when looking for a "counting miracle"
+ * stop character. */
+#define COUNTING_MIRACLE_LEN_MAX 256
+
+static really_inline
+char roseCountingMiracleScan(u8 c, const u8 *d, const u8 *d_end,
+ u32 target_count, u32 *count_inout,
+ const u8 **d_out) {
+ assert(d <= d_end);
+
+ u32 count = *count_inout;
+
+ m128 chars = set16x8(c);
+
+ for (; d + 16 <= d_end; d_end -= 16) {
+ m128 data = loadu128(d_end - 16);
+ u32 z1 = movemask128(eq128(chars, data));
+ count += popcount32(z1);
+
+ if (count >= target_count) {
+ *d_out = d_end - 16;
+ *count_inout = count;
+ return 1;
+ }
+ }
+
+ if (d != d_end) {
+ char temp[sizeof(m128)];
+ assert(d + sizeof(temp) > d_end);
+ memset(temp, c + 1, sizeof(temp));
+ memcpy(temp, d, d_end - d);
+ m128 data = loadu128(temp);
+ u32 z1 = movemask128(eq128(chars, data));
+ count += popcount32(z1);
+
+ if (count >= target_count) {
+ *d_out = d;
+ *count_inout = count;
+ return 1;
+ }
+ }
+
+ *count_inout = count;
+ return 0;
+}
+
+#define GET_LO_4(chars) and128(chars, low4bits)
#define GET_HI_4(chars) rshift64_m128(andnot128(low4bits, chars), 4)
-
-static really_inline
-u32 roseCountingMiracleScanShufti(m128 mask_lo, m128 mask_hi, u8 poison,
- const u8 *d, const u8 *d_end,
- u32 target_count, u32 *count_inout,
- const u8 **d_out) {
- assert(d <= d_end);
-
- u32 count = *count_inout;
-
- const m128 zeroes = zeroes128();
- const m128 low4bits = _mm_set1_epi8(0xf);
-
- for (; d + 16 <= d_end; d_end -= 16) {
- m128 data = loadu128(d_end - 16);
+
+static really_inline
+u32 roseCountingMiracleScanShufti(m128 mask_lo, m128 mask_hi, u8 poison,
+ const u8 *d, const u8 *d_end,
+ u32 target_count, u32 *count_inout,
+ const u8 **d_out) {
+ assert(d <= d_end);
+
+ u32 count = *count_inout;
+
+ const m128 zeroes = zeroes128();
+ const m128 low4bits = _mm_set1_epi8(0xf);
+
+ for (; d + 16 <= d_end; d_end -= 16) {
+ m128 data = loadu128(d_end - 16);
m128 c_lo = pshufb_m128(mask_lo, GET_LO_4(data));
m128 c_hi = pshufb_m128(mask_hi, GET_HI_4(data));
- m128 t = and128(c_lo, c_hi);
- u32 z1 = movemask128(eq128(t, zeroes));
- count += popcount32(z1 ^ 0xffff);
-
- if (count >= target_count) {
- *d_out = d_end - 16;
- *count_inout = count;
- return 1;
- }
- }
-
- if (d != d_end) {
- char temp[sizeof(m128)];
- assert(d + sizeof(temp) > d_end);
- memset(temp, poison, sizeof(temp));
- memcpy(temp, d, d_end - d);
- m128 data = loadu128(temp);
+ m128 t = and128(c_lo, c_hi);
+ u32 z1 = movemask128(eq128(t, zeroes));
+ count += popcount32(z1 ^ 0xffff);
+
+ if (count >= target_count) {
+ *d_out = d_end - 16;
+ *count_inout = count;
+ return 1;
+ }
+ }
+
+ if (d != d_end) {
+ char temp[sizeof(m128)];
+ assert(d + sizeof(temp) > d_end);
+ memset(temp, poison, sizeof(temp));
+ memcpy(temp, d, d_end - d);
+ m128 data = loadu128(temp);
m128 c_lo = pshufb_m128(mask_lo, GET_LO_4(data));
m128 c_hi = pshufb_m128(mask_hi, GET_HI_4(data));
- m128 t = and128(c_lo, c_hi);
- u32 z1 = movemask128(eq128(t, zeroes));
- count += popcount32(z1 ^ 0xffff);
-
- if (count >= target_count) {
- *d_out = d;
- *count_inout = count;
- return 1;
- }
- }
-
- *count_inout = count;
- return 0;
-}
-
-/**
- * \brief "Counting Miracle" scan: If we see more than N instances of a
- * particular character class we know that the engine must be dead.
- *
- * Scans the buffer/history between relative locations \a begin_loc and \a
- * end_loc, and returns a miracle location (if any) that appears in the stream
- * after \a begin_loc.
- *
- * Returns 1 if some bytes can be skipped and sets \a miracle_loc
- * appropriately, 0 otherwise.
- */
-static never_inline
-int roseCountingMiracleOccurs(const struct RoseEngine *t,
- const struct LeftNfaInfo *left,
- const struct core_info *ci, s64a begin_loc,
- const s64a end_loc, s64a *miracle_loc) {
- if (!left->countingMiracleOffset) {
- return 0;
- }
-
- const struct RoseCountingMiracle *cm
- = (const void *)((const char *)t + left->countingMiracleOffset);
-
- assert(!left->transient);
- assert(cm->count > 1); /* should be a normal miracle then */
-
- DEBUG_PRINTF("looking for counting miracle over [%lld,%lld], maxLag=%u\n",
- begin_loc, end_loc, left->maxLag);
- DEBUG_PRINTF("ci->len=%zu, ci->hlen=%zu\n", ci->len, ci->hlen);
-
- assert(begin_loc <= end_loc);
- assert(begin_loc >= -(s64a)ci->hlen);
- assert(end_loc <= (s64a)ci->len);
-
- const s64a scan_end_loc = end_loc - left->maxLag;
- if (scan_end_loc <= begin_loc) {
- DEBUG_PRINTF("nothing to scan\n");
- return 0;
- }
-
- const s64a start = MAX(begin_loc, scan_end_loc - COUNTING_MIRACLE_LEN_MAX);
- DEBUG_PRINTF("scan [%lld..%lld]\n", start, scan_end_loc);
-
- u32 count = 0;
-
- s64a m_loc = start;
-
- if (!cm->shufti) {
- u8 c = cm->c;
-
- // Scan buffer.
- const s64a buf_scan_start = MAX(0, start);
- if (scan_end_loc > buf_scan_start) {
- const u8 *buf = ci->buf;
- const u8 *d = buf + scan_end_loc;
- const u8 *d_start = buf + buf_scan_start;
- const u8 *d_out;
- if (roseCountingMiracleScan(c, d_start, d, cm->count, &count,
- &d_out)) {
- assert(d_out >= d_start);
- m_loc = (d_out - d_start) + buf_scan_start;
- goto success;
- }
- }
-
- // Scan history.
- if (start < 0) {
- const u8 *hbuf_end = ci->hbuf + ci->hlen;
- const u8 *d = hbuf_end + MIN(0, scan_end_loc);
- const u8 *d_start = hbuf_end + start;
- const u8 *d_out;
- if (roseCountingMiracleScan(c, d_start, d, cm->count, &count,
- &d_out)) {
- assert(d_out >= d_start);
- m_loc = (d_out - d_start) + start;
- goto success;
- }
- }
- } else {
- m128 lo = cm->lo;
- m128 hi = cm->hi;
- u8 poison = cm->poison;
-
- // Scan buffer.
- const s64a buf_scan_start = MAX(0, start);
- if (scan_end_loc > buf_scan_start) {
- const u8 *buf = ci->buf;
- const u8 *d = buf + scan_end_loc;
- const u8 *d_start = buf + buf_scan_start;
- const u8 *d_out;
- if (roseCountingMiracleScanShufti(lo, hi, poison, d_start, d,
- cm->count, &count, &d_out)) {
- assert(d_out >= d_start);
- m_loc = (d_out - d_start) + buf_scan_start;
- goto success;
- }
- }
-
- // Scan history.
- if (start < 0) {
- const u8 *hbuf_end = ci->hbuf + ci->hlen;
- const u8 *d = hbuf_end + MIN(0, scan_end_loc);
- const u8 *d_start = hbuf_end + start;
- const u8 *d_out;
- if (roseCountingMiracleScanShufti(lo, hi, poison, d_start, d,
- cm->count, &count, &d_out)) {
- assert(d_out >= d_start);
- m_loc = (d_out - d_start) + start;
- goto success;
- }
- }
- }
-
- DEBUG_PRINTF("found %u/%u\n", count, cm->count);
- return 0;
-
-success:
- DEBUG_PRINTF("found %u/%u\n", count, cm->count);
- assert(count >= cm->count);
- assert(m_loc < scan_end_loc);
- assert(m_loc >= start);
-
- *miracle_loc = m_loc;
- return 1;
-}
-
-#endif
+ m128 t = and128(c_lo, c_hi);
+ u32 z1 = movemask128(eq128(t, zeroes));
+ count += popcount32(z1 ^ 0xffff);
+
+ if (count >= target_count) {
+ *d_out = d;
+ *count_inout = count;
+ return 1;
+ }
+ }
+
+ *count_inout = count;
+ return 0;
+}
+
+/**
+ * \brief "Counting Miracle" scan: If we see more than N instances of a
+ * particular character class we know that the engine must be dead.
+ *
+ * Scans the buffer/history between relative locations \a begin_loc and \a
+ * end_loc, and returns a miracle location (if any) that appears in the stream
+ * after \a begin_loc.
+ *
+ * Returns 1 if some bytes can be skipped and sets \a miracle_loc
+ * appropriately, 0 otherwise.
+ */
+static never_inline
+int roseCountingMiracleOccurs(const struct RoseEngine *t,
+ const struct LeftNfaInfo *left,
+ const struct core_info *ci, s64a begin_loc,
+ const s64a end_loc, s64a *miracle_loc) {
+ if (!left->countingMiracleOffset) {
+ return 0;
+ }
+
+ const struct RoseCountingMiracle *cm
+ = (const void *)((const char *)t + left->countingMiracleOffset);
+
+ assert(!left->transient);
+ assert(cm->count > 1); /* should be a normal miracle then */
+
+ DEBUG_PRINTF("looking for counting miracle over [%lld,%lld], maxLag=%u\n",
+ begin_loc, end_loc, left->maxLag);
+ DEBUG_PRINTF("ci->len=%zu, ci->hlen=%zu\n", ci->len, ci->hlen);
+
+ assert(begin_loc <= end_loc);
+ assert(begin_loc >= -(s64a)ci->hlen);
+ assert(end_loc <= (s64a)ci->len);
+
+ const s64a scan_end_loc = end_loc - left->maxLag;
+ if (scan_end_loc <= begin_loc) {
+ DEBUG_PRINTF("nothing to scan\n");
+ return 0;
+ }
+
+ const s64a start = MAX(begin_loc, scan_end_loc - COUNTING_MIRACLE_LEN_MAX);
+ DEBUG_PRINTF("scan [%lld..%lld]\n", start, scan_end_loc);
+
+ u32 count = 0;
+
+ s64a m_loc = start;
+
+ if (!cm->shufti) {
+ u8 c = cm->c;
+
+ // Scan buffer.
+ const s64a buf_scan_start = MAX(0, start);
+ if (scan_end_loc > buf_scan_start) {
+ const u8 *buf = ci->buf;
+ const u8 *d = buf + scan_end_loc;
+ const u8 *d_start = buf + buf_scan_start;
+ const u8 *d_out;
+ if (roseCountingMiracleScan(c, d_start, d, cm->count, &count,
+ &d_out)) {
+ assert(d_out >= d_start);
+ m_loc = (d_out - d_start) + buf_scan_start;
+ goto success;
+ }
+ }
+
+ // Scan history.
+ if (start < 0) {
+ const u8 *hbuf_end = ci->hbuf + ci->hlen;
+ const u8 *d = hbuf_end + MIN(0, scan_end_loc);
+ const u8 *d_start = hbuf_end + start;
+ const u8 *d_out;
+ if (roseCountingMiracleScan(c, d_start, d, cm->count, &count,
+ &d_out)) {
+ assert(d_out >= d_start);
+ m_loc = (d_out - d_start) + start;
+ goto success;
+ }
+ }
+ } else {
+ m128 lo = cm->lo;
+ m128 hi = cm->hi;
+ u8 poison = cm->poison;
+
+ // Scan buffer.
+ const s64a buf_scan_start = MAX(0, start);
+ if (scan_end_loc > buf_scan_start) {
+ const u8 *buf = ci->buf;
+ const u8 *d = buf + scan_end_loc;
+ const u8 *d_start = buf + buf_scan_start;
+ const u8 *d_out;
+ if (roseCountingMiracleScanShufti(lo, hi, poison, d_start, d,
+ cm->count, &count, &d_out)) {
+ assert(d_out >= d_start);
+ m_loc = (d_out - d_start) + buf_scan_start;
+ goto success;
+ }
+ }
+
+ // Scan history.
+ if (start < 0) {
+ const u8 *hbuf_end = ci->hbuf + ci->hlen;
+ const u8 *d = hbuf_end + MIN(0, scan_end_loc);
+ const u8 *d_start = hbuf_end + start;
+ const u8 *d_out;
+ if (roseCountingMiracleScanShufti(lo, hi, poison, d_start, d,
+ cm->count, &count, &d_out)) {
+ assert(d_out >= d_start);
+ m_loc = (d_out - d_start) + start;
+ goto success;
+ }
+ }
+ }
+
+ DEBUG_PRINTF("found %u/%u\n", count, cm->count);
+ return 0;
+
+success:
+ DEBUG_PRINTF("found %u/%u\n", count, cm->count);
+ assert(count >= cm->count);
+ assert(m_loc < scan_end_loc);
+ assert(m_loc >= start);
+
+ *miracle_loc = m_loc;
+ return 1;
+}
+
+#endif
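roseCountingMiracleScan implements the contract "accumulate a count of a
stop byte over a window and report the point at which the target count is
reached" using SSE, 16 bytes at a time, scanning back from the end of the
window. A portable scalar sketch of the same contract (forward direction,
no SIMD, invented names) to show the interface:

/* Scalar sketch of the counting-miracle scan contract. The real code works
 * 16 bytes at a time with SSE and scans from the end of the window
 * backwards; this toy version only demonstrates the in/out parameters. */
#include <stdio.h>
#include <string.h>

static int counting_miracle_scan(unsigned char c, const unsigned char *d,
                                 const unsigned char *d_end,
                                 unsigned target_count, unsigned *count_inout,
                                 const unsigned char **d_out) {
    unsigned count = *count_inout;
    for (const unsigned char *p = d; p < d_end; p++) {
        if (*p == c && ++count >= target_count) {
            *d_out = p;
            *count_inout = count;
            return 1; /* engine is certainly dead past this point */
        }
    }
    *count_inout = count;
    return 0;
}

int main(void) {
    const unsigned char *buf = (const unsigned char *)"abcaabca";
    unsigned count = 0;
    const unsigned char *loc;
    if (counting_miracle_scan('a', buf, buf + strlen((const char *)buf), 3,
                              &count, &loc)) {
        printf("third 'a' at offset %td\n", loc - buf);
    }
    return 0;
}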
diff --git a/contrib/libs/hyperscan/src/rose/infix.h b/contrib/libs/hyperscan/src/rose/infix.h
index 480aa3a52c..9cf9c0ad74 100644
--- a/contrib/libs/hyperscan/src/rose/infix.h
+++ b/contrib/libs/hyperscan/src/rose/infix.h
@@ -1,54 +1,54 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef INFIX_H
-#define INFIX_H
-
-#include "ue2common.h"
-#include "nfa/nfa_api.h"
-#include "nfa/nfa_api_queue.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef INFIX_H
+#define INFIX_H
+
+#include "ue2common.h"
+#include "nfa/nfa_api.h"
+#include "nfa/nfa_api_queue.h"
#include "nfa/nfa_internal.h"
-
-static really_inline
-int infixTooOld(struct mq *q, s64a curr_loc) {
- u32 maxAge = q->nfa->maxWidth;
-
- if (!maxAge) {
- return 0;
- }
-
- return q_last_loc(q) + maxAge < curr_loc;
-}
-
+
+static really_inline
+int infixTooOld(struct mq *q, s64a curr_loc) {
+ u32 maxAge = q->nfa->maxWidth;
+
+ if (!maxAge) {
+ return 0;
+ }
+
+ return q_last_loc(q) + maxAge < curr_loc;
+}
+
static really_inline
int canReduceQueue(const struct mq *q, s64a curr_loc, u32 maxTops, u32 maxAge) {
u32 qlen = q->end - q->cur; /* includes MQE_START */
-
+
if (maxAge && q->items[q->cur].location + maxAge < curr_loc) {
return 1;
}
@@ -66,96 +66,96 @@ int canReduceQueue(const struct mq *q, s64a curr_loc, u32 maxTops, u32 maxAge) {
return 0;
}
-/**
- * Removes tops which are known not to affect the final state from the queue.
- * May also reinitialise the engine state if it is unneeded.
- *
- * maxAge is the maximum width of the infix. Any tops/state before this can be
- * ignored. 0 is used to indicate that there is no upper bound on the width of
- * the pattern.
- *
- * maxTops is the maximum number of locations of tops that can affect the final state.
- * It is only possible for the last maxTops tops to affect the final state -
- * earlier ones can be safely removed. Also, any state before the max tops may
- * be ignored.
- *
- * This code assumes/requires that there are not multiple tops at the same
- * location in the queue. This code also assumes that it is not a multitop
- * engine.
- */
-static really_inline
+/**
+ * Removes tops which are known not to affect the final state from the queue.
+ * May also reinitialise the engine state if it is unneeded.
+ *
+ * maxAge is the maximum width of the infix. Any tops/state before this can be
+ * ignored. 0 is used to indicate that there is no upper bound on the width of
+ * the pattern.
+ *
+ * maxTops is the maximum number of locations of tops that can affect the final state.
+ * It is only possible for the last maxTops tops to affect the final state -
+ * earlier ones can be safely removed. Also, any state before the max tops may
+ * be ignored.
+ *
+ * This code assumes/requires that there are not multiple tops at the same
+ * location in the queue. This code also assumes that it is not a multitop
+ * engine.
+ */
+static really_inline
void reduceInfixQueue(struct mq *q, s64a curr_loc, u32 maxTops, u32 maxAge) {
- assert(q->end > q->cur);
- assert(maxTops);
- u32 qlen = q->end - q->cur; /* includes MQE_START */
- DEBUG_PRINTF("q=%p, len=%u, maxTops=%u maxAge=%u\n", q, qlen, maxTops,
- maxAge);
-
+ assert(q->end > q->cur);
+ assert(maxTops);
+ u32 qlen = q->end - q->cur; /* includes MQE_START */
+ DEBUG_PRINTF("q=%p, len=%u, maxTops=%u maxAge=%u\n", q, qlen, maxTops,
+ maxAge);
+
if (!canReduceQueue(q, curr_loc, maxTops, maxAge)) {
- DEBUG_PRINTF("nothing to do\n");
- return;
- }
-
-#ifdef DEBUG
- debugQueue(q);
-#endif
-
- char drop_state = qlen - 1 >= maxTops
- || (maxAge && q->items[q->cur].location + maxAge < curr_loc);
-
- LIMIT_TO_AT_MOST(&maxTops, qlen - 1);
-
- // We leave our START where it is, at the front of the queue.
- assert(q->items[q->cur].type == MQE_START);
-
-    // We want to shuffle maxTops items from the end of the queue to just
- // after the start, effectively dequeuing old items. We could use memmove
- // for this, but it's probably not a good idea to take the cost of the
- // function call.
- const struct mq_item *src = &q->items[q->cur + qlen - maxTops];
-
- q->items[0] = q->items[q->cur]; /* shift start event to 0 slot */
- q->cur = 0;
- q->end = 1;
- struct mq_item *dst = &q->items[1];
- u32 i = 0;
- if (maxAge) {
- /* any event which is older than maxAge can be dropped */
- for (; i < maxTops; i++, src++) {
- if (src->location >= curr_loc - maxAge) {
- break;
- }
- }
- }
-
- for (; i < maxTops; i++) {
- *dst = *src;
- src++;
- dst++;
- q->end++;
- }
-
- if (drop_state) {
- /* clear state and shift start up to first top */
- s64a new_loc;
- if (q->end > 1) {
- new_loc = q->items[1].location;
- } else {
- DEBUG_PRINTF("no tops\n");
- new_loc = curr_loc;
- }
-
- DEBUG_PRINTF("advancing start from %lld to %lld\n",
- q->items[0].location, new_loc);
- assert(new_loc > q->items[0].location);
- q->items[0].location = new_loc;
- nfaQueueInitState(q->nfa, q);
- }
-
- DEBUG_PRINTF("reduced queue to len=%u\n", q->end - q->cur);
-#ifdef DEBUG
- debugQueue(q);
-#endif
-}
-
-#endif
+ DEBUG_PRINTF("nothing to do\n");
+ return;
+ }
+
+#ifdef DEBUG
+ debugQueue(q);
+#endif
+
+ char drop_state = qlen - 1 >= maxTops
+ || (maxAge && q->items[q->cur].location + maxAge < curr_loc);
+
+ LIMIT_TO_AT_MOST(&maxTops, qlen - 1);
+
+ // We leave our START where it is, at the front of the queue.
+ assert(q->items[q->cur].type == MQE_START);
+
+    // We want to shuffle maxTops items from the end of the queue to just
+ // after the start, effectively dequeuing old items. We could use memmove
+ // for this, but it's probably not a good idea to take the cost of the
+ // function call.
+ const struct mq_item *src = &q->items[q->cur + qlen - maxTops];
+
+ q->items[0] = q->items[q->cur]; /* shift start event to 0 slot */
+ q->cur = 0;
+ q->end = 1;
+ struct mq_item *dst = &q->items[1];
+ u32 i = 0;
+ if (maxAge) {
+ /* any event which is older than maxAge can be dropped */
+ for (; i < maxTops; i++, src++) {
+ if (src->location >= curr_loc - maxAge) {
+ break;
+ }
+ }
+ }
+
+ for (; i < maxTops; i++) {
+ *dst = *src;
+ src++;
+ dst++;
+ q->end++;
+ }
+
+ if (drop_state) {
+ /* clear state and shift start up to first top */
+ s64a new_loc;
+ if (q->end > 1) {
+ new_loc = q->items[1].location;
+ } else {
+ DEBUG_PRINTF("no tops\n");
+ new_loc = curr_loc;
+ }
+
+ DEBUG_PRINTF("advancing start from %lld to %lld\n",
+ q->items[0].location, new_loc);
+ assert(new_loc > q->items[0].location);
+ q->items[0].location = new_loc;
+ nfaQueueInitState(q->nfa, q);
+ }
+
+ DEBUG_PRINTF("reduced queue to len=%u\n", q->end - q->cur);
+#ifdef DEBUG
+ debugQueue(q);
+#endif
+}
+
+#endif
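reduceInfixQueue keeps only the events that can still influence the infix's
final state: the last maxTops tops, minus anything older than maxAge. A toy
version of that reduction over a plain array, with invented types (not the
hyperscan mq API):

/* Toy sketch of the queue-reduction idea in reduceInfixQueue. */
#include <stdio.h>

struct toy_event {
    long location;
};

/* Reduces events[0..n) in place; returns the new count. */
static int reduce_queue(struct toy_event *events, int n, long curr_loc,
                        int max_tops, long max_age) {
    int start = n > max_tops ? n - max_tops : 0; /* last max_tops matter */
    if (max_age) {
        while (start < n && events[start].location + max_age < curr_loc) {
            start++; /* too old to affect the final state */
        }
    }
    int out = 0;
    for (int i = start; i < n; i++) {
        events[out++] = events[i];
    }
    return out;
}

int main(void) {
    struct toy_event q[] = {{10}, {40}, {70}, {90}};
    int n = reduce_queue(q, 4, 100, 3, 50);
    for (int i = 0; i < n; i++) {
        printf("kept top at %ld\n", q[i].location); /* 70, 90 */
    }
    return 0;
}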
diff --git a/contrib/libs/hyperscan/src/rose/init.c b/contrib/libs/hyperscan/src/rose/init.c
index 33f8ff5e6c..025ecca0d6 100644
--- a/contrib/libs/hyperscan/src/rose/init.c
+++ b/contrib/libs/hyperscan/src/rose/init.c
@@ -1,88 +1,88 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "init.h"
-#include "match.h"
-#include "runtime.h"
-#include "scratch.h"
-#include "rose.h"
-#include "rose_common.h"
-#include "rose_internal.h"
-#include "ue2common.h"
-#include "nfa/mcclellan.h"
-#include "nfa/nfa_api_util.h"
-#include "nfa/nfa_internal.h"
-#include "util/multibit.h"
-
-#include <string.h>
-
-static really_inline
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "init.h"
+#include "match.h"
+#include "runtime.h"
+#include "scratch.h"
+#include "rose.h"
+#include "rose_common.h"
+#include "rose_internal.h"
+#include "ue2common.h"
+#include "nfa/mcclellan.h"
+#include "nfa/nfa_api_util.h"
+#include "nfa/nfa_internal.h"
+#include "util/multibit.h"
+
+#include <string.h>
+
+static really_inline
void init_rstate(const struct RoseEngine *t, char *state) {
// Set runtime state: we take our initial groups from the RoseEngine.
- DEBUG_PRINTF("setting initial groups to 0x%016llx\n", t->initialGroups);
- storeGroups(t, state, t->initialGroups);
-}
-
-static really_inline
+ DEBUG_PRINTF("setting initial groups to 0x%016llx\n", t->initialGroups);
+ storeGroups(t, state, t->initialGroups);
+}
+
+static really_inline
void init_outfixes(const struct RoseEngine *t, char *state) {
- /* The active leaf array has been init'ed by the scatter with outfix
- * bits set on */
-
- // Init the NFA state for each outfix.
- for (u32 qi = t->outfixBeginQueue; qi < t->outfixEndQueue; qi++) {
- const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
- const struct NFA *nfa = getNfaByInfo(t, info);
- nfaInitCompressedState(nfa, 0, state + info->stateOffset,
- 0 /* assume NUL at start */);
- }
-
- if (t->initMpvNfa != MO_INVALID_IDX) {
- const struct NfaInfo *info = getNfaInfoByQueue(t, t->initMpvNfa);
- const struct NFA *nfa = getNfaByInfo(t, info);
- nfaInitCompressedState(nfa, 0, state + info->stateOffset,
- 0 /* assume NUL at start */);
- mmbit_set(getActiveLeafArray(t, state), t->activeArrayCount,
- t->initMpvNfa);
- }
-}
-
+ /* The active leaf array has been init'ed by the scatter with outfix
+ * bits set on */
+
+ // Init the NFA state for each outfix.
+ for (u32 qi = t->outfixBeginQueue; qi < t->outfixEndQueue; qi++) {
+ const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
+ const struct NFA *nfa = getNfaByInfo(t, info);
+ nfaInitCompressedState(nfa, 0, state + info->stateOffset,
+ 0 /* assume NUL at start */);
+ }
+
+ if (t->initMpvNfa != MO_INVALID_IDX) {
+ const struct NfaInfo *info = getNfaInfoByQueue(t, t->initMpvNfa);
+ const struct NFA *nfa = getNfaByInfo(t, info);
+ nfaInitCompressedState(nfa, 0, state + info->stateOffset,
+ 0 /* assume NUL at start */);
+ mmbit_set(getActiveLeafArray(t, state), t->activeArrayCount,
+ t->initMpvNfa);
+ }
+}
+
void roseInitState(const struct RoseEngine *t, char *state) {
- assert(t);
- assert(state);
-
+ assert(t);
+ assert(state);
+
DEBUG_PRINTF("init for Rose %p with %u state indices)\n", t,
t->rolesWithStateCount);
-
- // Rose is guaranteed 8-aligned state
- assert(ISALIGNED_N(state, 8));
-
- init_rstate(t, state);
-
- init_state(t, state);
- init_outfixes(t, state);
-}
+
+ // Rose is guaranteed 8-aligned state
+ assert(ISALIGNED_N(state, 8));
+
+ init_rstate(t, state);
+
+ init_state(t, state);
+ init_outfixes(t, state);
+}
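init_rstate simply copies the compile-time initialGroups mask into the
runtime state: each of up to 64 literal groups is one bit in a u64. A sketch
of that bookkeeping, assuming purely for illustration that the mask lives at
offset 0 of the state buffer:

/* Sketch of the group-bitmask idea behind storeGroups/initialGroups.
 * The offset-0 placement is invented for this example. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void store_groups(char *state, uint64_t groups) {
    memcpy(state, &groups, sizeof(groups)); /* fixed offset in the state */
}

static uint64_t load_groups(const char *state) {
    uint64_t groups;
    memcpy(&groups, state, sizeof(groups));
    return groups;
}

int main(void) {
    char state[64] = {0};
    uint64_t initial = (1ULL << 0) | (1ULL << 5); /* groups 0 and 5 on */
    store_groups(state, initial);
    printf("groups = 0x%016llx\n",
           (unsigned long long)load_groups(state));
    return 0;
}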
diff --git a/contrib/libs/hyperscan/src/rose/init.h b/contrib/libs/hyperscan/src/rose/init.h
index 9ae0db2a08..b37053b261 100644
--- a/contrib/libs/hyperscan/src/rose/init.h
+++ b/contrib/libs/hyperscan/src/rose/init.h
@@ -1,46 +1,46 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef ROSE_INIT_H
-#define ROSE_INIT_H
-
-#include "rose_internal.h"
-#include "ue2common.h"
-#include "util/scatter_runtime.h"
-
-/*
- * Initialisation code common to streaming mode Rose (init.c) and block mode
- * Rose (block.c).
- */
-
-static really_inline
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ROSE_INIT_H
+#define ROSE_INIT_H
+
+#include "rose_internal.h"
+#include "ue2common.h"
+#include "util/scatter_runtime.h"
+
+/*
+ * Initialisation code common to streaming mode Rose (init.c) and block mode
+ * Rose (block.c).
+ */
+
+static really_inline
void init_state(const struct RoseEngine *t, char *state) {
- scatter(state, t, &t->state_init);
-}
-
-#endif // ROSE_INIT_H
+ scatter(state, t, &t->state_init);
+}
+
+#endif // ROSE_INIT_H
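The scatter used by init_state is a compiler-emitted list of
(offset, value) writes that initialisation simply replays into the state
buffer, which keeps stream-state setup proportional to the number of writes
rather than the state size. A minimal sketch with an invented plan layout
(the real scatter plan supports several write widths):

/* Sketch of the scatter idea: replay a precomputed list of writes. */
#include <stdio.h>
#include <string.h>

struct scatter_unit {
    unsigned offset;
    unsigned char value;
};

static void scatter(char *state, const struct scatter_unit *plan, int n) {
    for (int i = 0; i < n; i++) {
        state[plan[i].offset] = (char)plan[i].value;
    }
}

int main(void) {
    char state[16];
    memset(state, 0xff, sizeof(state)); /* dirty state from a previous run */
    const struct scatter_unit plan[] = {
        {0, 0x00}, /* e.g. clear a multibit header */
        {8, 0x01}, /* e.g. set an always-on outfix bit */
    };
    scatter(state, plan, 2);
    printf("state[0]=%d state[8]=%d\n", state[0], state[8]);
    return 0;
}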
diff --git a/contrib/libs/hyperscan/src/rose/match.c b/contrib/libs/hyperscan/src/rose/match.c
index c7f8189cd2..84d3b1fdc2 100644
--- a/contrib/libs/hyperscan/src/rose/match.c
+++ b/contrib/libs/hyperscan/src/rose/match.c
@@ -1,240 +1,240 @@
-/*
+/*
* Copyright (c) 2015-2019, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "catchup.h"
-#include "match.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "catchup.h"
+#include "match.h"
#include "program_runtime.h"
-#include "rose.h"
-#include "util/bitutils.h"
-#include "util/fatbit.h"
-
-#if defined(DEBUG) || defined(DUMP_SUPPORT)
-#include "util/compare.h"
-/** A debugging crutch: print a hex-escaped version of the match for our
- * perusal. The start and end offsets are stream offsets. */
-static UNUSED
-void printMatch(const struct core_info *ci, u64a start, u64a end) {
- assert(start <= end);
- assert(end <= ci->buf_offset + ci->len);
-
- printf("'");
- u64a i = start;
- for (; i <= MIN(ci->buf_offset, end); i++) {
- u64a h_idx = ci->buf_offset - i;
- u8 c = h_idx >= ci->hlen ? '?' : ci->hbuf[ci->hlen - h_idx - 1];
- if (ourisprint(c) && c != '\'') {
- printf("%c", c);
- } else {
- printf("\\x%02x", c);
- }
- }
- for (; i <= end; i++) {
- u64a b_idx = i - ci->buf_offset - 1;
- u8 c = b_idx >= ci->len ? '?' : ci->buf[b_idx];
- if (ourisprint(c) && c != '\'') {
- printf("%c", c);
- } else {
- printf("\\x%02x", c);
- }
- }
- printf("'");
-}
-#endif
-
+#include "rose.h"
+#include "util/bitutils.h"
+#include "util/fatbit.h"
+
+#if defined(DEBUG) || defined(DUMP_SUPPORT)
+#include "util/compare.h"
+/** A debugging crutch: print a hex-escaped version of the match for our
+ * perusal. The start and end offsets are stream offsets. */
+static UNUSED
+void printMatch(const struct core_info *ci, u64a start, u64a end) {
+ assert(start <= end);
+ assert(end <= ci->buf_offset + ci->len);
+
+ printf("'");
+ u64a i = start;
+ for (; i <= MIN(ci->buf_offset, end); i++) {
+ u64a h_idx = ci->buf_offset - i;
+ u8 c = h_idx >= ci->hlen ? '?' : ci->hbuf[ci->hlen - h_idx - 1];
+ if (ourisprint(c) && c != '\'') {
+ printf("%c", c);
+ } else {
+ printf("\\x%02x", c);
+ }
+ }
+ for (; i <= end; i++) {
+ u64a b_idx = i - ci->buf_offset - 1;
+ u8 c = b_idx >= ci->len ? '?' : ci->buf[b_idx];
+ if (ourisprint(c) && c != '\'') {
+ printf("%c", c);
+ } else {
+ printf("\\x%02x", c);
+ }
+ }
+ printf("'");
+}
+#endif
+
hwlmcb_rv_t roseDelayRebuildCallback(size_t end, u32 id,
struct hs_scratch *scratch) {
- struct RoseContext *tctx = &scratch->tctxt;
- struct core_info *ci = &scratch->core_info;
+ struct RoseContext *tctx = &scratch->tctxt;
+ struct core_info *ci = &scratch->core_info;
const struct RoseEngine *t = ci->rose;
- size_t rb_len = MIN(ci->hlen, t->delayRebuildLength);
-
- u64a real_end = ci->buf_offset - rb_len + end + 1; // index after last byte
-
-#ifdef DEBUG
+ size_t rb_len = MIN(ci->hlen, t->delayRebuildLength);
+
+ u64a real_end = ci->buf_offset - rb_len + end + 1; // index after last byte
+
+#ifdef DEBUG
DEBUG_PRINTF("REBUILD MATCH id=%u end offset@%llu]: ", id, real_end);
u64a start = real_end < 8 ? 1 : real_end - 7;
printMatch(ci, start, real_end);
- printf("\n");
-#endif
-
+ printf("\n");
+#endif
+
DEBUG_PRINTF("STATE groups=0x%016llx\n", tctx->groups);
-
+
assert(id && id < t->size); // id is a program offset
const u64a som = 0;
const u8 flags = 0;
UNUSED hwlmcb_rv_t rv =
roseRunProgram(t, scratch, id, som, real_end, flags);
assert(rv != HWLM_TERMINATE_MATCHING);
-
+
/* we are just repopulating the delay queue, groups should be
- * already set from the original scan. */
-
- return tctx->groups;
-}
-
-static really_inline
-hwlmcb_rv_t ensureMpvQueueFlushed(const struct RoseEngine *t,
- struct hs_scratch *scratch, u32 qi, s64a loc,
+ * already set from the original scan. */
+
+ return tctx->groups;
+}
+
+static really_inline
+hwlmcb_rv_t ensureMpvQueueFlushed(const struct RoseEngine *t,
+ struct hs_scratch *scratch, u32 qi, s64a loc,
char in_chained) {
return ensureQueueFlushed_i(t, scratch, qi, loc, 1, in_chained);
-}
-
+}
+
hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t,
struct hs_scratch *scratch, u32 event,
u64a top_squash_distance, u64a end,
char in_catchup) {
assert(event == MQE_TOP || event >= MQE_TOP_FIRST);
- struct core_info *ci = &scratch->core_info;
-
+ struct core_info *ci = &scratch->core_info;
+
u8 *aa = getActiveLeafArray(t, scratch->core_info.state);
- u32 aaCount = t->activeArrayCount;
- struct fatbit *activeQueues = scratch->aqa;
- u32 qCount = t->queueCount;
-
+ u32 aaCount = t->activeArrayCount;
+ struct fatbit *activeQueues = scratch->aqa;
+ u32 qCount = t->queueCount;
+
const u32 qi = 0; /* MPV is always queue 0 if it exists */
- struct mq *q = &scratch->queues[qi];
- const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
-
- s64a loc = (s64a)end - ci->buf_offset;
- assert(loc <= (s64a)ci->len && loc >= -(s64a)ci->hlen);
-
- if (!mmbit_set(aa, aaCount, qi)) {
+ struct mq *q = &scratch->queues[qi];
+ const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
+
+ s64a loc = (s64a)end - ci->buf_offset;
+ assert(loc <= (s64a)ci->len && loc >= -(s64a)ci->hlen);
+
+ if (!mmbit_set(aa, aaCount, qi)) {
initQueue(q, qi, t, scratch);
- nfaQueueInitState(q->nfa, q);
- pushQueueAt(q, 0, MQE_START, loc);
- fatbit_set(activeQueues, qCount, qi);
- } else if (info->no_retrigger) {
- DEBUG_PRINTF("yawn\n");
- /* nfa only needs one top; we can go home now */
- return HWLM_CONTINUE_MATCHING;
- } else if (!fatbit_set(activeQueues, qCount, qi)) {
+ nfaQueueInitState(q->nfa, q);
+ pushQueueAt(q, 0, MQE_START, loc);
+ fatbit_set(activeQueues, qCount, qi);
+ } else if (info->no_retrigger) {
+ DEBUG_PRINTF("yawn\n");
+ /* nfa only needs one top; we can go home now */
+ return HWLM_CONTINUE_MATCHING;
+ } else if (!fatbit_set(activeQueues, qCount, qi)) {
initQueue(q, qi, t, scratch);
- loadStreamState(q->nfa, q, 0);
- pushQueueAt(q, 0, MQE_START, 0);
- } else if (isQueueFull(q)) {
- DEBUG_PRINTF("queue %u full -> catching up nfas\n", qi);
- /* we know it is a chained nfa and the suffixes/outfixes must already
- * be known to be consistent */
+ loadStreamState(q->nfa, q, 0);
+ pushQueueAt(q, 0, MQE_START, 0);
+ } else if (isQueueFull(q)) {
+ DEBUG_PRINTF("queue %u full -> catching up nfas\n", qi);
+ /* we know it is a chained nfa and the suffixes/outfixes must already
+ * be known to be consistent */
if (ensureMpvQueueFlushed(t, scratch, qi, loc, in_catchup)
- == HWLM_TERMINATE_MATCHING) {
+ == HWLM_TERMINATE_MATCHING) {
DEBUG_PRINTF("terminating...\n");
- return HWLM_TERMINATE_MATCHING;
- }
- }
-
+ return HWLM_TERMINATE_MATCHING;
+ }
+ }
+
if (top_squash_distance) {
assert(q->cur < q->end);
- struct mq_item *last = &q->items[q->end - 1];
- if (last->type == event
+ struct mq_item *last = &q->items[q->end - 1];
+ if (last->type == event
&& last->location >= loc - (s64a)top_squash_distance) {
- last->location = loc;
- goto event_enqueued;
- }
- }
-
- pushQueue(q, event, loc);
-
-event_enqueued:
- if (q_cur_loc(q) == (s64a)ci->len) {
- /* we may not run the nfa; need to ensure state is fine */
- DEBUG_PRINTF("empty run\n");
- pushQueueNoMerge(q, MQE_END, loc);
- char alive = nfaQueueExec(q->nfa, q, loc);
- if (alive) {
+ last->location = loc;
+ goto event_enqueued;
+ }
+ }
+
+ pushQueue(q, event, loc);
+
+event_enqueued:
+ if (q_cur_loc(q) == (s64a)ci->len) {
+ /* we may not run the nfa; need to ensure state is fine */
+ DEBUG_PRINTF("empty run\n");
+ pushQueueNoMerge(q, MQE_END, loc);
+ char alive = nfaQueueExec(q->nfa, q, loc);
+ if (alive) {
scratch->tctxt.mpv_inactive = 0;
- q->cur = q->end = 0;
- pushQueueAt(q, 0, MQE_START, loc);
- } else {
- mmbit_unset(aa, aaCount, qi);
- fatbit_unset(scratch->aqa, qCount, qi);
- }
- }
-
- DEBUG_PRINTF("added mpv event at %lld\n", loc);
+ q->cur = q->end = 0;
+ pushQueueAt(q, 0, MQE_START, loc);
+ } else {
+ mmbit_unset(aa, aaCount, qi);
+ fatbit_unset(scratch->aqa, qCount, qi);
+ }
+ }
+
+ DEBUG_PRINTF("added mpv event at %lld\n", loc);
scratch->tctxt.next_mpv_offset = 0; /* the top event may result in matches
* earlier than expected */
- return HWLM_CONTINUE_MATCHING;
-}
-
+ return HWLM_CONTINUE_MATCHING;
+}
+
int roseAnchoredCallback(u64a start, u64a end, u32 id, void *ctx) {
struct hs_scratch *scratch = ctx;
assert(scratch && scratch->magic == SCRATCH_MAGIC);
struct RoseContext *tctxt = &scratch->tctxt;
- struct core_info *ci = &scratch->core_info;
+ struct core_info *ci = &scratch->core_info;
const struct RoseEngine *t = ci->rose;
-
- u64a real_end = ci->buf_offset + end; // index after last byte
-
- DEBUG_PRINTF("MATCH id=%u offsets=[???,%llu]\n", id, real_end);
+
+ u64a real_end = ci->buf_offset + end; // index after last byte
+
+ DEBUG_PRINTF("MATCH id=%u offsets=[???,%llu]\n", id, real_end);
DEBUG_PRINTF("STATE groups=0x%016llx\n", tctxt->groups);
-
+
if (can_stop_matching(scratch)) {
- DEBUG_PRINTF("received a match when we're already dead!\n");
- return MO_HALT_MATCHING;
- }
-
- /* delayed literals need to be delivered before real literals; however
- * delayed literals only come from the floating table, so if we are going
- * to deliver a literal here it must be too early for a delayed literal */
-
- /* no history checks from anchored region and we are before the flush
- * boundary */
-
- if (real_end <= t->floatingMinLiteralMatchOffset) {
+ DEBUG_PRINTF("received a match when we're already dead!\n");
+ return MO_HALT_MATCHING;
+ }
+
+ /* delayed literals need to be delivered before real literals; however
+ * delayed literals only come from the floating table, so if we are going
+ * to deliver a literal here it must be too early for a delayed literal */
+
+ /* no history checks from anchored region and we are before the flush
+ * boundary */
+
+ if (real_end <= t->floatingMinLiteralMatchOffset) {
roseFlushLastByteHistory(t, scratch, real_end);
- tctxt->lastEndOffset = real_end;
- }
-
+ tctxt->lastEndOffset = real_end;
+ }
+
// Note that the "id" we have been handed is the program offset.
const u8 flags = ROSE_PROG_FLAG_IN_ANCHORED;
if (roseRunProgram(t, scratch, id, start, real_end, flags)
== HWLM_TERMINATE_MATCHING) {
assert(can_stop_matching(scratch));
- DEBUG_PRINTF("caller requested termination\n");
- return MO_HALT_MATCHING;
- }
-
+ DEBUG_PRINTF("caller requested termination\n");
+ return MO_HALT_MATCHING;
+ }
+
DEBUG_PRINTF("DONE groups=0x%016llx\n", tctxt->groups);
-
- return MO_CONTINUE_MATCHING;
-}
-
+
+ return MO_CONTINUE_MATCHING;
+}
+
/**
* \brief Run the program for the given literal ID, with the interpreter
* inlined into this call.
*
* Assumes not in_anchored.
*/
-static really_inline
+static really_inline
hwlmcb_rv_t roseProcessMatchInline(const struct RoseEngine *t,
struct hs_scratch *scratch, u64a end,
u32 id) {
- DEBUG_PRINTF("id=%u\n", id);
+ DEBUG_PRINTF("id=%u\n", id);
assert(id && id < t->size); // id is an offset into bytecode
const u64a som = 0;
const u8 flags = 0;
@@ -243,296 +243,296 @@ hwlmcb_rv_t roseProcessMatchInline(const struct RoseEngine *t,
} else {
return roseRunProgram(t, scratch, id, som, end, flags);
}
-}
-
-static rose_inline
+}
+
+static rose_inline
hwlmcb_rv_t playDelaySlot(const struct RoseEngine *t,
struct hs_scratch *scratch,
struct fatbit **delaySlots, u32 vicIndex,
u64a offset) {
- /* assert(!tctxt->in_anchored); */
- assert(vicIndex < DELAY_SLOT_COUNT);
+ /* assert(!tctxt->in_anchored); */
+ assert(vicIndex < DELAY_SLOT_COUNT);
const struct fatbit *vicSlot = delaySlots[vicIndex];
u32 delay_count = t->delay_count;
-
+
if (offset < t->floatingMinLiteralMatchOffset) {
- DEBUG_PRINTF("too soon\n");
- return HWLM_CONTINUE_MATCHING;
- }
-
+ DEBUG_PRINTF("too soon\n");
+ return HWLM_CONTINUE_MATCHING;
+ }
+
struct RoseContext *tctxt = &scratch->tctxt;
roseFlushLastByteHistory(t, scratch, offset);
- tctxt->lastEndOffset = offset;
-
+ tctxt->lastEndOffset = offset;
+
const u32 *programs = getByOffset(t, t->delayProgramOffset);
-
+
for (u32 it = fatbit_iterate(vicSlot, delay_count, MMB_INVALID);
it != MMB_INVALID; it = fatbit_iterate(vicSlot, delay_count, it)) {
- UNUSED rose_group old_groups = tctxt->groups;
-
+ UNUSED rose_group old_groups = tctxt->groups;
+
DEBUG_PRINTF("DELAYED MATCH id=%u offset=%llu\n", it, offset);
const u64a som = 0;
const u8 flags = 0;
hwlmcb_rv_t rv = roseRunProgram(t, scratch, programs[it], som, offset,
flags);
DEBUG_PRINTF("DONE groups=0x%016llx\n", tctxt->groups);
-
+
/* delayed literals can't safely set groups.
-     * However, we may be setting groups that successors have already
-     * worked out they do not need to match. */
- DEBUG_PRINTF("groups in %016llx out %016llx\n", old_groups,
- tctxt->groups);
-
- if (rv == HWLM_TERMINATE_MATCHING) {
- return HWLM_TERMINATE_MATCHING;
- }
- }
-
- return HWLM_CONTINUE_MATCHING;
-}
-
-static really_inline
+     * However, we may be setting groups that successors have already
+     * worked out they do not need to match. */
+ DEBUG_PRINTF("groups in %016llx out %016llx\n", old_groups,
+ tctxt->groups);
+
+ if (rv == HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+ }
+
+ return HWLM_CONTINUE_MATCHING;
+}
+
+static really_inline
hwlmcb_rv_t flushAnchoredLiteralAtLoc(const struct RoseEngine *t,
struct hs_scratch *scratch,
u32 curr_loc) {
struct RoseContext *tctxt = &scratch->tctxt;
struct fatbit *curr_row = getAnchoredLiteralLog(scratch)[curr_loc - 1];
u32 region_width = t->anchored_count;
-
+
const u32 *programs = getByOffset(t, t->anchoredProgramOffset);
- DEBUG_PRINTF("report matches at curr loc\n");
+ DEBUG_PRINTF("report matches at curr loc\n");
for (u32 it = fatbit_iterate(curr_row, region_width, MMB_INVALID);
it != MMB_INVALID; it = fatbit_iterate(curr_row, region_width, it)) {
- DEBUG_PRINTF("it = %u/%u\n", it, region_width);
-
- rose_group old_groups = tctxt->groups;
+ DEBUG_PRINTF("it = %u/%u\n", it, region_width);
+
+ rose_group old_groups = tctxt->groups;
DEBUG_PRINTF("ANCH REPLAY MATCH id=%u offset=%u\n", it, curr_loc);
const u64a som = 0;
const u8 flags = 0;
hwlmcb_rv_t rv = roseRunProgram(t, scratch, programs[it], som, curr_loc,
flags);
DEBUG_PRINTF("DONE groups=0x%016llx\n", tctxt->groups);
-
+
/* anchored literals can't safely set groups.
      * However, we may be setting groups that successors have already
-     * worked out they do not need to match. */
- DEBUG_PRINTF("groups in %016llx out %016llx\n", old_groups,
- tctxt->groups);
- tctxt->groups &= old_groups;
-
- if (rv == HWLM_TERMINATE_MATCHING) {
- return HWLM_TERMINATE_MATCHING;
- }
- }
-
- /* clear row; does not invalidate iteration */
- bf64_unset(&scratch->al_log_sum, curr_loc - 1);
-
- return HWLM_CONTINUE_MATCHING;
-}
-
-static really_inline
+     * worked out they do not need to match. */
+ DEBUG_PRINTF("groups in %016llx out %016llx\n", old_groups,
+ tctxt->groups);
+ tctxt->groups &= old_groups;
+
+ if (rv == HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+ }
+
+ /* clear row; does not invalidate iteration */
+ bf64_unset(&scratch->al_log_sum, curr_loc - 1);
+
+ return HWLM_CONTINUE_MATCHING;
+}
+
+static really_inline
u32 anchored_it_begin(struct hs_scratch *scratch) {
struct RoseContext *tctxt = &scratch->tctxt;
- if (tctxt->lastEndOffset >= scratch->anchored_literal_region_len) {
- return MMB_INVALID;
- }
- u32 begin = tctxt->lastEndOffset;
- begin--;
-
+ if (tctxt->lastEndOffset >= scratch->anchored_literal_region_len) {
+ return MMB_INVALID;
+ }
+ u32 begin = tctxt->lastEndOffset;
+ begin--;
+
return bf64_iterate(scratch->al_log_sum, begin);
-}
-
-static really_inline
+}
+
+static really_inline
hwlmcb_rv_t flushAnchoredLiterals(const struct RoseEngine *t,
struct hs_scratch *scratch,
- u32 *anchored_it_param, u64a to_off) {
+ u32 *anchored_it_param, u64a to_off) {
struct RoseContext *tctxt = &scratch->tctxt;
- u32 anchored_it = *anchored_it_param;
- /* catch up any remaining anchored matches */
- for (; anchored_it != MMB_INVALID && anchored_it < to_off;
- anchored_it = bf64_iterate(scratch->al_log_sum, anchored_it)) {
- assert(anchored_it < scratch->anchored_literal_region_len);
- DEBUG_PRINTF("loc_it = %u\n", anchored_it);
- u32 curr_off = anchored_it + 1;
+ u32 anchored_it = *anchored_it_param;
+ /* catch up any remaining anchored matches */
+ for (; anchored_it != MMB_INVALID && anchored_it < to_off;
+ anchored_it = bf64_iterate(scratch->al_log_sum, anchored_it)) {
+ assert(anchored_it < scratch->anchored_literal_region_len);
+ DEBUG_PRINTF("loc_it = %u\n", anchored_it);
+ u32 curr_off = anchored_it + 1;
roseFlushLastByteHistory(t, scratch, curr_off);
- tctxt->lastEndOffset = curr_off;
-
+ tctxt->lastEndOffset = curr_off;
+
if (flushAnchoredLiteralAtLoc(t, scratch, curr_off)
- == HWLM_TERMINATE_MATCHING) {
- return HWLM_TERMINATE_MATCHING;
- }
- }
-
- *anchored_it_param = anchored_it;
- return HWLM_CONTINUE_MATCHING;
-}
-
-static really_inline
+ == HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+ }
+
+ *anchored_it_param = anchored_it;
+ return HWLM_CONTINUE_MATCHING;
+}
+
+static really_inline
hwlmcb_rv_t playVictims(const struct RoseEngine *t, struct hs_scratch *scratch,
u32 *anchored_it, u64a lastEnd, u64a victimDelaySlots,
struct fatbit **delaySlots) {
- while (victimDelaySlots) {
- u32 vic = findAndClearLSB_64(&victimDelaySlots);
- DEBUG_PRINTF("vic = %u\n", vic);
- u64a vicOffset = vic + (lastEnd & ~(u64a)DELAY_MASK);
-
+ while (victimDelaySlots) {
+ u32 vic = findAndClearLSB_64(&victimDelaySlots);
+ DEBUG_PRINTF("vic = %u\n", vic);
+ u64a vicOffset = vic + (lastEnd & ~(u64a)DELAY_MASK);
+
if (flushAnchoredLiterals(t, scratch, anchored_it, vicOffset)
- == HWLM_TERMINATE_MATCHING) {
- return HWLM_TERMINATE_MATCHING;
- }
-
+ == HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+
if (playDelaySlot(t, scratch, delaySlots, vic % DELAY_SLOT_COUNT,
vicOffset) == HWLM_TERMINATE_MATCHING) {
- return HWLM_TERMINATE_MATCHING;
- }
- }
-
- return HWLM_CONTINUE_MATCHING;
-}
-
-/* call flushQueuedLiterals instead */
+ return HWLM_TERMINATE_MATCHING;
+ }
+ }
+
+ return HWLM_CONTINUE_MATCHING;
+}
+
+/* call flushQueuedLiterals instead */
hwlmcb_rv_t flushQueuedLiterals_i(const struct RoseEngine *t,
struct hs_scratch *scratch, u64a currEnd) {
struct RoseContext *tctxt = &scratch->tctxt;
- u64a lastEnd = tctxt->delayLastEndOffset;
- DEBUG_PRINTF("flushing backed up matches @%llu up from %llu\n", currEnd,
- lastEnd);
-
- assert(currEnd != lastEnd); /* checked in main entry point */
-
+ u64a lastEnd = tctxt->delayLastEndOffset;
+ DEBUG_PRINTF("flushing backed up matches @%llu up from %llu\n", currEnd,
+ lastEnd);
+
+ assert(currEnd != lastEnd); /* checked in main entry point */
+
u32 anchored_it = anchored_it_begin(scratch);
-
- if (!tctxt->filledDelayedSlots) {
- DEBUG_PRINTF("no delayed, no flush\n");
- goto anchored_leftovers;
- }
-
- {
+
+ if (!tctxt->filledDelayedSlots) {
+ DEBUG_PRINTF("no delayed, no flush\n");
+ goto anchored_leftovers;
+ }
+
+ {
struct fatbit **delaySlots = getDelaySlots(scratch);
-
- u32 lastIndex = lastEnd & DELAY_MASK;
- u32 currIndex = currEnd & DELAY_MASK;
-
- int wrapped = (lastEnd | DELAY_MASK) < currEnd;
-
- u64a victimDelaySlots; /* needs to be twice as wide as the number of
- * slots. */
-
- DEBUG_PRINTF("hello %08x\n", tctxt->filledDelayedSlots);
- if (!wrapped) {
- victimDelaySlots = tctxt->filledDelayedSlots;
-
- DEBUG_PRINTF("unwrapped %016llx %08x\n", victimDelaySlots,
- tctxt->filledDelayedSlots);
- /* index vars < 32 so 64bit shifts are safe */
-
- /* clear all slots at last index and below, */
- victimDelaySlots &= ~((1LLU << (lastIndex + 1)) - 1);
-
- /* clear all slots above curr index */
- victimDelaySlots &= (1LLU << (currIndex + 1)) - 1;
-
- tctxt->filledDelayedSlots &= ~victimDelaySlots;
-
- DEBUG_PRINTF("unwrapped %016llx %08x\n", victimDelaySlots,
- tctxt->filledDelayedSlots);
- } else {
- DEBUG_PRINTF("wrapped %08x\n", tctxt->filledDelayedSlots);
-
- /* 1st half: clear all slots at last index and below, */
- u64a first_half = tctxt->filledDelayedSlots;
- first_half &= ~((1ULL << (lastIndex + 1)) - 1);
- tctxt->filledDelayedSlots &= (1ULL << (lastIndex + 1)) - 1;
-
- u64a second_half = tctxt->filledDelayedSlots;
-
- if (currEnd > lastEnd + DELAY_SLOT_COUNT) {
- /* 2nd half: clear all slots above last index */
- second_half &= (1ULL << (lastIndex + 1)) - 1;
- } else {
- /* 2nd half: clear all slots above curr index */
- second_half &= (1ULL << (currIndex + 1)) - 1;
- }
- tctxt->filledDelayedSlots &= ~second_half;
-
- victimDelaySlots = first_half | (second_half << DELAY_SLOT_COUNT);
-
- DEBUG_PRINTF("-- %016llx %016llx = %016llx (li %u)\n", first_half,
- second_half, victimDelaySlots, lastIndex);
- }
-
+
+ u32 lastIndex = lastEnd & DELAY_MASK;
+ u32 currIndex = currEnd & DELAY_MASK;
+
+ int wrapped = (lastEnd | DELAY_MASK) < currEnd;
+
+ u64a victimDelaySlots; /* needs to be twice as wide as the number of
+ * slots. */
+
+ DEBUG_PRINTF("hello %08x\n", tctxt->filledDelayedSlots);
+ if (!wrapped) {
+ victimDelaySlots = tctxt->filledDelayedSlots;
+
+ DEBUG_PRINTF("unwrapped %016llx %08x\n", victimDelaySlots,
+ tctxt->filledDelayedSlots);
+ /* index vars < 32 so 64bit shifts are safe */
+
+ /* clear all slots at last index and below, */
+ victimDelaySlots &= ~((1LLU << (lastIndex + 1)) - 1);
+
+ /* clear all slots above curr index */
+ victimDelaySlots &= (1LLU << (currIndex + 1)) - 1;
+
+ tctxt->filledDelayedSlots &= ~victimDelaySlots;
+
+ DEBUG_PRINTF("unwrapped %016llx %08x\n", victimDelaySlots,
+ tctxt->filledDelayedSlots);
+ } else {
+ DEBUG_PRINTF("wrapped %08x\n", tctxt->filledDelayedSlots);
+
+ /* 1st half: clear all slots at last index and below, */
+ u64a first_half = tctxt->filledDelayedSlots;
+ first_half &= ~((1ULL << (lastIndex + 1)) - 1);
+ tctxt->filledDelayedSlots &= (1ULL << (lastIndex + 1)) - 1;
+
+ u64a second_half = tctxt->filledDelayedSlots;
+
+ if (currEnd > lastEnd + DELAY_SLOT_COUNT) {
+ /* 2nd half: clear all slots above last index */
+ second_half &= (1ULL << (lastIndex + 1)) - 1;
+ } else {
+ /* 2nd half: clear all slots above curr index */
+ second_half &= (1ULL << (currIndex + 1)) - 1;
+ }
+ tctxt->filledDelayedSlots &= ~second_half;
+
+ victimDelaySlots = first_half | (second_half << DELAY_SLOT_COUNT);
+
+ DEBUG_PRINTF("-- %016llx %016llx = %016llx (li %u)\n", first_half,
+ second_half, victimDelaySlots, lastIndex);
+ }
+
if (playVictims(t, scratch, &anchored_it, lastEnd, victimDelaySlots,
delaySlots) == HWLM_TERMINATE_MATCHING) {
- return HWLM_TERMINATE_MATCHING;
- }
- }
-
-anchored_leftovers:;
+ return HWLM_TERMINATE_MATCHING;
+ }
+ }
+
+anchored_leftovers:;
hwlmcb_rv_t rv = flushAnchoredLiterals(t, scratch, &anchored_it, currEnd);
- tctxt->delayLastEndOffset = currEnd;
- return rv;
-}
-
+ tctxt->delayLastEndOffset = currEnd;
+ return rv;
+}
+
static really_inline
hwlmcb_rv_t roseCallback_i(size_t end, u32 id, struct hs_scratch *scratch) {
struct RoseContext *tctx = &scratch->tctxt;
const struct RoseEngine *t = scratch->core_info.rose;
- u64a real_end = end + tctx->lit_offset_adjust;
-
-#if defined(DEBUG)
+ u64a real_end = end + tctx->lit_offset_adjust;
+
+#if defined(DEBUG)
DEBUG_PRINTF("MATCH id=%u end offset@%llu: ", id, real_end);
u64a start = real_end < 8 ? 1 : real_end - 7;
printMatch(&scratch->core_info, start, real_end);
- printf("\n");
-#endif
- DEBUG_PRINTF("last end %llu\n", tctx->lastEndOffset);
-
+ printf("\n");
+#endif
+ DEBUG_PRINTF("last end %llu\n", tctx->lastEndOffset);
+
DEBUG_PRINTF("STATE groups=0x%016llx\n", tctx->groups);
-
+
if (can_stop_matching(scratch)) {
- DEBUG_PRINTF("received a match when we're already dead!\n");
- return HWLM_TERMINATE_MATCHING;
- }
-
+ DEBUG_PRINTF("received a match when we're already dead!\n");
+ return HWLM_TERMINATE_MATCHING;
+ }
+
hwlmcb_rv_t rv = flushQueuedLiterals(t, scratch, real_end);
- /* flushDelayed may have advanced tctx->lastEndOffset */
-
+ /* flushDelayed may have advanced tctx->lastEndOffset */
+
if (real_end >= t->floatingMinLiteralMatchOffset) {
roseFlushLastByteHistory(t, scratch, real_end);
- tctx->lastEndOffset = real_end;
- }
-
- if (rv == HWLM_TERMINATE_MATCHING) {
- return HWLM_TERMINATE_MATCHING;
- }
-
+ tctx->lastEndOffset = real_end;
+ }
+
+ if (rv == HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+
rv = roseProcessMatchInline(t, scratch, real_end, id);
-
+
DEBUG_PRINTF("DONE groups=0x%016llx\n", tctx->groups);
-
- if (rv != HWLM_TERMINATE_MATCHING) {
- return tctx->groups;
- }
-
+
+ if (rv != HWLM_TERMINATE_MATCHING) {
+ return tctx->groups;
+ }
+
assert(can_stop_matching(scratch));
- DEBUG_PRINTF("user requested halt\n");
- return HWLM_TERMINATE_MATCHING;
-}
-
+ DEBUG_PRINTF("user requested halt\n");
+ return HWLM_TERMINATE_MATCHING;
+}
+
hwlmcb_rv_t roseCallback(size_t end, u32 id, struct hs_scratch *scratch) {
return roseCallback_i(end, id, scratch);
}
-
+
hwlmcb_rv_t roseFloatingCallback(size_t end, u32 id,
struct hs_scratch *scratch) {
const struct RoseEngine *t = scratch->core_info.rose;
-
+
return roseCallback_i(end, id, scratch) & t->floating_group_mask;
}
-
+
/**
* \brief Execute a boundary report program.
*
@@ -542,12 +542,12 @@ hwlmcb_rv_t roseFloatingCallback(size_t end, u32 id,
int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program,
u64a stream_offset, struct hs_scratch *scratch) {
DEBUG_PRINTF("running boundary program at offset %u\n", program);
-
+
if (can_stop_matching(scratch)) {
DEBUG_PRINTF("can stop matching\n");
return MO_HALT_MATCHING;
}
-
+
if (rose->hasSom && scratch->deduper.current_report_offset == ~0ULL) {
/* we cannot delay the initialization of the som deduper logs any longer
* as we are reporting matches. This is done explicitly as we are
@@ -557,13 +557,13 @@ int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program,
fatbit_clear(scratch->deduper.som_log[1]);
scratch->deduper.som_log_dirty = 0;
}
-
+
// Keep assertions in program report path happy. At offset zero, there can
// have been no earlier reports. At EOD, all earlier reports should have
// been handled and we will have been caught up to the stream offset by the
// time we are running boundary report programs.
scratch->tctxt.minMatchOffset = stream_offset;
-
+
const u64a som = 0;
const u8 flags = 0;
hwlmcb_rv_t rv = roseRunProgram(rose, scratch, program, som, stream_offset,
@@ -573,7 +573,7 @@ int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program,
}
return MO_CONTINUE_MATCHING;
-}
+}
/**
* \brief Execute a flush combination program.
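
The trickiest piece of match.c above is the victim-slot selection in
flushQueuedLiterals_i, which turns the range (lastEnd, currEnd] into a bitmask
over the delay-slot ring. Here is a standalone sketch of just the unwrapped
branch, under the 32-slot layout the code implies; the constants are local to
the example rather than taken from the runtime headers.

    #include <stdint.h>
    #include <stdio.h>

    #define DELAY_SLOT_COUNT 32u
    #define DELAY_MASK (DELAY_SLOT_COUNT - 1)

    /* Select filled slots in (lastEnd, currEnd] when the range does not
     * wrap around the ring; mirrors the "unwrapped" branch above. */
    static uint64_t victims_unwrapped(uint32_t filled, uint64_t lastEnd,
                                      uint64_t currEnd) {
        uint32_t lastIndex = lastEnd & DELAY_MASK;
        uint32_t currIndex = currEnd & DELAY_MASK;
        uint64_t v = filled;
        v &= ~((1ULL << (lastIndex + 1)) - 1); /* clear slots <= lastIndex */
        v &= (1ULL << (currIndex + 1)) - 1;    /* clear slots > currIndex */
        return v;
    }

    int main(void) {
        /* slots 3 and 9 filled; slot 3 is at lastEnd so only 9 is a victim */
        uint64_t v = victims_unwrapped((1u << 3) | (1u << 9), 3, 9);
        printf("victims=%#llx\n", (unsigned long long)v); /* prints 0x200 */
        return 0;
    }
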
diff --git a/contrib/libs/hyperscan/src/rose/match.h b/contrib/libs/hyperscan/src/rose/match.h
index b323213cde..c03b1ebbae 100644
--- a/contrib/libs/hyperscan/src/rose/match.h
+++ b/contrib/libs/hyperscan/src/rose/match.h
@@ -1,283 +1,283 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef ROSE_MATCH_H
-#define ROSE_MATCH_H
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ROSE_MATCH_H
+#define ROSE_MATCH_H
+
#include "catchup.h"
-#include "runtime.h"
-#include "scratch.h"
+#include "runtime.h"
+#include "scratch.h"
#include "report.h"
-#include "rose_common.h"
-#include "rose_internal.h"
-#include "ue2common.h"
+#include "rose_common.h"
+#include "rose_internal.h"
+#include "ue2common.h"
#include "hwlm/hwlm.h"
-#include "nfa/nfa_api.h"
-#include "nfa/nfa_api_queue.h"
-#include "nfa/nfa_api_util.h"
-#include "som/som_runtime.h"
-#include "util/bitutils.h"
+#include "nfa/nfa_api.h"
+#include "nfa/nfa_api_queue.h"
+#include "nfa/nfa_api_util.h"
+#include "som/som_runtime.h"
+#include "util/bitutils.h"
#include "util/exhaust.h"
#include "util/fatbit.h"
-#include "util/multibit.h"
-
-/* Callbacks, defined in catchup.c */
-
+#include "util/multibit.h"
+
+/* Callbacks, defined in catchup.c */
+
int roseNfaAdaptor(u64a start, u64a end, ReportID id, void *context);
-
-/* Callbacks, defined in match.c */
-
+
+/* Callbacks, defined in match.c */
+
hwlmcb_rv_t roseCallback(size_t end, u32 id, struct hs_scratch *scratch);
hwlmcb_rv_t roseFloatingCallback(size_t end, u32 id,
struct hs_scratch *scratch);
hwlmcb_rv_t roseDelayRebuildCallback(size_t end, u32 id,
struct hs_scratch *scratch);
int roseAnchoredCallback(u64a start, u64a end, u32 id, void *ctx);
-
-/* Common code, used all over Rose runtime */
-
+
+/* Common code, used all over Rose runtime */
+
hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t,
struct hs_scratch *scratch, u32 event,
u64a top_squash_distance, u64a end,
char in_catchup);
-
+
/** \brief Initialize the queue for a suffix/outfix engine. */
-static really_inline
-void initQueue(struct mq *q, u32 qi, const struct RoseEngine *t,
+static really_inline
+void initQueue(struct mq *q, u32 qi, const struct RoseEngine *t,
struct hs_scratch *scratch) {
- const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
- assert(scratch->fullState);
- q->nfa = getNfaByInfo(t, info);
- q->end = 0;
- q->cur = 0;
- q->state = scratch->fullState + info->fullStateOffset;
+ const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
+ assert(scratch->fullState);
+ q->nfa = getNfaByInfo(t, info);
+ q->end = 0;
+ q->cur = 0;
+ q->state = scratch->fullState + info->fullStateOffset;
q->streamState = scratch->core_info.state + info->stateOffset;
- q->offset = scratch->core_info.buf_offset;
- q->buffer = scratch->core_info.buf;
- q->length = scratch->core_info.len;
- q->history = scratch->core_info.hbuf;
- q->hlength = scratch->core_info.hlen;
+ q->offset = scratch->core_info.buf_offset;
+ q->buffer = scratch->core_info.buf;
+ q->length = scratch->core_info.len;
+ q->history = scratch->core_info.hbuf;
+ q->hlength = scratch->core_info.hlen;
q->cb = roseNfaAdaptor;
q->context = scratch;
- q->report_current = 0;
-
- DEBUG_PRINTF("qi=%u, offset=%llu, fullState=%u, streamState=%u, "
- "state=%u\n", qi, q->offset, info->fullStateOffset,
- info->stateOffset, *(u32 *)q->state);
-}
-
+ q->report_current = 0;
+
+ DEBUG_PRINTF("qi=%u, offset=%llu, fullState=%u, streamState=%u, "
+ "state=%u\n", qi, q->offset, info->fullStateOffset,
+ info->stateOffset, *(u32 *)q->state);
+}
+
/** \brief Initialize the queue for a leftfix (prefix/infix) engine. */
-static really_inline
-void initRoseQueue(const struct RoseEngine *t, u32 qi,
- const struct LeftNfaInfo *left,
+static really_inline
+void initRoseQueue(const struct RoseEngine *t, u32 qi,
+ const struct LeftNfaInfo *left,
struct hs_scratch *scratch) {
- struct mq *q = scratch->queues + qi;
- const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
- q->nfa = getNfaByInfo(t, info);
- q->end = 0;
- q->cur = 0;
- q->state = scratch->fullState + info->fullStateOffset;
-
-    // Transient roses don't have stream state; we use tstate in scratch
- // instead. The only reason we need this at ALL is for LimEx extended
- // regions, which assume that they have access to q->streamState +
- // compressedStateSize.
- if (left->transient) {
- q->streamState = (char *)scratch->tstate + info->stateOffset;
- } else {
+ struct mq *q = scratch->queues + qi;
+ const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
+ q->nfa = getNfaByInfo(t, info);
+ q->end = 0;
+ q->cur = 0;
+ q->state = scratch->fullState + info->fullStateOffset;
+
+    // Transient roses don't have stream state; we use tstate in scratch
+ // instead. The only reason we need this at ALL is for LimEx extended
+ // regions, which assume that they have access to q->streamState +
+ // compressedStateSize.
+ if (left->transient) {
+ q->streamState = (char *)scratch->tstate + info->stateOffset;
+ } else {
q->streamState = scratch->core_info.state + info->stateOffset;
- }
-
- q->offset = scratch->core_info.buf_offset;
- q->buffer = scratch->core_info.buf;
- q->length = scratch->core_info.len;
- q->history = scratch->core_info.hbuf;
- q->hlength = scratch->core_info.hlen;
- q->cb = NULL;
- q->context = NULL;
- q->report_current = 0;
-
- DEBUG_PRINTF("qi=%u, offset=%llu, fullState=%u, streamState=%u, "
- "state=%u\n", qi, q->offset, info->fullStateOffset,
- info->stateOffset, *(u32 *)q->state);
-}
-
-/** returns 0 if there is space for two items (top and end) on the queue */
-static really_inline
-char isQueueFull(const struct mq *q) {
- return q->end + 2 > MAX_MQE_LEN;
-}
-
-static really_inline
-void loadStreamState(const struct NFA *nfa, struct mq *q, s64a loc) {
- DEBUG_PRINTF("offset=%llu, length=%zu, hlength=%zu, loc=%lld\n",
- q->offset, q->length, q->hlength, loc);
- nfaExpandState(nfa, q->state, q->streamState, q->offset + loc,
- queue_prev_byte(q, loc));
-}
-
-static really_inline
+ }
+
+ q->offset = scratch->core_info.buf_offset;
+ q->buffer = scratch->core_info.buf;
+ q->length = scratch->core_info.len;
+ q->history = scratch->core_info.hbuf;
+ q->hlength = scratch->core_info.hlen;
+ q->cb = NULL;
+ q->context = NULL;
+ q->report_current = 0;
+
+ DEBUG_PRINTF("qi=%u, offset=%llu, fullState=%u, streamState=%u, "
+ "state=%u\n", qi, q->offset, info->fullStateOffset,
+ info->stateOffset, *(u32 *)q->state);
+}
+
+/** returns 0 if there is space for two items (top and end) on the queue */
+static really_inline
+char isQueueFull(const struct mq *q) {
+ return q->end + 2 > MAX_MQE_LEN;
+}
+
+static really_inline
+void loadStreamState(const struct NFA *nfa, struct mq *q, s64a loc) {
+ DEBUG_PRINTF("offset=%llu, length=%zu, hlength=%zu, loc=%lld\n",
+ q->offset, q->length, q->hlength, loc);
+ nfaExpandState(nfa, q->state, q->streamState, q->offset + loc,
+ queue_prev_byte(q, loc));
+}
+
+static really_inline
void storeRoseDelay(const struct RoseEngine *t, char *state,
- const struct LeftNfaInfo *left, u32 loc) {
- u32 di = left->lagIndex;
- if (di == ROSE_OFFSET_INVALID) {
- return;
- }
-
- assert(loc < 256); // ONE WHOLE BYTE!
- DEBUG_PRINTF("storing rose delay %u in slot %u\n", loc, di);
- u8 *leftfixDelay = getLeftfixLagTable(t, state);
- assert(loc <= MAX_STORED_LEFTFIX_LAG);
- leftfixDelay[di] = loc;
-}
-
-static really_inline
+ const struct LeftNfaInfo *left, u32 loc) {
+ u32 di = left->lagIndex;
+ if (di == ROSE_OFFSET_INVALID) {
+ return;
+ }
+
+ assert(loc < 256); // ONE WHOLE BYTE!
+ DEBUG_PRINTF("storing rose delay %u in slot %u\n", loc, di);
+ u8 *leftfixDelay = getLeftfixLagTable(t, state);
+ assert(loc <= MAX_STORED_LEFTFIX_LAG);
+ leftfixDelay[di] = loc;
+}
+
+static really_inline
void setAsZombie(const struct RoseEngine *t, char *state,
- const struct LeftNfaInfo *left) {
- u32 di = left->lagIndex;
- assert(di != ROSE_OFFSET_INVALID);
- if (di == ROSE_OFFSET_INVALID) {
- return;
- }
-
- u8 *leftfixDelay = getLeftfixLagTable(t, state);
- leftfixDelay[di] = OWB_ZOMBIE_ALWAYS_YES;
-}
-
-/* loadRoseDelay MUST NOT be called on the first stream write as it is only
- * initialized for running nfas on stream boundaries */
-static really_inline
+ const struct LeftNfaInfo *left) {
+ u32 di = left->lagIndex;
+ assert(di != ROSE_OFFSET_INVALID);
+ if (di == ROSE_OFFSET_INVALID) {
+ return;
+ }
+
+ u8 *leftfixDelay = getLeftfixLagTable(t, state);
+ leftfixDelay[di] = OWB_ZOMBIE_ALWAYS_YES;
+}
+
+/* loadRoseDelay MUST NOT be called on the first stream write as it is only
+ * initialized for running nfas on stream boundaries */
+static really_inline
u32 loadRoseDelay(const struct RoseEngine *t, const char *state,
- const struct LeftNfaInfo *left) {
- u32 di = left->lagIndex;
- if (di == ROSE_OFFSET_INVALID) {
- return 0;
- }
-
- const u8 *leftfixDelay = getLeftfixLagTableConst(t, state);
- u32 loc = leftfixDelay[di];
- DEBUG_PRINTF("read rose delay %u from slot %u\n", loc, di);
- return loc;
-}
-
-static really_inline
+ const struct LeftNfaInfo *left) {
+ u32 di = left->lagIndex;
+ if (di == ROSE_OFFSET_INVALID) {
+ return 0;
+ }
+
+ const u8 *leftfixDelay = getLeftfixLagTableConst(t, state);
+ u32 loc = leftfixDelay[di];
+ DEBUG_PRINTF("read rose delay %u from slot %u\n", loc, di);
+ return loc;
+}
+
+static really_inline
char isZombie(const struct RoseEngine *t, const char *state,
- const struct LeftNfaInfo *left) {
- u32 di = left->lagIndex;
- assert(di != ROSE_OFFSET_INVALID);
- if (di == ROSE_OFFSET_INVALID) {
- return 0;
- }
-
- const u8 *leftfixDelay = getLeftfixLagTableConst(t, state);
- DEBUG_PRINTF("read owb %hhu from slot %u\n", leftfixDelay[di], di);
- return leftfixDelay[di] == OWB_ZOMBIE_ALWAYS_YES;
-}
-
+ const struct LeftNfaInfo *left) {
+ u32 di = left->lagIndex;
+ assert(di != ROSE_OFFSET_INVALID);
+ if (di == ROSE_OFFSET_INVALID) {
+ return 0;
+ }
+
+ const u8 *leftfixDelay = getLeftfixLagTableConst(t, state);
+ DEBUG_PRINTF("read owb %hhu from slot %u\n", leftfixDelay[di], di);
+ return leftfixDelay[di] == OWB_ZOMBIE_ALWAYS_YES;
+}
+
hwlmcb_rv_t flushQueuedLiterals_i(const struct RoseEngine *t,
struct hs_scratch *scratch, u64a end);
-
-static really_inline
+
+static really_inline
hwlmcb_rv_t flushQueuedLiterals(const struct RoseEngine *t,
struct hs_scratch *scratch, u64a end) {
struct RoseContext *tctxt = &scratch->tctxt;
- if (tctxt->delayLastEndOffset == end) {
- DEBUG_PRINTF("no progress, no flush\n");
- return HWLM_CONTINUE_MATCHING;
- }
-
+ if (tctxt->delayLastEndOffset == end) {
+ DEBUG_PRINTF("no progress, no flush\n");
+ return HWLM_CONTINUE_MATCHING;
+ }
+
if (!tctxt->filledDelayedSlots && !scratch->al_log_sum) {
- tctxt->delayLastEndOffset = end;
- return HWLM_CONTINUE_MATCHING;
- }
-
+ tctxt->delayLastEndOffset = end;
+ return HWLM_CONTINUE_MATCHING;
+ }
+
return flushQueuedLiterals_i(t, scratch, end);
-}
-
-static really_inline
+}
+
+static really_inline
hwlmcb_rv_t cleanUpDelayed(const struct RoseEngine *t,
struct hs_scratch *scratch, size_t length,
u64a offset) {
if (can_stop_matching(scratch)) {
- return HWLM_TERMINATE_MATCHING;
- }
-
+ return HWLM_TERMINATE_MATCHING;
+ }
+
if (flushQueuedLiterals(t, scratch, length + offset)
- == HWLM_TERMINATE_MATCHING) {
- return HWLM_TERMINATE_MATCHING;
- }
-
+ == HWLM_TERMINATE_MATCHING) {
+ return HWLM_TERMINATE_MATCHING;
+ }
+
struct RoseContext *tctxt = &scratch->tctxt;
- if (tctxt->filledDelayedSlots) {
- DEBUG_PRINTF("dirty\n");
+ if (tctxt->filledDelayedSlots) {
+ DEBUG_PRINTF("dirty\n");
scratch->core_info.status |= STATUS_DELAY_DIRTY;
- } else {
+ } else {
scratch->core_info.status &= ~STATUS_DELAY_DIRTY;
- }
-
- tctxt->filledDelayedSlots = 0;
- tctxt->delayLastEndOffset = offset;
-
- return HWLM_CONTINUE_MATCHING;
-}
-
-static rose_inline
+ }
+
+ tctxt->filledDelayedSlots = 0;
+ tctxt->delayLastEndOffset = offset;
+
+ return HWLM_CONTINUE_MATCHING;
+}
+
+static rose_inline
void roseFlushLastByteHistory(const struct RoseEngine *t,
struct hs_scratch *scratch, u64a currEnd) {
- if (!t->lastByteHistoryIterOffset) {
- return;
- }
-
+ if (!t->lastByteHistoryIterOffset) {
+ return;
+ }
+
struct RoseContext *tctxt = &scratch->tctxt;
- struct core_info *ci = &scratch->core_info;
-
- /* currEnd is last byte of string + 1 */
- if (tctxt->lastEndOffset == ci->buf_offset + ci->len
- || currEnd != ci->buf_offset + ci->len) {
- /* already flushed or it is not yet time to flush */
- return;
- }
-
- DEBUG_PRINTF("flushing\n");
-
+ struct core_info *ci = &scratch->core_info;
+
+ /* currEnd is last byte of string + 1 */
+ if (tctxt->lastEndOffset == ci->buf_offset + ci->len
+ || currEnd != ci->buf_offset + ci->len) {
+ /* already flushed or it is not yet time to flush */
+ return;
+ }
+
+ DEBUG_PRINTF("flushing\n");
+
const struct mmbit_sparse_iter *it =
getByOffset(t, t->lastByteHistoryIterOffset);
assert(ISALIGNED(it));
- const u32 numStates = t->rolesWithStateCount;
+ const u32 numStates = t->rolesWithStateCount;
void *role_state = getRoleState(scratch->core_info.state);
-
+
struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES];
mmbit_sparse_iter_unset(role_state, numStates, it, si_state);
-}
-
+}
+
static rose_inline
int roseHasInFlightMatches(const struct RoseEngine *t, char *state,
const struct hs_scratch *scratch) {
@@ -380,4 +380,4 @@ hwlmcb_rv_t ensureQueueFlushed(const struct RoseEngine *t,
return ensureQueueFlushed_i(t, scratch, qi, loc, 0, 0);
}
-#endif
+#endif
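
The storeRoseDelay/loadRoseDelay/isZombie trio above encodes each leftfix's
lag in a single byte of stream state, reserving one sentinel value for the
zombie case. A toy model of that encoding follows; the sentinel and
invalid-index values are stand-ins for ROSE_OFFSET_INVALID and
OWB_ZOMBIE_ALWAYS_YES from the engine headers.

    #include <assert.h>
    #include <stdint.h>

    #define LAG_INVALID 0xffffffffu /* stand-in for ROSE_OFFSET_INVALID */
    #define LAG_ZOMBIE  0xffu       /* stand-in for OWB_ZOMBIE_ALWAYS_YES */

    /* One byte of lag per leftfix: store, load, and zombie checks. */
    static void store_lag(uint8_t *table, uint32_t idx, uint32_t loc) {
        if (idx == LAG_INVALID) {
            return;               /* this leftfix keeps no lag state */
        }
        assert(loc < LAG_ZOMBIE); /* must not collide with the sentinel */
        table[idx] = (uint8_t)loc;
    }

    static uint32_t load_lag(const uint8_t *table, uint32_t idx) {
        return idx == LAG_INVALID ? 0 : table[idx];
    }

    static int is_zombie(const uint8_t *table, uint32_t idx) {
        return idx != LAG_INVALID && table[idx] == LAG_ZOMBIE;
    }
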
diff --git a/contrib/libs/hyperscan/src/rose/miracle.h b/contrib/libs/hyperscan/src/rose/miracle.h
index 28f61a2992..604c50205c 100644
--- a/contrib/libs/hyperscan/src/rose/miracle.h
+++ b/contrib/libs/hyperscan/src/rose/miracle.h
@@ -1,138 +1,138 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef ROSE_MIRACLE_H
-#define ROSE_MIRACLE_H
-
-#include "ue2common.h"
-#include "runtime.h"
-#include "rose_internal.h"
-
-/** \brief Maximum number of bytes to scan when looking for a "miracle" stop
- * character. */
-#define MIRACLE_LEN_MAX 32
-
-static really_inline
-u64a roseMiracleScan(const u8 *stop, const u8 *d, const u8 *d_start) {
- assert(d >= d_start);
-
- // Note: unrolling this loop manually does appear to reduce its
- // performance. I'm sick of tilting at this particular windmill.
-
- u32 mshift = 0;
- do {
- u64a s = (u64a)stop[*d];
- if (s) {
- s <<= mshift;
- return s;
- }
- mshift++;
- } while (--d >= d_start);
- return 0;
-}
-
-/**
- * \brief "Miracle" scan: uses stop table to check if we can skip forward to a
- * location where we know that the given rose engine will be in a known state.
- *
- * Scans the buffer/history between relative locations \a begin_loc and \a
- * end_loc, and returns a miracle location (if any) that appears in the stream
- * after \a begin_loc.
- *
- * Returns 1 if some bytes can be skipped and sets \a miracle_loc
- * appropriately, 0 otherwise.
- */
-static rose_inline
-char roseMiracleOccurs(const struct RoseEngine *t,
- const struct LeftNfaInfo *left,
- const struct core_info *ci, const s64a begin_loc,
- const s64a end_loc, s64a *miracle_loc) {
- assert(!left->transient);
- assert(left->stopTable);
-
- DEBUG_PRINTF("looking for miracle over [%lld,%lld], maxLag=%u\n",
- begin_loc, end_loc, left->maxLag);
- DEBUG_PRINTF("ci->len=%zu, ci->hlen=%zu\n", ci->len, ci->hlen);
-
- assert(begin_loc <= end_loc);
- assert(begin_loc >= -(s64a)ci->hlen);
- assert(end_loc <= (s64a)ci->len);
-
- const u8 *stop = getByOffset(t, left->stopTable);
-
- const s64a scan_end_loc = end_loc - left->maxLag;
- if (scan_end_loc <= begin_loc) {
- DEBUG_PRINTF("nothing to scan\n");
- return 0;
- }
-
- const s64a start = MAX(begin_loc, scan_end_loc - MIRACLE_LEN_MAX);
- DEBUG_PRINTF("scan [%lld..%lld]\n", start, scan_end_loc);
-
- u64a s = 0; // state, on bits are miracle locations
-
- // Scan buffer.
- const s64a buf_scan_start = MAX(0, start);
- if (scan_end_loc > buf_scan_start) {
- const u8 *buf = ci->buf;
- const u8 *d = buf + scan_end_loc - 1;
- const u8 *d_start = buf + buf_scan_start;
- s = roseMiracleScan(stop, d, d_start);
- if (s) {
- goto miracle_found;
- }
- }
-
- // Scan history.
- if (start < 0) {
- const u8 *hbuf_end = ci->hbuf + ci->hlen;
- const u8 *d = hbuf_end + MIN(0, scan_end_loc) - 1;
- const u8 *d_start = hbuf_end + start;
- s = roseMiracleScan(stop, d, d_start);
- if (scan_end_loc > 0) {
- // Shift s over to account for the buffer scan above.
- s <<= scan_end_loc;
- }
- }
-
- if (s) {
- miracle_found:
- DEBUG_PRINTF("s=0x%llx, ctz=%u\n", s, ctz64(s));
- s64a loc = end_loc - left->maxLag - ctz64(s) - 1;
- if (loc > begin_loc) {
- DEBUG_PRINTF("miracle at %lld\n", loc);
- *miracle_loc = loc;
- return 1;
- }
- }
-
- DEBUG_PRINTF("no viable miraculous stop characters found\n");
- return 0;
-}
-
-#endif // ROSE_MIRACLE_H
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ROSE_MIRACLE_H
+#define ROSE_MIRACLE_H
+
+#include "ue2common.h"
+#include "runtime.h"
+#include "rose_internal.h"
+
+/** \brief Maximum number of bytes to scan when looking for a "miracle" stop
+ * character. */
+#define MIRACLE_LEN_MAX 32
+
+static really_inline
+u64a roseMiracleScan(const u8 *stop, const u8 *d, const u8 *d_start) {
+ assert(d >= d_start);
+
+ // Note: unrolling this loop manually does appear to reduce its
+ // performance. I'm sick of tilting at this particular windmill.
+
+ u32 mshift = 0;
+ do {
+ u64a s = (u64a)stop[*d];
+ if (s) {
+ s <<= mshift;
+ return s;
+ }
+ mshift++;
+ } while (--d >= d_start);
+ return 0;
+}
+
+/**
+ * \brief "Miracle" scan: uses stop table to check if we can skip forward to a
+ * location where we know that the given rose engine will be in a known state.
+ *
+ * Scans the buffer/history between relative locations \a begin_loc and \a
+ * end_loc, and returns a miracle location (if any) that appears in the stream
+ * after \a begin_loc.
+ *
+ * Returns 1 if some bytes can be skipped and sets \a miracle_loc
+ * appropriately, 0 otherwise.
+ */
+static rose_inline
+char roseMiracleOccurs(const struct RoseEngine *t,
+ const struct LeftNfaInfo *left,
+ const struct core_info *ci, const s64a begin_loc,
+ const s64a end_loc, s64a *miracle_loc) {
+ assert(!left->transient);
+ assert(left->stopTable);
+
+ DEBUG_PRINTF("looking for miracle over [%lld,%lld], maxLag=%u\n",
+ begin_loc, end_loc, left->maxLag);
+ DEBUG_PRINTF("ci->len=%zu, ci->hlen=%zu\n", ci->len, ci->hlen);
+
+ assert(begin_loc <= end_loc);
+ assert(begin_loc >= -(s64a)ci->hlen);
+ assert(end_loc <= (s64a)ci->len);
+
+ const u8 *stop = getByOffset(t, left->stopTable);
+
+ const s64a scan_end_loc = end_loc - left->maxLag;
+ if (scan_end_loc <= begin_loc) {
+ DEBUG_PRINTF("nothing to scan\n");
+ return 0;
+ }
+
+ const s64a start = MAX(begin_loc, scan_end_loc - MIRACLE_LEN_MAX);
+ DEBUG_PRINTF("scan [%lld..%lld]\n", start, scan_end_loc);
+
+ u64a s = 0; // state, on bits are miracle locations
+
+ // Scan buffer.
+ const s64a buf_scan_start = MAX(0, start);
+ if (scan_end_loc > buf_scan_start) {
+ const u8 *buf = ci->buf;
+ const u8 *d = buf + scan_end_loc - 1;
+ const u8 *d_start = buf + buf_scan_start;
+ s = roseMiracleScan(stop, d, d_start);
+ if (s) {
+ goto miracle_found;
+ }
+ }
+
+ // Scan history.
+ if (start < 0) {
+ const u8 *hbuf_end = ci->hbuf + ci->hlen;
+ const u8 *d = hbuf_end + MIN(0, scan_end_loc) - 1;
+ const u8 *d_start = hbuf_end + start;
+ s = roseMiracleScan(stop, d, d_start);
+ if (scan_end_loc > 0) {
+ // Shift s over to account for the buffer scan above.
+ s <<= scan_end_loc;
+ }
+ }
+
+ if (s) {
+ miracle_found:
+ DEBUG_PRINTF("s=0x%llx, ctz=%u\n", s, ctz64(s));
+ s64a loc = end_loc - left->maxLag - ctz64(s) - 1;
+ if (loc > begin_loc) {
+ DEBUG_PRINTF("miracle at %lld\n", loc);
+ *miracle_loc = loc;
+ return 1;
+ }
+ }
+
+ DEBUG_PRINTF("no viable miraculous stop characters found\n");
+ return 0;
+}
+
+#endif // ROSE_MIRACLE_H
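
roseMiracleScan above walks right-to-left from d to d_start and returns a
mask whose set bit records how far back the first stop byte was found, so
ctz64 on the result recovers the miracle location closest to the scan end. A
compact demonstration against a plain 256-entry stop table; the input data is
made up, and a 0/1-valued table is assumed so returning a plain one-hot bit
matches the shifted table value the real code returns.

    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t u64a;
    typedef uint8_t u8;

    /* Same shape as roseMiracleScan: scan [d_start, d] right-to-left and
     * return a one-hot mask encoding the distance of the first stop byte. */
    static u64a miracle_scan(const u8 *stop, const u8 *d, const u8 *d_start) {
        u64a mshift = 0;
        do {
            if (stop[*d]) {
                return (u64a)1 << mshift;
            }
            mshift++;
        } while (--d >= d_start);
        return 0;
    }

    int main(void) {
        u8 stop[256] = {0};
        stop['\n'] = 1;                 /* newline is our stop character */
        const u8 buf[] = "abc\ndef";
        u64a s = miracle_scan(stop, buf + 6, buf);
        /* '\n' is 3 bytes back from 'f', so bit 3 is set */
        printf("mask=%#llx\n", (unsigned long long)s); /* prints 0x8 */
        return 0;
    }
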
diff --git a/contrib/libs/hyperscan/src/rose/rose.h b/contrib/libs/hyperscan/src/rose/rose.h
index e227f04292..409b70028f 100644
--- a/contrib/libs/hyperscan/src/rose/rose.h
+++ b/contrib/libs/hyperscan/src/rose/rose.h
@@ -1,62 +1,62 @@
-/*
+/*
* Copyright (c) 2015-2019, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef ROSE_H
-#define ROSE_H
-
-#include "ue2common.h"
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ROSE_H
+#define ROSE_H
+
+#include "ue2common.h"
+
struct RoseEngine;
struct hs_scratch;
-// Initialise state space for engine use.
+// Initialise state space for engine use.
void roseInitState(const struct RoseEngine *t, char *state);
-
+
/* assumes core_info in scratch has been init to point to data */
void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch);
-
-/* assumes core_info in scratch has been init to point to data */
+
+/* assumes core_info in scratch has been init to point to data */
void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch);
-
+
void roseStreamEodExec(const struct RoseEngine *t, u64a offset,
struct hs_scratch *scratch);
-
+
hwlmcb_rv_t roseCallback(size_t end, u32 id, struct hs_scratch *scratch);
-
+
int roseReportAdaptor(u64a start, u64a end, ReportID id, void *context);
-
+
int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program,
u64a stream_offset, struct hs_scratch *scratch);
-
+
int roseRunFlushCombProgram(const struct RoseEngine *rose,
struct hs_scratch *scratch, u64a end);
int roseRunLastFlushCombProgram(const struct RoseEngine *rose,
struct hs_scratch *scratch, u64a end);
-#endif // ROSE_H
+#endif // ROSE_H
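
For orientation, these entry points are sequenced as follows for a streaming
scan. This is a sketch only: the helpers marked hypothetical stand in for
allocation and core_info setup machinery that lives outside this header, and
the include path is assumed.

#include "rose.h"   // this header; path assumed
#include <cstddef>

char *alloc_stream_state(const struct RoseEngine *t);         // hypothetical
struct hs_scratch *alloc_scratch(const struct RoseEngine *t); // hypothetical
void setup_core_info(struct hs_scratch *scratch, char *state,
                     const char *data, size_t len);           // hypothetical

void scan_stream(const struct RoseEngine *t, const char *data, size_t len) {
    char *state = alloc_stream_state(t);
    struct hs_scratch *scratch = alloc_scratch(t);

    roseInitState(t, state);                    // once, when the stream opens
    setup_core_info(scratch, state, data, len); // per block: core_info must
                                                // point at the data, per the
                                                // comment above
    roseStreamExec(t, scratch);                 // once per incoming block
    roseStreamEodExec(t, len, scratch);         // after the final block
}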
diff --git a/contrib/libs/hyperscan/src/rose/rose_build.h b/contrib/libs/hyperscan/src/rose/rose_build.h
index 958eb0f7fa..ca3ba3696e 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build.h
+++ b/contrib/libs/hyperscan/src/rose/rose_build.h
@@ -1,143 +1,143 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Rose Build interface.
- *
- * Rose Build interface. Everything you ever needed to feed literals in and
- * get a RoseEngine out. This header should be everything needed by the rest
- * of UE2.
- */
-
-#ifndef ROSE_BUILD_H
-#define ROSE_BUILD_H
-
-#include "ue2common.h"
-#include "rose_common.h"
-#include "rose_in_graph.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Rose Build interface.
+ *
+ * Rose Build interface. Everything you ever needed to feed literals in and
+ * get a RoseEngine out. This header should be everything needed by the rest
+ * of UE2.
+ */
+
+#ifndef ROSE_BUILD_H
+#define ROSE_BUILD_H
+
+#include "ue2common.h"
+#include "rose_common.h"
+#include "rose_in_graph.h"
#include "util/bytecode_ptr.h"
-#include "util/charreach.h"
+#include "util/charreach.h"
#include "util/flat_containers.h"
#include "util/noncopyable.h"
-#include "util/ue2string.h"
-
-#include <memory>
-#include <set>
-#include <utility>
-#include <vector>
-
-struct NFA;
-struct SmallWriteEngine;
-struct RoseEngine;
-
-namespace ue2 {
-
-struct BoundaryReports;
-struct CompileContext;
-struct raw_puff;
-struct raw_som_dfa;
-class CharReach;
-class NGHolder;
-class ReportManager;
+#include "util/ue2string.h"
+
+#include <memory>
+#include <set>
+#include <utility>
+#include <vector>
+
+struct NFA;
+struct SmallWriteEngine;
+struct RoseEngine;
+
+namespace ue2 {
+
+struct BoundaryReports;
+struct CompileContext;
+struct raw_puff;
+struct raw_som_dfa;
+class CharReach;
+class NGHolder;
+class ReportManager;
class SmallWriteBuild;
-class SomSlotManager;
-
-class RoseDedupeAux {
-public:
- virtual ~RoseDedupeAux();
-
- /** \brief True if we can not establish that at most a single callback will
- * be generated at a given offset from this set of reports. */
+class SomSlotManager;
+
+class RoseDedupeAux {
+public:
+ virtual ~RoseDedupeAux();
+
+ /** \brief True if we cannot establish that at most a single callback will
+ * be generated at a given offset from this set of reports. */
virtual bool requiresDedupeSupport(const flat_set<ReportID> &reports)
- const = 0;
-};
-
-/** \brief Abstract interface intended for callers from elsewhere in the tree,
- * real underlying implementation is RoseBuildImpl in rose_build_impl.h. */
+ const = 0;
+};
+
+/** \brief Abstract interface intended for callers from elsewhere in the tree,
+ * real underlying implementation is RoseBuildImpl in rose_build_impl.h. */
class RoseBuild : noncopyable {
-public:
- virtual ~RoseBuild();
-
- /** \brief Adds a single literal. */
- virtual void add(bool anchored, bool eod, const ue2_literal &lit,
+public:
+ virtual ~RoseBuild();
+
+ /** \brief Adds a single literal. */
+ virtual void add(bool anchored, bool eod, const ue2_literal &lit,
const flat_set<ReportID> &ids) = 0;
-
+
virtual bool addRose(const RoseInGraph &ig, bool prefilter) = 0;
- virtual bool addSombeRose(const RoseInGraph &ig) = 0;
-
- virtual bool addOutfix(const NGHolder &h) = 0;
- virtual bool addOutfix(const NGHolder &h, const raw_som_dfa &haig) = 0;
- virtual bool addOutfix(const raw_puff &rp) = 0;
-
- virtual bool addChainTail(const raw_puff &rp, u32 *queue_out,
- u32 *event_out) = 0;
-
- /** \brief Returns true if we were able to add it as a mask. */
- virtual bool add(bool anchored, const std::vector<CharReach> &mask,
+ virtual bool addSombeRose(const RoseInGraph &ig) = 0;
+
+ virtual bool addOutfix(const NGHolder &h) = 0;
+ virtual bool addOutfix(const NGHolder &h, const raw_som_dfa &haig) = 0;
+ virtual bool addOutfix(const raw_puff &rp) = 0;
+
+ virtual bool addChainTail(const raw_puff &rp, u32 *queue_out,
+ u32 *event_out) = 0;
+
+ /** \brief Returns true if we were able to add it as a mask. */
+ virtual bool add(bool anchored, const std::vector<CharReach> &mask,
const flat_set<ReportID> &reports) = 0;
-
- /** \brief Attempts to add the graph to the anchored acyclic table. Returns
- * true on success. */
- virtual bool addAnchoredAcyclic(const NGHolder &graph) = 0;
-
- virtual bool validateMask(const std::vector<CharReach> &mask,
+
+ /** \brief Attempts to add the graph to the anchored acyclic table. Returns
+ * true on success. */
+ virtual bool addAnchoredAcyclic(const NGHolder &graph) = 0;
+
+ virtual bool validateMask(const std::vector<CharReach> &mask,
const flat_set<ReportID> &reports,
- bool anchored, bool eod) const = 0;
- virtual void addMask(const std::vector<CharReach> &mask,
+ bool anchored, bool eod) const = 0;
+ virtual void addMask(const std::vector<CharReach> &mask,
const flat_set<ReportID> &reports, bool anchored,
- bool eod) = 0;
-
- /** \brief Construct a runtime implementation. */
+ bool eod) = 0;
+
+ /** \brief Construct a runtime implementation. */
virtual bytecode_ptr<RoseEngine> buildRose(u32 minWidth) = 0;
-
- virtual std::unique_ptr<RoseDedupeAux> generateDedupeAux() const = 0;
-
- /** Get a unique report identifier for a prefix|infix engine */
- virtual ReportID getNewNfaReport() = 0;
-
- /** Note that we have seen a SOM pattern. */
- virtual void setSom() = 0;
-};
-
-// Construct a usable Rose builder.
-std::unique_ptr<RoseBuild> makeRoseBuilder(ReportManager &rm,
- SomSlotManager &ssm,
+
+ virtual std::unique_ptr<RoseDedupeAux> generateDedupeAux() const = 0;
+
+ /** Get a unique report identifier for a prefix|infix engine */
+ virtual ReportID getNewNfaReport() = 0;
+
+ /** Note that we have seen a SOM pattern. */
+ virtual void setSom() = 0;
+};
+
+// Construct a usable Rose builder.
+std::unique_ptr<RoseBuild> makeRoseBuilder(ReportManager &rm,
+ SomSlotManager &ssm,
SmallWriteBuild &smwr,
- const CompileContext &cc,
- const BoundaryReports &boundary);
-
-bool roseCheckRose(const RoseInGraph &ig, bool prefilter,
- const ReportManager &rm, const CompileContext &cc);
-
-bool roseIsPureLiteral(const RoseEngine *t);
-
-size_t maxOverlap(const ue2_literal &a, const ue2_literal &b, u32 b_delay);
-
-} // namespace ue2
-
-#endif // ROSE_BUILD_H
+ const CompileContext &cc,
+ const BoundaryReports &boundary);
+
+bool roseCheckRose(const RoseInGraph &ig, bool prefilter,
+ const ReportManager &rm, const CompileContext &cc);
+
+bool roseIsPureLiteral(const RoseEngine *t);
+
+size_t maxOverlap(const ue2_literal &a, const ue2_literal &b, u32 b_delay);
+
+} // namespace ue2
+
+#endif // ROSE_BUILD_H
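
The interface above is driven roughly as follows. A minimal sketch, assuming
the caller already holds the managers that makeRoseBuilder needs; the literal,
report ID, and minWidth values are illustrative only, and the include path is
assumed.

#include "rose_build.h"   // this header; path assumed

namespace ue2 {

// Sketch: feed one floating literal in, get a RoseEngine out.
bytecode_ptr<RoseEngine> compileOneLiteral(ReportManager &rm,
                                           SomSlotManager &ssm,
                                           SmallWriteBuild &smwr,
                                           const CompileContext &cc,
                                           const BoundaryReports &boundary) {
    std::unique_ptr<RoseBuild> build =
        makeRoseBuilder(rm, ssm, smwr, cc, boundary);

    // One unanchored, non-EOD, case-sensitive literal with a single report.
    flat_set<ReportID> reports;
    reports.insert(0);
    build->add(false /* anchored */, false /* eod */,
               ue2_literal("foobar", false /* nocase */), reports);

    // Construct the runtime bytecode; 6 is the length of the shortest match.
    return build->buildRose(6 /* minWidth */);
}

} // namespace ue2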
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_add.cpp b/contrib/libs/hyperscan/src/rose/rose_build_add.cpp
index 2dc136c0f1..4929c95fce 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_add.cpp
+++ b/contrib/libs/hyperscan/src/rose/rose_build_add.cpp
@@ -1,675 +1,675 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "rose_build_add_internal.h"
-#include "rose_build_impl.h"
-
-#include "ue2common.h"
-#include "grey.h"
-#include "rose_build_anchored.h"
-#include "rose_in_util.h"
-#include "hwlm/hwlm_literal.h"
-#include "nfa/goughcompile.h"
-#include "nfa/nfa_api_queue.h"
-#include "nfagraph/ng_depth.h"
-#include "nfagraph/ng_dump.h"
-#include "nfagraph/ng_holder.h"
-#include "nfagraph/ng_limex.h"
-#include "nfagraph/ng_mcclellan.h"
-#include "nfagraph/ng_prefilter.h"
-#include "nfagraph/ng_prune.h"
-#include "nfagraph/ng_region.h"
-#include "nfagraph/ng_repeat.h"
-#include "nfagraph/ng_reports.h"
-#include "nfagraph/ng_util.h"
-#include "nfagraph/ng_width.h"
-#include "util/charreach.h"
-#include "util/charreach_util.h"
-#include "util/compare.h"
-#include "util/compile_context.h"
-#include "util/container.h"
-#include "util/dump_charclass.h"
-#include "util/graph_range.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "rose_build_add_internal.h"
+#include "rose_build_impl.h"
+
+#include "ue2common.h"
+#include "grey.h"
+#include "rose_build_anchored.h"
+#include "rose_in_util.h"
+#include "hwlm/hwlm_literal.h"
+#include "nfa/goughcompile.h"
+#include "nfa/nfa_api_queue.h"
+#include "nfagraph/ng_depth.h"
+#include "nfagraph/ng_dump.h"
+#include "nfagraph/ng_holder.h"
+#include "nfagraph/ng_limex.h"
+#include "nfagraph/ng_mcclellan.h"
+#include "nfagraph/ng_prefilter.h"
+#include "nfagraph/ng_prune.h"
+#include "nfagraph/ng_region.h"
+#include "nfagraph/ng_repeat.h"
+#include "nfagraph/ng_reports.h"
+#include "nfagraph/ng_util.h"
+#include "nfagraph/ng_width.h"
+#include "util/charreach.h"
+#include "util/charreach_util.h"
+#include "util/compare.h"
+#include "util/compile_context.h"
+#include "util/container.h"
+#include "util/dump_charclass.h"
+#include "util/graph_range.h"
#include "util/insertion_ordered.h"
-#include "util/make_unique.h"
+#include "util/make_unique.h"
#include "util/noncopyable.h"
-#include "util/order_check.h"
-#include "util/report_manager.h"
-#include "util/ue2string.h"
-#include "util/verify_types.h"
-
-#include <algorithm>
-#include <map>
-#include <set>
-#include <string>
-#include <vector>
-#include <utility>
-
-using namespace std;
-
-namespace ue2 {
-
-/**
- * \brief Data used by most of the construction code in this file.
- */
+#include "util/order_check.h"
+#include "util/report_manager.h"
+#include "util/ue2string.h"
+#include "util/verify_types.h"
+
+#include <algorithm>
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+#include <utility>
+
+using namespace std;
+
+namespace ue2 {
+
+/**
+ * \brief Data used by most of the construction code in this file.
+ */
struct RoseBuildData : noncopyable {
- RoseBuildData(const RoseInGraph &ig_in, bool som_in)
- : ig(ig_in), som(som_in) {}
-
- /** Input rose graph. */
- const RoseInGraph &ig;
-
- /** Edges we've transformed (in \ref transformAnchoredLiteralOverlap) which
- * require ANCH history to prevent overlap. */
+ RoseBuildData(const RoseInGraph &ig_in, bool som_in)
+ : ig(ig_in), som(som_in) {}
+
+ /** Input rose graph. */
+ const RoseInGraph &ig;
+
+ /** Edges we've transformed (in \ref transformAnchoredLiteralOverlap) which
+ * require ANCH history to prevent overlap. */
unordered_set<RoseInEdge> anch_history_edges;
-
- /** True if we're tracking Start of Match. */
- bool som;
-};
-
-static
-ReportID findReportId(const NGHolder &g) {
- /* prefix/infix always have an edge to accept and only 1 reportid initially
- */
- assert(in_degree(g.accept, g));
- const auto &rep = g[*inv_adjacent_vertices(g.accept, g).first].reports;
- assert(!rep.empty());
- return *rep.begin();
-}
-
-static
-RoseVertex createVertex(RoseBuildImpl *build, u32 literalId, u32 min_offset,
- u32 max_offset) {
- RoseGraph &g = build->g;
- // add to tree
- RoseVertex v = add_vertex(g);
- g[v].min_offset = min_offset;
- g[v].max_offset = max_offset;
-
+
+ /** True if we're tracking Start of Match. */
+ bool som;
+};
+
+static
+ReportID findReportId(const NGHolder &g) {
+ /* prefix/infix always have an edge to accept and only 1 reportid initially
+ */
+ assert(in_degree(g.accept, g));
+ const auto &rep = g[*inv_adjacent_vertices(g.accept, g).first].reports;
+ assert(!rep.empty());
+ return *rep.begin();
+}
+
+static
+RoseVertex createVertex(RoseBuildImpl *build, u32 literalId, u32 min_offset,
+ u32 max_offset) {
+ RoseGraph &g = build->g;
+ // add to tree
+ RoseVertex v = add_vertex(g);
+ g[v].min_offset = min_offset;
+ g[v].max_offset = max_offset;
+
DEBUG_PRINTF("insert vertex %zu into literal %u's vertex set\n", g[v].index,
- literalId);
- g[v].literals.insert(literalId);
- build->literal_info[literalId].vertices.insert(v);
-
- return v;
-}
-
-RoseVertex createVertex(RoseBuildImpl *build, const RoseVertex parent,
- u32 minBound, u32 maxBound, u32 literalId,
- size_t literalLength,
+ literalId);
+ g[v].literals.insert(literalId);
+ build->literal_info[literalId].vertices.insert(v);
+
+ return v;
+}
+
+RoseVertex createVertex(RoseBuildImpl *build, const RoseVertex parent,
+ u32 minBound, u32 maxBound, u32 literalId,
+ size_t literalLength,
const flat_set<ReportID> &reports) {
- assert(parent != RoseGraph::null_vertex());
-
- RoseGraph &g = build->g;
- // add to tree (offsets set latter)
- RoseVertex v = createVertex(build, literalId, 0U, 0U);
-
- /* fill in report information */
- g[v].reports.insert(reports.begin(), reports.end());
-
+ assert(parent != RoseGraph::null_vertex());
+
+ RoseGraph &g = build->g;
+ // add to tree (offsets set later)
+ RoseVertex v = createVertex(build, literalId, 0U, 0U);
+
+ /* fill in report information */
+ g[v].reports.insert(reports.begin(), reports.end());
+
RoseEdge e = add_edge(parent, v, g);
- DEBUG_PRINTF("adding edge (%u, %u) to parent\n", minBound, maxBound);
-
- g[e].minBound = minBound;
- g[e].maxBound = maxBound;
- g[e].rose_top = 0;
-
- u32 min_offset = add_rose_depth(g[parent].min_offset, minBound);
- u32 max_offset = add_rose_depth(g[parent].max_offset, maxBound);
-
- /* take literal length into account for offsets */
- const u32 lit_len = verify_u32(literalLength);
- min_offset = add_rose_depth(min_offset, lit_len);
- max_offset = add_rose_depth(max_offset, lit_len);
-
- g[v].min_offset = min_offset;
- g[v].max_offset = max_offset;
-
- return v;
-}
-
-static
-RoseVertex createAnchoredVertex(RoseBuildImpl *build, u32 literalId,
- u32 min_offset, u32 max_offset) {
- RoseGraph &g = build->g;
- RoseVertex v = createVertex(build, literalId, min_offset, max_offset);
-
+ DEBUG_PRINTF("adding edge (%u, %u) to parent\n", minBound, maxBound);
+
+ g[e].minBound = minBound;
+ g[e].maxBound = maxBound;
+ g[e].rose_top = 0;
+
+ u32 min_offset = add_rose_depth(g[parent].min_offset, minBound);
+ u32 max_offset = add_rose_depth(g[parent].max_offset, maxBound);
+
+ /* take literal length into account for offsets */
+ const u32 lit_len = verify_u32(literalLength);
+ min_offset = add_rose_depth(min_offset, lit_len);
+ max_offset = add_rose_depth(max_offset, lit_len);
+
+ g[v].min_offset = min_offset;
+ g[v].max_offset = max_offset;
+
+ return v;
+}
+
+static
+RoseVertex createAnchoredVertex(RoseBuildImpl *build, u32 literalId,
+ u32 min_offset, u32 max_offset) {
+ RoseGraph &g = build->g;
+ RoseVertex v = createVertex(build, literalId, min_offset, max_offset);
+
DEBUG_PRINTF("created anchored vertex %zu with lit id %u\n", g[v].index,
- literalId);
-
+ literalId);
+
RoseEdge e = add_edge(build->anchored_root, v, g);
- g[e].minBound = min_offset;
- g[e].maxBound = max_offset;
-
- return v;
-}
-
-static
-RoseVertex duplicate(RoseBuildImpl *build, RoseVertex v) {
- RoseGraph &g = build->g;
- RoseVertex w = add_vertex(g[v], g);
+ g[e].minBound = min_offset;
+ g[e].maxBound = max_offset;
+
+ return v;
+}
+
+static
+RoseVertex duplicate(RoseBuildImpl *build, RoseVertex v) {
+ RoseGraph &g = build->g;
+ RoseVertex w = add_vertex(g[v], g);
DEBUG_PRINTF("added vertex %zu\n", g[w].index);
-
- for (auto lit_id : g[w].literals) {
- build->literal_info[lit_id].vertices.insert(w);
- }
-
- for (const auto &e : in_edges_range(v, g)) {
- RoseVertex s = source(e, g);
- add_edge(s, w, g[e], g);
+
+ for (auto lit_id : g[w].literals) {
+ build->literal_info[lit_id].vertices.insert(w);
+ }
+
+ for (const auto &e : in_edges_range(v, g)) {
+ RoseVertex s = source(e, g);
+ add_edge(s, w, g[e], g);
DEBUG_PRINTF("added edge (%zu,%zu)\n", g[s].index, g[w].index);
- }
-
- return w;
-}
-
-namespace {
-struct created_key {
- explicit created_key(const RoseInEdgeProps &trep)
+ }
+
+ return w;
+}
+
+namespace {
+struct created_key {
+ explicit created_key(const RoseInEdgeProps &trep)
: prefix(trep.graph.get()), lag(trep.graph_lag) {
- }
- bool operator<(const created_key &b) const {
- const created_key &a = *this;
- ORDER_CHECK(prefix);
- ORDER_CHECK(lag);
- return false;
- }
- NGHolder *prefix;
- u32 lag;
-};
-}
-
-static
-bool isPureAnchored(const NGHolder &h) {
- return !proper_out_degree(h.startDs, h);
-}
-
-static
-RoseRoleHistory selectHistory(const RoseBuildImpl &tbi, const RoseBuildData &bd,
- const RoseInEdge &rose_edge, const RoseEdge &e) {
- const RoseGraph &g = tbi.g;
- const RoseVertex u = source(e, g), v = target(e, g);
- const bool fixed_offset_src = g[u].fixedOffset();
- const bool has_bounds = g[e].minBound || (g[e].maxBound != ROSE_BOUND_INF);
-
- DEBUG_PRINTF("edge %zu->%zu, bounds=[%u,%u], fixed_u=%d, prefix=%d\n",
+ }
+ bool operator<(const created_key &b) const {
+ const created_key &a = *this;
+ ORDER_CHECK(prefix);
+ ORDER_CHECK(lag);
+ return false;
+ }
+ NGHolder *prefix;
+ u32 lag;
+};
+}
+
+static
+bool isPureAnchored(const NGHolder &h) {
+ return !proper_out_degree(h.startDs, h);
+}
+
+static
+RoseRoleHistory selectHistory(const RoseBuildImpl &tbi, const RoseBuildData &bd,
+ const RoseInEdge &rose_edge, const RoseEdge &e) {
+ const RoseGraph &g = tbi.g;
+ const RoseVertex u = source(e, g), v = target(e, g);
+ const bool fixed_offset_src = g[u].fixedOffset();
+ const bool has_bounds = g[e].minBound || (g[e].maxBound != ROSE_BOUND_INF);
+
+ DEBUG_PRINTF("edge %zu->%zu, bounds=[%u,%u], fixed_u=%d, prefix=%d\n",
g[u].index, g[v].index, g[e].minBound, g[e].maxBound,
- (int)g[u].fixedOffset(), (int)g[v].left);
-
- if (g[v].left) {
- // Roles with prefix engines have their history handled by that prefix.
- assert(!contains(bd.anch_history_edges, rose_edge));
- return ROSE_ROLE_HISTORY_NONE;
- }
-
- if (contains(bd.anch_history_edges, rose_edge)) {
- DEBUG_PRINTF("needs anch history\n");
- return ROSE_ROLE_HISTORY_ANCH;
- }
-
- if (fixed_offset_src && has_bounds) {
- DEBUG_PRINTF("needs anch history\n");
- return ROSE_ROLE_HISTORY_ANCH;
- }
-
- return ROSE_ROLE_HISTORY_NONE;
-}
-
-static
-bool hasSuccessorLiterals(RoseInVertex iv, const RoseInGraph &ig) {
- for (auto v : adjacent_vertices_range(iv, ig)) {
- if (ig[v].type != RIV_ACCEPT) {
- return true;
- }
- }
- return false;
-}
-
-static
-void createVertices(RoseBuildImpl *tbi,
- map<RoseInVertex, vector<RoseVertex> > &vertex_map,
- const vector<pair<RoseVertex, RoseInEdge> > &parents,
- RoseInVertex iv, u32 min_offset, u32 max_offset,
- u32 literalId, u32 delay, const RoseBuildData &bd) {
- RoseGraph &g = tbi->g;
-
- DEBUG_PRINTF("vertex has %zu parents\n", parents.size());
-
- map<created_key, RoseVertex> created;
-
- for (const auto &pv : parents) {
- RoseVertex w;
- const RoseInEdgeProps &edge_props = bd.ig[pv.second];
- shared_ptr<NGHolder> prefix_graph = edge_props.graph;
- u32 prefix_lag = edge_props.graph_lag;
-
- created_key key(edge_props);
-
- if (!contains(created, key)) {
- assert(prefix_graph || !edge_props.haig);
- w = createVertex(tbi, literalId, min_offset, max_offset);
- created[key] = w;
-
- if (prefix_graph) {
- g[w].left.graph = prefix_graph;
+ (int)g[u].fixedOffset(), (int)g[v].left);
+
+ if (g[v].left) {
+ // Roles with prefix engines have their history handled by that prefix.
+ assert(!contains(bd.anch_history_edges, rose_edge));
+ return ROSE_ROLE_HISTORY_NONE;
+ }
+
+ if (contains(bd.anch_history_edges, rose_edge)) {
+ DEBUG_PRINTF("needs anch history\n");
+ return ROSE_ROLE_HISTORY_ANCH;
+ }
+
+ if (fixed_offset_src && has_bounds) {
+ DEBUG_PRINTF("needs anch history\n");
+ return ROSE_ROLE_HISTORY_ANCH;
+ }
+
+ return ROSE_ROLE_HISTORY_NONE;
+}
+
+static
+bool hasSuccessorLiterals(RoseInVertex iv, const RoseInGraph &ig) {
+ for (auto v : adjacent_vertices_range(iv, ig)) {
+ if (ig[v].type != RIV_ACCEPT) {
+ return true;
+ }
+ }
+ return false;
+}
+
+static
+void createVertices(RoseBuildImpl *tbi,
+ map<RoseInVertex, vector<RoseVertex> > &vertex_map,
+ const vector<pair<RoseVertex, RoseInEdge> > &parents,
+ RoseInVertex iv, u32 min_offset, u32 max_offset,
+ u32 literalId, u32 delay, const RoseBuildData &bd) {
+ RoseGraph &g = tbi->g;
+
+ DEBUG_PRINTF("vertex has %zu parents\n", parents.size());
+
+ map<created_key, RoseVertex> created;
+
+ for (const auto &pv : parents) {
+ RoseVertex w;
+ const RoseInEdgeProps &edge_props = bd.ig[pv.second];
+ shared_ptr<NGHolder> prefix_graph = edge_props.graph;
+ u32 prefix_lag = edge_props.graph_lag;
+
+ created_key key(edge_props);
+
+ if (!contains(created, key)) {
+ assert(prefix_graph || !edge_props.haig);
+ w = createVertex(tbi, literalId, min_offset, max_offset);
+ created[key] = w;
+
+ if (prefix_graph) {
+ g[w].left.graph = prefix_graph;
if (edge_props.dfa) {
g[w].left.dfa = edge_props.dfa;
- }
- g[w].left.haig = edge_props.haig;
- g[w].left.lag = prefix_lag;
-
- // The graph already has its report id allocated - find it.
- g[w].left.leftfix_report = findReportId(*prefix_graph);
-
- if (g[w].left.dfa || g[w].left.haig) {
- assert(prefix_graph);
- g[w].left.dfa_min_width = findMinWidth(*prefix_graph);
- g[w].left.dfa_max_width = findMaxWidth(*prefix_graph);
- }
- }
-
- if (bd.som && !g[w].left.haig) {
- /* no prefix - som based on literal start */
- assert(!prefix_graph);
+ }
+ g[w].left.haig = edge_props.haig;
+ g[w].left.lag = prefix_lag;
+
+ // The graph already has its report id allocated - find it.
+ g[w].left.leftfix_report = findReportId(*prefix_graph);
+
+ if (g[w].left.dfa || g[w].left.haig) {
+ assert(prefix_graph);
+ g[w].left.dfa_min_width = findMinWidth(*prefix_graph);
+ g[w].left.dfa_max_width = findMaxWidth(*prefix_graph);
+ }
+ }
+
+ if (bd.som && !g[w].left.haig) {
+ /* no prefix - som based on literal start */
+ assert(!prefix_graph);
g[w].som_adjust = tbi->literals.at(literalId).elength();
- DEBUG_PRINTF("set som_adjust to %u\n", g[w].som_adjust);
- }
-
+ DEBUG_PRINTF("set som_adjust to %u\n", g[w].som_adjust);
+ }
+
DEBUG_PRINTF(" adding new vertex index=%zu\n", tbi->g[w].index);
- vertex_map[iv].push_back(w);
- } else {
- w = created[key];
- }
-
+ vertex_map[iv].push_back(w);
+ } else {
+ w = created[key];
+ }
+
RoseVertex p = pv.first;
-
+
RoseEdge e = add_edge(p, w, g);
- DEBUG_PRINTF("adding edge (%u,%u) to parent\n", edge_props.minBound,
- edge_props.maxBound);
- g[e].minBound = edge_props.minBound;
- if (p != tbi->root && g[w].left.graph
- && (!tbi->isAnyStart(p) || isPureAnchored(*g[w].left.graph))) {
- depth mw = findMaxWidth(*g[w].left.graph);
- if (mw.is_infinite()) {
- g[e].maxBound = ROSE_BOUND_INF;
- } else {
- DEBUG_PRINTF("setting max to %s + %u\n", mw.str().c_str(),
- prefix_lag);
- g[e].maxBound = prefix_lag + mw;
- }
- } else {
- g[e].maxBound = edge_props.maxBound;
- }
- g[e].rose_top = 0;
- g[e].history = selectHistory(*tbi, bd, pv.second, e);
- }
-
- if (delay && hasSuccessorLiterals(iv, bd.ig)) {
- // Add an undelayed "ghost" vertex for this literal.
- u32 ghostId = tbi->literal_info[literalId].undelayed_id;
- DEBUG_PRINTF("creating delay ghost vertex, id=%u\n", ghostId);
- assert(ghostId != literalId);
+ DEBUG_PRINTF("adding edge (%u,%u) to parent\n", edge_props.minBound,
+ edge_props.maxBound);
+ g[e].minBound = edge_props.minBound;
+ if (p != tbi->root && g[w].left.graph
+ && (!tbi->isAnyStart(p) || isPureAnchored(*g[w].left.graph))) {
+ depth mw = findMaxWidth(*g[w].left.graph);
+ if (mw.is_infinite()) {
+ g[e].maxBound = ROSE_BOUND_INF;
+ } else {
+ DEBUG_PRINTF("setting max to %s + %u\n", mw.str().c_str(),
+ prefix_lag);
+ g[e].maxBound = prefix_lag + mw;
+ }
+ } else {
+ g[e].maxBound = edge_props.maxBound;
+ }
+ g[e].rose_top = 0;
+ g[e].history = selectHistory(*tbi, bd, pv.second, e);
+ }
+
+ if (delay && hasSuccessorLiterals(iv, bd.ig)) {
+ // Add an undelayed "ghost" vertex for this literal.
+ u32 ghostId = tbi->literal_info[literalId].undelayed_id;
+ DEBUG_PRINTF("creating delay ghost vertex, id=%u\n", ghostId);
+ assert(ghostId != literalId);
assert(tbi->literals.at(ghostId).delay == 0);
-
- // Adjust offsets, removing delay.
- u32 ghost_min = min_offset, ghost_max = max_offset;
- assert(ghost_min < ROSE_BOUND_INF && ghost_min >= delay);
- ghost_min -= delay;
- ghost_max -= ghost_max == ROSE_BOUND_INF ? 0 : delay;
-
- RoseVertex g_v = createVertex(tbi, ghostId, ghost_min, ghost_max);
-
- for (const auto &pv : parents) {
- const RoseInEdgeProps &edge_props = bd.ig[pv.second];
+
+ // Adjust offsets, removing delay.
+ u32 ghost_min = min_offset, ghost_max = max_offset;
+ assert(ghost_min < ROSE_BOUND_INF && ghost_min >= delay);
+ ghost_min -= delay;
+ ghost_max -= ghost_max == ROSE_BOUND_INF ? 0 : delay;
+
+ RoseVertex g_v = createVertex(tbi, ghostId, ghost_min, ghost_max);
+
+ for (const auto &pv : parents) {
+ const RoseInEdgeProps &edge_props = bd.ig[pv.second];
RoseEdge e = add_edge(pv.first, g_v, tbi->g);
- g[e].minBound = edge_props.minBound;
- g[e].maxBound = edge_props.maxBound;
- g[e].history = selectHistory(*tbi, bd, pv.second, e);
- DEBUG_PRINTF("parent edge has bounds [%u,%u]\n",
- edge_props.minBound, edge_props.maxBound);
- }
-
- for (auto &m : created) {
- tbi->ghost[m.second] = g_v;
- }
- }
-}
-
-/* ensure the holder does not accept any paths which do not end with lit */
-static
-void removeFalsePaths(NGHolder &g, const ue2_literal &lit) {
+ g[e].minBound = edge_props.minBound;
+ g[e].maxBound = edge_props.maxBound;
+ g[e].history = selectHistory(*tbi, bd, pv.second, e);
+ DEBUG_PRINTF("parent edge has bounds [%u,%u]\n",
+ edge_props.minBound, edge_props.maxBound);
+ }
+
+ for (auto &m : created) {
+ tbi->ghost[m.second] = g_v;
+ }
+ }
+}
+
+/* ensure the holder does not accept any paths which do not end with lit */
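+/* The walk below proceeds backwards from accept/acceptEod, one literal
+ * character at a time: predecessors whose reach overlaps the current
+ * character are cloned with their reach narrowed to that character, so that
+ * only paths ending in lit survive the final pruneUseless(). */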
+static
+void removeFalsePaths(NGHolder &g, const ue2_literal &lit) {
DEBUG_PRINTF("strip '%s'\n", dumpString(lit).c_str());
- set<NFAVertex> curr, next;
- curr.insert(g.accept);
- curr.insert(g.acceptEod);
-
- for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) {
- next.clear();
- for (auto curr_v : curr) {
+ set<NFAVertex> curr, next;
+ curr.insert(g.accept);
+ curr.insert(g.acceptEod);
+
+ for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) {
+ next.clear();
+ for (auto curr_v : curr) {
DEBUG_PRINTF("handling %zu\n", g[curr_v].index);
- vector<NFAVertex> next_cand;
- insert(&next_cand, next_cand.end(),
- inv_adjacent_vertices(curr_v, g));
- clear_in_edges(curr_v, g);
- if (curr_v == g.acceptEod) {
- add_edge(g.accept, g.acceptEod, g);
- }
-
- for (auto v : next_cand) {
- assert(v != g.startDs);
- if (v == g.start || v == g.startDs || v == g.accept) {
- continue;
- }
-
- const CharReach &cr = g[v].char_reach;
-
- if (!overlaps(*it, cr)) {
+ vector<NFAVertex> next_cand;
+ insert(&next_cand, next_cand.end(),
+ inv_adjacent_vertices(curr_v, g));
+ clear_in_edges(curr_v, g);
+ if (curr_v == g.acceptEod) {
+ add_edge(g.accept, g.acceptEod, g);
+ }
+
+ for (auto v : next_cand) {
+ assert(v != g.startDs);
+ if (v == g.start || v == g.startDs || v == g.accept) {
+ continue;
+ }
+
+ const CharReach &cr = g[v].char_reach;
+
+ if (!overlaps(*it, cr)) {
DEBUG_PRINTF("false edge %zu\n", g[v].index);
- continue;
- }
-
- NFAVertex v2 = clone_vertex(g, v);
- clone_in_edges(g, v, v2);
- add_edge(v2, curr_v, g);
- g[v2].char_reach &= *it;
+ continue;
+ }
+
+ NFAVertex v2 = clone_vertex(g, v);
+ clone_in_edges(g, v, v2);
+ add_edge(v2, curr_v, g);
+ g[v2].char_reach &= *it;
DEBUG_PRINTF("next <- %zu\n", g[v2].index);
- next.insert(v2);
- }
- }
-
- curr.swap(next);
- }
-
- pruneUseless(g);
+ next.insert(v2);
+ }
+ }
+
+ curr.swap(next);
+ }
+
+ pruneUseless(g);
clearReports(g);
- assert(in_degree(g.accept, g) || in_degree(g.acceptEod, g) > 1);
- assert(allMatchStatesHaveReports(g));
-
- DEBUG_PRINTF("graph has %zu vertices left\n", num_vertices(g));
-}
-
-static
-RoseVertex tryForAnchoredVertex(RoseBuildImpl *tbi,
- const RoseInVertexProps &iv_info,
- const RoseInEdgeProps &ep) {
- if (ep.graph_lag && ep.graph_lag != iv_info.s.length()) {
- DEBUG_PRINTF("bad lag %u != %zu\n", ep.graph_lag, iv_info.s.length());
- return RoseGraph::null_vertex(); /* TODO: better */
- }
-
- const depth anchored_max_depth(tbi->cc.grey.maxAnchoredRegion);
- depth min_width(0), max_width(0);
-
- if (ep.graph.get()) {
- const depth graph_lag(ep.graph_lag);
- max_width = findMaxWidth(*ep.graph) + graph_lag;
- min_width = findMinWidth(*ep.graph) + graph_lag;
- if (proper_out_degree(ep.graph->startDs, *ep.graph)) {
- max_width = depth::infinity();
- }
- }
-
- DEBUG_PRINTF("mw = %s; lag = %u\n", max_width.str().c_str(), ep.graph_lag);
-
- NGHolder h;
-
- if (ep.graph.get() && max_width <= anchored_max_depth) {
- cloneHolder(h, *ep.graph);
-
- /* add literal/dots */
- if (ep.graph_lag) {
- assert(ep.graph_lag == iv_info.s.length());
- appendLiteral(h, iv_info.s);
- } else {
- removeFalsePaths(h, iv_info.s);
- }
- } else if (!ep.graph.get() && ep.maxBound < ROSE_BOUND_INF
- && iv_info.s.length() + ep.maxBound
- <= tbi->cc.grey.maxAnchoredRegion) {
- if (ep.maxBound || ep.minBound) {
- /* TODO: handle, however these cases are not generated currently by
+ assert(in_degree(g.accept, g) || in_degree(g.acceptEod, g) > 1);
+ assert(allMatchStatesHaveReports(g));
+
+ DEBUG_PRINTF("graph has %zu vertices left\n", num_vertices(g));
+}
+
+static
+RoseVertex tryForAnchoredVertex(RoseBuildImpl *tbi,
+ const RoseInVertexProps &iv_info,
+ const RoseInEdgeProps &ep) {
+ if (ep.graph_lag && ep.graph_lag != iv_info.s.length()) {
+ DEBUG_PRINTF("bad lag %u != %zu\n", ep.graph_lag, iv_info.s.length());
+ return RoseGraph::null_vertex(); /* TODO: better */
+ }
+
+ const depth anchored_max_depth(tbi->cc.grey.maxAnchoredRegion);
+ depth min_width(0), max_width(0);
+
+ if (ep.graph.get()) {
+ const depth graph_lag(ep.graph_lag);
+ max_width = findMaxWidth(*ep.graph) + graph_lag;
+ min_width = findMinWidth(*ep.graph) + graph_lag;
+ if (proper_out_degree(ep.graph->startDs, *ep.graph)) {
+ max_width = depth::infinity();
+ }
+ }
+
+ DEBUG_PRINTF("mw = %s; lag = %u\n", max_width.str().c_str(), ep.graph_lag);
+
+ NGHolder h;
+
+ if (ep.graph.get() && max_width <= anchored_max_depth) {
+ cloneHolder(h, *ep.graph);
+
+ /* add literal/dots */
+ if (ep.graph_lag) {
+ assert(ep.graph_lag == iv_info.s.length());
+ appendLiteral(h, iv_info.s);
+ } else {
+ removeFalsePaths(h, iv_info.s);
+ }
+ } else if (!ep.graph.get() && ep.maxBound < ROSE_BOUND_INF
+ && iv_info.s.length() + ep.maxBound
+ <= tbi->cc.grey.maxAnchoredRegion) {
+ if (ep.maxBound || ep.minBound) {
+ /* TODO: handle, however these cases are not generated currently by
ng_violet */
- return RoseGraph::null_vertex();
- }
- max_width = depth(ep.maxBound + iv_info.s.length());
- min_width = depth(ep.minBound + iv_info.s.length());
- add_edge(h.start, h.accept, h);
- appendLiteral(h, iv_info.s);
- } else {
- return RoseGraph::null_vertex();
- }
-
- u32 anchored_exit_id = tbi->getNewLiteralId();
- u32 remap_id = 0;
- DEBUG_PRINTF(" trying to add dfa stuff\n");
- int rv = addToAnchoredMatcher(*tbi, h, anchored_exit_id, &remap_id);
-
- if (rv == ANCHORED_FAIL) {
- return RoseGraph::null_vertex();
- } else if (rv == ANCHORED_REMAP) {
- anchored_exit_id = remap_id;
- } else {
- assert(rv == ANCHORED_SUCCESS);
- }
-
- // Store the literal itself in a side structure so that we can use it for
- // overlap calculations later. This may be obsolete when the old Rose
- // construction path (and its history selection code) goes away.
- rose_literal_id lit(iv_info.s, ROSE_ANCHORED, 0);
- tbi->anchoredLitSuffix.insert(make_pair(anchored_exit_id, lit));
-
- assert(min_width <= anchored_max_depth);
- assert(max_width <= anchored_max_depth);
- assert(min_width <= max_width);
-
- /* Note: bounds are end-to-end as anchored lits are considered
- * to have 0 length. */
- RoseVertex v = createAnchoredVertex(tbi, anchored_exit_id, min_width,
- max_width);
- return v;
-}
-
-static
-u32 findRoseAnchorFloatingOverlap(const RoseInEdgeProps &ep,
- const RoseInVertexProps &succ_vp) {
- /* we need to ensure there is enough history to find the successor literal
- * when we enable its group.
- */
-
- if (!ep.graph.get()) {
- return 0; /* non overlapping */
- }
- depth graph_min_width = findMinWidth(*ep.graph);
- u32 min_width = ep.graph_lag + graph_min_width;
- u32 s_len = succ_vp.s.length();
-
- if (s_len <= min_width) {
- return 0; /* no overlap */
- }
-
- u32 overlap = s_len - min_width;
- DEBUG_PRINTF("found overlap of %u\n", overlap);
- return overlap;
-}
-
-static
-void findRoseLiteralMask(const NGHolder &h, const u32 lag, vector<u8> &msk,
- vector<u8> &cmp) {
- if (lag >= HWLM_MASKLEN) {
- msk.clear(); cmp.clear();
- return;
- }
-
- assert(in_degree(h.acceptEod, h) == 1); // no eod reports
-
- // Start with the set of reporter vertices for this rose.
- set<NFAVertex> curr, next;
- insert(&curr, inv_adjacent_vertices(h.accept, h));
- assert(!curr.empty());
-
- msk.assign(HWLM_MASKLEN, 0);
- cmp.assign(HWLM_MASKLEN, 0);
- size_t i = HWLM_MASKLEN - lag - 1;
- do {
- if (curr.empty() || contains(curr, h.start) ||
- contains(curr, h.startDs)) {
- DEBUG_PRINTF("end of the road\n");
- break;
- }
-
- next.clear();
- CharReach cr;
- for (auto v : curr) {
+ return RoseGraph::null_vertex();
+ }
+ max_width = depth(ep.maxBound + iv_info.s.length());
+ min_width = depth(ep.minBound + iv_info.s.length());
+ add_edge(h.start, h.accept, h);
+ appendLiteral(h, iv_info.s);
+ } else {
+ return RoseGraph::null_vertex();
+ }
+
+ u32 anchored_exit_id = tbi->getNewLiteralId();
+ u32 remap_id = 0;
+ DEBUG_PRINTF(" trying to add dfa stuff\n");
+ int rv = addToAnchoredMatcher(*tbi, h, anchored_exit_id, &remap_id);
+
+ if (rv == ANCHORED_FAIL) {
+ return RoseGraph::null_vertex();
+ } else if (rv == ANCHORED_REMAP) {
+ anchored_exit_id = remap_id;
+ } else {
+ assert(rv == ANCHORED_SUCCESS);
+ }
+
+ // Store the literal itself in a side structure so that we can use it for
+ // overlap calculations later. This may be obsolete when the old Rose
+ // construction path (and its history selection code) goes away.
+ rose_literal_id lit(iv_info.s, ROSE_ANCHORED, 0);
+ tbi->anchoredLitSuffix.insert(make_pair(anchored_exit_id, lit));
+
+ assert(min_width <= anchored_max_depth);
+ assert(max_width <= anchored_max_depth);
+ assert(min_width <= max_width);
+
+ /* Note: bounds are end-to-end as anchored lits are considered
+ * to have 0 length. */
+ RoseVertex v = createAnchoredVertex(tbi, anchored_exit_id, min_width,
+ max_width);
+ return v;
+}
+
+static
+u32 findRoseAnchorFloatingOverlap(const RoseInEdgeProps &ep,
+ const RoseInVertexProps &succ_vp) {
+ /* we need to ensure there is enough history to find the successor literal
+ * when we enable its group.
+ */
+
+ if (!ep.graph.get()) {
+ return 0; /* non overlapping */
+ }
+ depth graph_min_width = findMinWidth(*ep.graph);
+ u32 min_width = ep.graph_lag + graph_min_width;
+ u32 s_len = succ_vp.s.length();
+
+ if (s_len <= min_width) {
+ return 0; /* no overlap */
+ }
+
+ u32 overlap = s_len - min_width;
+ DEBUG_PRINTF("found overlap of %u\n", overlap);
+ return overlap;
+}
+
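+/* Derives HWLM msk/cmp bytes for a rose literal from its prefix graph:
+ * walking backwards from the graph's reporting vertices, the reach at each
+ * depth is OR'd together and converted via make_and_cmp_mask; the last lag
+ * positions are left unconstrained. */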
+static
+void findRoseLiteralMask(const NGHolder &h, const u32 lag, vector<u8> &msk,
+ vector<u8> &cmp) {
+ if (lag >= HWLM_MASKLEN) {
+ msk.clear(); cmp.clear();
+ return;
+ }
+
+ assert(in_degree(h.acceptEod, h) == 1); // no eod reports
+
+ // Start with the set of reporter vertices for this rose.
+ set<NFAVertex> curr, next;
+ insert(&curr, inv_adjacent_vertices(h.accept, h));
+ assert(!curr.empty());
+
+ msk.assign(HWLM_MASKLEN, 0);
+ cmp.assign(HWLM_MASKLEN, 0);
+ size_t i = HWLM_MASKLEN - lag - 1;
+ do {
+ if (curr.empty() || contains(curr, h.start) ||
+ contains(curr, h.startDs)) {
+ DEBUG_PRINTF("end of the road\n");
+ break;
+ }
+
+ next.clear();
+ CharReach cr;
+ for (auto v : curr) {
DEBUG_PRINTF("vertex %zu, reach %s\n", h[v].index,
- describeClass(h[v].char_reach).c_str());
- cr |= h[v].char_reach;
- insert(&next, inv_adjacent_vertices(v, h));
- }
- make_and_cmp_mask(cr, &msk[i], &cmp[i]);
- DEBUG_PRINTF("%zu: reach=%s, msk=%u, cmp=%u\n", i,
- describeClass(cr).c_str(), msk.at(i), cmp.at(i));
- curr.swap(next);
- } while (i-- > 0);
-}
-
-static
-void doRoseLiteralVertex(RoseBuildImpl *tbi, bool use_eod_table,
- map<RoseInVertex, vector<RoseVertex> > &vertex_map,
- const vector<pair<RoseVertex, RoseInEdge> > &parents,
- RoseInVertex iv, const RoseBuildData &bd) {
- const RoseInGraph &ig = bd.ig;
- const RoseInVertexProps &iv_info = ig[iv];
- assert(iv_info.type == RIV_LITERAL);
- assert(!parents.empty()); /* start vertices should not be here */
-
+ describeClass(h[v].char_reach).c_str());
+ cr |= h[v].char_reach;
+ insert(&next, inv_adjacent_vertices(v, h));
+ }
+ make_and_cmp_mask(cr, &msk[i], &cmp[i]);
+ DEBUG_PRINTF("%zu: reach=%s, msk=%u, cmp=%u\n", i,
+ describeClass(cr).c_str(), msk.at(i), cmp.at(i));
+ curr.swap(next);
+ } while (i-- > 0);
+}
+
+static
+void doRoseLiteralVertex(RoseBuildImpl *tbi, bool use_eod_table,
+ map<RoseInVertex, vector<RoseVertex> > &vertex_map,
+ const vector<pair<RoseVertex, RoseInEdge> > &parents,
+ RoseInVertex iv, const RoseBuildData &bd) {
+ const RoseInGraph &ig = bd.ig;
+ const RoseInVertexProps &iv_info = ig[iv];
+ assert(iv_info.type == RIV_LITERAL);
+ assert(!parents.empty()); /* start vertices should not be here */
+
// ng_violet should have ensured that mixed-sensitivity literals are no
- // longer than the benefits max width.
- assert(iv_info.s.length() <= MAX_MASK2_WIDTH ||
- !mixed_sensitivity(iv_info.s));
-
- // Rose graph construction process should have given us a min_offset.
- assert(iv_info.min_offset > 0);
-
- if (use_eod_table) {
- goto floating;
- }
-
- DEBUG_PRINTF("rose find vertex\n");
- if (parents.size() == 1) {
- const RoseVertex u = parents.front().first;
- const RoseInEdgeProps &ep = ig[parents.front().second];
-
- if (!tbi->isAnyStart(u)) {
- goto floating;
- }
-
- if (!ep.graph && ep.maxBound == ROSE_BOUND_INF) {
- goto floating;
- }
- if (ep.graph && !isAnchored(*ep.graph)) {
- goto floating;
- }
-
- DEBUG_PRINTF("cand for anchored maxBound %u, %p (%d)\n", ep.maxBound,
- ep.graph.get(), ep.graph ? (int)isAnchored(*ep.graph) : 3);
-
- /* need to check if putting iv into the anchored table would create
- * any bad_overlap relationships with its successor literals */
- for (const auto &e : out_edges_range(iv, ig)) {
- RoseInVertex t = target(e, ig);
- u32 overlap = findRoseAnchorFloatingOverlap(ig[e], ig[t]);
- DEBUG_PRINTF("found overlap of %u\n", overlap);
- if (overlap > tbi->cc.grey.maxHistoryAvailable + 1) {
- goto floating;
- }
- }
-
- RoseVertex v = tryForAnchoredVertex(tbi, iv_info, ep);
- if (v != RoseGraph::null_vertex()) {
- DEBUG_PRINTF("add anchored literal vertex\n");
- vertex_map[iv].push_back(v);
- return;
- }
- }
-
-floating:
- vector<u8> msk, cmp;
- if (tbi->cc.grey.roseHamsterMasks && in_degree(iv, ig) == 1) {
- RoseInEdge e = *in_edges(iv, ig).first;
- if (ig[e].graph) {
- findRoseLiteralMask(*ig[e].graph, ig[e].graph_lag, msk, cmp);
- }
- }
-
- u32 delay = iv_info.delay;
- rose_literal_table table = use_eod_table ? ROSE_EOD_ANCHORED : ROSE_FLOATING;
-
- u32 literalId = tbi->getLiteralId(iv_info.s, msk, cmp, delay, table);
-
- DEBUG_PRINTF("literal=%u (len=%zu, delay=%u, offsets=[%u,%u] '%s')\n",
- literalId, iv_info.s.length(), delay, iv_info.min_offset,
- iv_info.max_offset, dumpString(iv_info.s).c_str());
-
- createVertices(tbi, vertex_map, parents, iv, iv_info.min_offset,
- iv_info.max_offset, literalId, delay, bd);
-}
-
-static
+ // longer than the benefits max width.
+ assert(iv_info.s.length() <= MAX_MASK2_WIDTH ||
+ !mixed_sensitivity(iv_info.s));
+
+ // The Rose graph construction process should have given us a min_offset.
+ assert(iv_info.min_offset > 0);
+
+ if (use_eod_table) {
+ goto floating;
+ }
+
+ DEBUG_PRINTF("rose find vertex\n");
+ if (parents.size() == 1) {
+ const RoseVertex u = parents.front().first;
+ const RoseInEdgeProps &ep = ig[parents.front().second];
+
+ if (!tbi->isAnyStart(u)) {
+ goto floating;
+ }
+
+ if (!ep.graph && ep.maxBound == ROSE_BOUND_INF) {
+ goto floating;
+ }
+ if (ep.graph && !isAnchored(*ep.graph)) {
+ goto floating;
+ }
+
+ DEBUG_PRINTF("cand for anchored maxBound %u, %p (%d)\n", ep.maxBound,
+ ep.graph.get(), ep.graph ? (int)isAnchored(*ep.graph) : 3);
+
+ /* need to check if putting iv into the anchored table would create
+ * any bad_overlap relationships with its successor literals */
+ for (const auto &e : out_edges_range(iv, ig)) {
+ RoseInVertex t = target(e, ig);
+ u32 overlap = findRoseAnchorFloatingOverlap(ig[e], ig[t]);
+ DEBUG_PRINTF("found overlap of %u\n", overlap);
+ if (overlap > tbi->cc.grey.maxHistoryAvailable + 1) {
+ goto floating;
+ }
+ }
+
+ RoseVertex v = tryForAnchoredVertex(tbi, iv_info, ep);
+ if (v != RoseGraph::null_vertex()) {
+ DEBUG_PRINTF("add anchored literal vertex\n");
+ vertex_map[iv].push_back(v);
+ return;
+ }
+ }
+
+floating:
+ vector<u8> msk, cmp;
+ if (tbi->cc.grey.roseHamsterMasks && in_degree(iv, ig) == 1) {
+ RoseInEdge e = *in_edges(iv, ig).first;
+ if (ig[e].graph) {
+ findRoseLiteralMask(*ig[e].graph, ig[e].graph_lag, msk, cmp);
+ }
+ }
+
+ u32 delay = iv_info.delay;
+ rose_literal_table table = use_eod_table ? ROSE_EOD_ANCHORED : ROSE_FLOATING;
+
+ u32 literalId = tbi->getLiteralId(iv_info.s, msk, cmp, delay, table);
+
+ DEBUG_PRINTF("literal=%u (len=%zu, delay=%u, offsets=[%u,%u] '%s')\n",
+ literalId, iv_info.s.length(), delay, iv_info.min_offset,
+ iv_info.max_offset, dumpString(iv_info.s).c_str());
+
+ createVertices(tbi, vertex_map, parents, iv, iv_info.min_offset,
+ iv_info.max_offset, literalId, delay, bd);
+}
+
+static
unique_ptr<NGHolder> makeRoseEodPrefix(const NGHolder &h, RoseBuildImpl &build,
map<flat_set<ReportID>, ReportID> &remap) {
- assert(generates_callbacks(h));
+ assert(generates_callbacks(h));
assert(!in_degree(h.accept, h));
auto gg = cloneHolder(h);
NGHolder &g = *gg;
g.kind = is_triggered(h) ? NFA_INFIX : NFA_PREFIX;
-
- // Move acceptEod edges over to accept.
- vector<NFAEdge> dead;
+
+ // Move acceptEod edges over to accept.
+ vector<NFAEdge> dead;
for (const auto &e : in_edges_range(g.acceptEod, g)) {
NFAVertex u = source(e, g);
if (u == g.accept) {
- continue;
- }
+ continue;
+ }
add_edge_if_not_present(u, g.accept, g);
- dead.push_back(e);
+ dead.push_back(e);
if (!contains(remap, g[u].reports)) {
remap[g[u].reports] = build.getNewNfaReport();
}
g[u].reports = { remap[g[u].reports] };
- }
-
+ }
+
remove_edges(dead, g);
return gg;
-}
-
-static
+}
+
+static
u32 getEodEventID(RoseBuildImpl &build) {
// Allocate the EOD event if it hasn't been already.
if (build.eod_event_literal_id == MO_INVALID_IDX) {
@@ -730,18 +730,18 @@ void makeEodEventLeftfix(RoseBuildImpl &build, RoseVertex u,
}
static
-void doRoseAcceptVertex(RoseBuildImpl *tbi,
- const vector<pair<RoseVertex, RoseInEdge> > &parents,
- RoseInVertex iv, const RoseBuildData &bd) {
- const RoseInGraph &ig = bd.ig;
- assert(ig[iv].type == RIV_ACCEPT || ig[iv].type == RIV_ACCEPT_EOD);
-
- RoseGraph &g = tbi->g;
-
- for (const auto &pv : parents) {
- RoseVertex u = pv.first;
- const RoseInEdgeProps &edge_props = bd.ig[pv.second];
-
+void doRoseAcceptVertex(RoseBuildImpl *tbi,
+ const vector<pair<RoseVertex, RoseInEdge> > &parents,
+ RoseInVertex iv, const RoseBuildData &bd) {
+ const RoseInGraph &ig = bd.ig;
+ assert(ig[iv].type == RIV_ACCEPT || ig[iv].type == RIV_ACCEPT_EOD);
+
+ RoseGraph &g = tbi->g;
+
+ for (const auto &pv : parents) {
+ RoseVertex u = pv.first;
+ const RoseInEdgeProps &edge_props = bd.ig[pv.second];
+
/* We need to duplicate the parent vertices if:
*
* 1) It already has a suffix, etc as we are going to add the specified
@@ -753,42 +753,42 @@ void doRoseAcceptVertex(RoseBuildImpl *tbi,
* incompatible with normal successors. As accepts are processed last we
* do not need to worry about other normal successors being added later.
*/
- if (g[u].suffix || !g[u].reports.empty()
+ if (g[u].suffix || !g[u].reports.empty()
|| (ig[iv].type == RIV_ACCEPT_EOD && out_degree(u, g)
&& !edge_props.graph)
- || (!isLeafNode(u, g) && !tbi->isAnyStart(u))) {
+ || (!isLeafNode(u, g) && !tbi->isAnyStart(u))) {
DEBUG_PRINTF("duplicating for parent %zu\n", g[u].index);
- assert(!tbi->isAnyStart(u));
- u = duplicate(tbi, u);
- g[u].suffix.reset();
- g[u].eod_accept = false;
- }
-
- assert(!g[u].suffix);
- if (ig[iv].type == RIV_ACCEPT) {
- assert(!tbi->isAnyStart(u));
+ assert(!tbi->isAnyStart(u));
+ u = duplicate(tbi, u);
+ g[u].suffix.reset();
+ g[u].eod_accept = false;
+ }
+
+ assert(!g[u].suffix);
+ if (ig[iv].type == RIV_ACCEPT) {
+ assert(!tbi->isAnyStart(u));
if (edge_props.dfa) {
DEBUG_PRINTF("adding early dfa suffix to i%zu\n", g[u].index);
g[u].suffix.rdfa = edge_props.dfa;
- g[u].suffix.dfa_min_width = findMinWidth(*edge_props.graph);
- g[u].suffix.dfa_max_width = findMaxWidth(*edge_props.graph);
- } else if (edge_props.graph) {
+ g[u].suffix.dfa_min_width = findMinWidth(*edge_props.graph);
+ g[u].suffix.dfa_max_width = findMaxWidth(*edge_props.graph);
+ } else if (edge_props.graph) {
DEBUG_PRINTF("adding suffix to i%zu\n", g[u].index);
- g[u].suffix.graph = edge_props.graph;
- assert(g[u].suffix.graph->kind == NFA_SUFFIX);
- /* TODO: set dfa_(min|max)_width */
- } else if (edge_props.haig) {
+ g[u].suffix.graph = edge_props.graph;
+ assert(g[u].suffix.graph->kind == NFA_SUFFIX);
+ /* TODO: set dfa_(min|max)_width */
+ } else if (edge_props.haig) {
DEBUG_PRINTF("adding suffaig to i%zu\n", g[u].index);
- g[u].suffix.haig = edge_props.haig;
- } else {
+ g[u].suffix.haig = edge_props.haig;
+ } else {
DEBUG_PRINTF("adding boring accept to i%zu\n", g[u].index);
- assert(!g[u].eod_accept);
- g[u].reports = ig[iv].reports;
- }
- } else {
- assert(ig[iv].type == RIV_ACCEPT_EOD);
+ assert(!g[u].eod_accept);
+ g[u].reports = ig[iv].reports;
+ }
+ } else {
+ assert(ig[iv].type == RIV_ACCEPT_EOD);
assert(!edge_props.haig);
-
+
if (!edge_props.graph) {
RoseVertex w = add_vertex(g);
g[w].eod_accept = true;
@@ -809,129 +809,129 @@ void doRoseAcceptVertex(RoseBuildImpl *tbi,
if (tbi->isInETable(u)) {
assert(h.kind == NFA_SUFFIX);
- assert(!tbi->isAnyStart(u));
- /* etable can't/shouldn't use eod event */
+ assert(!tbi->isAnyStart(u));
+ /* etable can't/shouldn't use eod event */
DEBUG_PRINTF("adding suffix to i%zu\n", g[u].index);
- g[u].suffix.graph = edge_props.graph;
- continue;
- }
-
+ g[u].suffix.graph = edge_props.graph;
+ continue;
+ }
+
makeEodEventLeftfix(*tbi, u, h);
- }
- }
-}
-
-static
-bool suitableForEod(const RoseInGraph &ig, vector<RoseInVertex> topo,
- u32 *max_len, const CompileContext &cc) {
- map<RoseInVertex, u32> max_depth_from_eod;
- *max_len = 0;
-
- reverse(topo.begin(), topo.end()); /* we want to start at accept end */
-
- for (auto v : topo) {
- u32 v_depth = 0;
-
- if (ig[v].type == RIV_ACCEPT) {
- DEBUG_PRINTF("[ACCEPT]\n");
- for (const auto &e : in_edges_range(v, ig)) {
- if (!ig[e].graph || !can_only_match_at_eod(*ig[e].graph)) {
- DEBUG_PRINTF("floating accept\n");
- return false;
- }
- }
- }
-
- switch (ig[v].type) {
- case RIV_LITERAL:
- DEBUG_PRINTF("[LITERAL]\n");
- break;
- case RIV_START:
- DEBUG_PRINTF("[START]\n");
- break;
- case RIV_ANCHORED_START:
- DEBUG_PRINTF("[ANCHOR]\n");
- break;
- case RIV_ACCEPT:
- break;
- case RIV_ACCEPT_EOD:
- DEBUG_PRINTF("[EOD]\n");
- break;
- default:
- assert(0);
- DEBUG_PRINTF("????\n");
- return false;
- }
-
- for (const auto &e : out_edges_range(v, ig)) {
- RoseInVertex t = target(e, ig);
-
- assert(contains(max_depth_from_eod, t));
- u64a max_width;
-
- if (ig[v].type == RIV_START || ig[v].type == RIV_ANCHORED_START) {
- /* start itself doesn't need to be in history buffer
- * just need to make sure all succ literals are ok */
- if (ig[t].type == RIV_LITERAL) {
- max_width = ig[t].s.length();
- } else {
- max_width = 0;
- }
- if (ig[e].graph) {
- depth graph_max_width = findMaxWidth(*ig[e].graph);
- DEBUG_PRINTF("graph max width %s, lag %u\n",
- graph_max_width.str().c_str(),
- ig[e].graph_lag);
- if (!graph_max_width.is_finite()) {
- DEBUG_PRINTF("fail due to graph with inf max width\n");
- return false;
- }
- max_width += graph_max_width;
- }
- } else if (ig[e].haig) {
- DEBUG_PRINTF("fail due to haig\n");
- return false;
- } else if (ig[e].graph) {
- depth graph_max_width = findMaxWidth(*ig[e].graph);
- DEBUG_PRINTF("graph max width %s, lag %u\n",
- graph_max_width.str().c_str(), ig[e].graph_lag);
- if (!graph_max_width.is_finite()) {
- DEBUG_PRINTF("fail due to graph with inf max width\n");
- return false;
- }
- max_width = ig[e].graph_lag + graph_max_width;
- } else {
- max_width = ig[e].maxBound;
- if (ig[t].type == RIV_LITERAL) {
- max_width += ig[t].s.length();
- }
- }
-
- max_width += max_depth_from_eod[t];
- if (max_width > ROSE_BOUND_INF) {
- max_width = ROSE_BOUND_INF;
- }
-
- DEBUG_PRINTF("max_width=%llu\n", max_width);
-
- ENSURE_AT_LEAST(&v_depth, (u32)max_width);
- }
-
+ }
+ }
+}
+
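+/* Decides whether the pattern can live in the EOD-anchored table: walking
+ * the topological order in reverse, compute for each vertex the maximum
+ * distance back from EOD at which it may match; every such distance must fit
+ * within the available history (cc.grey.maxHistoryAvailable). */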
+static
+bool suitableForEod(const RoseInGraph &ig, vector<RoseInVertex> topo,
+ u32 *max_len, const CompileContext &cc) {
+ map<RoseInVertex, u32> max_depth_from_eod;
+ *max_len = 0;
+
+ reverse(topo.begin(), topo.end()); /* we want to start at accept end */
+
+ for (auto v : topo) {
+ u32 v_depth = 0;
+
+ if (ig[v].type == RIV_ACCEPT) {
+ DEBUG_PRINTF("[ACCEPT]\n");
+ for (const auto &e : in_edges_range(v, ig)) {
+ if (!ig[e].graph || !can_only_match_at_eod(*ig[e].graph)) {
+ DEBUG_PRINTF("floating accept\n");
+ return false;
+ }
+ }
+ }
+
+ switch (ig[v].type) {
+ case RIV_LITERAL:
+ DEBUG_PRINTF("[LITERAL]\n");
+ break;
+ case RIV_START:
+ DEBUG_PRINTF("[START]\n");
+ break;
+ case RIV_ANCHORED_START:
+ DEBUG_PRINTF("[ANCHOR]\n");
+ break;
+ case RIV_ACCEPT:
+ break;
+ case RIV_ACCEPT_EOD:
+ DEBUG_PRINTF("[EOD]\n");
+ break;
+ default:
+ assert(0);
+ DEBUG_PRINTF("????\n");
+ return false;
+ }
+
+ for (const auto &e : out_edges_range(v, ig)) {
+ RoseInVertex t = target(e, ig);
+
+ assert(contains(max_depth_from_eod, t));
+ u64a max_width;
+
+ if (ig[v].type == RIV_START || ig[v].type == RIV_ANCHORED_START) {
+            /* the start itself doesn't need to be in the history buffer;
+             * we just need to make sure all successor literals are ok */
+ if (ig[t].type == RIV_LITERAL) {
+ max_width = ig[t].s.length();
+ } else {
+ max_width = 0;
+ }
+ if (ig[e].graph) {
+ depth graph_max_width = findMaxWidth(*ig[e].graph);
+ DEBUG_PRINTF("graph max width %s, lag %u\n",
+ graph_max_width.str().c_str(),
+ ig[e].graph_lag);
+ if (!graph_max_width.is_finite()) {
+ DEBUG_PRINTF("fail due to graph with inf max width\n");
+ return false;
+ }
+ max_width += graph_max_width;
+ }
+ } else if (ig[e].haig) {
+ DEBUG_PRINTF("fail due to haig\n");
+ return false;
+ } else if (ig[e].graph) {
+ depth graph_max_width = findMaxWidth(*ig[e].graph);
+ DEBUG_PRINTF("graph max width %s, lag %u\n",
+ graph_max_width.str().c_str(), ig[e].graph_lag);
+ if (!graph_max_width.is_finite()) {
+ DEBUG_PRINTF("fail due to graph with inf max width\n");
+ return false;
+ }
+ max_width = ig[e].graph_lag + graph_max_width;
+ } else {
+ max_width = ig[e].maxBound;
+ if (ig[t].type == RIV_LITERAL) {
+ max_width += ig[t].s.length();
+ }
+ }
+
+ max_width += max_depth_from_eod[t];
+ if (max_width > ROSE_BOUND_INF) {
+ max_width = ROSE_BOUND_INF;
+ }
+
+ DEBUG_PRINTF("max_width=%llu\n", max_width);
+
+ ENSURE_AT_LEAST(&v_depth, (u32)max_width);
+ }
+
if (v_depth == ROSE_BOUND_INF
|| v_depth > cc.grey.maxHistoryAvailable) {
- DEBUG_PRINTF("not suitable for eod table %u\n", v_depth);
- return false;
- }
-
- max_depth_from_eod[v] = v_depth;
- ENSURE_AT_LEAST(max_len, v_depth);
- }
-
- DEBUG_PRINTF("to the eod table and beyond\n");
- return true;
-}
-
-static
+ DEBUG_PRINTF("not suitable for eod table %u\n", v_depth);
+ return false;
+ }
+
+ max_depth_from_eod[v] = v_depth;
+ ENSURE_AT_LEAST(max_len, v_depth);
+ }
+
+ DEBUG_PRINTF("to the eod table and beyond\n");
+ return true;
+}
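+/* Illustrative walk-through (hypothetical chain, plain bound edges
+ * assumed): for START -> 'foo' -[{0,5}]-> 'bar' -> ACCEPT_EOD, working
+ * back from accept gives depth('bar') = 0 and depth('foo') = 5 + 3
+ * ('bar') = 8; the start edge then contributes len('foo') + 8 = 11, so
+ * *max_len becomes 11 and the chain is eod-suitable whenever that fits
+ * in cc.grey.maxHistoryAvailable. */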
+
+static
void shift_accepts_to_end(const RoseInGraph &ig,
vector<RoseInVertex> &topo_order) {
stable_partition(begin(topo_order), end(topo_order),
@@ -939,761 +939,761 @@ void shift_accepts_to_end(const RoseInGraph &ig,
}
static
-void populateRoseGraph(RoseBuildImpl *tbi, RoseBuildData &bd) {
- const RoseInGraph &ig = bd.ig;
-
-    /* add the pattern into the main rose graph */
- DEBUG_PRINTF("%srose pop\n", bd.som ? "som " : "");
-
- /* Note: an input vertex may need to create several rose vertices. This is
-     * primarily because a RoseVertex can only have one leftfix */
- map<RoseInVertex, vector<RoseVertex> > vertex_map;
-
- vector<RoseInVertex> v_order = topo_order(ig);
+void populateRoseGraph(RoseBuildImpl *tbi, RoseBuildData &bd) {
+ const RoseInGraph &ig = bd.ig;
+
+    /* add the pattern into the main rose graph */
+ DEBUG_PRINTF("%srose pop\n", bd.som ? "som " : "");
+
+ /* Note: an input vertex may need to create several rose vertices. This is
+     * primarily because a RoseVertex can only have one leftfix */
+ map<RoseInVertex, vector<RoseVertex> > vertex_map;
+
+ vector<RoseInVertex> v_order = topo_order(ig);
shift_accepts_to_end(ig, v_order);
-
- u32 eod_space_required;
- bool use_eod_table = suitableForEod(ig, v_order, &eod_space_required,
- tbi->cc);
- if (use_eod_table) {
- ENSURE_AT_LEAST(&tbi->ematcher_region_size, eod_space_required);
- }
-
- assert(ig[v_order.front()].type == RIV_START
- || ig[v_order.front()].type == RIV_ANCHORED_START);
-
- for (RoseInVertex iv : v_order) {
+
+ u32 eod_space_required;
+ bool use_eod_table = suitableForEod(ig, v_order, &eod_space_required,
+ tbi->cc);
+ if (use_eod_table) {
+ ENSURE_AT_LEAST(&tbi->ematcher_region_size, eod_space_required);
+ }
+
+ assert(ig[v_order.front()].type == RIV_START
+ || ig[v_order.front()].type == RIV_ANCHORED_START);
+
+ for (RoseInVertex iv : v_order) {
DEBUG_PRINTF("vertex %zu\n", ig[iv].index);
-
- if (ig[iv].type == RIV_START) {
- DEBUG_PRINTF("is root\n");
- vertex_map[iv].push_back(tbi->root);
- continue;
- } else if (ig[iv].type == RIV_ANCHORED_START) {
- DEBUG_PRINTF("is anchored root\n");
- vertex_map[iv].push_back(tbi->anchored_root);
- continue;
- }
-
- vector<pair<RoseVertex, RoseInEdge> > parents;
- for (const auto &e : in_edges_range(iv, ig)) {
- RoseInVertex u = source(e, ig);
- assert(contains(vertex_map, u));
- const vector<RoseVertex> &images = vertex_map[u];
-
- // We should have no dupes.
+
+ if (ig[iv].type == RIV_START) {
+ DEBUG_PRINTF("is root\n");
+ vertex_map[iv].push_back(tbi->root);
+ continue;
+ } else if (ig[iv].type == RIV_ANCHORED_START) {
+ DEBUG_PRINTF("is anchored root\n");
+ vertex_map[iv].push_back(tbi->anchored_root);
+ continue;
+ }
+
+ vector<pair<RoseVertex, RoseInEdge> > parents;
+ for (const auto &e : in_edges_range(iv, ig)) {
+ RoseInVertex u = source(e, ig);
+ assert(contains(vertex_map, u));
+ const vector<RoseVertex> &images = vertex_map[u];
+
+ // We should have no dupes.
assert(set<RoseVertex>(images.begin(), images.end()).size()
- == images.size());
-
- for (auto v_image : images) {
- // v_image should NOT already be in our parents list.
- assert(find_if(parents.begin(), parents.end(),
- [&v_image](const pair<RoseVertex, RoseInEdge> &p) {
- return p.first == v_image;
- }) == parents.end());
-
- parents.emplace_back(v_image, e);
-
- if (tbi->isAnchored(v_image)) {
- assert(!use_eod_table);
- u32 overlap = findRoseAnchorFloatingOverlap(ig[e], ig[iv]);
- assert(overlap <= tbi->cc.grey.maxHistoryAvailable + 1);
- ENSURE_AT_LEAST(&tbi->max_rose_anchored_floating_overlap,
- overlap);
- }
- }
- }
-
- if (ig[iv].type == RIV_LITERAL) {
- DEBUG_PRINTF("LITERAL '%s'\n", dumpString(ig[iv].s).c_str());
- assert(!isLeafNode(iv, ig));
- doRoseLiteralVertex(tbi, use_eod_table, vertex_map, parents, iv,
- bd);
- } else {
- if (ig[iv].type == RIV_ACCEPT) {
- DEBUG_PRINTF("ACCEPT\n");
- } else {
- assert(ig[iv].type == RIV_ACCEPT_EOD);
- DEBUG_PRINTF("ACCEPT_EOD\n");
- }
- assert(isLeafNode(iv, ig)); /* accepts are final */
- doRoseAcceptVertex(tbi, parents, iv, bd);
- }
- }
- DEBUG_PRINTF("done\n");
-}
-
-template<typename GraphT>
-static
-bool empty(const GraphT &g) {
- typename GraphT::vertex_iterator vi, ve;
- tie(vi, ve) = vertices(g);
- return vi == ve;
-}
-
-static
+ == images.size());
+
+ for (auto v_image : images) {
+ // v_image should NOT already be in our parents list.
+ assert(find_if(parents.begin(), parents.end(),
+ [&v_image](const pair<RoseVertex, RoseInEdge> &p) {
+ return p.first == v_image;
+ }) == parents.end());
+
+ parents.emplace_back(v_image, e);
+
+ if (tbi->isAnchored(v_image)) {
+ assert(!use_eod_table);
+ u32 overlap = findRoseAnchorFloatingOverlap(ig[e], ig[iv]);
+ assert(overlap <= tbi->cc.grey.maxHistoryAvailable + 1);
+ ENSURE_AT_LEAST(&tbi->max_rose_anchored_floating_overlap,
+ overlap);
+ }
+ }
+ }
+
+ if (ig[iv].type == RIV_LITERAL) {
+ DEBUG_PRINTF("LITERAL '%s'\n", dumpString(ig[iv].s).c_str());
+ assert(!isLeafNode(iv, ig));
+ doRoseLiteralVertex(tbi, use_eod_table, vertex_map, parents, iv,
+ bd);
+ } else {
+ if (ig[iv].type == RIV_ACCEPT) {
+ DEBUG_PRINTF("ACCEPT\n");
+ } else {
+ assert(ig[iv].type == RIV_ACCEPT_EOD);
+ DEBUG_PRINTF("ACCEPT_EOD\n");
+ }
+ assert(isLeafNode(iv, ig)); /* accepts are final */
+ doRoseAcceptVertex(tbi, parents, iv, bd);
+ }
+ }
+ DEBUG_PRINTF("done\n");
+}
+
+template<typename GraphT>
+static
+bool empty(const GraphT &g) {
+ typename GraphT::vertex_iterator vi, ve;
+ tie(vi, ve) = vertices(g);
+ return vi == ve;
+}
+
+static
bool canImplementGraph(NGHolder &h, bool prefilter, const ReportManager &rm,
const CompileContext &cc) {
- if (isImplementableNFA(h, &rm, cc)) {
- return true;
- }
-
- if (prefilter && cc.grey.prefilterReductions) {
- // If we're prefiltering, we can have another go with a reduced graph.
- UNUSED size_t numBefore = num_vertices(h);
- prefilterReductions(h, cc);
- UNUSED size_t numAfter = num_vertices(h);
- DEBUG_PRINTF("reduced from %zu to %zu vertices\n", numBefore, numAfter);
-
- if (isImplementableNFA(h, &rm, cc)) {
- return true;
- }
- }
-
- DEBUG_PRINTF("unable to build engine\n");
- return false;
-}
-
-static
-bool predsAreDelaySensitive(const RoseInGraph &ig, RoseInVertex v) {
- assert(in_degree(v, ig));
-
- for (const auto &e : in_edges_range(v, ig)) {
- if (ig[e].graph || ig[e].haig) {
- DEBUG_PRINTF("edge graph\n");
- return true;
- }
- if (ig[e].minBound || ig[e].maxBound != ROSE_BOUND_INF) {
- DEBUG_PRINTF("edge bounds\n");
- return true;
- }
-
- RoseInVertex u = source(e, ig);
- if (ig[u].type == RIV_START) {
- continue;
- }
- if (ig[u].type != RIV_LITERAL) {
- DEBUG_PRINTF("unsafe pred vertex\n");
- return true;
- }
- if (ig[u].delay) {
- DEBUG_PRINTF("pred has delay\n");
- return true;
- }
- }
-
- return false;
-}
-
-static
-u32 maxAvailableDelay(const ue2_literal &pred_key, const ue2_literal &lit_key) {
- /* overly conservative if only part of the string is nocase */
- string pred = pred_key.get_string();
- string lit = lit_key.get_string();
-
- if (pred_key.any_nocase() || lit_key.any_nocase()) {
- upperString(pred);
- upperString(lit);
- }
-
- string::size_type last = pred.rfind(lit);
- if (last == string::npos) {
- return MAX_DELAY;
- }
-
- u32 raw = pred.size() - last - 1;
- return MIN(raw, MAX_DELAY);
-}
-
-static
+ if (isImplementableNFA(h, &rm, cc)) {
+ return true;
+ }
+
+ if (prefilter && cc.grey.prefilterReductions) {
+ // If we're prefiltering, we can have another go with a reduced graph.
+ UNUSED size_t numBefore = num_vertices(h);
+ prefilterReductions(h, cc);
+ UNUSED size_t numAfter = num_vertices(h);
+ DEBUG_PRINTF("reduced from %zu to %zu vertices\n", numBefore, numAfter);
+
+ if (isImplementableNFA(h, &rm, cc)) {
+ return true;
+ }
+ }
+
+ DEBUG_PRINTF("unable to build engine\n");
+ return false;
+}
+
+static
+bool predsAreDelaySensitive(const RoseInGraph &ig, RoseInVertex v) {
+ assert(in_degree(v, ig));
+
+ for (const auto &e : in_edges_range(v, ig)) {
+ if (ig[e].graph || ig[e].haig) {
+ DEBUG_PRINTF("edge graph\n");
+ return true;
+ }
+ if (ig[e].minBound || ig[e].maxBound != ROSE_BOUND_INF) {
+ DEBUG_PRINTF("edge bounds\n");
+ return true;
+ }
+
+ RoseInVertex u = source(e, ig);
+ if (ig[u].type == RIV_START) {
+ continue;
+ }
+ if (ig[u].type != RIV_LITERAL) {
+ DEBUG_PRINTF("unsafe pred vertex\n");
+ return true;
+ }
+ if (ig[u].delay) {
+ DEBUG_PRINTF("pred has delay\n");
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static
+u32 maxAvailableDelay(const ue2_literal &pred_key, const ue2_literal &lit_key) {
+ /* overly conservative if only part of the string is nocase */
+ string pred = pred_key.get_string();
+ string lit = lit_key.get_string();
+
+ if (pred_key.any_nocase() || lit_key.any_nocase()) {
+ upperString(pred);
+ upperString(lit);
+ }
+
+ string::size_type last = pred.rfind(lit);
+ if (last == string::npos) {
+ return MAX_DELAY;
+ }
+
+ u32 raw = pred.size() - last - 1;
+ return MIN(raw, MAX_DELAY);
+}
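+/* Worked example (values illustrative only): for pred "foobar" and
+ * lit "bar", pred.rfind(lit) == 3, so raw = 6 - 3 - 1 = 2 and the
+ * available delay is capped at 2; a lit that never occurs in pred
+ * leaves the full MAX_DELAY available. */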
+
+static
u32 findMaxSafeDelay(const RoseInGraph &ig, RoseInVertex u, RoseInVertex v) {
- // First, check the overlap constraints on (u,v).
- size_t max_delay;
- if (ig[v].type == RIV_LITERAL) {
- DEBUG_PRINTF("lit->lit edge: '%s' -> '%s'\n",
- escapeString(ig[u].s).c_str(),
- escapeString(ig[v].s).c_str());
- max_delay = maxAvailableDelay(ig[u].s, ig[v].s);
- } else if (ig[v].type == RIV_ACCEPT) {
- DEBUG_PRINTF("lit->accept edge: '%s' -> ACCEPT\n",
- escapeString(ig[u].s).c_str());
- max_delay = MAX_DELAY;
- } else {
- assert(0);
- return 0;
- }
-
- DEBUG_PRINTF("max safe delay for this edge: %zu\n", max_delay);
-
- // Now consider the predecessors of u.
- for (const auto &e : in_edges_range(u, ig)) {
- RoseInVertex w = source(e, ig);
- if (ig[w].type == RIV_START) {
- continue;
- }
- assert(ig[w].type == RIV_LITERAL);
- assert(ig[w].delay == 0);
-
- DEBUG_PRINTF("pred lit->lit edge: '%s' -> '%s'\n",
- escapeString(ig[w].s).c_str(),
- escapeString(ig[u].s).c_str());
-
- // We cannot delay the literal on u so much that a predecessor literal
- // could occur in the delayed region. For example, consider
- // 'barman.*foobar': if we allow 'foobar' to be delayed by 3, then
- // 'barman' could occur in the input string and race with 'foobar', as
- // in 'foobarman'.
-
- const size_t pred_len = ig[w].s.length();
- size_t overlap = maxOverlap(ig[u].s, ig[w].s, 0);
- DEBUG_PRINTF("pred_len=%zu, overlap=%zu\n", pred_len, overlap);
- assert(overlap <= pred_len);
- size_t max_lit_delay = pred_len - min(overlap + 1, pred_len);
- DEBUG_PRINTF("overlap=%zu -> max_lit_delay=%zu\n", overlap,
- max_lit_delay);
- max_delay = min(max_delay, max_lit_delay);
- }
-
- DEBUG_PRINTF("max_delay=%zu\n", max_delay);
- assert(max_delay <= MAX_DELAY);
- return max_delay;
-}
-
-static
-bool transformInfixToDelay(const RoseInGraph &ig, const RoseInEdge &e,
- const CompileContext &cc, u32 *delay_out) {
- const u32 max_history =
- cc.streaming ? cc.grey.maxHistoryAvailable : ROSE_BOUND_INF;
-
- const RoseInVertex u = source(e, ig), v = target(e, ig);
- const u32 graph_lag = ig[e].graph_lag;
-
-    // Clone the graph, as we need to be able to roll back this
- // operation.
- NGHolder h;
- cloneHolder(h, *ig[e].graph);
-
- DEBUG_PRINTF("target literal: %s\n", dumpString(ig[v].s).c_str());
- DEBUG_PRINTF("graph with %zu vertices and graph_lag %u\n", num_vertices(h),
- graph_lag);
-
- assert(graph_lag <= ig[v].s.length());
- if (graph_lag < ig[v].s.length()) {
- size_t len = ig[v].s.length() - graph_lag;
- ue2_literal lit(ig[v].s.substr(0, len));
- DEBUG_PRINTF("lit2=%s\n", dumpString(lit).c_str());
- u32 delay2 = removeTrailingLiteralStates(h, lit, max_history);
- if (delay2 == MO_INVALID_IDX) {
- DEBUG_PRINTF("couldn't remove trailing literal\n");
- return false;
- }
- if (delay2 != len) {
- DEBUG_PRINTF("couldn't remove entire trailing literal\n");
- return false;
- }
- }
-
- PureRepeat repeat;
- if (!isPureRepeat(h, repeat)) {
- DEBUG_PRINTF("graph is not repeat\n");
- return false;
- }
- DEBUG_PRINTF("graph is %s repeat\n", repeat.bounds.str().c_str());
- if (!repeat.bounds.max.is_infinite()) {
- DEBUG_PRINTF("not inf\n");
- return false;
- }
-
- if (!repeat.reach.all()) {
- DEBUG_PRINTF("non-dot reach\n");
- return false;
- }
-
- u32 delay = ig[v].s.length() + repeat.bounds.min;
- if (delay > MAX_DELAY) {
- DEBUG_PRINTF("delay %u > MAX_DELAY\n", delay);
- return false;
- }
-
- if (delay + ig[u].s.length() - 1 > max_history) {
- DEBUG_PRINTF("delay too large for history\n");
- return false;
- }
-
- *delay_out = delay;
- return true;
-}
-
-static
-void transformLiteralDelay(RoseInGraph &ig, const CompileContext &cc) {
- if (!cc.grey.roseTransformDelay) {
- return;
- }
-
- for (auto u : vertices_range(ig)) {
- if (ig[u].type != RIV_LITERAL) {
- continue;
- }
- if (out_degree(u, ig) != 1) {
- continue;
- }
-
- RoseInEdge e = *out_edges(u, ig).first;
- RoseInVertex v = target(e, ig);
- if (ig[v].type != RIV_LITERAL) {
- continue;
- }
- if (ig[e].haig) {
- continue;
- }
- if (!ig[e].graph) {
- continue;
- }
-
- if (predsAreDelaySensitive(ig, u)) {
- DEBUG_PRINTF("preds are delay sensitive\n");
- continue;
- }
-
- u32 max_delay = findMaxSafeDelay(ig, u, v);
-
- DEBUG_PRINTF("lit->lit edge with graph: '%s' -> '%s'\n",
- escapeString(ig[u].s).c_str(),
- escapeString(ig[v].s).c_str());
-
- u32 delay = 0;
- if (!transformInfixToDelay(ig, e, cc, &delay)) {
- continue;
- }
-
- if (delay > max_delay) {
- DEBUG_PRINTF("delay=%u > max_delay=%u\n", delay, max_delay);
- continue;
- }
-
- DEBUG_PRINTF("setting lit delay to %u and deleting graph\n", delay);
- ig[u].delay = delay;
- ig[u].min_offset = add_rose_depth(ig[u].min_offset, delay);
- ig[u].max_offset = add_rose_depth(ig[u].max_offset, delay);
- ig[e].graph_lag = 0;
- ig[e].graph.reset();
- ig[e].minBound = 0;
- ig[e].maxBound = ROSE_BOUND_INF;
- }
-}
-
-static
-bool transformInfixToAnchBounds(const RoseInGraph &ig, const RoseInEdge &e,
- const CompileContext &cc, DepthMinMax *bounds) {
- const u32 max_history = cc.streaming ? cc.grey.maxHistoryAvailable
- : ROSE_BOUND_INF;
-
- const RoseInVertex v = target(e, ig);
- const u32 graph_lag = ig[e].graph_lag;
-
-    // Clone the graph, as we need to be able to roll back this
- // operation.
- NGHolder h;
- cloneHolder(h, *ig[e].graph);
-
- DEBUG_PRINTF("graph with %zu vertices and graph_lag %u\n", num_vertices(h),
- graph_lag);
-
- assert(graph_lag <= ig[v].s.length());
- if (graph_lag < ig[v].s.length()) {
- size_t len = ig[v].s.length() - graph_lag;
- ue2_literal lit(ig[v].s.substr(0, len));
- DEBUG_PRINTF("lit2=%s\n", dumpString(lit).c_str());
- u32 delay2 = removeTrailingLiteralStates(h, lit, max_history);
- if (delay2 == MO_INVALID_IDX) {
- DEBUG_PRINTF("couldn't remove trailing literal\n");
- return false;
- }
- if (delay2 != len) {
- DEBUG_PRINTF("couldn't remove entire trailing literal\n");
- return false;
- }
- }
-
- PureRepeat repeat;
- if (!isPureRepeat(h, repeat)) {
- DEBUG_PRINTF("graph is not repeat\n");
- return false;
- }
- DEBUG_PRINTF("graph is %s repeat\n", repeat.bounds.str().c_str());
- if (!repeat.bounds.max.is_infinite()) {
- DEBUG_PRINTF("not inf\n");
- return false;
- }
-
- if (!repeat.reach.all()) {
- DEBUG_PRINTF("non-dot reach\n");
- return false;
- }
-
- *bounds = repeat.bounds;
- return true;
-}
-
-static
-void transformAnchoredLiteralOverlap(RoseInGraph &ig, RoseBuildData &bd,
- const CompileContext &cc) {
- if (!cc.grey.roseTransformDelay) {
- return;
- }
-
- for (const auto &e : edges_range(ig)) {
- const RoseInVertex u = source(e, ig);
- const RoseInVertex v = target(e, ig);
-
- if (ig[u].type != RIV_LITERAL || ig[v].type != RIV_LITERAL) {
- continue;
- }
- if (ig[e].haig || !ig[e].graph) {
- continue;
- }
-
- if (ig[u].min_offset != ig[u].max_offset) {
- DEBUG_PRINTF("u not fixed depth\n");
- continue;
- }
-
- DEBUG_PRINTF("anch_lit->lit edge with graph: '%s' -> '%s'\n",
- escapeString(ig[u].s).c_str(),
- escapeString(ig[v].s).c_str());
-
- DepthMinMax bounds;
- if (!transformInfixToAnchBounds(ig, e, cc, &bounds)) {
- continue;
- }
-
- DEBUG_PRINTF("setting bounds to %s and deleting graph\n",
- bounds.str().c_str());
- ig[e].graph_lag = 0;
- ig[e].graph.reset();
- ig[e].minBound = bounds.min;
- ig[e].maxBound = bounds.max.is_finite() ? (u32)bounds.max
- : ROSE_BOUND_INF;
- bd.anch_history_edges.insert(e);
- }
-}
-
-/**
- * \brief Transform small trailing dot repeat suffixes into delay on the last
- * literal.
- *
- * For example, the case /hatstand.*teakettle./s can just delay 'teakettle' +1
- * rather than having a suffix to handle the dot.
- *
- * This transformation looks for literal->accept edges and transforms them if
- * appropriate. It doesn't handle complex cases where the literal has more than
- * one successor.
- */
-static
-void transformSuffixDelay(RoseInGraph &ig, const CompileContext &cc) {
- if (!cc.grey.roseTransformDelay) {
- return;
- }
-
- const u32 max_history = cc.streaming ? cc.grey.maxHistoryAvailable
- : ROSE_BOUND_INF;
-
- set<RoseInVertex> modified_accepts; // may be dead after transform
-
- for (auto u : vertices_range(ig)) {
- if (ig[u].type != RIV_LITERAL) {
- continue;
- }
- if (out_degree(u, ig) != 1) {
- continue;
- }
-
- RoseInEdge e = *out_edges(u, ig).first;
- RoseInVertex v = target(e, ig);
- if (ig[v].type != RIV_ACCEPT) {
- continue;
- }
- if (ig[e].haig) {
- continue;
- }
- if (!ig[e].graph) {
- continue;
- }
-
- if (predsAreDelaySensitive(ig, u)) {
- DEBUG_PRINTF("preds are delay sensitive\n");
- continue;
- }
-
- DEBUG_PRINTF("lit->accept edge with graph: lit='%s'\n",
- escapeString(ig[u].s).c_str());
-
- const NGHolder &h = *ig[e].graph;
- const set<ReportID> reports = all_reports(h);
- if (reports.size() != 1) {
- DEBUG_PRINTF("too many reports\n");
- continue;
- }
-
- PureRepeat repeat;
- if (!isPureRepeat(h, repeat)) {
- DEBUG_PRINTF("suffix graph is not repeat\n");
- continue;
- }
- DEBUG_PRINTF("suffix graph is %s repeat\n",
- repeat.bounds.str().c_str());
-
- if (!repeat.reach.all()) {
- DEBUG_PRINTF("non-dot reach\n");
- continue;
- }
-
- if (repeat.bounds.min != repeat.bounds.max ||
- repeat.bounds.min > depth(MAX_DELAY)) {
- DEBUG_PRINTF("repeat is variable or too large\n");
- continue;
- }
-
- u32 max_delay = findMaxSafeDelay(ig, u, v);
-
- u32 delay = repeat.bounds.min;
- if (delay > max_delay) {
- DEBUG_PRINTF("delay=%u > max_delay=%u\n", delay, max_delay);
- continue;
- }
-
- if (delay + ig[u].s.length() - 1 > max_history) {
- DEBUG_PRINTF("delay too large for history\n");
- continue;
- }
-
- DEBUG_PRINTF("setting lit delay to %u and removing suffix\n", delay);
- ig[u].delay = delay;
- ig[u].min_offset = add_rose_depth(ig[u].min_offset, delay);
- ig[u].max_offset = add_rose_depth(ig[u].max_offset, delay);
-
- // Construct a new accept vertex for this report and remove edge e.
- // (This allows us to cope if v has more than one in-edge).
- RoseInVertex v2 =
- add_vertex(RoseInVertexProps::makeAccept(reports), ig);
- add_edge(u, v2, ig);
- remove_edge(e, ig);
- modified_accepts.insert(v);
- }
-
- DEBUG_PRINTF("%zu modified accepts\n", modified_accepts.size());
-
- for (auto v : modified_accepts) {
- if (in_degree(v, ig) == 0) {
- DEBUG_PRINTF("removing accept vertex with no preds\n");
- remove_vertex(v, ig);
- }
- }
-}
-
-#ifndef NDEBUG
-static
-bool validateKinds(const RoseInGraph &g) {
- for (const auto &e : edges_range(g)) {
- if (g[e].graph && g[e].graph->kind != whatRoseIsThis(g, e)) {
- return false;
- }
- }
-
- return true;
-}
-#endif
-
+ // First, check the overlap constraints on (u,v).
+ size_t max_delay;
+ if (ig[v].type == RIV_LITERAL) {
+ DEBUG_PRINTF("lit->lit edge: '%s' -> '%s'\n",
+ escapeString(ig[u].s).c_str(),
+ escapeString(ig[v].s).c_str());
+ max_delay = maxAvailableDelay(ig[u].s, ig[v].s);
+ } else if (ig[v].type == RIV_ACCEPT) {
+ DEBUG_PRINTF("lit->accept edge: '%s' -> ACCEPT\n",
+ escapeString(ig[u].s).c_str());
+ max_delay = MAX_DELAY;
+ } else {
+ assert(0);
+ return 0;
+ }
+
+ DEBUG_PRINTF("max safe delay for this edge: %zu\n", max_delay);
+
+ // Now consider the predecessors of u.
+ for (const auto &e : in_edges_range(u, ig)) {
+ RoseInVertex w = source(e, ig);
+ if (ig[w].type == RIV_START) {
+ continue;
+ }
+ assert(ig[w].type == RIV_LITERAL);
+ assert(ig[w].delay == 0);
+
+ DEBUG_PRINTF("pred lit->lit edge: '%s' -> '%s'\n",
+ escapeString(ig[w].s).c_str(),
+ escapeString(ig[u].s).c_str());
+
+ // We cannot delay the literal on u so much that a predecessor literal
+ // could occur in the delayed region. For example, consider
+ // 'barman.*foobar': if we allow 'foobar' to be delayed by 3, then
+ // 'barman' could occur in the input string and race with 'foobar', as
+ // in 'foobarman'.
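+        // Tracing that example (numbers illustrative): pred_len = 6
+        // ('barman') and the 'foobar'/'barman' overlap is 3 ('bar'),
+        // so max_lit_delay = 6 - min(3 + 1, 6) = 2 and the racy
+        // delay of 3 is duly rejected.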
+
+ const size_t pred_len = ig[w].s.length();
+ size_t overlap = maxOverlap(ig[u].s, ig[w].s, 0);
+ DEBUG_PRINTF("pred_len=%zu, overlap=%zu\n", pred_len, overlap);
+ assert(overlap <= pred_len);
+ size_t max_lit_delay = pred_len - min(overlap + 1, pred_len);
+ DEBUG_PRINTF("overlap=%zu -> max_lit_delay=%zu\n", overlap,
+ max_lit_delay);
+ max_delay = min(max_delay, max_lit_delay);
+ }
+
+ DEBUG_PRINTF("max_delay=%zu\n", max_delay);
+ assert(max_delay <= MAX_DELAY);
+ return max_delay;
+}
+
+static
+bool transformInfixToDelay(const RoseInGraph &ig, const RoseInEdge &e,
+ const CompileContext &cc, u32 *delay_out) {
+ const u32 max_history =
+ cc.streaming ? cc.grey.maxHistoryAvailable : ROSE_BOUND_INF;
+
+ const RoseInVertex u = source(e, ig), v = target(e, ig);
+ const u32 graph_lag = ig[e].graph_lag;
+
+    // Clone the graph, as we need to be able to roll back this
+ // operation.
+ NGHolder h;
+ cloneHolder(h, *ig[e].graph);
+
+ DEBUG_PRINTF("target literal: %s\n", dumpString(ig[v].s).c_str());
+ DEBUG_PRINTF("graph with %zu vertices and graph_lag %u\n", num_vertices(h),
+ graph_lag);
+
+ assert(graph_lag <= ig[v].s.length());
+ if (graph_lag < ig[v].s.length()) {
+ size_t len = ig[v].s.length() - graph_lag;
+ ue2_literal lit(ig[v].s.substr(0, len));
+ DEBUG_PRINTF("lit2=%s\n", dumpString(lit).c_str());
+ u32 delay2 = removeTrailingLiteralStates(h, lit, max_history);
+ if (delay2 == MO_INVALID_IDX) {
+ DEBUG_PRINTF("couldn't remove trailing literal\n");
+ return false;
+ }
+ if (delay2 != len) {
+ DEBUG_PRINTF("couldn't remove entire trailing literal\n");
+ return false;
+ }
+ }
+
+ PureRepeat repeat;
+ if (!isPureRepeat(h, repeat)) {
+ DEBUG_PRINTF("graph is not repeat\n");
+ return false;
+ }
+ DEBUG_PRINTF("graph is %s repeat\n", repeat.bounds.str().c_str());
+ if (!repeat.bounds.max.is_infinite()) {
+ DEBUG_PRINTF("not inf\n");
+ return false;
+ }
+
+ if (!repeat.reach.all()) {
+ DEBUG_PRINTF("non-dot reach\n");
+ return false;
+ }
+
+ u32 delay = ig[v].s.length() + repeat.bounds.min;
+ if (delay > MAX_DELAY) {
+ DEBUG_PRINTF("delay %u > MAX_DELAY\n", delay);
+ return false;
+ }
+
+ if (delay + ig[u].s.length() - 1 > max_history) {
+ DEBUG_PRINTF("delay too large for history\n");
+ return false;
+ }
+
+ *delay_out = delay;
+ return true;
+}
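+/* Sketch of the transform (hypothetical edge, not from a real
+ * compile): for 'foo' -[graph ending in 'bar' states, lag 0]-> 'bar',
+ * stripping the trailing literal may leave a pure {2,inf} dot repeat;
+ * then delay = len("bar") + 2 = 5, and the caller moves 'foo' to
+ * delay 5 while replacing the infix with plain (0, inf) bounds. */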
+
+static
+void transformLiteralDelay(RoseInGraph &ig, const CompileContext &cc) {
+ if (!cc.grey.roseTransformDelay) {
+ return;
+ }
+
+ for (auto u : vertices_range(ig)) {
+ if (ig[u].type != RIV_LITERAL) {
+ continue;
+ }
+ if (out_degree(u, ig) != 1) {
+ continue;
+ }
+
+ RoseInEdge e = *out_edges(u, ig).first;
+ RoseInVertex v = target(e, ig);
+ if (ig[v].type != RIV_LITERAL) {
+ continue;
+ }
+ if (ig[e].haig) {
+ continue;
+ }
+ if (!ig[e].graph) {
+ continue;
+ }
+
+ if (predsAreDelaySensitive(ig, u)) {
+ DEBUG_PRINTF("preds are delay sensitive\n");
+ continue;
+ }
+
+ u32 max_delay = findMaxSafeDelay(ig, u, v);
+
+ DEBUG_PRINTF("lit->lit edge with graph: '%s' -> '%s'\n",
+ escapeString(ig[u].s).c_str(),
+ escapeString(ig[v].s).c_str());
+
+ u32 delay = 0;
+ if (!transformInfixToDelay(ig, e, cc, &delay)) {
+ continue;
+ }
+
+ if (delay > max_delay) {
+ DEBUG_PRINTF("delay=%u > max_delay=%u\n", delay, max_delay);
+ continue;
+ }
+
+ DEBUG_PRINTF("setting lit delay to %u and deleting graph\n", delay);
+ ig[u].delay = delay;
+ ig[u].min_offset = add_rose_depth(ig[u].min_offset, delay);
+ ig[u].max_offset = add_rose_depth(ig[u].max_offset, delay);
+ ig[e].graph_lag = 0;
+ ig[e].graph.reset();
+ ig[e].minBound = 0;
+ ig[e].maxBound = ROSE_BOUND_INF;
+ }
+}
+
+static
+bool transformInfixToAnchBounds(const RoseInGraph &ig, const RoseInEdge &e,
+ const CompileContext &cc, DepthMinMax *bounds) {
+ const u32 max_history = cc.streaming ? cc.grey.maxHistoryAvailable
+ : ROSE_BOUND_INF;
+
+ const RoseInVertex v = target(e, ig);
+ const u32 graph_lag = ig[e].graph_lag;
+
+    // Clone the graph, as we need to be able to roll back this
+ // operation.
+ NGHolder h;
+ cloneHolder(h, *ig[e].graph);
+
+ DEBUG_PRINTF("graph with %zu vertices and graph_lag %u\n", num_vertices(h),
+ graph_lag);
+
+ assert(graph_lag <= ig[v].s.length());
+ if (graph_lag < ig[v].s.length()) {
+ size_t len = ig[v].s.length() - graph_lag;
+ ue2_literal lit(ig[v].s.substr(0, len));
+ DEBUG_PRINTF("lit2=%s\n", dumpString(lit).c_str());
+ u32 delay2 = removeTrailingLiteralStates(h, lit, max_history);
+ if (delay2 == MO_INVALID_IDX) {
+ DEBUG_PRINTF("couldn't remove trailing literal\n");
+ return false;
+ }
+ if (delay2 != len) {
+ DEBUG_PRINTF("couldn't remove entire trailing literal\n");
+ return false;
+ }
+ }
+
+ PureRepeat repeat;
+ if (!isPureRepeat(h, repeat)) {
+ DEBUG_PRINTF("graph is not repeat\n");
+ return false;
+ }
+ DEBUG_PRINTF("graph is %s repeat\n", repeat.bounds.str().c_str());
+ if (!repeat.bounds.max.is_infinite()) {
+ DEBUG_PRINTF("not inf\n");
+ return false;
+ }
+
+ if (!repeat.reach.all()) {
+ DEBUG_PRINTF("non-dot reach\n");
+ return false;
+ }
+
+ *bounds = repeat.bounds;
+ return true;
+}
+
+static
+void transformAnchoredLiteralOverlap(RoseInGraph &ig, RoseBuildData &bd,
+ const CompileContext &cc) {
+ if (!cc.grey.roseTransformDelay) {
+ return;
+ }
+
+ for (const auto &e : edges_range(ig)) {
+ const RoseInVertex u = source(e, ig);
+ const RoseInVertex v = target(e, ig);
+
+ if (ig[u].type != RIV_LITERAL || ig[v].type != RIV_LITERAL) {
+ continue;
+ }
+ if (ig[e].haig || !ig[e].graph) {
+ continue;
+ }
+
+ if (ig[u].min_offset != ig[u].max_offset) {
+ DEBUG_PRINTF("u not fixed depth\n");
+ continue;
+ }
+
+ DEBUG_PRINTF("anch_lit->lit edge with graph: '%s' -> '%s'\n",
+ escapeString(ig[u].s).c_str(),
+ escapeString(ig[v].s).c_str());
+
+ DepthMinMax bounds;
+ if (!transformInfixToAnchBounds(ig, e, cc, &bounds)) {
+ continue;
+ }
+
+ DEBUG_PRINTF("setting bounds to %s and deleting graph\n",
+ bounds.str().c_str());
+ ig[e].graph_lag = 0;
+ ig[e].graph.reset();
+ ig[e].minBound = bounds.min;
+ ig[e].maxBound = bounds.max.is_finite() ? (u32)bounds.max
+ : ROSE_BOUND_INF;
+ bd.anch_history_edges.insert(e);
+ }
+}
+
+/**
+ * \brief Transform small trailing dot repeat suffixes into delay on the last
+ * literal.
+ *
+ * For example, the case /hatstand.*teakettle./s can just delay 'teakettle' +1
+ * rather than having a suffix to handle the dot.
+ *
+ * This transformation looks for literal->accept edges and transforms them if
+ * appropriate. It doesn't handle complex cases where the literal has more than
+ * one successor.
+ */
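+/* Illustrative trace of the example above: for /hatstand.*teakettle./s
+ * the lit->accept suffix graph is a pure {1,1} dot repeat carrying a
+ * single report, so delay = 1 is applied to 'teakettle', a fresh
+ * accept vertex takes the report and the suffix engine is dropped. */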
+static
+void transformSuffixDelay(RoseInGraph &ig, const CompileContext &cc) {
+ if (!cc.grey.roseTransformDelay) {
+ return;
+ }
+
+ const u32 max_history = cc.streaming ? cc.grey.maxHistoryAvailable
+ : ROSE_BOUND_INF;
+
+ set<RoseInVertex> modified_accepts; // may be dead after transform
+
+ for (auto u : vertices_range(ig)) {
+ if (ig[u].type != RIV_LITERAL) {
+ continue;
+ }
+ if (out_degree(u, ig) != 1) {
+ continue;
+ }
+
+ RoseInEdge e = *out_edges(u, ig).first;
+ RoseInVertex v = target(e, ig);
+ if (ig[v].type != RIV_ACCEPT) {
+ continue;
+ }
+ if (ig[e].haig) {
+ continue;
+ }
+ if (!ig[e].graph) {
+ continue;
+ }
+
+ if (predsAreDelaySensitive(ig, u)) {
+ DEBUG_PRINTF("preds are delay sensitive\n");
+ continue;
+ }
+
+ DEBUG_PRINTF("lit->accept edge with graph: lit='%s'\n",
+ escapeString(ig[u].s).c_str());
+
+ const NGHolder &h = *ig[e].graph;
+ const set<ReportID> reports = all_reports(h);
+ if (reports.size() != 1) {
+ DEBUG_PRINTF("too many reports\n");
+ continue;
+ }
+
+ PureRepeat repeat;
+ if (!isPureRepeat(h, repeat)) {
+ DEBUG_PRINTF("suffix graph is not repeat\n");
+ continue;
+ }
+ DEBUG_PRINTF("suffix graph is %s repeat\n",
+ repeat.bounds.str().c_str());
+
+ if (!repeat.reach.all()) {
+ DEBUG_PRINTF("non-dot reach\n");
+ continue;
+ }
+
+ if (repeat.bounds.min != repeat.bounds.max ||
+ repeat.bounds.min > depth(MAX_DELAY)) {
+ DEBUG_PRINTF("repeat is variable or too large\n");
+ continue;
+ }
+
+ u32 max_delay = findMaxSafeDelay(ig, u, v);
+
+ u32 delay = repeat.bounds.min;
+ if (delay > max_delay) {
+ DEBUG_PRINTF("delay=%u > max_delay=%u\n", delay, max_delay);
+ continue;
+ }
+
+ if (delay + ig[u].s.length() - 1 > max_history) {
+ DEBUG_PRINTF("delay too large for history\n");
+ continue;
+ }
+
+ DEBUG_PRINTF("setting lit delay to %u and removing suffix\n", delay);
+ ig[u].delay = delay;
+ ig[u].min_offset = add_rose_depth(ig[u].min_offset, delay);
+ ig[u].max_offset = add_rose_depth(ig[u].max_offset, delay);
+
+ // Construct a new accept vertex for this report and remove edge e.
+ // (This allows us to cope if v has more than one in-edge).
+ RoseInVertex v2 =
+ add_vertex(RoseInVertexProps::makeAccept(reports), ig);
+ add_edge(u, v2, ig);
+ remove_edge(e, ig);
+ modified_accepts.insert(v);
+ }
+
+ DEBUG_PRINTF("%zu modified accepts\n", modified_accepts.size());
+
+ for (auto v : modified_accepts) {
+ if (in_degree(v, ig) == 0) {
+ DEBUG_PRINTF("removing accept vertex with no preds\n");
+ remove_vertex(v, ig);
+ }
+ }
+}
+
+#ifndef NDEBUG
+static
+bool validateKinds(const RoseInGraph &g) {
+ for (const auto &e : edges_range(g)) {
+ if (g[e].graph && g[e].graph->kind != whatRoseIsThis(g, e)) {
+ return false;
+ }
+ }
+
+ return true;
+}
+#endif
+
bool RoseBuildImpl::addRose(const RoseInGraph &ig, bool prefilter) {
- DEBUG_PRINTF("trying to rose\n");
- assert(validateKinds(ig));
+ DEBUG_PRINTF("trying to rose\n");
+ assert(validateKinds(ig));
assert(hasCorrectlyNumberedVertices(ig));
-
- if (::ue2::empty(ig)) {
- assert(0);
- return false;
- }
-
- const unique_ptr<RoseInGraph> in_ptr = cloneRoseGraph(ig);
- RoseInGraph &in = *in_ptr;
-
- RoseBuildData bd(in, false);
-
- transformLiteralDelay(in, cc);
- transformAnchoredLiteralOverlap(in, bd, cc);
- transformSuffixDelay(in, cc);
-
+
+ if (::ue2::empty(ig)) {
+ assert(0);
+ return false;
+ }
+
+ const unique_ptr<RoseInGraph> in_ptr = cloneRoseGraph(ig);
+ RoseInGraph &in = *in_ptr;
+
+ RoseBuildData bd(in, false);
+
+ transformLiteralDelay(in, cc);
+ transformAnchoredLiteralOverlap(in, bd, cc);
+ transformSuffixDelay(in, cc);
+
renumber_vertices(in);
assert(validateKinds(in));
-
+
insertion_ordered_map<NGHolder *, vector<RoseInEdge>> graphs;
-
- for (const auto &e : edges_range(in)) {
- if (!in[e].graph) {
+
+ for (const auto &e : edges_range(in)) {
+ if (!in[e].graph) {
assert(!in[e].dfa);
assert(!in[e].haig);
- continue; // no graph
- }
-
+ continue; // no graph
+ }
+
if (in[e].haig || in[e].dfa) {
/* Early DFAs/Haigs are always implementable (we've already built
* the raw DFA). */
- continue;
- }
-
- NGHolder *h = in[e].graph.get();
+ continue;
+ }
+
+ NGHolder *h = in[e].graph.get();
assert(isCorrectlyTopped(*h));
- graphs[h].push_back(e);
- }
-
- vector<RoseInEdge> graph_edges;
-
+ graphs[h].push_back(e);
+ }
+
+ vector<RoseInEdge> graph_edges;
+
for (const auto &m : graphs) {
NGHolder *h = m.first;
if (!canImplementGraph(*h, prefilter, rm, cc)) {
- return false;
- }
+ return false;
+ }
insert(&graph_edges, graph_edges.end(), m.second);
- }
-
- /* we are now past the point of no return. We can start making irreversible
- changes to the rose graph, etc */
-
- for (const auto &e : graph_edges) {
- assert(in[e].graph);
- assert(!in[e].haig);
- NGHolder &h = *in[e].graph;
- DEBUG_PRINTF("handling %p\n", &h);
- assert(allMatchStatesHaveReports(h));
-
- if (!generates_callbacks(whatRoseIsThis(in, e))
- && in[target(e, in)].type != RIV_ACCEPT_EOD) {
+ }
+
+ /* we are now past the point of no return. We can start making irreversible
+ changes to the rose graph, etc */
+
+ for (const auto &e : graph_edges) {
+ assert(in[e].graph);
+ assert(!in[e].haig);
+ NGHolder &h = *in[e].graph;
+ DEBUG_PRINTF("handling %p\n", &h);
+ assert(allMatchStatesHaveReports(h));
+
+ if (!generates_callbacks(whatRoseIsThis(in, e))
+ && in[target(e, in)].type != RIV_ACCEPT_EOD) {
set_report(h, getNewNfaReport());
- }
- }
-
- populateRoseGraph(this, bd);
-
- return true;
-}
-
-bool RoseBuildImpl::addSombeRose(const RoseInGraph &ig) {
- DEBUG_PRINTF("rose is trying to consume a sombe\n");
- assert(validateKinds(ig));
-
- if (::ue2::empty(ig)) {
- assert(0);
- return false;
- }
-
- RoseBuildData bd(ig, true);
-
- for (const auto &e : edges_range(ig)) {
- if (!ig[e].graph) {
- continue; // no graph
- }
- DEBUG_PRINTF("handling %p\n", ig[e].graph.get());
- assert(allMatchStatesHaveReports(*ig[e].graph));
- assert(ig[e].haig);
- }
-
- populateRoseGraph(this, bd);
-
- return true;
-}
-
-bool roseCheckRose(const RoseInGraph &ig, bool prefilter,
- const ReportManager &rm, const CompileContext &cc) {
- assert(validateKinds(ig));
-
- if (::ue2::empty(ig)) {
- assert(0);
- return false;
- }
-
+ }
+ }
+
+ populateRoseGraph(this, bd);
+
+ return true;
+}
+
+bool RoseBuildImpl::addSombeRose(const RoseInGraph &ig) {
+ DEBUG_PRINTF("rose is trying to consume a sombe\n");
+ assert(validateKinds(ig));
+
+ if (::ue2::empty(ig)) {
+ assert(0);
+ return false;
+ }
+
+ RoseBuildData bd(ig, true);
+
+ for (const auto &e : edges_range(ig)) {
+ if (!ig[e].graph) {
+ continue; // no graph
+ }
+ DEBUG_PRINTF("handling %p\n", ig[e].graph.get());
+ assert(allMatchStatesHaveReports(*ig[e].graph));
+ assert(ig[e].haig);
+ }
+
+ populateRoseGraph(this, bd);
+
+ return true;
+}
+
+bool roseCheckRose(const RoseInGraph &ig, bool prefilter,
+ const ReportManager &rm, const CompileContext &cc) {
+ assert(validateKinds(ig));
+
+ if (::ue2::empty(ig)) {
+ assert(0);
+ return false;
+ }
+
vector<NGHolder *> graphs;
-
- for (const auto &e : edges_range(ig)) {
- if (!ig[e].graph) {
- continue; // no graph
- }
-
- if (ig[e].haig) {
- // Haigs are always implementable (we've already built the raw DFA).
- continue;
- }
-
+
+ for (const auto &e : edges_range(ig)) {
+ if (!ig[e].graph) {
+ continue; // no graph
+ }
+
+ if (ig[e].haig) {
+ // Haigs are always implementable (we've already built the raw DFA).
+ continue;
+ }
+
graphs.push_back(ig[e].graph.get());
- }
-
+ }
+
for (const auto &g : graphs) {
if (!canImplementGraph(*g, prefilter, rm, cc)) {
- return false;
- }
- }
-
- return true;
-}
-
-void RoseBuildImpl::add(bool anchored, bool eod, const ue2_literal &lit,
+ return false;
+ }
+ }
+
+ return true;
+}
+
+void RoseBuildImpl::add(bool anchored, bool eod, const ue2_literal &lit,
const flat_set<ReportID> &reports) {
- assert(!reports.empty());
-
- if (cc.grey.floodAsPuffette && !anchored && !eod && is_flood(lit) &&
- lit.length() > 3) {
- DEBUG_PRINTF("adding as puffette\n");
- const CharReach &cr = *lit.begin();
- for (const auto &report : reports) {
- addOutfix(raw_puff(lit.length(), true, report, cr, true));
- }
-
- return;
- }
-
- RoseInGraph ig;
- RoseInVertex start = add_vertex(RoseInVertexProps::makeStart(anchored), ig);
- RoseInVertex accept = add_vertex(
- eod ? RoseInVertexProps::makeAcceptEod(set<ReportID>())
- : RoseInVertexProps::makeAccept(set<ReportID>()), ig);
- RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig);
-
- add_edge(start, v, RoseInEdgeProps(0U, anchored ? 0U : ROSE_BOUND_INF), ig);
- add_edge(v, accept, RoseInEdgeProps(0U, 0U), ig);
-
- calcVertexOffsets(ig);
-
- ig[accept].reports.insert(reports.begin(), reports.end());
-
- addRose(ig, false);
-}
-
-static
-u32 findMaxBAWidth(const NGHolder &h) {
- // Must be bi-anchored: no out-edges from startDs (other than its
- // self-loop), no in-edges to accept.
+ assert(!reports.empty());
+
+ if (cc.grey.floodAsPuffette && !anchored && !eod && is_flood(lit) &&
+ lit.length() > 3) {
+ DEBUG_PRINTF("adding as puffette\n");
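+        // For instance (illustrative): an unanchored flood such as
+        // "aaaaa" (length 5 > 3, a single repeated reach) becomes a
+        // puffette of lit.length() repeats of 'a' on the MPV outfix
+        // rather than a literal-table entry.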
+ const CharReach &cr = *lit.begin();
+ for (const auto &report : reports) {
+ addOutfix(raw_puff(lit.length(), true, report, cr, true));
+ }
+
+ return;
+ }
+
+ RoseInGraph ig;
+ RoseInVertex start = add_vertex(RoseInVertexProps::makeStart(anchored), ig);
+ RoseInVertex accept = add_vertex(
+ eod ? RoseInVertexProps::makeAcceptEod(set<ReportID>())
+ : RoseInVertexProps::makeAccept(set<ReportID>()), ig);
+ RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig);
+
+ add_edge(start, v, RoseInEdgeProps(0U, anchored ? 0U : ROSE_BOUND_INF), ig);
+ add_edge(v, accept, RoseInEdgeProps(0U, 0U), ig);
+
+ calcVertexOffsets(ig);
+
+ ig[accept].reports.insert(reports.begin(), reports.end());
+
+ addRose(ig, false);
+}
+
+static
+u32 findMaxBAWidth(const NGHolder &h) {
+ // Must be bi-anchored: no out-edges from startDs (other than its
+ // self-loop), no in-edges to accept.
if (out_degree(h.startDs, h) > 1 || in_degree(h.accept, h)) {
- return ROSE_BOUND_INF;
- }
- depth d = findMaxWidth(h);
- assert(d.is_reachable());
-
- if (!d.is_finite()) {
- return ROSE_BOUND_INF;
- }
- return d;
-}
-
-static
-void populateOutfixInfo(OutfixInfo &outfix, const NGHolder &h,
- const RoseBuildImpl &tbi) {
- outfix.maxBAWidth = findMaxBAWidth(h);
- outfix.minWidth = findMinWidth(h);
- outfix.maxWidth = findMaxWidth(h);
- outfix.maxOffset = findMaxOffset(h, tbi.rm);
- populateReverseAccelerationInfo(outfix.rev_info, h);
-}
-
+ return ROSE_BOUND_INF;
+ }
+ depth d = findMaxWidth(h);
+ assert(d.is_reachable());
+
+ if (!d.is_finite()) {
+ return ROSE_BOUND_INF;
+ }
+ return d;
+}
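+/* Example (hypothetical graphs): a fully bi-anchored pattern such as
+ * /^foo.{0,3}$/ has no floating start and accepts only at eod, so its
+ * finite max width (6) is returned; any floating start or in-edge to
+ * accept yields ROSE_BOUND_INF. */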
+
+static
+void populateOutfixInfo(OutfixInfo &outfix, const NGHolder &h,
+ const RoseBuildImpl &tbi) {
+ outfix.maxBAWidth = findMaxBAWidth(h);
+ outfix.minWidth = findMinWidth(h);
+ outfix.maxWidth = findMaxWidth(h);
+ outfix.maxOffset = findMaxOffset(h, tbi.rm);
+ populateReverseAccelerationInfo(outfix.rev_info, h);
+}
+
static
bool addEodOutfix(RoseBuildImpl &build, const NGHolder &h) {
map<flat_set<ReportID>, ReportID> report_remap;
@@ -1748,9 +1748,9 @@ bool addEodOutfix(RoseBuildImpl &build, const NGHolder &h) {
return true;
}
-bool RoseBuildImpl::addOutfix(const NGHolder &h) {
- DEBUG_PRINTF("%zu vertices, %zu edges\n", num_vertices(h), num_edges(h));
-
+bool RoseBuildImpl::addOutfix(const NGHolder &h) {
+ DEBUG_PRINTF("%zu vertices, %zu edges\n", num_vertices(h), num_edges(h));
+
/* TODO: handle more than one report */
if (!in_degree(h.accept, h)
&& all_reports(h).size() == 1
@@ -1758,241 +1758,241 @@ bool RoseBuildImpl::addOutfix(const NGHolder &h) {
return true;
}
- const u32 nfa_states = isImplementableNFA(h, &rm, cc);
- if (nfa_states) {
- DEBUG_PRINTF("implementable as an NFA in %u states\n", nfa_states);
- } else {
- DEBUG_PRINTF("not implementable as an NFA\n");
- }
-
- bool dfa_cand = !nfa_states || nfa_states > 128 /* slow model */
- || can_exhaust(h, rm); /* can be pruned */
-
- unique_ptr<raw_dfa> rdfa;
-
- if (!nfa_states || cc.grey.roseMcClellanOutfix == 2 ||
- (cc.grey.roseMcClellanOutfix == 1 && dfa_cand)) {
- rdfa = buildMcClellan(h, &rm, cc.grey);
- }
-
- if (!nfa_states && !rdfa) {
- DEBUG_PRINTF("could not build as either an NFA or a DFA\n");
- return false;
- }
-
- if (rdfa) {
- outfixes.push_back(OutfixInfo(move(rdfa)));
- } else {
- outfixes.push_back(OutfixInfo(cloneHolder(h)));
- }
-
- populateOutfixInfo(outfixes.back(), h, *this);
-
- return true;
-}
-
-bool RoseBuildImpl::addOutfix(const NGHolder &h, const raw_som_dfa &haig) {
- DEBUG_PRINTF("haig with %zu states\n", haig.states.size());
-
- outfixes.push_back(OutfixInfo(ue2::make_unique<raw_som_dfa>(haig)));
- populateOutfixInfo(outfixes.back(), h, *this);
-
- return true; /* failure is not yet an option */
-}
-
-bool RoseBuildImpl::addOutfix(const raw_puff &rp) {
- if (!mpv_outfix) {
+ const u32 nfa_states = isImplementableNFA(h, &rm, cc);
+ if (nfa_states) {
+ DEBUG_PRINTF("implementable as an NFA in %u states\n", nfa_states);
+ } else {
+ DEBUG_PRINTF("not implementable as an NFA\n");
+ }
+
+ bool dfa_cand = !nfa_states || nfa_states > 128 /* slow model */
+ || can_exhaust(h, rm); /* can be pruned */
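+    // Decision sketch: roseMcClellanOutfix == 2 always attempts the
+    // McClellan build, == 1 attempts it only for the weak candidates
+    // above, and an NFA-unimplementable graph forces the attempt
+    // regardless of the grey setting.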
+
+ unique_ptr<raw_dfa> rdfa;
+
+ if (!nfa_states || cc.grey.roseMcClellanOutfix == 2 ||
+ (cc.grey.roseMcClellanOutfix == 1 && dfa_cand)) {
+ rdfa = buildMcClellan(h, &rm, cc.grey);
+ }
+
+ if (!nfa_states && !rdfa) {
+ DEBUG_PRINTF("could not build as either an NFA or a DFA\n");
+ return false;
+ }
+
+ if (rdfa) {
+ outfixes.push_back(OutfixInfo(move(rdfa)));
+ } else {
+ outfixes.push_back(OutfixInfo(cloneHolder(h)));
+ }
+
+ populateOutfixInfo(outfixes.back(), h, *this);
+
+ return true;
+}
+
+bool RoseBuildImpl::addOutfix(const NGHolder &h, const raw_som_dfa &haig) {
+ DEBUG_PRINTF("haig with %zu states\n", haig.states.size());
+
+ outfixes.push_back(OutfixInfo(ue2::make_unique<raw_som_dfa>(haig)));
+ populateOutfixInfo(outfixes.back(), h, *this);
+
+ return true; /* failure is not yet an option */
+}
+
+bool RoseBuildImpl::addOutfix(const raw_puff &rp) {
+ if (!mpv_outfix) {
mpv_outfix = std::make_unique<OutfixInfo>(MpvProto());
- }
-
+ }
+
auto *mpv = mpv_outfix->mpv();
assert(mpv);
mpv->puffettes.push_back(rp);
-
-    mpv_outfix->maxBAWidth = ROSE_BOUND_INF; /* not bi-anchored */
- mpv_outfix->minWidth = min(mpv_outfix->minWidth, depth(rp.repeats));
- mpv_outfix->maxWidth = rp.unbounded
- ? depth::infinity()
- : max(mpv_outfix->maxWidth, depth(rp.repeats));
-
- if (mpv_outfix->maxOffset == ROSE_BOUND_INF || rp.unbounded) {
- mpv_outfix->maxOffset = ROSE_BOUND_INF;
- } else {
- mpv_outfix->maxOffset = MAX(mpv_outfix->maxOffset, rp.repeats);
- }
-
- return true; /* failure is not yet an option */
-}
-
-bool RoseBuildImpl::addChainTail(const raw_puff &rp, u32 *queue_out,
- u32 *event_out) {
- if (!mpv_outfix) {
+
+    mpv_outfix->maxBAWidth = ROSE_BOUND_INF; /* not bi-anchored */
+ mpv_outfix->minWidth = min(mpv_outfix->minWidth, depth(rp.repeats));
+ mpv_outfix->maxWidth = rp.unbounded
+ ? depth::infinity()
+ : max(mpv_outfix->maxWidth, depth(rp.repeats));
+
+ if (mpv_outfix->maxOffset == ROSE_BOUND_INF || rp.unbounded) {
+ mpv_outfix->maxOffset = ROSE_BOUND_INF;
+ } else {
+ mpv_outfix->maxOffset = MAX(mpv_outfix->maxOffset, rp.repeats);
+ }
+
+ return true; /* failure is not yet an option */
+}
+
+bool RoseBuildImpl::addChainTail(const raw_puff &rp, u32 *queue_out,
+ u32 *event_out) {
+ if (!mpv_outfix) {
mpv_outfix = std::make_unique<OutfixInfo>(MpvProto());
- }
-
+ }
+
auto *mpv = mpv_outfix->mpv();
assert(mpv);
mpv->triggered_puffettes.push_back(rp);
-
-    mpv_outfix->maxBAWidth = ROSE_BOUND_INF; /* not bi-anchored */
- mpv_outfix->minWidth = min(mpv_outfix->minWidth, depth(rp.repeats));
- mpv_outfix->maxWidth = rp.unbounded
- ? depth::infinity()
- : max(mpv_outfix->maxWidth, depth(rp.repeats));
-
- mpv_outfix->maxOffset = ROSE_BOUND_INF; /* TODO: we could get information from
- * the caller */
-
- *queue_out = mpv_outfix->get_queue(qif);
+
+    mpv_outfix->maxBAWidth = ROSE_BOUND_INF; /* not bi-anchored */
+ mpv_outfix->minWidth = min(mpv_outfix->minWidth, depth(rp.repeats));
+ mpv_outfix->maxWidth = rp.unbounded
+ ? depth::infinity()
+ : max(mpv_outfix->maxWidth, depth(rp.repeats));
+
+ mpv_outfix->maxOffset = ROSE_BOUND_INF; /* TODO: we could get information from
+ * the caller */
+
+ *queue_out = mpv_outfix->get_queue(qif);
*event_out = MQE_TOP_FIRST + mpv->triggered_puffettes.size() - 1;
-
- return true; /* failure is not yet an option */
-}
-
-static
-bool prepAcceptForAddAnchoredNFA(RoseBuildImpl &tbi, const NGHolder &w,
+
+ return true; /* failure is not yet an option */
+}
+
+static
+bool prepAcceptForAddAnchoredNFA(RoseBuildImpl &tbi, const NGHolder &w,
NFAVertex u,
- const vector<DepthMinMax> &vertexDepths,
- map<u32, DepthMinMax> &depthMap,
+ const vector<DepthMinMax> &vertexDepths,
+ map<u32, DepthMinMax> &depthMap,
map<NFAVertex, set<u32>> &reportMap,
- map<ReportID, u32> &allocated_reports,
- flat_set<u32> &added_lit_ids) {
- const depth max_anchored_depth(tbi.cc.grey.maxAnchoredRegion);
+ map<ReportID, u32> &allocated_reports,
+ flat_set<u32> &added_lit_ids) {
+ const depth max_anchored_depth(tbi.cc.grey.maxAnchoredRegion);
const size_t index = w[u].index;
assert(index < vertexDepths.size());
const DepthMinMax &d = vertexDepths.at(index);
-
- for (const auto &int_report : w[u].reports) {
- assert(int_report != MO_INVALID_IDX);
-
- u32 lit_id;
- if (!contains(allocated_reports, int_report)) {
- lit_id = tbi.getNewLiteralId();
- added_lit_ids.insert(lit_id);
- allocated_reports[int_report] = lit_id;
- } else {
- lit_id = allocated_reports[int_report];
- }
-
- reportMap[u].insert(lit_id);
-
- if (!contains(depthMap, lit_id)) {
- depthMap[lit_id] = d;
- } else {
- depthMap[lit_id] = unionDepthMinMax(depthMap[lit_id], d);
- }
-
+
+ for (const auto &int_report : w[u].reports) {
+ assert(int_report != MO_INVALID_IDX);
+
+ u32 lit_id;
+ if (!contains(allocated_reports, int_report)) {
+ lit_id = tbi.getNewLiteralId();
+ added_lit_ids.insert(lit_id);
+ allocated_reports[int_report] = lit_id;
+ } else {
+ lit_id = allocated_reports[int_report];
+ }
+
+ reportMap[u].insert(lit_id);
+
+ if (!contains(depthMap, lit_id)) {
+ depthMap[lit_id] = d;
+ } else {
+ depthMap[lit_id] = unionDepthMinMax(depthMap[lit_id], d);
+ }
+
if (depthMap[lit_id].max > max_anchored_depth) {
- DEBUG_PRINTF("depth=%s exceeds maxAnchoredRegion=%u\n",
+ DEBUG_PRINTF("depth=%s exceeds maxAnchoredRegion=%u\n",
depthMap[lit_id].max.str().c_str(),
- tbi.cc.grey.maxAnchoredRegion);
- return false;
- }
- }
-
- return true;
-}
-
-// Failure path for addAnchoredAcyclic: removes the literal IDs that have been
-// added to support anchored NFAs. Assumes that they are a contiguous range at
-// the end of the RoseBuildImpl::literal_info vector.
-static
-void removeAddedLiterals(RoseBuildImpl &tbi, const flat_set<u32> &lit_ids) {
- if (lit_ids.empty()) {
- return;
- }
-
+ tbi.cc.grey.maxAnchoredRegion);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+// Failure path for addAnchoredAcyclic: removes the literal IDs that have been
+// added to support anchored NFAs. Assumes that they are a contiguous range at
+// the end of the RoseBuildImpl::literal_info vector.
+static
+void removeAddedLiterals(RoseBuildImpl &tbi, const flat_set<u32> &lit_ids) {
+ if (lit_ids.empty()) {
+ return;
+ }
+
DEBUG_PRINTF("remove last %zu literals\n", lit_ids.size());
- // lit_ids should be a contiguous range.
- assert(lit_ids.size() == *lit_ids.rbegin() - *lit_ids.begin() + 1);
+ // lit_ids should be a contiguous range.
+ assert(lit_ids.size() == *lit_ids.rbegin() - *lit_ids.begin() + 1);
assert(*lit_ids.rbegin() == tbi.literals.size() - 1);
-
+
assert(all_of_in(lit_ids, [&](u32 lit_id) {
return lit_id < tbi.literal_info.size() &&
tbi.literals.at(lit_id).table == ROSE_ANCHORED &&
tbi.literal_info[lit_id].vertices.empty();
}));
-
+
tbi.literals.erase_back(lit_ids.size());
assert(tbi.literals.size() == *lit_ids.begin());
-
- // lit_ids should be at the end of tbi.literal_info.
- assert(tbi.literal_info.size() == *lit_ids.rbegin() + 1);
- tbi.literal_info.resize(*lit_ids.begin()); // remove all ids in lit_ids
-}
-
-bool RoseBuildImpl::addAnchoredAcyclic(const NGHolder &h) {
+
+ // lit_ids should be at the end of tbi.literal_info.
+ assert(tbi.literal_info.size() == *lit_ids.rbegin() + 1);
+ tbi.literal_info.resize(*lit_ids.begin()); // remove all ids in lit_ids
+}
+
+bool RoseBuildImpl::addAnchoredAcyclic(const NGHolder &h) {
auto vertexDepths = calcDepthsFrom(h, h.start);
-
- map<NFAVertex, set<u32> > reportMap; /* NFAVertex -> literal ids */
- map<u32, DepthMinMax> depthMap; /* literal id -> min/max depth */
- map<ReportID, u32> allocated_reports; /* report -> literal id */
- flat_set<u32> added_lit_ids; /* literal ids added for this NFA */
-
- for (auto v : inv_adjacent_vertices_range(h.accept, h)) {
+
+ map<NFAVertex, set<u32> > reportMap; /* NFAVertex -> literal ids */
+ map<u32, DepthMinMax> depthMap; /* literal id -> min/max depth */
+ map<ReportID, u32> allocated_reports; /* report -> literal id */
+ flat_set<u32> added_lit_ids; /* literal ids added for this NFA */
+
+ for (auto v : inv_adjacent_vertices_range(h.accept, h)) {
if (!prepAcceptForAddAnchoredNFA(*this, h, v, vertexDepths, depthMap,
- reportMap, allocated_reports,
- added_lit_ids)) {
- removeAddedLiterals(*this, added_lit_ids);
- return false;
- }
- }
-
- map<ReportID, u32> allocated_reports_eod; /* report -> literal id */
-
- for (auto v : inv_adjacent_vertices_range(h.acceptEod, h)) {
- if (v == h.accept) {
- continue;
- }
+ reportMap, allocated_reports,
+ added_lit_ids)) {
+ removeAddedLiterals(*this, added_lit_ids);
+ return false;
+ }
+ }
+
+ map<ReportID, u32> allocated_reports_eod; /* report -> literal id */
+
+ for (auto v : inv_adjacent_vertices_range(h.acceptEod, h)) {
+ if (v == h.accept) {
+ continue;
+ }
if (!prepAcceptForAddAnchoredNFA(*this, h, v, vertexDepths, depthMap,
- reportMap, allocated_reports_eod,
- added_lit_ids)) {
- removeAddedLiterals(*this, added_lit_ids);
- return false;
- }
- }
-
- assert(!reportMap.empty());
-
- int rv = addAnchoredNFA(*this, h, reportMap);
- if (rv != ANCHORED_FAIL) {
- assert(rv != ANCHORED_REMAP);
- DEBUG_PRINTF("added anchored nfa\n");
- /* add edges to the rose graph to bubble the match up */
- for (const auto &m : allocated_reports) {
- const ReportID &report = m.first;
- const u32 &lit_id = m.second;
- assert(depthMap[lit_id].max.is_finite());
- u32 minBound = depthMap[lit_id].min;
- u32 maxBound = depthMap[lit_id].max;
- RoseVertex v
- = createAnchoredVertex(this, lit_id, minBound, maxBound);
- g[v].reports.insert(report);
- }
-
- for (const auto &m : allocated_reports_eod) {
- const ReportID &report = m.first;
- const u32 &lit_id = m.second;
- assert(depthMap[lit_id].max.is_finite());
- u32 minBound = depthMap[lit_id].min;
- u32 maxBound = depthMap[lit_id].max;
- RoseVertex v
- = createAnchoredVertex(this, lit_id, minBound, maxBound);
- RoseVertex eod = add_vertex(g);
- g[eod].eod_accept = true;
- g[eod].reports.insert(report);
- g[eod].min_offset = g[v].min_offset;
- g[eod].max_offset = g[v].max_offset;
- add_edge(v, eod, g);
- }
-
- return true;
- } else {
- DEBUG_PRINTF("failed to add anchored nfa\n");
- removeAddedLiterals(*this, added_lit_ids);
- return false;
- }
-}
-
-} // namespace ue2
+ reportMap, allocated_reports_eod,
+ added_lit_ids)) {
+ removeAddedLiterals(*this, added_lit_ids);
+ return false;
+ }
+ }
+
+ assert(!reportMap.empty());
+
+ int rv = addAnchoredNFA(*this, h, reportMap);
+ if (rv != ANCHORED_FAIL) {
+ assert(rv != ANCHORED_REMAP);
+ DEBUG_PRINTF("added anchored nfa\n");
+ /* add edges to the rose graph to bubble the match up */
+ for (const auto &m : allocated_reports) {
+ const ReportID &report = m.first;
+ const u32 &lit_id = m.second;
+ assert(depthMap[lit_id].max.is_finite());
+ u32 minBound = depthMap[lit_id].min;
+ u32 maxBound = depthMap[lit_id].max;
+ RoseVertex v
+ = createAnchoredVertex(this, lit_id, minBound, maxBound);
+ g[v].reports.insert(report);
+ }
+
+ for (const auto &m : allocated_reports_eod) {
+ const ReportID &report = m.first;
+ const u32 &lit_id = m.second;
+ assert(depthMap[lit_id].max.is_finite());
+ u32 minBound = depthMap[lit_id].min;
+ u32 maxBound = depthMap[lit_id].max;
+ RoseVertex v
+ = createAnchoredVertex(this, lit_id, minBound, maxBound);
+ RoseVertex eod = add_vertex(g);
+ g[eod].eod_accept = true;
+ g[eod].reports.insert(report);
+ g[eod].min_offset = g[v].min_offset;
+ g[eod].max_offset = g[v].max_offset;
+ add_edge(v, eod, g);
+ }
+
+ return true;
+ } else {
+ DEBUG_PRINTF("failed to add anchored nfa\n");
+ removeAddedLiterals(*this, added_lit_ids);
+ return false;
+ }
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_add_internal.h b/contrib/libs/hyperscan/src/rose/rose_build_add_internal.h
index d7fd2e8032..143f1dfa58 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_add_internal.h
+++ b/contrib/libs/hyperscan/src/rose/rose_build_add_internal.h
@@ -1,47 +1,47 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef ROSE_BUILD_ADD_INTERNAL_H
-#define ROSE_BUILD_ADD_INTERNAL_H
-
-#include "rose_graph.h"
-#include "ue2common.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ROSE_BUILD_ADD_INTERNAL_H
+#define ROSE_BUILD_ADD_INTERNAL_H
+
+#include "rose_graph.h"
+#include "ue2common.h"
#include "util/flat_containers.h"
-
-namespace ue2 {
-
-class RoseBuildImpl;
-
-RoseVertex createVertex(RoseBuildImpl *build, const RoseVertex parent,
- u32 minBound, u32 maxBound, u32 literalId,
- size_t literalLength,
+
+namespace ue2 {
+
+class RoseBuildImpl;
+
+RoseVertex createVertex(RoseBuildImpl *build, const RoseVertex parent,
+ u32 minBound, u32 maxBound, u32 literalId,
+ size_t literalLength,
const flat_set<ReportID> &reports);
-
-} // namespace ue2
-
+
+} // namespace ue2
+
#endif // ROSE_BUILD_ADD_INTERNAL_H
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_add_mask.cpp b/contrib/libs/hyperscan/src/rose/rose_build_add_mask.cpp
index be9ff37bdc..0a7e44c370 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_add_mask.cpp
+++ b/contrib/libs/hyperscan/src/rose/rose_build_add_mask.cpp
@@ -1,796 +1,796 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "rose_build_impl.h"
-
-#include "ue2common.h"
-#include "grey.h"
-#include "rose_build_add_internal.h"
-#include "rose_build_anchored.h"
-#include "rose_in_util.h"
-#include "hwlm/hwlm_literal.h"
-#include "nfagraph/ng_depth.h"
-#include "nfagraph/ng_dump.h"
-#include "nfagraph/ng_holder.h"
-#include "nfagraph/ng_limex.h"
-#include "nfagraph/ng_reports.h"
-#include "nfagraph/ng_util.h"
-#include "nfagraph/ng_width.h"
-#include "util/charreach.h"
-#include "util/charreach_util.h"
-#include "util/compare.h"
-#include "util/compile_context.h"
-#include "util/container.h"
-#include "util/dump_charclass.h"
-#include "util/graph.h"
-#include "util/make_unique.h"
-#include "util/ue2string.h"
-#include "util/verify_types.h"
-
-#include <algorithm>
-#include <map>
-#include <set>
-#include <string>
-#include <vector>
-#include <utility>
-
-using namespace std;
-
-namespace ue2 {
-
-#define MIN_MASK_LIT_LEN 2
-#define MAX_MASK_SIZE 255
-#define MAX_MASK_LITS 30
-
-static
-void findMaskLiteral(const vector<CharReach> &mask, bool streaming,
- ue2_literal *lit, u32 *offset, const Grey &grey) {
- bool case_fixed = false;
- bool nocase = false;
-
- size_t best_begin = 0;
- size_t best_end = 0;
- size_t best_len = 0;
-
- size_t begin = 0;
- size_t end = 0;
-
- for (size_t i = 0; i < mask.size(); i++) {
- bool fail = false;
- if (mask[i].count() != 1 && !mask[i].isCaselessChar()) {
- DEBUG_PRINTF("hit non-literal char, resetting at %zu\n", i);
- fail = true;
- }
-
- if (!fail && streaming && (end >= grey.maxHistoryAvailable + 1)) {
- DEBUG_PRINTF("hit literal limit, resetting at %zu\n", i);
- fail = true;
- }
-
- if (!fail && case_fixed && mask[i].isAlpha()) {
- if (nocase && mask[i].count() != 2) {
- fail = true;
- }
-
- if (!nocase && mask[i].count() != 1) {
- fail = true;
- }
- }
-
- if (fail) {
- case_fixed = false;
- nocase = false;
- size_t len = end - begin;
- bool better = len > best_len;
- if (better) {
- best_begin = begin;
- best_end = end;
- best_len = len;
- }
- begin = i + 1;
- end = i + 1;
- } else {
- assert(end == i);
- end = i + 1;
-
- if (mask[i].isAlpha()) {
- case_fixed = true;
- nocase = mask[i].count() == 2;
- }
- }
- }
-
- size_t len = end - begin;
-    /* Everybody would rather the trigger be towards the end */
- bool better = len >= best_len && mask.size() - end <= MAX_DELAY;
-
- if (better) {
- best_begin = begin;
- best_end = end;
- best_len = len;
- }
-
- for (size_t i = best_begin; i < best_end; i++) {
- assert(mask[i].count() == 1 || mask[i].count() == 2);
- lit->push_back(mask[i].find_first(), mask[i].count() > 1);
- }
-
- *offset = verify_u32(best_begin);
-}
-
-static
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "rose_build_impl.h"
+
+#include "ue2common.h"
+#include "grey.h"
+#include "rose_build_add_internal.h"
+#include "rose_build_anchored.h"
+#include "rose_in_util.h"
+#include "hwlm/hwlm_literal.h"
+#include "nfagraph/ng_depth.h"
+#include "nfagraph/ng_dump.h"
+#include "nfagraph/ng_holder.h"
+#include "nfagraph/ng_limex.h"
+#include "nfagraph/ng_reports.h"
+#include "nfagraph/ng_util.h"
+#include "nfagraph/ng_width.h"
+#include "util/charreach.h"
+#include "util/charreach_util.h"
+#include "util/compare.h"
+#include "util/compile_context.h"
+#include "util/container.h"
+#include "util/dump_charclass.h"
+#include "util/graph.h"
+#include "util/make_unique.h"
+#include "util/ue2string.h"
+#include "util/verify_types.h"
+
+#include <algorithm>
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+#include <utility>
+
+using namespace std;
+
+namespace ue2 {
+
+#define MIN_MASK_LIT_LEN 2
+#define MAX_MASK_SIZE 255
+#define MAX_MASK_LITS 30
+
+static
+void findMaskLiteral(const vector<CharReach> &mask, bool streaming,
+ ue2_literal *lit, u32 *offset, const Grey &grey) {
+ bool case_fixed = false;
+ bool nocase = false;
+
+ size_t best_begin = 0;
+ size_t best_end = 0;
+ size_t best_len = 0;
+
+ size_t begin = 0;
+ size_t end = 0;
+
+ for (size_t i = 0; i < mask.size(); i++) {
+ bool fail = false;
+ if (mask[i].count() != 1 && !mask[i].isCaselessChar()) {
+ DEBUG_PRINTF("hit non-literal char, resetting at %zu\n", i);
+ fail = true;
+ }
+
+ if (!fail && streaming && (end >= grey.maxHistoryAvailable + 1)) {
+ DEBUG_PRINTF("hit literal limit, resetting at %zu\n", i);
+ fail = true;
+ }
+
+ if (!fail && case_fixed && mask[i].isAlpha()) {
+ if (nocase && mask[i].count() != 2) {
+ fail = true;
+ }
+
+ if (!nocase && mask[i].count() != 1) {
+ fail = true;
+ }
+ }
+
+ if (fail) {
+ case_fixed = false;
+ nocase = false;
+ size_t len = end - begin;
+ bool better = len > best_len;
+ if (better) {
+ best_begin = begin;
+ best_end = end;
+ best_len = len;
+ }
+ begin = i + 1;
+ end = i + 1;
+ } else {
+ assert(end == i);
+ end = i + 1;
+
+ if (mask[i].isAlpha()) {
+ case_fixed = true;
+ nocase = mask[i].count() == 2;
+ }
+ }
+ }
+
+ size_t len = end - begin;
+    /* Everybody would rather the trigger be towards the end */
+ bool better = len >= best_len && mask.size() - end <= MAX_DELAY;
+
+ if (better) {
+ best_begin = begin;
+ best_end = end;
+ best_len = len;
+ }
+
+ for (size_t i = best_begin; i < best_end; i++) {
+ assert(mask[i].count() == 1 || mask[i].count() == 2);
+ lit->push_back(mask[i].find_first(), mask[i].count() > 1);
+ }
+
+ *offset = verify_u32(best_begin);
+}
+
+static
bool initFmlCandidates(const CharReach &cr, vector<ue2_literal> &cand) {
- for (size_t i = cr.find_first(); i != cr.npos; i = cr.find_next(i)) {
- char c = (char)i;
- bool nocase = myisupper(c) && cr.test(mytolower(c));
- if (myislower(c) && cr.test(mytoupper(c))) {
- continue;
- }
-
+ for (size_t i = cr.find_first(); i != cr.npos; i = cr.find_next(i)) {
+ char c = (char)i;
+ bool nocase = myisupper(c) && cr.test(mytolower(c));
+ if (myislower(c) && cr.test(mytoupper(c))) {
+ continue;
+ }
+
if (cand.size() >= MAX_MASK_LITS) {
- DEBUG_PRINTF("hit lit limit of %u\n", MAX_MASK_LITS);
- return false;
- }
-
+ DEBUG_PRINTF("hit lit limit of %u\n", MAX_MASK_LITS);
+ return false;
+ }
+
cand.emplace_back(c, nocase);
- }
-
+ }
+
assert(cand.size() <= MAX_MASK_LITS);
return !cand.empty();
-}
-
-static
+}
+
+static
bool expandFmlCandidates(const CharReach &cr, vector<ue2_literal> &curr,
vector<ue2_literal> &cand) {
- DEBUG_PRINTF("expanding string with cr of %zu\n", cr.count());
+ DEBUG_PRINTF("expanding string with cr of %zu\n", cr.count());
DEBUG_PRINTF(" current cand list size %zu\n", cand.size());
-
+
curr.clear();
-
- for (size_t i = cr.find_first(); i != cr.npos; i = cr.find_next(i)) {
- char c = (char)i;
- bool nocase = myisupper(c) && cr.test(mytolower(c));
- if (myislower(c) && cr.test(mytoupper(c))) {
- continue;
- }
-
+
+ for (size_t i = cr.find_first(); i != cr.npos; i = cr.find_next(i)) {
+ char c = (char)i;
+ bool nocase = myisupper(c) && cr.test(mytolower(c));
+ if (myislower(c) && cr.test(mytoupper(c))) {
+ continue;
+ }
+
for (const auto &lit : cand) {
- if (curr.size() >= MAX_MASK_LITS) {
- DEBUG_PRINTF("hit lit limit of %u\n", MAX_MASK_LITS);
- return false;
- }
-
+ if (curr.size() >= MAX_MASK_LITS) {
+ DEBUG_PRINTF("hit lit limit of %u\n", MAX_MASK_LITS);
+ return false;
+ }
+
curr.push_back(lit);
curr.back().push_back(c, nocase);
- }
- }
-
- if (curr.back().length() > MAX_MASK2_WIDTH &&
- any_of(begin(curr), end(curr), mixed_sensitivity)) {
- DEBUG_PRINTF("mixed-sensitivity lit is too long, stopping\n");
- return false;
- }
-
- assert(curr.size() <= MAX_MASK_LITS);
+ }
+ }
+
+ if (curr.back().length() > MAX_MASK2_WIDTH &&
+ any_of(begin(curr), end(curr), mixed_sensitivity)) {
+ DEBUG_PRINTF("mixed-sensitivity lit is too long, stopping\n");
+ return false;
+ }
+
+ assert(curr.size() <= MAX_MASK_LITS);
cand.swap(curr);
- return true;
-}
-
-static
-u32 scoreFmlCandidates(const vector<ue2_literal> &cand) {
- if (cand.empty()) {
- DEBUG_PRINTF("no candidates\n");
- return 0;
- }
-
- const u32 len = cand.back().length();
-
- DEBUG_PRINTF("length = %u count %zu\n", len, cand.size());
- u32 min_period = len;
-
- for (const auto &lit : cand) {
+ return true;
+}
+
+static
+u32 scoreFmlCandidates(const vector<ue2_literal> &cand) {
+ if (cand.empty()) {
+ DEBUG_PRINTF("no candidates\n");
+ return 0;
+ }
+
+ const u32 len = cand.back().length();
+
+ DEBUG_PRINTF("length = %u count %zu\n", len, cand.size());
+ u32 min_period = len;
+
+ for (const auto &lit : cand) {
DEBUG_PRINTF("candidate: %s\n", dumpString(lit).c_str());
- u32 period = lit.length() - maxStringSelfOverlap(lit);
- min_period = min(min_period, period);
- }
- DEBUG_PRINTF("min_period %u\n", min_period);
- u32 length_score =
- (5 * min_period + len) * (cand.back().any_nocase() ? 90 : 100);
- u32 count_penalty;
- if (len > 4) {
- count_penalty = 9 * len * cand.size();
- } else {
- count_penalty = 5 * cand.size();
- }
- if (length_score <= count_penalty) {
- return 1;
- }
- return length_score - count_penalty;
-}
-
-/* favours later literals */
-static
-bool findMaskLiterals(const vector<CharReach> &mask, vector<ue2_literal> *lit,
- u32 *minBound, u32 *length) {
- *minBound = 0;
- *length = 0;
-
+ u32 period = lit.length() - maxStringSelfOverlap(lit);
+ min_period = min(min_period, period);
+ }
+ DEBUG_PRINTF("min_period %u\n", min_period);
+ u32 length_score =
+ (5 * min_period + len) * (cand.back().any_nocase() ? 90 : 100);
+ u32 count_penalty;
+ if (len > 4) {
+ count_penalty = 9 * len * cand.size();
+ } else {
+ count_penalty = 5 * cand.size();
+ }
+ if (length_score <= count_penalty) {
+ return 1;
+ }
+ return length_score - count_penalty;
+}
+
+/* favours later literals */
+static
+bool findMaskLiterals(const vector<CharReach> &mask, vector<ue2_literal> *lit,
+ u32 *minBound, u32 *length) {
+ *minBound = 0;
+ *length = 0;
+
vector<ue2_literal> candidates, best_candidates, curr_candidates;
- u32 best_score = 0;
- u32 best_minOffset = 0;
+ u32 best_score = 0;
+ u32 best_minOffset = 0;
for (auto it = mask.begin(); it != mask.end(); ++it) {
- candidates.clear();
+ candidates.clear();
if (!initFmlCandidates(*it, candidates)) {
- DEBUG_PRINTF("failed to init\n");
- continue;
- }
- DEBUG_PRINTF("++\n");
+ DEBUG_PRINTF("failed to init\n");
+ continue;
+ }
+ DEBUG_PRINTF("++\n");
auto jt = it;
while (jt != mask.begin()) {
- --jt;
- DEBUG_PRINTF("--\n");
+ --jt;
+ DEBUG_PRINTF("--\n");
if (!expandFmlCandidates(*jt, curr_candidates, candidates)) {
- DEBUG_PRINTF("expansion stopped\n");
- break;
- }
- }
+ DEBUG_PRINTF("expansion stopped\n");
+ break;
+ }
+ }
// Candidates have been expanded in reverse order.
for (auto &cand : candidates) {
cand = reverse_literal(cand);
}
- u32 score = scoreFmlCandidates(candidates);
- DEBUG_PRINTF("scored %u for literal set of size %zu\n", score,
- candidates.size());
- if (!candidates.empty() && score >= best_score) {
+ u32 score = scoreFmlCandidates(candidates);
+ DEBUG_PRINTF("scored %u for literal set of size %zu\n", score,
+ candidates.size());
+ if (!candidates.empty() && score >= best_score) {
best_minOffset = it - mask.begin() - candidates.back().length() + 1;
- best_candidates.swap(candidates);
- best_score = score;
- }
- }
-
- if (!best_score) {
- DEBUG_PRINTF("no lits\n");
- return false;
- }
-
- *minBound = best_minOffset;
- *length = best_candidates.back().length();
-
- DEBUG_PRINTF("best minbound %u length %u\n", *minBound, *length);
-
+ best_candidates.swap(candidates);
+ best_score = score;
+ }
+ }
+
+ if (!best_score) {
+ DEBUG_PRINTF("no lits\n");
+ return false;
+ }
+
+ *minBound = best_minOffset;
+ *length = best_candidates.back().length();
+
+ DEBUG_PRINTF("best minbound %u length %u\n", *minBound, *length);
+
assert(all_of_in(best_candidates, [&](const ue2_literal &s) {
return s.length() == *length;
}));
*lit = std::move(best_candidates);
- return true;
-}
-
-static
-unique_ptr<NGHolder> buildMaskLhs(bool anchored, u32 prefix_len,
- const vector<CharReach> &mask) {
- DEBUG_PRINTF("build %slhs len %u/%zu\n", anchored ? "anc " : "", prefix_len,
- mask.size());
-
- unique_ptr<NGHolder> lhs = ue2::make_unique<NGHolder>(NFA_PREFIX);
-
- assert(prefix_len);
- assert(mask.size() >= prefix_len);
- NFAVertex pred = anchored ? lhs->start : lhs->startDs;
-
- u32 m_idx = 0;
- while (prefix_len--) {
- NFAVertex v = add_vertex(*lhs);
- (*lhs)[v].char_reach = mask[m_idx++];
- add_edge(pred, v, *lhs);
- pred = v;
- }
- add_edge(pred, lhs->accept, *lhs);
- (*lhs)[pred].reports.insert(0);
-
- return lhs;
-}
-
-static
-void buildLiteralMask(const vector<CharReach> &mask, vector<u8> &msk,
- vector<u8> &cmp, u32 delay) {
- msk.clear();
- cmp.clear();
- if (mask.size() <= delay) {
- return;
- }
-
- // Construct an and/cmp mask from our mask ending at delay positions before
- // the end of the literal, with max length HWLM_MASKLEN.
-
- auto ite = mask.end() - delay;
- auto it = ite - min(size_t{HWLM_MASKLEN}, mask.size() - delay);
-
- for (; it != ite; ++it) {
- msk.push_back(0);
- cmp.push_back(0);
- make_and_cmp_mask(*it, &msk.back(), &cmp.back());
- }
-
- assert(msk.size() == cmp.size());
- assert(msk.size() <= HWLM_MASKLEN);
-}
-
-static
+ return true;
+}
+
+static
+unique_ptr<NGHolder> buildMaskLhs(bool anchored, u32 prefix_len,
+ const vector<CharReach> &mask) {
+ DEBUG_PRINTF("build %slhs len %u/%zu\n", anchored ? "anc " : "", prefix_len,
+ mask.size());
+
+ unique_ptr<NGHolder> lhs = ue2::make_unique<NGHolder>(NFA_PREFIX);
+
+ assert(prefix_len);
+ assert(mask.size() >= prefix_len);
+ NFAVertex pred = anchored ? lhs->start : lhs->startDs;
+
+ u32 m_idx = 0;
+ while (prefix_len--) {
+ NFAVertex v = add_vertex(*lhs);
+ (*lhs)[v].char_reach = mask[m_idx++];
+ add_edge(pred, v, *lhs);
+ pred = v;
+ }
+ add_edge(pred, lhs->accept, *lhs);
+ (*lhs)[pred].reports.insert(0);
+
+ return lhs;
+}
+
+static
+void buildLiteralMask(const vector<CharReach> &mask, vector<u8> &msk,
+ vector<u8> &cmp, u32 delay) {
+ msk.clear();
+ cmp.clear();
+ if (mask.size() <= delay) {
+ return;
+ }
+
+ // Construct an and/cmp mask from our mask ending at delay positions before
+ // the end of the literal, with max length HWLM_MASKLEN.
+
+ auto ite = mask.end() - delay;
+ auto it = ite - min(size_t{HWLM_MASKLEN}, mask.size() - delay);
+
+ for (; it != ite; ++it) {
+ msk.push_back(0);
+ cmp.push_back(0);
+ make_and_cmp_mask(*it, &msk.back(), &cmp.back());
+ }
+
+ assert(msk.size() == cmp.size());
+ assert(msk.size() <= HWLM_MASKLEN);
+}
+
+static
bool validateTransientMask(const vector<CharReach> &mask, bool anchored,
bool eod, const Grey &grey) {
- assert(!mask.empty());
-
- // An EOD anchored mask requires that everything fit into history, while an
- // ordinary floating case can handle one byte more (i.e., max history size
- // and one byte in the buffer).
- const size_t max_width = grey.maxHistoryAvailable + (eod ? 0 : 1);
- if (mask.size() > max_width) {
- DEBUG_PRINTF("mask too long for max available history\n");
- return false;
- }
-
+ assert(!mask.empty());
+
+ // An EOD anchored mask requires that everything fit into history, while an
+ // ordinary floating case can handle one byte more (i.e., max history size
+ // and one byte in the buffer).
+ const size_t max_width = grey.maxHistoryAvailable + (eod ? 0 : 1);
+ if (mask.size() > max_width) {
+ DEBUG_PRINTF("mask too long for max available history\n");
+ return false;
+ }
+
/* although anchored masks cannot be transient, short masks may be placed
* into the atable. */
if (anchored && mask.size() > grey.maxAnchoredRegion) {
return false;
}
- vector<ue2_literal> lits;
-    u32 lit_minBound; /* minBound of each literal in lits */
-    u32 lit_length; /* length of each literal in lits */
- if (!findMaskLiterals(mask, &lits, &lit_minBound, &lit_length)) {
- DEBUG_PRINTF("failed to find any lits\n");
- return false;
- }
-
- if (lits.empty()) {
- return false;
- }
-
- const u32 delay = mask.size() - lit_length - lit_minBound;
- if (delay > MAX_DELAY) {
- DEBUG_PRINTF("delay %u is too much\n", delay);
- return false;
- }
-
- if (lit_length == 1 && lits.size() > 3) {
- DEBUG_PRINTF("no decent trigger\n");
- return false;
- }
-
- // Mixed-sensitivity literals require benefits masks to implement, and thus
- // have a maximum length. This has been taken into account in
- // findMaskLiterals.
- assert(lit_length <= MAX_MASK2_WIDTH ||
- none_of(begin(lits), end(lits), mixed_sensitivity));
-
- // Build the HWLM literal mask.
- vector<u8> msk, cmp;
- if (grey.roseHamsterMasks) {
- buildLiteralMask(mask, msk, cmp, delay);
- }
-
- // We consider the HWLM mask length to run from the first non-zero byte to
- // the end, and let max(mask length, literal length) be the effective
- // literal length.
- //
- // A one-byte literal with no mask is too short, but a one-byte literal
- // with a few bytes of mask information is OK.
-
- u32 msk_length = distance(find_if(begin(msk), end(msk),
- [](u8 v) { return v != 0; }), end(msk));
- u32 eff_lit_length = max(lit_length, msk_length);
- DEBUG_PRINTF("msk_length=%u, eff_lit_length = %u\n", msk_length,
- eff_lit_length);
-
- if (eff_lit_length < MIN_MASK_LIT_LEN) {
- DEBUG_PRINTF("literals too short\n");
- return false;
- }
-
- DEBUG_PRINTF("mask is ok\n");
- return true;
-}
-
-static
-bool maskIsNeeded(const ue2_literal &lit, const NGHolder &g) {
+ vector<ue2_literal> lits;
+    u32 lit_minBound; /* minBound of each literal in lits */
+    u32 lit_length; /* length of each literal in lits */
+ if (!findMaskLiterals(mask, &lits, &lit_minBound, &lit_length)) {
+ DEBUG_PRINTF("failed to find any lits\n");
+ return false;
+ }
+
+ if (lits.empty()) {
+ return false;
+ }
+
+ const u32 delay = mask.size() - lit_length - lit_minBound;
+ if (delay > MAX_DELAY) {
+ DEBUG_PRINTF("delay %u is too much\n", delay);
+ return false;
+ }
+
+ if (lit_length == 1 && lits.size() > 3) {
+ DEBUG_PRINTF("no decent trigger\n");
+ return false;
+ }
+
+ // Mixed-sensitivity literals require benefits masks to implement, and thus
+ // have a maximum length. This has been taken into account in
+ // findMaskLiterals.
+ assert(lit_length <= MAX_MASK2_WIDTH ||
+ none_of(begin(lits), end(lits), mixed_sensitivity));
+
+ // Build the HWLM literal mask.
+ vector<u8> msk, cmp;
+ if (grey.roseHamsterMasks) {
+ buildLiteralMask(mask, msk, cmp, delay);
+ }
+
+ // We consider the HWLM mask length to run from the first non-zero byte to
+ // the end, and let max(mask length, literal length) be the effective
+ // literal length.
+ //
+ // A one-byte literal with no mask is too short, but a one-byte literal
+ // with a few bytes of mask information is OK.
+
+ u32 msk_length = distance(find_if(begin(msk), end(msk),
+ [](u8 v) { return v != 0; }), end(msk));
+ u32 eff_lit_length = max(lit_length, msk_length);
+ DEBUG_PRINTF("msk_length=%u, eff_lit_length = %u\n", msk_length,
+ eff_lit_length);
+
+ if (eff_lit_length < MIN_MASK_LIT_LEN) {
+ DEBUG_PRINTF("literals too short\n");
+ return false;
+ }
+
+ DEBUG_PRINTF("mask is ok\n");
+ return true;
+}
+
+static
+bool maskIsNeeded(const ue2_literal &lit, const NGHolder &g) {
flat_set<NFAVertex> curr = {g.accept};
flat_set<NFAVertex> next;
-
- for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) {
- const CharReach &cr = *it;
- DEBUG_PRINTF("check %s\n", describeClass(*it).c_str());
- next.clear();
- for (auto v : curr) {
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (isSubsetOf(cr, g[u].char_reach)) {
- next.insert(u);
- }
- }
- }
- if (next.empty()) {
- DEBUG_PRINTF("no path to start\n");
- return true;
- }
- curr.swap(next);
- }
-
- for (auto v : curr) {
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (u == g.start || u == g.startDs) {
- DEBUG_PRINTF("literal spans graph from start to accept\n");
- return false;
-
- }
- }
- }
-
- DEBUG_PRINTF("literal doesn't reach start\n");
- return true;
-}
-
-static
-void addTransientMask(RoseBuildImpl &build, const vector<CharReach> &mask,
+
+ for (auto it = lit.rbegin(), ite = lit.rend(); it != ite; ++it) {
+ const CharReach &cr = *it;
+ DEBUG_PRINTF("check %s\n", describeClass(*it).c_str());
+ next.clear();
+ for (auto v : curr) {
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (isSubsetOf(cr, g[u].char_reach)) {
+ next.insert(u);
+ }
+ }
+ }
+ if (next.empty()) {
+ DEBUG_PRINTF("no path to start\n");
+ return true;
+ }
+ curr.swap(next);
+ }
+
+ for (auto v : curr) {
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (u == g.start || u == g.startDs) {
+ DEBUG_PRINTF("literal spans graph from start to accept\n");
+ return false;
+
+ }
+ }
+ }
+
+ DEBUG_PRINTF("literal doesn't reach start\n");
+ return true;
+}
+
+static
+void addTransientMask(RoseBuildImpl &build, const vector<CharReach> &mask,
const flat_set<ReportID> &reports, bool anchored,
- bool eod) {
- vector<ue2_literal> lits;
-    u32 lit_minBound; /* minBound of each literal in lits */
-    u32 lit_length; /* length of each literal in lits */
- if (!findMaskLiterals(mask, &lits, &lit_minBound, &lit_length)) {
- DEBUG_PRINTF("failed to find any lits\n");
- assert(0);
- return;
- }
-
- DEBUG_PRINTF("%zu literals, minBound=%u, length=%u\n", lits.size(),
- lit_minBound, lit_length);
-
- if (lits.empty()) {
- assert(0);
- return;
- }
-
- u32 delay = mask.size() - lit_length - lit_minBound;
- assert(delay <= MAX_DELAY);
- DEBUG_PRINTF("delay=%u\n", delay);
-
- shared_ptr<NGHolder> mask_graph = buildMaskLhs(anchored, mask.size(), mask);
-
- u32 mask_lag = 0; /* TODO */
-
- // Everyone gets the same report ID.
- ReportID mask_report = build.getNewNfaReport();
+ bool eod) {
+ vector<ue2_literal> lits;
+    u32 lit_minBound; /* minBound of each literal in lits */
+    u32 lit_length; /* length of each literal in lits */
+ if (!findMaskLiterals(mask, &lits, &lit_minBound, &lit_length)) {
+ DEBUG_PRINTF("failed to find any lits\n");
+ assert(0);
+ return;
+ }
+
+ DEBUG_PRINTF("%zu literals, minBound=%u, length=%u\n", lits.size(),
+ lit_minBound, lit_length);
+
+ if (lits.empty()) {
+ assert(0);
+ return;
+ }
+
+ u32 delay = mask.size() - lit_length - lit_minBound;
+ assert(delay <= MAX_DELAY);
+ DEBUG_PRINTF("delay=%u\n", delay);
+
+ shared_ptr<NGHolder> mask_graph = buildMaskLhs(anchored, mask.size(), mask);
+
+ u32 mask_lag = 0; /* TODO */
+
+ // Everyone gets the same report ID.
+ ReportID mask_report = build.getNewNfaReport();
set_report(*mask_graph, mask_report);
-
- // Build the HWLM literal mask.
- vector<u8> msk, cmp;
- if (build.cc.grey.roseHamsterMasks) {
- buildLiteralMask(mask, msk, cmp, delay);
- }
-
- /* adjust bounds to be relative to trigger rather than mask */
- const u32 v_min_offset = add_rose_depth(0, mask.size());
- const u32 v_max_offset =
- add_rose_depth(anchored ? 0 : ROSE_BOUND_INF, mask.size());
-
- RoseGraph &g = build.g;
-
- // By default, masked literals go into the floating table (except for eod
- // cases).
- enum rose_literal_table table = ROSE_FLOATING;
-
- RoseVertex eod_v = RoseGraph::null_vertex();
- if (eod) {
- eod_v = add_vertex(g);
- g[eod_v].eod_accept = true;
- insert(&g[eod_v].reports, reports);
- g[eod_v].min_offset = v_min_offset;
- g[eod_v].max_offset = v_max_offset;
-
- // Note: because this is a transient mask, we know that we can match it
- // completely inside the history buffer. So, using the EOD literal
- // table is always safe.
- table = ROSE_EOD_ANCHORED;
-
- // Widen the EOD table window to cover the mask.
- ENSURE_AT_LEAST(&build.ematcher_region_size, mask.size());
- }
-
+
+ // Build the HWLM literal mask.
+ vector<u8> msk, cmp;
+ if (build.cc.grey.roseHamsterMasks) {
+ buildLiteralMask(mask, msk, cmp, delay);
+ }
+
+ /* adjust bounds to be relative to trigger rather than mask */
+ const u32 v_min_offset = add_rose_depth(0, mask.size());
+ const u32 v_max_offset =
+ add_rose_depth(anchored ? 0 : ROSE_BOUND_INF, mask.size());
+
+ RoseGraph &g = build.g;
+
+ // By default, masked literals go into the floating table (except for eod
+ // cases).
+ enum rose_literal_table table = ROSE_FLOATING;
+
+ RoseVertex eod_v = RoseGraph::null_vertex();
+ if (eod) {
+ eod_v = add_vertex(g);
+ g[eod_v].eod_accept = true;
+ insert(&g[eod_v].reports, reports);
+ g[eod_v].min_offset = v_min_offset;
+ g[eod_v].max_offset = v_max_offset;
+
+ // Note: because this is a transient mask, we know that we can match it
+ // completely inside the history buffer. So, using the EOD literal
+ // table is always safe.
+ table = ROSE_EOD_ANCHORED;
+
+ // Widen the EOD table window to cover the mask.
+ ENSURE_AT_LEAST(&build.ematcher_region_size, mask.size());
+ }
+
const flat_set<ReportID> no_reports;
-
- for (const auto &lit : lits) {
- u32 lit_id = build.getLiteralId(lit, msk, cmp, delay, table);
- const RoseVertex parent = anchored ? build.anchored_root : build.root;
- bool use_mask = delay || maskIsNeeded(lit, *mask_graph);
-
- auto v = createVertex(&build, parent, 0, ROSE_BOUND_INF, lit_id,
- lit.length(), eod ? no_reports : reports);
-
- if (use_mask) {
- g[v].left.graph = mask_graph;
- g[v].left.lag = mask_lag;
- g[v].left.leftfix_report = mask_report;
- } else {
- // Make sure our edge bounds are correct.
+
+ for (const auto &lit : lits) {
+ u32 lit_id = build.getLiteralId(lit, msk, cmp, delay, table);
+ const RoseVertex parent = anchored ? build.anchored_root : build.root;
+ bool use_mask = delay || maskIsNeeded(lit, *mask_graph);
+
+ auto v = createVertex(&build, parent, 0, ROSE_BOUND_INF, lit_id,
+ lit.length(), eod ? no_reports : reports);
+
+ if (use_mask) {
+ g[v].left.graph = mask_graph;
+ g[v].left.lag = mask_lag;
+ g[v].left.leftfix_report = mask_report;
+ } else {
+ // Make sure our edge bounds are correct.
RoseEdge e = edge(parent, v, g);
- g[e].minBound = 0;
- g[e].maxBound = anchored ? 0 : ROSE_BOUND_INF;
- g[e].history = anchored ? ROSE_ROLE_HISTORY_ANCH
- : ROSE_ROLE_HISTORY_NONE;
- }
-
- // Set offsets correctly.
- g[v].min_offset = v_min_offset;
- g[v].max_offset = v_max_offset;
-
- if (eod) {
+ g[e].minBound = 0;
+ g[e].maxBound = anchored ? 0 : ROSE_BOUND_INF;
+ g[e].history = anchored ? ROSE_ROLE_HISTORY_ANCH
+ : ROSE_ROLE_HISTORY_NONE;
+ }
+
+ // Set offsets correctly.
+ g[v].min_offset = v_min_offset;
+ g[v].max_offset = v_max_offset;
+
+ if (eod) {
RoseEdge e = add_edge(v, eod_v, g);
- g[e].minBound = 0;
- g[e].maxBound = 0;
- g[e].history = ROSE_ROLE_HISTORY_LAST_BYTE;
- }
- }
-}
-
-static
+ g[e].minBound = 0;
+ g[e].maxBound = 0;
+ g[e].history = ROSE_ROLE_HISTORY_LAST_BYTE;
+ }
+ }
+}
+
+static
unique_ptr<NGHolder> buildMaskRhs(const flat_set<ReportID> &reports,
- const vector<CharReach> &mask,
- u32 suffix_len) {
- assert(suffix_len);
- assert(mask.size() > suffix_len);
-
- unique_ptr<NGHolder> rhs = ue2::make_unique<NGHolder>(NFA_SUFFIX);
- NGHolder &h = *rhs;
-
- NFAVertex succ = h.accept;
- u32 m_idx = mask.size() - 1;
- while (suffix_len--) {
- NFAVertex u = add_vertex(h);
- if (succ == h.accept) {
- h[u].reports.insert(reports.begin(), reports.end());
- }
- h[u].char_reach = mask[m_idx--];
- add_edge(u, succ, h);
- succ = u;
- }
-
+ const vector<CharReach> &mask,
+ u32 suffix_len) {
+ assert(suffix_len);
+ assert(mask.size() > suffix_len);
+
+ unique_ptr<NGHolder> rhs = ue2::make_unique<NGHolder>(NFA_SUFFIX);
+ NGHolder &h = *rhs;
+
+ NFAVertex succ = h.accept;
+ u32 m_idx = mask.size() - 1;
+ while (suffix_len--) {
+ NFAVertex u = add_vertex(h);
+ if (succ == h.accept) {
+ h[u].reports.insert(reports.begin(), reports.end());
+ }
+ h[u].char_reach = mask[m_idx--];
+ add_edge(u, succ, h);
+ succ = u;
+ }
+
NFAEdge e = add_edge(h.start, succ, h);
h[e].tops.insert(DEFAULT_TOP);
-
- return rhs;
-}
-
-static
+
+ return rhs;
+}
+
+static
void doAddMask(RoseBuildImpl &tbi, bool anchored, const vector<CharReach> &mask,
const ue2_literal &lit, u32 prefix_len, u32 suffix_len,
const flat_set<ReportID> &reports) {
- /* Note: bounds are relative to literal start */
- RoseInGraph ig;
- RoseInVertex s = add_vertex(RoseInVertexProps::makeStart(anchored), ig);
- RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig);
-
- DEBUG_PRINTF("pref + lit = %u\n", prefix_len);
- assert(prefix_len >= lit.length());
-
- // prefix len is relative to end of literal.
- u32 minBound = prefix_len - lit.length();
-
- if (minBound) {
- if (anchored && prefix_len > tbi.cc.grey.maxAnchoredRegion) {
- DEBUG_PRINTF("too deep\n");
- /* see if there is an anchored literal we can also hang off */
-
- ue2_literal lit2;
- u32 lit2_offset;
- vector<CharReach> mask2 = mask;
- assert(mask2.size() > tbi.cc.grey.maxAnchoredRegion);
- mask2.resize(MIN(tbi.cc.grey.maxAnchoredRegion, minBound));
-
- findMaskLiteral(mask2, tbi.cc.streaming, &lit2, &lit2_offset,
- tbi.cc.grey);
-
- if (lit2.length() >= MIN_MASK_LIT_LEN) {
- u32 prefix2_len = lit2_offset + lit2.length();
- assert(prefix2_len < minBound);
- RoseInVertex u
- = add_vertex(RoseInVertexProps::makeLiteral(lit2), ig);
-            if (lit2_offset) {
- DEBUG_PRINTF("building lhs (off %u)\n", lit2_offset);
- shared_ptr<NGHolder> lhs2
- = buildMaskLhs(true, lit2_offset, mask);
- add_edge(s, u, RoseInEdgeProps(lhs2, lit2.length()), ig);
- } else {
- add_edge(s, u, RoseInEdgeProps(0, 0), ig);
- }
-
- /* midfix */
- DEBUG_PRINTF("building mhs\n");
- vector<CharReach> mask3(mask.begin() + prefix2_len, mask.end());
- u32 overlap = maxOverlap(lit2, lit, 0);
- u32 delay = lit.length() - overlap;
- shared_ptr<NGHolder> mhs
- = buildMaskLhs(true, minBound - prefix2_len + overlap,
- mask3);
- mhs->kind = NFA_INFIX;
+ /* Note: bounds are relative to literal start */
+ RoseInGraph ig;
+ RoseInVertex s = add_vertex(RoseInVertexProps::makeStart(anchored), ig);
+ RoseInVertex v = add_vertex(RoseInVertexProps::makeLiteral(lit), ig);
+
+ DEBUG_PRINTF("pref + lit = %u\n", prefix_len);
+ assert(prefix_len >= lit.length());
+
+ // prefix len is relative to end of literal.
+ u32 minBound = prefix_len - lit.length();
+
+ if (minBound) {
+ if (anchored && prefix_len > tbi.cc.grey.maxAnchoredRegion) {
+ DEBUG_PRINTF("too deep\n");
+ /* see if there is an anchored literal we can also hang off */
+
+ ue2_literal lit2;
+ u32 lit2_offset;
+ vector<CharReach> mask2 = mask;
+ assert(mask2.size() > tbi.cc.grey.maxAnchoredRegion);
+ mask2.resize(MIN(tbi.cc.grey.maxAnchoredRegion, minBound));
+
+ findMaskLiteral(mask2, tbi.cc.streaming, &lit2, &lit2_offset,
+ tbi.cc.grey);
+
+ if (lit2.length() >= MIN_MASK_LIT_LEN) {
+ u32 prefix2_len = lit2_offset + lit2.length();
+ assert(prefix2_len < minBound);
+ RoseInVertex u
+ = add_vertex(RoseInVertexProps::makeLiteral(lit2), ig);
+            if (lit2_offset) {
+ DEBUG_PRINTF("building lhs (off %u)\n", lit2_offset);
+ shared_ptr<NGHolder> lhs2
+ = buildMaskLhs(true, lit2_offset, mask);
+ add_edge(s, u, RoseInEdgeProps(lhs2, lit2.length()), ig);
+ } else {
+ add_edge(s, u, RoseInEdgeProps(0, 0), ig);
+ }
+
+ /* midfix */
+ DEBUG_PRINTF("building mhs\n");
+ vector<CharReach> mask3(mask.begin() + prefix2_len, mask.end());
+ u32 overlap = maxOverlap(lit2, lit, 0);
+ u32 delay = lit.length() - overlap;
+ shared_ptr<NGHolder> mhs
+ = buildMaskLhs(true, minBound - prefix2_len + overlap,
+ mask3);
+ mhs->kind = NFA_INFIX;
setTops(*mhs);
- add_edge(u, v, RoseInEdgeProps(mhs, delay), ig);
-
- DEBUG_PRINTF("add anch literal too!\n");
- goto do_rhs;
- }
- }
-
- shared_ptr<NGHolder> lhs = buildMaskLhs(anchored, minBound, mask);
- add_edge(s, v, RoseInEdgeProps(lhs, lit.length()), ig);
- } else {
- u32 maxBound = anchored ? minBound : ROSE_BOUND_INF;
- add_edge(s, v, RoseInEdgeProps(minBound, maxBound), ig);
- }
-
- do_rhs:
- if (suffix_len) {
- shared_ptr<NGHolder> rhs = buildMaskRhs(reports, mask, suffix_len);
- RoseInVertex a =
- add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), ig);
- add_edge(v, a, RoseInEdgeProps(rhs, 0), ig);
- } else {
- /* Note: masks have no eod connections */
- RoseInVertex a
- = add_vertex(RoseInVertexProps::makeAccept(reports), ig);
- add_edge(v, a, RoseInEdgeProps(0U, 0U), ig);
- }
-
- calcVertexOffsets(ig);
-
- bool rv = tbi.addRose(ig, false);
-
- assert(rv); /* checkAllowMask should have prevented this */
- if (!rv) {
- throw std::exception();
- }
-}
-
-static
-bool checkAllowMask(const vector<CharReach> &mask, ue2_literal *lit,
- u32 *prefix_len, u32 *suffix_len,
- const CompileContext &cc) {
- assert(!mask.empty());
- u32 lit_offset;
- findMaskLiteral(mask, cc.streaming, lit, &lit_offset, cc.grey);
-
- if (lit->length() < MIN_MASK_LIT_LEN && lit->length() != mask.size()) {
- DEBUG_PRINTF("need more literal - bad mask\n");
- return false;
- }
-
- DEBUG_PRINTF("mask lit '%s', len=%zu at offset=%u\n",
- dumpString(*lit).c_str(), lit->length(), lit_offset);
-
- assert(!cc.streaming || lit->length() <= cc.grey.maxHistoryAvailable + 1);
-
- /* literal is included in the prefix nfa so that matches from the prefix
- * can't occur in the history buffer - probably should tweak the NFA API
- * to allow such matches not to be suppressed */
- *prefix_len = lit_offset + lit->length();
- *suffix_len = mask.size() - *prefix_len;
- DEBUG_PRINTF("prefix_len=%u, suffix_len=%u\n", *prefix_len, *suffix_len);
-
- /* check if we can backtrack sufficiently */
- if (cc.streaming && *prefix_len > cc.grey.maxHistoryAvailable + 1) {
- DEBUG_PRINTF("too much lag\n");
- return false;
- }
-
- if (*suffix_len > MAX_MASK_SIZE || *prefix_len > MAX_MASK_SIZE) {
- DEBUG_PRINTF("too big\n");
- return false;
- }
-
- return true;
-}
-
-bool RoseBuildImpl::add(bool anchored, const vector<CharReach> &mask,
+ add_edge(u, v, RoseInEdgeProps(mhs, delay), ig);
+
+ DEBUG_PRINTF("add anch literal too!\n");
+ goto do_rhs;
+ }
+ }
+
+ shared_ptr<NGHolder> lhs = buildMaskLhs(anchored, minBound, mask);
+ add_edge(s, v, RoseInEdgeProps(lhs, lit.length()), ig);
+ } else {
+ u32 maxBound = anchored ? minBound : ROSE_BOUND_INF;
+ add_edge(s, v, RoseInEdgeProps(minBound, maxBound), ig);
+ }
+
+ do_rhs:
+ if (suffix_len) {
+ shared_ptr<NGHolder> rhs = buildMaskRhs(reports, mask, suffix_len);
+ RoseInVertex a =
+ add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), ig);
+ add_edge(v, a, RoseInEdgeProps(rhs, 0), ig);
+ } else {
+ /* Note: masks have no eod connections */
+ RoseInVertex a
+ = add_vertex(RoseInVertexProps::makeAccept(reports), ig);
+ add_edge(v, a, RoseInEdgeProps(0U, 0U), ig);
+ }
+
+ calcVertexOffsets(ig);
+
+ bool rv = tbi.addRose(ig, false);
+
+ assert(rv); /* checkAllowMask should have prevented this */
+ if (!rv) {
+ throw std::exception();
+ }
+}
+
+static
+bool checkAllowMask(const vector<CharReach> &mask, ue2_literal *lit,
+ u32 *prefix_len, u32 *suffix_len,
+ const CompileContext &cc) {
+ assert(!mask.empty());
+ u32 lit_offset;
+ findMaskLiteral(mask, cc.streaming, lit, &lit_offset, cc.grey);
+
+ if (lit->length() < MIN_MASK_LIT_LEN && lit->length() != mask.size()) {
+ DEBUG_PRINTF("need more literal - bad mask\n");
+ return false;
+ }
+
+ DEBUG_PRINTF("mask lit '%s', len=%zu at offset=%u\n",
+ dumpString(*lit).c_str(), lit->length(), lit_offset);
+
+ assert(!cc.streaming || lit->length() <= cc.grey.maxHistoryAvailable + 1);
+
+ /* literal is included in the prefix nfa so that matches from the prefix
+ * can't occur in the history buffer - probably should tweak the NFA API
+ * to allow such matches not to be suppressed */
+ *prefix_len = lit_offset + lit->length();
+ *suffix_len = mask.size() - *prefix_len;
+ DEBUG_PRINTF("prefix_len=%u, suffix_len=%u\n", *prefix_len, *suffix_len);
+
+ /* check if we can backtrack sufficiently */
+ if (cc.streaming && *prefix_len > cc.grey.maxHistoryAvailable + 1) {
+ DEBUG_PRINTF("too much lag\n");
+ return false;
+ }
+
+ if (*suffix_len > MAX_MASK_SIZE || *prefix_len > MAX_MASK_SIZE) {
+ DEBUG_PRINTF("too big\n");
+ return false;
+ }
+
+ return true;
+}
+
+bool RoseBuildImpl::add(bool anchored, const vector<CharReach> &mask,
const flat_set<ReportID> &reports) {
if (validateTransientMask(mask, anchored, false, cc.grey)) {
- bool eod = false;
- addTransientMask(*this, mask, reports, anchored, eod);
- return true;
- }
-
- ue2_literal lit;
- u32 prefix_len = 0;
- u32 suffix_len = 0;
-
- if (!checkAllowMask(mask, &lit, &prefix_len, &suffix_len, cc)) {
- return false;
- }
-
- /* we know that the mask can be handled now, start playing with the rose
- * graph */
- doAddMask(*this, anchored, mask, lit, prefix_len, suffix_len, reports);
-
- return true;
-}
-
-bool RoseBuildImpl::validateMask(const vector<CharReach> &mask,
+ bool eod = false;
+ addTransientMask(*this, mask, reports, anchored, eod);
+ return true;
+ }
+
+ ue2_literal lit;
+ u32 prefix_len = 0;
+ u32 suffix_len = 0;
+
+ if (!checkAllowMask(mask, &lit, &prefix_len, &suffix_len, cc)) {
+ return false;
+ }
+
+ /* we know that the mask can be handled now, start playing with the rose
+ * graph */
+ doAddMask(*this, anchored, mask, lit, prefix_len, suffix_len, reports);
+
+ return true;
+}
+
+bool RoseBuildImpl::validateMask(const vector<CharReach> &mask,
UNUSED const flat_set<ReportID> &reports,
bool anchored, bool eod) const {
return validateTransientMask(mask, anchored, eod, cc.grey);
-}
-
-static
-unique_ptr<NGHolder> makeAnchoredGraph(const vector<CharReach> &mask,
+}
+
+static
+unique_ptr<NGHolder> makeAnchoredGraph(const vector<CharReach> &mask,
const flat_set<ReportID> &reports,
- bool eod) {
- auto gp = ue2::make_unique<NGHolder>();
- NGHolder &g = *gp;
-
- NFAVertex u = g.start;
- for (const auto &cr : mask) {
- NFAVertex v = add_vertex(g);
- g[v].char_reach = cr;
- add_edge(u, v, g);
- u = v;
- }
-
-
- g[u].reports = reports;
- add_edge(u, eod ? g.acceptEod : g.accept, g);
-
- return gp;
-}
-
-static
-bool addAnchoredMask(RoseBuildImpl &build, const vector<CharReach> &mask,
+ bool eod) {
+ auto gp = ue2::make_unique<NGHolder>();
+ NGHolder &g = *gp;
+
+ NFAVertex u = g.start;
+ for (const auto &cr : mask) {
+ NFAVertex v = add_vertex(g);
+ g[v].char_reach = cr;
+ add_edge(u, v, g);
+ u = v;
+ }
+
+
+ g[u].reports = reports;
+ add_edge(u, eod ? g.acceptEod : g.accept, g);
+
+ return gp;
+}
+
+static
+bool addAnchoredMask(RoseBuildImpl &build, const vector<CharReach> &mask,
const flat_set<ReportID> &reports, bool eod) {
- if (!build.cc.grey.allowAnchoredAcyclic) {
- return false;
- }
-
- auto g = makeAnchoredGraph(mask, reports, eod);
- assert(g);
-
- return build.addAnchoredAcyclic(*g);
-}
-
-void RoseBuildImpl::addMask(const vector<CharReach> &mask,
+ if (!build.cc.grey.allowAnchoredAcyclic) {
+ return false;
+ }
+
+ auto g = makeAnchoredGraph(mask, reports, eod);
+ assert(g);
+
+ return build.addAnchoredAcyclic(*g);
+}
+
+void RoseBuildImpl::addMask(const vector<CharReach> &mask,
const flat_set<ReportID> &reports, bool anchored,
bool eod) {
- if (anchored && addAnchoredMask(*this, mask, reports, eod)) {
- DEBUG_PRINTF("added mask as anchored acyclic graph\n");
- return;
- }
-
- addTransientMask(*this, mask, reports, anchored, eod);
-}
-
-} // namespace ue2
+ if (anchored && addAnchoredMask(*this, mask, reports, eod)) {
+ DEBUG_PRINTF("added mask as anchored acyclic graph\n");
+ return;
+ }
+
+ addTransientMask(*this, mask, reports, anchored, eod);
+}
+
+} // namespace ue2
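[editor's note, not part of the patch] buildLiteralMask above emits parallel msk/cmp byte vectors for the HWLM matcher. The assumed semantics (per hwlm/hwlm_literal.h): a candidate byte b passes position i iff (b & msk[i]) == cmp[i], so an exact byte uses msk 0xff, a caseless ASCII letter masks off the 0x20 case bit, and a don't-care position uses msk 0. A self-contained sketch of those three cases follows; byte_matches is a hypothetical helper, not library API.

    #include <cassert>
    #include <cstdint>

    static bool byte_matches(uint8_t b, uint8_t msk, uint8_t cmp) {
        return (b & msk) == cmp;   // per-byte and/cmp test
    }

    int main() {
        // Exact byte 'a': full mask, compare against 'a' itself.
        assert(byte_matches('a', 0xff, 'a'));
        assert(!byte_matches('b', 0xff, 'a'));
        // Caseless 'a'/'A': ASCII cases differ only in bit 0x20.
        assert(byte_matches('a', 0xdf, 'A'));
        assert(byte_matches('A', 0xdf, 'A'));
        // Don't-care position: msk 0 accepts any byte.
        assert(byte_matches(0x7e, 0x00, 0x00));
        return 0;
    }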
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_anchored.cpp b/contrib/libs/hyperscan/src/rose/rose_build_anchored.cpp
index 5a3aa00638..23688b8d22 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_anchored.cpp
+++ b/contrib/libs/hyperscan/src/rose/rose_build_anchored.cpp
@@ -1,182 +1,182 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "rose_build_anchored.h"
-
-#include "grey.h"
-#include "rose_build_impl.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "rose_build_anchored.h"
+
+#include "grey.h"
+#include "rose_build_impl.h"
#include "rose_build_matchers.h"
-#include "rose_internal.h"
-#include "ue2common.h"
-#include "nfa/dfa_min.h"
-#include "nfa/mcclellancompile.h"
-#include "nfa/mcclellancompile_util.h"
-#include "nfa/nfa_build_util.h"
-#include "nfa/rdfa_merge.h"
-#include "nfagraph/ng_holder.h"
-#include "nfagraph/ng_repeat.h"
-#include "nfagraph/ng_util.h"
-#include "nfagraph/ng_mcclellan_internal.h"
-#include "util/alloc.h"
-#include "util/bitfield.h"
-#include "util/charreach.h"
-#include "util/compile_context.h"
-#include "util/compile_error.h"
-#include "util/container.h"
-#include "util/determinise.h"
+#include "rose_internal.h"
+#include "ue2common.h"
+#include "nfa/dfa_min.h"
+#include "nfa/mcclellancompile.h"
+#include "nfa/mcclellancompile_util.h"
+#include "nfa/nfa_build_util.h"
+#include "nfa/rdfa_merge.h"
+#include "nfagraph/ng_holder.h"
+#include "nfagraph/ng_repeat.h"
+#include "nfagraph/ng_util.h"
+#include "nfagraph/ng_mcclellan_internal.h"
+#include "util/alloc.h"
+#include "util/bitfield.h"
+#include "util/charreach.h"
+#include "util/compile_context.h"
+#include "util/compile_error.h"
+#include "util/container.h"
+#include "util/determinise.h"
#include "util/flat_containers.h"
-#include "util/graph_range.h"
-#include "util/make_unique.h"
-#include "util/order_check.h"
-#include "util/ue2string.h"
+#include "util/graph_range.h"
+#include "util/make_unique.h"
+#include "util/order_check.h"
+#include "util/ue2string.h"
#include "util/unordered.h"
-#include "util/verify_types.h"
-
-#include <map>
-#include <queue>
-#include <set>
-#include <vector>
-
-using namespace std;
-
-namespace ue2 {
-
-#define ANCHORED_NFA_STATE_LIMIT 512
-#define MAX_DFA_STATES 16000
-#define DFA_PAIR_MERGE_THRESHOLD 5000
-#define MAX_SMALL_START_REACH 4
-
-#define INIT_STATE (DEAD_STATE + 1)
-
+#include "util/verify_types.h"
+
+#include <map>
+#include <queue>
+#include <set>
+#include <vector>
+
+using namespace std;
+
+namespace ue2 {
+
+#define ANCHORED_NFA_STATE_LIMIT 512
+#define MAX_DFA_STATES 16000
+#define DFA_PAIR_MERGE_THRESHOLD 5000
+#define MAX_SMALL_START_REACH 4
+
+#define INIT_STATE (DEAD_STATE + 1)
+
#define NO_FRAG_ID (~0U)
-// Adds a vertex with the given reach.
-static
-NFAVertex add_vertex(NGHolder &h, const CharReach &cr) {
- NFAVertex v = add_vertex(h);
- h[v].char_reach = cr;
- return v;
-}
-
-static
-void add_edges(const set<NFAVertex> &parents, NFAVertex v, NGHolder &h) {
- for (auto p : parents) {
- add_edge(p, v, h);
- }
-}
-
-static
-set<NFAVertex> addDotsToGraph(NGHolder &h, NFAVertex start, u32 min, u32 max,
- const CharReach &cr) {
- DEBUG_PRINTF("adding [%u, %u] to graph\n", min, max);
- u32 i = 0;
- set<NFAVertex> curr;
- curr.insert(start);
- for (; i < min; i++) {
- NFAVertex next = add_vertex(h, cr);
- add_edges(curr, next, h);
- curr.clear();
- curr.insert(next);
- }
-
- assert(max != ROSE_BOUND_INF);
-
- set<NFAVertex> orig = curr;
- for (; i < max; i++) {
- NFAVertex next = add_vertex(h, cr);
- add_edges(curr, next, h);
- curr.clear();
- curr.insert(next);
- curr.insert(orig.begin(), orig.end());
- }
-
- return curr;
-}
-
-static
-NFAVertex addToGraph(NGHolder &h, const set<NFAVertex> &curr,
- const ue2_literal &s) {
- DEBUG_PRINTF("adding %s to graph\n", dumpString(s).c_str());
- assert(!s.empty());
-
- ue2_literal::const_iterator it = s.begin();
- NFAVertex u = add_vertex(h, *it);
- add_edges(curr, u, h);
-
- for (++it; it != s.end(); ++it) {
- NFAVertex next = add_vertex(h, *it);
- add_edge(u, next, h);
- u = next;
- }
-
- return u;
-}
-
-static
-void mergeAnchoredDfas(vector<unique_ptr<raw_dfa>> &dfas,
- const RoseBuildImpl &build) {
- // First, group our DFAs into "small start" and "big start" sets.
- vector<unique_ptr<raw_dfa>> small_starts, big_starts;
- for (auto &rdfa : dfas) {
- u32 start_size = mcclellanStartReachSize(rdfa.get());
- if (start_size <= MAX_SMALL_START_REACH) {
- small_starts.push_back(move(rdfa));
- } else {
- big_starts.push_back(move(rdfa));
- }
- }
- dfas.clear();
-
- DEBUG_PRINTF("%zu dfas with small starts, %zu dfas with big starts\n",
- small_starts.size(), big_starts.size());
- mergeDfas(small_starts, MAX_DFA_STATES, nullptr, build.cc.grey);
- mergeDfas(big_starts, MAX_DFA_STATES, nullptr, build.cc.grey);
-
- // Rehome our groups into one vector.
- for (auto &rdfa : small_starts) {
- dfas.push_back(move(rdfa));
- }
- for (auto &rdfa : big_starts) {
- dfas.push_back(move(rdfa));
- }
-
- // Final test: if we've built two DFAs here that are small enough, we can
- // try to merge them.
- if (dfas.size() == 2) {
- size_t total_states = dfas[0]->states.size() + dfas[1]->states.size();
- if (total_states < DFA_PAIR_MERGE_THRESHOLD) {
- DEBUG_PRINTF("doing small pair merge\n");
- mergeDfas(dfas, MAX_DFA_STATES, nullptr, build.cc.grey);
- }
- }
-}
-
-static
+// Adds a vertex with the given reach.
+static
+NFAVertex add_vertex(NGHolder &h, const CharReach &cr) {
+ NFAVertex v = add_vertex(h);
+ h[v].char_reach = cr;
+ return v;
+}
+
+static
+void add_edges(const set<NFAVertex> &parents, NFAVertex v, NGHolder &h) {
+ for (auto p : parents) {
+ add_edge(p, v, h);
+ }
+}
+
+static
+set<NFAVertex> addDotsToGraph(NGHolder &h, NFAVertex start, u32 min, u32 max,
+ const CharReach &cr) {
+ DEBUG_PRINTF("adding [%u, %u] to graph\n", min, max);
+ u32 i = 0;
+ set<NFAVertex> curr;
+ curr.insert(start);
+ for (; i < min; i++) {
+ NFAVertex next = add_vertex(h, cr);
+ add_edges(curr, next, h);
+ curr.clear();
+ curr.insert(next);
+ }
+
+ assert(max != ROSE_BOUND_INF);
+
+ set<NFAVertex> orig = curr;
+ for (; i < max; i++) {
+ NFAVertex next = add_vertex(h, cr);
+ add_edges(curr, next, h);
+ curr.clear();
+ curr.insert(next);
+ curr.insert(orig.begin(), orig.end());
+ }
+
+ return curr;
+}
+
+static
+NFAVertex addToGraph(NGHolder &h, const set<NFAVertex> &curr,
+ const ue2_literal &s) {
+ DEBUG_PRINTF("adding %s to graph\n", dumpString(s).c_str());
+ assert(!s.empty());
+
+ ue2_literal::const_iterator it = s.begin();
+ NFAVertex u = add_vertex(h, *it);
+ add_edges(curr, u, h);
+
+ for (++it; it != s.end(); ++it) {
+ NFAVertex next = add_vertex(h, *it);
+ add_edge(u, next, h);
+ u = next;
+ }
+
+ return u;
+}
+
+static
+void mergeAnchoredDfas(vector<unique_ptr<raw_dfa>> &dfas,
+ const RoseBuildImpl &build) {
+ // First, group our DFAs into "small start" and "big start" sets.
+ vector<unique_ptr<raw_dfa>> small_starts, big_starts;
+ for (auto &rdfa : dfas) {
+ u32 start_size = mcclellanStartReachSize(rdfa.get());
+ if (start_size <= MAX_SMALL_START_REACH) {
+ small_starts.push_back(move(rdfa));
+ } else {
+ big_starts.push_back(move(rdfa));
+ }
+ }
+ dfas.clear();
+
+ DEBUG_PRINTF("%zu dfas with small starts, %zu dfas with big starts\n",
+ small_starts.size(), big_starts.size());
+ mergeDfas(small_starts, MAX_DFA_STATES, nullptr, build.cc.grey);
+ mergeDfas(big_starts, MAX_DFA_STATES, nullptr, build.cc.grey);
+
+ // Rehome our groups into one vector.
+ for (auto &rdfa : small_starts) {
+ dfas.push_back(move(rdfa));
+ }
+ for (auto &rdfa : big_starts) {
+ dfas.push_back(move(rdfa));
+ }
+
+ // Final test: if we've built two DFAs here that are small enough, we can
+ // try to merge them.
+ if (dfas.size() == 2) {
+ size_t total_states = dfas[0]->states.size() + dfas[1]->states.size();
+ if (total_states < DFA_PAIR_MERGE_THRESHOLD) {
+ DEBUG_PRINTF("doing small pair merge\n");
+ mergeDfas(dfas, MAX_DFA_STATES, nullptr, build.cc.grey);
+ }
+ }
+}
+
+static
void remapAnchoredReports(raw_dfa &rdfa, const vector<u32> &frag_map) {
for (dstate &ds : rdfa.states) {
assert(ds.reports_eod.empty()); // Not used in anchored matcher.
@@ -190,27 +190,27 @@ void remapAnchoredReports(raw_dfa &rdfa, const vector<u32> &frag_map) {
new_reports.insert(frag_map[id]);
}
ds.reports = std::move(new_reports);
- }
-}
-
+ }
+}
+
/**
* \brief Replaces the report ids currently in the dfas (rose graph literal
* ids) with the fragment id for each literal.
*/
-static
+static
void remapAnchoredReports(RoseBuildImpl &build, const vector<u32> &frag_map) {
for (auto &m : build.anchored_nfas) {
for (auto &rdfa : m.second) {
assert(rdfa);
remapAnchoredReports(*rdfa, frag_map);
}
- }
-}
-
+ }
+}
+
/**
* Returns mapping from literal ids to fragment ids.
*/
-static
+static
vector<u32> reverseFragMap(const RoseBuildImpl &build,
const vector<LitFragment> &fragments) {
vector<u32> rev(build.literal_info.size(), NO_FRAG_ID);
@@ -218,16 +218,16 @@ vector<u32> reverseFragMap(const RoseBuildImpl &build,
for (u32 lit_id : f.lit_ids) {
assert(lit_id < rev.size());
rev[lit_id] = f.fragment_id;
- }
- }
+ }
+ }
return rev;
-}
-
+}
+
/**
* \brief Replace the reports (which are literal final_ids) in the given
* raw_dfa with program offsets.
*/
-static
+static
void remapIdsToPrograms(const vector<LitFragment> &fragments, raw_dfa &rdfa) {
for (dstate &ds : rdfa.states) {
assert(ds.reports_eod.empty()); // Not used in anchored matcher.
@@ -247,609 +247,609 @@ void remapIdsToPrograms(const vector<LitFragment> &fragments, raw_dfa &rdfa) {
static
unique_ptr<NGHolder> populate_holder(const simple_anchored_info &sai,
const flat_set<u32> &exit_ids) {
- DEBUG_PRINTF("populating holder for ^.{%u,%u}%s\n", sai.min_bound,
- sai.max_bound, dumpString(sai.literal).c_str());
+ DEBUG_PRINTF("populating holder for ^.{%u,%u}%s\n", sai.min_bound,
+ sai.max_bound, dumpString(sai.literal).c_str());
auto h_ptr = std::make_unique<NGHolder>();
NGHolder &h = *h_ptr;
auto ends = addDotsToGraph(h, h.start, sai.min_bound, sai.max_bound,
CharReach::dot());
- NFAVertex v = addToGraph(h, ends, sai.literal);
- add_edge(v, h.accept, h);
- h[v].reports.insert(exit_ids.begin(), exit_ids.end());
+ NFAVertex v = addToGraph(h, ends, sai.literal);
+ add_edge(v, h.accept, h);
+ h[v].reports.insert(exit_ids.begin(), exit_ids.end());
return h_ptr;
-}
-
+}
+
u32 anchoredStateSize(const anchored_matcher_info &atable) {
const struct anchored_matcher_info *curr = &atable;
-
- // Walk the list until we find the last element; total state size will be
- // that engine's state offset plus its state requirement.
- while (curr->next_offset) {
- curr = (const anchored_matcher_info *)
- ((const char *)curr + curr->next_offset);
- }
-
- const NFA *nfa = (const NFA *)((const char *)curr + sizeof(*curr));
+
+ // Walk the list until we find the last element; total state size will be
+ // that engine's state offset plus its state requirement.
+ while (curr->next_offset) {
+ curr = (const anchored_matcher_info *)
+ ((const char *)curr + curr->next_offset);
+ }
+
+ const NFA *nfa = (const NFA *)((const char *)curr + sizeof(*curr));
return curr->state_offset + nfa->streamStateSize;
-}
-
-namespace {
-
+}
+
+namespace {
+
using nfa_state_set = bitfield<ANCHORED_NFA_STATE_LIMIT>;
-
-struct Holder_StateSet {
- Holder_StateSet() : wdelay(0) {}
-
- nfa_state_set wrap_state;
- u32 wdelay;
-
- bool operator==(const Holder_StateSet &b) const {
- return wdelay == b.wdelay && wrap_state == b.wrap_state;
- }
+
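+/* Determinisation state: the set of live NFA states (wrap_state) plus a
+ * "wrap delay" counter; while wdelay is nonzero, each consumed character
+ * re-enables the floating start state (startDs). */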
+struct Holder_StateSet {
+ Holder_StateSet() : wdelay(0) {}
+
+ nfa_state_set wrap_state;
+ u32 wdelay;
+
+ bool operator==(const Holder_StateSet &b) const {
+ return wdelay == b.wdelay && wrap_state == b.wrap_state;
+ }
size_t hash() const {
return hash_all(wrap_state, wdelay);
}
-};
-
-class Automaton_Holder {
-public:
+};
+
+class Automaton_Holder {
+public:
using StateSet = Holder_StateSet;
using StateMap = ue2_unordered_map<StateSet, dstate_id_t>;
-
- explicit Automaton_Holder(const NGHolder &g_in) : g(g_in) {
- for (auto v : vertices_range(g)) {
- vertexToIndex[v] = indexToVertex.size();
- indexToVertex.push_back(v);
- }
-
- assert(indexToVertex.size() <= ANCHORED_NFA_STATE_LIMIT);
-
- DEBUG_PRINTF("%zu states\n", indexToVertex.size());
- init.wdelay = 0;
- init.wrap_state.set(vertexToIndex[g.start]);
-
- DEBUG_PRINTF("init wdelay %u\n", init.wdelay);
-
- calculateAlphabet();
- cr_by_index = populateCR(g, indexToVertex, alpha);
- }
-
-private:
- void calculateAlphabet() {
- vector<CharReach> esets(1, CharReach::dot());
-
- for (auto v : indexToVertex) {
- const CharReach &cr = g[v].char_reach;
-
- for (size_t i = 0; i < esets.size(); i++) {
- if (esets[i].count() == 1) {
- continue;
- }
-
- CharReach t = cr & esets[i];
-
- if (t.any() && t != esets[i]) {
- esets[i] &= ~t;
- esets.push_back(t);
- }
- }
- }
-
- alphasize = buildAlphabetFromEquivSets(esets, alpha, unalpha);
- }
-
-public:
- void transition(const StateSet &in, StateSet *next) {
- /* track the dfa state, reset nfa states */
- u32 wdelay = in.wdelay ? in.wdelay - 1 : 0;
-
- for (symbol_t s = 0; s < alphasize; s++) {
- next[s].wrap_state.reset();
- next[s].wdelay = wdelay;
- }
-
- nfa_state_set succ;
-
- if (wdelay != in.wdelay) {
- DEBUG_PRINTF("enabling start\n");
- succ.set(vertexToIndex[g.startDs]);
- }
-
- for (size_t i = in.wrap_state.find_first(); i != nfa_state_set::npos;
- i = in.wrap_state.find_next(i)) {
- NFAVertex v = indexToVertex[i];
- for (auto w : adjacent_vertices_range(v, g)) {
- if (!contains(vertexToIndex, w)
- || w == g.accept || w == g.acceptEod) {
- continue;
- }
-
- if (w == g.startDs) {
- continue;
- }
-
- succ.set(vertexToIndex[w]);
- }
- }
-
- for (size_t j = succ.find_first(); j != nfa_state_set::npos;
- j = succ.find_next(j)) {
- const CharReach &cr = cr_by_index[j];
- for (size_t s = cr.find_first(); s != CharReach::npos;
- s = cr.find_next(s)) {
- next[s].wrap_state.set(j); /* pre alpha'ed */
- }
- }
-
- next[alpha[TOP]] = in;
- }
-
- const vector<StateSet> initial() {
- return {init};
- }
-
- void reports(const StateSet &in, flat_set<ReportID> &rv) {
- rv.clear();
- for (size_t i = in.wrap_state.find_first(); i != nfa_state_set::npos;
- i = in.wrap_state.find_next(i)) {
- NFAVertex v = indexToVertex[i];
- if (edge(v, g.accept, g).second) {
- assert(!g[v].reports.empty());
- insert(&rv, g[v].reports);
- } else {
- assert(g[v].reports.empty());
- }
- }
- }
-
- void reportsEod(const StateSet &, flat_set<ReportID> &r) {
- r.clear();
- }
-
- static bool canPrune(const flat_set<ReportID> &) {
- /* used by ng_ to prune states after highlander accepts */
- return false;
- }
-
-private:
- const NGHolder &g;
+
+ explicit Automaton_Holder(const NGHolder &g_in) : g(g_in) {
+ for (auto v : vertices_range(g)) {
+ vertexToIndex[v] = indexToVertex.size();
+ indexToVertex.push_back(v);
+ }
+
+ assert(indexToVertex.size() <= ANCHORED_NFA_STATE_LIMIT);
+
+ DEBUG_PRINTF("%zu states\n", indexToVertex.size());
+ init.wdelay = 0;
+ init.wrap_state.set(vertexToIndex[g.start]);
+
+ DEBUG_PRINTF("init wdelay %u\n", init.wdelay);
+
+ calculateAlphabet();
+ cr_by_index = populateCR(g, indexToVertex, alpha);
+ }
+
+private:
+ void calculateAlphabet() {
+ vector<CharReach> esets(1, CharReach::dot());
+
+ for (auto v : indexToVertex) {
+ const CharReach &cr = g[v].char_reach;
+
+ for (size_t i = 0; i < esets.size(); i++) {
+ if (esets[i].count() == 1) {
+ continue;
+ }
+
+ CharReach t = cr & esets[i];
+
+ if (t.any() && t != esets[i]) {
+ esets[i] &= ~t;
+ esets.push_back(t);
+ }
+ }
+ }
+
+ alphasize = buildAlphabetFromEquivSets(esets, alpha, unalpha);
+ }
+
+public:
+ void transition(const StateSet &in, StateSet *next) {
+ /* track the dfa state, reset nfa states */
+ u32 wdelay = in.wdelay ? in.wdelay - 1 : 0;
+
+ for (symbol_t s = 0; s < alphasize; s++) {
+ next[s].wrap_state.reset();
+ next[s].wdelay = wdelay;
+ }
+
+ nfa_state_set succ;
+
+ if (wdelay != in.wdelay) {
+ DEBUG_PRINTF("enabling start\n");
+ succ.set(vertexToIndex[g.startDs]);
+ }
+
+ for (size_t i = in.wrap_state.find_first(); i != nfa_state_set::npos;
+ i = in.wrap_state.find_next(i)) {
+ NFAVertex v = indexToVertex[i];
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (!contains(vertexToIndex, w)
+ || w == g.accept || w == g.acceptEod) {
+ continue;
+ }
+
+ if (w == g.startDs) {
+ continue;
+ }
+
+ succ.set(vertexToIndex[w]);
+ }
+ }
+
+ for (size_t j = succ.find_first(); j != nfa_state_set::npos;
+ j = succ.find_next(j)) {
+ const CharReach &cr = cr_by_index[j];
+ for (size_t s = cr.find_first(); s != CharReach::npos;
+ s = cr.find_next(s)) {
+ next[s].wrap_state.set(j); /* pre alpha'ed */
+ }
+ }
+
+ next[alpha[TOP]] = in;
+ }
+
+ const vector<StateSet> initial() {
+ return {init};
+ }
+
+ void reports(const StateSet &in, flat_set<ReportID> &rv) {
+ rv.clear();
+ for (size_t i = in.wrap_state.find_first(); i != nfa_state_set::npos;
+ i = in.wrap_state.find_next(i)) {
+ NFAVertex v = indexToVertex[i];
+ if (edge(v, g.accept, g).second) {
+ assert(!g[v].reports.empty());
+ insert(&rv, g[v].reports);
+ } else {
+ assert(g[v].reports.empty());
+ }
+ }
+ }
+
+ void reportsEod(const StateSet &, flat_set<ReportID> &r) {
+ r.clear();
+ }
+
+ static bool canPrune(const flat_set<ReportID> &) {
+ /* used by ng_ to prune states after highlander accepts */
+ return false;
+ }
+
+private:
+ const NGHolder &g;
unordered_map<NFAVertex, u32> vertexToIndex;
- vector<NFAVertex> indexToVertex;
- vector<CharReach> cr_by_index;
- StateSet init;
-public:
- StateSet dead;
- array<u16, ALPHABET_SIZE> alpha;
- array<u16, ALPHABET_SIZE> unalpha;
- u16 alphasize;
-};
-
-} // namespace
-
-static
-bool check_dupe(const raw_dfa &rdfa,
- const vector<unique_ptr<raw_dfa>> &existing, ReportID *remap) {
- if (!remap) {
- DEBUG_PRINTF("no remap\n");
- return false;
- }
-
- set<ReportID> rdfa_reports;
- for (const auto &ds : rdfa.states) {
- rdfa_reports.insert(ds.reports.begin(), ds.reports.end());
- }
- if (rdfa_reports.size() != 1) {
- return false; /* too complicated for now; would need report mapping (TODO) */
- }
-
- for (const auto &e_rdfa : existing) {
- assert(e_rdfa);
- const raw_dfa &b = *e_rdfa;
-
- if (rdfa.start_anchored != b.start_anchored ||
- rdfa.alpha_size != b.alpha_size ||
- rdfa.states.size() != b.states.size() ||
- rdfa.alpha_remap != b.alpha_remap) {
- continue;
- }
-
- set<ReportID> b_reports;
-
- for (u32 i = 0; i < b.states.size(); i++) {
- assert(b.states[i].reports_eod.empty());
- assert(rdfa.states[i].reports_eod.empty());
- if (rdfa.states[i].reports.size() != b.states[i].reports.size()) {
- goto next_dfa;
- }
- b_reports.insert(b.states[i].reports.begin(),
- b.states[i].reports.end());
-
- assert(rdfa.states[i].next.size() == b.states[i].next.size());
- if (!equal(rdfa.states[i].next.begin(), rdfa.states[i].next.end(),
- b.states[i].next.begin())) {
- goto next_dfa;
- }
- }
-
- if (b_reports.size() != 1) {
- continue;
- }
-
- *remap = *b_reports.begin();
- DEBUG_PRINTF("dupe found remapping to %u\n", *remap);
- return true;
- next_dfa:;
- }
-
- return false;
-}
-
-static
+ vector<NFAVertex> indexToVertex;
+ vector<CharReach> cr_by_index;
+ StateSet init;
+public:
+ StateSet dead;
+ array<u16, ALPHABET_SIZE> alpha;
+ array<u16, ALPHABET_SIZE> unalpha;
+ u16 alphasize;
+};
+
+} // namespace
+
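+/* Look for an existing anchored DFA structurally identical to rdfa (same
+ * transitions and alphabet remap, reports ignored). If both automata carry
+ * exactly one report, the new DFA is a duplicate and the caller may simply
+ * remap its report id via *remap. */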
+static
+bool check_dupe(const raw_dfa &rdfa,
+ const vector<unique_ptr<raw_dfa>> &existing, ReportID *remap) {
+ if (!remap) {
+ DEBUG_PRINTF("no remap\n");
+ return false;
+ }
+
+ set<ReportID> rdfa_reports;
+ for (const auto &ds : rdfa.states) {
+ rdfa_reports.insert(ds.reports.begin(), ds.reports.end());
+ }
+ if (rdfa_reports.size() != 1) {
+ return false; /* too complicated for now; would need report mapping (TODO) */
+ }
+
+ for (const auto &e_rdfa : existing) {
+ assert(e_rdfa);
+ const raw_dfa &b = *e_rdfa;
+
+ if (rdfa.start_anchored != b.start_anchored ||
+ rdfa.alpha_size != b.alpha_size ||
+ rdfa.states.size() != b.states.size() ||
+ rdfa.alpha_remap != b.alpha_remap) {
+ continue;
+ }
+
+ set<ReportID> b_reports;
+
+ for (u32 i = 0; i < b.states.size(); i++) {
+ assert(b.states[i].reports_eod.empty());
+ assert(rdfa.states[i].reports_eod.empty());
+ if (rdfa.states[i].reports.size() != b.states[i].reports.size()) {
+ goto next_dfa;
+ }
+ b_reports.insert(b.states[i].reports.begin(),
+ b.states[i].reports.end());
+
+ assert(rdfa.states[i].next.size() == b.states[i].next.size());
+ if (!equal(rdfa.states[i].next.begin(), rdfa.states[i].next.end(),
+ b.states[i].next.begin())) {
+ goto next_dfa;
+ }
+ }
+
+ if (b_reports.size() != 1) {
+ continue;
+ }
+
+ *remap = *b_reports.begin();
+ DEBUG_PRINTF("dupe found remapping to %u\n", *remap);
+ return true;
+ next_dfa:;
+ }
+
+ return false;
+}
+
+static
bool check_dupe_simple(const RoseBuildImpl &build, u32 min_bound, u32 max_bound,
- const ue2_literal &lit, ReportID *remap) {
- if (!remap) {
- DEBUG_PRINTF("no remap\n");
- return false;
- }
-
- simple_anchored_info sai(min_bound, max_bound, lit);
+ const ue2_literal &lit, ReportID *remap) {
+ if (!remap) {
+ DEBUG_PRINTF("no remap\n");
+ return false;
+ }
+
+ simple_anchored_info sai(min_bound, max_bound, lit);
if (contains(build.anchored_simple, sai)) {
*remap = *build.anchored_simple.at(sai).begin();
- return true;
- }
-
- return false;
-}
-
-static
-NFAVertex extractLiteral(const NGHolder &h, ue2_literal *lit) {
- vector<NFAVertex> lit_verts;
- NFAVertex v = h.accept;
- while ((v = getSoleSourceVertex(h, v))) {
- const CharReach &cr = h[v].char_reach;
- if (cr.count() > 1 && !cr.isCaselessChar()) {
- break;
- }
- lit_verts.push_back(v);
- }
-
- if (lit_verts.empty()) {
+ return true;
+ }
+
+ return false;
+}
+
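+/* Walk backwards from accept collecting vertices whose reach is a single
+ * character (or a caseless pair), building up the literal. Returns the
+ * head of the literal found, or null_vertex if there is no literal or its
+ * casing is inconsistent. */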
+static
+NFAVertex extractLiteral(const NGHolder &h, ue2_literal *lit) {
+ vector<NFAVertex> lit_verts;
+ NFAVertex v = h.accept;
+ while ((v = getSoleSourceVertex(h, v))) {
+ const CharReach &cr = h[v].char_reach;
+ if (cr.count() > 1 && !cr.isCaselessChar()) {
+ break;
+ }
+ lit_verts.push_back(v);
+ }
+
+ if (lit_verts.empty()) {
return NGHolder::null_vertex();
- }
-
- bool nocase = false;
- bool case_set = false;
-
- for (auto it = lit_verts.rbegin(), ite = lit_verts.rend(); it != ite;
- ++it) {
- const CharReach &cr = h[*it].char_reach;
- if (cr.isAlpha()) {
- bool cr_nocase = cr.count() != 1;
- if (case_set && cr_nocase != nocase) {
+ }
+
+ bool nocase = false;
+ bool case_set = false;
+
+ for (auto it = lit_verts.rbegin(), ite = lit_verts.rend(); it != ite;
+ ++it) {
+ const CharReach &cr = h[*it].char_reach;
+ if (cr.isAlpha()) {
+ bool cr_nocase = cr.count() != 1;
+ if (case_set && cr_nocase != nocase) {
return NGHolder::null_vertex();
- }
-
- case_set = true;
- nocase = cr_nocase;
- lit->push_back(cr.find_first(), nocase);
- } else {
- lit->push_back(cr.find_first(), false);
- }
- }
-
- return lit_verts.back();
-}
-
-static
-bool isSimple(const NGHolder &h, u32 *min_bound, u32 *max_bound,
- ue2_literal *lit, u32 *report) {
- assert(!proper_out_degree(h.startDs, h));
- assert(in_degree(h.acceptEod, h) == 1);
-
- DEBUG_PRINTF("looking for simple case\n");
- NFAVertex lit_head = extractLiteral(h, lit);
-
+ }
+
+ case_set = true;
+ nocase = cr_nocase;
+ lit->push_back(cr.find_first(), nocase);
+ } else {
+ lit->push_back(cr.find_first(), false);
+ }
+ }
+
+ return lit_verts.back();
+}
+
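+/* Recognise the simple anchored form ^.{min,max}<literal> with a single
+ * report, filling in the bounds, literal and report on success so that the
+ * pattern can be stored as a simple_anchored_info rather than a DFA. */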
+static
+bool isSimple(const NGHolder &h, u32 *min_bound, u32 *max_bound,
+ ue2_literal *lit, u32 *report) {
+ assert(!proper_out_degree(h.startDs, h));
+ assert(in_degree(h.acceptEod, h) == 1);
+
+ DEBUG_PRINTF("looking for simple case\n");
+ NFAVertex lit_head = extractLiteral(h, lit);
+
if (lit_head == NGHolder::null_vertex()) {
- DEBUG_PRINTF("no literal found\n");
- return false;
- }
-
- const auto &reps = h[*inv_adjacent_vertices(h.accept, h).first].reports;
-
- if (reps.size() != 1) {
- return false;
- }
- *report = *reps.begin();
-
- assert(!lit->empty());
-
- set<NFAVertex> rep_exits;
-
- /* lit should only be connected to dot vertices */
- for (auto u : inv_adjacent_vertices_range(lit_head, h)) {
+ DEBUG_PRINTF("no literal found\n");
+ return false;
+ }
+
+ const auto &reps = h[*inv_adjacent_vertices(h.accept, h).first].reports;
+
+ if (reps.size() != 1) {
+ return false;
+ }
+ *report = *reps.begin();
+
+ assert(!lit->empty());
+
+ set<NFAVertex> rep_exits;
+
+ /* lit should only be connected to dot vertices */
+ for (auto u : inv_adjacent_vertices_range(lit_head, h)) {
DEBUG_PRINTF("checking %zu\n", h[u].index);
- if (!h[u].char_reach.all()) {
- return false;
- }
-
- if (u != h.start) {
- rep_exits.insert(u);
- }
- }
-
- if (rep_exits.empty()) {
- DEBUG_PRINTF("direct anchored\n");
- assert(edge(h.start, lit_head, h).second);
- *min_bound = 0;
- *max_bound = 0;
- return true;
- }
-
- NFAVertex key = *rep_exits.begin();
-
- // Special-case the check for '^.foo' or '^.?foo'.
- if (rep_exits.size() == 1 && edge(h.start, key, h).second &&
- out_degree(key, h) == 1) {
- DEBUG_PRINTF("one exit\n");
- assert(edge(h.start, h.startDs, h).second);
- size_t num_enters = out_degree(h.start, h);
- if (num_enters == 2) {
- DEBUG_PRINTF("^.{1,1} prefix\n");
- *min_bound = 1;
- *max_bound = 1;
- return true;
- }
- if (num_enters == 3 && edge(h.start, lit_head, h).second) {
- DEBUG_PRINTF("^.{0,1} prefix\n");
- *min_bound = 0;
- *max_bound = 1;
- return true;
- }
- }
-
- vector<GraphRepeatInfo> repeats;
- findRepeats(h, 2, &repeats);
-
- vector<GraphRepeatInfo>::const_iterator it;
- for (it = repeats.begin(); it != repeats.end(); ++it) {
- DEBUG_PRINTF("checking.. %zu verts\n", it->vertices.size());
- if (find(it->vertices.begin(), it->vertices.end(), key)
- != it->vertices.end()) {
- break;
- }
- }
- if (it == repeats.end()) {
- DEBUG_PRINTF("no repeat found\n");
- return false;
- }
-
- set<NFAVertex> rep_verts;
- insert(&rep_verts, it->vertices);
- if (!is_subset_of(rep_exits, rep_verts)) {
- DEBUG_PRINTF("bad exit check\n");
- return false;
- }
-
- set<NFAVertex> rep_enters;
- insert(&rep_enters, adjacent_vertices(h.start, h));
- rep_enters.erase(lit_head);
- rep_enters.erase(h.startDs);
-
- if (!is_subset_of(rep_enters, rep_verts)) {
- DEBUG_PRINTF("bad entry check\n");
- return false;
- }
-
- u32 min_b = it->repeatMin;
- if (edge(h.start, lit_head, h).second) { /* jump edge */
- if (min_b != 1) {
- DEBUG_PRINTF("jump edge around repeat with min bound\n");
- return false;
- }
-
- min_b = 0;
- }
- *min_bound = min_b;
- *max_bound = it->repeatMax;
-
- DEBUG_PRINTF("repeat %u %u before %s\n", *min_bound, *max_bound,
- dumpString(*lit).c_str());
- return true;
-}
-
-static
+ if (!h[u].char_reach.all()) {
+ return false;
+ }
+
+ if (u != h.start) {
+ rep_exits.insert(u);
+ }
+ }
+
+ if (rep_exits.empty()) {
+ DEBUG_PRINTF("direct anchored\n");
+ assert(edge(h.start, lit_head, h).second);
+ *min_bound = 0;
+ *max_bound = 0;
+ return true;
+ }
+
+ NFAVertex key = *rep_exits.begin();
+
+ // Special-case the check for '^.foo' or '^.?foo'.
+ if (rep_exits.size() == 1 && edge(h.start, key, h).second &&
+ out_degree(key, h) == 1) {
+ DEBUG_PRINTF("one exit\n");
+ assert(edge(h.start, h.startDs, h).second);
+ size_t num_enters = out_degree(h.start, h);
+ if (num_enters == 2) {
+ DEBUG_PRINTF("^.{1,1} prefix\n");
+ *min_bound = 1;
+ *max_bound = 1;
+ return true;
+ }
+ if (num_enters == 3 && edge(h.start, lit_head, h).second) {
+ DEBUG_PRINTF("^.{0,1} prefix\n");
+ *min_bound = 0;
+ *max_bound = 1;
+ return true;
+ }
+ }
+
+ vector<GraphRepeatInfo> repeats;
+ findRepeats(h, 2, &repeats);
+
+ vector<GraphRepeatInfo>::const_iterator it;
+ for (it = repeats.begin(); it != repeats.end(); ++it) {
+ DEBUG_PRINTF("checking.. %zu verts\n", it->vertices.size());
+ if (find(it->vertices.begin(), it->vertices.end(), key)
+ != it->vertices.end()) {
+ break;
+ }
+ }
+ if (it == repeats.end()) {
+ DEBUG_PRINTF("no repeat found\n");
+ return false;
+ }
+
+ set<NFAVertex> rep_verts;
+ insert(&rep_verts, it->vertices);
+ if (!is_subset_of(rep_exits, rep_verts)) {
+ DEBUG_PRINTF("bad exit check\n");
+ return false;
+ }
+
+ set<NFAVertex> rep_enters;
+ insert(&rep_enters, adjacent_vertices(h.start, h));
+ rep_enters.erase(lit_head);
+ rep_enters.erase(h.startDs);
+
+ if (!is_subset_of(rep_enters, rep_verts)) {
+ DEBUG_PRINTF("bad entry check\n");
+ return false;
+ }
+
+ u32 min_b = it->repeatMin;
+ if (edge(h.start, lit_head, h).second) { /* jump edge */
+ if (min_b != 1) {
+ DEBUG_PRINTF("jump edge around repeat with min bound\n");
+ return false;
+ }
+
+ min_b = 0;
+ }
+ *min_bound = min_b;
+ *max_bound = it->repeatMax;
+
+ DEBUG_PRINTF("repeat %u %u before %s\n", *min_bound, *max_bound,
+ dumpString(*lit).c_str());
+ return true;
+}
+
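+/* Take a freshly determinised automaton and either record it as a simple
+ * anchored literal, remap its report onto an existing duplicate, or add
+ * the raw_dfa to anchored_nfas keyed by a report-blind hash. */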
+static
int finalise_out(RoseBuildImpl &build, const NGHolder &h,
- const Automaton_Holder &autom, unique_ptr<raw_dfa> out_dfa,
- ReportID *remap) {
- u32 min_bound = ~0U;
- u32 max_bound = ~0U;
- ue2_literal lit;
- u32 simple_report = MO_INVALID_IDX;
- if (isSimple(h, &min_bound, &max_bound, &lit, &simple_report)) {
- assert(simple_report != MO_INVALID_IDX);
+ const Automaton_Holder &autom, unique_ptr<raw_dfa> out_dfa,
+ ReportID *remap) {
+ u32 min_bound = ~0U;
+ u32 max_bound = ~0U;
+ ue2_literal lit;
+ u32 simple_report = MO_INVALID_IDX;
+ if (isSimple(h, &min_bound, &max_bound, &lit, &simple_report)) {
+ assert(simple_report != MO_INVALID_IDX);
if (check_dupe_simple(build, min_bound, max_bound, lit, remap)) {
- DEBUG_PRINTF("found duplicate remapping to %u\n", *remap);
- return ANCHORED_REMAP;
- }
- DEBUG_PRINTF("add with report %u\n", simple_report);
+ DEBUG_PRINTF("found duplicate remapping to %u\n", *remap);
+ return ANCHORED_REMAP;
+ }
+ DEBUG_PRINTF("add with report %u\n", simple_report);
build.anchored_simple[simple_anchored_info(min_bound, max_bound, lit)]
- .insert(simple_report);
- return ANCHORED_SUCCESS;
- }
-
- out_dfa->start_anchored = INIT_STATE;
- out_dfa->start_floating = DEAD_STATE;
- out_dfa->alpha_size = autom.alphasize;
- out_dfa->alpha_remap = autom.alpha;
- auto hash = hash_dfa_no_reports(*out_dfa);
+ .insert(simple_report);
+ return ANCHORED_SUCCESS;
+ }
+
+ out_dfa->start_anchored = INIT_STATE;
+ out_dfa->start_floating = DEAD_STATE;
+ out_dfa->alpha_size = autom.alphasize;
+ out_dfa->alpha_remap = autom.alpha;
+ auto hash = hash_dfa_no_reports(*out_dfa);
if (check_dupe(*out_dfa, build.anchored_nfas[hash], remap)) {
- return ANCHORED_REMAP;
- }
+ return ANCHORED_REMAP;
+ }
build.anchored_nfas[hash].push_back(move(out_dfa));
- return ANCHORED_SUCCESS;
-}
-
-static
+ return ANCHORED_SUCCESS;
+}
+
+static
int addAutomaton(RoseBuildImpl &build, const NGHolder &h, ReportID *remap) {
- if (num_vertices(h) > ANCHORED_NFA_STATE_LIMIT) {
- DEBUG_PRINTF("autom bad!\n");
- return ANCHORED_FAIL;
- }
-
- Automaton_Holder autom(h);
-
+ if (num_vertices(h) > ANCHORED_NFA_STATE_LIMIT) {
+ DEBUG_PRINTF("autom bad!\n");
+ return ANCHORED_FAIL;
+ }
+
+ Automaton_Holder autom(h);
+
auto out_dfa = ue2::make_unique<raw_dfa>(NFA_OUTFIX_RAW);
if (determinise(autom, out_dfa->states, MAX_DFA_STATES)) {
return finalise_out(build, h, autom, move(out_dfa), remap);
- }
-
- DEBUG_PRINTF("determinise failed\n");
- return ANCHORED_FAIL;
-}
-
-static
-void setReports(NGHolder &h, const map<NFAVertex, set<u32>> &reportMap,
+ }
+
+ DEBUG_PRINTF("determinise failed\n");
+ return ANCHORED_FAIL;
+}
+
+static
+void setReports(NGHolder &h, const map<NFAVertex, set<u32>> &reportMap,
const unordered_map<NFAVertex, NFAVertex> &orig_to_copy) {
- for (const auto &m : reportMap) {
- NFAVertex t = orig_to_copy.at(m.first);
- assert(!m.second.empty());
- add_edge(t, h.accept, h);
- insert(&h[t].reports, m.second);
- }
-}
-
+ for (const auto &m : reportMap) {
+ NFAVertex t = orig_to_copy.at(m.first);
+ assert(!m.second.empty());
+ add_edge(t, h.accept, h);
+ insert(&h[t].reports, m.second);
+ }
+}
+
int addAnchoredNFA(RoseBuildImpl &build, const NGHolder &wrapper,
- const map<NFAVertex, set<u32>> &reportMap) {
- NGHolder h;
+ const map<NFAVertex, set<u32>> &reportMap) {
+ NGHolder h;
unordered_map<NFAVertex, NFAVertex> orig_to_copy;
- cloneHolder(h, wrapper, &orig_to_copy);
- clear_in_edges(h.accept, h);
- clear_in_edges(h.acceptEod, h);
- add_edge(h.accept, h.acceptEod, h);
- clearReports(h);
- setReports(h, reportMap, orig_to_copy);
-
+ cloneHolder(h, wrapper, &orig_to_copy);
+ clear_in_edges(h.accept, h);
+ clear_in_edges(h.acceptEod, h);
+ add_edge(h.accept, h.acceptEod, h);
+ clearReports(h);
+ setReports(h, reportMap, orig_to_copy);
+
return addAutomaton(build, h, nullptr);
-}
-
+}
+
int addToAnchoredMatcher(RoseBuildImpl &build, const NGHolder &anchored,
- u32 exit_id, ReportID *remap) {
- NGHolder h;
- cloneHolder(h, anchored);
- clearReports(h);
- assert(in_degree(h.acceptEod, h) == 1);
- for (auto v : inv_adjacent_vertices_range(h.accept, h)) {
- h[v].reports.clear();
- h[v].reports.insert(exit_id);
- }
-
+ u32 exit_id, ReportID *remap) {
+ NGHolder h;
+ cloneHolder(h, anchored);
+ clearReports(h);
+ assert(in_degree(h.acceptEod, h) == 1);
+ for (auto v : inv_adjacent_vertices_range(h.accept, h)) {
+ h[v].reports.clear();
+ h[v].reports.insert(exit_id);
+ }
+
return addAutomaton(build, h, remap);
-}
-
-static
+}
+
+static
void buildSimpleDfas(const RoseBuildImpl &build, const vector<u32> &frag_map,
- vector<unique_ptr<raw_dfa>> *anchored_dfas) {
- /* we should have determinised all of these before so there should be no
- * chance of failure. */
+ vector<unique_ptr<raw_dfa>> *anchored_dfas) {
+ /* we should have determinised all of these before so there should be no
+ * chance of failure. */
flat_set<u32> exit_ids;
for (const auto &simple : build.anchored_simple) {
exit_ids.clear();
- for (auto lit_id : simple.second) {
+ for (auto lit_id : simple.second) {
assert(lit_id < frag_map.size());
exit_ids.insert(frag_map[lit_id]);
- }
+ }
auto h = populate_holder(simple.first, exit_ids);
Automaton_Holder autom(*h);
auto rdfa = ue2::make_unique<raw_dfa>(NFA_OUTFIX_RAW);
UNUSED bool rv = determinise(autom, rdfa->states, MAX_DFA_STATES);
assert(rv);
- rdfa->start_anchored = INIT_STATE;
- rdfa->start_floating = DEAD_STATE;
- rdfa->alpha_size = autom.alphasize;
- rdfa->alpha_remap = autom.alpha;
- anchored_dfas->push_back(move(rdfa));
- }
-}
-
-/**
- * Fill the given vector with all of the raw_dfas we need to compile into the
- * anchored matcher. Takes ownership of the input structures, clearing them
- * from RoseBuildImpl.
- */
-static
+ rdfa->start_anchored = INIT_STATE;
+ rdfa->start_floating = DEAD_STATE;
+ rdfa->alpha_size = autom.alphasize;
+ rdfa->alpha_remap = autom.alpha;
+ anchored_dfas->push_back(move(rdfa));
+ }
+}
+
+/**
+ * Fill the given vector with all of the raw_dfas we need to compile into the
+ * anchored matcher. Takes ownership of the input structures, clearing them
+ * from RoseBuildImpl.
+ */
+static
vector<unique_ptr<raw_dfa>> getAnchoredDfas(RoseBuildImpl &build,
const vector<u32> &frag_map) {
vector<unique_ptr<raw_dfa>> dfas;
- // DFAs that already exist as raw_dfas.
+ // DFAs that already exist as raw_dfas.
for (auto &anch_dfas : build.anchored_nfas) {
- for (auto &rdfa : anch_dfas.second) {
+ for (auto &rdfa : anch_dfas.second) {
dfas.push_back(move(rdfa));
- }
- }
+ }
+ }
build.anchored_nfas.clear();
-
- // DFAs we currently have as simple literals.
+
+ // DFAs we currently have as simple literals.
if (!build.anchored_simple.empty()) {
buildSimpleDfas(build, frag_map, &dfas);
build.anchored_simple.clear();
- }
+ }
return dfas;
-}
-
-/**
- * \brief Builds our anchored DFAs into runtime NFAs.
- *
- * Constructs a vector of NFA structures and a vector of their start offsets
- * (number of dots removed from the prefix) from the raw_dfa structures given.
- *
- * Note: frees the raw_dfa structures on completion.
- *
- * \return Total bytes required for the complete anchored matcher.
- */
-static
+}
+
+/**
+ * \brief Builds our anchored DFAs into runtime NFAs.
+ *
+ * Constructs a vector of NFA structures and a vector of their start offsets
+ * (number of dots removed from the prefix) from the raw_dfa structures given.
+ *
+ * Note: frees the raw_dfa structures on completion.
+ *
+ * \return Total bytes required for the complete anchored matcher.
+ */
+static
size_t buildNfas(vector<raw_dfa> &anchored_dfas,
vector<bytecode_ptr<NFA>> *nfas,
vector<u32> *start_offset, const CompileContext &cc,
const ReportManager &rm) {
- const size_t num_dfas = anchored_dfas.size();
-
- nfas->reserve(num_dfas);
- start_offset->reserve(num_dfas);
-
- size_t total_size = 0;
-
- for (auto &rdfa : anchored_dfas) {
+ const size_t num_dfas = anchored_dfas.size();
+
+ nfas->reserve(num_dfas);
+ start_offset->reserve(num_dfas);
+
+ size_t total_size = 0;
+
+ for (auto &rdfa : anchored_dfas) {
u32 removed_dots = remove_leading_dots(rdfa);
- start_offset->push_back(removed_dots);
-
+ start_offset->push_back(removed_dots);
+
minimize_hopcroft(rdfa, cc.grey);
-
+
auto nfa = mcclellanCompile(rdfa, cc, rm, false);
- if (!nfa) {
- assert(0);
- throw std::bad_alloc();
- }
-
- assert(nfa->length);
- total_size += ROUNDUP_CL(sizeof(anchored_matcher_info) + nfa->length);
- nfas->push_back(move(nfa));
- }
-
- // We no longer need to keep the raw_dfa structures around.
- anchored_dfas.clear();
-
- return total_size;
-}
-
+ if (!nfa) {
+ assert(0);
+ throw std::bad_alloc();
+ }
+
+ assert(nfa->length);
+ total_size += ROUNDUP_CL(sizeof(anchored_matcher_info) + nfa->length);
+ nfas->push_back(move(nfa));
+ }
+
+ // We no longer need to keep the raw_dfa structures around.
+ anchored_dfas.clear();
+
+ return total_size;
+}
+
vector<raw_dfa> buildAnchoredDfas(RoseBuildImpl &build,
const vector<LitFragment> &fragments) {
vector<raw_dfa> dfas;
-
+
if (build.anchored_nfas.empty() && build.anchored_simple.empty()) {
- DEBUG_PRINTF("empty\n");
+ DEBUG_PRINTF("empty\n");
return dfas;
}
@@ -874,50 +874,50 @@ buildAnchoredMatcher(RoseBuildImpl &build, const vector<LitFragment> &fragments,
if (dfas.empty()) {
DEBUG_PRINTF("empty\n");
- return nullptr;
- }
-
+ return nullptr;
+ }
+
for (auto &rdfa : dfas) {
remapIdsToPrograms(fragments, rdfa);
}
-
+
vector<bytecode_ptr<NFA>> nfas;
- vector<u32> start_offset; // start offset for each dfa (dots removed)
+ vector<u32> start_offset; // start offset for each dfa (dots removed)
size_t total_size = buildNfas(dfas, &nfas, &start_offset, cc, build.rm);
-
- if (total_size > cc.grey.limitRoseAnchoredSize) {
- throw ResourceLimitError();
- }
-
+
+ if (total_size > cc.grey.limitRoseAnchoredSize) {
+ throw ResourceLimitError();
+ }
+
auto atable =
make_zeroed_bytecode_ptr<anchored_matcher_info>(total_size, 64);
- char *curr = (char *)atable.get();
-
- u32 state_offset = 0;
- for (size_t i = 0; i < nfas.size(); i++) {
- const NFA *nfa = nfas[i].get();
- anchored_matcher_info *ami = (anchored_matcher_info *)curr;
- char *prev_curr = curr;
-
- curr += sizeof(anchored_matcher_info);
-
- memcpy(curr, nfa, nfa->length);
- curr += nfa->length;
- curr = ROUNDUP_PTR(curr, 64);
-
- if (i + 1 == nfas.size()) {
- ami->next_offset = 0U;
- } else {
- ami->next_offset = verify_u32(curr - prev_curr);
- }
-
- ami->state_offset = state_offset;
+ char *curr = (char *)atable.get();
+
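+ /* Lay out each engine as an anchored_matcher_info header followed by
+ * its NFA, rounded up to a cache line; next_offset chains the blocks
+ * and is zero for the last one. */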
+ u32 state_offset = 0;
+ for (size_t i = 0; i < nfas.size(); i++) {
+ const NFA *nfa = nfas[i].get();
+ anchored_matcher_info *ami = (anchored_matcher_info *)curr;
+ char *prev_curr = curr;
+
+ curr += sizeof(anchored_matcher_info);
+
+ memcpy(curr, nfa, nfa->length);
+ curr += nfa->length;
+ curr = ROUNDUP_PTR(curr, 64);
+
+ if (i + 1 == nfas.size()) {
+ ami->next_offset = 0U;
+ } else {
+ ami->next_offset = verify_u32(curr - prev_curr);
+ }
+
+ ami->state_offset = state_offset;
state_offset += nfa->streamStateSize;
- ami->anchoredMinDistance = start_offset[i];
- }
-
+ ami->anchoredMinDistance = start_offset[i];
+ }
+
DEBUG_PRINTF("success %zu\n", atable.size());
- return atable;
-}
-
-} // namespace ue2
+ return atable;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_anchored.h b/contrib/libs/hyperscan/src/rose/rose_build_anchored.h
index 0301eea217..37d268ac5a 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_anchored.h
+++ b/contrib/libs/hyperscan/src/rose/rose_build_anchored.h
@@ -1,57 +1,57 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef ROSE_BUILD_ANCHORED
-#define ROSE_BUILD_ANCHORED
-
-#include "ue2common.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ROSE_BUILD_ANCHORED
+#define ROSE_BUILD_ANCHORED
+
+#include "ue2common.h"
#include "rose_build_impl.h"
-#include "nfagraph/ng_holder.h"
+#include "nfagraph/ng_holder.h"
#include "util/bytecode_ptr.h"
-
-#include <map>
-#include <vector>
-#include <set>
-
+
+#include <map>
+#include <vector>
+#include <set>
+
struct anchored_matcher_info;
-
-namespace ue2 {
-
-class RoseBuildImpl;
+
+namespace ue2 {
+
+class RoseBuildImpl;
struct raw_dfa;
struct LitFragment;
-
+
/**
* \brief Construct a set of anchored DFAs from our anchored literals/engines.
*/
std::vector<raw_dfa> buildAnchoredDfas(RoseBuildImpl &build,
const std::vector<LitFragment> &fragments);
-
+
/**
* \brief Construct an anchored_matcher_info runtime structure from the given
* set of DFAs.
@@ -66,16 +66,16 @@ buildAnchoredMatcher(RoseBuildImpl &build,
u32 anchoredStateSize(const anchored_matcher_info &atable);
-#define ANCHORED_FAIL 0
-#define ANCHORED_SUCCESS 1
-#define ANCHORED_REMAP 2
-
+#define ANCHORED_FAIL 0
+#define ANCHORED_SUCCESS 1
+#define ANCHORED_REMAP 2
+
int addAnchoredNFA(RoseBuildImpl &build, const NGHolder &wrapper,
- const std::map<NFAVertex, std::set<u32>> &reportMap);
-
+ const std::map<NFAVertex, std::set<u32>> &reportMap);
+
int addToAnchoredMatcher(RoseBuildImpl &build, const NGHolder &anchored,
- u32 exit_id, ReportID *remap);
-
-} // namespace ue2
-
-#endif
+ u32 exit_id, ReportID *remap);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_bytecode.cpp b/contrib/libs/hyperscan/src/rose/rose_build_bytecode.cpp
index b40257e4d5..df464c2800 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_bytecode.cpp
+++ b/contrib/libs/hyperscan/src/rose/rose_build_bytecode.cpp
@@ -1,186 +1,186 @@
-/*
+/*
* Copyright (c) 2015-2020, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "rose_build_impl.h"
-
-#include "ue2common.h"
-#include "grey.h"
-#include "hs_compile.h" // for HS_MODE_*
-#include "rose_build_add_internal.h"
-#include "rose_build_anchored.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "rose_build_impl.h"
+
+#include "ue2common.h"
+#include "grey.h"
+#include "hs_compile.h" // for HS_MODE_*
+#include "rose_build_add_internal.h"
+#include "rose_build_anchored.h"
#include "rose_build_dump.h"
#include "rose_build_engine_blob.h"
#include "rose_build_exclusive.h"
#include "rose_build_groups.h"
-#include "rose_build_infix.h"
+#include "rose_build_infix.h"
#include "rose_build_long_lit.h"
-#include "rose_build_lookaround.h"
+#include "rose_build_lookaround.h"
#include "rose_build_matchers.h"
#include "rose_build_misc.h"
#include "rose_build_program.h"
#include "rose_build_resources.h"
-#include "rose_build_scatter.h"
-#include "rose_build_util.h"
-#include "rose_build_width.h"
+#include "rose_build_scatter.h"
+#include "rose_build_util.h"
+#include "rose_build_width.h"
#include "rose_internal.h"
#include "rose_program.h"
-#include "hwlm/hwlm.h" /* engine types */
-#include "hwlm/hwlm_build.h"
+#include "hwlm/hwlm.h" /* engine types */
+#include "hwlm/hwlm_build.h"
#include "hwlm/hwlm_literal.h"
-#include "nfa/castlecompile.h"
-#include "nfa/goughcompile.h"
-#include "nfa/mcclellancompile.h"
+#include "nfa/castlecompile.h"
+#include "nfa/goughcompile.h"
+#include "nfa/mcclellancompile.h"
#include "nfa/mcclellancompile_util.h"
#include "nfa/mcsheng_compile.h"
-#include "nfa/nfa_api_queue.h"
-#include "nfa/nfa_build_util.h"
-#include "nfa/nfa_internal.h"
+#include "nfa/nfa_api_queue.h"
+#include "nfa/nfa_build_util.h"
+#include "nfa/nfa_internal.h"
#include "nfa/shengcompile.h"
-#include "nfa/shufticompile.h"
+#include "nfa/shufticompile.h"
#include "nfa/tamaramacompile.h"
#include "nfa/tamarama_internal.h"
#include "nfagraph/ng_execute.h"
-#include "nfagraph/ng_holder.h"
-#include "nfagraph/ng_lbr.h"
-#include "nfagraph/ng_limex.h"
-#include "nfagraph/ng_mcclellan.h"
-#include "nfagraph/ng_repeat.h"
-#include "nfagraph/ng_reports.h"
-#include "nfagraph/ng_revacc.h"
-#include "nfagraph/ng_stop.h"
-#include "nfagraph/ng_util.h"
-#include "nfagraph/ng_width.h"
+#include "nfagraph/ng_holder.h"
+#include "nfagraph/ng_lbr.h"
+#include "nfagraph/ng_limex.h"
+#include "nfagraph/ng_mcclellan.h"
+#include "nfagraph/ng_repeat.h"
+#include "nfagraph/ng_reports.h"
+#include "nfagraph/ng_revacc.h"
+#include "nfagraph/ng_stop.h"
+#include "nfagraph/ng_util.h"
+#include "nfagraph/ng_width.h"
#include "smallwrite/smallwrite_build.h"
-#include "som/slot_manager.h"
-#include "util/bitutils.h"
-#include "util/boundary_reports.h"
-#include "util/charreach.h"
-#include "util/charreach_util.h"
-#include "util/compile_context.h"
-#include "util/compile_error.h"
-#include "util/container.h"
+#include "som/slot_manager.h"
+#include "util/bitutils.h"
+#include "util/boundary_reports.h"
+#include "util/charreach.h"
+#include "util/charreach_util.h"
+#include "util/compile_context.h"
+#include "util/compile_error.h"
+#include "util/container.h"
#include "util/fatbit_build.h"
-#include "util/graph_range.h"
+#include "util/graph_range.h"
#include "util/insertion_ordered.h"
#include "util/make_unique.h"
-#include "util/multibit_build.h"
+#include "util/multibit_build.h"
#include "util/noncopyable.h"
-#include "util/order_check.h"
+#include "util/order_check.h"
#include "util/popcount.h"
-#include "util/queue_index_factory.h"
-#include "util/report_manager.h"
-#include "util/ue2string.h"
-#include "util/verify_types.h"
-
-#include <algorithm>
+#include "util/queue_index_factory.h"
+#include "util/report_manager.h"
+#include "util/ue2string.h"
+#include "util/verify_types.h"
+
+#include <algorithm>
#include <array>
-#include <map>
-#include <queue>
-#include <set>
-#include <sstream>
-#include <string>
-#include <vector>
-#include <utility>
-
-#include <boost/range/adaptor/map.hpp>
-
-using namespace std;
-using boost::adaptors::map_values;
-using boost::adaptors::map_keys;
-
-namespace ue2 {
-
-/* The rose bytecode construction is a giant cesspit.
- *
- * One issue is that bits and pieces are constructed piecemeal and these
- * sections are used later in the construction process. Until the very end of
- * the construction there is no useful invariant holding for the bytecode. This
- * makes reordering / understanding the construction process awkward as there
- * are hidden dependencies everywhere. We should start by shifting towards
- * a model where the bytecode is only written to during the construction so that
- * the dependencies can be understood by us mere mortals.
- *
- * I am sure the construction process is also bad from a number of other
- * standpoints, but those can come later.
- *
- * Actually, one other annoying issue is the plague of member functions on the
- * impl which tightly couples the internals of this file to all the other rose
- * build files. Need more egregiously awesome free functions.
- */
-
-namespace /* anon */ {
-
+#include <map>
+#include <queue>
+#include <set>
+#include <sstream>
+#include <string>
+#include <vector>
+#include <utility>
+
+#include <boost/range/adaptor/map.hpp>
+
+using namespace std;
+using boost::adaptors::map_values;
+using boost::adaptors::map_keys;
+
+namespace ue2 {
+
+/* The rose bytecode construction is a giant cesspit.
+ *
+ * One issue is that bits and pieces are constructed piecemeal and these
+ * sections are used later in the construction process. Until the very end of
+ * the construction there is no useful invariant holding for the bytecode. This
+ * makes reordering / understanding the construction process awkward as there
+ * are hidden dependencies everywhere. We should start by shifting towards
+ * a model where the bytecode is only written to during the construction so that
+ * the dependencies can be understood by us mere mortals.
+ *
+ * I am sure the construction process is also bad from a number of other
+ * standpoints, but those can come later.
+ *
+ * Actually, one other annoying issue is the plague of member functions on the
+ * impl which tightly couples the internals of this file to all the other rose
+ * build files. Need more egregiously awesome free functions.
+ */
+
+namespace /* anon */ {
+
struct build_context : noncopyable {
/** \brief information about engines to the left of a vertex */
map<RoseVertex, left_build_info> leftfix_info;
-
+
/** \brief mapping from suffix to queue index. */
map<suffix_id, u32> suffixes;
-
+
/** \brief engine info by queue. */
map<u32, engine_info> engine_info_by_queue;
-
+
/** \brief Simple cache of programs written to engine blob, used for
* deduplication. */
unordered_map<RoseProgram, u32, RoseProgramHash,
RoseProgramEquivalence> program_cache;
-
+
/** \brief State indices, for those roles that have them.
* Each vertex present has a unique state index in the range
* [0, roleStateIndices.size()). */
unordered_map<RoseVertex, u32> roleStateIndices;
-
+
/** \brief Mapping from queue index to bytecode offset for built engines
* that have already been pushed into the engine_blob. */
unordered_map<u32, u32> engineOffsets;
-
+
/** \brief List of long literals (ones with CHECK_LONG_LIT instructions)
* that need hash table support. */
vector<ue2_case_string> longLiterals;
-
+
/** \brief Contents of the Rose bytecode immediately following the
* RoseEngine. */
RoseEngineBlob engine_blob;
-
+
/** \brief True if this Rose engine has an MPV engine. */
bool needs_mpv_catchup = false;
-
+
/** \brief Resources in use (tracked as programs are added). */
RoseResources resources;
};
-
+
/** \brief subengine info including built engine and
* corresponding triggering rose vertices */
struct ExclusiveSubengine {
bytecode_ptr<NFA> nfa;
vector<RoseVertex> vertices;
};
-
+
/** \brief exclusive info to build tamarama */
struct ExclusiveInfo : noncopyable {
// subengine info
@@ -189,11 +189,11 @@ struct ExclusiveInfo : noncopyable {
set<ReportID> reports;
// assigned queue id
u32 queue;
-};
-
-}
-
-static
+};
+
+}
+
+static
void add_nfa_to_blob(build_context &bc, NFA &nfa) {
u32 qi = nfa.queueIndex;
u32 nfa_offset = bc.engine_blob.add(nfa, nfa.length);
@@ -202,19 +202,19 @@ void add_nfa_to_blob(build_context &bc, NFA &nfa) {
assert(!contains(bc.engineOffsets, qi));
bc.engineOffsets.emplace(qi, nfa_offset);
-}
-
-static
-u32 countRosePrefixes(const vector<LeftNfaInfo> &roses) {
- u32 num = 0;
- for (const auto &r : roses) {
- if (!r.infix) {
- num++;
- }
- }
- return num;
-}
-
+}
+
+static
+u32 countRosePrefixes(const vector<LeftNfaInfo> &roses) {
+ u32 num = 0;
+ for (const auto &r : roses) {
+ if (!r.infix) {
+ num++;
+ }
+ }
+ return num;
+}
+
/**
* \brief True if this Rose engine needs to run a catch up whenever a literal
* report is generated.
@@ -222,7 +222,7 @@ u32 countRosePrefixes(const vector<LeftNfaInfo> &roses) {
* Catch up is necessary if there are output-exposed engines (suffixes,
* outfixes).
*/
-static
+static
bool needsCatchup(const RoseBuildImpl &build) {
/* Note: we could be more selective about when we need to generate catch up
* instructions rather than just a boolean yes/no - for instance, if we know
@@ -237,7 +237,7 @@ bool needsCatchup(const RoseBuildImpl &build) {
if (!build.outfixes.empty()) {
/* TODO: check that they have non-eod reports */
- DEBUG_PRINTF("has outfixes\n");
+ DEBUG_PRINTF("has outfixes\n");
return true;
}
@@ -259,75 +259,75 @@ static
bool isPureFloating(const RoseResources &resources, const CompileContext &cc) {
if (!resources.has_floating) {
DEBUG_PRINTF("no floating table\n");
- return false;
- }
-
+ return false;
+ }
+
if (resources.has_outfixes || resources.has_suffixes ||
resources.has_leftfixes) {
DEBUG_PRINTF("has engines\n");
return false;
}
-
+
if (resources.has_anchored) {
DEBUG_PRINTF("has anchored matcher\n");
- return false;
- }
-
+ return false;
+ }
+
if (resources.has_eod) {
DEBUG_PRINTF("has eod work to do\n");
return false;
}
-
+
if (resources.has_states) {
DEBUG_PRINTF("has states\n");
return false;
}
-
+
if (resources.has_lit_delay) {
DEBUG_PRINTF("has delayed literals\n");
return false;
}
-
+
if (cc.streaming && resources.has_lit_check) {
DEBUG_PRINTF("has long literals in streaming mode, which needs long "
"literal table support\n");
return false;
- }
-
+ }
+
if (resources.checks_groups) {
DEBUG_PRINTF("has group checks\n");
return false;
}
- DEBUG_PRINTF("pure floating literals\n");
- return true;
-}
-
-static
+ DEBUG_PRINTF("pure floating literals\n");
+ return true;
+}
+
+static
bool isSingleOutfix(const RoseBuildImpl &tbi) {
- for (auto v : vertices_range(tbi.g)) {
- if (tbi.isAnyStart(v)) {
- continue;
- }
- if (tbi.hasLiteralInTable(v, ROSE_ANCHORED_SMALL_BLOCK)) {
- continue;
- }
- DEBUG_PRINTF("has role\n");
- return false;
- }
-
- if (tbi.ssm.numSomSlots()) {
- return false;
- }
-
- if (!tbi.boundary.report_at_eod.empty()) {
- return false; /* streaming runtime makes liberal use of broken flag */
- }
-
+ for (auto v : vertices_range(tbi.g)) {
+ if (tbi.isAnyStart(v)) {
+ continue;
+ }
+ if (tbi.hasLiteralInTable(v, ROSE_ANCHORED_SMALL_BLOCK)) {
+ continue;
+ }
+ DEBUG_PRINTF("has role\n");
+ return false;
+ }
+
+ if (tbi.ssm.numSomSlots()) {
+ return false;
+ }
+
+ if (!tbi.boundary.report_at_eod.empty()) {
+ return false; /* streaming runtime makes liberal use of broken flag */
+ }
+
return tbi.outfixes.size() == 1;
-}
-
-static
+}
+
+static
u8 pickRuntimeImpl(const RoseBuildImpl &build, const RoseResources &resources,
UNUSED u32 outfixEndQueue) {
DEBUG_PRINTF("has_outfixes=%d\n", resources.has_outfixes);
@@ -343,33 +343,33 @@ u8 pickRuntimeImpl(const RoseBuildImpl &build, const RoseResources &resources,
DEBUG_PRINTF("has_eod=%d\n", resources.has_eod);
if (isPureFloating(resources, build.cc)) {
- return ROSE_RUNTIME_PURE_LITERAL;
- }
-
+ return ROSE_RUNTIME_PURE_LITERAL;
+ }
+
if (isSingleOutfix(build)) {
- return ROSE_RUNTIME_SINGLE_OUTFIX;
- }
-
- return ROSE_RUNTIME_FULL_ROSE;
-}
-
+ return ROSE_RUNTIME_SINGLE_OUTFIX;
+ }
+
+ return ROSE_RUNTIME_FULL_ROSE;
+}
+
/**
* \brief True if this Rose engine needs to run MPV catch up in front of
* non-MPV reports.
*/
-static
+static
bool needsMpvCatchup(const RoseBuildImpl &build) {
const auto &outfixes = build.outfixes;
bool has_mpv =
any_of(begin(outfixes), end(outfixes), [](const OutfixInfo &outfix) {
return outfix.is_nonempty_mpv();
});
-
+
if (!has_mpv) {
DEBUG_PRINTF("no mpv\n");
return false;
- }
-
+ }
+
if (isSingleOutfix(build)) {
DEBUG_PRINTF("single outfix\n");
return false;
@@ -393,39 +393,39 @@ void fillStateOffsets(const RoseBuildImpl &build, u32 rolesWithStateCount,
// Role state storage.
curr_offset += mmbit_size(rolesWithStateCount);
- so->activeLeafArray = curr_offset; /* TODO: limit size of array */
- curr_offset += mmbit_size(activeArrayCount);
+ so->activeLeafArray = curr_offset; /* TODO: limit size of array */
+ curr_offset += mmbit_size(activeArrayCount);
so->activeLeafArray_size = mmbit_size(activeArrayCount);
-
- so->activeLeftArray = curr_offset; /* TODO: limit size of array */
+
+ so->activeLeftArray = curr_offset; /* TODO: limit size of array */
curr_offset += mmbit_size(activeLeftCount);
- so->activeLeftArray_size = mmbit_size(activeLeftCount);
-
+ so->activeLeftArray_size = mmbit_size(activeLeftCount);
+
so->longLitState = curr_offset;
curr_offset += longLitStreamStateRequired;
so->longLitState_size = longLitStreamStateRequired;
-
- // ONE WHOLE BYTE for each active leftfix with lag.
- so->leftfixLagTable = curr_offset;
- curr_offset += laggedRoseCount;
-
- so->anchorState = curr_offset;
- curr_offset += anchorStateSize;
-
- so->groups = curr_offset;
+
+ // ONE WHOLE BYTE for each active leftfix with lag.
+ so->leftfixLagTable = curr_offset;
+ curr_offset += laggedRoseCount;
+
+ so->anchorState = curr_offset;
+ curr_offset += anchorStateSize;
+
+ so->groups = curr_offset;
so->groups_size = (build.group_end + 7) / 8;
- assert(so->groups_size <= sizeof(u64a));
- curr_offset += so->groups_size;
-
- // The history consists of the bytes in the history only. YAY
- so->history = curr_offset;
- curr_offset += historyRequired;
-
+ assert(so->groups_size <= sizeof(u64a));
+ curr_offset += so->groups_size;
+
+ // The history consists of the bytes in the history only. YAY
+ so->history = curr_offset;
+ curr_offset += historyRequired;
+
// Exhaustion multibit.
- so->exhausted = curr_offset;
+ so->exhausted = curr_offset;
curr_offset += mmbit_size(build.rm.numEkeys());
so->exhausted_size = mmbit_size(build.rm.numEkeys());
-
+
// Logical multibit.
so->logicalVec = curr_offset;
so->logicalVec_size = mmbit_size(build.rm.numLogicalKeys() +
@@ -437,191 +437,191 @@ void fillStateOffsets(const RoseBuildImpl &build, u32 rolesWithStateCount,
so->combVec_size = mmbit_size(build.rm.numCkeys());
curr_offset += so->combVec_size;
- // SOM locations and valid/writeable multibit structures.
+ // SOM locations and valid/writeable multibit structures.
if (build.ssm.numSomSlots()) {
const u32 somWidth = build.ssm.somPrecision();
- if (somWidth) { // somWidth is zero in block mode.
- curr_offset = ROUNDUP_N(curr_offset, somWidth);
- so->somLocation = curr_offset;
+ if (somWidth) { // somWidth is zero in block mode.
+ curr_offset = ROUNDUP_N(curr_offset, somWidth);
+ so->somLocation = curr_offset;
curr_offset += build.ssm.numSomSlots() * somWidth;
- } else {
- so->somLocation = 0;
- }
- so->somValid = curr_offset;
+ } else {
+ so->somLocation = 0;
+ }
+ so->somValid = curr_offset;
curr_offset += mmbit_size(build.ssm.numSomSlots());
- so->somWritable = curr_offset;
+ so->somWritable = curr_offset;
curr_offset += mmbit_size(build.ssm.numSomSlots());
so->somMultibit_size = mmbit_size(build.ssm.numSomSlots());
- } else {
- // No SOM handling, avoid growing the stream state any further.
- so->somLocation = 0;
- so->somValid = 0;
- so->somWritable = 0;
- }
-
- // note: state space for mask nfas is allocated later
+ } else {
+ // No SOM handling, avoid growing the stream state any further.
+ so->somLocation = 0;
+ so->somValid = 0;
+ so->somWritable = 0;
+ }
+
+ // note: state space for mask nfas is allocated later
so->nfaStateBegin = curr_offset;
- so->end = curr_offset;
-}
-
-// Get the mask of initial vertices due to root and anchored_root.
-rose_group RoseBuildImpl::getInitialGroups() const {
+ so->end = curr_offset;
+}
+
+// Get the mask of initial vertices due to root and anchored_root.
+rose_group RoseBuildImpl::getInitialGroups() const {
rose_group groups = getSuccGroups(root)
| getSuccGroups(anchored_root)
| boundary_group_mask;
- DEBUG_PRINTF("initial groups = %016llx\n", groups);
- return groups;
-}
-
-static
-bool nfaStuckOn(const NGHolder &g) {
- assert(!proper_out_degree(g.startDs, g));
- set<NFAVertex> succ;
- insert(&succ, adjacent_vertices(g.start, g));
- succ.erase(g.startDs);
-
- set<NFAVertex> asucc;
- set<u32> tops;
- set<u32> done_tops;
-
- for (const auto &e : out_edges_range(g.start, g)) {
+ DEBUG_PRINTF("initial groups = %016llx\n", groups);
+ return groups;
+}
+
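+/* True if, for every top, the state entered on trigger has full reach and
+ * the same successors as start itself, i.e. once the engine is switched on
+ * it can never die. */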
+static
+bool nfaStuckOn(const NGHolder &g) {
+ assert(!proper_out_degree(g.startDs, g));
+ set<NFAVertex> succ;
+ insert(&succ, adjacent_vertices(g.start, g));
+ succ.erase(g.startDs);
+
+ set<NFAVertex> asucc;
+ set<u32> tops;
+ set<u32> done_tops;
+
+ for (const auto &e : out_edges_range(g.start, g)) {
insert(&tops, g[e].tops);
- if (!g[target(e, g)].char_reach.all()) {
- continue;
- }
-
- asucc.clear();
- insert(&asucc, adjacent_vertices(target(e, g), g));
-
- if (asucc == succ) {
+ if (!g[target(e, g)].char_reach.all()) {
+ continue;
+ }
+
+ asucc.clear();
+ insert(&asucc, adjacent_vertices(target(e, g), g));
+
+ if (asucc == succ) {
insert(&done_tops, g[e].tops);
- }
- }
-
- return tops == done_tops;
-}
-
-namespace {
-struct PredTopPair {
- PredTopPair(RoseVertex v, u32 t) : pred(v), top(t) {}
- bool operator<(const PredTopPair &b) const {
- const PredTopPair &a = *this;
- ORDER_CHECK(pred);
- ORDER_CHECK(top);
- return false;
- }
- RoseVertex pred;
- u32 top;
-};
-}
-
-static
-void findFixedDepthTops(const RoseGraph &g, const set<PredTopPair> &triggers,
- map<u32, u32> *fixed_depth_tops) {
- DEBUG_PRINTF("|trig| %zu\n", triggers.size());
- /* find all pred roles for this holder, group by top */
- /* if all pred roles for a given top have the same min and max offset, we
- * add the top to the fixed_depth_top map */
- map<u32, set<RoseVertex> > pred_by_top;
- for (const auto &ptp : triggers) {
- u32 top = ptp.top;
- RoseVertex u = ptp.pred;
- pred_by_top[top].insert(u);
- }
-
- for (const auto &e : pred_by_top) {
- u32 top = e.first;
- const set<RoseVertex> &preds = e.second;
- if (!g[*preds.begin()].fixedOffset()) {
- continue;
- }
- u32 depth = g[*preds.begin()].min_offset;
- for (RoseVertex u : preds) {
- if (g[u].min_offset != depth || g[u].max_offset != depth) {
- goto next_top;
- }
- }
- DEBUG_PRINTF("%u at depth %u\n", top, depth);
- (*fixed_depth_tops)[top] = depth;
- next_top:;
- }
-}
-
-/**
- * \brief Heuristic for picking between a DFA or NFA implementation of an
- * engine.
- */
-static
+ }
+ }
+
+ return tops == done_tops;
+}
+
+namespace {
+struct PredTopPair {
+ PredTopPair(RoseVertex v, u32 t) : pred(v), top(t) {}
+ bool operator<(const PredTopPair &b) const {
+ const PredTopPair &a = *this;
+ ORDER_CHECK(pred);
+ ORDER_CHECK(top);
+ return false;
+ }
+ RoseVertex pred;
+ u32 top;
+};
+}
+
+static
+void findFixedDepthTops(const RoseGraph &g, const set<PredTopPair> &triggers,
+ map<u32, u32> *fixed_depth_tops) {
+ DEBUG_PRINTF("|trig| %zu\n", triggers.size());
+ /* find all pred roles for this holder, group by top */
+ /* if all pred roles for a given top have the same min and max offset, we
+ * add the top to the fixed_depth_top map */
+ map<u32, set<RoseVertex> > pred_by_top;
+ for (const auto &ptp : triggers) {
+ u32 top = ptp.top;
+ RoseVertex u = ptp.pred;
+ pred_by_top[top].insert(u);
+ }
+
+ for (const auto &e : pred_by_top) {
+ u32 top = e.first;
+ const set<RoseVertex> &preds = e.second;
+ if (!g[*preds.begin()].fixedOffset()) {
+ continue;
+ }
+ u32 depth = g[*preds.begin()].min_offset;
+ for (RoseVertex u : preds) {
+ if (g[u].min_offset != depth || g[u].max_offset != depth) {
+ goto next_top;
+ }
+ }
+ DEBUG_PRINTF("%u at depth %u\n", top, depth);
+ (*fixed_depth_tops)[top] = depth;
+ next_top:;
+ }
+}
+
+/**
+ * \brief Heuristic for picking between a DFA and an NFA implementation of an
+ * engine.
+ */
+static
bytecode_ptr<NFA> pickImpl(bytecode_ptr<NFA> dfa_impl,
bytecode_ptr<NFA> nfa_impl,
bool fast_nfa) {
- assert(nfa_impl);
- assert(dfa_impl);
+ assert(nfa_impl);
+ assert(dfa_impl);
assert(isDfaType(dfa_impl->type));
-
- // If our NFA is an LBR, it always wins.
- if (isLbrType(nfa_impl->type)) {
- return nfa_impl;
- }
-
+
+ // If our NFA is an LBR, it always wins.
+ if (isLbrType(nfa_impl->type)) {
+ return nfa_impl;
+ }
+
    // If our DFA is an accelerated Sheng, it always wins.
if (isShengType(dfa_impl->type) && has_accel(*dfa_impl)) {
return dfa_impl;
}
- bool d_accel = has_accel(*dfa_impl);
- bool n_accel = has_accel(*nfa_impl);
+ bool d_accel = has_accel(*dfa_impl);
+ bool n_accel = has_accel(*nfa_impl);
bool d_big = isBigDfaType(dfa_impl->type);
- bool n_vsmall = nfa_impl->nPositions <= 32;
- bool n_br = has_bounded_repeats(*nfa_impl);
- DEBUG_PRINTF("da %d na %d db %d nvs %d nbr %d\n", (int)d_accel,
- (int)n_accel, (int)d_big, (int)n_vsmall, (int)n_br);
- if (d_big) {
- if (!n_vsmall) {
- if (d_accel || !n_accel) {
- return dfa_impl;
- } else {
- return nfa_impl;
- }
- } else {
+ bool n_vsmall = nfa_impl->nPositions <= 32;
+ bool n_br = has_bounded_repeats(*nfa_impl);
+ DEBUG_PRINTF("da %d na %d db %d nvs %d nbr %d\n", (int)d_accel,
+ (int)n_accel, (int)d_big, (int)n_vsmall, (int)n_br);
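+    /* A big (16-bit) DFA should only lose to an accelerated NFA (which, if
+     * very small, must also be fast). */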
+ if (d_big) {
+ if (!n_vsmall) {
+ if (d_accel || !n_accel) {
+ return dfa_impl;
+ } else {
+ return nfa_impl;
+ }
+ } else {
if (n_accel && fast_nfa) {
- return nfa_impl;
- } else {
- return dfa_impl;
- }
- }
- } else {
- /* favour a McClellan 8, unless the nfa looks really good and the dfa
- * looks like trouble */
- if (!d_accel && n_vsmall && n_accel && !n_br) {
- return nfa_impl;
- } else {
- return dfa_impl;
- }
- }
-}
-
-/**
- * \brief Builds an LBR if there's one repeat in the given CastleProto,
- * otherwise a Castle.
- */
-static
+ return nfa_impl;
+ } else {
+ return dfa_impl;
+ }
+ }
+ } else {
+ /* favour a McClellan 8, unless the nfa looks really good and the dfa
+ * looks like trouble */
+ if (!d_accel && n_vsmall && n_accel && !n_br) {
+ return nfa_impl;
+ } else {
+ return dfa_impl;
+ }
+ }
+}
+
+/**
+ * \brief Builds an LBR if there's one repeat in the given CastleProto,
+ * otherwise a Castle.
+ */
+static
bytecode_ptr<NFA>
-buildRepeatEngine(const CastleProto &proto,
- const map<u32, vector<vector<CharReach>>> &triggers,
+buildRepeatEngine(const CastleProto &proto,
+ const map<u32, vector<vector<CharReach>>> &triggers,
const CompileContext &cc, const ReportManager &rm) {
- // If we only have one repeat, the LBR should always be the best possible
- // implementation.
- if (proto.repeats.size() == 1 && cc.grey.allowLbr) {
+ // If we only have one repeat, the LBR should always be the best possible
+ // implementation.
+ if (proto.repeats.size() == 1 && cc.grey.allowLbr) {
return constructLBR(proto, triggers.at(0), cc, rm);
- }
-
+ }
+
auto castle_nfa = buildCastle(proto, triggers, cc, rm);
- assert(castle_nfa); // Should always be constructible.
- return castle_nfa;
-}
-
+ assert(castle_nfa); // Should always be constructible.
+ return castle_nfa;
+}
+
static
bytecode_ptr<NFA> getDfa(raw_dfa &rdfa, bool is_transient,
const CompileContext &cc, const ReportManager &rm) {
@@ -649,236 +649,236 @@ bytecode_ptr<NFA> getDfa(raw_dfa &rdfa, bool is_transient,
return dfa;
}
-/* builds suffix nfas */
-static
+/* builds suffix nfas */
+static
bytecode_ptr<NFA>
-buildSuffix(const ReportManager &rm, const SomSlotManager &ssm,
- const map<u32, u32> &fixed_depth_tops,
- const map<u32, vector<vector<CharReach>>> &triggers,
- suffix_id suff, const CompileContext &cc) {
- if (suff.castle()) {
+buildSuffix(const ReportManager &rm, const SomSlotManager &ssm,
+ const map<u32, u32> &fixed_depth_tops,
+ const map<u32, vector<vector<CharReach>>> &triggers,
+ suffix_id suff, const CompileContext &cc) {
+ if (suff.castle()) {
auto n = buildRepeatEngine(*suff.castle(), triggers, cc, rm);
- assert(n);
- return n;
- }
-
- if (suff.haig()) {
+ assert(n);
+ return n;
+ }
+
+ if (suff.haig()) {
auto n = goughCompile(*suff.haig(), ssm.somPrecision(), cc, rm);
- assert(n);
- return n;
- }
-
- if (suff.dfa()) {
+ assert(n);
+ return n;
+ }
+
+ if (suff.dfa()) {
auto d = getDfa(*suff.dfa(), false, cc, rm);
- assert(d);
- return d;
- }
-
- assert(suff.graph());
- NGHolder &holder = *suff.graph();
- assert(holder.kind == NFA_SUFFIX);
- const bool oneTop = onlyOneTop(holder);
- bool compress_state = cc.streaming;
-
- // Take a shot at the LBR engine.
- if (oneTop) {
+ assert(d);
+ return d;
+ }
+
+ assert(suff.graph());
+ NGHolder &holder = *suff.graph();
+ assert(holder.kind == NFA_SUFFIX);
+ const bool oneTop = onlyOneTop(holder);
+ bool compress_state = cc.streaming;
+
+ // Take a shot at the LBR engine.
+ if (oneTop) {
auto lbr = constructLBR(holder, triggers.at(0), cc, rm);
- if (lbr) {
- return lbr;
- }
- }
-
+ if (lbr) {
+ return lbr;
+ }
+ }
+
bool fast_nfa = false;
- auto n = constructNFA(holder, &rm, fixed_depth_tops, triggers,
+ auto n = constructNFA(holder, &rm, fixed_depth_tops, triggers,
compress_state, fast_nfa, cc);
- assert(n);
-
- if (oneTop && cc.grey.roseMcClellanSuffix) {
- if (cc.grey.roseMcClellanSuffix == 2 || n->nPositions > 128 ||
+ assert(n);
+
+ if (oneTop && cc.grey.roseMcClellanSuffix) {
+ if (cc.grey.roseMcClellanSuffix == 2 || n->nPositions > 128 ||
!has_bounded_repeats_other_than_firsts(*n) || !fast_nfa) {
- auto rdfa = buildMcClellan(holder, &rm, false, triggers.at(0),
- cc.grey);
- if (rdfa) {
+ auto rdfa = buildMcClellan(holder, &rm, false, triggers.at(0),
+ cc.grey);
+ if (rdfa) {
auto d = getDfa(*rdfa, false, cc, rm);
- assert(d);
- if (cc.grey.roseMcClellanSuffix != 2) {
+ assert(d);
+ if (cc.grey.roseMcClellanSuffix != 2) {
n = pickImpl(move(d), move(n), fast_nfa);
- } else {
- n = move(d);
- }
-
- assert(n);
- if (isMcClellanType(n->type)) {
- // DFA chosen. We may be able to set some more properties
- // in the NFA structure here.
- u64a maxOffset = findMaxOffset(holder, rm);
- if (maxOffset != MAX_OFFSET && maxOffset < 0xffffffffull) {
- n->maxOffset = (u32)maxOffset;
- DEBUG_PRINTF("dfa max offset %llu\n", maxOffset);
- } else {
- n->maxOffset = 0; // inf
- }
- }
- }
- }
- }
- return n;
-}
-
-static
-void findInfixTriggers(const RoseBuildImpl &build,
- map<left_id, set<PredTopPair> > *infixTriggers) {
- const RoseGraph &g = build.g;
- for (auto v : vertices_range(g)) {
- if (!g[v].left) {
- continue;
- }
-
- set<PredTopPair> &triggers = (*infixTriggers)[left_id(g[v].left)];
-
- for (const auto &e : in_edges_range(v, g)) {
- RoseVertex u = source(e, g);
- if (build.isAnyStart(u)) {
- continue;
- }
- triggers.insert(PredTopPair(u, g[e].rose_top));
- }
- }
-}
-
-static
-vector<CharReach> as_cr_seq(const rose_literal_id &lit) {
- vector<CharReach> rv = as_cr_seq(lit.s);
- for (u32 i = 0; i < lit.delay; i++) {
- rv.push_back(CharReach::dot());
- }
-
- /* TODO: take into account cmp/msk */
- return rv;
-}
-
-/**
- * \brief Populates the output map with trigger literals as sequences of
- * CharReach, grouped by top index.
- */
-static
-void findTriggerSequences(const RoseBuildImpl &tbi,
- const set<PredTopPair> &triggers,
- map<u32, vector<vector<CharReach> > > *trigger_lits) {
- map<u32, set<u32> > lit_ids_by_top;
- for (const PredTopPair &t : triggers) {
- insert(&lit_ids_by_top[t.top], tbi.g[t.pred].literals);
- }
-
- for (const auto &e : lit_ids_by_top) {
- const u32 top = e.first;
- const set<u32> &lit_ids = e.second;
-
+ } else {
+ n = move(d);
+ }
+
+ assert(n);
+ if (isMcClellanType(n->type)) {
+ // DFA chosen. We may be able to set some more properties
+ // in the NFA structure here.
+ u64a maxOffset = findMaxOffset(holder, rm);
+ if (maxOffset != MAX_OFFSET && maxOffset < 0xffffffffull) {
+ n->maxOffset = (u32)maxOffset;
+ DEBUG_PRINTF("dfa max offset %llu\n", maxOffset);
+ } else {
+ n->maxOffset = 0; // inf
+ }
+ }
+ }
+ }
+ }
+ return n;
+}
+
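+/* Collects, for each leftfix, the (pred vertex, top) pairs that can
+ * trigger it. */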
+static
+void findInfixTriggers(const RoseBuildImpl &build,
+ map<left_id, set<PredTopPair> > *infixTriggers) {
+ const RoseGraph &g = build.g;
+ for (auto v : vertices_range(g)) {
+ if (!g[v].left) {
+ continue;
+ }
+
+ set<PredTopPair> &triggers = (*infixTriggers)[left_id(g[v].left)];
+
+ for (const auto &e : in_edges_range(v, g)) {
+ RoseVertex u = source(e, g);
+ if (build.isAnyStart(u)) {
+ continue;
+ }
+ triggers.insert(PredTopPair(u, g[e].rose_top));
+ }
+ }
+}
+
+static
+vector<CharReach> as_cr_seq(const rose_literal_id &lit) {
+ vector<CharReach> rv = as_cr_seq(lit.s);
+ for (u32 i = 0; i < lit.delay; i++) {
+ rv.push_back(CharReach::dot());
+ }
+
+ /* TODO: take into account cmp/msk */
+ return rv;
+}
+
+/**
+ * \brief Populates the output map with trigger literals as sequences of
+ * CharReach, grouped by top index.
+ */
+static
+void findTriggerSequences(const RoseBuildImpl &tbi,
+ const set<PredTopPair> &triggers,
+ map<u32, vector<vector<CharReach> > > *trigger_lits) {
+ map<u32, set<u32> > lit_ids_by_top;
+ for (const PredTopPair &t : triggers) {
+ insert(&lit_ids_by_top[t.top], tbi.g[t.pred].literals);
+ }
+
+ for (const auto &e : lit_ids_by_top) {
+ const u32 top = e.first;
+ const set<u32> &lit_ids = e.second;
+
for (u32 id : lit_ids) {
const rose_literal_id &lit = tbi.literals.at(id);
- (*trigger_lits)[top].push_back(as_cr_seq(lit));
- }
- }
-}
-
+ (*trigger_lits)[top].push_back(as_cr_seq(lit));
+ }
+ }
+}
+
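/* Builds the implementation for a single leftfix, trying (in order): castle,
 * prebuilt DFA, McClellan prefix (roseMcClellanPrefix == 2), LBR for
 * single-top infixes, then a generic NFA with an optional McClellan upgrade. */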
static
bytecode_ptr<NFA> makeLeftNfa(const RoseBuildImpl &tbi, left_id &left,
const bool is_prefix, const bool is_transient,
const map<left_id, set<PredTopPair>> &infixTriggers,
const CompileContext &cc) {
const ReportManager &rm = tbi.rm;
-
+
bytecode_ptr<NFA> n;
- // Should compress state if this rose is non-transient and we're in
- // streaming mode.
- const bool compress_state = !is_transient;
-
+ // Should compress state if this rose is non-transient and we're in
+ // streaming mode.
+ const bool compress_state = !is_transient;
+
assert(is_prefix || !left.graph() || left.graph()->kind == NFA_INFIX);
assert(!is_prefix || !left.graph() || left.graph()->kind == NFA_PREFIX
|| left.graph()->kind == NFA_EAGER_PREFIX);
-
- // Holder should be implementable as an NFA at the very least.
- if (!left.dfa() && left.graph()) {
- assert(isImplementableNFA(*left.graph(), nullptr, cc));
- }
-
- map<u32, u32> fixed_depth_tops;
- if (!is_prefix /* infix */) {
- const set<PredTopPair> &triggers = infixTriggers.at(left);
- findFixedDepthTops(tbi.g, triggers, &fixed_depth_tops);
- }
-
- if (left.castle()) {
- assert(!is_prefix);
- map<u32, vector<vector<CharReach> > > triggers;
- findTriggerSequences(tbi, infixTriggers.at(left), &triggers);
+
+ // Holder should be implementable as an NFA at the very least.
+ if (!left.dfa() && left.graph()) {
+ assert(isImplementableNFA(*left.graph(), nullptr, cc));
+ }
+
+ map<u32, u32> fixed_depth_tops;
+ if (!is_prefix /* infix */) {
+ const set<PredTopPair> &triggers = infixTriggers.at(left);
+ findFixedDepthTops(tbi.g, triggers, &fixed_depth_tops);
+ }
+
+ if (left.castle()) {
+ assert(!is_prefix);
+ map<u32, vector<vector<CharReach> > > triggers;
+ findTriggerSequences(tbi, infixTriggers.at(left), &triggers);
n = buildRepeatEngine(*left.castle(), triggers, cc, rm);
- assert(n);
- return n; // Castles/LBRs are always best!
- }
-
- if (left.dfa()) {
+ assert(n);
+ return n; // Castles/LBRs are always best!
+ }
+
+ if (left.dfa()) {
n = getDfa(*left.dfa(), is_transient, cc, rm);
- } else if (left.graph() && cc.grey.roseMcClellanPrefix == 2 && is_prefix &&
- !is_transient) {
- auto rdfa = buildMcClellan(*left.graph(), nullptr, cc.grey);
- if (rdfa) {
+ } else if (left.graph() && cc.grey.roseMcClellanPrefix == 2 && is_prefix &&
+ !is_transient) {
+ auto rdfa = buildMcClellan(*left.graph(), nullptr, cc.grey);
+ if (rdfa) {
n = getDfa(*rdfa, is_transient, cc, rm);
assert(n);
- }
- }
-
- // We can attempt to build LBRs for infixes.
- if (!n && !is_prefix && left.graph() && onlyOneTop(*left.graph())) {
- map<u32, vector<vector<CharReach> > > triggers;
- findTriggerSequences(tbi, infixTriggers.at(left), &triggers);
+ }
+ }
+
+ // We can attempt to build LBRs for infixes.
+ if (!n && !is_prefix && left.graph() && onlyOneTop(*left.graph())) {
+ map<u32, vector<vector<CharReach> > > triggers;
+ findTriggerSequences(tbi, infixTriggers.at(left), &triggers);
assert(triggers.size() == 1); // single top
n = constructLBR(*left.graph(), triggers.begin()->second, cc, rm);
- }
-
+ }
+
bool fast_nfa = false;
- if (!n && left.graph()) {
- map<u32, vector<vector<CharReach>>> triggers;
+ if (!n && left.graph()) {
+ map<u32, vector<vector<CharReach>>> triggers;
if (left.graph()->kind == NFA_INFIX) {
findTriggerSequences(tbi, infixTriggers.at(left), &triggers);
}
- n = constructNFA(*left.graph(), nullptr, fixed_depth_tops, triggers,
+ n = constructNFA(*left.graph(), nullptr, fixed_depth_tops, triggers,
compress_state, fast_nfa, cc);
- }
-
- if (cc.grey.roseMcClellanPrefix == 1 && is_prefix && !left.dfa()
- && left.graph()
+ }
+
+ if (cc.grey.roseMcClellanPrefix == 1 && is_prefix && !left.dfa()
+ && left.graph()
&& (!n || !has_bounded_repeats_other_than_firsts(*n) || !fast_nfa)) {
- auto rdfa = buildMcClellan(*left.graph(), nullptr, cc.grey);
- if (rdfa) {
+ auto rdfa = buildMcClellan(*left.graph(), nullptr, cc.grey);
+ if (rdfa) {
auto d = getDfa(*rdfa, is_transient, cc, rm);
- assert(d);
+ assert(d);
n = pickImpl(move(d), move(n), fast_nfa);
- }
- }
-
- return n;
-}
-
-static
-void setLeftNfaProperties(NFA &n, const left_id &left) {
- depth min_width = findMinWidth(left);
- DEBUG_PRINTF("min_width=%s\n", min_width.str().c_str());
- u32 min_width_value = min_width.is_finite() ? (u32)min_width : 0;
- n.minWidth = min_width_value;
-
- depth max_width = findMaxWidth(left);
- DEBUG_PRINTF("max_width=%s\n", max_width.str().c_str());
- u32 max_width_value = max_width.is_finite() ? (u32)max_width : 0;
- n.maxWidth = max_width_value;
-
- // FIXME: NFA::maxOffset in Rose can't be found from reports as they don't
- // map to internal_report structures; it would have to come from the Rose
- // graph.
-}
-
-static
+ }
+ }
+
+ return n;
+}
+
+static
+void setLeftNfaProperties(NFA &n, const left_id &left) {
+ depth min_width = findMinWidth(left);
+ DEBUG_PRINTF("min_width=%s\n", min_width.str().c_str());
+ u32 min_width_value = min_width.is_finite() ? (u32)min_width : 0;
+ n.minWidth = min_width_value;
+
+ depth max_width = findMaxWidth(left);
+ DEBUG_PRINTF("max_width=%s\n", max_width.str().c_str());
+ u32 max_width_value = max_width.is_finite() ? (u32)max_width : 0;
+ n.maxWidth = max_width_value;
+
+ // FIXME: NFA::maxOffset in Rose can't be found from reports as they don't
+ // map to internal_report structures; it would have to come from the Rose
+ // graph.
+}
+
+static
void appendTailToHolder(NGHolder &h, const flat_set<ReportID> &reports,
const vector<NFAVertex> &starts,
const vector<CharReach> &tail) {
@@ -902,16 +902,16 @@ void appendTailToHolder(NGHolder &h, const flat_set<ReportID> &reports,
h[curr].char_reach = *it;
++it;
}
-
+
h[curr].reports = reports;
add_edge(curr, h.accept, h);
}
-
+
static
void appendTailToHolder(NGHolder &h, const vector<CharReach> &tail) {
assert(in_degree(h.acceptEod, h) == 1);
assert(!tail.empty());
-
+
map<flat_set<ReportID>, vector<NFAVertex> > reporters;
for (auto v : inv_adjacent_vertices_range(h.accept, h)) {
reporters[h[v].reports].push_back(v);
@@ -950,11 +950,11 @@ u32 decreaseLag(const RoseBuildImpl &build, NGHolder &h,
restored[i] |= *lit_it;
++lit_it;
}
- }
+ }
}
-
+
assert(!restored.empty());
-
+
appendTailToHolder(h, restored);
return restored.size();
@@ -991,9 +991,9 @@ bool checkSuitableForEager(bool is_prefix, const left_id &left,
if (build.isInETable(s)
|| contains(rg[s].literals, build.eod_event_literal_id)) {
return false; /* Ignore EOD related prefixes */
- }
+ }
}
-
+
if (left.dfa()) {
const raw_dfa &dfa = *left.dfa();
if (dfa.start_floating != DEAD_STATE) {
@@ -1002,7 +1002,7 @@ bool checkSuitableForEager(bool is_prefix, const left_id &left,
if (!dfa.states[dfa.start_anchored].reports.empty()) {
return false; /* vacuous (todo: handle?) */
}
-
+
if (!can_die_early(dfa, EAGER_DIE_BEFORE_LIMIT)) {
return false;
}
@@ -1012,11 +1012,11 @@ bool checkSuitableForEager(bool is_prefix, const left_id &left,
if (proper_out_degree(g.startDs, g)) {
return false; /* not purely anchored */
}
-
+
ei.new_graph = cloneHolder(*left.graph());
auto gg = ei.new_graph;
gg->kind = NFA_EAGER_PREFIX;
-
+
ei.lag_adjust = decreaseLag(build, *gg, succs);
if (is_match_vertex(gg->start, *gg)) {
@@ -1165,15 +1165,15 @@ bool buildLeftfix(RoseBuildImpl &build, build_context &bc, bool prefix, u32 qi,
for (u32 lit_id : g[u].literals) {
lits.insert(build.literals.at(lit_id).s);
}
- }
- }
+ }
+ }
DEBUG_PRINTF("%zu literals\n", lits.size());
max_queuelen = findMaxInfixMatches(leftfix, lits);
if (max_queuelen < UINT32_MAX) {
max_queuelen++;
}
}
-
+
u32 max_width;
if (is_transient) {
depth d = findMaxWidth(leftfix);
@@ -1182,13 +1182,13 @@ bool buildLeftfix(RoseBuildImpl &build, build_context &bc, bool prefix, u32 qi,
} else {
max_width = 0;
}
-
+
u8 cm_count = 0;
CharReach cm_cr;
if (cc.grey.allowCountingMiracles) {
findCountingMiracleInfo(leftfix, stop, &cm_count, &cm_cr);
}
-
+
for (RoseVertex v : succs) {
bc.leftfix_info.emplace(v, left_build_info(qi, g[v].left.lag, max_width,
squash_mask, stop,
@@ -1215,11 +1215,11 @@ unique_ptr<TamaInfo> constructTamaInfo(const RoseGraph &g,
for (const auto &e : in_edges_range(v, g)) {
tops.insert(g[e].rose_top);
}
- }
+ }
}
tamaInfo->add(nfa, tops);
}
-
+
return tamaInfo;
}
@@ -1238,12 +1238,12 @@ void updateTops(const RoseGraph &g, const TamaInfo &tamaInfo,
for (const auto &e : in_edges_range(v, g)) {
tamaProto.add(n, g[v].index, g[e].rose_top, out_top_remap);
}
- }
+ }
}
i++;
}
}
-
+
static
shared_ptr<TamaProto> constructContainerEngine(const RoseGraph &g,
build_context &bc,
@@ -1253,13 +1253,13 @@ shared_ptr<TamaProto> constructContainerEngine(const RoseGraph &g,
const Grey &grey) {
const auto &subengines = info.subengines;
auto tamaInfo = constructTamaInfo(g, subengines, is_suffix);
-
+
map<pair<const NFA *, u32>, u32> out_top_remap;
auto n = buildTamarama(*tamaInfo, queue, out_top_remap);
enforceEngineSizeLimit(n.get(), grey);
bc.engine_info_by_queue.emplace(n->queueIndex, engine_info(n.get(), false));
add_nfa_to_blob(bc, *n);
-
+
DEBUG_PRINTF("queue id:%u\n", queue);
shared_ptr<TamaProto> tamaProto = make_shared<TamaProto>();
tamaProto->reports = info.reports;
@@ -1283,11 +1283,11 @@ void buildInfixContainer(RoseGraph &g, build_context &bc,
for (const auto &v : verts) {
DEBUG_PRINTF("vert id:%zu\n", g[v].index);
g[v].left.tamarama = tamaProto;
- }
+ }
}
}
}
-
+
static
void buildSuffixContainer(RoseGraph &g, build_context &bc,
const vector<ExclusiveInfo> &exclusive_info,
@@ -1307,10 +1307,10 @@ void buildSuffixContainer(RoseGraph &g, build_context &bc,
const auto &v = verts[0];
suffix_id newSuffix(g[v].suffix);
bc.suffixes.emplace(newSuffix, queue);
- }
+ }
}
}
-
+
static
void updateExclusiveInfixProperties(const RoseBuildImpl &build,
const vector<ExclusiveInfo> &exclusive_info,
@@ -1320,14 +1320,14 @@ void updateExclusiveInfixProperties(const RoseBuildImpl &build,
for (const auto &info : exclusive_info) {
// Set leftfix optimisations, disabled for tamarama subengines
rose_group squash_mask = ~rose_group{0};
- // Leftfixes can have stop alphabets.
- vector<u8> stop(N_CHARS, 0);
+ // Leftfixes can have stop alphabets.
+ vector<u8> stop(N_CHARS, 0);
// Infix NFAs can have bounds on their queue lengths.
u32 max_queuelen = 0;
u32 max_width = 0;
u8 cm_count = 0;
CharReach cm_cr;
-
+
const auto &qi = info.queue;
const auto &subengines = info.subengines;
bool no_retrigger = true;
@@ -1346,7 +1346,7 @@ void updateExclusiveInfixProperties(const RoseBuildImpl &build,
for (u32 lit_id : build.g[u].literals) {
lits.insert(build.literals.at(lit_id).s);
}
- }
+ }
DEBUG_PRINTF("%zu literals\n", lits.size());
u32 queuelen = findMaxInfixMatches(leftfix, lits);
@@ -1354,7 +1354,7 @@ void updateExclusiveInfixProperties(const RoseBuildImpl &build,
queuelen++;
}
max_queuelen = max(max_queuelen, queuelen);
- }
+ }
}
if (no_retrigger) {
@@ -1369,11 +1369,11 @@ void updateExclusiveInfixProperties(const RoseBuildImpl &build,
squash_mask, stop,
max_queuelen, cm_count,
cm_cr));
- }
- }
+ }
+ }
}
}
-
+
static
void updateExclusiveSuffixProperties(const RoseBuildImpl &build,
const vector<ExclusiveInfo> &exclusive_info,
@@ -1390,14 +1390,14 @@ void updateExclusiveSuffixProperties(const RoseBuildImpl &build,
no_retrigger = false;
break;
}
- }
-
+ }
+
if (no_retrigger) {
no_retrigger_queues->insert(qi);
- }
+ }
}
}
-
+
static
void buildExclusiveInfixes(RoseBuildImpl &build, build_context &bc,
QueueIndexFactory &qif,
@@ -1429,12 +1429,12 @@ void buildExclusiveInfixes(RoseBuildImpl &build, build_context &bc,
}
info.queue = qif.get_queue();
exclusive_info.push_back(move(info));
- }
+ }
updateExclusiveInfixProperties(build, exclusive_info, bc.leftfix_info,
no_retrigger_queues);
buildInfixContainer(g, bc, exclusive_info, build.cc.grey);
}
-
+
static
void findExclusiveInfixes(RoseBuildImpl &build, build_context &bc,
QueueIndexFactory &qif,
@@ -1582,48 +1582,48 @@ bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc,
leftfix);
}
- return true;
-}
-
-static
-void findSuffixTriggers(const RoseBuildImpl &tbi,
- map<suffix_id, set<PredTopPair> > *suffixTriggers) {
- const RoseGraph &g = tbi.g;
- for (auto v : vertices_range(g)) {
- if (!g[v].suffix) {
- continue;
- }
- PredTopPair ptp(v, g[v].suffix.top);
- (*suffixTriggers)[g[v].suffix].insert(ptp);
- }
-}
-
-static
-bool hasNonSmallBlockOutfix(const vector<OutfixInfo> &outfixes) {
- for (const auto &out : outfixes) {
- if (!out.in_sbmatcher) {
- return true;
- }
- }
- return false;
-}
-
+ return true;
+}
+
+static
+void findSuffixTriggers(const RoseBuildImpl &tbi,
+ map<suffix_id, set<PredTopPair> > *suffixTriggers) {
+ const RoseGraph &g = tbi.g;
+ for (auto v : vertices_range(g)) {
+ if (!g[v].suffix) {
+ continue;
+ }
+ PredTopPair ptp(v, g[v].suffix.top);
+ (*suffixTriggers)[g[v].suffix].insert(ptp);
+ }
+}
+
+static
+bool hasNonSmallBlockOutfix(const vector<OutfixInfo> &outfixes) {
+ for (const auto &out : outfixes) {
+ if (!out.in_sbmatcher) {
+ return true;
+ }
+ }
+ return false;
+}
+
namespace {
class OutfixBuilder : public boost::static_visitor<bytecode_ptr<NFA>> {
public:
explicit OutfixBuilder(const RoseBuildImpl &build_in) : build(build_in) {}
-
+
bytecode_ptr<NFA> operator()(boost::blank&) const {
return nullptr;
};
-
+
bytecode_ptr<NFA> operator()(unique_ptr<raw_dfa> &rdfa) const {
// Unleash the mighty DFA!
return getDfa(*rdfa, false, build.cc, build.rm);
}
bytecode_ptr<NFA> operator()(unique_ptr<raw_som_dfa> &haig) const {
- // Unleash the Goughfish!
+ // Unleash the Goughfish!
return goughCompile(*haig, build.ssm.somPrecision(), build.cc,
build.rm);
}
@@ -1633,31 +1633,31 @@ public:
const ReportManager &rm = build.rm;
NGHolder &h = *holder;
- assert(h.kind == NFA_OUTFIX);
-
- // Build NFA.
+ assert(h.kind == NFA_OUTFIX);
+
+ // Build NFA.
const map<u32, u32> fixed_depth_tops; /* no tops */
const map<u32, vector<vector<CharReach>>> triggers; /* no tops */
bool compress_state = cc.streaming;
bool fast_nfa = false;
auto n = constructNFA(h, &rm, fixed_depth_tops, triggers,
compress_state, fast_nfa, cc);
-
- // Try for a DFA upgrade.
+
+ // Try for a DFA upgrade.
if (n && cc.grey.roseMcClellanOutfix &&
(!has_bounded_repeats_other_than_firsts(*n) || !fast_nfa)) {
- auto rdfa = buildMcClellan(h, &rm, cc.grey);
- if (rdfa) {
+ auto rdfa = buildMcClellan(h, &rm, cc.grey);
+ if (rdfa) {
auto d = getDfa(*rdfa, false, cc, rm);
- if (d) {
+ if (d) {
n = pickImpl(move(d), move(n), fast_nfa);
- }
- }
- }
+ }
+ }
+ }
return n;
- }
-
+ }
+
bytecode_ptr<NFA> operator()(UNUSED MpvProto &mpv) const {
// MPV construction handled separately.
assert(mpv.puffettes.empty());
@@ -1675,166 +1675,166 @@ bytecode_ptr<NFA> buildOutfix(const RoseBuildImpl &build, OutfixInfo &outfix) {
auto n = boost::apply_visitor(OutfixBuilder(build), outfix.proto);
if (n && build.cc.grey.reverseAccelerate) {
- buildReverseAcceleration(n.get(), outfix.rev_info, outfix.minWidth);
- }
-
- return n;
-}
-
-static
+ buildReverseAcceleration(n.get(), outfix.rev_info, outfix.minWidth);
+ }
+
+ return n;
+}
+
+static
void prepMpv(RoseBuildImpl &tbi, build_context &bc, size_t *historyRequired,
bool *mpv_as_outfix) {
assert(bc.engineOffsets.empty()); // MPV should be first
- *mpv_as_outfix = false;
+ *mpv_as_outfix = false;
OutfixInfo *mpv_outfix = nullptr;
-
- /* assume outfixes are just above chain tails in queue indices */
- for (auto &out : tbi.outfixes) {
- if (out.is_nonempty_mpv()) {
+
+ /* assume outfixes are just above chain tails in queue indices */
+ for (auto &out : tbi.outfixes) {
+ if (out.is_nonempty_mpv()) {
assert(!mpv_outfix);
mpv_outfix = &out;
- } else {
+ } else {
assert(!out.mpv());
- }
- }
-
+ }
+ }
+
if (!mpv_outfix) {
- return;
- }
-
+ return;
+ }
+
auto *mpv = mpv_outfix->mpv();
auto nfa = mpvCompile(mpv->puffettes, mpv->triggered_puffettes, tbi.rm);
- assert(nfa);
- if (!nfa) {
- throw CompileError("Unable to generate bytecode.");
- }
-
- if (tbi.cc.grey.reverseAccelerate) {
+ assert(nfa);
+ if (!nfa) {
+ throw CompileError("Unable to generate bytecode.");
+ }
+
+ if (tbi.cc.grey.reverseAccelerate) {
buildReverseAcceleration(nfa.get(), mpv_outfix->rev_info,
mpv_outfix->minWidth);
- }
-
+ }
+
u32 qi = mpv_outfix->get_queue(tbi.qif);
- nfa->queueIndex = qi;
+ nfa->queueIndex = qi;
enforceEngineSizeLimit(nfa.get(), tbi.cc.grey);
bc.engine_info_by_queue.emplace(nfa->queueIndex,
engine_info(nfa.get(), false));
-
- DEBUG_PRINTF("built mpv\n");
-
- if (!*historyRequired && requires_decompress_key(*nfa)) {
- *historyRequired = 1;
- }
-
+
+ DEBUG_PRINTF("built mpv\n");
+
+ if (!*historyRequired && requires_decompress_key(*nfa)) {
+ *historyRequired = 1;
+ }
+
add_nfa_to_blob(bc, *nfa);
- *mpv_as_outfix = !mpv->puffettes.empty();
-}
-
-static
-void setOutfixProperties(NFA &n, const OutfixInfo &outfix) {
- depth min_width = outfix.minWidth;
- DEBUG_PRINTF("min_width=%s\n", min_width.str().c_str());
- u32 min_width_value = min_width.is_finite() ? (u32)min_width : 0;
- n.minWidth = min_width_value;
-
- depth max_width = outfix.maxWidth;
- DEBUG_PRINTF("max_width=%s\n", max_width.str().c_str());
- u32 max_width_value = max_width.is_finite() ? (u32)max_width : 0;
- n.maxWidth = max_width_value;
-
- DEBUG_PRINTF("max_offset=%llu\n", outfix.maxOffset);
- u32 max_offset_value = outfix.maxOffset < ~0U ? (u32)outfix.maxOffset : 0;
- n.maxOffset = max_offset_value;
-
- DEBUG_PRINTF("maxBAWidth=%u\n", outfix.maxBAWidth);
- if (outfix.maxBAWidth != ROSE_BOUND_INF && outfix.maxBAWidth < 256) {
- n.maxBiAnchoredWidth = verify_u8(outfix.maxBAWidth);
- }
-}
-
-static
+ *mpv_as_outfix = !mpv->puffettes.empty();
+}
+
+static
+void setOutfixProperties(NFA &n, const OutfixInfo &outfix) {
+ depth min_width = outfix.minWidth;
+ DEBUG_PRINTF("min_width=%s\n", min_width.str().c_str());
+ u32 min_width_value = min_width.is_finite() ? (u32)min_width : 0;
+ n.minWidth = min_width_value;
+
+ depth max_width = outfix.maxWidth;
+ DEBUG_PRINTF("max_width=%s\n", max_width.str().c_str());
+ u32 max_width_value = max_width.is_finite() ? (u32)max_width : 0;
+ n.maxWidth = max_width_value;
+
+ DEBUG_PRINTF("max_offset=%llu\n", outfix.maxOffset);
+ u32 max_offset_value = outfix.maxOffset < ~0U ? (u32)outfix.maxOffset : 0;
+ n.maxOffset = max_offset_value;
+
+ DEBUG_PRINTF("maxBAWidth=%u\n", outfix.maxBAWidth);
+ if (outfix.maxBAWidth != ROSE_BOUND_INF && outfix.maxBAWidth < 256) {
+ n.maxBiAnchoredWidth = verify_u8(outfix.maxBAWidth);
+ }
+}
+
+static
bool prepOutfixes(RoseBuildImpl &tbi, build_context &bc,
- size_t *historyRequired) {
- if (tbi.cc.grey.onlyOneOutfix && tbi.outfixes.size() > 1) {
- DEBUG_PRINTF("we have %zu outfixes, but Grey::onlyOneOutfix is set\n",
- tbi.outfixes.size());
- throw ResourceLimitError();
- }
-
+ size_t *historyRequired) {
+ if (tbi.cc.grey.onlyOneOutfix && tbi.outfixes.size() > 1) {
+ DEBUG_PRINTF("we have %zu outfixes, but Grey::onlyOneOutfix is set\n",
+ tbi.outfixes.size());
+ throw ResourceLimitError();
+ }
+
assert(tbi.qif.allocated_count() == bc.engineOffsets.size());
-
- for (auto &out : tbi.outfixes) {
+
+ for (auto &out : tbi.outfixes) {
if (out.mpv()) {
- continue; /* already done */
- }
+ continue; /* already done */
+ }
DEBUG_PRINTF("building outfix %zd\n", &out - &tbi.outfixes[0]);
- auto n = buildOutfix(tbi, out);
- if (!n) {
- assert(0);
- return false;
- }
-
- setOutfixProperties(*n, out);
-
+ auto n = buildOutfix(tbi, out);
+ if (!n) {
+ assert(0);
+ return false;
+ }
+
+ setOutfixProperties(*n, out);
+
n->queueIndex = out.get_queue(tbi.qif);
enforceEngineSizeLimit(n.get(), tbi.cc.grey);
bc.engine_info_by_queue.emplace(n->queueIndex,
engine_info(n.get(), false));
-
- if (!*historyRequired && requires_decompress_key(*n)) {
- *historyRequired = 1;
- }
-
+
+ if (!*historyRequired && requires_decompress_key(*n)) {
+ *historyRequired = 1;
+ }
+
add_nfa_to_blob(bc, *n);
- }
-
- return true;
-}
-
-static
+ }
+
+ return true;
+}
+
+static
void assignSuffixQueues(RoseBuildImpl &build, map<suffix_id, u32> &suffixes) {
const RoseGraph &g = build.g;
-
- for (auto v : vertices_range(g)) {
- if (!g[v].suffix) {
- continue;
- }
-
- const suffix_id s(g[v].suffix);
-
+
+ for (auto v : vertices_range(g)) {
+ if (!g[v].suffix) {
+ continue;
+ }
+
+ const suffix_id s(g[v].suffix);
+
DEBUG_PRINTF("vertex %zu triggers suffix %p\n", g[v].index, s.graph());
-
- // We may have already built this NFA.
+
+ // We may have already built this NFA.
if (contains(suffixes, s)) {
- continue;
- }
-
+ continue;
+ }
+
u32 queue = build.qif.get_queue();
- DEBUG_PRINTF("assigning %p to queue %u\n", s.graph(), queue);
+ DEBUG_PRINTF("assigning %p to queue %u\n", s.graph(), queue);
suffixes.emplace(s, queue);
- }
-}
-
-static
-void setSuffixProperties(NFA &n, const suffix_id &suff,
- const ReportManager &rm) {
- depth min_width = findMinWidth(suff);
- DEBUG_PRINTF("min_width=%s\n", min_width.str().c_str());
- u32 min_width_value = min_width.is_finite() ? (u32)min_width : 0;
- n.minWidth = min_width_value;
-
- depth max_width = findMaxWidth(suff);
- DEBUG_PRINTF("max_width=%s\n", max_width.str().c_str());
- u32 max_width_value = max_width.is_finite() ? (u32)max_width : 0;
- n.maxWidth = max_width_value;
-
- u64a max_offset = findMaxOffset(all_reports(suff), rm);
- DEBUG_PRINTF("max_offset=%llu\n", max_offset);
- u32 max_offset_value = max_offset < ~0U ? (u32)max_offset : 0;
- n.maxOffset = max_offset_value;
-}
-
-static
+ }
+}
+
+static
+void setSuffixProperties(NFA &n, const suffix_id &suff,
+ const ReportManager &rm) {
+ depth min_width = findMinWidth(suff);
+ DEBUG_PRINTF("min_width=%s\n", min_width.str().c_str());
+ u32 min_width_value = min_width.is_finite() ? (u32)min_width : 0;
+ n.minWidth = min_width_value;
+
+ depth max_width = findMaxWidth(suff);
+ DEBUG_PRINTF("max_width=%s\n", max_width.str().c_str());
+ u32 max_width_value = max_width.is_finite() ? (u32)max_width : 0;
+ n.maxWidth = max_width_value;
+
+ u64a max_offset = findMaxOffset(all_reports(suff), rm);
+ DEBUG_PRINTF("max_offset=%llu\n", max_offset);
+ u32 max_offset_value = max_offset < ~0U ? (u32)max_offset : 0;
+ n.maxOffset = max_offset_value;
+}
+
+static
void buildExclusiveSuffixes(RoseBuildImpl &build, build_context &bc,
QueueIndexFactory &qif,
map<suffix_id, set<PredTopPair>> &suffixTriggers,
@@ -1842,19 +1842,19 @@ void buildExclusiveSuffixes(RoseBuildImpl &build, build_context &bc,
const vector<vector<u32>> &groups,
set<u32> *no_retrigger_queues) {
RoseGraph &g = build.g;
-
+
vector<ExclusiveInfo> exclusive_info;
for (const auto &gp : groups) {
ExclusiveInfo info;
for (const auto &id : gp) {
const auto &verts = vertex_map.at(id);
suffix_id s(g[verts[0]].suffix);
-
+
const set<PredTopPair> &s_triggers = suffixTriggers.at(s);
-
+
map<u32, u32> fixed_depth_tops;
findFixedDepthTops(g, s_triggers, &fixed_depth_tops);
-
+
map<u32, vector<vector<CharReach>>> triggers;
findTriggerSequences(build, s_triggers, &triggers);
@@ -1871,7 +1871,7 @@ void buildExclusiveSuffixes(RoseBuildImpl &build, build_context &bc,
const auto &reports = all_reports(s);
info.reports.insert(reports.begin(), reports.end());
- }
+ }
info.queue = qif.get_queue();
exclusive_info.push_back(move(info));
}
@@ -1879,14 +1879,14 @@ void buildExclusiveSuffixes(RoseBuildImpl &build, build_context &bc,
no_retrigger_queues);
buildSuffixContainer(g, bc, exclusive_info, build.cc.grey);
}
-
+
static
void findExclusiveSuffixes(RoseBuildImpl &tbi, build_context &bc,
QueueIndexFactory &qif,
map<suffix_id, set<PredTopPair>> &suffixTriggers,
set<u32> *no_retrigger_queues) {
const RoseGraph &g = tbi.g;
-
+
map<suffix_id, u32> suffixes;
set<RoleInfo<suffix_id>> roleInfoSet;
map<u32, vector<RoseVertex>> vertex_map;
@@ -1894,8 +1894,8 @@ void findExclusiveSuffixes(RoseBuildImpl &tbi, build_context &bc,
for (auto v : vertices_range(g)) {
if (!g[v].suffix) {
continue;
- }
-
+ }
+
const suffix_id s(g[v].suffix);
DEBUG_PRINTF("vertex %zu triggers suffix %p\n", g[v].index, s.graph());
@@ -1907,30 +1907,30 @@ void findExclusiveSuffixes(RoseBuildImpl &tbi, build_context &bc,
vertex_map[id].push_back(v);
}
continue;
- }
-
+ }
+
if (s.haig()) {
continue;
}
-
+
        // EOD suffixes are currently disabled for exclusive analysis.
if (!tbi.isInETable(v) && (s.graph() || s.castle())) {
DEBUG_PRINTF("assigning %p to id %u\n", s.graph(), role_id);
suffixes.emplace(s, role_id);
-
+
vertex_map[role_id].push_back(v);
const set<PredTopPair> &s_triggers = suffixTriggers.at(s);
map<u32, vector<vector<CharReach>>> triggers;
findTriggerSequences(tbi, s_triggers, &triggers);
-
+
RoleInfo<suffix_id> info(s, role_id);
if (setTriggerLiteralsSuffix(info, triggers)) {
roleInfoSet.insert(info);
}
role_id++;
}
- }
-
+ }
+
if (suffixes.size() > 1) {
DEBUG_PRINTF("suffix size:%zu\n", suffixes.size());
vector<vector<u32>> groups;
@@ -1938,9 +1938,9 @@ void findExclusiveSuffixes(RoseBuildImpl &tbi, build_context &bc,
buildExclusiveSuffixes(tbi, bc, qif, suffixTriggers, vertex_map,
groups, no_retrigger_queues);
}
-}
-
-static
+}
+
+static
bool buildSuffixes(const RoseBuildImpl &tbi, build_context &bc,
set<u32> *no_retrigger_queues,
const map<suffix_id, set<PredTopPair>> &suffixTriggers) {
@@ -1952,31 +1952,31 @@ bool buildSuffixes(const RoseBuildImpl &tbi, build_context &bc,
ordered.emplace_back(e.second, e.first);
}
sort(begin(ordered), end(ordered));
-
+
for (const auto &e : ordered) {
const u32 queue = e.first;
const suffix_id &s = e.second;
-
+
if (s.tamarama()) {
continue;
}
-
+
const set<PredTopPair> &s_triggers = suffixTriggers.at(s);
-
+
map<u32, u32> fixed_depth_tops;
findFixedDepthTops(tbi.g, s_triggers, &fixed_depth_tops);
-
+
map<u32, vector<vector<CharReach>>> triggers;
findTriggerSequences(tbi, s_triggers, &triggers);
-
+
auto n = buildSuffix(tbi.rm, tbi.ssm, fixed_depth_tops, triggers,
s, tbi.cc);
if (!n) {
return false;
}
-
+
setSuffixProperties(*n, s, tbi.rm);
-
+
n->queueIndex = queue;
enforceEngineSizeLimit(n.get(), tbi.cc.grey);
bc.engine_info_by_queue.emplace(n->queueIndex,
@@ -1990,63 +1990,63 @@ bool buildSuffixes(const RoseBuildImpl &tbi, build_context &bc,
}
add_nfa_to_blob(bc, *n);
- }
-
+ }
+
return true;
-}
-
-static
+}
+
+static
void buildCountingMiracles(build_context &bc) {
- map<pair<CharReach, u8>, u32> pre_built;
-
+ map<pair<CharReach, u8>, u32> pre_built;
+
for (left_build_info &lbi : bc.leftfix_info | map_values) {
if (!lbi.countingMiracleCount) {
continue;
- }
-
- const CharReach &cr = lbi.countingMiracleReach;
- assert(!cr.all() && !cr.none());
-
- auto key = make_pair(cr, lbi.countingMiracleCount);
- if (contains(pre_built, key)) {
- lbi.countingMiracleOffset = pre_built[key];
- continue;
- }
-
- RoseCountingMiracle rcm;
- memset(&rcm, 0, sizeof(rcm));
-
- if (cr.count() == 1) {
- rcm.c = cr.find_first();
- } else {
- rcm.shufti = 1;
+ }
+
+ const CharReach &cr = lbi.countingMiracleReach;
+ assert(!cr.all() && !cr.none());
+
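+        // Identical (reach, count) miracles share one engine blob entry.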
+ auto key = make_pair(cr, lbi.countingMiracleCount);
+ if (contains(pre_built, key)) {
+ lbi.countingMiracleOffset = pre_built[key];
+ continue;
+ }
+
+ RoseCountingMiracle rcm;
+ memset(&rcm, 0, sizeof(rcm));
+
+ if (cr.count() == 1) {
+ rcm.c = cr.find_first();
+ } else {
+ rcm.shufti = 1;
int rv = shuftiBuildMasks(cr, (u8 *)&rcm.lo, (u8 *)&rcm.hi);
- if (rv == -1) {
- DEBUG_PRINTF("failed to build shufti\n");
- lbi.countingMiracleCount = 0; /* remove counting miracle */
- continue;
- }
-
- rcm.poison = (~cr).find_first();
- }
-
- rcm.count = lbi.countingMiracleCount;
-
+ if (rv == -1) {
+ DEBUG_PRINTF("failed to build shufti\n");
+ lbi.countingMiracleCount = 0; /* remove counting miracle */
+ continue;
+ }
+
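+            // Pick a poison byte guaranteed to lie outside the reach.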
+ rcm.poison = (~cr).find_first();
+ }
+
+ rcm.count = lbi.countingMiracleCount;
+
lbi.countingMiracleOffset = bc.engine_blob.add(rcm);
- pre_built[key] = lbi.countingMiracleOffset;
- DEBUG_PRINTF("built cm for count of %u @ %u\n", rcm.count,
- lbi.countingMiracleOffset);
- }
-}
-
+ pre_built[key] = lbi.countingMiracleOffset;
+ DEBUG_PRINTF("built cm for count of %u @ %u\n", rcm.count,
+ lbi.countingMiracleOffset);
+ }
+}
+
/* Note: buildNfas may reduce the lag for vertices that have prefixes */
-static
+static
bool buildNfas(RoseBuildImpl &tbi, build_context &bc, QueueIndexFactory &qif,
set<u32> *no_retrigger_queues, set<u32> *eager_queues,
u32 *leftfixBeginQueue) {
map<suffix_id, set<PredTopPair>> suffixTriggers;
findSuffixTriggers(tbi, &suffixTriggers);
-
+
if (tbi.cc.grey.allowTamarama && tbi.cc.streaming) {
findExclusiveSuffixes(tbi, bc, qif, suffixTriggers,
no_retrigger_queues);
@@ -2055,156 +2055,156 @@ bool buildNfas(RoseBuildImpl &tbi, build_context &bc, QueueIndexFactory &qif,
assignSuffixQueues(tbi, bc.suffixes);
if (!buildSuffixes(tbi, bc, no_retrigger_queues, suffixTriggers)) {
- return false;
- }
+ return false;
+ }
suffixTriggers.clear();
-
- *leftfixBeginQueue = qif.allocated_count();
-
+
+ *leftfixBeginQueue = qif.allocated_count();
+
if (!buildLeftfixes(tbi, bc, qif, no_retrigger_queues, eager_queues,
- true)) {
- return false;
- }
-
+ true)) {
+ return false;
+ }
+
if (!buildLeftfixes(tbi, bc, qif, no_retrigger_queues, eager_queues,
- false)) {
- return false;
- }
-
- return true;
-}
-
-static
+ false)) {
+ return false;
+ }
+
+ return true;
+}
+
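+/**
+ * \brief Lays out stream state (or transient state in scratch) for a single
+ * engine and advances the running size counters.
+ */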
+static
void allocateStateSpace(const engine_info &eng_info, NfaInfo &nfa_info,
RoseStateOffsets *so, u32 *scratchStateSize,
u32 *transientStateSize) {
- u32 state_offset;
+ u32 state_offset;
if (eng_info.transient) {
// Transient engines do not use stream state, but must have room in
// transient state (stored in scratch).
state_offset = *transientStateSize;
*transientStateSize += eng_info.stream_size;
- } else {
+ } else {
// Pack NFA stream state on to the end of the Rose stream state.
- state_offset = so->end;
+ state_offset = so->end;
so->end += eng_info.stream_size;
- }
-
+ }
+
nfa_info.stateOffset = state_offset;
-
+
// Uncompressed state in scratch must be aligned.
*scratchStateSize = ROUNDUP_N(*scratchStateSize, eng_info.scratch_align);
nfa_info.fullStateOffset = *scratchStateSize;
*scratchStateSize += eng_info.scratch_size;
-}
-
-static
+}
+
+static
void updateNfaState(const build_context &bc, vector<NfaInfo> &nfa_infos,
RoseStateOffsets *so, u32 *scratchStateSize,
u32 *transientStateSize) {
if (nfa_infos.empty()) {
return;
- }
-
+ }
+
*transientStateSize = 0;
*scratchStateSize = 0;
-
+
for (u32 qi = 0; qi < nfa_infos.size(); qi++) {
NfaInfo &nfa_info = nfa_infos[qi];
const auto &eng_info = bc.engine_info_by_queue.at(qi);
allocateStateSpace(eng_info, nfa_info, so, scratchStateSize,
transientStateSize);
- }
-}
-
-/* does not include history requirements for outfixes or literal matchers */
-u32 RoseBuildImpl::calcHistoryRequired() const {
- u32 m = cc.grey.minHistoryAvailable;
-
- for (auto v : vertices_range(g)) {
- if (g[v].suffix) {
- m = MAX(m, 2); // so that history req is at least 1, for state
- // compression.
- /* TODO: check if suffix uses state compression */
- }
-
- if (g[v].left) {
- const u32 lag = g[v].left.lag;
- const left_id leftfix(g[v].left);
- if (contains(transient, leftfix)) {
- u32 mv = lag + findMaxWidth(leftfix);
-
- // If this vertex has an event literal, we need to add one to
- // cope with it.
- if (hasLiteralInTable(v, ROSE_EVENT)) {
- mv++;
- }
-
- m = MAX(m, mv);
- } else {
- /* rose will be caught up from (lag - 1), also need an extra
- * byte behind that to find the decompression key */
- m = MAX(m, lag + 1);
- m = MAX(m, 2); // so that history req is at least 1, for state
- // compression.
- }
- }
- }
-
- // Delayed literals contribute to history requirement as well.
+ }
+}
+
+/* does not include history requirements for outfixes or literal matchers */
+u32 RoseBuildImpl::calcHistoryRequired() const {
+ u32 m = cc.grey.minHistoryAvailable;
+
+ for (auto v : vertices_range(g)) {
+ if (g[v].suffix) {
+ m = MAX(m, 2); // so that history req is at least 1, for state
+ // compression.
+ /* TODO: check if suffix uses state compression */
+ }
+
+ if (g[v].left) {
+ const u32 lag = g[v].left.lag;
+ const left_id leftfix(g[v].left);
+ if (contains(transient, leftfix)) {
+ u32 mv = lag + findMaxWidth(leftfix);
+
+ // If this vertex has an event literal, we need to add one to
+ // cope with it.
+ if (hasLiteralInTable(v, ROSE_EVENT)) {
+ mv++;
+ }
+
+ m = MAX(m, mv);
+ } else {
+ /* rose will be caught up from (lag - 1), also need an extra
+ * byte behind that to find the decompression key */
+ m = MAX(m, lag + 1);
+ m = MAX(m, 2); // so that history req is at least 1, for state
+ // compression.
+ }
+ }
+ }
+
+ // Delayed literals contribute to history requirement as well.
for (u32 id = 0; id < literals.size(); id++) {
const auto &lit = literals.at(id);
- if (lit.delay) {
- // If the literal is delayed _and_ has a mask that is longer than
- // the literal, we need enough history to match the whole mask as
- // well when rebuilding delayed matches.
- size_t len = std::max(lit.elength(), lit.msk.size() + lit.delay);
- ENSURE_AT_LEAST(&m, verify_u32(len));
- }
-
- /* Benefit checks require data is available. */
- if (literal_info.at(id).requires_benefits) {
- ENSURE_AT_LEAST(&m,
- MIN(verify_u32(lit.elength()), MAX_MASK2_WIDTH));
- }
- }
-
- m = MAX(m, max_rose_anchored_floating_overlap);
-
- DEBUG_PRINTF("m=%u, ematcher_region_size=%u\n", m, ematcher_region_size);
-
- if (ematcher_region_size >= m) {
- return ematcher_region_size;
- }
-
- return m ? m - 1 : 0;
-}
-
-static
+ if (lit.delay) {
+ // If the literal is delayed _and_ has a mask that is longer than
+ // the literal, we need enough history to match the whole mask as
+ // well when rebuilding delayed matches.
+ size_t len = std::max(lit.elength(), lit.msk.size() + lit.delay);
+ ENSURE_AT_LEAST(&m, verify_u32(len));
+ }
+
+ /* Benefit checks require data is available. */
+ if (literal_info.at(id).requires_benefits) {
+ ENSURE_AT_LEAST(&m,
+ MIN(verify_u32(lit.elength()), MAX_MASK2_WIDTH));
+ }
+ }
+
+ m = MAX(m, max_rose_anchored_floating_overlap);
+
+ DEBUG_PRINTF("m=%u, ematcher_region_size=%u\n", m, ematcher_region_size);
+
+ if (ematcher_region_size >= m) {
+ return ematcher_region_size;
+ }
+
+ return m ? m - 1 : 0;
+}
+
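+/* Builds a sparse iterator over the state indices of roles that require
+ * last-byte history; returns its offset in the engine blob (0 if none). */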
+static
u32 buildLastByteIter(const RoseGraph &g, build_context &bc) {
vector<u32> lb_roles;
-
+
for (auto v : vertices_range(g)) {
if (!hasLastByteHistorySucc(g, v)) {
continue;
- }
+ }
// Eager EOD reporters won't have state indices.
auto it = bc.roleStateIndices.find(v);
if (it != end(bc.roleStateIndices)) {
lb_roles.push_back(it->second);
DEBUG_PRINTF("last byte %u\n", it->second);
- }
- }
-
+ }
+ }
+
if (lb_roles.empty()) {
return 0; /* invalid offset */
- }
+ }
auto iter = mmbBuildSparseIterator(lb_roles, bc.roleStateIndices.size());
return bc.engine_blob.add_iterator(iter);
-}
-
-static
+}
+
+static
u32 findMinFloatingLiteralMatch(const RoseBuildImpl &build,
const vector<raw_dfa> &anchored_dfas) {
if (anchored_dfas.size() > 1) {
@@ -2212,8 +2212,8 @@ u32 findMinFloatingLiteralMatch(const RoseBuildImpl &build,
/* We must regard matches from other anchored tables as unordered, as
* we do for floating matches. */
return 1;
- }
-
+ }
+
const RoseGraph &g = build.g;
u32 minWidth = ROSE_BOUND_INF;
for (auto v : vertices_range(g)) {
@@ -2221,58 +2221,58 @@ u32 findMinFloatingLiteralMatch(const RoseBuildImpl &build,
DEBUG_PRINTF("skipping %zu anchored or root\n", g[v].index);
continue;
}
-
+
u32 w = g[v].min_offset;
DEBUG_PRINTF("%zu m_o = %u\n", g[v].index, w);
-
+
if (w < minWidth) {
minWidth = w;
- }
- }
-
+ }
+ }
+
return minWidth;
-}
-
-static
+}
+
+static
vector<u32> buildSuffixEkeyLists(const RoseBuildImpl &build, build_context &bc,
const QueueIndexFactory &qif) {
vector<u32> out(qif.allocated_count());
-
+
map<u32, vector<u32>> qi_to_ekeys; /* for determinism */
-
+
for (const auto &e : bc.suffixes) {
const suffix_id &s = e.first;
u32 qi = e.second;
set<u32> ekeys = reportsToEkeys(all_reports(s), build.rm);
-
+
if (!ekeys.empty()) {
qi_to_ekeys[qi] = {ekeys.begin(), ekeys.end()};
- }
- }
-
+ }
+ }
+
/* for each outfix also build elists */
for (const auto &outfix : build.outfixes) {
u32 qi = outfix.get_queue();
set<u32> ekeys = reportsToEkeys(all_reports(outfix), build.rm);
-
+
if (!ekeys.empty()) {
qi_to_ekeys[qi] = {ekeys.begin(), ekeys.end()};
}
- }
-
+ }
+
for (auto &e : qi_to_ekeys) {
u32 qi = e.first;
auto &ekeys = e.second;
assert(!ekeys.empty());
ekeys.push_back(INVALID_EKEY); /* terminator */
out[qi] = bc.engine_blob.add_range(ekeys);
- }
-
+ }
+
return out;
-}
-
+}
+
/** Returns sparse iter offset in engine blob. */
-static
+static
u32 buildEodNfaIterator(build_context &bc, const u32 activeQueueCount) {
vector<u32> keys;
for (u32 qi = 0; qi < activeQueueCount; ++qi) {
@@ -2281,68 +2281,68 @@ u32 buildEodNfaIterator(build_context &bc, const u32 activeQueueCount) {
DEBUG_PRINTF("nfa qi=%u accepts eod\n", qi);
keys.push_back(qi);
}
- }
-
+ }
+
if (keys.empty()) {
return 0;
- }
-
+ }
+
DEBUG_PRINTF("building iter for %zu nfas\n", keys.size());
-
+
auto iter = mmbBuildSparseIterator(keys, activeQueueCount);
return bc.engine_blob.add_iterator(iter);
-}
-
-static
+}
+
+static
bool hasMpvTrigger(const set<u32> &reports, const ReportManager &rm) {
for (u32 r : reports) {
if (rm.getReport(r).type == INTERNAL_ROSE_CHAIN) {
- return true;
- }
- }
-
- return false;
-}
-
-static
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static
bool anyEndfixMpvTriggers(const RoseBuildImpl &build) {
const RoseGraph &g = build.g;
unordered_set<suffix_id> done;
-
+
/* suffixes */
for (auto v : vertices_range(g)) {
if (!g[v].suffix) {
continue;
- }
+ }
if (contains(done, g[v].suffix)) {
continue; /* already done */
}
done.insert(g[v].suffix);
-
+
if (hasMpvTrigger(all_reports(g[v].suffix), build.rm)) {
return true;
- }
- }
-
+ }
+ }
+
/* outfixes */
for (const auto &out : build.outfixes) {
if (hasMpvTrigger(all_reports(out), build.rm)) {
- return true;
- }
- }
-
- return false;
-}
-
+ return true;
+ }
+ }
+
+ return false;
+}
+
struct DerivedBoundaryReports {
explicit DerivedBoundaryReports(const BoundaryReports &boundary) {
insert(&report_at_0_eod_full, boundary.report_at_0_eod);
insert(&report_at_0_eod_full, boundary.report_at_eod);
insert(&report_at_0_eod_full, boundary.report_at_0);
- }
+ }
set<ReportID> report_at_0_eod_full;
};
-
+
static
void addSomRevNfas(build_context &bc, RoseEngine &proto,
const SomSlotManager &ssm) {
@@ -2357,71 +2357,71 @@ void addSomRevNfas(build_context &bc, RoseEngine &proto,
nfa_offsets.push_back(offset);
        /* note: som rev nfas don't need a queue assigned as they only run
         * in reverse in block mode */
- }
-
+ }
+
proto.somRevCount = verify_u32(nfas.size());
proto.somRevOffsetOffset = bc.engine_blob.add_range(nfa_offsets);
-}
-
-static
+}
+
+static
void recordResources(RoseResources &resources, const RoseBuildImpl &build,
const vector<raw_dfa> &anchored_dfas,
const vector<LitFragment> &fragments) {
if (!build.outfixes.empty()) {
resources.has_outfixes = true;
- }
-
+ }
+
resources.has_literals = !fragments.empty();
-
+
const auto &g = build.g;
for (const auto &v : vertices_range(g)) {
if (g[v].eod_accept) {
resources.has_eod = true;
break;
- }
+ }
if (g[v].suffix && has_eod_accepts(g[v].suffix)) {
resources.has_eod = true;
break;
- }
- }
-
+ }
+ }
+
resources.has_anchored = !anchored_dfas.empty();
resources.has_anchored_multiple = anchored_dfas.size() > 1;
for (const auto &rdfa : anchored_dfas) {
if (rdfa.states.size() > 256) {
resources.has_anchored_large = true;
- }
- }
-
-}
-
-static
+ }
+ }
+
+}
+
+static
u32 writeProgram(build_context &bc, RoseProgram &&program) {
if (program.empty()) {
DEBUG_PRINTF("no program\n");
return 0;
- }
-
+ }
+
applyFinalSpecialisation(program);
-
+
auto it = bc.program_cache.find(program);
if (it != end(bc.program_cache)) {
DEBUG_PRINTF("reusing cached program at %u\n", it->second);
return it->second;
- }
-
+ }
+
recordResources(bc.resources, program);
recordLongLiterals(bc.longLiterals, program);
-
+
auto prog_bytecode = writeProgram(bc.engine_blob, program);
u32 offset = bc.engine_blob.add(prog_bytecode);
DEBUG_PRINTF("prog len %zu written at offset %u\n", prog_bytecode.size(),
offset);
bc.program_cache.emplace(move(program), offset);
return offset;
-}
-
-static
+}
+
+static
u32 writeActiveLeftIter(RoseEngineBlob &engine_blob,
const vector<LeftNfaInfo> &leftInfoTable) {
vector<u32> keys;
@@ -2430,19 +2430,19 @@ u32 writeActiveLeftIter(RoseEngineBlob &engine_blob,
DEBUG_PRINTF("leftfix %zu is active\n", i);
keys.push_back(verify_u32(i));
}
- }
-
+ }
+
DEBUG_PRINTF("%zu active leftfixes\n", keys.size());
-
+
if (keys.empty()) {
return 0;
- }
-
+ }
+
auto iter = mmbBuildSparseIterator(keys, verify_u32(leftInfoTable.size()));
return engine_blob.add_iterator(iter);
-}
-
-static
+}
+
+static
bool hasEodAnchors(const RoseBuildImpl &build, const build_context &bc,
u32 outfixEndQueue) {
for (u32 i = 0; i < outfixEndQueue; i++) {
@@ -2451,38 +2451,38 @@ bool hasEodAnchors(const RoseBuildImpl &build, const build_context &bc,
DEBUG_PRINTF("outfix has eod\n");
return true;
}
- }
-
+ }
+
if (build.eod_event_literal_id != MO_INVALID_IDX) {
DEBUG_PRINTF("eod is an event to be celebrated\n");
return true;
- }
-
+ }
+
const RoseGraph &g = build.g;
for (auto v : vertices_range(g)) {
if (g[v].eod_accept) {
DEBUG_PRINTF("literally report eod\n");
- return true;
- }
+ return true;
+ }
if (g[v].suffix && has_eod_accepts(g[v].suffix)) {
DEBUG_PRINTF("eod suffix\n");
return true;
}
- }
+ }
DEBUG_PRINTF("yawn\n");
- return false;
-}
-
-static
+ return false;
+}
+
+static
void writeDkeyInfo(const ReportManager &rm, RoseEngineBlob &engine_blob,
RoseEngine &proto) {
const auto inv_dkeys = rm.getDkeyToReportTable();
proto.invDkeyOffset = engine_blob.add_range(inv_dkeys);
proto.dkeyCount = rm.numDkeys();
proto.dkeyLogSize = fatbit_size(proto.dkeyCount);
-}
-
-static
+}
+
+static
void writeLeftInfo(RoseEngineBlob &engine_blob, RoseEngine &proto,
const vector<LeftNfaInfo> &leftInfoTable) {
proto.leftOffset = engine_blob.add_range(leftInfoTable);
@@ -2491,9 +2491,9 @@ void writeLeftInfo(RoseEngineBlob &engine_blob, RoseEngine &proto,
proto.roseCount = verify_u32(leftInfoTable.size());
proto.activeLeftCount = verify_u32(leftInfoTable.size());
proto.rosePrefixCount = countRosePrefixes(leftInfoTable);
-}
-
-static
+}
+
+static
void writeLogicalInfo(const ReportManager &rm, RoseEngineBlob &engine_blob,
RoseEngine &proto) {
const auto &tree = rm.getLogicalTree();
@@ -2511,68 +2511,68 @@ void writeNfaInfo(const RoseBuildImpl &build, build_context &bc,
const u32 queue_count = build.qif.allocated_count();
if (!queue_count) {
return;
- }
-
+ }
+
auto ekey_lists = buildSuffixEkeyLists(build, bc, build.qif);
-
+
vector<NfaInfo> infos(queue_count);
memset(infos.data(), 0, sizeof(NfaInfo) * queue_count);
-
+
for (u32 qi = 0; qi < queue_count; qi++) {
NfaInfo &info = infos[qi];
info.nfaOffset = bc.engineOffsets.at(qi);
assert(qi < ekey_lists.size());
info.ekeyListOffset = ekey_lists.at(qi);
info.no_retrigger = contains(no_retrigger_queues, qi) ? 1 : 0;
- }
-
+ }
+
// Mark outfixes that are in the small block matcher.
for (const auto &out : build.outfixes) {
const u32 qi = out.get_queue();
assert(qi < infos.size());
infos.at(qi).in_sbmatcher = out.in_sbmatcher;
- }
-
+ }
+
// Mark suffixes triggered by EOD table literals.
const RoseGraph &g = build.g;
- for (auto v : vertices_range(g)) {
- if (!g[v].suffix) {
- continue;
- }
+ for (auto v : vertices_range(g)) {
+ if (!g[v].suffix) {
+ continue;
+ }
u32 qi = bc.suffixes.at(g[v].suffix);
assert(qi < infos.size());
if (build.isInETable(v)) {
infos.at(qi).eod = 1;
- }
- }
-
+ }
+ }
+
// Update state offsets to do with NFAs in proto and in the NfaInfo
// structures.
updateNfaState(bc, infos, &proto.stateOffsets, &proto.scratchStateSize,
&proto.tStateSize);
-
+
proto.nfaInfoOffset = bc.engine_blob.add_range(infos);
-}
-
-static
+}
+
+static
bool hasBoundaryReports(const BoundaryReports &boundary) {
if (!boundary.report_at_0.empty()) {
DEBUG_PRINTF("has boundary reports at 0\n");
return true;
- }
+ }
if (!boundary.report_at_0_eod.empty()) {
DEBUG_PRINTF("has boundary reports at 0 eod\n");
return true;
- }
+ }
if (!boundary.report_at_eod.empty()) {
DEBUG_PRINTF("has boundary reports at eod\n");
return true;
- }
+ }
DEBUG_PRINTF("no boundary reports\n");
return false;
-}
-
-static
+}
+
+static
void makeBoundaryPrograms(const RoseBuildImpl &build, build_context &bc,
const BoundaryReports &boundary,
const DerivedBoundaryReports &dboundary,
@@ -2580,29 +2580,29 @@ void makeBoundaryPrograms(const RoseBuildImpl &build, build_context &bc,
DEBUG_PRINTF("report ^: %zu\n", boundary.report_at_0.size());
DEBUG_PRINTF("report $: %zu\n", boundary.report_at_eod.size());
DEBUG_PRINTF("report ^$: %zu\n", dboundary.report_at_0_eod_full.size());
-
+
auto eod_prog = makeBoundaryProgram(build, boundary.report_at_eod);
out.reportEodOffset = writeProgram(bc, move(eod_prog));
-
+
auto zero_prog = makeBoundaryProgram(build, boundary.report_at_0);
out.reportZeroOffset = writeProgram(bc, move(zero_prog));
-
+
auto zeod_prog = makeBoundaryProgram(build, dboundary.report_at_0_eod_full);
out.reportZeroEodOffset = writeProgram(bc, move(zeod_prog));
-}
-
-static
+}
+
+static
unordered_map<RoseVertex, u32> assignStateIndices(const RoseBuildImpl &build) {
const auto &g = build.g;
-
+
u32 state = 0;
unordered_map<RoseVertex, u32> roleStateIndices;
- for (auto v : vertices_range(g)) {
+ for (auto v : vertices_range(g)) {
// Virtual vertices (starts, EOD accept vertices) never need state
// indices.
if (build.isVirtualVertex(v)) {
- continue;
- }
+ continue;
+ }
// We only need a state index if we have successors that are not
// eagerly-reported EOD vertices.
@@ -2612,60 +2612,60 @@ unordered_map<RoseVertex, u32> assignStateIndices(const RoseBuildImpl &build) {
needs_state_index = true;
break;
}
- }
-
+ }
+
if (!needs_state_index) {
continue;
- }
-
+ }
+
/* TODO: also don't need a state index if all edges are nfa based */
roleStateIndices.emplace(v, state++);
- }
-
+ }
+
DEBUG_PRINTF("assigned %u states (from %zu vertices)\n", state,
num_vertices(g));
return roleStateIndices;
-}
-
-static
+}
+
+static
bool hasUsefulStops(const left_build_info &build) {
for (u32 i = 0; i < N_CHARS; i++) {
if (build.stopAlphabet[i]) {
- return true;
- }
- }
- return false;
-}
-
-static
+ return true;
+ }
+ }
+ return false;
+}
+
+static
void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc,
const set<u32> &eager_queues, u32 leftfixBeginQueue,
u32 leftfixCount, vector<LeftNfaInfo> &leftTable,
u32 *laggedRoseCount, size_t *history) {
const RoseGraph &g = tbi.g;
const CompileContext &cc = tbi.cc;
-
+
unordered_set<u32> done_core;
-
+
leftTable.resize(leftfixCount);
-
+
u32 lagIndex = 0;
-
+
for (RoseVertex v : vertices_range(g)) {
if (!g[v].left) {
continue;
- }
+ }
assert(contains(bc.leftfix_info, v));
const left_build_info &lbi = bc.leftfix_info.at(v);
if (lbi.has_lookaround) {
continue;
}
-
+
assert(lbi.queue >= leftfixBeginQueue);
u32 left_index = lbi.queue - leftfixBeginQueue;
assert(left_index < leftfixCount);
-
+
/* seedy hack to make miracles more effective.
*
* TODO: make miracle seeking not depend on history length and have
@@ -2676,30 +2676,30 @@ void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc,
g[v].left.lag + 1
+ cc.grey.miracleHistoryBonus));
}
-
+
LeftNfaInfo &left = leftTable[left_index];
if (!contains(done_core, left_index)) {
done_core.insert(left_index);
memset(&left, 0, sizeof(left));
left.squash_mask = ~0ULL;
-
+
DEBUG_PRINTF("populating info for %u\n", left_index);
-
+
left.maxQueueLen = lbi.max_queuelen;
-
+
if (hasUsefulStops(lbi)) {
assert(lbi.stopAlphabet.size() == N_CHARS);
left.stopTable = bc.engine_blob.add_range(lbi.stopAlphabet);
}
-
+
assert(lbi.countingMiracleOffset || !lbi.countingMiracleCount);
left.countingMiracleOffset = lbi.countingMiracleOffset;
-
+
DEBUG_PRINTF("mw = %u\n", lbi.transient);
left.transient = verify_u8(lbi.transient);
left.infix = tbi.isNonRootSuccessor(v);
left.eager = contains(eager_queues, lbi.queue);
-
+
// A rose has a lagIndex if it's non-transient and we are
// streaming.
if (!lbi.transient && cc.streaming) {
@@ -2709,64 +2709,64 @@ void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc,
left.lagIndex = ROSE_OFFSET_INVALID;
}
}
-
+
DEBUG_PRINTF("rose %u is %s\n", left_index,
left.infix ? "infix" : "prefix");
-
+
// Update squash mask.
left.squash_mask &= lbi.squash_mask;
-
+
// Update the max delay.
ENSURE_AT_LEAST(&left.maxLag, lbi.lag);
-
+
if (contains(g[v].literals, tbi.eod_event_literal_id)) {
left.eod_check = 1;
}
- }
-
+ }
+
DEBUG_PRINTF("built %u roses with lag indices\n", lagIndex);
*laggedRoseCount = lagIndex;
-}
-
-static
+}
+
+static
RoseProgram makeLiteralProgram(const RoseBuildImpl &build, build_context &bc,
ProgramBuild &prog_build, u32 lit_id,
const vector<vector<RoseEdge>> &lit_edge_map,
bool is_anchored_replay_program) {
DEBUG_PRINTF("lit_id=%u\n", lit_id);
assert(lit_id < lit_edge_map.size());
-
+
return makeLiteralProgram(build, bc.leftfix_info, bc.suffixes,
bc.engine_info_by_queue, bc.roleStateIndices,
prog_build, lit_id, lit_edge_map.at(lit_id),
is_anchored_replay_program);
-}
-
-static
+}
+
+static
RoseProgram makeFragmentProgram(const RoseBuildImpl &build, build_context &bc,
ProgramBuild &prog_build,
const vector<u32> &lit_ids,
const vector<vector<RoseEdge>> &lit_edge_map) {
assert(!lit_ids.empty());
-
+
vector<RoseProgram> blocks;
for (const auto &lit_id : lit_ids) {
auto prog = makeLiteralProgram(build, bc, prog_build, lit_id,
lit_edge_map, false);
blocks.push_back(move(prog));
- }
-
+ }
+
return assembleProgramBlocks(move(blocks));
-}
-
+}
+
/**
* \brief Returns a map from literal ID to a list of edges leading into
* vertices with that literal ID.
*/
-static
+static
vector<vector<RoseEdge>> findEdgesByLiteral(const RoseBuildImpl &build) {
vector<vector<RoseEdge>> lit_edge_map(build.literals.size());
-
+
const auto &g = build.g;
for (const auto &v : vertices_range(g)) {
for (const auto &lit_id : g[v].literals) {
@@ -2775,7 +2775,7 @@ vector<vector<RoseEdge>> findEdgesByLiteral(const RoseBuildImpl &build) {
insert(&edge_list, edge_list.end(), in_edges(v, g));
}
}
-
+
// Sort edges in each edge list by (source, target) indices. This gives us
// less surprising ordering in program generation for a literal with many
// edges.
@@ -2785,82 +2785,82 @@ vector<vector<RoseEdge>> findEdgesByLiteral(const RoseBuildImpl &build) {
return tie(g[source(a, g)].index, g[target(a, g)].index) <
tie(g[source(b, g)].index, g[target(b, g)].index);
});
- }
-
+ }
+
return lit_edge_map;
-}
-
-static
+}
+
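// findEdgesByLiteral above builds an inverted index from literal IDs to the
// in-edges of the vertices carrying them, then sorts each bucket so program
// generation is deterministic. A self-contained sketch of the same shape,
// with simplified stand-ins for the Boost graph types:
#include <algorithm>
#include <cstddef>
#include <utility>
#include <vector>

using Edge = std::pair<std::size_t, std::size_t>; // (source, target) indices

// Each vertex is modelled as (literal ids on the vertex, its in-edges).
std::vector<std::vector<Edge>>
edgesByLiteral(std::size_t numLiterals,
               const std::vector<std::pair<std::vector<std::size_t>,
                                           std::vector<Edge>>> &vertices) {
    std::vector<std::vector<Edge>> litEdgeMap(numLiterals);
    for (const auto &v : vertices) {
        for (std::size_t litId : v.first) {
            auto &bucket = litEdgeMap.at(litId);
            bucket.insert(bucket.end(), v.second.begin(), v.second.end());
        }
    }
    for (auto &bucket : litEdgeMap) {
        std::sort(bucket.begin(), bucket.end()); // (source, target) order
    }
    return litEdgeMap;
}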
+static
bool isUsedLiteral(const RoseBuildImpl &build, u32 lit_id) {
assert(lit_id < build.literal_info.size());
const auto &info = build.literal_info[lit_id];
if (!info.vertices.empty()) {
return true;
}
-
+
for (const u32 &delayed_id : info.delayed_ids) {
assert(delayed_id < build.literal_info.size());
const rose_literal_info &delayed_info = build.literal_info[delayed_id];
if (!delayed_info.vertices.empty()) {
return true;
}
- }
-
+ }
+
DEBUG_PRINTF("literal %u has no refs\n", lit_id);
return false;
-}
-
-static
+}
+
+static
rose_literal_id getFragment(rose_literal_id lit) {
if (lit.s.length() > ROSE_SHORT_LITERAL_LEN_MAX) {
// Trim to last ROSE_SHORT_LITERAL_LEN_MAX bytes.
lit.s.erase(0, lit.s.length() - ROSE_SHORT_LITERAL_LEN_MAX);
- }
+ }
DEBUG_PRINTF("fragment: %s\n", dumpString(lit.s).c_str());
return lit;
-}
-
-static
+}
+
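// getFragment above keeps only the trailing ROSE_SHORT_LITERAL_LEN_MAX bytes
// of a long literal; the matcher confirms the remainder separately. A
// plain-string sketch of the trim (the real code operates on ue2_literal,
// which also carries per-character case data):
#include <cstddef>
#include <string>

std::string fragmentOf(std::string s, std::size_t shortLenMax) {
    if (s.length() > shortLenMax) {
        s.erase(0, s.length() - shortLenMax); // keep only the suffix
    }
    return s;
}
// e.g. fragmentOf("foobarbaz", 4) == "rbaz"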
+static
vector<LitFragment> groupByFragment(const RoseBuildImpl &build) {
vector<LitFragment> fragments;
u32 frag_id = 0;
-
+
struct FragmentInfo {
vector<u32> lit_ids;
rose_group groups = 0;
};
-
+
map<rose_literal_id, FragmentInfo> frag_info;
-
+
for (u32 lit_id = 0; lit_id < build.literals.size(); lit_id++) {
const auto &lit = build.literals.at(lit_id);
const auto &info = build.literal_info.at(lit_id);
-
+
if (!isUsedLiteral(build, lit_id)) {
DEBUG_PRINTF("lit %u is unused\n", lit_id);
continue;
- }
-
+ }
+
if (lit.table == ROSE_EVENT) {
DEBUG_PRINTF("lit %u is an event\n", lit_id);
- continue;
- }
-
+ continue;
+ }
+
auto groups = info.group_mask;
-
+
if (lit.s.length() < ROSE_SHORT_LITERAL_LEN_MAX) {
fragments.emplace_back(frag_id, lit.s, groups, lit_id);
frag_id++;
continue;
- }
-
+ }
+
DEBUG_PRINTF("fragment candidate: lit_id=%u %s\n", lit_id,
dumpString(lit.s).c_str());
auto &fi = frag_info[getFragment(lit)];
fi.lit_ids.push_back(lit_id);
fi.groups |= groups;
- }
-
+ }
+
for (auto &m : frag_info) {
auto &lit = m.first;
auto &fi = m.second;
@@ -2869,17 +2869,17 @@ vector<LitFragment> groupByFragment(const RoseBuildImpl &build) {
fragments.emplace_back(frag_id, lit.s, fi.groups, move(fi.lit_ids));
frag_id++;
assert(frag_id == fragments.size());
- }
-
+ }
+
return fragments;
-}
-
-static
+}
+
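// groupByFragment above collapses long literals that share a trailing
// fragment into a single entry whose group mask is the OR of its members'.
// A sketch of that grouping step with illustrative types; the ordered map
// keeps output deterministic, as in the code above:
#include <cstdint>
#include <map>
#include <string>
#include <utility>
#include <vector>

struct FragInfo {
    std::vector<uint32_t> litIds;
    uint64_t groups = 0;
};

// lits: one (fragment string, group mask) entry per long literal.
std::map<std::string, FragInfo>
groupLiterals(const std::vector<std::pair<std::string, uint64_t>> &lits) {
    std::map<std::string, FragInfo> byFragment;
    for (uint32_t id = 0; id < lits.size(); id++) {
        FragInfo &fi = byFragment[lits[id].first];
        fi.litIds.push_back(id);
        fi.groups |= lits[id].second; // union of member group masks
    }
    return byFragment;
}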
+static
void buildIncludedIdMap(unordered_map<u32, pair<u32, u8>> &includedIdMap,
const LitProto *litProto) {
if (!litProto) {
return;
- }
+ }
const auto &proto = *litProto->hwlmProto;
for (const auto &lit : proto.lits) {
if (contains(includedIdMap, lit.id)) {
@@ -2897,11 +2897,11 @@ void buildIncludedIdMap(unordered_map<u32, pair<u32, u8>> &includedIdMap,
includedIdMap[lit.id] = make_pair(lit.included_id, lit.squash);
} else {
includedIdMap[lit.id] = make_pair(INVALID_LIT_ID, 0);
- }
- }
-}
-
-static
+ }
+ }
+}
+
+static
void findInclusionGroups(vector<LitFragment> &fragments,
LitProto *fproto, LitProto *drproto,
LitProto *eproto, LitProto *sbproto) {
@@ -2911,7 +2911,7 @@ void findInclusionGroups(vector<LitFragment> &fragments,
buildIncludedIdMap(includedDelayIdMap, drproto);
buildIncludedIdMap(includedIdMap, eproto);
buildIncludedIdMap(includedIdMap, sbproto);
-
+
size_t fragNum = fragments.size();
vector<u32> candidates;
for (size_t j = 0; j < fragNum; j++) {
@@ -2922,8 +2922,8 @@ void findInclusionGroups(vector<LitFragment> &fragments,
candidates.push_back(j);
DEBUG_PRINTF("find candidate\n");
}
- }
-
+ }
+
for (const auto &c : candidates) {
auto &frag = fragments[c];
u32 id = c;
@@ -2935,20 +2935,20 @@ void findInclusionGroups(vector<LitFragment> &fragments,
DEBUG_PRINTF("frag id %u child frag id %u\n", c,
frag.included_frag_id);
}
-
+
if (contains(includedDelayIdMap, id) &&
includedDelayIdMap[id].first != INVALID_LIT_ID) {
const auto &childId = includedDelayIdMap[id];
frag.included_delay_frag_id = childId.first;
frag.delay_squash = childId.second;
-
+
DEBUG_PRINTF("delay frag id %u child frag id %u\n", c,
frag.included_delay_frag_id);
- }
- }
-}
-
-static
+ }
+ }
+}
+
+static
void buildFragmentPrograms(const RoseBuildImpl &build,
vector<LitFragment> &fragments,
build_context &bc, ProgramBuild &prog_build,
@@ -2964,12 +2964,12 @@ void buildFragmentPrograms(const RoseBuildImpl &build,
auto caseful2 = !b.s.any_nocase();
return tie(len1, caseful1) < tie(len2, caseful2);
});
-
+
for (auto &frag : ordered_fragments) {
auto &pfrag = fragments[frag.fragment_id];
DEBUG_PRINTF("frag_id=%u, lit_ids=[%s]\n", pfrag.fragment_id,
as_string_list(pfrag.lit_ids).c_str());
-
+
auto lit_prog = makeFragmentProgram(build, bc, prog_build,
pfrag.lit_ids, lit_edge_map);
if (pfrag.included_frag_id != INVALID_FRAG_ID &&
@@ -2981,14 +2981,14 @@ void buildFragmentPrograms(const RoseBuildImpl &build,
DEBUG_PRINTF("child %u offset %u\n", cfrag.fragment_id,
child_offset);
addIncludedJumpProgram(lit_prog, child_offset, pfrag.squash);
- }
+ }
pfrag.lit_program_offset = writeProgram(bc, move(lit_prog));
-
+
// We only do delayed rebuild in streaming mode.
if (!build.cc.streaming) {
- continue;
- }
-
+ continue;
+ }
+
auto rebuild_prog = makeDelayRebuildProgram(build, prog_build,
pfrag.lit_ids);
if (pfrag.included_delay_frag_id != INVALID_FRAG_ID &&
@@ -3001,12 +3001,12 @@ void buildFragmentPrograms(const RoseBuildImpl &build,
child_offset);
addIncludedJumpProgram(rebuild_prog, child_offset,
pfrag.delay_squash);
- }
+ }
pfrag.delay_program_offset = writeProgram(bc, move(rebuild_prog));
- }
-}
-
-static
+ }
+}
+
+static
void updateLitProtoProgramOffset(vector<LitFragment> &fragments,
LitProto &litProto, bool delay) {
auto &proto = *litProto.hwlmProto;
@@ -3022,34 +3022,34 @@ void updateLitProtoProgramOffset(vector<LitFragment> &fragments,
frag.lit_program_offset);
lit.id = frag.lit_program_offset;
}
- }
-}
-
-static
+ }
+}
+
+static
void updateLitProgramOffset(vector<LitFragment> &fragments,
LitProto *fproto, LitProto *drproto,
LitProto *eproto, LitProto *sbproto) {
if (fproto) {
updateLitProtoProgramOffset(fragments, *fproto, false);
}
-
+
if (drproto) {
updateLitProtoProgramOffset(fragments, *drproto, true);
}
-
+
if (eproto) {
updateLitProtoProgramOffset(fragments, *eproto, false);
- }
-
+ }
+
if (sbproto) {
updateLitProtoProgramOffset(fragments, *sbproto, false);
- }
-}
-
+ }
+}
+
/**
* \brief Build the interpreter programs for each literal.
*/
-static
+static
void buildLiteralPrograms(const RoseBuildImpl &build,
vector<LitFragment> &fragments, build_context &bc,
ProgramBuild &prog_build, LitProto *fproto,
@@ -3057,42 +3057,42 @@ void buildLiteralPrograms(const RoseBuildImpl &build,
LitProto *sbproto) {
DEBUG_PRINTF("%zu fragments\n", fragments.size());
auto lit_edge_map = findEdgesByLiteral(build);
-
+
findInclusionGroups(fragments, fproto, drproto, eproto, sbproto);
-
+
buildFragmentPrograms(build, fragments, bc, prog_build, lit_edge_map);
-
+
// update literal program offsets for literal matcher prototypes
updateLitProgramOffset(fragments, fproto, drproto, eproto, sbproto);
-}
-
+}
+
/**
* \brief Write delay replay programs to the bytecode.
*
* Returns the offset of the beginning of the program array, and the number of
* programs.
*/
-static
+static
pair<u32, u32> writeDelayPrograms(const RoseBuildImpl &build,
const vector<LitFragment> &fragments,
build_context &bc,
ProgramBuild &prog_build) {
auto lit_edge_map = findEdgesByLiteral(build);
-
+
vector<u32> programs; // program offsets indexed by (delayed) lit id
unordered_map<u32, u32> cache; // program offsets we have already seen
-
+
for (const auto &frag : fragments) {
for (const u32 lit_id : frag.lit_ids) {
const auto &info = build.literal_info.at(lit_id);
-
+
for (const auto &delayed_lit_id : info.delayed_ids) {
DEBUG_PRINTF("lit id %u delay id %u\n", lit_id, delayed_lit_id);
auto prog = makeLiteralProgram(build, bc, prog_build,
delayed_lit_id, lit_edge_map,
false);
u32 offset = writeProgram(bc, move(prog));
-
+
u32 delay_id;
auto it = cache.find(offset);
if (it != end(cache)) {
@@ -3108,37 +3108,37 @@ pair<u32, u32> writeDelayPrograms(const RoseBuildImpl &build,
}
prog_build.delay_programs.emplace(delayed_lit_id, delay_id);
}
- }
- }
-
+ }
+ }
+
DEBUG_PRINTF("%zu delay programs\n", programs.size());
return {bc.engine_blob.add_range(programs), verify_u32(programs.size())};
-}
-
+}
+
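// The cache in writeDelayPrograms dedupes identical programs: two delayed
// literals whose programs serialize to the same blob offset share one delay
// ID. A sketch of that memoization pattern with illustrative types
// (writeAnchoredPrograms below uses the same trick):
#include <cstdint>
#include <unordered_map>
#include <vector>

uint32_t internProgram(uint32_t offset, std::vector<uint32_t> &programs,
                       std::unordered_map<uint32_t, uint32_t> &cache) {
    auto it = cache.find(offset);
    if (it != cache.end()) {
        return it->second; // identical program already has an id
    }
    uint32_t id = static_cast<uint32_t>(programs.size());
    programs.push_back(offset);
    cache.emplace(offset, id);
    return id;
}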
/**
* \brief Write anchored replay programs to the bytecode.
*
* Returns the offset of the beginning of the program array, and the number of
* programs.
*/
-static
+static
pair<u32, u32> writeAnchoredPrograms(const RoseBuildImpl &build,
const vector<LitFragment> &fragments,
build_context &bc,
ProgramBuild &prog_build) {
auto lit_edge_map = findEdgesByLiteral(build);
-
+
vector<u32> programs; // program offsets indexed by anchored id
unordered_map<u32, u32> cache; // program offsets we have already seen
-
+
for (const auto &frag : fragments) {
for (const u32 lit_id : frag.lit_ids) {
const auto &lit = build.literals.at(lit_id);
-
+
if (lit.table != ROSE_ANCHORED) {
continue;
}
-
+
// If this anchored literal can never match past
// floatingMinLiteralMatchOffset, we will never have to record it.
if (findMaxOffset(build, lit_id)
@@ -3148,12 +3148,12 @@ pair<u32, u32> writeAnchoredPrograms(const RoseBuildImpl &build,
prog_build.floatingMinLiteralMatchOffset);
continue;
}
-
+
auto prog = makeLiteralProgram(build, bc, prog_build, lit_id,
lit_edge_map, true);
u32 offset = writeProgram(bc, move(prog));
DEBUG_PRINTF("lit_id=%u -> anch prog at %u\n", lit_id, offset);
-
+
u32 anch_id;
auto it = cache.find(offset);
if (it != end(cache)) {
@@ -3168,13 +3168,13 @@ pair<u32, u32> writeAnchoredPrograms(const RoseBuildImpl &build,
offset);
}
prog_build.anchored_programs.emplace(lit_id, anch_id);
- }
- }
-
+ }
+ }
+
DEBUG_PRINTF("%zu anchored programs\n", programs.size());
return {bc.engine_blob.add_range(programs), verify_u32(programs.size())};
}
-
+
/**
* \brief Returns all reports used by output-exposed engines, for which we need
* to generate programs.
@@ -3182,33 +3182,33 @@ pair<u32, u32> writeAnchoredPrograms(const RoseBuildImpl &build,
static
set<ReportID> findEngineReports(const RoseBuildImpl &build) {
set<ReportID> reports;
-
+
// The small write engine uses these engine report programs.
insert(&reports, build.smwr.all_reports());
-
+
for (const auto &outfix : build.outfixes) {
insert(&reports, all_reports(outfix));
- }
-
+ }
+
const auto &g = build.g;
for (auto v : vertices_range(g)) {
if (g[v].suffix) {
insert(&reports, all_reports(g[v].suffix));
- }
- }
-
+ }
+ }
+
DEBUG_PRINTF("%zu engine reports (of %zu)\n", reports.size(),
build.rm.numReports());
return reports;
-}
-
-static
+}
+
+static
pair<u32, u32> buildReportPrograms(const RoseBuildImpl &build,
build_context &bc) {
const auto reports = findEngineReports(build);
vector<u32> programs;
programs.reserve(reports.size());
-
+
for (ReportID id : reports) {
auto program = makeReportProgram(build, bc.needs_mpv_catchup, id);
u32 offset = writeProgram(bc, move(program));
@@ -3216,14 +3216,14 @@ pair<u32, u32> buildReportPrograms(const RoseBuildImpl &build,
build.rm.setProgramOffset(id, offset);
DEBUG_PRINTF("program for report %u @ %u (%zu instructions)\n", id,
programs.back(), program.size());
- }
-
+ }
+
u32 offset = bc.engine_blob.add_range(programs);
u32 count = verify_u32(programs.size());
return {offset, count};
-}
-
-static
+}
+
+static
bool hasEodAnchoredSuffix(const RoseBuildImpl &build) {
const RoseGraph &g = build.g;
for (auto v : vertices_range(g)) {
@@ -3231,40 +3231,40 @@ bool hasEodAnchoredSuffix(const RoseBuildImpl &build) {
DEBUG_PRINTF("vertex %zu is in eod table and has a suffix\n",
g[v].index);
return true;
- }
- }
+ }
+ }
return false;
-}
-
-static
+}
+
+static
bool hasEodMatcher(const RoseBuildImpl &build) {
const RoseGraph &g = build.g;
for (auto v : vertices_range(g)) {
if (build.isInETable(v)) {
DEBUG_PRINTF("vertex %zu is in eod table\n", g[v].index);
- return true;
- }
- }
- return false;
-}
-
-static
+ return true;
+ }
+ }
+ return false;
+}
+
+static
void addEodAnchorProgram(const RoseBuildImpl &build, const build_context &bc,
ProgramBuild &prog_build, bool in_etable,
RoseProgram &program) {
const RoseGraph &g = build.g;
-
+
// Predecessor state id -> program block.
map<u32, RoseProgram> pred_blocks;
-
+
for (auto v : vertices_range(g)) {
if (!g[v].eod_accept) {
- continue;
- }
-
+ continue;
+ }
+
DEBUG_PRINTF("vertex %zu (with %zu preds) fires on EOD\n", g[v].index,
in_degree(v, g));
-
+
vector<RoseEdge> edge_list;
for (const auto &e : in_edges_range(v, g)) {
RoseVertex u = source(e, g);
@@ -3272,15 +3272,15 @@ void addEodAnchorProgram(const RoseBuildImpl &build, const build_context &bc,
DEBUG_PRINTF("pred %zu %s in etable\n", g[u].index,
in_etable ? "is not" : "is");
continue;
- }
+ }
if (canEagerlyReportAtEod(build, e)) {
DEBUG_PRINTF("already done report for vertex %zu\n",
g[u].index);
continue;
- }
+ }
edge_list.push_back(e);
- }
-
+ }
+
const bool multiple_preds = edge_list.size() > 1;
for (const auto &e : edge_list) {
RoseVertex u = source(e, g);
@@ -3288,19 +3288,19 @@ void addEodAnchorProgram(const RoseBuildImpl &build, const build_context &bc,
u32 pred_state = bc.roleStateIndices.at(u);
pred_blocks[pred_state].add_block(
makeEodAnchorProgram(build, prog_build, e, multiple_preds));
- }
- }
-
+ }
+ }
+
addPredBlocks(pred_blocks, bc.roleStateIndices.size(), program);
-}
-
-static
+}
+
+static
void addEodEventProgram(const RoseBuildImpl &build, build_context &bc,
ProgramBuild &prog_build, RoseProgram &program) {
if (build.eod_event_literal_id == MO_INVALID_IDX) {
return;
}
-
+
const RoseGraph &g = build.g;
const auto &lit_info = build.literal_info.at(build.eod_event_literal_id);
assert(lit_info.delayed_ids.empty());
@@ -3312,44 +3312,44 @@ void addEodEventProgram(const RoseBuildImpl &build, build_context &bc,
for (const auto &v : lit_info.vertices) {
for (const auto &e : in_edges_range(v, g)) {
edge_list.push_back(e);
- }
+ }
}
-
+
// Sort edge list for determinism, prettiness.
sort(begin(edge_list), end(edge_list),
[&g](const RoseEdge &a, const RoseEdge &b) {
return tie(g[source(a, g)].index, g[target(a, g)].index) <
tie(g[source(b, g)].index, g[target(b, g)].index);
});
-
+
auto block = makeLiteralProgram(build, bc.leftfix_info, bc.suffixes,
bc.engine_info_by_queue,
bc.roleStateIndices, prog_build,
build.eod_event_literal_id, edge_list,
false);
program.add_block(move(block));
-}
-
-static
+}
+
+static
RoseProgram makeEodProgram(const RoseBuildImpl &build, build_context &bc,
ProgramBuild &prog_build, u32 eodNfaIterOffset) {
RoseProgram program;
-
+
addEodEventProgram(build, bc, prog_build, program);
addEnginesEodProgram(eodNfaIterOffset, program);
addEodAnchorProgram(build, bc, prog_build, false, program);
if (hasEodMatcher(build)) {
addMatcherEodProgram(program);
- }
+ }
addEodAnchorProgram(build, bc, prog_build, true, program);
if (hasEodAnchoredSuffix(build)) {
addSuffixesEodProgram(program);
}
-
+
return program;
-}
-
-static
+}
+
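// makeEodProgram above assembles the EOD-time work as an ordered sequence of
// optional blocks. A toy sketch of that conditional-assembly shape, with a
// vector of opcodes standing in for RoseProgram:
#include <cstdint>
#include <vector>

using Block = std::vector<uint32_t>; // stand-in for RoseProgram

Block assembleEodProgram(const Block &eodEvents, const Block &engineCatchup,
                         const Block &eodAnchors, bool hasEodMatcher,
                         const Block &matcherScan) {
    Block program;
    auto add = [&program](const Block &b) {
        program.insert(program.end(), b.begin(), b.end());
    };
    add(eodEvents);     // EOD event literals first
    add(engineCatchup); // then engine (suffix/outfix) EOD work
    add(eodAnchors);    // then EOD-anchored roles
    if (hasEodMatcher) {
        add(matcherScan); // scan the EOD table only when one exists
    }
    return program;
}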
+static
RoseProgram makeFlushCombProgram(const RoseEngine &t) {
RoseProgram program;
if (t.ckeyCount) {
@@ -3368,159 +3368,159 @@ RoseProgram makeLastFlushCombProgram(const RoseEngine &t) {
}
static
-u32 history_required(const rose_literal_id &key) {
- if (key.msk.size() < key.s.length()) {
- return key.elength() - 1;
- } else {
- return key.msk.size() + key.delay - 1;
- }
-}
-
-static
-void fillMatcherDistances(const RoseBuildImpl &build, RoseEngine *engine) {
- const RoseGraph &g = build.g;
-
- engine->floatingDistance = 0;
- engine->floatingMinDistance = ROSE_BOUND_INF;
- engine->anchoredDistance = 0;
- engine->maxFloatingDelayedMatch = 0;
- u32 delayRebuildLength = 0;
- engine->smallBlockDistance = 0;
-
- for (auto v : vertices_range(g)) {
- if (g[v].literals.empty()) {
- continue;
- }
-
- assert(g[v].min_offset < ROSE_BOUND_INF); // cannot == ROSE_BOUND_INF
- assert(g[v].min_offset <= g[v].max_offset);
-
- for (u32 lit_id : g[v].literals) {
+u32 history_required(const rose_literal_id &key) {
+ if (key.msk.size() < key.s.length()) {
+ return key.elength() - 1;
+ } else {
+ return key.msk.size() + key.delay - 1;
+ }
+}
+
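// Worked example for history_required above (elength() is the literal's
// effective length: string length plus delay). A hypothetical literal
// "abcdef" with a 2-byte msk and delay 3 is dominated by its string, so it
// needs elength() - 1 = (6 + 3) - 1 = 8 bytes of history. Conversely, "ab"
// with an 8-byte msk and delay 3 takes the second branch and needs
// msk.size() + delay - 1 = 8 + 3 - 1 = 10 bytes, since the mask reaches
// further back than the string itself.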
+static
+void fillMatcherDistances(const RoseBuildImpl &build, RoseEngine *engine) {
+ const RoseGraph &g = build.g;
+
+ engine->floatingDistance = 0;
+ engine->floatingMinDistance = ROSE_BOUND_INF;
+ engine->anchoredDistance = 0;
+ engine->maxFloatingDelayedMatch = 0;
+ u32 delayRebuildLength = 0;
+ engine->smallBlockDistance = 0;
+
+ for (auto v : vertices_range(g)) {
+ if (g[v].literals.empty()) {
+ continue;
+ }
+
+ assert(g[v].min_offset < ROSE_BOUND_INF); // cannot == ROSE_BOUND_INF
+ assert(g[v].min_offset <= g[v].max_offset);
+
+ for (u32 lit_id : g[v].literals) {
const rose_literal_id &key = build.literals.at(lit_id);
- u32 max_d = g[v].max_offset;
- u32 min_d = g[v].min_offset;
-
+ u32 max_d = g[v].max_offset;
+ u32 min_d = g[v].min_offset;
+
DEBUG_PRINTF("checking %u: elen %zu min/max %u/%u\n", lit_id,
key.elength_including_mask(), min_d, max_d);
- if (build.literal_info[lit_id].undelayed_id != lit_id) {
- /* this is a delayed match; need to update delay properties */
- /* TODO: can delayed literals ever be in another table ? */
- if (key.table == ROSE_FLOATING) {
- ENSURE_AT_LEAST(&engine->maxFloatingDelayedMatch, max_d);
- ENSURE_AT_LEAST(&delayRebuildLength, history_required(key));
- }
- }
-
- /* for the FloatingDistances we need the true max depth of the
- string */
- if (max_d != ROSE_BOUND_INF && key.table != ROSE_ANCHORED) {
- assert(max_d >= key.delay);
- max_d -= key.delay;
- }
-
- switch (key.table) {
- case ROSE_FLOATING:
- ENSURE_AT_LEAST(&engine->floatingDistance, max_d);
+ if (build.literal_info[lit_id].undelayed_id != lit_id) {
+ /* this is a delayed match; need to update delay properties */
+ /* TODO: can delayed literals ever be in another table ? */
+ if (key.table == ROSE_FLOATING) {
+ ENSURE_AT_LEAST(&engine->maxFloatingDelayedMatch, max_d);
+ ENSURE_AT_LEAST(&delayRebuildLength, history_required(key));
+ }
+ }
+
+ /* for the FloatingDistances we need the true max depth of the
+ string */
+ if (max_d != ROSE_BOUND_INF && key.table != ROSE_ANCHORED) {
+ assert(max_d >= key.delay);
+ max_d -= key.delay;
+ }
+
+ switch (key.table) {
+ case ROSE_FLOATING:
+ ENSURE_AT_LEAST(&engine->floatingDistance, max_d);
if (min_d >= key.elength_including_mask()) {
- LIMIT_TO_AT_MOST(&engine->floatingMinDistance,
+ LIMIT_TO_AT_MOST(&engine->floatingMinDistance,
min_d - (u32)key.elength_including_mask());
- } else {
- /* overlapped literals from rose + anchored table can
- * cause us to underflow due to sloppiness in
- * estimates */
- engine->floatingMinDistance = 0;
- }
- break;
- case ROSE_ANCHORED_SMALL_BLOCK:
- ENSURE_AT_LEAST(&engine->smallBlockDistance, max_d);
- break;
- case ROSE_ANCHORED:
- ENSURE_AT_LEAST(&engine->anchoredDistance, max_d);
- break;
- case ROSE_EOD_ANCHORED:
- // EOD anchored literals are in another table, so they
- // don't contribute to these calculations.
- break;
- case ROSE_EVENT:
- break; // Not a real literal.
- }
- }
- }
-
- // Floating literals go in the small block table too.
- ENSURE_AT_LEAST(&engine->smallBlockDistance, engine->floatingDistance);
-
- // Clipped by its very nature.
- LIMIT_TO_AT_MOST(&engine->smallBlockDistance, 32U);
-
- engine->delayRebuildLength = delayRebuildLength;
-
- DEBUG_PRINTF("anchoredDistance = %u\n", engine->anchoredDistance);
- DEBUG_PRINTF("floatingDistance = %u\n", engine->floatingDistance);
- DEBUG_PRINTF("smallBlockDistance = %u\n", engine->smallBlockDistance);
- assert(engine->anchoredDistance <= build.cc.grey.maxAnchoredRegion);
-
- /* anchored->floating squash literals may lower floating min distance */
- /* TODO: find actual value */
- if (!engine->anchoredDistance) {
- return;
- }
-}
-
+ } else {
+ /* overlapped literals from rose + anchored table can
+ * cause us to underflow due to sloppiness in
+ * estimates */
+ engine->floatingMinDistance = 0;
+ }
+ break;
+ case ROSE_ANCHORED_SMALL_BLOCK:
+ ENSURE_AT_LEAST(&engine->smallBlockDistance, max_d);
+ break;
+ case ROSE_ANCHORED:
+ ENSURE_AT_LEAST(&engine->anchoredDistance, max_d);
+ break;
+ case ROSE_EOD_ANCHORED:
+ // EOD anchored literals are in another table, so they
+ // don't contribute to these calculations.
+ break;
+ case ROSE_EVENT:
+ break; // Not a real literal.
+ }
+ }
+ }
+
+ // Floating literals go in the small block table too.
+ ENSURE_AT_LEAST(&engine->smallBlockDistance, engine->floatingDistance);
+
+ // Clipped by its very nature.
+ LIMIT_TO_AT_MOST(&engine->smallBlockDistance, 32U);
+
+ engine->delayRebuildLength = delayRebuildLength;
+
+ DEBUG_PRINTF("anchoredDistance = %u\n", engine->anchoredDistance);
+ DEBUG_PRINTF("floatingDistance = %u\n", engine->floatingDistance);
+ DEBUG_PRINTF("smallBlockDistance = %u\n", engine->smallBlockDistance);
+ assert(engine->anchoredDistance <= build.cc.grey.maxAnchoredRegion);
+
+ /* anchored->floating squash literals may lower floating min distance */
+ /* TODO: find actual value */
+ if (!engine->anchoredDistance) {
+ return;
+ }
+}
+
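// fillMatcherDistances leans on ENSURE_AT_LEAST/LIMIT_TO_AT_MOST to maintain
// running bounds, including the underflow guard in the ROSE_FLOATING case.
// A sketch of those clamp semantics with a toy min-distance update (names
// here are illustrative, not the real macros):
#include <algorithm>
#include <cstdint>

void ensureAtLeast(uint32_t *val, uint32_t x) { *val = std::max(*val, x); }
void limitToAtMost(uint32_t *val, uint32_t x) { *val = std::min(*val, x); }

void updateFloatingMin(uint32_t *floatingMin, uint32_t minOffset,
                       uint32_t litLen) {
    if (minOffset >= litLen) {
        limitToAtMost(floatingMin, minOffset - litLen);
    } else {
        *floatingMin = 0; // sloppy offset estimates may underflow
    }
}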
static
u32 writeEagerQueueIter(const set<u32> &eager, u32 leftfixBeginQueue,
u32 queue_count, RoseEngineBlob &engine_blob) {
if (eager.empty()) {
return 0;
- }
-
+ }
+
vector<u32> vec;
for (u32 q : eager) {
assert(q >= leftfixBeginQueue);
vec.push_back(q - leftfixBeginQueue);
}
-
+
auto iter = mmbBuildSparseIterator(vec, queue_count - leftfixBeginQueue);
return engine_blob.add_iterator(iter);
}
-
+
static
bytecode_ptr<RoseEngine> addSmallWriteEngine(const RoseBuildImpl &build,
const RoseResources &res,
bytecode_ptr<RoseEngine> rose) {
assert(rose);
-
+
if (roseIsPureLiteral(rose.get())) {
DEBUG_PRINTF("pure literal case, not adding smwr\n");
return rose;
- }
-
+ }
+
u32 qual = roseQuality(res, rose.get());
auto smwr_engine = build.smwr.build(qual);
if (!smwr_engine) {
DEBUG_PRINTF("no smwr built\n");
return rose;
- }
-
+ }
+
const size_t mainSize = rose.size();
const size_t smallWriteSize = smwr_engine.size();
DEBUG_PRINTF("adding smwr engine, size=%zu\n", smallWriteSize);
-
+
const size_t smwrOffset = ROUNDUP_CL(mainSize);
const size_t newSize = smwrOffset + smallWriteSize;
-
+
auto rose2 = make_zeroed_bytecode_ptr<RoseEngine>(newSize, 64);
char *ptr = (char *)rose2.get();
memcpy(ptr, rose.get(), mainSize);
memcpy(ptr + smwrOffset, smwr_engine.get(), smallWriteSize);
-
+
rose2->smallWriteOffset = verify_u32(smwrOffset);
rose2->size = verify_u32(newSize);
-
+
return rose2;
}
-
+
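// addSmallWriteEngine appends the small write engine at the next cacheline
// boundary after the main bytecode and records the offset in the header. A
// simplified sketch of that layout step, with std::vector standing in for
// bytecode_ptr:
#include <cstddef>
#include <cstring>
#include <vector>

constexpr std::size_t CACHELINE = 64;

std::size_t roundupCL(std::size_t n) {
    return (n + CACHELINE - 1) & ~(CACHELINE - 1);
}

std::vector<char> appendBlob(const std::vector<char> &main,
                             const std::vector<char> &smwr) {
    const std::size_t smwrOffset = roundupCL(main.size());
    std::vector<char> out(smwrOffset + smwr.size(), 0); // zero-padded gap
    std::memcpy(out.data(), main.data(), main.size());
    std::memcpy(out.data() + smwrOffset, smwr.data(), smwr.size());
    return out; // caller records smwrOffset and out.size() in the header
}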
/**
* \brief Returns the pair (number of literals, max length) for all real
* literals in the floating table that are in-use.
@@ -3529,10 +3529,10 @@ static
pair<size_t, size_t> floatingCountAndMaxLen(const RoseBuildImpl &build) {
size_t num = 0;
size_t max_len = 0;
-
+
for (u32 id = 0; id < build.literals.size(); id++) {
const rose_literal_id &lit = build.literals.at(id);
-
+
if (lit.table != ROSE_FLOATING) {
continue;
}
@@ -3544,31 +3544,31 @@ pair<size_t, size_t> floatingCountAndMaxLen(const RoseBuildImpl &build) {
if (!isUsedLiteral(build, id)) {
continue;
}
-
+
num++;
max_len = max(max_len, lit.s.length());
- }
+ }
DEBUG_PRINTF("%zu floating literals with max_len=%zu\n", num, max_len);
return {num, max_len};
}
-
+
size_t calcLongLitThreshold(const RoseBuildImpl &build,
const size_t historyRequired) {
const auto &cc = build.cc;
-
+
// In block mode, we don't have history, so we don't need long literal
// support and can just use "medium-length" literal confirm. TODO: we could
// specialize further and have a block mode literal confirm instruction.
if (!cc.streaming) {
return SIZE_MAX;
- }
-
+ }
+
size_t longLitLengthThreshold = ROSE_LONG_LITERAL_THRESHOLD_MIN;
-
+
// Expand to size of history we've already allocated. Note that we need N-1
// bytes of history to match a literal of length N.
longLitLengthThreshold = max(longLitLengthThreshold, historyRequired + 1);
-
+
// If we only have one literal, allow for a larger value in order to avoid
// building a long literal table for a trivial Noodle case that we could
// fit in history.
@@ -3579,15 +3579,15 @@ size_t calcLongLitThreshold(const RoseBuildImpl &build,
num_len.second);
longLitLengthThreshold = num_len.second;
}
- }
-
+ }
+
// Clamp to max history available.
longLitLengthThreshold =
min(longLitLengthThreshold, size_t{cc.grey.maxHistoryAvailable} + 1);
-
+
return longLitLengthThreshold;
}
-
+
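// Worked example for calcLongLitThreshold above, with hypothetical values:
// if historyRequired is already 100, the threshold expands to at least 101,
// since N - 1 bytes of history suffice to confirm a length-N literal; a
// grey.maxHistoryAvailable of 110 then caps the final value at 111.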
static
map<left_id, u32> makeLeftQueueMap(const RoseGraph &g,
const map<RoseVertex, left_build_info> &leftfix_info) {
@@ -3601,18 +3601,18 @@ map<left_id, u32> makeLeftQueueMap(const RoseGraph &g,
left_id left(g[e.first].left);
assert(!contains(lqm, left) || lqm[left] == e.second.queue);
lqm[left] = e.second.queue;
- }
-
+ }
+
return lqm;
}
-
+
bytecode_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
// We keep all our offsets, counts etc. in a prototype RoseEngine which we
// will copy into the real one once it is allocated: we can't do this
// until we know how big it will be.
RoseEngine proto;
memset(&proto, 0, sizeof(proto));
-
+
// Set scanning mode.
if (!cc.streaming) {
proto.mode = HS_MODE_BLOCK;
@@ -3621,29 +3621,29 @@ bytecode_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
} else {
proto.mode = HS_MODE_STREAM;
}
-
+
DerivedBoundaryReports dboundary(boundary);
-
+
size_t historyRequired = calcHistoryRequired(); // Updated by HWLM.
size_t longLitLengthThreshold = calcLongLitThreshold(*this,
historyRequired);
DEBUG_PRINTF("longLitLengthThreshold=%zu\n", longLitLengthThreshold);
-
+
vector<LitFragment> fragments = groupByFragment(*this);
-
+
auto anchored_dfas = buildAnchoredDfas(*this, fragments);
-
+
build_context bc;
u32 floatingMinLiteralMatchOffset
= findMinFloatingLiteralMatch(*this, anchored_dfas);
recordResources(bc.resources, *this, anchored_dfas, fragments);
bc.needs_mpv_catchup = needsMpvCatchup(*this);
-
+
makeBoundaryPrograms(*this, bc, boundary, dboundary, proto.boundary);
-
+
tie(proto.reportProgramOffset, proto.reportProgramCount) =
buildReportPrograms(*this, bc);
-
+
// Build NFAs
bool mpv_as_outfix;
prepMpv(*this, bc, &historyRequired, &mpv_as_outfix);
@@ -3653,10 +3653,10 @@ bytecode_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
}
proto.outfixEndQueue = qif.allocated_count();
proto.leftfixBeginQueue = proto.outfixEndQueue;
-
+
set<u32> no_retrigger_queues;
set<u32> eager_queues;
-
+
/* Note: buildNfas may reduce the lag for vertices that have prefixes */
if (!buildNfas(*this, bc, qif, &no_retrigger_queues, &eager_queues,
&proto.leftfixBeginQueue)) {
@@ -3664,76 +3664,76 @@ bytecode_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
}
u32 eodNfaIterOffset = buildEodNfaIterator(bc, proto.leftfixBeginQueue);
buildCountingMiracles(bc);
-
+
u32 queue_count = qif.allocated_count(); /* excludes anchored matcher q;
* som rev nfas */
if (queue_count > cc.grey.limitRoseEngineCount) {
throw ResourceLimitError();
}
-
+
// Enforce role table resource limit.
if (num_vertices(g) > cc.grey.limitRoseRoleCount) {
throw ResourceLimitError();
}
-
+
bc.roleStateIndices = assignStateIndices(*this);
-
+
u32 laggedRoseCount = 0;
vector<LeftNfaInfo> leftInfoTable;
buildLeftInfoTable(*this, bc, eager_queues, proto.leftfixBeginQueue,
queue_count - proto.leftfixBeginQueue, leftInfoTable,
&laggedRoseCount, &historyRequired);
-
+
// Information only needed for program construction.
ProgramBuild prog_build(floatingMinLiteralMatchOffset,
longLitLengthThreshold, needsCatchup(*this));
prog_build.vertex_group_map = getVertexGroupMap(*this);
prog_build.squashable_groups = getSquashableGroups(*this);
-
+
tie(proto.anchoredProgramOffset, proto.anchored_count) =
writeAnchoredPrograms(*this, fragments, bc, prog_build);
-
+
tie(proto.delayProgramOffset, proto.delay_count) =
writeDelayPrograms(*this, fragments, bc, prog_build);
-
+
// Build floating HWLM matcher prototype.
rose_group fgroups = 0;
auto fproto = buildFloatingMatcherProto(*this, fragments,
longLitLengthThreshold,
&fgroups, &historyRequired);
-
+
// Build delay rebuild HWLM matcher prototype.
auto drproto = buildDelayRebuildMatcherProto(*this, fragments,
longLitLengthThreshold);
-
+
// Build EOD-anchored HWLM matcher prototype.
auto eproto = buildEodAnchoredMatcherProto(*this, fragments);
-
+
// Build small-block HWLM matcher prototype.
auto sbproto = buildSmallBlockMatcherProto(*this, fragments);
-
+
buildLiteralPrograms(*this, fragments, bc, prog_build, fproto.get(),
drproto.get(), eproto.get(), sbproto.get());
-
+
auto eod_prog = makeEodProgram(*this, bc, prog_build, eodNfaIterOffset);
proto.eodProgramOffset = writeProgram(bc, move(eod_prog));
-
+
size_t longLitStreamStateRequired = 0;
proto.longLitTableOffset
= buildLongLiteralTable(*this, bc.engine_blob, bc.longLiterals,
longLitLengthThreshold, &historyRequired,
&longLitStreamStateRequired);
-
+
proto.lastByteHistoryIterOffset = buildLastByteIter(g, bc);
proto.eagerIterOffset = writeEagerQueueIter(
eager_queues, proto.leftfixBeginQueue, queue_count, bc.engine_blob);
-
+
addSomRevNfas(bc, proto, ssm);
-
+
writeDkeyInfo(rm, bc.engine_blob, proto);
writeLeftInfo(bc.engine_blob, proto, leftInfoTable);
writeLogicalInfo(rm, bc.engine_blob, proto);
-
+
auto flushComb_prog = makeFlushCombProgram(proto);
proto.flushCombProgramOffset = writeProgram(bc, move(flushComb_prog));
@@ -3743,105 +3743,105 @@ bytecode_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
// Build anchored matcher.
auto atable = buildAnchoredMatcher(*this, fragments, anchored_dfas);
- if (atable) {
+ if (atable) {
proto.amatcherOffset = bc.engine_blob.add(atable);
- }
+ }
// Build floating HWLM matcher.
auto ftable = buildHWLMMatcher(*this, fproto.get());
- if (ftable) {
+ if (ftable) {
proto.fmatcherOffset = bc.engine_blob.add(ftable);
bc.resources.has_floating = true;
- }
+ }
// Build delay rebuild HWLM matcher.
auto drtable = buildHWLMMatcher(*this, drproto.get());
if (drtable) {
proto.drmatcherOffset = bc.engine_blob.add(drtable);
- }
+ }
// Build EOD-anchored HWLM matcher.
auto etable = buildHWLMMatcher(*this, eproto.get());
- if (etable) {
+ if (etable) {
proto.ematcherOffset = bc.engine_blob.add(etable);
- }
+ }
// Build small-block HWLM matcher.
auto sbtable = buildHWLMMatcher(*this, sbproto.get());
- if (sbtable) {
+ if (sbtable) {
proto.sbmatcherOffset = bc.engine_blob.add(sbtable);
- }
-
+ }
+
proto.activeArrayCount = proto.leftfixBeginQueue;
-
+
proto.anchorStateSize = atable ? anchoredStateSize(*atable) : 0;
-
+
DEBUG_PRINTF("rose history required %zu\n", historyRequired);
assert(!cc.streaming || historyRequired <= cc.grey.maxHistoryAvailable);
-
+
// Some SOM schemes (reverse NFAs, for example) may require more history.
historyRequired = max(historyRequired, (size_t)ssm.somHistoryRequired());
-
+
assert(!cc.streaming || historyRequired <=
max(cc.grey.maxHistoryAvailable, cc.grey.somMaxRevNfaLength));
-
+
fillStateOffsets(*this, bc.roleStateIndices.size(), proto.anchorStateSize,
proto.activeArrayCount, proto.activeLeftCount,
laggedRoseCount, longLitStreamStateRequired,
historyRequired, &proto.stateOffsets);
-
+
// Write in NfaInfo structures. This will also update state size
// information in proto.
writeNfaInfo(*this, bc, proto, no_retrigger_queues);
-
+
scatter_plan_raw state_scatter = buildStateScatterPlan(
sizeof(u8), bc.roleStateIndices.size(), proto.activeLeftCount,
proto.rosePrefixCount, proto.stateOffsets, cc.streaming,
proto.activeArrayCount, proto.outfixBeginQueue, proto.outfixEndQueue);
-
+
u32 currOffset; /* relative to base of RoseEngine */
if (!bc.engine_blob.empty()) {
currOffset = bc.engine_blob.base_offset + bc.engine_blob.size();
} else {
currOffset = sizeof(RoseEngine);
}
-
+
currOffset = ROUNDUP_CL(currOffset);
DEBUG_PRINTF("currOffset %u\n", currOffset);
-
+
currOffset = ROUNDUP_N(currOffset, alignof(scatter_unit_u64a));
u32 state_scatter_aux_offset = currOffset;
currOffset += aux_size(state_scatter);
-
+
proto.historyRequired = verify_u32(historyRequired);
proto.ekeyCount = rm.numEkeys();
-
+
proto.somHorizon = ssm.somPrecision();
proto.somLocationCount = ssm.numSomSlots();
proto.somLocationFatbitSize = fatbit_size(proto.somLocationCount);
-
+
proto.runtimeImpl = pickRuntimeImpl(*this, bc.resources,
proto.outfixEndQueue);
proto.mpvTriggeredByLeaf = anyEndfixMpvTriggers(*this);
-
+
proto.queueCount = queue_count;
proto.activeQueueArraySize = fatbit_size(queue_count);
proto.handledKeyCount = prog_build.handledKeys.size();
proto.handledKeyFatbitSize = fatbit_size(proto.handledKeyCount);
-
+
proto.rolesWithStateCount = bc.roleStateIndices.size();
-
+
proto.initMpvNfa = mpv_as_outfix ? 0 : MO_INVALID_IDX;
proto.stateSize = mmbit_size(bc.roleStateIndices.size());
-
+
proto.delay_fatbit_size = fatbit_size(proto.delay_count);
proto.anchored_fatbit_size = fatbit_size(proto.anchored_count);
-
- // The Small Write matcher is (conditionally) added to the RoseEngine in
- // another pass by the caller. Set to zero (meaning no SMWR engine) for
- // now.
+
+ // The Small Write matcher is (conditionally) added to the RoseEngine in
+ // another pass by the caller. Set to zero (meaning no SMWR engine) for
+ // now.
proto.smallWriteOffset = 0;
-
+
proto.amatcherMinWidth = findMinWidth(*this, ROSE_ANCHORED);
proto.fmatcherMinWidth = findMinWidth(*this, ROSE_FLOATING);
proto.eodmatcherMinWidth = findMinWidth(*this, ROSE_EOD_ANCHORED);
@@ -3850,47 +3850,47 @@ bytecode_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
proto.minWidth = hasBoundaryReports(boundary) ? 0 : minWidth;
proto.minWidthExcludingBoundaries = minWidth;
proto.floatingMinLiteralMatchOffset = floatingMinLiteralMatchOffset;
-
+
proto.maxBiAnchoredWidth = findMaxBAWidth(*this);
proto.noFloatingRoots = hasNoFloatingRoots();
proto.requiresEodCheck = hasEodAnchors(*this, bc, proto.outfixEndQueue);
proto.hasOutfixesInSmallBlock = hasNonSmallBlockOutfix(outfixes);
proto.canExhaust = rm.patternSetCanExhaust();
proto.hasSom = hasSom;
-
- /* populate anchoredDistance, floatingDistance, floatingMinDistance, etc */
+
+ /* populate anchoredDistance, floatingDistance, floatingMinDistance, etc */
fillMatcherDistances(*this, &proto);
-
+
proto.initialGroups = getInitialGroups();
proto.floating_group_mask = fgroups;
proto.totalNumLiterals = verify_u32(literal_info.size());
proto.asize = verify_u32(atable.size());
proto.ematcherRegionSize = ematcher_region_size;
-
+
proto.size = currOffset;
-
+
// Time to allocate the real RoseEngine structure, at cacheline alignment.
auto engine = make_zeroed_bytecode_ptr<RoseEngine>(currOffset, 64);
assert(engine); // will have thrown bad_alloc otherwise.
-
+
// Copy in our prototype engine data.
memcpy(engine.get(), &proto, sizeof(proto));
-
+
write_out(&engine->state_init, (char *)engine.get(), state_scatter,
state_scatter_aux_offset);
-
+
// Copy in the engine blob.
bc.engine_blob.write_bytes(engine.get());
-
+
// Add a small write engine if appropriate.
engine = addSmallWriteEngine(*this, bc.resources, move(engine));
-
+
DEBUG_PRINTF("rose done %p\n", engine.get());
-
+
dumpRose(*this, fragments, makeLeftQueueMap(g, bc.leftfix_info),
bc.suffixes, engine.get());
-
- return engine;
-}
-
-} // namespace ue2
+
+ return engine;
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_compile.cpp b/contrib/libs/hyperscan/src/rose/rose_build_compile.cpp
index a9cd2b95df..1cf3bbe695 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_compile.cpp
+++ b/contrib/libs/hyperscan/src/rose/rose_build_compile.cpp
@@ -1,151 +1,151 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "rose_build_impl.h"
-
-#include "grey.h"
-#include "hs_internal.h"
-#include "rose_build_anchored.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "rose_build_impl.h"
+
+#include "grey.h"
+#include "hs_internal.h"
+#include "rose_build_anchored.h"
#include "rose_build_castle.h"
-#include "rose_build_convert.h"
-#include "rose_build_dump.h"
+#include "rose_build_convert.h"
+#include "rose_build_dump.h"
#include "rose_build_groups.h"
#include "rose_build_matchers.h"
-#include "rose_build_merge.h"
-#include "rose_build_role_aliasing.h"
-#include "rose_build_util.h"
-#include "ue2common.h"
+#include "rose_build_merge.h"
+#include "rose_build_role_aliasing.h"
+#include "rose_build_util.h"
+#include "ue2common.h"
#include "hwlm/hwlm_literal.h"
-#include "nfa/nfa_internal.h"
-#include "nfa/rdfa.h"
-#include "nfagraph/ng_holder.h"
-#include "nfagraph/ng_execute.h"
-#include "nfagraph/ng_is_equal.h"
-#include "nfagraph/ng_limex.h"
-#include "nfagraph/ng_mcclellan.h"
+#include "nfa/nfa_internal.h"
+#include "nfa/rdfa.h"
+#include "nfagraph/ng_holder.h"
+#include "nfagraph/ng_execute.h"
+#include "nfagraph/ng_is_equal.h"
+#include "nfagraph/ng_limex.h"
+#include "nfagraph/ng_mcclellan.h"
#include "nfagraph/ng_prune.h"
-#include "nfagraph/ng_repeat.h"
-#include "nfagraph/ng_reports.h"
-#include "nfagraph/ng_stop.h"
-#include "nfagraph/ng_util.h"
-#include "nfagraph/ng_width.h"
-#include "util/bitutils.h"
-#include "util/charreach.h"
-#include "util/charreach_util.h"
-#include "util/compare.h"
-#include "util/compile_context.h"
-#include "util/container.h"
-#include "util/dump_charclass.h"
+#include "nfagraph/ng_repeat.h"
+#include "nfagraph/ng_reports.h"
+#include "nfagraph/ng_stop.h"
+#include "nfagraph/ng_util.h"
+#include "nfagraph/ng_width.h"
+#include "util/bitutils.h"
+#include "util/charreach.h"
+#include "util/charreach_util.h"
+#include "util/compare.h"
+#include "util/compile_context.h"
+#include "util/container.h"
+#include "util/dump_charclass.h"
#include "util/flat_containers.h"
-#include "util/graph_range.h"
-#include "util/order_check.h"
-#include "util/report_manager.h"
-#include "util/ue2string.h"
-#include "util/verify_types.h"
-
-#include <algorithm>
-#include <functional>
-#include <map>
-#include <set>
-#include <string>
-#include <vector>
-#include <utility>
-
-#include <boost/range/adaptor/map.hpp>
-
-using namespace std;
-using boost::adaptors::map_values;
-
-namespace ue2 {
-
-#define ANCHORED_REHOME_MIN_FLOATING 800
-#define ANCHORED_REHOME_MIN_FLOATING_SHORT 50
-#define ANCHORED_REHOME_ALLOW_SHORT 20
-#define ANCHORED_REHOME_DEEP 25
-#define ANCHORED_REHOME_SHORT_LEN 3
-
+#include "util/graph_range.h"
+#include "util/order_check.h"
+#include "util/report_manager.h"
+#include "util/ue2string.h"
+#include "util/verify_types.h"
+
+#include <algorithm>
+#include <functional>
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+#include <utility>
+
+#include <boost/range/adaptor/map.hpp>
+
+using namespace std;
+using boost::adaptors::map_values;
+
+namespace ue2 {
+
+#define ANCHORED_REHOME_MIN_FLOATING 800
+#define ANCHORED_REHOME_MIN_FLOATING_SHORT 50
+#define ANCHORED_REHOME_ALLOW_SHORT 20
+#define ANCHORED_REHOME_DEEP 25
+#define ANCHORED_REHOME_SHORT_LEN 3
+
#define MAX_EXPLOSION_NC 3
-static
+static
bool limited_explosion(const ue2_literal &s) {
u32 nc_count = 0;
-
+
for (const auto &e : s) {
if (e.nocase) {
nc_count++;
- }
- }
-
+ }
+ }
+
return nc_count <= MAX_EXPLOSION_NC;
-}
-
-static
+}
+
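// limited_explosion above bounds case explosion: a literal with k nocase
// characters expands to 2^k caseless variants, so MAX_EXPLOSION_NC = 3 caps
// the blowup at 8 literals. A sketch of the same counting test, treating
// every alphabetic character as caseless purely for illustration (the real
// ue2_literal tracks an explicit per-character nocase flag):
#include <cctype>
#include <string>

bool explosionIsLimited(const std::string &s, unsigned maxNocase) {
    unsigned nc = 0;
    for (char c : s) {
        if (std::isalpha(static_cast<unsigned char>(c))) {
            nc++;
        }
    }
    return nc <= maxNocase; // 2^nc caseless variants would be generated
}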
+static
void removeLiteralFromGraph(RoseBuildImpl &build, u32 id) {
assert(id < build.literal_info.size());
auto &info = build.literal_info.at(id);
for (const auto &v : info.vertices) {
build.g[v].literals.erase(id);
- }
+ }
info.vertices.clear();
-}
-
+}
+
/**
* \brief Replace the given mixed-case literal with the set of its caseless
* variants.
*/
-static
+static
void explodeLiteral(RoseBuildImpl &build, u32 id) {
const auto &lit = build.literals.at(id);
auto &info = build.literal_info[id];
-
+
assert(!info.group_mask); // not set yet
assert(info.undelayed_id == id); // we do not explode delayed literals
-
+
for (auto it = caseIterateBegin(lit.s); it != caseIterateEnd(); ++it) {
ue2_literal new_str(*it, false);
-
+
if (!maskIsConsistent(new_str.get_string(), false, lit.msk, lit.cmp)) {
DEBUG_PRINTF("msk/cmp for literal can't match, skipping\n");
- continue;
- }
-
+ continue;
+ }
+
u32 new_id =
build.getLiteralId(new_str, lit.msk, lit.cmp, lit.delay, lit.table);
-
+
DEBUG_PRINTF("adding exploded lit %u: '%s'\n", new_id,
dumpString(new_str).c_str());
-
+
const auto &new_lit = build.literals.at(new_id);
auto &new_info = build.literal_info.at(new_id);
insert(&new_info.vertices, info.vertices);
for (const auto &v : info.vertices) {
build.g[v].literals.insert(new_id);
- }
-
+ }
+
build.literal_info[new_id].undelayed_id = new_id;
if (!info.delayed_ids.empty()) {
flat_set<u32> &del_ids = new_info.delayed_ids;
@@ -156,35 +156,35 @@ void explodeLiteral(RoseBuildImpl &build, u32 id) {
dlit.delay, dlit.table);
del_ids.insert(new_delay_id);
build.literal_info[new_delay_id].undelayed_id = new_id;
- }
- }
- }
-
+ }
+ }
+ }
+
// Remove the old literal and any old delay variants.
removeLiteralFromGraph(build, id);
for (u32 delay_id : info.delayed_ids) {
removeLiteralFromGraph(build, delay_id);
- }
+ }
info.delayed_ids.clear();
-}
-
-void RoseBuildImpl::handleMixedSensitivity(void) {
+}
+
+void RoseBuildImpl::handleMixedSensitivity(void) {
vector<u32> explode;
for (u32 id = 0; id < literals.size(); id++) {
const rose_literal_id &lit = literals.at(id);
-
- if (lit.delay) {
-            continue; /* delay ids are virtual-ish */
- }
-
- if (lit.table == ROSE_ANCHORED || lit.table == ROSE_EVENT) {
- continue; /* wrong table */
- }
-
- if (!mixed_sensitivity(lit.s)) {
- continue;
- }
-
+
+ if (lit.delay) {
+            continue; /* delay ids are virtual-ish */
+ }
+
+ if (lit.table == ROSE_ANCHORED || lit.table == ROSE_EVENT) {
+ continue; /* wrong table */
+ }
+
+ if (!mixed_sensitivity(lit.s)) {
+ continue;
+ }
+
// We don't want to explode long literals, as they require confirmation
// with a CHECK_LONG_LIT instruction and need unique final_ids.
// TODO: we could allow explosion for literals where the prefixes
@@ -192,226 +192,226 @@ void RoseBuildImpl::handleMixedSensitivity(void) {
if (lit.s.length() <= ROSE_LONG_LITERAL_THRESHOLD_MIN &&
limited_explosion(lit.s) && literal_info[id].delayed_ids.empty()) {
- DEBUG_PRINTF("need to explode existing string '%s'\n",
- dumpString(lit.s).c_str());
+ DEBUG_PRINTF("need to explode existing string '%s'\n",
+ dumpString(lit.s).c_str());
explode.push_back(id);
- } else {
- literal_info[id].requires_benefits = true;
- }
- }
+ } else {
+ literal_info[id].requires_benefits = true;
+ }
+ }
for (u32 id : explode) {
explodeLiteral(*this, id);
}
-}
-
-// Returns the length of the longest prefix of s that is (a) also a suffix of s
-// and (b) not s itself.
-static
-size_t maxPeriod(const ue2_literal &s) {
- /* overly conservative if only part of the string is nocase */
- if (s.empty()) {
- return 0;
- }
-
- const size_t len = s.length();
- const char *begin = s.c_str(), *end = begin + len;
- size_t i;
- for (i = len - 1; i != 0; i--) {
- if (!cmp(begin, end - i, i, s.any_nocase())) {
- break;
- }
- }
-
- return i;
-}
-
-bool RoseBuildImpl::isPseudoStar(const RoseEdge &e) const {
- return !g[e].minBound && isPseudoStarOrFirstOnly(e);
-}
-
-bool RoseBuildImpl::isPseudoStarOrFirstOnly(const RoseEdge &e) const {
- RoseVertex u = source(e, g);
- RoseVertex v = target(e, g);
-
- if (g[e].maxBound != ROSE_BOUND_INF) {
- return false;
- }
-
- if (isAnyStart(u)) {
- return true;
- }
-
- if (isAnchored(u)) {
- /* anchored table runs out of order */
- return false;
- }
-
- if (hasDelayedLiteral(u)) {
- return false;
- }
-
- if (g[v].left) {
- return false;
- }
-
- if (g[v].eod_accept) {
- return true;
- }
-
- assert(!g[v].literals.empty());
- if (maxLiteralOverlap(u, v)) {
- return false;
- }
-
- return true;
-}
-
-bool RoseBuildImpl::hasOnlyPseudoStarInEdges(RoseVertex v) const {
- for (const auto &e : in_edges_range(v, g)) {
- if (!isPseudoStar(e)) {
- return false;
- }
- }
- return true;
-}
-
-static
-size_t trailerDueToSelf(const rose_literal_id &lit) {
- size_t trailer = lit.s.length() - maxPeriod(lit.s);
- if (trailer > 255) {
- return 255;
- }
- if (!trailer) {
- return 1;
- }
- return trailer;
-}
-
-static
-RoseRoleHistory findHistoryScheme(const RoseBuildImpl &tbi, const RoseEdge &e) {
- const RoseGraph &g = tbi.g;
- const RoseVertex u = source(e, g); /* pred role */
- const RoseVertex v = target(e, g); /* current role */
-
+}
+
+// Returns the length of the longest prefix of s that is (a) also a suffix of s
+// and (b) not s itself.
+static
+size_t maxPeriod(const ue2_literal &s) {
+ /* overly conservative if only part of the string is nocase */
+ if (s.empty()) {
+ return 0;
+ }
+
+ const size_t len = s.length();
+ const char *begin = s.c_str(), *end = begin + len;
+ size_t i;
+ for (i = len - 1; i != 0; i--) {
+ if (!cmp(begin, end - i, i, s.any_nocase())) {
+ break;
+ }
+ }
+
+ return i;
+}
+
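// Worked example for maxPeriod above: for s = "abcab" the longest proper
// prefix that is also a suffix is "ab", so maxPeriod returns 2 and
// trailerDueToSelf below yields 5 - 2 = 3. A string with no such border,
// e.g. "abc", gives maxPeriod 0, so its trailer is the full string length,
// clamped by trailerDueToSelf to the range [1, 255].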
+bool RoseBuildImpl::isPseudoStar(const RoseEdge &e) const {
+ return !g[e].minBound && isPseudoStarOrFirstOnly(e);
+}
+
+bool RoseBuildImpl::isPseudoStarOrFirstOnly(const RoseEdge &e) const {
+ RoseVertex u = source(e, g);
+ RoseVertex v = target(e, g);
+
+ if (g[e].maxBound != ROSE_BOUND_INF) {
+ return false;
+ }
+
+ if (isAnyStart(u)) {
+ return true;
+ }
+
+ if (isAnchored(u)) {
+ /* anchored table runs out of order */
+ return false;
+ }
+
+ if (hasDelayedLiteral(u)) {
+ return false;
+ }
+
+ if (g[v].left) {
+ return false;
+ }
+
+ if (g[v].eod_accept) {
+ return true;
+ }
+
+ assert(!g[v].literals.empty());
+ if (maxLiteralOverlap(u, v)) {
+ return false;
+ }
+
+ return true;
+}
+
+bool RoseBuildImpl::hasOnlyPseudoStarInEdges(RoseVertex v) const {
+ for (const auto &e : in_edges_range(v, g)) {
+ if (!isPseudoStar(e)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+static
+size_t trailerDueToSelf(const rose_literal_id &lit) {
+ size_t trailer = lit.s.length() - maxPeriod(lit.s);
+ if (trailer > 255) {
+ return 255;
+ }
+ if (!trailer) {
+ return 1;
+ }
+ return trailer;
+}
+
+static
+RoseRoleHistory findHistoryScheme(const RoseBuildImpl &tbi, const RoseEdge &e) {
+ const RoseGraph &g = tbi.g;
+ const RoseVertex u = source(e, g); /* pred role */
+ const RoseVertex v = target(e, g); /* current role */
+
DEBUG_PRINTF("find history for [%zu,%zu]\n", g[u].index, g[v].index);
- DEBUG_PRINTF("u has min_offset=%u, max_offset=%u\n", g[u].min_offset,
- g[u].max_offset);
-
- if (g[v].left) {
- if (!tbi.isAnyStart(u)) {
- /* infix nfa will track history, treat as pseudo .*. Note: rose lits
-             * may overlap so rose history tracking would be wrong anyway */
- DEBUG_PRINTF("skipping history as prefix\n");
- return ROSE_ROLE_HISTORY_NONE;
- }
- if (g[e].minBound || g[e].maxBound != ROSE_BOUND_INF) {
- DEBUG_PRINTF("rose prefix with external bounds\n");
- return ROSE_ROLE_HISTORY_ANCH;
- } else {
- return ROSE_ROLE_HISTORY_NONE;
- }
- }
-
- // Handle EOD cases.
- if (g[v].eod_accept) {
- const u32 minBound = g[e].minBound, maxBound = g[e].maxBound;
- DEBUG_PRINTF("EOD edge with bounds [%u,%u]\n", minBound, maxBound);
-
- // Trivial case: we don't need history for {0,inf} bounds
- if (minBound == 0 && maxBound == ROSE_BOUND_INF) {
- return ROSE_ROLE_HISTORY_NONE;
- }
-
- // Event literals store no history.
- if (tbi.hasLiteralInTable(u, ROSE_EVENT)) {
- return ROSE_ROLE_HISTORY_NONE;
- }
-
- // Trivial case: fixed offset from anchor
- if (g[u].fixedOffset()) {
- return ROSE_ROLE_HISTORY_ANCH;
- }
-
- // If the bounds are {0,0}, this role can only match precisely at EOD.
- if (minBound == 0 && maxBound == 0) {
+ DEBUG_PRINTF("u has min_offset=%u, max_offset=%u\n", g[u].min_offset,
+ g[u].max_offset);
+
+ if (g[v].left) {
+ if (!tbi.isAnyStart(u)) {
+ /* infix nfa will track history, treat as pseudo .*. Note: rose lits
+             * may overlap so rose history tracking would be wrong anyway */
+ DEBUG_PRINTF("skipping history as prefix\n");
+ return ROSE_ROLE_HISTORY_NONE;
+ }
+ if (g[e].minBound || g[e].maxBound != ROSE_BOUND_INF) {
+ DEBUG_PRINTF("rose prefix with external bounds\n");
+ return ROSE_ROLE_HISTORY_ANCH;
+ } else {
+ return ROSE_ROLE_HISTORY_NONE;
+ }
+ }
+
+ // Handle EOD cases.
+ if (g[v].eod_accept) {
+ const u32 minBound = g[e].minBound, maxBound = g[e].maxBound;
+ DEBUG_PRINTF("EOD edge with bounds [%u,%u]\n", minBound, maxBound);
+
+ // Trivial case: we don't need history for {0,inf} bounds
+ if (minBound == 0 && maxBound == ROSE_BOUND_INF) {
+ return ROSE_ROLE_HISTORY_NONE;
+ }
+
+ // Event literals store no history.
+ if (tbi.hasLiteralInTable(u, ROSE_EVENT)) {
+ return ROSE_ROLE_HISTORY_NONE;
+ }
+
+ // Trivial case: fixed offset from anchor
+ if (g[u].fixedOffset()) {
+ return ROSE_ROLE_HISTORY_ANCH;
+ }
+
+ // If the bounds are {0,0}, this role can only match precisely at EOD.
+ if (minBound == 0 && maxBound == 0) {
/* last byte history will squash the state byte so cannot have other
* succ */
assert(out_degree(u, g) == 1);
- return ROSE_ROLE_HISTORY_LAST_BYTE;
- }
-
- // XXX: No other history schemes should be possible any longer.
- assert(0);
- }
-
- // Non-EOD cases.
-
- DEBUG_PRINTF("examining edge [%zu,%zu] with bounds {%u,%u}\n",
+ return ROSE_ROLE_HISTORY_LAST_BYTE;
+ }
+
+ // XXX: No other history schemes should be possible any longer.
+ assert(0);
+ }
+
+ // Non-EOD cases.
+
+ DEBUG_PRINTF("examining edge [%zu,%zu] with bounds {%u,%u}\n",
g[u].index, g[v].index, g[e].minBound, g[e].maxBound);
-
- if (tbi.isAnchored(v)) {
- // Matches for literals in the anchored table will always arrive at the
- // right offsets, so there's no need for history-based confirmation.
- DEBUG_PRINTF("v in anchored table, no need for history\n");
- assert(u == tbi.anchored_root);
- return ROSE_ROLE_HISTORY_NONE;
- }
-
+
+ if (tbi.isAnchored(v)) {
+ // Matches for literals in the anchored table will always arrive at the
+ // right offsets, so there's no need for history-based confirmation.
+ DEBUG_PRINTF("v in anchored table, no need for history\n");
+ assert(u == tbi.anchored_root);
+ return ROSE_ROLE_HISTORY_NONE;
+ }
+
if (g[u].fixedOffset() &&
(g[e].minBound || g[e].maxBound != ROSE_BOUND_INF)) {
- DEBUG_PRINTF("fixed offset -> anch\n");
- return ROSE_ROLE_HISTORY_ANCH;
- }
-
- return ROSE_ROLE_HISTORY_NONE;
-}
-
-static
-void assignHistories(RoseBuildImpl &tbi) {
- for (const auto &e : edges_range(tbi.g)) {
- if (tbi.g[e].history == ROSE_ROLE_HISTORY_INVALID) {
- tbi.g[e].history = findHistoryScheme(tbi, e);
- }
- }
-}
-
-bool RoseBuildImpl::isDirectReport(u32 id) const {
- assert(id < literal_info.size());
-
- // Literal info properties.
- const rose_literal_info &info = literal_info[id];
- if (info.vertices.empty()) {
- return false;
- }
-
-    if (!info.delayed_ids.empty() /* DRs don't set groups */
-        || info.requires_benefits) { /* DRs don't require confirm */
-        return false;
-    }
-
-    if (isDelayed(id)) { /* can't handle a delayed DR at the moment, as we
-                          * require delay ids to be dense */
- return false;
- }
-
- // Role properties.
-
- // Note that a literal can have multiple roles and still be a direct
- // report; it'll become a multi-direct report ("MDR") that fires each
- // role's reports from a list.
-
- for (auto v : info.vertices) {
+ DEBUG_PRINTF("fixed offset -> anch\n");
+ return ROSE_ROLE_HISTORY_ANCH;
+ }
+
+ return ROSE_ROLE_HISTORY_NONE;
+}
+
+static
+void assignHistories(RoseBuildImpl &tbi) {
+ for (const auto &e : edges_range(tbi.g)) {
+ if (tbi.g[e].history == ROSE_ROLE_HISTORY_INVALID) {
+ tbi.g[e].history = findHistoryScheme(tbi, e);
+ }
+ }
+}
+
+bool RoseBuildImpl::isDirectReport(u32 id) const {
+ assert(id < literal_info.size());
+
+ // Literal info properties.
+ const rose_literal_info &info = literal_info[id];
+ if (info.vertices.empty()) {
+ return false;
+ }
+
+    if (!info.delayed_ids.empty() /* DRs don't set groups */
+        || info.requires_benefits) { /* DRs don't require confirm */
+        return false;
+    }
+
+    if (isDelayed(id)) { /* can't handle a delayed DR at the moment, as we
+                          * require delay ids to be dense */
+ return false;
+ }
+
+ // Role properties.
+
+ // Note that a literal can have multiple roles and still be a direct
+ // report; it'll become a multi-direct report ("MDR") that fires each
+ // role's reports from a list.
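+    // (e.g. a literal shared by two qualifying leaf roles becomes one MDR
+    // entry that fires both roles' report lists)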
+
+ for (auto v : info.vertices) {
assert(contains(g[v].literals, id));
-
- if (g[v].reports.empty() ||
- g[v].eod_accept || // no accept EOD
- !g[v].isBoring() ||
- !isLeafNode(v, g) || // Must have no out-edges
- in_degree(v, g) != 1) { // Role must have exactly one in-edge
- return false;
- }
-
+
+ if (g[v].reports.empty() ||
+ g[v].eod_accept || // no accept EOD
+ !g[v].isBoring() ||
+ !isLeafNode(v, g) || // Must have no out-edges
+ in_degree(v, g) != 1) { // Role must have exactly one in-edge
+ return false;
+ }
+
// Use the program to handle cases that aren't external reports.
for (const ReportID &rid : g[v].reports) {
if (!isExternalReport(rm.getReport(rid))) {
@@ -420,36 +420,36 @@ bool RoseBuildImpl::isDirectReport(u32 id) const {
}
if (literals.at(id).table == ROSE_ANCHORED) {
- /* in-edges are irrelevant for anchored region. */
- continue;
- }
-
-    /* The in-edge must be a (0, inf) edge from root. */
- assert(in_degree(v, g) != 0);
- RoseEdge e = *(in_edges(v, g).first);
- if (source(e, g) != root || g[e].minBound != 0 ||
- g[e].maxBound != ROSE_BOUND_INF) {
- return false;
- }
-
- // Note: we allow ekeys; they will result in unused roles being built as
- // direct reporting will be used when actually matching in Rose.
- /* TODO: prevent roles being created */
- }
-
- DEBUG_PRINTF("literal %u ('%s') is a %s report\n", id,
+ /* in-edges are irrelevant for anchored region. */
+ continue;
+ }
+
+    /* The in-edge must be a (0, inf) edge from root. */
+ assert(in_degree(v, g) != 0);
+ RoseEdge e = *(in_edges(v, g).first);
+ if (source(e, g) != root || g[e].minBound != 0 ||
+ g[e].maxBound != ROSE_BOUND_INF) {
+ return false;
+ }
+
+ // Note: we allow ekeys; they will result in unused roles being built as
+ // direct reporting will be used when actually matching in Rose.
+ /* TODO: prevent roles being created */
+ }
+
+ DEBUG_PRINTF("literal %u ('%s') is a %s report\n", id,
dumpString(literals.at(id).s).c_str(),
- info.vertices.size() > 1 ? "multi-direct" : "direct");
- return true;
-}
-
+ info.vertices.size() > 1 ? "multi-direct" : "direct");
+ return true;
+}
+
/* If we have prefixes that can squash all the floating roots, we can have a
 * somewhat-conditional floating table. As we can't yet look at squash_masks, we
 * have to guess whether we are in this case, but the win for not running a
 * floating table over a large portion of the stream is significantly larger
 * than that for avoiding an eod table over the last N bytes. */
-static
+static
bool checkFloatingKillableByPrefixes(const RoseBuildImpl &tbi) {
for (auto v : vertices_range(tbi.g)) {
if (!tbi.isRootSuccessor(v)) {
@@ -484,25 +484,25 @@ bool checkFloatingKillableByPrefixes(const RoseBuildImpl &tbi) {
static
bool checkEodStealFloating(const RoseBuildImpl &build,
- const vector<u32> &eodLiteralsForFloating,
- u32 numFloatingLiterals,
- size_t shortestFloatingLen) {
- if (eodLiteralsForFloating.empty()) {
- DEBUG_PRINTF("no eod literals\n");
- return true;
- }
-
- if (!numFloatingLiterals) {
- DEBUG_PRINTF("no floating table\n");
- return false;
- }
-
+ const vector<u32> &eodLiteralsForFloating,
+ u32 numFloatingLiterals,
+ size_t shortestFloatingLen) {
+ if (eodLiteralsForFloating.empty()) {
+ DEBUG_PRINTF("no eod literals\n");
+ return true;
+ }
+
+ if (!numFloatingLiterals) {
+ DEBUG_PRINTF("no floating table\n");
+ return false;
+ }
+
if (build.hasNoFloatingRoots()) {
- DEBUG_PRINTF("skipping as floating table is conditional\n");
- /* TODO: investigate putting stuff in atable */
- return false;
- }
-
+ DEBUG_PRINTF("skipping as floating table is conditional\n");
+ /* TODO: investigate putting stuff in atable */
+ return false;
+ }
+
if (checkFloatingKillableByPrefixes(build)) {
DEBUG_PRINTF("skipping as prefixes may make ftable conditional\n");
return false;
@@ -516,333 +516,333 @@ bool checkEodStealFloating(const RoseBuildImpl &build,
}
}
- DEBUG_PRINTF("%zu are eod literals, %u floating; floating len=%zu\n",
- eodLiteralsForFloating.size(), numFloatingLiterals,
- shortestFloatingLen);
- u32 new_floating_lits = 0;
-
- for (u32 eod_id : eodLiteralsForFloating) {
+ DEBUG_PRINTF("%zu are eod literals, %u floating; floating len=%zu\n",
+ eodLiteralsForFloating.size(), numFloatingLiterals,
+ shortestFloatingLen);
+ u32 new_floating_lits = 0;
+
+ for (u32 eod_id : eodLiteralsForFloating) {
const rose_literal_id &lit = build.literals.at(eod_id);
- DEBUG_PRINTF("checking '%s'\n", dumpString(lit.s).c_str());
-
+ DEBUG_PRINTF("checking '%s'\n", dumpString(lit.s).c_str());
+
if (contains(floating_lits, lit.s)) {
- DEBUG_PRINTF("skip; there is already a floating version\n");
- continue;
- }
-
- // Don't want to make the shortest floating literal shorter/worse.
- if (trailerDueToSelf(lit) < 4 || lit.s.length() < shortestFloatingLen) {
- DEBUG_PRINTF("len=%zu, selfOverlap=%zu\n", lit.s.length(),
- trailerDueToSelf(lit));
- DEBUG_PRINTF("would shorten, bailing\n");
- return false;
- }
-
- new_floating_lits++;
- }
- DEBUG_PRINTF("..would require %u new floating literals\n",
- new_floating_lits);
-
- // Magic number thresholds: we only want to get rid of our EOD table if it
- // would make no real difference to the FDR.
- if (numFloatingLiterals / 8 < new_floating_lits
- && (new_floating_lits > 3 || numFloatingLiterals <= 2)) {
- DEBUG_PRINTF("leaving eod table alone.\n");
- return false;
- }
-
- return true;
-}
-
-static
-void promoteEodToFloating(RoseBuildImpl &tbi, const vector<u32> &eodLiterals) {
+ DEBUG_PRINTF("skip; there is already a floating version\n");
+ continue;
+ }
+
+ // Don't want to make the shortest floating literal shorter/worse.
+ if (trailerDueToSelf(lit) < 4 || lit.s.length() < shortestFloatingLen) {
+ DEBUG_PRINTF("len=%zu, selfOverlap=%zu\n", lit.s.length(),
+ trailerDueToSelf(lit));
+ DEBUG_PRINTF("would shorten, bailing\n");
+ return false;
+ }
+
+ new_floating_lits++;
+ }
+ DEBUG_PRINTF("..would require %u new floating literals\n",
+ new_floating_lits);
+
+ // Magic number thresholds: we only want to get rid of our EOD table if it
+ // would make no real difference to the FDR.
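+    // e.g. with 40 floating literals the test below tolerates up to 5
+    // promotions (40/8 == 5); with 16 floating literals it tolerates only 3,
+    // as new_floating_lits > 3 then trips the second condition.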
+ if (numFloatingLiterals / 8 < new_floating_lits
+ && (new_floating_lits > 3 || numFloatingLiterals <= 2)) {
+ DEBUG_PRINTF("leaving eod table alone.\n");
+ return false;
+ }
+
+ return true;
+}
+
+static
+void promoteEodToFloating(RoseBuildImpl &tbi, const vector<u32> &eodLiterals) {
DEBUG_PRINTF("promoting %zu eod literals to floating table\n",
eodLiterals.size());
-
- for (u32 eod_id : eodLiterals) {
+
+ for (u32 eod_id : eodLiterals) {
const rose_literal_id &lit = tbi.literals.at(eod_id);
DEBUG_PRINTF("eod_id=%u, lit=%s\n", eod_id, dumpString(lit.s).c_str());
- u32 floating_id = tbi.getLiteralId(lit.s, lit.msk, lit.cmp, lit.delay,
- ROSE_FLOATING);
+ u32 floating_id = tbi.getLiteralId(lit.s, lit.msk, lit.cmp, lit.delay,
+ ROSE_FLOATING);
DEBUG_PRINTF("floating_id=%u, lit=%s\n", floating_id,
dumpString(tbi.literals.at(floating_id).s).c_str());
- auto &float_verts = tbi.literal_info[floating_id].vertices;
- auto &eod_verts = tbi.literal_info[eod_id].vertices;
-
- insert(&float_verts, eod_verts);
- eod_verts.clear();
-
- DEBUG_PRINTF("eod_lit=%u -> float_lit=%u\n", eod_id, floating_id);
-
- for (auto v : float_verts) {
- tbi.g[v].literals.erase(eod_id);
- tbi.g[v].literals.insert(floating_id);
- }
-
- tbi.literal_info[floating_id].requires_benefits
- = tbi.literal_info[eod_id].requires_benefits;
- }
-}
-
-static
-bool promoteEodToAnchored(RoseBuildImpl &tbi, const vector<u32> &eodLiterals) {
- DEBUG_PRINTF("promoting eod literals to anchored table\n");
- bool rv = true;
-
- for (u32 eod_id : eodLiterals) {
+ auto &float_verts = tbi.literal_info[floating_id].vertices;
+ auto &eod_verts = tbi.literal_info[eod_id].vertices;
+
+ insert(&float_verts, eod_verts);
+ eod_verts.clear();
+
+ DEBUG_PRINTF("eod_lit=%u -> float_lit=%u\n", eod_id, floating_id);
+
+ for (auto v : float_verts) {
+ tbi.g[v].literals.erase(eod_id);
+ tbi.g[v].literals.insert(floating_id);
+ }
+
+ tbi.literal_info[floating_id].requires_benefits
+ = tbi.literal_info[eod_id].requires_benefits;
+ }
+}
+
+static
+bool promoteEodToAnchored(RoseBuildImpl &tbi, const vector<u32> &eodLiterals) {
+ DEBUG_PRINTF("promoting eod literals to anchored table\n");
+ bool rv = true;
+
+ for (u32 eod_id : eodLiterals) {
const rose_literal_id &lit = tbi.literals.at(eod_id);
-
- NGHolder h;
- add_edge(h.start, h.accept, h);
- appendLiteral(h, lit.s); /* we only accept cases which are anchored
- * hard up against start */
-
- u32 a_id = tbi.getNewLiteralId();
- u32 remap_id = 0;
- DEBUG_PRINTF(" trying to add dfa stuff\n");
- int anch_ok = addToAnchoredMatcher(tbi, h, a_id, &remap_id);
-
- if (anch_ok == ANCHORED_FAIL) {
- DEBUG_PRINTF("failed to promote to anchored need to keep etable\n");
- rv = false;
- continue;
- } else if (anch_ok == ANCHORED_REMAP) {
- DEBUG_PRINTF("remapped\n");
- a_id = remap_id;
- } else {
- assert(anch_ok == ANCHORED_SUCCESS);
- }
-
- // Store the literal itself in a side structure so that we can use it
- // for overlap calculations later. This may be obsolete when the old
- // Rose construction path (and its history selection code) goes away.
- tbi.anchoredLitSuffix.insert(make_pair(a_id, lit));
-
- auto &a_verts = tbi.literal_info[a_id].vertices;
- auto &eod_verts = tbi.literal_info[eod_id].vertices;
-
- for (auto v : eod_verts) {
- for (const auto &e : in_edges_range(v, tbi.g)) {
- assert(tbi.g[e].maxBound != ROSE_BOUND_INF);
- tbi.g[e].minBound += lit.s.length();
- tbi.g[e].maxBound += lit.s.length();
- }
- }
-
- insert(&a_verts, eod_verts);
- eod_verts.clear();
-
- for (auto v : a_verts) {
- tbi.g[v].literals.erase(eod_id);
- tbi.g[v].literals.insert(a_id);
- }
- }
-
- return rv;
-}
-
-static
-bool suitableForAnchored(const RoseBuildImpl &tbi, const rose_literal_id &l_id,
- const rose_literal_info &lit) {
- const RoseGraph &g = tbi.g;
-
- bool seen = false;
- u32 min_offset = 0;
- u32 max_offset = 0;
-
- if (!lit.delayed_ids.empty() || l_id.delay) {
- DEBUG_PRINTF("delay\n");
- return false;
- }
-
- if (!l_id.msk.empty()) {
- DEBUG_PRINTF("msk\n");
- return false;
- }
-
- for (auto v : lit.vertices) {
- if (!seen) {
- min_offset = g[v].min_offset;
- max_offset = g[v].max_offset;
- seen = true;
-
- if (max_offset > tbi.cc.grey.maxAnchoredRegion) {
- DEBUG_PRINTF("too deep %u\n", max_offset);
- return false;
- }
- }
-
- if (max_offset != g[v].max_offset || min_offset != g[v].min_offset) {
- DEBUG_PRINTF(":(\n");
- return false;
- }
-
- if (!g[v].isBoring()) {
- DEBUG_PRINTF(":(\n");
- return false;
- }
-
- if (g[v].literals.size() != 1) {
- DEBUG_PRINTF("shared\n");
- return false;
- }
-
- if (tbi.isNonRootSuccessor(v)) {
- DEBUG_PRINTF("non root\n");
- return false;
- }
-
- if (max_offset != l_id.s.length() || min_offset != l_id.s.length()) {
- DEBUG_PRINTF("|%zu| (%u,%u):(\n", l_id.s.length(), min_offset,
- max_offset);
- /* TODO: handle cases with small bounds */
- return false;
- }
-
- for (auto w : adjacent_vertices_range(v, g)) {
- if (!g[w].eod_accept) {
- DEBUG_PRINTF("non eod accept literal\n");
- return false;
- }
- }
- }
- return true;
-}
-
-// If we've got a small number of long, innocuous EOD literals and a large
-// floating table, we consider promoting those EOD literals to the floating
-// table to avoid having to run both. See UE-2069; consider deleting this and
-// replacing it with an elegant reverse DFA.
-/* We do not want to do this if we would otherwise avoid running the floating
- * table altogether. */
-static
-void stealEodVertices(RoseBuildImpl &tbi) {
- u32 numFloatingLiterals = 0;
- u32 numAnchoredLiterals = 0;
- size_t shortestFloatingLen = SIZE_MAX;
- vector<u32> eodLiteralsForFloating;
- vector<u32> eodLiteralsForAnchored;
- DEBUG_PRINTF("hi\n");
-
- for (u32 i = 0; i < tbi.literal_info.size(); i++) {
- const auto &info = tbi.literal_info[i];
- if (info.vertices.empty()) {
- continue; // skip unused literals
- }
-
+
+ NGHolder h;
+ add_edge(h.start, h.accept, h);
+ appendLiteral(h, lit.s); /* we only accept cases which are anchored
+ * hard up against start */
+
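+        /* h should now be a linear graph matching the literal exactly at
+         * offset 0: start -> literal characters -> accept */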
+ u32 a_id = tbi.getNewLiteralId();
+ u32 remap_id = 0;
+ DEBUG_PRINTF(" trying to add dfa stuff\n");
+ int anch_ok = addToAnchoredMatcher(tbi, h, a_id, &remap_id);
+
+ if (anch_ok == ANCHORED_FAIL) {
+ DEBUG_PRINTF("failed to promote to anchored need to keep etable\n");
+ rv = false;
+ continue;
+ } else if (anch_ok == ANCHORED_REMAP) {
+ DEBUG_PRINTF("remapped\n");
+ a_id = remap_id;
+ } else {
+ assert(anch_ok == ANCHORED_SUCCESS);
+ }
+
+ // Store the literal itself in a side structure so that we can use it
+ // for overlap calculations later. This may be obsolete when the old
+ // Rose construction path (and its history selection code) goes away.
+ tbi.anchoredLitSuffix.insert(make_pair(a_id, lit));
+
+ auto &a_verts = tbi.literal_info[a_id].vertices;
+ auto &eod_verts = tbi.literal_info[eod_id].vertices;
+
+ for (auto v : eod_verts) {
+ for (const auto &e : in_edges_range(v, tbi.g)) {
+ assert(tbi.g[e].maxBound != ROSE_BOUND_INF);
+ tbi.g[e].minBound += lit.s.length();
+ tbi.g[e].maxBound += lit.s.length();
+ }
+ }
+
+ insert(&a_verts, eod_verts);
+ eod_verts.clear();
+
+ for (auto v : a_verts) {
+ tbi.g[v].literals.erase(eod_id);
+ tbi.g[v].literals.insert(a_id);
+ }
+ }
+
+ return rv;
+}
+
+static
+bool suitableForAnchored(const RoseBuildImpl &tbi, const rose_literal_id &l_id,
+ const rose_literal_info &lit) {
+ const RoseGraph &g = tbi.g;
+
+ bool seen = false;
+ u32 min_offset = 0;
+ u32 max_offset = 0;
+
+ if (!lit.delayed_ids.empty() || l_id.delay) {
+ DEBUG_PRINTF("delay\n");
+ return false;
+ }
+
+ if (!l_id.msk.empty()) {
+ DEBUG_PRINTF("msk\n");
+ return false;
+ }
+
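+    /* every vertex using this literal must be equivalent: min/max offsets
+     * fixed at the literal length, boring, the vertex's only literal, a root
+     * successor, and feeding only eod accept vertices */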
+ for (auto v : lit.vertices) {
+ if (!seen) {
+ min_offset = g[v].min_offset;
+ max_offset = g[v].max_offset;
+ seen = true;
+
+ if (max_offset > tbi.cc.grey.maxAnchoredRegion) {
+ DEBUG_PRINTF("too deep %u\n", max_offset);
+ return false;
+ }
+ }
+
+ if (max_offset != g[v].max_offset || min_offset != g[v].min_offset) {
+ DEBUG_PRINTF(":(\n");
+ return false;
+ }
+
+ if (!g[v].isBoring()) {
+ DEBUG_PRINTF(":(\n");
+ return false;
+ }
+
+ if (g[v].literals.size() != 1) {
+ DEBUG_PRINTF("shared\n");
+ return false;
+ }
+
+ if (tbi.isNonRootSuccessor(v)) {
+ DEBUG_PRINTF("non root\n");
+ return false;
+ }
+
+ if (max_offset != l_id.s.length() || min_offset != l_id.s.length()) {
+ DEBUG_PRINTF("|%zu| (%u,%u):(\n", l_id.s.length(), min_offset,
+ max_offset);
+ /* TODO: handle cases with small bounds */
+ return false;
+ }
+
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (!g[w].eod_accept) {
+ DEBUG_PRINTF("non eod accept literal\n");
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+// If we've got a small number of long, innocuous EOD literals and a large
+// floating table, we consider promoting those EOD literals to the floating
+// table to avoid having to run both. See UE-2069; consider deleting this and
+// replacing it with an elegant reverse DFA.
+/* We do not want to do this if we would otherwise avoid running the floating
+ * table altogether. */
+static
+void stealEodVertices(RoseBuildImpl &tbi) {
+ u32 numFloatingLiterals = 0;
+ u32 numAnchoredLiterals = 0;
+ size_t shortestFloatingLen = SIZE_MAX;
+ vector<u32> eodLiteralsForFloating;
+ vector<u32> eodLiteralsForAnchored;
+ DEBUG_PRINTF("hi\n");
+
+ for (u32 i = 0; i < tbi.literal_info.size(); i++) {
+ const auto &info = tbi.literal_info[i];
+ if (info.vertices.empty()) {
+ continue; // skip unused literals
+ }
+
const rose_literal_id &lit = tbi.literals.at(i);
-
- if (lit.table == ROSE_EOD_ANCHORED) {
- if (suitableForAnchored(tbi, lit, info)) {
- eodLiteralsForAnchored.push_back(i);
- } else {
- eodLiteralsForFloating.push_back(i);
- }
- } else if (lit.table == ROSE_FLOATING) {
- numFloatingLiterals++;
- shortestFloatingLen = min(shortestFloatingLen, lit.s.length());
- } else if (lit.table == ROSE_ANCHORED) {
- numAnchoredLiterals++;
- }
- }
-
- /* given a choice of having either an eod table or an anchored table, we
- * always favour having an anchored table */
-
- if (!checkEodStealFloating(tbi, eodLiteralsForFloating, numFloatingLiterals,
- shortestFloatingLen)) {
- DEBUG_PRINTF("removing etable weakens ftable\n");
- return;
- }
-
- promoteEodToFloating(tbi, eodLiteralsForFloating);
-
- if (!promoteEodToAnchored(tbi, eodLiteralsForAnchored)) {
- DEBUG_PRINTF("still need ematcher\n");
- return;
- }
-
- // We're no longer using the EOD matcher.
- tbi.ematcher_region_size = 0;
-}
-
-bool RoseBuildImpl::isDelayed(u32 id) const {
- return literal_info.at(id).undelayed_id != id;
-}
-
+
+ if (lit.table == ROSE_EOD_ANCHORED) {
+ if (suitableForAnchored(tbi, lit, info)) {
+ eodLiteralsForAnchored.push_back(i);
+ } else {
+ eodLiteralsForFloating.push_back(i);
+ }
+ } else if (lit.table == ROSE_FLOATING) {
+ numFloatingLiterals++;
+ shortestFloatingLen = min(shortestFloatingLen, lit.s.length());
+ } else if (lit.table == ROSE_ANCHORED) {
+ numAnchoredLiterals++;
+ }
+ }
+
+ /* given a choice of having either an eod table or an anchored table, we
+ * always favour having an anchored table */
+
+ if (!checkEodStealFloating(tbi, eodLiteralsForFloating, numFloatingLiterals,
+ shortestFloatingLen)) {
+ DEBUG_PRINTF("removing etable weakens ftable\n");
+ return;
+ }
+
+ promoteEodToFloating(tbi, eodLiteralsForFloating);
+
+ if (!promoteEodToAnchored(tbi, eodLiteralsForAnchored)) {
+ DEBUG_PRINTF("still need ematcher\n");
+ return;
+ }
+
+ // We're no longer using the EOD matcher.
+ tbi.ematcher_region_size = 0;
+}
+
+bool RoseBuildImpl::isDelayed(u32 id) const {
+ return literal_info.at(id).undelayed_id != id;
+}
+
bool RoseBuildImpl::hasDelayedLiteral(RoseVertex v) const {
for (u32 lit_id : g[v].literals) {
if (literals.at(lit_id).delay) {
return true;
- }
- }
-
+ }
+ }
+
return false;
-}
-
+}
+
bool RoseBuildImpl::hasDelayPred(RoseVertex v) const {
for (auto u : inv_adjacent_vertices_range(v, g)) {
if (hasDelayedLiteral(u)) {
return true;
- }
- }
-
- return false;
-}
-
+ }
+ }
+
+ return false;
+}
+
bool RoseBuildImpl::hasAnchoredTablePred(RoseVertex v) const {
- for (auto u : inv_adjacent_vertices_range(v, g)) {
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
if (isAnchored(u)) {
return true;
- }
- }
-
+ }
+ }
+
return false;
-}
-
+}
+
void RoseBuildImpl::findTransientLeftfixes(void) {
for (auto v : vertices_range(g)) {
if (!g[v].left) {
- continue;
- }
-
+ continue;
+ }
+
/* infixes can never (or at least not yet) be transient */
if (isNonRootSuccessor(v)) {
- continue;
- }
-
+ continue;
+ }
+
const left_id &left(g[v].left);
-
+
if (::ue2::isAnchored(left) && !isInETable(v)) {
/* etable prefixes currently MUST be transient as we do not know
* where we can safely catch them up to (yet). */
DEBUG_PRINTF("anchored roses in rocky soil are not fleeting\n");
continue;
}
-
+
const depth max_width = findMaxWidth(left);
if (!max_width.is_finite()) {
DEBUG_PRINTF("inf max width\n");
- continue;
- }
-
+ continue;
+ }
+
if (cc.streaming) {
/* STREAMING: transient prefixes must be able to run using history
* rather than storing state. */
u32 his = g[v].left.lag + max_width;
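            /* his is the history needed to replay the prefix: its lag behind
             * the literal match plus its maximum width */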
-
+
// If this vertex has an event literal, we need to add one to cope
// with it.
if (hasLiteralInTable(v, ROSE_EVENT)) {
his++;
}
-
+
/* +1 as trigger must appear in main buffer and no byte is needed to
* decompress the state */
if (his <= cc.grey.maxHistoryAvailable + 1) {
transient.insert(left);
DEBUG_PRINTF("a transient leftfix spotted his=%u\n", his);
- }
+ }
} else {
/* BLOCK: transientness is less important and more fuzzy, ideally
* it should be quick to calculate the state. No need to worry about
@@ -852,10 +852,10 @@ void RoseBuildImpl::findTransientLeftfixes(void) {
DEBUG_PRINTF("a transient block leftfix spotted [%u]\n",
(u32)max_width);
}
- }
+ }
}
}
-
+
/** Find all the different roses and their associated literals. */
static
map<left_id, vector<RoseVertex>> findLeftSucc(const RoseBuildImpl &build) {
@@ -864,69 +864,69 @@ map<left_id, vector<RoseVertex>> findLeftSucc(const RoseBuildImpl &build) {
if (build.g[v].left) {
const LeftEngInfo &lei = build.g[v].left;
leftfixes[lei].push_back(v);
- }
+ }
}
return leftfixes;
}
-
+
namespace {
struct infix_info {
set<RoseVertex> preds;
set<RoseVertex> succs;
};
}
-
+
static
map<NGHolder *, infix_info> findInfixGraphInfo(const RoseBuildImpl &build) {
map<NGHolder *, infix_info> rv;
-
+
for (auto v : vertices_range(build.g)) {
if (!build.g[v].left) {
- continue;
- }
-
+ continue;
+ }
+
if (build.isRootSuccessor(v)) {
DEBUG_PRINTF("a prefix is never an infix\n");
continue;
- }
-
+ }
+
/* ensure only proper nfas */
const LeftEngInfo &lei = build.g[v].left;
if (!lei.graph) {
- continue;
- }
+ continue;
+ }
if (lei.haig || lei.dfa) {
continue;
- }
+ }
assert(!lei.castle);
infix_info &info = rv[lei.graph.get()];
insert(&info.preds, inv_adjacent_vertices_range(v, build.g));
info.succs.insert(v);
- }
-
+ }
+
return rv;
-}
-
+}
+
static
map<u32, flat_set<NFAEdge>> getTopInfo(const NGHolder &h) {
map<u32, flat_set<NFAEdge>> rv;
for (NFAEdge e : out_edges_range(h.start, h)) {
for (u32 t : h[e].tops) {
rv[t].insert(e);
- }
- }
+ }
+ }
return rv;
-}
-
+}
+
static
u32 findUnusedTop(const map<u32, flat_set<NFAEdge>> &tops) {
u32 i = 0;
while (contains(tops, i)) {
i++;
- }
+ }
return i;
-}
-
+}
+
static
bool reduceTopTriggerLoad(RoseBuildImpl &build, NGHolder &h, RoseVertex u) {
RoseGraph &g = build.g;
@@ -936,43 +936,43 @@ bool reduceTopTriggerLoad(RoseBuildImpl &build, NGHolder &h, RoseVertex u) {
RoseVertex v = target(e, g);
if (g[v].left.graph.get() != &h) {
continue;
- }
+ }
tops.insert(g[e].rose_top);
- }
-
+ }
+
assert(!tops.empty());
if (tops.size() <= 1) {
- return false;
- }
+ return false;
+ }
DEBUG_PRINTF("%zu triggers %zu tops for %p\n", build.g[u].index,
tops.size(), &h);
-
+
auto h_top_info = getTopInfo(h);
flat_set<NFAEdge> edges_to_trigger;
for (u32 t : tops) {
insert(&edges_to_trigger, h_top_info[t]);
- }
-
+ }
+
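+    /* edges_to_trigger is the union of start edges switched on by any of u's
+     * tops; below we look for (or create) a single top with exactly this edge
+     * set */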
u32 new_top = ~0U;
    /* check if there is already a top with the right successor set */
for (const auto &elem : h_top_info) {
if (elem.second == edges_to_trigger) {
new_top = elem.first;
break;
- }
- }
-
+ }
+ }
+
/* if no existing suitable top, add a new top for us */
if (new_top == ~0U) {
new_top = findUnusedTop(h_top_info);
-
+
/* add top to edges out of start */
for (NFAEdge e : out_edges_range(h.start, h)) {
if (has_intersection(tops, h[e].tops)) {
h[e].tops.insert(new_top);
}
- }
-
+ }
+
/* check still implementable if we add a new top */
if (!isImplementableNFA(h, nullptr, build.cc)) {
DEBUG_PRINTF("unable to add new top\n");
@@ -981,219 +981,219 @@ bool reduceTopTriggerLoad(RoseBuildImpl &build, NGHolder &h, RoseVertex u) {
}
/* we should be back to the original graph */
assert(isImplementableNFA(h, nullptr, build.cc));
- return false;
- }
- }
-
+ return false;
+ }
+ }
+
DEBUG_PRINTF("using new merged top %u\n", new_top);
assert(new_top != ~0U);
for (RoseEdge e: out_edges_range(u, g)) {
RoseVertex v = target(e, g);
if (g[v].left.graph.get() != &h) {
continue;
- }
+ }
g[e].rose_top = new_top;
- }
-
+ }
+
return true;
-}
-
-static
+}
+
+static
void packInfixTops(NGHolder &h, RoseGraph &g,
const set<RoseVertex> &verts) {
if (!is_triggered(h)) {
DEBUG_PRINTF("not triggered, no tops\n");
return;
- }
+ }
assert(isCorrectlyTopped(h));
DEBUG_PRINTF("pruning unused tops\n");
flat_set<u32> used_tops;
for (auto v : verts) {
assert(g[v].left.graph.get() == &h);
-
+
for (const auto &e : in_edges_range(v, g)) {
u32 top = g[e].rose_top;
used_tops.insert(top);
}
- }
-
+ }
+
map<u32, u32> top_mapping;
for (u32 t : used_tops) {
u32 new_top = top_mapping.size();
top_mapping[t] = new_top;
- }
-
+ }
+
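+    /* top_mapping renumbers the used tops densely from zero; rewrite the
+     * rose edges and then the graph's start edges to match */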
for (auto v : verts) {
assert(g[v].left.graph.get() == &h);
-
+
for (const auto &e : in_edges_range(v, g)) {
g[e].rose_top = top_mapping.at(g[e].rose_top);
- }
+ }
}
-
+
vector<NFAEdge> dead;
for (const auto &e : out_edges_range(h.start, h)) {
NFAVertex v = target(e, h);
if (v == h.startDs) {
continue; // stylised edge, leave it alone.
- }
+ }
flat_set<u32> updated_tops;
for (u32 t : h[e].tops) {
if (contains(top_mapping, t)) {
updated_tops.insert(top_mapping.at(t));
- }
- }
+ }
+ }
h[e].tops = std::move(updated_tops);
if (h[e].tops.empty()) {
DEBUG_PRINTF("edge (start,%zu) has only unused tops\n", h[v].index);
dead.push_back(e);
- }
- }
-
+ }
+ }
+
if (dead.empty()) {
return;
- }
-
+ }
+
remove_edges(dead, h);
pruneUseless(h);
clearReports(h); // As we may have removed vacuous edges.
-}
-
-static
+}
+
+static
void reduceTopTriggerLoad(RoseBuildImpl &build) {
auto infixes = findInfixGraphInfo(build);
-
+
for (auto &p : infixes) {
if (onlyOneTop(*p.first)) {
- continue;
- }
-
+ continue;
+ }
+
bool changed = false;
for (RoseVertex v : p.second.preds) {
changed |= reduceTopTriggerLoad(build, *p.first, v);
- }
-
+ }
+
if (changed) {
packInfixTops(*p.first, build.g, p.second.succs);
reduceImplementableGraph(*p.first, SOM_NONE, nullptr, build.cc);
- }
- }
-}
-
-static
+ }
+ }
+}
+
+static
bool triggerKillsRoseGraph(const RoseBuildImpl &build, const left_id &left,
- const set<ue2_literal> &all_lits,
- const RoseEdge &e) {
- assert(left.graph());
- const NGHolder &h = *left.graph();
-
+ const set<ue2_literal> &all_lits,
+ const RoseEdge &e) {
+ assert(left.graph());
+ const NGHolder &h = *left.graph();
+
flat_set<NFAVertex> all_states;
- insert(&all_states, vertices(h));
- assert(out_degree(h.startDs, h) == 1); /* triggered don't use sds */
- DEBUG_PRINTF("removing sds\n");
- all_states.erase(h.startDs);
-
+ insert(&all_states, vertices(h));
+ assert(out_degree(h.startDs, h) == 1); /* triggered don't use sds */
+ DEBUG_PRINTF("removing sds\n");
+ all_states.erase(h.startDs);
+
flat_set<NFAVertex> states;
-
-    /* check the pred literals to see whether every one of them kills the
-     * previous graph state */
+
+    /* check the pred literals to see whether every one of them kills the
+     * previous graph state */
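+    /* run the graph from the all-states-on configuration over each trigger's
+     * non-overlapping tail; only if no run leaves any state live is the
+     * trigger guaranteed to kill the infix */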
for (u32 lit_id : build.g[source(e, build.g)].literals) {
const rose_literal_id &pred_lit = build.literals.at(lit_id);
- const ue2_literal s = findNonOverlappingTail(all_lits, pred_lit.s);
-
- DEBUG_PRINTF("running graph %zu\n", states.size());
- states = execute_graph(h, s, all_states, true);
- DEBUG_PRINTF("ran, %zu states on\n", states.size());
-
- if (!states.empty()) {
- return false;
- }
- }
-
- return true;
-}
-
-static
+ const ue2_literal s = findNonOverlappingTail(all_lits, pred_lit.s);
+
+ DEBUG_PRINTF("running graph %zu\n", states.size());
+ states = execute_graph(h, s, all_states, true);
+ DEBUG_PRINTF("ran, %zu states on\n", states.size());
+
+ if (!states.empty()) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static
bool triggerKillsRose(const RoseBuildImpl &build, const left_id &left,
- const set<ue2_literal> &all_lits, const RoseEdge &e) {
- if (left.haig()) {
- /* TODO: To allow this for som-based engines we would also need to
- * ensure as well that no other triggers can occur at the same location
- * with a different som. */
- return false;
- }
-
- if (left.graph()) {
+ const set<ue2_literal> &all_lits, const RoseEdge &e) {
+ if (left.haig()) {
+ /* TODO: To allow this for som-based engines we would also need to
+ * ensure as well that no other triggers can occur at the same location
+ * with a different som. */
+ return false;
+ }
+
+ if (left.graph()) {
return triggerKillsRoseGraph(build, left, all_lits, e);
- }
-
- if (left.castle()) {
+ }
+
+ if (left.castle()) {
return triggerKillsRoseCastle(build, left, all_lits, e);
- }
-
- return false;
-}
-
+ }
+
+ return false;
+}
+
/* Sometimes the arrival of a top for a rose infix can ensure that the nfa would
* be dead at that time. In the case of multiple trigger literals, we can only
 * base our decision on the portion of the literal after any overlapping
 * literals. */
-static
+static
void findTopTriggerCancels(RoseBuildImpl &build) {
auto left_succ = findLeftSucc(build); /* leftfixes -> succ verts */
-
+
for (const auto &r : left_succ) {
- const left_id &left = r.first;
- const vector<RoseVertex> &succs = r.second;
-
- assert(!succs.empty());
+ const left_id &left = r.first;
+ const vector<RoseVertex> &succs = r.second;
+
+ assert(!succs.empty());
if (build.isRootSuccessor(*succs.begin())) {
- /* a prefix is never an infix */
- continue;
- }
-
- set<u32> tops_seen;
- set<RoseEdge> rose_edges;
- set<u32> pred_lit_ids;
-
- for (auto v : succs) {
+ /* a prefix is never an infix */
+ continue;
+ }
+
+ set<u32> tops_seen;
+ set<RoseEdge> rose_edges;
+ set<u32> pred_lit_ids;
+
+ for (auto v : succs) {
for (const auto &e : in_edges_range(v, build.g)) {
RoseVertex u = source(e, build.g);
tops_seen.insert(build.g[e].rose_top);
insert(&pred_lit_ids, build.g[u].literals);
- rose_edges.insert(e);
- }
- }
-
- set<ue2_literal> all_lits;
-
- if (tops_seen.size() > 1) {
-            goto next_rose; /* the overlap case is slightly tricky to handle */
- }
-
- for (u32 lit_id : pred_lit_ids) {
+ rose_edges.insert(e);
+ }
+ }
+
+ set<ue2_literal> all_lits;
+
+ if (tops_seen.size() > 1) {
+            goto next_rose; /* the overlap case is slightly tricky to handle */
+ }
+
+ for (u32 lit_id : pred_lit_ids) {
const rose_literal_id &p_lit = build.literals.at(lit_id);
- if (p_lit.delay || p_lit.table == ROSE_ANCHORED) {
- goto next_rose;
- }
- all_lits.insert(p_lit.s);
- DEBUG_PRINTF("trigger: '%s'\n", dumpString(p_lit.s).c_str());
- }
-
- DEBUG_PRINTF("rose has %zu trigger literals, %zu edges\n",
- all_lits.size(), rose_edges.size());
-
- for (const auto &e : rose_edges) {
+ if (p_lit.delay || p_lit.table == ROSE_ANCHORED) {
+ goto next_rose;
+ }
+ all_lits.insert(p_lit.s);
+ DEBUG_PRINTF("trigger: '%s'\n", dumpString(p_lit.s).c_str());
+ }
+
+ DEBUG_PRINTF("rose has %zu trigger literals, %zu edges\n",
+ all_lits.size(), rose_edges.size());
+
+ for (const auto &e : rose_edges) {
if (triggerKillsRose(build, left, all_lits, e)) {
- DEBUG_PRINTF("top will override previous rose state\n");
+ DEBUG_PRINTF("top will override previous rose state\n");
build.g[e].rose_cancel_prev_top = true;
- }
- }
- next_rose:;
- }
-}
-
-static
+ }
+ }
+ next_rose:;
+ }
+}
+
+static
void optimiseRoseTops(RoseBuildImpl &build) {
reduceTopTriggerLoad(build);
/* prune unused tops ? */
@@ -1201,599 +1201,599 @@ void optimiseRoseTops(RoseBuildImpl &build) {
}
static
-void buildRoseSquashMasks(RoseBuildImpl &tbi) {
- /* Rose nfa squash masks are applied to the groups when the nfa can no
- * longer match */
-
- map<left_id, vector<RoseVertex>> roses =
- findLeftSucc(tbi); /* rose -> succ verts */
-
-    /* a rose nfa can squash a group if all literals in that group are
-     * attached only to successors of the nfa and none of them have delayed
-     * variants */
- for (const auto &e : roses) {
- const left_id &left = e.first;
- const vector<RoseVertex> &succs = e.second;
-
- set<u32> lit_ids;
- bool anchored_pred = false;
- for (auto v : succs) {
- lit_ids.insert(tbi.g[v].literals.begin(), tbi.g[v].literals.end());
- for (auto u : inv_adjacent_vertices_range(v, tbi.g)) {
- anchored_pred |= tbi.isAnchored(u);
- }
- }
-
- /* Due to the anchored table not being able to set groups again,
- * we cannot use a rose nfa for group squashing if it is being triggered
- * from the anchored table and can match more than once. */
-
- if (anchored_pred) { /* infix with pred in anchored table */
- u32 min_off = ~0U;
- u32 max_off = 0U;
- for (auto v : succs) {
- for (auto u : inv_adjacent_vertices_range(v, tbi.g)) {
- min_off = min(min_off, tbi.g[u].min_offset);
- max_off = max(max_off, tbi.g[u].max_offset);
- }
- }
- if (min_off != max_off) {
- /* leave all groups alone */
- tbi.rose_squash_masks[left] = ~0ULL;
- continue;
- }
- }
-
+void buildRoseSquashMasks(RoseBuildImpl &tbi) {
+ /* Rose nfa squash masks are applied to the groups when the nfa can no
+ * longer match */
+
+ map<left_id, vector<RoseVertex>> roses =
+ findLeftSucc(tbi); /* rose -> succ verts */
+
+    /* a rose nfa can squash a group if all literals in that group are
+     * attached only to successors of the nfa and none of them have delayed
+     * variants */
+ for (const auto &e : roses) {
+ const left_id &left = e.first;
+ const vector<RoseVertex> &succs = e.second;
+
+ set<u32> lit_ids;
+ bool anchored_pred = false;
+ for (auto v : succs) {
+ lit_ids.insert(tbi.g[v].literals.begin(), tbi.g[v].literals.end());
+ for (auto u : inv_adjacent_vertices_range(v, tbi.g)) {
+ anchored_pred |= tbi.isAnchored(u);
+ }
+ }
+
+ /* Due to the anchored table not being able to set groups again,
+ * we cannot use a rose nfa for group squashing if it is being triggered
+ * from the anchored table and can match more than once. */
+
+ if (anchored_pred) { /* infix with pred in anchored table */
+ u32 min_off = ~0U;
+ u32 max_off = 0U;
+ for (auto v : succs) {
+ for (auto u : inv_adjacent_vertices_range(v, tbi.g)) {
+ min_off = min(min_off, tbi.g[u].min_offset);
+ max_off = max(max_off, tbi.g[u].max_offset);
+ }
+ }
+ if (min_off != max_off) {
+ /* leave all groups alone */
+ tbi.rose_squash_masks[left] = ~0ULL;
+ continue;
+ }
+ }
+
rose_group unsquashable = tbi.boundary_group_mask;
-
- for (u32 lit_id : lit_ids) {
- const rose_literal_info &info = tbi.literal_info[lit_id];
+
+ for (u32 lit_id : lit_ids) {
+ const rose_literal_info &info = tbi.literal_info[lit_id];
if (!info.delayed_ids.empty()
|| !all_of_in(info.vertices,
[&](RoseVertex v) {
return left == tbi.g[v].left; })) {
DEBUG_PRINTF("group %llu is unsquashable\n", info.group_mask);
- unsquashable |= info.group_mask;
- }
- }
-
- rose_group squash_mask = ~0ULL; /* leave all groups alone */
-
- for (u32 i = 0; i < ROSE_GROUPS_MAX; i++) {
- if (is_subset_of(tbi.group_to_literal[i], lit_ids)) {
- squash_mask &= ~(1ULL << i);
- }
- }
- squash_mask |= unsquashable;
- tbi.rose_squash_masks[left] = squash_mask;
- }
-}
-
-static
-void countFloatingLiterals(const RoseBuildImpl &tbi, u32 *total_count,
- u32 *short_count) {
- *total_count = 0;
- *short_count = 0;
+ unsquashable |= info.group_mask;
+ }
+ }
+
+ rose_group squash_mask = ~0ULL; /* leave all groups alone */
+
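+        /* clear the bit for each group whose literals all belong to this
+         * nfa; bits left set mean "leave the group alone" */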
+ for (u32 i = 0; i < ROSE_GROUPS_MAX; i++) {
+ if (is_subset_of(tbi.group_to_literal[i], lit_ids)) {
+ squash_mask &= ~(1ULL << i);
+ }
+ }
+ squash_mask |= unsquashable;
+ tbi.rose_squash_masks[left] = squash_mask;
+ }
+}
+
+static
+void countFloatingLiterals(const RoseBuildImpl &tbi, u32 *total_count,
+ u32 *short_count) {
+ *total_count = 0;
+ *short_count = 0;
for (const rose_literal_id &lit : tbi.literals) {
- if (lit.delay) {
- continue; /* delay id's are virtual-ish */
- }
-
- if (lit.table != ROSE_FLOATING) {
- continue; /* wrong table */
- }
-
- ++*total_count;
- if (lit.s.length() <= ANCHORED_REHOME_SHORT_LEN) {
- ++*short_count;
- }
- }
-}
-
-static
-void rehomeAnchoredLiteral(RoseBuildImpl &tbi, const simple_anchored_info &sai,
- const set<u32> &lit_ids) {
- /* TODO: verify that vertices only have a single literal at the moment */
-
- DEBUG_PRINTF("rehoming ^.{%u,%u}%s\n", sai.min_bound, sai.max_bound,
- dumpString(sai.literal).c_str());
-
- /* Get a floating literal corresponding to the anchored literal */
- u32 new_literal_id = tbi.getLiteralId(sai.literal, 0, ROSE_FLOATING);
- rose_literal_info &new_lit_info = tbi.literal_info[new_literal_id];
- DEBUG_PRINTF("floating literal id -> %u\n", new_literal_id);
-
- for (u32 lit_id : lit_ids) {
- rose_literal_info &old_lit_info = tbi.literal_info[lit_id];
- assert(old_lit_info.delayed_ids.empty());
-
- for (auto v : old_lit_info.vertices) {
- /* Transfer vertex over to new literal id */
- assert(tbi.g[v].literals.size() == 1);
- tbi.g[v].literals.clear();
- tbi.g[v].literals.insert(new_literal_id);
- new_lit_info.vertices.insert(v);
-
- /* ensure bounds on the vertex's in-edge are correct */
- assert(in_degree(v, tbi.g) == 1);
- const RoseEdge &e = *in_edges(v, tbi.g).first;
- assert(tbi.g[e].minBound == sai.min_bound + sai.literal.length());
- assert(tbi.g[e].maxBound == sai.max_bound + sai.literal.length());
- tbi.g[e].minBound = sai.min_bound;
- tbi.g[e].maxBound = sai.max_bound;
- }
-
- /* mark the old literal as empty */
- old_lit_info.vertices.clear();
- }
-}
-
-static
-void rehomeAnchoredLiterals(RoseBuildImpl &tbi) {
-    /* if we have many literals in the floating table, we want to push
-     * anchored-but-deep literals into the floating table as well, since they
-     * are unlikely to reduce its performance. */
- u32 total_count;
- u32 short_count;
- countFloatingLiterals(tbi, &total_count, &short_count);
-
- DEBUG_PRINTF("considering rehoming options\n");
-
- if (total_count < ANCHORED_REHOME_MIN_FLOATING
- && short_count < ANCHORED_REHOME_MIN_FLOATING_SHORT) {
- DEBUG_PRINTF("not a heavy case %u %u\n", total_count, short_count);
- return;
- }
-
- u32 min_rehome_len = ANCHORED_REHOME_SHORT_LEN + 1;
- if (short_count >= ANCHORED_REHOME_ALLOW_SHORT) {
- min_rehome_len--;
- }
-
- for (map<simple_anchored_info, set<u32> >::iterator it
- = tbi.anchored_simple.begin();
- it != tbi.anchored_simple.end();) {
- if (it->first.max_bound < ANCHORED_REHOME_DEEP
- || it->first.literal.length() < min_rehome_len) {
- ++it;
- continue;
- }
-
- rehomeAnchoredLiteral(tbi, it->first, it->second);
- tbi.anchored_simple.erase(it++);
- }
-}
-
-/** \brief Maximum number of single-byte literals to add to the small block
- * table. */
-static const size_t MAX_1BYTE_SMALL_BLOCK_LITERALS = 20;
-
-static
-void addSmallBlockLiteral(RoseBuildImpl &tbi, const simple_anchored_info &sai,
- const set<u32> &lit_ids) {
- DEBUG_PRINTF("anchored ^.{%u,%u}%s\n", sai.min_bound, sai.max_bound,
- dumpString(sai.literal).c_str());
-
- u32 lit_id = tbi.getLiteralId(sai.literal, 0, ROSE_ANCHORED_SMALL_BLOCK);
- rose_literal_info &lit_info = tbi.literal_info[lit_id];
- DEBUG_PRINTF("anchored small block literal id -> %u\n", lit_id);
-
- RoseGraph &g = tbi.g;
- const RoseVertex anchored_root = tbi.anchored_root;
-
- for (u32 old_id : lit_ids) {
- assert(old_id < tbi.literal_info.size());
- const rose_literal_info &li = tbi.literal_info[old_id];
-
+ if (lit.delay) {
+ continue; /* delay id's are virtual-ish */
+ }
+
+ if (lit.table != ROSE_FLOATING) {
+ continue; /* wrong table */
+ }
+
+ ++*total_count;
+ if (lit.s.length() <= ANCHORED_REHOME_SHORT_LEN) {
+ ++*short_count;
+ }
+ }
+}
+
+static
+void rehomeAnchoredLiteral(RoseBuildImpl &tbi, const simple_anchored_info &sai,
+ const set<u32> &lit_ids) {
+ /* TODO: verify that vertices only have a single literal at the moment */
+
+ DEBUG_PRINTF("rehoming ^.{%u,%u}%s\n", sai.min_bound, sai.max_bound,
+ dumpString(sai.literal).c_str());
+
+ /* Get a floating literal corresponding to the anchored literal */
+ u32 new_literal_id = tbi.getLiteralId(sai.literal, 0, ROSE_FLOATING);
+ rose_literal_info &new_lit_info = tbi.literal_info[new_literal_id];
+ DEBUG_PRINTF("floating literal id -> %u\n", new_literal_id);
+
+ for (u32 lit_id : lit_ids) {
+ rose_literal_info &old_lit_info = tbi.literal_info[lit_id];
+ assert(old_lit_info.delayed_ids.empty());
+
+ for (auto v : old_lit_info.vertices) {
+ /* Transfer vertex over to new literal id */
+ assert(tbi.g[v].literals.size() == 1);
+ tbi.g[v].literals.clear();
+ tbi.g[v].literals.insert(new_literal_id);
+ new_lit_info.vertices.insert(v);
+
+ /* ensure bounds on the vertex's in-edge are correct */
+ assert(in_degree(v, tbi.g) == 1);
+ const RoseEdge &e = *in_edges(v, tbi.g).first;
+ assert(tbi.g[e].minBound == sai.min_bound + sai.literal.length());
+ assert(tbi.g[e].maxBound == sai.max_bound + sai.literal.length());
+ tbi.g[e].minBound = sai.min_bound;
+ tbi.g[e].maxBound = sai.max_bound;
+ }
+
+ /* mark the old literal as empty */
+ old_lit_info.vertices.clear();
+ }
+}
+
+static
+void rehomeAnchoredLiterals(RoseBuildImpl &tbi) {
+    /* if we have many literals in the floating table, we want to push
+     * anchored-but-deep literals into the floating table as well, since they
+     * are unlikely to reduce its performance. */
+ u32 total_count;
+ u32 short_count;
+ countFloatingLiterals(tbi, &total_count, &short_count);
+
+ DEBUG_PRINTF("considering rehoming options\n");
+
+ if (total_count < ANCHORED_REHOME_MIN_FLOATING
+ && short_count < ANCHORED_REHOME_MIN_FLOATING_SHORT) {
+ DEBUG_PRINTF("not a heavy case %u %u\n", total_count, short_count);
+ return;
+ }
+
+ u32 min_rehome_len = ANCHORED_REHOME_SHORT_LEN + 1;
+ if (short_count >= ANCHORED_REHOME_ALLOW_SHORT) {
+ min_rehome_len--;
+ }
+
+ for (map<simple_anchored_info, set<u32> >::iterator it
+ = tbi.anchored_simple.begin();
+ it != tbi.anchored_simple.end();) {
+ if (it->first.max_bound < ANCHORED_REHOME_DEEP
+ || it->first.literal.length() < min_rehome_len) {
+ ++it;
+ continue;
+ }
+
+ rehomeAnchoredLiteral(tbi, it->first, it->second);
+ tbi.anchored_simple.erase(it++);
+ }
+}
+
+/** \brief Maximum number of single-byte literals to add to the small block
+ * table. */
+static const size_t MAX_1BYTE_SMALL_BLOCK_LITERALS = 20;
+
+static
+void addSmallBlockLiteral(RoseBuildImpl &tbi, const simple_anchored_info &sai,
+ const set<u32> &lit_ids) {
+ DEBUG_PRINTF("anchored ^.{%u,%u}%s\n", sai.min_bound, sai.max_bound,
+ dumpString(sai.literal).c_str());
+
+ u32 lit_id = tbi.getLiteralId(sai.literal, 0, ROSE_ANCHORED_SMALL_BLOCK);
+ rose_literal_info &lit_info = tbi.literal_info[lit_id];
+ DEBUG_PRINTF("anchored small block literal id -> %u\n", lit_id);
+
+ RoseGraph &g = tbi.g;
+ const RoseVertex anchored_root = tbi.anchored_root;
+
+ for (u32 old_id : lit_ids) {
+ assert(old_id < tbi.literal_info.size());
+ const rose_literal_info &li = tbi.literal_info[old_id];
+
for (auto lit_v : li.vertices) {
- // Clone vertex with the new literal ID.
- RoseVertex v = add_vertex(g[lit_v], g);
- g[v].literals.clear();
- g[v].literals.insert(lit_id);
- g[v].min_offset = sai.min_bound + sai.literal.length();
- g[v].max_offset = sai.max_bound + sai.literal.length();
- lit_info.vertices.insert(v);
-
+ // Clone vertex with the new literal ID.
+ RoseVertex v = add_vertex(g[lit_v], g);
+ g[v].literals.clear();
+ g[v].literals.insert(lit_id);
+ g[v].min_offset = sai.min_bound + sai.literal.length();
+ g[v].max_offset = sai.max_bound + sai.literal.length();
+ lit_info.vertices.insert(v);
+
RoseEdge e = add_edge(anchored_root, v, g);
g[e].minBound = sai.min_bound;
g[e].maxBound = sai.max_bound;
- }
- }
-}
-
-static
-void addSmallBlockLiteral(RoseBuildImpl &tbi, const ue2_literal &lit,
- const flat_set<ReportID> &reports) {
- DEBUG_PRINTF("lit %s, reports: %s\n", dumpString(lit).c_str(),
- as_string_list(reports).c_str());
- assert(!reports.empty());
-
- u32 lit_id = tbi.getLiteralId(lit, 0, ROSE_ANCHORED_SMALL_BLOCK);
- assert(lit_id < tbi.literal_info.size());
- rose_literal_info &lit_info = tbi.literal_info[lit_id];
-
- RoseGraph &g = tbi.g;
-
- RoseVertex v = add_vertex(g);
- g[v].literals.insert(lit_id);
- g[v].reports = reports;
-
+ }
+ }
+}
+
+static
+void addSmallBlockLiteral(RoseBuildImpl &tbi, const ue2_literal &lit,
+ const flat_set<ReportID> &reports) {
+ DEBUG_PRINTF("lit %s, reports: %s\n", dumpString(lit).c_str(),
+ as_string_list(reports).c_str());
+ assert(!reports.empty());
+
+ u32 lit_id = tbi.getLiteralId(lit, 0, ROSE_ANCHORED_SMALL_BLOCK);
+ assert(lit_id < tbi.literal_info.size());
+ rose_literal_info &lit_info = tbi.literal_info[lit_id];
+
+ RoseGraph &g = tbi.g;
+
+ RoseVertex v = add_vertex(g);
+ g[v].literals.insert(lit_id);
+ g[v].reports = reports;
+
RoseEdge e = add_edge(tbi.root, v, g);
- g[e].minBound = 0;
- g[e].maxBound = ROSE_BOUND_INF;
- g[v].min_offset = 1;
- g[v].max_offset = ROSE_BOUND_INF;
- lit_info.vertices.insert(v);
-}
-
-static
-bool stateIsSEPLiteral(const dstate_id_t &s, const symbol_t &sym,
- const raw_dfa &rdfa) {
- const dstate &ds = rdfa.states[s];
- if (!ds.reports_eod.empty() || ds.reports.empty()) {
- DEBUG_PRINTF("badly formed reports\n");
- return false;
- }
-
- DEBUG_PRINTF("examine state %u reached by sym %u\n", s, sym);
-
- for (symbol_t i = 0; i < rdfa.getImplAlphaSize(); i++) {
- const auto &s_next = ds.next[i];
- DEBUG_PRINTF("state %u -> %u on sym %u\n", s, s_next, i);
- if (s_next == DEAD_STATE) {
- continue; // dead, probably pruned
- } else if (s_next == s && i == sym) {
- continue; // self loop on same symbol
- } else if (s_next == rdfa.start_floating) {
- continue; // return to floating start
- }
-
- // We don't handle any other transitions.
- DEBUG_PRINTF("not single-byte\n");
- return false;
- }
-
- return true;
-}
-
-static
-bool extractSEPLiterals(const raw_dfa &rdfa,
- map<ue2_literal, flat_set<ReportID>> &lits_out) {
- if (rdfa.start_floating == DEAD_STATE) {
- DEBUG_PRINTF("not floating?\n");
- return false;
- }
- if (rdfa.start_anchored != rdfa.start_floating) {
- DEBUG_PRINTF("not all floating?\n");
- return false;
- }
-
- map<flat_set<ReportID>, vector<u32>> lits; // reports -> symbols
-
- const dstate &start = rdfa.states[rdfa.start_floating];
-
- const symbol_t alpha_size = rdfa.getImplAlphaSize();
- for (symbol_t i = 0; i < alpha_size; i++) {
- auto next = start.next[i];
- if (next == DEAD_STATE || next == rdfa.start_floating) {
- continue;
- }
-
- if (!stateIsSEPLiteral(next, i, rdfa)) {
- return false;
- }
- lits[rdfa.states[next].reports].push_back(i);
- }
-
- // Map from symbols back to character reachability.
- vector<CharReach> reach(alpha_size);
- for (u32 i = 0; i < N_CHARS; i++) {
- assert(rdfa.alpha_remap[i] < alpha_size);
- reach[rdfa.alpha_remap[i]].set(i);
- }
-
- for (const auto &m : lits) {
- const auto &reports = m.first;
- const auto &symbols = m.second;
-
- CharReach cr;
- for (const auto &sym : symbols) {
- cr |= reach[sym];
- }
-
- for (size_t i = cr.find_first(); i != cr.npos; i = cr.find_next(i)) {
- if (myisupper(i) && cr.test(mytolower(i))) {
- // ignore upper half of a nocase pair
- continue;
- }
-
- bool nocase = myislower(i) && cr.test(mytoupper(i));
- insert(&lits_out[ue2_literal((char)i, nocase)], reports);
- }
- }
-
- return true;
-}
-
-static
-bool extractSEPLiterals(const OutfixInfo &outfix, const ReportManager &rm,
- map<ue2_literal, flat_set<ReportID>> &lits_out) {
- if (outfix.minWidth != depth(1) || outfix.maxWidth != depth(1)) {
- DEBUG_PRINTF("outfix must be fixed width of one\n");
- return false;
- }
-
- for (const auto &report_id : all_reports(outfix)) {
- const auto &report = rm.getReport(report_id);
- if (!isSimpleExhaustible(report)) {
- DEBUG_PRINTF("report id %u not simple exhaustible\n", report_id);
- return false;
- }
- }
-
- // SEP cases should always become DFAs, so that's the only extract code we
- // have implemented here.
-
+ g[e].minBound = 0;
+ g[e].maxBound = ROSE_BOUND_INF;
+ g[v].min_offset = 1;
+ g[v].max_offset = ROSE_BOUND_INF;
+ lit_info.vertices.insert(v);
+}
+
+static
+bool stateIsSEPLiteral(const dstate_id_t &s, const symbol_t &sym,
+ const raw_dfa &rdfa) {
+ const dstate &ds = rdfa.states[s];
+ if (!ds.reports_eod.empty() || ds.reports.empty()) {
+ DEBUG_PRINTF("badly formed reports\n");
+ return false;
+ }
+
+ DEBUG_PRINTF("examine state %u reached by sym %u\n", s, sym);
+
+ for (symbol_t i = 0; i < rdfa.getImplAlphaSize(); i++) {
+ const auto &s_next = ds.next[i];
+ DEBUG_PRINTF("state %u -> %u on sym %u\n", s, s_next, i);
+ if (s_next == DEAD_STATE) {
+ continue; // dead, probably pruned
+ } else if (s_next == s && i == sym) {
+ continue; // self loop on same symbol
+ } else if (s_next == rdfa.start_floating) {
+ continue; // return to floating start
+ }
+
+ // We don't handle any other transitions.
+ DEBUG_PRINTF("not single-byte\n");
+ return false;
+ }
+
+ return true;
+}
+
+static
+bool extractSEPLiterals(const raw_dfa &rdfa,
+ map<ue2_literal, flat_set<ReportID>> &lits_out) {
+ if (rdfa.start_floating == DEAD_STATE) {
+ DEBUG_PRINTF("not floating?\n");
+ return false;
+ }
+ if (rdfa.start_anchored != rdfa.start_floating) {
+ DEBUG_PRINTF("not all floating?\n");
+ return false;
+ }
+
+ map<flat_set<ReportID>, vector<u32>> lits; // reports -> symbols
+
+ const dstate &start = rdfa.states[rdfa.start_floating];
+
+ const symbol_t alpha_size = rdfa.getImplAlphaSize();
+ for (symbol_t i = 0; i < alpha_size; i++) {
+ auto next = start.next[i];
+ if (next == DEAD_STATE || next == rdfa.start_floating) {
+ continue;
+ }
+
+ if (!stateIsSEPLiteral(next, i, rdfa)) {
+ return false;
+ }
+ lits[rdfa.states[next].reports].push_back(i);
+ }
+
+ // Map from symbols back to character reachability.
+ vector<CharReach> reach(alpha_size);
+ for (u32 i = 0; i < N_CHARS; i++) {
+ assert(rdfa.alpha_remap[i] < alpha_size);
+ reach[rdfa.alpha_remap[i]].set(i);
+ }
+
+ for (const auto &m : lits) {
+ const auto &reports = m.first;
+ const auto &symbols = m.second;
+
+ CharReach cr;
+ for (const auto &sym : symbols) {
+ cr |= reach[sym];
+ }
+
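+        // e.g. if both 'a' and 'A' are reachable here we emit a single nocase
+        // literal 'a' rather than two case-sensitive ones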
+ for (size_t i = cr.find_first(); i != cr.npos; i = cr.find_next(i)) {
+ if (myisupper(i) && cr.test(mytolower(i))) {
+ // ignore upper half of a nocase pair
+ continue;
+ }
+
+ bool nocase = myislower(i) && cr.test(mytoupper(i));
+ insert(&lits_out[ue2_literal((char)i, nocase)], reports);
+ }
+ }
+
+ return true;
+}
+
+static
+bool extractSEPLiterals(const OutfixInfo &outfix, const ReportManager &rm,
+ map<ue2_literal, flat_set<ReportID>> &lits_out) {
+ if (outfix.minWidth != depth(1) || outfix.maxWidth != depth(1)) {
+ DEBUG_PRINTF("outfix must be fixed width of one\n");
+ return false;
+ }
+
+ for (const auto &report_id : all_reports(outfix)) {
+ const auto &report = rm.getReport(report_id);
+ if (!isSimpleExhaustible(report)) {
+ DEBUG_PRINTF("report id %u not simple exhaustible\n", report_id);
+ return false;
+ }
+ }
+
+ // SEP cases should always become DFAs, so that's the only extract code we
+ // have implemented here.
+
if (outfix.rdfa()) {
return extractSEPLiterals(*outfix.rdfa(), lits_out);
- }
-
- DEBUG_PRINTF("cannot extract literals from outfix type\n");
- return false;
-}
-
-static
-void addAnchoredSmallBlockLiterals(RoseBuildImpl &tbi) {
- if (tbi.cc.streaming) {
- DEBUG_PRINTF("not block mode\n");
- return;
- }
- if (!tbi.anchored_nfas.empty()) {
- DEBUG_PRINTF("anchored table is not purely literal\n");
- return;
- }
-
- // At the moment, we only use the small-block matcher if all our anchored
- // literals are direct reports (i.e. leaf nodes in the Rose graph).
- for (const set<u32> &lits : tbi.anchored_simple | map_values) {
- for (u32 lit_id : lits) {
- if (!tbi.isDirectReport(lit_id)) {
- DEBUG_PRINTF("not all anchored lits are direct reports\n");
- return;
- }
- }
- }
-
- vector<pair<simple_anchored_info, set<u32> > > anchored_lits;
- vector<OutfixInfo *> sep_outfixes;
- size_t oneByteLiterals = 0;
-
- for (const auto &e : tbi.anchored_simple) {
- const simple_anchored_info &sai = e.first;
- const set<u32> &lit_ids = e.second;
-
- if (sai.literal.length() + sai.min_bound > ROSE_SMALL_BLOCK_LEN) {
- DEBUG_PRINTF("skipping literal '%s' with min bound %u that cannot "
- "match inside small block width\n",
- dumpString(sai.literal).c_str(), sai.min_bound);
- }
-
- anchored_lits.push_back(make_pair(sai, lit_ids));
- if (sai.literal.length() == 1) {
- oneByteLiterals++;
- }
- }
-
- // Capture SEP outfixes as well, adding them as literals to the small block
- // table.
- map<ue2_literal, flat_set<ReportID>> sep_literals;
- for (OutfixInfo &oi : tbi.outfixes) {
- if (extractSEPLiterals(oi, tbi.rm, sep_literals)) {
- sep_outfixes.push_back(&oi);
- }
- }
-
- oneByteLiterals += sep_literals.size();
- DEBUG_PRINTF("%zu one-byte literals\n", oneByteLiterals);
- if (oneByteLiterals > MAX_1BYTE_SMALL_BLOCK_LITERALS) {
- DEBUG_PRINTF("too many one-byte literals, not building small block "
- "table!\n");
- return;
- }
-
- for (const auto &e : tbi.anchored_simple) {
- const simple_anchored_info &sai = e.first;
- const set<u32> &lit_ids = e.second;
-
- addSmallBlockLiteral(tbi, sai, lit_ids);
- }
-
- for (const auto &m : sep_literals) {
- addSmallBlockLiteral(tbi, m.first, m.second);
- }
-
- for (OutfixInfo *oi : sep_outfixes) {
- assert(oi);
- oi->in_sbmatcher = true;
- }
-}
-
-#ifndef NDEBUG
-static
-bool historiesAreValid(const RoseGraph &g) {
- for (const auto &e : edges_range(g)) {
- if (g[e].history == ROSE_ROLE_HISTORY_INVALID) {
- DEBUG_PRINTF("edge [%zu,%zu] has invalid history\n",
+ }
+
+ DEBUG_PRINTF("cannot extract literals from outfix type\n");
+ return false;
+}
+
+static
+void addAnchoredSmallBlockLiterals(RoseBuildImpl &tbi) {
+ if (tbi.cc.streaming) {
+ DEBUG_PRINTF("not block mode\n");
+ return;
+ }
+ if (!tbi.anchored_nfas.empty()) {
+ DEBUG_PRINTF("anchored table is not purely literal\n");
+ return;
+ }
+
+ // At the moment, we only use the small-block matcher if all our anchored
+ // literals are direct reports (i.e. leaf nodes in the Rose graph).
+ for (const set<u32> &lits : tbi.anchored_simple | map_values) {
+ for (u32 lit_id : lits) {
+ if (!tbi.isDirectReport(lit_id)) {
+ DEBUG_PRINTF("not all anchored lits are direct reports\n");
+ return;
+ }
+ }
+ }
+
+ vector<pair<simple_anchored_info, set<u32> > > anchored_lits;
+ vector<OutfixInfo *> sep_outfixes;
+ size_t oneByteLiterals = 0;
+
+ for (const auto &e : tbi.anchored_simple) {
+ const simple_anchored_info &sai = e.first;
+ const set<u32> &lit_ids = e.second;
+
+ if (sai.literal.length() + sai.min_bound > ROSE_SMALL_BLOCK_LEN) {
+ DEBUG_PRINTF("skipping literal '%s' with min bound %u that cannot "
+ "match inside small block width\n",
+ dumpString(sai.literal).c_str(), sai.min_bound);
+ }
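+        /* note: the message above is diagnostic only; the literal is still
+         * counted and added below */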
+
+ anchored_lits.push_back(make_pair(sai, lit_ids));
+ if (sai.literal.length() == 1) {
+ oneByteLiterals++;
+ }
+ }
+
+ // Capture SEP outfixes as well, adding them as literals to the small block
+ // table.
+ map<ue2_literal, flat_set<ReportID>> sep_literals;
+ for (OutfixInfo &oi : tbi.outfixes) {
+ if (extractSEPLiterals(oi, tbi.rm, sep_literals)) {
+ sep_outfixes.push_back(&oi);
+ }
+ }
+
+ oneByteLiterals += sep_literals.size();
+ DEBUG_PRINTF("%zu one-byte literals\n", oneByteLiterals);
+ if (oneByteLiterals > MAX_1BYTE_SMALL_BLOCK_LITERALS) {
+ DEBUG_PRINTF("too many one-byte literals, not building small block "
+ "table!\n");
+ return;
+ }
+
+ for (const auto &e : tbi.anchored_simple) {
+ const simple_anchored_info &sai = e.first;
+ const set<u32> &lit_ids = e.second;
+
+ addSmallBlockLiteral(tbi, sai, lit_ids);
+ }
+
+ for (const auto &m : sep_literals) {
+ addSmallBlockLiteral(tbi, m.first, m.second);
+ }
+
+ for (OutfixInfo *oi : sep_outfixes) {
+ assert(oi);
+ oi->in_sbmatcher = true;
+ }
+}
+
+#ifndef NDEBUG
+static
+bool historiesAreValid(const RoseGraph &g) {
+ for (const auto &e : edges_range(g)) {
+ if (g[e].history == ROSE_ROLE_HISTORY_INVALID) {
+ DEBUG_PRINTF("edge [%zu,%zu] has invalid history\n",
g[source(e, g)].index, g[target(e, g)].index);
- return false;
- }
- }
-
- return true;
-}
-
-/**
- * Assertion: Returns true if we have a reference hanging around to a vertex
- * that no longer exists in the graph.
- */
-static
-bool danglingVertexRef(RoseBuildImpl &tbi) {
- RoseGraph::vertex_iterator vi, ve;
- tie(vi, ve) = vertices(tbi.g);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/**
+ * Assertion: Returns true if we have a reference hanging around to a vertex
+ * that no longer exists in the graph.
+ */
+static
+bool danglingVertexRef(RoseBuildImpl &tbi) {
+ RoseGraph::vertex_iterator vi, ve;
+ tie(vi, ve) = vertices(tbi.g);
const unordered_set<RoseVertex> valid_vertices(vi, ve);
-
- if (!contains(valid_vertices, tbi.anchored_root)) {
+
+ if (!contains(valid_vertices, tbi.anchored_root)) {
DEBUG_PRINTF("anchored root vertex %zu not in graph\n",
tbi.g[tbi.anchored_root].index);
- return true;
- }
-
- for (const auto &e : tbi.ghost) {
- if (!contains(valid_vertices, e.first)) {
+ return true;
+ }
+
+ for (const auto &e : tbi.ghost) {
+ if (!contains(valid_vertices, e.first)) {
DEBUG_PRINTF("ghost key vertex %zu not in graph\n",
tbi.g[e.first].index);
- return true;
- }
- if (!contains(valid_vertices, e.second)) {
+ return true;
+ }
+ if (!contains(valid_vertices, e.second)) {
DEBUG_PRINTF("ghost value vertex %zu not in graph\n",
tbi.g[e.second].index);
- return true;
- }
- }
-
- return false;
-}
-
-static
-bool roleOffsetsAreValid(const RoseGraph &g) {
- for (auto v : vertices_range(g)) {
- if (g[v].min_offset >= ROSE_BOUND_INF) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static
+bool roleOffsetsAreValid(const RoseGraph &g) {
+ for (auto v : vertices_range(g)) {
+ if (g[v].min_offset >= ROSE_BOUND_INF) {
DEBUG_PRINTF("invalid min_offset for role %zu\n", g[v].index);
- return false;
- }
- if (g[v].min_offset > g[v].max_offset) {
+ return false;
+ }
+ if (g[v].min_offset > g[v].max_offset) {
DEBUG_PRINTF("min_offset > max_offset for %zu\n", g[v].index);
- return false;
- }
- }
- return true;
-}
-#endif // NDEBUG
-
+ return false;
+ }
+ }
+ return true;
+}
+#endif // NDEBUG
+
bytecode_ptr<RoseEngine> RoseBuildImpl::buildRose(u32 minWidth) {
dumpRoseGraph(*this, "rose_early.dot");
-
- // Early check for Rose implementability.
- assert(canImplementGraphs(*this));
-
- // Sanity check vertex role offsets.
- assert(roleOffsetsAreValid(g));
-
- convertPrefixToBounds(*this);
-
- // Turn flood-prone suffixes into suffix NFAs.
- convertFloodProneSuffixes(*this);
-
- // Turn repeats into Castle prototypes.
- makeCastles(*this);
-
- rehomeAnchoredLiterals(*this);
-
- // If we've got a very small number of EOD-anchored literals, consider
- // moving them into the floating table so that we only have one literal
+
+ // Early check for Rose implementability.
+ assert(canImplementGraphs(*this));
+
+ // Sanity check vertex role offsets.
+ assert(roleOffsetsAreValid(g));
+
+ convertPrefixToBounds(*this);
+
+ // Turn flood-prone suffixes into suffix NFAs.
+ convertFloodProneSuffixes(*this);
+
+ // Turn repeats into Castle prototypes.
+ makeCastles(*this);
+
+ rehomeAnchoredLiterals(*this);
+
+ // If we've got a very small number of EOD-anchored literals, consider
+ // moving them into the floating table so that we only have one literal
// matcher to run. Note that this needs to happen before
// addAnchoredSmallBlockLiterals as it may create anchored literals.
- assert(roleOffsetsAreValid(g));
- stealEodVertices(*this);
-
- addAnchoredSmallBlockLiterals(*this);
-
- // Merge duplicate leaf nodes
- dedupeSuffixes(*this);
- if (cc.grey.roseGraphReduction) {
- mergeDupeLeaves(*this);
- uncalcLeaves(*this);
- }
-
- assert(roleOffsetsAreValid(g));
- handleMixedSensitivity();
-
- assignHistories(*this);
-
- convertAnchPrefixToBounds(*this);
-
- // Do some final graph reduction.
- dedupeLeftfixes(*this);
- aliasRoles(*this, false); // Don't merge leftfixes.
- dedupeLeftfixes(*this);
- uncalcLeaves(*this);
-
- /* note the leftfixes which do not need to keep state across stream
- boundaries */
- findTransientLeftfixes();
-
- dedupeLeftfixesVariableLag(*this);
- mergeLeftfixesVariableLag(*this);
- mergeSmallLeftfixes(*this);
- mergeCastleLeftfixes(*this);
-
- // Do a rose-merging aliasing pass.
- aliasRoles(*this, true);
-
- // Merging of suffixes _below_ role aliasing, as otherwise we'd have to
- // teach role aliasing about suffix tops.
- mergeCastleSuffixes(*this);
- mergePuffixes(*this);
- mergeAcyclicSuffixes(*this);
- mergeSmallSuffixes(*this);
-
- // Convert Castles that would be better off as NFAs back to NGHolder
- // infixes/suffixes.
- if (unmakeCastles(*this)) {
- // We may be able to save some stream state by merging the newly
- // "unmade" Castles.
- mergeSmallSuffixes(*this);
- mergeSmallLeftfixes(*this);
- }
-
+ assert(roleOffsetsAreValid(g));
+ stealEodVertices(*this);
+
+ addAnchoredSmallBlockLiterals(*this);
+
+ // Merge duplicate leaf nodes
+ dedupeSuffixes(*this);
+ if (cc.grey.roseGraphReduction) {
+ mergeDupeLeaves(*this);
+ uncalcLeaves(*this);
+ }
+
+ assert(roleOffsetsAreValid(g));
+ handleMixedSensitivity();
+
+ assignHistories(*this);
+
+ convertAnchPrefixToBounds(*this);
+
+ // Do some final graph reduction.
+ dedupeLeftfixes(*this);
+ aliasRoles(*this, false); // Don't merge leftfixes.
+ dedupeLeftfixes(*this);
+ uncalcLeaves(*this);
+
+ /* note the leftfixes which do not need to keep state across stream
+ boundaries */
+ findTransientLeftfixes();
+
+ dedupeLeftfixesVariableLag(*this);
+ mergeLeftfixesVariableLag(*this);
+ mergeSmallLeftfixes(*this);
+ mergeCastleLeftfixes(*this);
+
+ // Do a rose-merging aliasing pass.
+ aliasRoles(*this, true);
+
+ // Merging of suffixes _below_ role aliasing, as otherwise we'd have to
+ // teach role aliasing about suffix tops.
+ mergeCastleSuffixes(*this);
+ mergePuffixes(*this);
+ mergeAcyclicSuffixes(*this);
+ mergeSmallSuffixes(*this);
+
+ // Convert Castles that would be better off as NFAs back to NGHolder
+ // infixes/suffixes.
+ if (unmakeCastles(*this)) {
+ // We may be able to save some stream state by merging the newly
+ // "unmade" Castles.
+ mergeSmallSuffixes(*this);
+ mergeSmallLeftfixes(*this);
+ }
+
assert(!hasOrphanedTops(*this));
- // Do a rose-merging aliasing pass.
- aliasRoles(*this, true);
+ // Do a rose-merging aliasing pass.
+ aliasRoles(*this, true);
assert(!hasOrphanedTops(*this));
-
- // Run a merge pass over the outfixes as well.
- mergeOutfixes(*this);
-
- assert(!danglingVertexRef(*this));
+
+ // Run a merge pass over the outfixes as well.
+ mergeOutfixes(*this);
+
+ assert(!danglingVertexRef(*this));
assert(!hasOrphanedTops(*this));
-
+
findMoreLiteralMasks(*this);
assignGroupsToLiterals(*this);
assignGroupsToRoles(*this);
- findGroupSquashers(*this);
-
- /* final prep work */
- remapCastleTops(*this);
+ findGroupSquashers(*this);
+
+ /* final prep work */
+ remapCastleTops(*this);
optimiseRoseTops(*this);
- buildRoseSquashMasks(*this);
-
- rm.assignDkeys(this);
-
- /* transfer mpv outfix to main queue */
- if (mpv_outfix) {
- outfixes.push_back(move(*mpv_outfix));
- mpv_outfix = nullptr;
- }
-
- assert(canImplementGraphs(*this));
- assert(!hasOrphanedTops(*this));
- assert(roleOffsetsAreValid(g));
- assert(historiesAreValid(g));
-
+ buildRoseSquashMasks(*this);
+
+ rm.assignDkeys(this);
+
+ /* transfer mpv outfix to main queue */
+ if (mpv_outfix) {
+ outfixes.push_back(move(*mpv_outfix));
+ mpv_outfix = nullptr;
+ }
+
+ assert(canImplementGraphs(*this));
+ assert(!hasOrphanedTops(*this));
+ assert(roleOffsetsAreValid(g));
+ assert(historiesAreValid(g));
+
dumpRoseGraph(*this, "rose_pre_norm.dot");
-
- return buildFinalEngine(minWidth);
-}
-
-} // namespace ue2
+
+ return buildFinalEngine(minWidth);
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_convert.cpp b/contrib/libs/hyperscan/src/rose/rose_build_convert.cpp
index f13fac04e1..33351099f7 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_convert.cpp
+++ b/contrib/libs/hyperscan/src/rose/rose_build_convert.cpp
@@ -1,364 +1,364 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "rose_build_convert.h"
-
-#include "grey.h"
-#include "rose_build.h"
-#include "rose_build_impl.h"
-#include "rose_build_util.h"
-#include "ue2common.h"
-#include "hwlm/hwlm_build.h"
-#include "nfa/castlecompile.h"
-#include "nfa/limex_limits.h"
-#include "nfagraph/ng_dump.h"
-#include "nfagraph/ng_limex.h"
-#include "nfagraph/ng_repeat.h"
-#include "nfagraph/ng_reports.h"
-#include "nfagraph/ng_split.h"
-#include "nfagraph/ng_util.h"
-#include "nfagraph/ng_width.h"
-#include "util/bitutils.h"
-#include "util/charreach.h"
-#include "util/charreach_util.h"
-#include "util/compile_context.h"
-#include "util/depth.h"
-#include "util/graph_range.h"
-#include "util/make_unique.h"
-#include "util/order_check.h"
-#include "util/ue2string.h"
-
-#include <algorithm>
-#include <map>
-#include <queue>
-#include <set>
-#include <string>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "rose_build_convert.h"
+
+#include "grey.h"
+#include "rose_build.h"
+#include "rose_build_impl.h"
+#include "rose_build_util.h"
+#include "ue2common.h"
+#include "hwlm/hwlm_build.h"
+#include "nfa/castlecompile.h"
+#include "nfa/limex_limits.h"
+#include "nfagraph/ng_dump.h"
+#include "nfagraph/ng_limex.h"
+#include "nfagraph/ng_repeat.h"
+#include "nfagraph/ng_reports.h"
+#include "nfagraph/ng_split.h"
+#include "nfagraph/ng_util.h"
+#include "nfagraph/ng_width.h"
+#include "util/bitutils.h"
+#include "util/charreach.h"
+#include "util/charreach_util.h"
+#include "util/compile_context.h"
+#include "util/depth.h"
+#include "util/graph_range.h"
+#include "util/make_unique.h"
+#include "util/order_check.h"
+#include "util/ue2string.h"
+
+#include <algorithm>
+#include <map>
+#include <queue>
+#include <set>
+#include <string>
#include <unordered_map>
#include <utility>
-#include <vector>
-
-#include <boost/range/adaptor/map.hpp>
-
-using namespace std;
-using boost::adaptors::map_values;
-
-namespace ue2 {
-
-static
-NFAVertex addHolderVertex(const CharReach &cr, NGHolder &out) {
- assert(cr.any());
- NFAVertex v = add_vertex(out);
- out[v].char_reach = cr;
- return v;
-}
-
-static
-size_t suffixFloodLen(const ue2_literal &s) {
- if (s.empty()) {
- return 0;
- }
-
- const ue2_literal::elem &c = s.back();
- auto it = find_if(s.rbegin(), s.rend(),
+#include <vector>
+
+#include <boost/range/adaptor/map.hpp>
+
+using namespace std;
+using boost::adaptors::map_values;
+
+namespace ue2 {
+
+static
+NFAVertex addHolderVertex(const CharReach &cr, NGHolder &out) {
+ assert(cr.any());
+ NFAVertex v = add_vertex(out);
+ out[v].char_reach = cr;
+ return v;
+}
+
+static
+size_t suffixFloodLen(const ue2_literal &s) {
+ if (s.empty()) {
+ return 0;
+ }
+
+ const ue2_literal::elem &c = s.back();
+ auto it = find_if(s.rbegin(), s.rend(),
[&c](const ue2_literal::elem &e) { return e != c; });
- return distance(s.rbegin(), it);
-}
-
-static
-unique_ptr<NGHolder> makeFloodProneSuffix(const ue2_literal &s, size_t len,
- const flat_set<ReportID> &reports) {
- assert(len < s.length());
- assert(!reports.empty());
-
- unique_ptr<NGHolder> h = ue2::make_unique<NGHolder>(NFA_SUFFIX);
-
- NFAVertex u = h->start;
- for (auto it = s.begin() + s.length() - len; it != s.end(); ++it) {
- NFAVertex v = addHolderVertex(*it, *h);
+ return distance(s.rbegin(), it);
+}
+
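For intuition, suffixFloodLen computes the length of the run of identical characters terminating the literal. A hedged standalone equivalent over std::string (ue2_literal elements also carry case information, which this sketch ignores):

    #include <algorithm>
    #include <cstddef>
    #include <iterator>
    #include <string>

    // Length of the run of identical characters at the end of s.
    size_t trailingRunLen(const std::string &s) {
        if (s.empty()) {
            return 0;
        }
        const char c = s.back();
        auto it = std::find_if(s.rbegin(), s.rend(),
                               [&c](char e) { return e != c; });
        return std::distance(s.rbegin(), it);
    }

    // trailingRunLen("abcddd") == 3; trailingRunLen("aaaa") == 4.
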
+static
+unique_ptr<NGHolder> makeFloodProneSuffix(const ue2_literal &s, size_t len,
+ const flat_set<ReportID> &reports) {
+ assert(len < s.length());
+ assert(!reports.empty());
+
+ unique_ptr<NGHolder> h = ue2::make_unique<NGHolder>(NFA_SUFFIX);
+
+ NFAVertex u = h->start;
+ for (auto it = s.begin() + s.length() - len; it != s.end(); ++it) {
+ NFAVertex v = addHolderVertex(*it, *h);
NFAEdge e = add_edge(u, v, *h);
if (u == h->start) {
(*h)[e].tops.insert(DEFAULT_TOP);
}
- u = v;
- }
-
- (*h)[u].reports.insert(reports.begin(), reports.end());
- add_edge(u, h->accept, *h);
- return h;
-}
-
-static
-unique_ptr<NGHolder> makeRosePrefix(const ue2_literal &s) {
- unique_ptr<NGHolder> h = ue2::make_unique<NGHolder>(NFA_PREFIX);
-
- NFAVertex u = h->startDs;
- for (const auto &c : s) {
- NFAVertex v = addHolderVertex(c, *h);
- add_edge(u, v, *h);
- u = v;
- }
- add_edge(u, h->accept, *h);
- return h;
-}
-
-static
-void replaceWithLitPrefix(RoseBuildImpl &tbi, RoseVertex v, u32 lit_id,
- const rose_literal_id &lit, size_t suffixlen,
- size_t delay) {
- assert(suffixlen < lit.s.length());
-
- DEBUG_PRINTF("replacing '%s' with prefix, length=%zu, delay=%zu\n",
- dumpString(lit.s).c_str(), lit.s.length() - suffixlen, delay);
-
- RoseGraph &g = tbi.g;
- ue2_literal new_lit = lit.s.substr(0, lit.s.length() - suffixlen);
- u32 new_id = tbi.getLiteralId(new_lit, delay, ROSE_FLOATING);
- rose_literal_info &old_info = tbi.literal_info.at(lit_id);
- old_info.vertices.erase(v);
- tbi.literal_info.at(new_id).vertices.insert(v);
- g[v].literals.clear();
- g[v].literals.insert(new_id);
-}
-
-static
-bool delayLiteralWithPrefix(RoseBuildImpl &tbi, RoseVertex v, u32 lit_id,
- const rose_literal_id &lit, size_t suffixlen) {
- if (suffixlen > MAX_DELAY) {
- DEBUG_PRINTF("delay too large\n");
- return false;
- }
-
- if (!tbi.isDirectReport(lit_id)) {
- DEBUG_PRINTF("literal is not direct report\n");
- return false;
- }
-
- if (tbi.cc.streaming &&
- lit.s.length() > tbi.cc.grey.maxHistoryAvailable + 1) {
- DEBUG_PRINTF("insufficient history to delay literal of len %zu\n",
- lit.s.length());
- return false;
- }
-
- shared_ptr<NGHolder> h = makeRosePrefix(lit.s);
- ReportID prefix_report = 0;
+ u = v;
+ }
+
+ (*h)[u].reports.insert(reports.begin(), reports.end());
+ add_edge(u, h->accept, *h);
+ return h;
+}
+
+static
+unique_ptr<NGHolder> makeRosePrefix(const ue2_literal &s) {
+ unique_ptr<NGHolder> h = ue2::make_unique<NGHolder>(NFA_PREFIX);
+
+ NFAVertex u = h->startDs;
+ for (const auto &c : s) {
+ NFAVertex v = addHolderVertex(c, *h);
+ add_edge(u, v, *h);
+ u = v;
+ }
+ add_edge(u, h->accept, *h);
+ return h;
+}
+
+static
+void replaceWithLitPrefix(RoseBuildImpl &tbi, RoseVertex v, u32 lit_id,
+ const rose_literal_id &lit, size_t suffixlen,
+ size_t delay) {
+ assert(suffixlen < lit.s.length());
+
+ DEBUG_PRINTF("replacing '%s' with prefix, length=%zu, delay=%zu\n",
+ dumpString(lit.s).c_str(), lit.s.length() - suffixlen, delay);
+
+ RoseGraph &g = tbi.g;
+ ue2_literal new_lit = lit.s.substr(0, lit.s.length() - suffixlen);
+ u32 new_id = tbi.getLiteralId(new_lit, delay, ROSE_FLOATING);
+ rose_literal_info &old_info = tbi.literal_info.at(lit_id);
+ old_info.vertices.erase(v);
+ tbi.literal_info.at(new_id).vertices.insert(v);
+ g[v].literals.clear();
+ g[v].literals.insert(new_id);
+}
+
+static
+bool delayLiteralWithPrefix(RoseBuildImpl &tbi, RoseVertex v, u32 lit_id,
+ const rose_literal_id &lit, size_t suffixlen) {
+ if (suffixlen > MAX_DELAY) {
+ DEBUG_PRINTF("delay too large\n");
+ return false;
+ }
+
+ if (!tbi.isDirectReport(lit_id)) {
+ DEBUG_PRINTF("literal is not direct report\n");
+ return false;
+ }
+
+ if (tbi.cc.streaming &&
+ lit.s.length() > tbi.cc.grey.maxHistoryAvailable + 1) {
+ DEBUG_PRINTF("insufficient history to delay literal of len %zu\n",
+ lit.s.length());
+ return false;
+ }
+
+ shared_ptr<NGHolder> h = makeRosePrefix(lit.s);
+ ReportID prefix_report = 0;
set_report(*h, prefix_report);
-
- if (!isImplementableNFA(*h, &tbi.rm, tbi.cc)) {
- DEBUG_PRINTF("prefix not implementable\n");
- return false;
- }
-
- RoseGraph &g = tbi.g;
- assert(!g[v].left);
- g[v].left.graph = h;
- g[v].left.lag = 0;
- g[v].left.leftfix_report = prefix_report;
-
- // Swap v's literal for a shorter one, delayed by suffix len.
- replaceWithLitPrefix(tbi, v, lit_id, lit, suffixlen, suffixlen);
-
- return true;
-}
-
-static
-void convertFloodProneSuffix(RoseBuildImpl &tbi, RoseVertex v, u32 lit_id,
- const rose_literal_id &lit, size_t suffixlen) {
- DEBUG_PRINTF("flood-prone leaf '%s'\n", dumpString(lit.s).c_str());
- DEBUG_PRINTF("turning last %zu chars into a suffix NFA\n", suffixlen);
- RoseGraph &g = tbi.g;
- assert(!g[v].eod_accept);
-
- // If we're a direct report literal, we may be able to convert this case
- // into a delayed literal with a (very boring) transient prefix that
- // handles our flood-prone suffix.
- if (delayLiteralWithPrefix(tbi, v, lit_id, lit, suffixlen)) {
- DEBUG_PRINTF("implemented as delayed literal with a rose prefix\n");
- return;
- }
-
- // General case: create a suffix that implements the flood-prone portion.
-
- // Create the NFA.
- auto h = makeFloodProneSuffix(lit.s, suffixlen, g[v].reports);
- if (!isImplementableNFA(*h, &tbi.rm, tbi.cc)) {
- DEBUG_PRINTF("not implementable\n");
- return;
- }
-
- // Apply the NFA.
- assert(!g[v].suffix);
- g[v].suffix.graph = move(h);
- g[v].reports.clear();
-
- // Swap v's literal for a shorter one.
- replaceWithLitPrefix(tbi, v, lit_id, lit, suffixlen, 0);
-
- // It's possible that min_offset might be an underestimate, so we
- // subtract min(min_offset, suffixlen) for safety.
- g[v].min_offset -= min((size_t)g[v].min_offset, suffixlen);
-
- if (g[v].max_offset < ROSE_BOUND_INF) {
- assert(g[v].max_offset >= suffixlen);
- g[v].max_offset -= suffixlen;
- }
-}
-
-/**
- * Collect an estimate of the number of literals in the floating table, and use
- * this to estimate the flood prone suffix length.
- */
-static
-size_t findFloodProneSuffixLen(const RoseBuildImpl &tbi) {
- size_t numLiterals = 0;
+
+ if (!isImplementableNFA(*h, &tbi.rm, tbi.cc)) {
+ DEBUG_PRINTF("prefix not implementable\n");
+ return false;
+ }
+
+ RoseGraph &g = tbi.g;
+ assert(!g[v].left);
+ g[v].left.graph = h;
+ g[v].left.lag = 0;
+ g[v].left.leftfix_report = prefix_report;
+
+ // Swap v's literal for a shorter one, delayed by suffix len.
+ replaceWithLitPrefix(tbi, v, lit_id, lit, suffixlen, suffixlen);
+
+ return true;
+}
+
+static
+void convertFloodProneSuffix(RoseBuildImpl &tbi, RoseVertex v, u32 lit_id,
+ const rose_literal_id &lit, size_t suffixlen) {
+ DEBUG_PRINTF("flood-prone leaf '%s'\n", dumpString(lit.s).c_str());
+ DEBUG_PRINTF("turning last %zu chars into a suffix NFA\n", suffixlen);
+ RoseGraph &g = tbi.g;
+ assert(!g[v].eod_accept);
+
+ // If we're a direct report literal, we may be able to convert this case
+ // into a delayed literal with a (very boring) transient prefix that
+ // handles our flood-prone suffix.
+ if (delayLiteralWithPrefix(tbi, v, lit_id, lit, suffixlen)) {
+ DEBUG_PRINTF("implemented as delayed literal with a rose prefix\n");
+ return;
+ }
+
+ // General case: create a suffix that implements the flood-prone portion.
+
+ // Create the NFA.
+ auto h = makeFloodProneSuffix(lit.s, suffixlen, g[v].reports);
+ if (!isImplementableNFA(*h, &tbi.rm, tbi.cc)) {
+ DEBUG_PRINTF("not implementable\n");
+ return;
+ }
+
+ // Apply the NFA.
+ assert(!g[v].suffix);
+ g[v].suffix.graph = move(h);
+ g[v].reports.clear();
+
+ // Swap v's literal for a shorter one.
+ replaceWithLitPrefix(tbi, v, lit_id, lit, suffixlen, 0);
+
+ // It's possible that min_offset might be an underestimate, so we
+ // subtract min(min_offset, suffixlen) for safety.
+ g[v].min_offset -= min((size_t)g[v].min_offset, suffixlen);
+
+ if (g[v].max_offset < ROSE_BOUND_INF) {
+ assert(g[v].max_offset >= suffixlen);
+ g[v].max_offset -= suffixlen;
+ }
+}
+
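The offset fixup at the end of this function is a saturating subtraction: min_offset may already be an underestimate, so it is clamped at zero instead of being allowed to wrap. A minimal sketch of the same arithmetic with hypothetical values:

    #include <algorithm>
    #include <cstdint>

    int main() {
        uint32_t min_offset = 3;  // hypothetical role offsets
        uint32_t max_offset = 20;
        size_t suffixlen = 5;     // characters moved into the suffix NFA

        // Saturate at zero: an unsigned subtraction of 5 from 3 would wrap.
        min_offset -= std::min<size_t>(min_offset, suffixlen);
        max_offset -= suffixlen;  // only valid when max_offset is finite

        // min_offset == 0, max_offset == 15
        return 0;
    }
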
+/**
+ * Collect an estimate of the number of literals in the floating table, and use
+ * this to estimate the flood prone suffix length.
+ */
+static
+size_t findFloodProneSuffixLen(const RoseBuildImpl &tbi) {
+ size_t numLiterals = 0;
for (const rose_literal_id &lit : tbi.literals) {
- if (lit.delay) {
- continue; // delay ids are virtual-ish
- }
- if (lit.table != ROSE_FLOATING) {
- continue;
- }
-
- numLiterals++;
- }
-
- return hwlmFloodProneSuffixLen(numLiterals, tbi.cc);
-}
-
-/**
- * \brief Convert flood-prone literal suffixes into suffix NFAs.
- *
- * For any trailing string in Rose (string cannot lead to more Rose roles or
- * NFAs, etc) ending with a continuous run of a single character with more than
- * 3 copies of that single character,
- *
- * If the result of removing all but 2 copies of that character yields a string
- * that is greater than FLOOD_PRONE_LIT_MIN_LENGTH characters, remove those
- * final characters from the literal and move them into a suffix NFA.
- */
-void convertFloodProneSuffixes(RoseBuildImpl &tbi) {
- static const size_t FLOOD_PRONE_LIT_MIN_LENGTH = 5;
-
- if (!tbi.cc.grey.roseConvertFloodProneSuffixes) {
- return;
- }
-
- const size_t floodProneLen = findFloodProneSuffixLen(tbi);
- DEBUG_PRINTF("flood prone suffix len = %zu\n", floodProneLen);
-
- RoseGraph &g = tbi.g;
-
- for (auto v : vertices_range(g)) {
- if (!isLeafNode(v, g)) {
- continue;
- }
-
- if (g[v].reports.empty()) {
- continue;
- }
-
- // TODO: currently only boring vertices.
- if (!g[v].isBoring()) {
- continue;
- }
-
- // Currently only handles vertices with a single literal (should always
- // be the case this early in Rose construction).
- if (g[v].literals.size() != 1) {
- continue;
- }
-
- u32 lit_id = *g[v].literals.begin();
+ if (lit.delay) {
+ continue; // delay ids are virtual-ish
+ }
+ if (lit.table != ROSE_FLOATING) {
+ continue;
+ }
+
+ numLiterals++;
+ }
+
+ return hwlmFloodProneSuffixLen(numLiterals, tbi.cc);
+}
+
+/**
+ * \brief Convert flood-prone literal suffixes into suffix NFAs.
+ *
+ * Consider any trailing literal in Rose (i.e. one that cannot lead to further
+ * Rose roles, NFAs, etc.) that ends with a continuous run of more than three
+ * copies of a single character.
+ *
+ * If removing all but two copies of that character would still leave a string
+ * longer than FLOOD_PRONE_LIT_MIN_LENGTH characters, remove those final
+ * characters from the literal and move them into a suffix NFA.
+ */
+void convertFloodProneSuffixes(RoseBuildImpl &tbi) {
+ static const size_t FLOOD_PRONE_LIT_MIN_LENGTH = 5;
+
+ if (!tbi.cc.grey.roseConvertFloodProneSuffixes) {
+ return;
+ }
+
+ const size_t floodProneLen = findFloodProneSuffixLen(tbi);
+ DEBUG_PRINTF("flood prone suffix len = %zu\n", floodProneLen);
+
+ RoseGraph &g = tbi.g;
+
+ for (auto v : vertices_range(g)) {
+ if (!isLeafNode(v, g)) {
+ continue;
+ }
+
+ if (g[v].reports.empty()) {
+ continue;
+ }
+
+ // TODO: currently only boring vertices.
+ if (!g[v].isBoring()) {
+ continue;
+ }
+
+ // Currently only handles vertices with a single literal (should always
+ // be the case this early in Rose construction).
+ if (g[v].literals.size() != 1) {
+ continue;
+ }
+
+ u32 lit_id = *g[v].literals.begin();
const rose_literal_id &lit = tbi.literals.at(lit_id);
-
- // anchored or delayed literals need thought.
- if (lit.table != ROSE_FLOATING || lit.delay) {
- continue;
- }
-
- // don't do this to literals with msk/cmp.
- if (!lit.msk.empty()) {
- continue;
- }
-
- // Can't safely do this operation to vertices with delayed
- // predecessors.
- if (tbi.hasDelayPred(v)) {
- DEBUG_PRINTF("delayed pred\n");
- continue;
- }
-
- if (lit.s.length() <= FLOOD_PRONE_LIT_MIN_LENGTH) {
- DEBUG_PRINTF("literal is short enough already\n");
- continue;
- }
-
- size_t floodLen = suffixFloodLen(lit.s);
- if (floodLen < floodProneLen) {
- DEBUG_PRINTF("literal not flood-prone\n");
- continue;
- }
-
- if (floodLen == lit.s.length()) {
- DEBUG_PRINTF("whole literal is a flood\n");
- // Removing the part of the flood from the end of the literal would
- // leave us with a shorter, but still flood-prone, prefix. Better
- // to leave it alone.
- continue;
- }
-
- size_t suffixLen = floodLen - (floodProneLen - 1);
- if (lit.s.length() - suffixLen < FLOOD_PRONE_LIT_MIN_LENGTH) {
- DEBUG_PRINTF("removing flood would leave literal too short\n");
- continue;
- }
-
- convertFloodProneSuffix(tbi, v, lit_id, lit, suffixLen);
- }
-}
-
-static
-CharReach getReachOfNormalVertex(const NGHolder &g) {
- for (auto v : vertices_range(g)) {
- if (is_special(v, g)) {
- continue;
- }
- return g[v].char_reach;
- }
- assert(0);
- return CharReach();
-}
-
+
+ // anchored or delayed literals need thought.
+ if (lit.table != ROSE_FLOATING || lit.delay) {
+ continue;
+ }
+
+ // don't do this to literals with msk/cmp.
+ if (!lit.msk.empty()) {
+ continue;
+ }
+
+ // Can't safely do this operation to vertices with delayed
+ // predecessors.
+ if (tbi.hasDelayPred(v)) {
+ DEBUG_PRINTF("delayed pred\n");
+ continue;
+ }
+
+ if (lit.s.length() <= FLOOD_PRONE_LIT_MIN_LENGTH) {
+ DEBUG_PRINTF("literal is short enough already\n");
+ continue;
+ }
+
+ size_t floodLen = suffixFloodLen(lit.s);
+ if (floodLen < floodProneLen) {
+ DEBUG_PRINTF("literal not flood-prone\n");
+ continue;
+ }
+
+ if (floodLen == lit.s.length()) {
+ DEBUG_PRINTF("whole literal is a flood\n");
+ // Removing the part of the flood from the end of the literal would
+ // leave us with a shorter, but still flood-prone, prefix. Better
+ // to leave it alone.
+ continue;
+ }
+
+ size_t suffixLen = floodLen - (floodProneLen - 1);
+ if (lit.s.length() - suffixLen < FLOOD_PRONE_LIT_MIN_LENGTH) {
+ DEBUG_PRINTF("removing flood would leave literal too short\n");
+ continue;
+ }
+
+ convertFloodProneSuffix(tbi, v, lit_id, lit, suffixLen);
+ }
+}
+
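To make the trimming arithmetic concrete: floodProneLen - 1 copies of the flood character stay on the literal and the rest move into the suffix NFA. A small numeric sketch under assumed values (a 14-char literal ending in nine 'z's, floodProneLen of 4):

    #include <cstddef>

    int main() {
        const size_t litLen = 14;        // e.g. "abcdezzzzzzzzz"
        const size_t floodLen = 9;       // trailing run length of the literal
        const size_t floodProneLen = 4;  // from hwlmFloodProneSuffixLen

        // Keep floodProneLen - 1 copies on the literal, move the rest out.
        size_t suffixLen = floodLen - (floodProneLen - 1);  // == 6
        size_t remaining = litLen - suffixLen;              // == 8 ("abcdezzz")

        // remaining must not drop below FLOOD_PRONE_LIT_MIN_LENGTH (5) for
        // the conversion to go ahead, which holds here.
        return remaining >= 5 ? 0 : 1;
    }
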
+static
+CharReach getReachOfNormalVertex(const NGHolder &g) {
+ for (auto v : vertices_range(g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+ return g[v].char_reach;
+ }
+ assert(0);
+ return CharReach();
+}
+
/**
* \brief Set the edge bounds and appropriate history on the given edge in the
* Rose graph.
*/
-static
+static
void setEdgeBounds(RoseGraph &g, const RoseEdge &e, u32 min_bound,
u32 max_bound) {
assert(min_bound <= max_bound);
@@ -375,444 +375,444 @@ void setEdgeBounds(RoseGraph &g, const RoseEdge &e, u32 min_bound,
}
static
-bool handleStartPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v,
- const RoseEdge &e_old, RoseVertex ar,
- vector<RoseEdge> *to_delete) {
- DEBUG_PRINTF("hi\n");
-
- /* check for prefix cliches connected to start (^.{N,M}) */
- if (!getReachOfNormalVertex(h).all()) {
- DEBUG_PRINTF(":(\n");
- return false;
- }
-
- PureRepeat repeat;
- if (!isPureRepeat(h, repeat)) {
- DEBUG_PRINTF(":(\n");
- return false;
- }
-
- assert(repeat.bounds.min.is_finite());
- assert(repeat.bounds.max.is_reachable());
- assert(repeat.bounds.min <= repeat.bounds.max);
-
- DEBUG_PRINTF("prefix is ^.{%s,%s}\n", repeat.bounds.min.str().c_str(),
- repeat.bounds.max.str().c_str());
-
- /* update bounds on edge */
-
- // Convert to Rose graph bounds, which are not (yet?) depth classes.
- u32 bound_min = repeat.bounds.min;
- u32 bound_max =
- repeat.bounds.max.is_finite() ? (u32)repeat.bounds.max : ROSE_BOUND_INF;
-
- if (source(e_old, g) == ar) {
- assert(g[e_old].minBound <= bound_min);
- assert(g[e_old].maxBound >= bound_max);
+bool handleStartPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v,
+ const RoseEdge &e_old, RoseVertex ar,
+ vector<RoseEdge> *to_delete) {
+ DEBUG_PRINTF("hi\n");
+
+ /* check for prefix cliches connected to start (^.{N,M}) */
+ if (!getReachOfNormalVertex(h).all()) {
+ DEBUG_PRINTF(":(\n");
+ return false;
+ }
+
+ PureRepeat repeat;
+ if (!isPureRepeat(h, repeat)) {
+ DEBUG_PRINTF(":(\n");
+ return false;
+ }
+
+ assert(repeat.bounds.min.is_finite());
+ assert(repeat.bounds.max.is_reachable());
+ assert(repeat.bounds.min <= repeat.bounds.max);
+
+ DEBUG_PRINTF("prefix is ^.{%s,%s}\n", repeat.bounds.min.str().c_str(),
+ repeat.bounds.max.str().c_str());
+
+ /* update bounds on edge */
+
+ // Convert to Rose graph bounds, which are not (yet?) depth classes.
+ u32 bound_min = repeat.bounds.min;
+ u32 bound_max =
+ repeat.bounds.max.is_finite() ? (u32)repeat.bounds.max : ROSE_BOUND_INF;
+
+ if (source(e_old, g) == ar) {
+ assert(g[e_old].minBound <= bound_min);
+ assert(g[e_old].maxBound >= bound_max);
setEdgeBounds(g, e_old, bound_min, bound_max);
- } else {
+ } else {
RoseEdge e_new = add_edge(ar, v, g);
setEdgeBounds(g, e_new, bound_min, bound_max);
- to_delete->push_back(e_old);
- }
-
- g[v].left.reset(); /* clear the prefix info */
- return true;
-}
-
-static
-bool handleStartDsPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v,
- const RoseEdge &e) {
- DEBUG_PRINTF("hi\n");
- /* check for prefix cliches connected to start-ds (.{N}, ^.{N,}) */
- u32 repeatCount = 0;
- NFAVertex hu = h.startDs;
-
+ to_delete->push_back(e_old);
+ }
+
+ g[v].left.reset(); /* clear the prefix info */
+ return true;
+}
+
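Concretely, for an anchored prefix equivalent to ^.{5,10} the PureRepeat bounds become the Rose edge bounds directly, with an infinite max mapped to the INF sentinel. A hedged sketch of that conversion (type alias and sentinel are stand-ins):

    #include <cstdint>
    #include <limits>

    using u32 = uint32_t;
    // Stand-in for ROSE_BOUND_INF.
    static const u32 kBoundInf = std::numeric_limits<u32>::max();

    struct Bounds { u32 min, max; };

    // Map a repeat {min,max} onto edge bounds: ^.{5,10} yields {5,10},
    // ^.{5,} yields {5,INF}.
    Bounds toEdgeBounds(u32 repeatMin, bool maxFinite, u32 repeatMax) {
        return {repeatMin, maxFinite ? repeatMax : kBoundInf};
    }
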
+static
+bool handleStartDsPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v,
+ const RoseEdge &e) {
+ DEBUG_PRINTF("hi\n");
+ /* check for prefix cliches connected to start-ds (.{N}, ^.{N,}) */
+ u32 repeatCount = 0;
+ NFAVertex hu = h.startDs;
+
auto start_succ = succs<set<NFAVertex>>(h.start, h);
auto startds_succ = succs<set<NFAVertex>>(h.startDs, h);
-
- if (!is_subset_of(start_succ, startds_succ)) {
- DEBUG_PRINTF("not a simple chain\n");
- return false;
- }
-
- set<NFAVertex> seen;
- do {
- if (!h[hu].char_reach.all()) {
- return false;
- }
- NFAVertex hv = getSoleDestVertex(h, hu);
- if (!hv) {
- return false;
- }
- if (contains(seen, hv)) {
- assert(0);
- return false;
- }
- hu = hv;
- repeatCount++;
- if (hu == h.accept) {
- break;
- }
- } while(1);
-
- assert(hu == h.accept);
-
- repeatCount--; /* do not count accept as part of the chain */
-
- DEBUG_PRINTF("prefix is ^.{%u,}\n", repeatCount);
-
- /* update bounds on edge */
- assert(g[e].minBound <= repeatCount);
+
+ if (!is_subset_of(start_succ, startds_succ)) {
+ DEBUG_PRINTF("not a simple chain\n");
+ return false;
+ }
+
+ set<NFAVertex> seen;
+ do {
+ if (!h[hu].char_reach.all()) {
+ return false;
+ }
+ NFAVertex hv = getSoleDestVertex(h, hu);
+ if (!hv) {
+ return false;
+ }
+ if (contains(seen, hv)) {
+ assert(0);
+ return false;
+ }
+ hu = hv;
+ repeatCount++;
+ if (hu == h.accept) {
+ break;
+ }
+ } while (1);
+
+ assert(hu == h.accept);
+
+ repeatCount--; /* do not count accept as part of the chain */
+
+ DEBUG_PRINTF("prefix is ^.{%u,}\n", repeatCount);
+
+ /* update bounds on edge */
+ assert(g[e].minBound <= repeatCount);
setEdgeBounds(g, e, repeatCount, ROSE_BOUND_INF);
-
- g[v].left.reset(); /* clear the prefix info */
-
- return true;
-}
-
-static
-bool handleMixedPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v,
- const RoseEdge &e_old, RoseVertex ar,
- vector<RoseEdge> *to_delete,
- const CompileContext &cc) {
- assert(in_degree(h.acceptEod, h) == 1);
-
- bool anchored = !proper_out_degree(h.startDs, h);
+
+ g[v].left.reset(); /* clear the prefix info */
+
+ return true;
+}
+
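The do/while above is a straightforward walk along a unary chain of all-reach vertices from startDs to accept; a prefix equivalent to ^.{3,} yields repeatCount == 3 and edge bounds {3, INF}. A standalone sketch of the same walk over a toy successor map (hypothetical types, cycle check elided):

    #include <map>
    #include <vector>

    // Toy successor map: vertex 0 plays startDs, -1 plays accept.
    // Returns the number of chain vertices before accept, or -1 if some
    // vertex lacks a sole successor (i.e. not a simple chain).
    int chainLength(const std::map<int, std::vector<int>> &succ) {
        int u = 0;
        int count = 0;
        while (true) {
            auto it = succ.find(u);
            if (it == succ.end() || it->second.size() != 1) {
                return -1;
            }
            u = it->second.front();
            ++count;
            if (u == -1) {
                break;
            }
        }
        return count - 1; // do not count the hop into accept
    }

    // For {0:{1}, 1:{2}, 2:{3}, 3:{-1}} this returns 3, matching the
    // repeatCount computed for a prefix like ^.{3,}.
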
+static
+bool handleMixedPrefixCliche(const NGHolder &h, RoseGraph &g, RoseVertex v,
+ const RoseEdge &e_old, RoseVertex ar,
+ vector<RoseEdge> *to_delete,
+ const CompileContext &cc) {
+ assert(in_degree(h.acceptEod, h) == 1);
+
+ bool anchored = !proper_out_degree(h.startDs, h);
NFAVertex key = NGHolder::null_vertex();
- NFAVertex base = anchored ? h.start : h.startDs;
-
- if (!anchored) {
+ NFAVertex base = anchored ? h.start : h.startDs;
+
+ if (!anchored) {
auto start_succ = succs<set<NFAVertex>>(h.start, h);
auto startds_succ = succs<set<NFAVertex>>(h.startDs, h);
-
- if (!is_subset_of(start_succ, startds_succ)) {
- DEBUG_PRINTF("not a simple chain\n");
- return false;
- }
- }
-
- for (auto w : adjacent_vertices_range(base, h)) {
+
+ if (!is_subset_of(start_succ, startds_succ)) {
+ DEBUG_PRINTF("not a simple chain\n");
+ return false;
+ }
+ }
+
+ for (auto w : adjacent_vertices_range(base, h)) {
DEBUG_PRINTF("checking %zu\n", h[w].index);
- if (!h[w].char_reach.all()) {
- continue;
- }
-
- if (!is_special(w, h)) {
- key = w;
- break;
- }
- }
-
- if (!key) {
- return false;
- }
-
- vector<GraphRepeatInfo> repeats;
- findRepeats(h, 2, &repeats);
-
- vector<GraphRepeatInfo>::const_iterator it;
- for (it = repeats.begin(); it != repeats.end(); ++it) {
- DEBUG_PRINTF("checking.. %zu verts\n", it->vertices.size());
- if (find(it->vertices.begin(), it->vertices.end(), key)
- != it->vertices.end()) {
- break;
- }
- }
- if (it == repeats.end()) {
- DEBUG_PRINTF("no repeat found\n");
- return false;
- }
-
- GraphRepeatInfo ri = *it;
-
- set<NFAVertex> exits_and_repeat_verts;
- for (auto repeat_v : ri.vertices) {
+ if (!h[w].char_reach.all()) {
+ continue;
+ }
+
+ if (!is_special(w, h)) {
+ key = w;
+ break;
+ }
+ }
+
+ if (!key) {
+ return false;
+ }
+
+ vector<GraphRepeatInfo> repeats;
+ findRepeats(h, 2, &repeats);
+
+ vector<GraphRepeatInfo>::const_iterator it;
+ for (it = repeats.begin(); it != repeats.end(); ++it) {
+ DEBUG_PRINTF("checking.. %zu verts\n", it->vertices.size());
+ if (find(it->vertices.begin(), it->vertices.end(), key)
+ != it->vertices.end()) {
+ break;
+ }
+ }
+ if (it == repeats.end()) {
+ DEBUG_PRINTF("no repeat found\n");
+ return false;
+ }
+
+ GraphRepeatInfo ri = *it;
+
+ set<NFAVertex> exits_and_repeat_verts;
+ for (auto repeat_v : ri.vertices) {
DEBUG_PRINTF("repeat vertex %zu\n", h[repeat_v].index);
- succ(h, repeat_v, &exits_and_repeat_verts);
- exits_and_repeat_verts.insert(repeat_v);
- }
-
- DEBUG_PRINTF("repeat {%s,%s}\n", ri.repeatMin.str().c_str(),
- ri.repeatMax.str().c_str());
-
- set<NFAVertex> rep_verts;
- insert(&rep_verts, ri.vertices);
-
- set<NFAVertex> exits;
- exits = exits_and_repeat_verts;
- erase_all(&exits, rep_verts);
-
+ succ(h, repeat_v, &exits_and_repeat_verts);
+ exits_and_repeat_verts.insert(repeat_v);
+ }
+
+ DEBUG_PRINTF("repeat {%s,%s}\n", ri.repeatMin.str().c_str(),
+ ri.repeatMax.str().c_str());
+
+ set<NFAVertex> rep_verts;
+ insert(&rep_verts, ri.vertices);
+
+ set<NFAVertex> exits;
+ exits = exits_and_repeat_verts;
+ erase_all(&exits, rep_verts);
+
auto base_succ = succs<set<NFAVertex>>(base, h);
- base_succ.erase(h.startDs);
-
- if (is_subset_of(base_succ, rep_verts)) {
- /* all good: repeat dominates the rest of the pattern */
- } else if (ri.repeatMin == depth(1)
- && is_subset_of(exits, base_succ)
- && is_subset_of(base_succ, exits_and_repeat_verts)) {
- /* we have a jump edge */
+ base_succ.erase(h.startDs);
+
+ if (is_subset_of(base_succ, rep_verts)) {
+ /* all good: repeat dominates the rest of the pattern */
+ } else if (ri.repeatMin == depth(1)
+ && is_subset_of(exits, base_succ)
+ && is_subset_of(base_succ, exits_and_repeat_verts)) {
+ /* we have a jump edge */
ri.repeatMin = depth(0);
- } else {
- return false;
- }
-
- DEBUG_PRINTF("repeat {%s,%s}\n", ri.repeatMin.str().c_str(),
- ri.repeatMax.str().c_str());
- DEBUG_PRINTF("woot?\n");
-
- shared_ptr<NGHolder> h_new = make_shared<NGHolder>();
+ } else {
+ return false;
+ }
+
+ DEBUG_PRINTF("repeat {%s,%s}\n", ri.repeatMin.str().c_str(),
+ ri.repeatMax.str().c_str());
+ DEBUG_PRINTF("woot?\n");
+
+ shared_ptr<NGHolder> h_new = make_shared<NGHolder>();
unordered_map<NFAVertex, NFAVertex> rhs_map;
- vector<NFAVertex> exits_vec;
- insert(&exits_vec, exits_vec.end(), exits);
- splitRHS(h, exits_vec, h_new.get(), &rhs_map);
- h_new->kind = NFA_PREFIX;
-
- if (num_vertices(*h_new) <= N_SPECIALS) {
- DEBUG_PRINTF("not a hybrid??\n");
- /* TODO: pick up these cases, unify code */
- return false;
- }
-
- for (auto w : adjacent_vertices_range(h_new->start, *h_new)) {
- if (w != h_new->startDs) {
- add_edge(h_new->startDs, w, *h_new);
- }
- }
- clear_out_edges(h_new->start, *h_new);
- add_edge(h_new->start, h_new->startDs, *h_new);
-
- depth width = findMinWidth(*h_new);
- if (width != findMaxWidth(*h_new)) {
- return false;
- }
-
- if (g[v].left.dfa) {
- /* we were unable to implement initial graph as an nfa;
- * we need to to check if we still need a dfa and, if so, rebuild. */
- if (!isImplementableNFA(*h_new, nullptr, cc)) {
- return false; /* TODO: handle rebuilding dfa */
- }
- }
-
- if (anchored) {
- if (ri.repeatMax.is_infinite()) {
- return false; /* TODO */
- }
-
- if (source(e_old, g) == ar) {
+ vector<NFAVertex> exits_vec;
+ insert(&exits_vec, exits_vec.end(), exits);
+ splitRHS(h, exits_vec, h_new.get(), &rhs_map);
+ h_new->kind = NFA_PREFIX;
+
+ if (num_vertices(*h_new) <= N_SPECIALS) {
+ DEBUG_PRINTF("not a hybrid??\n");
+ /* TODO: pick up these cases, unify code */
+ return false;
+ }
+
+ for (auto w : adjacent_vertices_range(h_new->start, *h_new)) {
+ if (w != h_new->startDs) {
+ add_edge(h_new->startDs, w, *h_new);
+ }
+ }
+ clear_out_edges(h_new->start, *h_new);
+ add_edge(h_new->start, h_new->startDs, *h_new);
+
+ depth width = findMinWidth(*h_new);
+ if (width != findMaxWidth(*h_new)) {
+ return false;
+ }
+
+ if (g[v].left.dfa) {
+ /* we were unable to implement the initial graph as an nfa;
+ * we need to check if we still need a dfa and, if so, rebuild. */
+ if (!isImplementableNFA(*h_new, nullptr, cc)) {
+ return false; /* TODO: handle rebuilding dfa */
+ }
+ }
+
+ if (anchored) {
+ if (ri.repeatMax.is_infinite()) {
+ return false; /* TODO */
+ }
+
+ if (source(e_old, g) == ar) {
setEdgeBounds(g, e_old, ri.repeatMin + width, ri.repeatMax + width);
- } else {
+ } else {
RoseEdge e_new = add_edge(ar, v, g);
setEdgeBounds(g, e_new, ri.repeatMin + width, ri.repeatMax + width);
- to_delete->push_back(e_old);
- }
-
- } else {
- assert(g[e_old].minBound <= ri.repeatMin + width);
+ to_delete->push_back(e_old);
+ }
+
+ } else {
+ assert(g[e_old].minBound <= ri.repeatMin + width);
setEdgeBounds(g, e_old, ri.repeatMin + width, ROSE_BOUND_INF);
- }
-
- g[v].left.dfa.reset();
- g[v].left.graph = h_new;
-
- return true;
-}
-
-/* turns simple prefixes like /^.{30,} into bounds on the root roles */
-void convertPrefixToBounds(RoseBuildImpl &tbi) {
- RoseGraph &g = tbi.g;
-
- vector<RoseEdge> to_delete;
- RoseVertex ar = tbi.anchored_root;
-
- /* graphs with prefixes produced by rose are wired to tbi.root */
-
- for (const auto &e : out_edges_range(tbi.root, g)) {
- RoseVertex v = target(e, g);
-
- if (in_degree(v, g) != 1) {
- continue;
- }
-
- if (!g[v].left.graph) {
- continue;
- }
-
- if (g[v].left.tracksSom()) {
- continue;
- }
-
- const NGHolder &h = *g[v].left.graph;
-
- if (g[v].left.lag != tbi.minLiteralLen(v)
- || g[v].left.lag != tbi.maxLiteralLen(v)) {
- continue;
- }
-
- if (all_reports(h).size() != 1) {
- assert(0);
- continue;
- }
-
+ }
+
+ g[v].left.dfa.reset();
+ g[v].left.graph = h_new;
+
+ return true;
+}
+
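In the mixed case above, the detected repeat's bounds are shifted by the fixed width of the split-off tail before being written to the edge: a prefix equivalent to ^.{2,5} followed by a fixed 3-wide mask yields edge bounds {5,8}. A tiny numeric sketch with assumed values:

    #include <cstdint>

    int main() {
        uint32_t repeatMin = 2, repeatMax = 5; // dot-repeat bounds
        uint32_t width = 3;                    // fixed width of the split RHS
        uint32_t minBound = repeatMin + width; // == 5
        uint32_t maxBound = repeatMax + width; // == 8
        return (minBound == 5 && maxBound == 8) ? 0 : 1;
    }
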
+/* turns simple prefixes like /^.{30,}/ into bounds on the root roles */
+void convertPrefixToBounds(RoseBuildImpl &tbi) {
+ RoseGraph &g = tbi.g;
+
+ vector<RoseEdge> to_delete;
+ RoseVertex ar = tbi.anchored_root;
+
+ /* graphs with prefixes produced by rose are wired to tbi.root */
+
+ for (const auto &e : out_edges_range(tbi.root, g)) {
+ RoseVertex v = target(e, g);
+
+ if (in_degree(v, g) != 1) {
+ continue;
+ }
+
+ if (!g[v].left.graph) {
+ continue;
+ }
+
+ if (g[v].left.tracksSom()) {
+ continue;
+ }
+
+ const NGHolder &h = *g[v].left.graph;
+
+ if (g[v].left.lag != tbi.minLiteralLen(v)
+ || g[v].left.lag != tbi.maxLiteralLen(v)) {
+ continue;
+ }
+
+ if (all_reports(h).size() != 1) {
+ assert(0);
+ continue;
+ }
+
DEBUG_PRINTF("inspecting prefix of %zu\n", g[v].index);
-
- if (!proper_out_degree(h.startDs, h)) {
- if (handleStartPrefixCliche(h, g, v, e, ar, &to_delete)) {
- continue;
- }
- } else {
- if (handleStartDsPrefixCliche(h, g, v, e)) {
- continue;
- }
- }
-
- /* prefix is not just a simple dot repeat. However, it is still
- * possible that it consists of dot repeat and fixed width mask that we
- * can handle. */
- handleMixedPrefixCliche(h, g, v, e, ar, &to_delete, tbi.cc);
- }
-
- for (const auto &e : out_edges_range(ar, g)) {
- RoseVertex v = target(e, g);
-
- /* note: vertices that we have rehomed will currently have an in-degree
- * of 2 */
- if (in_degree(v, g) != 1) {
- continue;
- }
-
- if (!g[v].left.graph) {
- continue;
- }
-
- if (g[v].left.tracksSom()) {
- continue;
- }
-
- if (g[v].left.lag != tbi.minLiteralLen(v)
- || g[v].left.lag != tbi.maxLiteralLen(v)) {
- continue;
- }
-
- const NGHolder &h = *g[v].left.graph;
- if (all_reports(h).size() != 1) {
- assert(0);
- continue;
- }
-
+
+ if (!proper_out_degree(h.startDs, h)) {
+ if (handleStartPrefixCliche(h, g, v, e, ar, &to_delete)) {
+ continue;
+ }
+ } else {
+ if (handleStartDsPrefixCliche(h, g, v, e)) {
+ continue;
+ }
+ }
+
+ /* prefix is not just a simple dot repeat. However, it is still
+ * possible that it consists of dot repeat and fixed width mask that we
+ * can handle. */
+ handleMixedPrefixCliche(h, g, v, e, ar, &to_delete, tbi.cc);
+ }
+
+ for (const auto &e : out_edges_range(ar, g)) {
+ RoseVertex v = target(e, g);
+
+ /* note: vertices that we have rehomed will currently have an in-degree
+ * of 2 */
+ if (in_degree(v, g) != 1) {
+ continue;
+ }
+
+ if (!g[v].left.graph) {
+ continue;
+ }
+
+ if (g[v].left.tracksSom()) {
+ continue;
+ }
+
+ if (g[v].left.lag != tbi.minLiteralLen(v)
+ || g[v].left.lag != tbi.maxLiteralLen(v)) {
+ continue;
+ }
+
+ const NGHolder &h = *g[v].left.graph;
+ if (all_reports(h).size() != 1) {
+ assert(0);
+ continue;
+ }
+
DEBUG_PRINTF("inspecting prefix of %zu\n", g[v].index);
-
- if (!proper_out_degree(h.startDs, h)) {
- if (handleStartPrefixCliche(h, g, v, e, ar, &to_delete)) {
- continue;
- }
- } else {
- if (handleStartDsPrefixCliche(h, g, v, e)) {
- continue;
- }
- }
-
- /* prefix is not just a simple dot repeat. However, it is still
- * possible that it consists of dot repeat and fixed width mask that we
- * can handle. */
- handleMixedPrefixCliche(h, g, v, e, ar, &to_delete, tbi.cc);
- }
-
- for (const auto &e : to_delete) {
- remove_edge(e, g);
- }
-}
-
-/**
- * Identify dot-repeat infixes after fixed-depth literals and convert them to
- * edges with ROSE_ROLE_HISTORY_ANCH history and equivalent bounds.
- */
-void convertAnchPrefixToBounds(RoseBuildImpl &tbi) {
- RoseGraph &g = tbi.g;
-
- for (const auto v : vertices_range(g)) {
- if (!g[v].left) {
- continue;
- }
-
+
+ if (!proper_out_degree(h.startDs, h)) {
+ if (handleStartPrefixCliche(h, g, v, e, ar, &to_delete)) {
+ continue;
+ }
+ } else {
+ if (handleStartDsPrefixCliche(h, g, v, e)) {
+ continue;
+ }
+ }
+
+ /* prefix is not just a simple dot repeat. However, it is still
+ * possible that it consists of dot repeat and fixed width mask that we
+ * can handle. */
+ handleMixedPrefixCliche(h, g, v, e, ar, &to_delete, tbi.cc);
+ }
+
+ for (const auto &e : to_delete) {
+ remove_edge(e, g);
+ }
+}
+
+/**
+ * Identify dot-repeat infixes after fixed-depth literals and convert them to
+ * edges with ROSE_ROLE_HISTORY_ANCH history and equivalent bounds.
+ */
+void convertAnchPrefixToBounds(RoseBuildImpl &tbi) {
+ RoseGraph &g = tbi.g;
+
+ for (const auto v : vertices_range(g)) {
+ if (!g[v].left) {
+ continue;
+ }
+
DEBUG_PRINTF("vertex %zu\n", g[v].index);
-
- // This pass runs after makeCastles, so we use the fact that bounded
- // repeat detection has already been done for us.
-
- if (!g[v].left.castle) {
- DEBUG_PRINTF("not a castle\n");
- continue;
- }
-
- const CastleProto &castle = *g[v].left.castle;
-
- if (castle.repeats.size() != 1) {
- DEBUG_PRINTF("too many repeats\n");
- assert(0); // Castles should not have been merged yet.
- continue;
- }
-
- if (!castle.reach().all()) {
- DEBUG_PRINTF("not dot\n");
- continue;
- }
-
- if (in_degree(v, g) != 1) {
- DEBUG_PRINTF("too many in-edges\n");
- continue;
- }
-
- RoseEdge e = *in_edges(v, g).first;
- RoseVertex u = source(e, g);
-
- if (g[e].history != ROSE_ROLE_HISTORY_NONE) {
- DEBUG_PRINTF("history already set to something other than NONE?\n");
- assert(0);
- continue;
- }
-
- if (g[u].min_offset != g[u].max_offset) {
- DEBUG_PRINTF("pred not fixed offset\n");
- continue;
- }
- DEBUG_PRINTF("pred is fixed offset, at %u\n", g[u].min_offset);
- assert(g[u].min_offset < ROSE_BOUND_INF);
-
- size_t lit_length = tbi.minLiteralLen(v);
- if (lit_length != tbi.maxLiteralLen(v)) {
- assert(0);
- DEBUG_PRINTF("variable literal lengths\n");
- continue;
- }
-
- u32 lag = g[v].left.lag;
- DEBUG_PRINTF("lit_length=%zu, lag=%u\n", lit_length, lag);
- assert(lag <= lit_length);
- depth delay_adj(lit_length - lag);
-
- const PureRepeat &pr = castle.repeats.begin()->second;
- DEBUG_PRINTF("castle has repeat %s\n", pr.bounds.str().c_str());
+
+ // This pass runs after makeCastles, so we use the fact that bounded
+ // repeat detection has already been done for us.
+
+ if (!g[v].left.castle) {
+ DEBUG_PRINTF("not a castle\n");
+ continue;
+ }
+
+ const CastleProto &castle = *g[v].left.castle;
+
+ if (castle.repeats.size() != 1) {
+ DEBUG_PRINTF("too many repeats\n");
+ assert(0); // Castles should not have been merged yet.
+ continue;
+ }
+
+ if (!castle.reach().all()) {
+ DEBUG_PRINTF("not dot\n");
+ continue;
+ }
+
+ if (in_degree(v, g) != 1) {
+ DEBUG_PRINTF("too many in-edges\n");
+ continue;
+ }
+
+ RoseEdge e = *in_edges(v, g).first;
+ RoseVertex u = source(e, g);
+
+ if (g[e].history != ROSE_ROLE_HISTORY_NONE) {
+ DEBUG_PRINTF("history already set to something other than NONE?\n");
+ assert(0);
+ continue;
+ }
+
+ if (g[u].min_offset != g[u].max_offset) {
+ DEBUG_PRINTF("pred not fixed offset\n");
+ continue;
+ }
+ DEBUG_PRINTF("pred is fixed offset, at %u\n", g[u].min_offset);
+ assert(g[u].min_offset < ROSE_BOUND_INF);
+
+ size_t lit_length = tbi.minLiteralLen(v);
+ if (lit_length != tbi.maxLiteralLen(v)) {
+ assert(0);
+ DEBUG_PRINTF("variable literal lengths\n");
+ continue;
+ }
+
+ u32 lag = g[v].left.lag;
+ DEBUG_PRINTF("lit_length=%zu, lag=%u\n", lit_length, lag);
+ assert(lag <= lit_length);
+ depth delay_adj(lit_length - lag);
+
+ const PureRepeat &pr = castle.repeats.begin()->second;
+ DEBUG_PRINTF("castle has repeat %s\n", pr.bounds.str().c_str());
DEBUG_PRINTF("delay adj %u\n", (u32)delay_adj);
-
+
if (delay_adj >= pr.bounds.max) {
DEBUG_PRINTF("delay adj too large\n");
continue;
}
- DepthMinMax bounds(pr.bounds); // copy
- if (delay_adj > bounds.min) {
+ DepthMinMax bounds(pr.bounds); // copy
+ if (delay_adj > bounds.min) {
bounds.min = depth(0);
} else {
bounds.min -= delay_adj;
- }
- bounds.max -= delay_adj;
+ }
+ bounds.max -= delay_adj;
setEdgeBounds(g, e, bounds.min, bounds.max.is_finite()
? (u32)bounds.max
: ROSE_BOUND_INF);
- g[v].left.reset();
- }
-}
-
-} // namespace ue2
+ g[v].left.reset();
+ }
+}
+
+} // namespace ue2
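
To make the delay adjustment in convertAnchPrefixToBounds concrete: with a fixed literal of length 6, a leftfix lag of 2 and a dot-repeat castle {10,20}, delay_adj is 4 and the resulting anchored edge gets bounds {6,16}. A minimal sketch of that clamped arithmetic (values hypothetical):

    #include <cstdint>

    int main() {
        uint32_t lit_length = 6, lag = 2;      // hypothetical role literal
        uint32_t delay_adj = lit_length - lag; // == 4
        uint32_t rep_min = 10, rep_max = 20;   // castle repeat {10,20}

        // Mirror the clamped subtraction above; rep_max assumed finite here.
        uint32_t bound_min = delay_adj > rep_min ? 0 : rep_min - delay_adj; // 6
        uint32_t bound_max = rep_max - delay_adj;                           // 16
        return (bound_min == 6 && bound_max == 16) ? 0 : 1;
    }
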
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_convert.h b/contrib/libs/hyperscan/src/rose/rose_build_convert.h
index 35dbad2a51..7307c213ca 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_convert.h
+++ b/contrib/libs/hyperscan/src/rose/rose_build_convert.h
@@ -1,42 +1,42 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef ROSE_BUILD_CONVERT_H
-#define ROSE_BUILD_CONVERT_H
-
-namespace ue2 {
-
-class RoseBuildImpl;
-
-void convertFloodProneSuffixes(RoseBuildImpl &tbi);
-void convertPrefixToBounds(RoseBuildImpl &tbi);
-void convertAnchPrefixToBounds(RoseBuildImpl &tbi);
-
-} // namespace ue2
-
-#endif
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ROSE_BUILD_CONVERT_H
+#define ROSE_BUILD_CONVERT_H
+
+namespace ue2 {
+
+class RoseBuildImpl;
+
+void convertFloodProneSuffixes(RoseBuildImpl &tbi);
+void convertPrefixToBounds(RoseBuildImpl &tbi);
+void convertAnchPrefixToBounds(RoseBuildImpl &tbi);
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_dump.h b/contrib/libs/hyperscan/src/rose/rose_build_dump.h
index 31daec5574..d4c620a3e6 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_dump.h
+++ b/contrib/libs/hyperscan/src/rose/rose_build_dump.h
@@ -1,55 +1,55 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef ROSE_BUILD_DUMP_H
-#define ROSE_BUILD_DUMP_H
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ROSE_BUILD_DUMP_H
+#define ROSE_BUILD_DUMP_H
+
#include "ue2common.h"
#include <map>
#include <string>
#include <vector>
-struct RoseEngine;
-
-namespace ue2 {
-
+struct RoseEngine;
+
+namespace ue2 {
+
class RoseBuildImpl;
-struct Grey;
+struct Grey;
struct hwlmLiteral;
struct LitFragment;
struct left_id;
struct suffix_id;
-
-#ifdef DUMP_SUPPORT
-// Dump the Rose graph in graphviz representation.
+
+#ifdef DUMP_SUPPORT
+// Dump the Rose graph in graphviz representation.
void dumpRoseGraph(const RoseBuildImpl &build, const char *filename);
-
+
void dumpRose(const RoseBuildImpl &build,
const std::vector<LitFragment> &fragments,
const std::map<left_id, u32> &leftfix_queue_map,
@@ -59,25 +59,25 @@ void dumpRose(const RoseBuildImpl &build,
void dumpMatcherLiterals(const std::vector<hwlmLiteral> &lits,
const std::string &name, const Grey &grey);
-#else
-
-static UNUSED
+#else
+
+static UNUSED
void dumpRoseGraph(const RoseBuildImpl &, const char *) {
-}
-
-static UNUSED
+}
+
+static UNUSED
void dumpRose(const RoseBuildImpl &, const std::vector<LitFragment> &,
const std::map<left_id, u32> &, const std::map<suffix_id, u32> &,
const RoseEngine *) {
-}
-
+}
+
static UNUSED
void dumpMatcherLiterals(const std::vector<hwlmLiteral> &, const std::string &,
const Grey &) {
}
-#endif
-
-} // namespace ue2
-
-#endif
+#endif
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_impl.h b/contrib/libs/hyperscan/src/rose/rose_build_impl.h
index 9c601f1e5f..7780848b1b 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_impl.h
+++ b/contrib/libs/hyperscan/src/rose/rose_build_impl.h
@@ -1,64 +1,64 @@
-/*
+/*
* Copyright (c) 2015-2019, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
#ifndef ROSE_BUILD_IMPL_H
#define ROSE_BUILD_IMPL_H
-
-#include "rose_build.h"
-#include "rose_build_util.h"
+
+#include "rose_build.h"
+#include "rose_build_util.h"
#include "rose_common.h"
-#include "rose_graph.h"
-#include "nfa/mpvcompile.h"
-#include "nfa/goughcompile.h"
-#include "nfa/nfa_internal.h"
-#include "nfagraph/ng_holder.h"
-#include "nfagraph/ng_revacc.h"
+#include "rose_graph.h"
+#include "nfa/mpvcompile.h"
+#include "nfa/goughcompile.h"
+#include "nfa/nfa_internal.h"
+#include "nfagraph/ng_holder.h"
+#include "nfagraph/ng_revacc.h"
#include "util/bytecode_ptr.h"
#include "util/flat_containers.h"
#include "util/hash.h"
-#include "util/order_check.h"
-#include "util/queue_index_factory.h"
+#include "util/order_check.h"
+#include "util/queue_index_factory.h"
#include "util/ue2string.h"
#include "util/unordered.h"
#include "util/verify_types.h"
-
-#include <deque>
-#include <map>
-#include <string>
-#include <vector>
+
+#include <deque>
+#include <map>
+#include <string>
+#include <vector>
#include <boost/variant.hpp>
-
-struct RoseEngine;
-
-namespace ue2 {
-
-#define ROSE_GROUPS_MAX 64
-
+
+struct RoseEngine;
+
+namespace ue2 {
+
+#define ROSE_GROUPS_MAX 64
+
#define ROSE_LONG_LITERAL_THRESHOLD_MIN 33
/**
@@ -72,66 +72,66 @@ namespace ue2 {
*/
#define ROSE_SHORT_LITERAL_LEN_MAX 8
-struct BoundaryReports;
-struct CastleProto;
-struct CompileContext;
-class ReportManager;
+struct BoundaryReports;
+struct CastleProto;
+struct CompileContext;
+class ReportManager;
class SmallWriteBuild;
-class SomSlotManager;
-
-struct suffix_id {
- suffix_id(const RoseSuffixInfo &in)
- : g(in.graph.get()), c(in.castle.get()), d(in.rdfa.get()),
+class SomSlotManager;
+
+struct suffix_id {
+ suffix_id(const RoseSuffixInfo &in)
+ : g(in.graph.get()), c(in.castle.get()), d(in.rdfa.get()),
h(in.haig.get()), t(in.tamarama.get()),
dfa_min_width(in.dfa_min_width),
- dfa_max_width(in.dfa_max_width) {
- assert(!g || g->kind == NFA_SUFFIX);
- }
- bool operator==(const suffix_id &b) const {
+ dfa_max_width(in.dfa_max_width) {
+ assert(!g || g->kind == NFA_SUFFIX);
+ }
+ bool operator==(const suffix_id &b) const {
bool rv = g == b.g && c == b.c && h == b.h && d == b.d && t == b.t;
- assert(!rv || dfa_min_width == b.dfa_min_width);
- assert(!rv || dfa_max_width == b.dfa_max_width);
- return rv;
- }
- bool operator!=(const suffix_id &b) const { return !(*this == b); }
- bool operator<(const suffix_id &b) const {
- const suffix_id &a = *this;
- ORDER_CHECK(g);
- ORDER_CHECK(c);
- ORDER_CHECK(d);
- ORDER_CHECK(h);
+ assert(!rv || dfa_min_width == b.dfa_min_width);
+ assert(!rv || dfa_max_width == b.dfa_max_width);
+ return rv;
+ }
+ bool operator!=(const suffix_id &b) const { return !(*this == b); }
+ bool operator<(const suffix_id &b) const {
+ const suffix_id &a = *this;
+ ORDER_CHECK(g);
+ ORDER_CHECK(c);
+ ORDER_CHECK(d);
+ ORDER_CHECK(h);
ORDER_CHECK(t);
- return false;
- }
-
- NGHolder *graph() {
- if (!d && !h) {
- assert(dfa_min_width == depth(0));
- assert(dfa_max_width == depth::infinity());
- }
- return g;
- }
- const NGHolder *graph() const {
- if (!d && !h) {
- assert(dfa_min_width == depth(0));
- assert(dfa_max_width == depth::infinity());
- }
- return g;
- }
- CastleProto *castle() {
- if (!d && !h) {
- assert(dfa_min_width == depth(0));
- assert(dfa_max_width == depth::infinity());
- }
- return c;
- }
- const CastleProto *castle() const {
- if (!d && !h) {
- assert(dfa_min_width == depth(0));
- assert(dfa_max_width == depth::infinity());
- }
- return c;
- }
+ return false;
+ }
+
+ NGHolder *graph() {
+ if (!d && !h) {
+ assert(dfa_min_width == depth(0));
+ assert(dfa_max_width == depth::infinity());
+ }
+ return g;
+ }
+ const NGHolder *graph() const {
+ if (!d && !h) {
+ assert(dfa_min_width == depth(0));
+ assert(dfa_max_width == depth::infinity());
+ }
+ return g;
+ }
+ CastleProto *castle() {
+ if (!d && !h) {
+ assert(dfa_min_width == depth(0));
+ assert(dfa_max_width == depth::infinity());
+ }
+ return c;
+ }
+ const CastleProto *castle() const {
+ if (!d && !h) {
+ assert(dfa_min_width == depth(0));
+ assert(dfa_max_width == depth::infinity());
+ }
+ return c;
+ }
TamaProto *tamarama() {
if (!d && !h) {
assert(dfa_min_width == depth(0));
@@ -148,148 +148,148 @@ struct suffix_id {
}
- raw_som_dfa *haig() { return h; }
- const raw_som_dfa *haig() const { return h; }
- raw_dfa *dfa() { return d; }
- const raw_dfa *dfa() const { return d; }
-
- size_t hash() const;
-
-private:
- NGHolder *g;
- CastleProto *c;
- raw_dfa *d;
- raw_som_dfa *h;
+ raw_som_dfa *haig() { return h; }
+ const raw_som_dfa *haig() const { return h; }
+ raw_dfa *dfa() { return d; }
+ const raw_dfa *dfa() const { return d; }
+
+ size_t hash() const;
+
+private:
+ NGHolder *g;
+ CastleProto *c;
+ raw_dfa *d;
+ raw_som_dfa *h;
TamaProto *t;
- depth dfa_min_width;
- depth dfa_max_width;
-
- friend depth findMinWidth(const suffix_id &s);
- friend depth findMaxWidth(const suffix_id &s);
- friend depth findMinWidth(const suffix_id &s, u32 top);
- friend depth findMaxWidth(const suffix_id &s, u32 top);
-};
-
-std::set<ReportID> all_reports(const suffix_id &s);
-std::set<u32> all_tops(const suffix_id &s);
-bool has_eod_accepts(const suffix_id &s);
-bool has_non_eod_accepts(const suffix_id &s);
-depth findMinWidth(const suffix_id &s);
-depth findMaxWidth(const suffix_id &s);
-depth findMinWidth(const suffix_id &s, u32 top);
-depth findMaxWidth(const suffix_id &s, u32 top);
-
-/** \brief represents an engine to the left of a rose role */
-struct left_id {
- left_id(const LeftEngInfo &in)
- : g(in.graph.get()), c(in.castle.get()), d(in.dfa.get()),
- h(in.haig.get()), dfa_min_width(in.dfa_min_width),
- dfa_max_width(in.dfa_max_width) {
+ depth dfa_min_width;
+ depth dfa_max_width;
+
+ friend depth findMinWidth(const suffix_id &s);
+ friend depth findMaxWidth(const suffix_id &s);
+ friend depth findMinWidth(const suffix_id &s, u32 top);
+ friend depth findMaxWidth(const suffix_id &s, u32 top);
+};
+
+std::set<ReportID> all_reports(const suffix_id &s);
+std::set<u32> all_tops(const suffix_id &s);
+bool has_eod_accepts(const suffix_id &s);
+bool has_non_eod_accepts(const suffix_id &s);
+depth findMinWidth(const suffix_id &s);
+depth findMaxWidth(const suffix_id &s);
+depth findMinWidth(const suffix_id &s, u32 top);
+depth findMaxWidth(const suffix_id &s, u32 top);
+
+/** \brief represents an engine to the left of a rose role */
+struct left_id {
+ left_id(const LeftEngInfo &in)
+ : g(in.graph.get()), c(in.castle.get()), d(in.dfa.get()),
+ h(in.haig.get()), dfa_min_width(in.dfa_min_width),
+ dfa_max_width(in.dfa_max_width) {
assert(!g || !has_managed_reports(*g));
- }
- bool operator==(const left_id &b) const {
- bool rv = g == b.g && c == b.c && h == b.h && d == b.d;
- assert(!rv || dfa_min_width == b.dfa_min_width);
- assert(!rv || dfa_max_width == b.dfa_max_width);
- return rv;
- }
- bool operator!=(const left_id &b) const { return !(*this == b); }
- bool operator<(const left_id &b) const {
- const left_id &a = *this;
- ORDER_CHECK(g);
- ORDER_CHECK(c);
- ORDER_CHECK(d);
- ORDER_CHECK(h);
- return false;
- }
-
- NGHolder *graph() {
- if (!d && !h) {
- assert(dfa_min_width == depth(0));
- assert(dfa_max_width == depth::infinity());
- }
- return g;
- }
- const NGHolder *graph() const {
- if (!d && !h) {
- assert(dfa_min_width == depth(0));
- assert(dfa_max_width == depth::infinity());
- }
- return g;
- }
- CastleProto *castle() {
- if (!d && !h) {
- assert(dfa_min_width == depth(0));
- assert(dfa_max_width == depth::infinity());
- }
-
- return c;
- }
- const CastleProto *castle() const {
- if (!d && !h) {
- assert(dfa_min_width == depth(0));
- assert(dfa_max_width == depth::infinity());
- }
-
- return c;
- }
- raw_som_dfa *haig() { return h; }
- const raw_som_dfa *haig() const { return h; }
- raw_dfa *dfa() { return d; }
- const raw_dfa *dfa() const { return d; }
-
- size_t hash() const;
-
-private:
- NGHolder *g;
- CastleProto *c;
- raw_dfa *d;
- raw_som_dfa *h;
- depth dfa_min_width;
- depth dfa_max_width;
-
- friend bool isAnchored(const left_id &r);
- friend depth findMinWidth(const left_id &r);
- friend depth findMaxWidth(const left_id &r);
-};
-
-std::set<u32> all_tops(const left_id &r);
+ }
+ bool operator==(const left_id &b) const {
+ bool rv = g == b.g && c == b.c && h == b.h && d == b.d;
+ assert(!rv || dfa_min_width == b.dfa_min_width);
+ assert(!rv || dfa_max_width == b.dfa_max_width);
+ return rv;
+ }
+ bool operator!=(const left_id &b) const { return !(*this == b); }
+ bool operator<(const left_id &b) const {
+ const left_id &a = *this;
+ ORDER_CHECK(g);
+ ORDER_CHECK(c);
+ ORDER_CHECK(d);
+ ORDER_CHECK(h);
+ return false;
+ }
+
+ NGHolder *graph() {
+ if (!d && !h) {
+ assert(dfa_min_width == depth(0));
+ assert(dfa_max_width == depth::infinity());
+ }
+ return g;
+ }
+ const NGHolder *graph() const {
+ if (!d && !h) {
+ assert(dfa_min_width == depth(0));
+ assert(dfa_max_width == depth::infinity());
+ }
+ return g;
+ }
+ CastleProto *castle() {
+ if (!d && !h) {
+ assert(dfa_min_width == depth(0));
+ assert(dfa_max_width == depth::infinity());
+ }
+
+ return c;
+ }
+ const CastleProto *castle() const {
+ if (!d && !h) {
+ assert(dfa_min_width == depth(0));
+ assert(dfa_max_width == depth::infinity());
+ }
+
+ return c;
+ }
+ raw_som_dfa *haig() { return h; }
+ const raw_som_dfa *haig() const { return h; }
+ raw_dfa *dfa() { return d; }
+ const raw_dfa *dfa() const { return d; }
+
+ size_t hash() const;
+
+private:
+ NGHolder *g;
+ CastleProto *c;
+ raw_dfa *d;
+ raw_som_dfa *h;
+ depth dfa_min_width;
+ depth dfa_max_width;
+
+ friend bool isAnchored(const left_id &r);
+ friend depth findMinWidth(const left_id &r);
+ friend depth findMaxWidth(const left_id &r);
+};
+
+std::set<u32> all_tops(const left_id &r);
std::set<ReportID> all_reports(const left_id &left);
-bool isAnchored(const left_id &r);
-depth findMinWidth(const left_id &r);
-depth findMaxWidth(const left_id &r);
-u32 num_tops(const left_id &r);
-
-struct rose_literal_info {
+bool isAnchored(const left_id &r);
+depth findMinWidth(const left_id &r);
+depth findMaxWidth(const left_id &r);
+u32 num_tops(const left_id &r);
+
+struct rose_literal_info {
flat_set<u32> delayed_ids;
flat_set<RoseVertex> vertices;
- rose_group group_mask = 0;
- u32 undelayed_id = MO_INVALID_IDX;
- bool squash_group = false;
- bool requires_benefits = false;
-};
-
-/**
- * \brief Main literal struct used at Rose build time. Numeric literal IDs
- * used at build time point at these (via the RoseBuildImpl::literals map).
- */
-struct rose_literal_id {
- rose_literal_id(const ue2_literal &s_in, rose_literal_table table_in,
- u32 delay_in)
- : s(s_in), table(table_in), delay(delay_in), distinctiveness(0) {}
-
- rose_literal_id(const ue2_literal &s_in, const std::vector<u8> &msk_in,
- const std::vector<u8> &cmp_in, rose_literal_table table_in,
- u32 delay_in);
-
- ue2_literal s;
- std::vector<u8> msk;
- std::vector<u8> cmp;
- rose_literal_table table;
- u32 delay;
- u32 distinctiveness;
-
- size_t elength(void) const { return s.length() + delay; }
+ rose_group group_mask = 0;
+ u32 undelayed_id = MO_INVALID_IDX;
+ bool squash_group = false;
+ bool requires_benefits = false;
+};
+
+/**
+ * \brief Main literal struct used at Rose build time. Numeric literal IDs
+ * used at build time point at these (via the RoseBuildImpl::literals map).
+ */
+struct rose_literal_id {
+ rose_literal_id(const ue2_literal &s_in, rose_literal_table table_in,
+ u32 delay_in)
+ : s(s_in), table(table_in), delay(delay_in), distinctiveness(0) {}
+
+ rose_literal_id(const ue2_literal &s_in, const std::vector<u8> &msk_in,
+ const std::vector<u8> &cmp_in, rose_literal_table table_in,
+ u32 delay_in);
+
+ ue2_literal s;
+ std::vector<u8> msk;
+ std::vector<u8> cmp;
+ rose_literal_table table;
+ u32 delay;
+ u32 distinctiveness;
+
+ size_t elength(void) const { return s.length() + delay; }
size_t elength_including_mask(void) const {
size_t mask_len = msk.size();
for (u8 c : msk) {
@@ -310,19 +310,19 @@ struct rose_literal_id {
size_t hash() const {
return hash_all(s, msk, cmp, table, delay, distinctiveness);
}
-};
-
-static inline
-bool operator<(const rose_literal_id &a, const rose_literal_id &b) {
- ORDER_CHECK(distinctiveness);
- ORDER_CHECK(table);
- ORDER_CHECK(s);
- ORDER_CHECK(delay);
- ORDER_CHECK(msk);
- ORDER_CHECK(cmp);
-    return false;
-}
-
+};
+
+static inline
+bool operator<(const rose_literal_id &a, const rose_literal_id &b) {
+ ORDER_CHECK(distinctiveness);
+ ORDER_CHECK(table);
+ ORDER_CHECK(s);
+ ORDER_CHECK(delay);
+ ORDER_CHECK(msk);
+ ORDER_CHECK(cmp);
+    return false;
+}
+
class RoseLiteralMap {
/**
* \brief Main storage for literals.
@@ -332,7 +332,7 @@ class RoseLiteralMap {
* the loop.
*/
std::deque<rose_literal_id> lits;
-
+
/** \brief Quick-lookup index from literal -> index in lits. */
ue2_unordered_map<rose_literal_id, u32> lits_index;
@@ -372,68 +372,68 @@ public:
}
};
-struct simple_anchored_info {
- simple_anchored_info(u32 min_b, u32 max_b, const ue2_literal &lit)
- : min_bound(min_b), max_bound(max_b), literal(lit) {}
- u32 min_bound; /**< min number of characters required before literal can
- * start matching */
- u32 max_bound; /**< max number of characters allowed before literal can
- * start matching */
- ue2_literal literal;
-};
-
-static really_inline
-bool operator<(const simple_anchored_info &a, const simple_anchored_info &b) {
- ORDER_CHECK(min_bound);
- ORDER_CHECK(max_bound);
- ORDER_CHECK(literal);
-    return false;
-}
-
+struct simple_anchored_info {
+ simple_anchored_info(u32 min_b, u32 max_b, const ue2_literal &lit)
+ : min_bound(min_b), max_bound(max_b), literal(lit) {}
+ u32 min_bound; /**< min number of characters required before literal can
+ * start matching */
+ u32 max_bound; /**< max number of characters allowed before literal can
+ * start matching */
+ ue2_literal literal;
+};
+
+static really_inline
+bool operator<(const simple_anchored_info &a, const simple_anchored_info &b) {
+ ORDER_CHECK(min_bound);
+ ORDER_CHECK(max_bound);
+ ORDER_CHECK(literal);
+    return false;
+}
+
struct MpvProto {
bool empty() const {
return puffettes.empty() && triggered_puffettes.empty();
- }
+ }
void reset() {
puffettes.clear();
triggered_puffettes.clear();
- }
+ }
std::vector<raw_puff> puffettes;
std::vector<raw_puff> triggered_puffettes;
};
-
+
struct OutfixInfo {
template<class T>
explicit OutfixInfo(std::unique_ptr<T> x) : proto(std::move(x)) {}
explicit OutfixInfo(MpvProto mpv_in) : proto(std::move(mpv_in)) {}
- u32 get_queue(QueueIndexFactory &qif);
-
+ u32 get_queue(QueueIndexFactory &qif);
+
u32 get_queue() const {
assert(queue != ~0U);
return queue;
}
- bool is_nonempty_mpv() const {
+ bool is_nonempty_mpv() const {
auto *m = boost::get<MpvProto>(&proto);
return m && !m->empty();
- }
-
- bool is_dead() const {
+ }
+
+ bool is_dead() const {
auto *m = boost::get<MpvProto>(&proto);
if (m) {
return m->empty();
}
return boost::get<boost::blank>(&proto) != nullptr;
- }
-
- void clear() {
+ }
+
+ void clear() {
proto = boost::blank();
- }
-
+ }
+
// Convenience accessor functions.
-
+
NGHolder *holder() {
auto *up = boost::get<std::unique_ptr<NGHolder>>(&proto);
return up ? up->get() : nullptr;
@@ -449,7 +449,7 @@ struct OutfixInfo {
MpvProto *mpv() {
return boost::get<MpvProto>(&proto);
}
-
+
// Convenience const accessor functions.
const NGHolder *holder() const {
@@ -479,214 +479,214 @@ struct OutfixInfo {
std::unique_ptr<raw_som_dfa>,
MpvProto> proto = boost::blank();
- RevAccInfo rev_info;
- u32 maxBAWidth = 0; //!< max bi-anchored width
+ RevAccInfo rev_info;
+ u32 maxBAWidth = 0; //!< max bi-anchored width
depth minWidth{depth::infinity()};
depth maxWidth{0};
- u64a maxOffset = 0;
- bool in_sbmatcher = false; //!< handled by small-block matcher.
-
-private:
- u32 queue = ~0U;
-};
-
-std::set<ReportID> all_reports(const OutfixInfo &outfix);
-
-// Concrete impl class
-class RoseBuildImpl : public RoseBuild {
-public:
+ u64a maxOffset = 0;
+ bool in_sbmatcher = false; //!< handled by small-block matcher.
+
+private:
+ u32 queue = ~0U;
+};
+
+std::set<ReportID> all_reports(const OutfixInfo &outfix);
+
+// Concrete impl class
+class RoseBuildImpl : public RoseBuild {
+public:
RoseBuildImpl(ReportManager &rm, SomSlotManager &ssm, SmallWriteBuild &smwr,
- const CompileContext &cc, const BoundaryReports &boundary);
-
- ~RoseBuildImpl() override;
-
- // Adds a single literal.
- void add(bool anchored, bool eod, const ue2_literal &lit,
+ const CompileContext &cc, const BoundaryReports &boundary);
+
+ ~RoseBuildImpl() override;
+
+ // Adds a single literal.
+ void add(bool anchored, bool eod, const ue2_literal &lit,
const flat_set<ReportID> &ids) override;
-
+
bool addRose(const RoseInGraph &ig, bool prefilter) override;
- bool addSombeRose(const RoseInGraph &ig) override;
-
- bool addOutfix(const NGHolder &h) override;
- bool addOutfix(const NGHolder &h, const raw_som_dfa &haig) override;
- bool addOutfix(const raw_puff &rp) override;
-
- bool addChainTail(const raw_puff &rp, u32 *queue_out, u32 *event_out) override;
-
- // Returns true if we were able to add it as a mask
- bool add(bool anchored, const std::vector<CharReach> &mask,
+ bool addSombeRose(const RoseInGraph &ig) override;
+
+ bool addOutfix(const NGHolder &h) override;
+ bool addOutfix(const NGHolder &h, const raw_som_dfa &haig) override;
+ bool addOutfix(const raw_puff &rp) override;
+
+ bool addChainTail(const raw_puff &rp, u32 *queue_out, u32 *event_out) override;
+
+ // Returns true if we were able to add it as a mask
+ bool add(bool anchored, const std::vector<CharReach> &mask,
const flat_set<ReportID> &reports) override;
-
- bool addAnchoredAcyclic(const NGHolder &graph) override;
-
- bool validateMask(const std::vector<CharReach> &mask,
+
+ bool addAnchoredAcyclic(const NGHolder &graph) override;
+
+ bool validateMask(const std::vector<CharReach> &mask,
const flat_set<ReportID> &reports, bool anchored,
- bool eod) const override;
- void addMask(const std::vector<CharReach> &mask,
+ bool eod) const override;
+ void addMask(const std::vector<CharReach> &mask,
const flat_set<ReportID> &reports, bool anchored,
- bool eod) override;
-
- // Construct a runtime implementation.
+ bool eod) override;
+
+ // Construct a runtime implementation.
bytecode_ptr<RoseEngine> buildRose(u32 minWidth) override;
bytecode_ptr<RoseEngine> buildFinalEngine(u32 minWidth);
-
- void setSom() override { hasSom = true; }
-
- std::unique_ptr<RoseDedupeAux> generateDedupeAux() const override;
-
- // Find the maximum bound on the edges to this vertex's successors.
- u32 calcSuccMaxBound(RoseVertex u) const;
-
- /* Returns the ID of the given literal in the literal map, adding it if
- * necessary. */
- u32 getLiteralId(const ue2_literal &s, u32 delay, rose_literal_table table);
-
- // Variant with msk/cmp.
- u32 getLiteralId(const ue2_literal &s, const std::vector<u8> &msk,
- const std::vector<u8> &cmp, u32 delay,
- rose_literal_table table);
-
- u32 getNewLiteralId(void);
-
- void removeVertices(const std::vector<RoseVertex> &dead);
-
- // Is the Rose anchored?
- bool hasNoFloatingRoots() const;
-
- u32 calcHistoryRequired() const;
-
- rose_group getInitialGroups() const;
- rose_group getSuccGroups(RoseVertex start) const;
- rose_group getGroups(RoseVertex v) const;
-
- bool hasDelayedLiteral(RoseVertex v) const;
- bool hasDelayPred(RoseVertex v) const;
- bool hasLiteralInTable(RoseVertex v, enum rose_literal_table t) const;
- bool hasAnchoredTablePred(RoseVertex v) const;
-
- // Is the given vertex a successor of either root or anchored_root?
- bool isRootSuccessor(const RoseVertex &v) const;
- /* Is the given vertex a successor of something other than root or
- * anchored_root? */
- bool isNonRootSuccessor(const RoseVertex &v) const;
-
- bool isDirectReport(u32 id) const;
- bool isDelayed(u32 id) const;
-
- bool isAnchored(RoseVertex v) const; /* true iff has literal in anchored
- * table */
- bool isFloating(RoseVertex v) const; /* true iff has literal in floating
- * table */
- bool isInETable(RoseVertex v) const; /* true iff has literal in eod
- * table */
-
- size_t maxLiteralLen(RoseVertex v) const;
- size_t minLiteralLen(RoseVertex v) const;
-
- // max overlap considered for every pair (ulit, vlit).
- size_t maxLiteralOverlap(RoseVertex u, RoseVertex v) const;
-
- bool isPseudoStar(const RoseEdge &e) const;
- bool isPseudoStarOrFirstOnly(const RoseEdge &e) const;
- bool hasOnlyPseudoStarInEdges(RoseVertex v) const;
-
- bool isAnyStart(const RoseVertex &v) const {
- return v == root || v == anchored_root;
- }
-
- bool isVirtualVertex(const RoseVertex &v) const {
- return g[v].eod_accept || isAnyStart(v);
- }
-
- void handleMixedSensitivity(void);
-
- void findTransientLeftfixes(void);
-
- const CompileContext &cc;
- RoseGraph g;
- const RoseVertex root;
- const RoseVertex anchored_root;
- RoseLiteralMap literals;
- std::map<RoseVertex, RoseVertex> ghost;
- ReportID getNewNfaReport() override {
- return next_nfa_report++;
- }
- std::deque<rose_literal_info> literal_info;
- bool hasSom; //!< at least one pattern requires SOM.
- std::map<size_t, std::vector<std::unique_ptr<raw_dfa>>> anchored_nfas;
- std::map<simple_anchored_info, std::set<u32>> anchored_simple;
- std::map<u32, std::set<u32> > group_to_literal;
- u32 group_end;
-
- u32 ematcher_region_size; /**< number of bytes the eod table runs over */
-
- /** \brief Mapping from anchored literal ID to the original literal suffix
- * present when the literal was added to the literal matcher. Used for
- * overlap calculation in history assignment. */
- std::map<u32, rose_literal_id> anchoredLitSuffix;
-
+
+ void setSom() override { hasSom = true; }
+
+ std::unique_ptr<RoseDedupeAux> generateDedupeAux() const override;
+
+ // Find the maximum bound on the edges to this vertex's successors.
+ u32 calcSuccMaxBound(RoseVertex u) const;
+
+ /* Returns the ID of the given literal in the literal map, adding it if
+ * necessary. */
+ u32 getLiteralId(const ue2_literal &s, u32 delay, rose_literal_table table);
+
+ // Variant with msk/cmp.
+ u32 getLiteralId(const ue2_literal &s, const std::vector<u8> &msk,
+ const std::vector<u8> &cmp, u32 delay,
+ rose_literal_table table);
+
+ u32 getNewLiteralId(void);
+
+ void removeVertices(const std::vector<RoseVertex> &dead);
+
+ // Is the Rose anchored?
+ bool hasNoFloatingRoots() const;
+
+ u32 calcHistoryRequired() const;
+
+ rose_group getInitialGroups() const;
+ rose_group getSuccGroups(RoseVertex start) const;
+ rose_group getGroups(RoseVertex v) const;
+
+ bool hasDelayedLiteral(RoseVertex v) const;
+ bool hasDelayPred(RoseVertex v) const;
+ bool hasLiteralInTable(RoseVertex v, enum rose_literal_table t) const;
+ bool hasAnchoredTablePred(RoseVertex v) const;
+
+ // Is the given vertex a successor of either root or anchored_root?
+ bool isRootSuccessor(const RoseVertex &v) const;
+ /* Is the given vertex a successor of something other than root or
+ * anchored_root? */
+ bool isNonRootSuccessor(const RoseVertex &v) const;
+
+ bool isDirectReport(u32 id) const;
+ bool isDelayed(u32 id) const;
+
+ bool isAnchored(RoseVertex v) const; /* true iff has literal in anchored
+ * table */
+ bool isFloating(RoseVertex v) const; /* true iff has literal in floating
+ * table */
+ bool isInETable(RoseVertex v) const; /* true iff has literal in eod
+ * table */
+
+ size_t maxLiteralLen(RoseVertex v) const;
+ size_t minLiteralLen(RoseVertex v) const;
+
+ // max overlap considered for every pair (ulit, vlit).
+ size_t maxLiteralOverlap(RoseVertex u, RoseVertex v) const;
+
+ bool isPseudoStar(const RoseEdge &e) const;
+ bool isPseudoStarOrFirstOnly(const RoseEdge &e) const;
+ bool hasOnlyPseudoStarInEdges(RoseVertex v) const;
+
+ bool isAnyStart(const RoseVertex &v) const {
+ return v == root || v == anchored_root;
+ }
+
+ bool isVirtualVertex(const RoseVertex &v) const {
+ return g[v].eod_accept || isAnyStart(v);
+ }
+
+ void handleMixedSensitivity(void);
+
+ void findTransientLeftfixes(void);
+
+ const CompileContext &cc;
+ RoseGraph g;
+ const RoseVertex root;
+ const RoseVertex anchored_root;
+ RoseLiteralMap literals;
+ std::map<RoseVertex, RoseVertex> ghost;
+ ReportID getNewNfaReport() override {
+ return next_nfa_report++;
+ }
+ std::deque<rose_literal_info> literal_info;
+ bool hasSom; //!< at least one pattern requires SOM.
+ std::map<size_t, std::vector<std::unique_ptr<raw_dfa>>> anchored_nfas;
+ std::map<simple_anchored_info, std::set<u32>> anchored_simple;
+ std::map<u32, std::set<u32> > group_to_literal;
+ u32 group_end;
+
+ u32 ematcher_region_size; /**< number of bytes the eod table runs over */
+
+ /** \brief Mapping from anchored literal ID to the original literal suffix
+ * present when the literal was added to the literal matcher. Used for
+ * overlap calculation in history assignment. */
+ std::map<u32, rose_literal_id> anchoredLitSuffix;
+
ue2_unordered_set<left_id> transient;
ue2_unordered_map<left_id, rose_group> rose_squash_masks;
-
- std::vector<OutfixInfo> outfixes;
-
- /** \brief MPV outfix entry. Null if not used, and moved into the outfixes
- * list before we start building the bytecode (at which point it is set to
- * null again). */
- std::unique_ptr<OutfixInfo> mpv_outfix = nullptr;
-
- u32 eod_event_literal_id; // ID of EOD event literal, or MO_INVALID_IDX.
-
- u32 max_rose_anchored_floating_overlap;
-
+
+ std::vector<OutfixInfo> outfixes;
+
+ /** \brief MPV outfix entry. Null if not used, and moved into the outfixes
+ * list before we start building the bytecode (at which point it is set to
+ * null again). */
+ std::unique_ptr<OutfixInfo> mpv_outfix = nullptr;
+
+ u32 eod_event_literal_id; // ID of EOD event literal, or MO_INVALID_IDX.
+
+ u32 max_rose_anchored_floating_overlap;
+
rose_group boundary_group_mask = 0;
-
- QueueIndexFactory qif;
- ReportManager &rm;
- SomSlotManager &ssm;
+
+ QueueIndexFactory qif;
+ ReportManager &rm;
+ SomSlotManager &ssm;
SmallWriteBuild &smwr;
- const BoundaryReports &boundary;
-
-private:
- ReportID next_nfa_report;
-};
-
+ const BoundaryReports &boundary;
+
+private:
+ ReportID next_nfa_report;
+};
+
size_t calcLongLitThreshold(const RoseBuildImpl &build,
const size_t historyRequired);
-// Free functions, in rose_build_misc.cpp
-
-bool hasAnchHistorySucc(const RoseGraph &g, RoseVertex v);
-bool hasLastByteHistorySucc(const RoseGraph &g, RoseVertex v);
-
-size_t maxOverlap(const rose_literal_id &a, const rose_literal_id &b);
+// Free functions, in rose_build_misc.cpp
+
+bool hasAnchHistorySucc(const RoseGraph &g, RoseVertex v);
+bool hasLastByteHistorySucc(const RoseGraph &g, RoseVertex v);
+
+size_t maxOverlap(const rose_literal_id &a, const rose_literal_id &b);
ue2_literal findNonOverlappingTail(const std::set<ue2_literal> &lits,
const ue2_literal &s);
-
-#ifndef NDEBUG
+
+#ifndef NDEBUG
bool roseHasTops(const RoseBuildImpl &build, RoseVertex v);
bool hasOrphanedTops(const RoseBuildImpl &build);
-#endif
-
-u64a findMaxOffset(const std::set<ReportID> &reports, const ReportManager &rm);
-
-// Function that operates on a msk/cmp pair and a literal, as used in
-// hwlmLiteral, and zeroes msk elements that don't add any power to the
-// literal.
-void normaliseLiteralMask(const ue2_literal &s, std::vector<u8> &msk,
- std::vector<u8> &cmp);
-
+#endif
+
+u64a findMaxOffset(const std::set<ReportID> &reports, const ReportManager &rm);
+
+// Function that operates on a msk/cmp pair and a literal, as used in
+// hwlmLiteral, and zeroes msk elements that don't add any power to the
+// literal.
+void normaliseLiteralMask(const ue2_literal &s, std::vector<u8> &msk,
+ std::vector<u8> &cmp);
+
u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id);
u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id);
-
+
bool canEagerlyReportAtEod(const RoseBuildImpl &build, const RoseEdge &e);
-
-#ifndef NDEBUG
-bool canImplementGraphs(const RoseBuildImpl &tbi);
-#endif
-
-} // namespace ue2
-
+
+#ifndef NDEBUG
+bool canImplementGraphs(const RoseBuildImpl &tbi);
+#endif
+
+} // namespace ue2
+
namespace std {
template<>
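The suffix_id and left_id comparators above are built from the ORDER_CHECK macro, which performs a field-by-field lexicographic comparison with early return. A rough standalone sketch of that idiom follows; the macro is re-derived from its usage here rather than copied from util/order_check.h, and the engine pointers are reduced to plain int members:

    #include <cassert>

    // Hypothetical re-derivation of ORDER_CHECK: compare one field of
    // 'a' and 'b', returning early unless the fields compare equal.
    #define ORDER_CHECK(field)                                  \
        do {                                                    \
            if (a.field < b.field) return true;                 \
            if (b.field < a.field) return false;                \
        } while (0)

    struct engine_key {
        int g; // stand-in for the graph pointer
        int c; // stand-in for the castle pointer

        bool operator<(const engine_key &b) const {
            const engine_key &a = *this; // ORDER_CHECK expects 'a'/'b'
            ORDER_CHECK(g);
            ORDER_CHECK(c);
            return false; // all fields equal
        }
    };

    int main() {
        engine_key x{1, 2}, y{1, 3};
        assert(x < y);    // first differing field decides
        assert(!(y < x));
        assert(!(x < x)); // irreflexive, as std::map requires
    }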
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_infix.cpp b/contrib/libs/hyperscan/src/rose/rose_build_infix.cpp
index e250bf4c7e..80e1254236 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_infix.cpp
+++ b/contrib/libs/hyperscan/src/rose/rose_build_infix.cpp
@@ -1,328 +1,328 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "rose/rose_build_infix.h"
-
-#include "ue2common.h"
-#include "nfa/castlecompile.h"
-#include "nfagraph/ng_dump.h"
-#include "nfagraph/ng_width.h"
-#include "nfagraph/ng_util.h"
-#include "rose/rose_build_impl.h"
-#include "util/container.h"
-#include "util/dump_charclass.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "rose/rose_build_infix.h"
+
+#include "ue2common.h"
+#include "nfa/castlecompile.h"
+#include "nfagraph/ng_dump.h"
+#include "nfagraph/ng_width.h"
+#include "nfagraph/ng_util.h"
+#include "rose/rose_build_impl.h"
+#include "util/container.h"
+#include "util/dump_charclass.h"
#include "util/flat_containers.h"
-#include "util/graph_range.h"
-#include "util/graph.h"
+#include "util/graph_range.h"
+#include "util/graph.h"
#include "util/hash.h"
-#include "util/ue2string.h"
+#include "util/ue2string.h"
#include "util/unordered.h"
-
-#include <algorithm>
-#include <set>
-
-using namespace std;
-
-namespace ue2 {
-
-static
-bool couldEndLiteral(const ue2_literal &s, NFAVertex initial,
- const NGHolder &h) {
+
+#include <algorithm>
+#include <set>
+
+using namespace std;
+
+namespace ue2 {
+
+static
+bool couldEndLiteral(const ue2_literal &s, NFAVertex initial,
+ const NGHolder &h) {
flat_set<NFAVertex> curr, next;
- curr.insert(initial);
-
- for (auto it = s.rbegin(), ite = s.rend(); it != ite; ++it) {
- const CharReach &cr_s = *it;
- bool matched = false;
- next.clear();
-
- for (auto v : curr) {
- if (v == h.start) {
- // We can't see what we had before the start, so we must assume
- // the literal could overlap with it.
- return true;
- }
- const CharReach &cr_v = h[v].char_reach;
- if (overlaps(cr_v, cr_s)) {
- insert(&next, inv_adjacent_vertices(v, h));
- matched = true;
- }
- }
-
- if (!matched) {
- return false;
- }
-
- curr.swap(next);
- }
-
- return true;
-}
-
+ curr.insert(initial);
+
+ for (auto it = s.rbegin(), ite = s.rend(); it != ite; ++it) {
+ const CharReach &cr_s = *it;
+ bool matched = false;
+ next.clear();
+
+ for (auto v : curr) {
+ if (v == h.start) {
+ // We can't see what we had before the start, so we must assume
+ // the literal could overlap with it.
+ return true;
+ }
+ const CharReach &cr_v = h[v].char_reach;
+ if (overlaps(cr_v, cr_s)) {
+ insert(&next, inv_adjacent_vertices(v, h));
+ matched = true;
+ }
+ }
+
+ if (!matched) {
+ return false;
+ }
+
+ curr.swap(next);
+ }
+
+ return true;
+}
+
using EdgeCache = ue2_unordered_set<pair<NFAVertex, NFAVertex>>;
-static
+static
void contractVertex(NGHolder &g, NFAVertex v, EdgeCache &all_edges) {
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (u == v) {
- continue; // self-edge
- }
- for (auto w : adjacent_vertices_range(v, g)) {
- if (w == v) {
- continue; // self-edge
- }
-
-            // Construct edge (u, w) only if it doesn't already exist. We use
- // the all_edges container here, as checking existence inside the
- // graph is expensive when u or v have large degree.
- if (all_edges.emplace(u, w).second) {
- add_edge(u, w, g);
- }
- }
- }
-
- // Note that edges to/from v will remain in all_edges.
- clear_vertex(v, g);
-}
-
-static
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (u == v) {
+ continue; // self-edge
+ }
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (w == v) {
+ continue; // self-edge
+ }
+
+            // Construct edge (u, w) only if it doesn't already exist. We use
+ // the all_edges container here, as checking existence inside the
+ // graph is expensive when u or v have large degree.
+ if (all_edges.emplace(u, w).second) {
+ add_edge(u, w, g);
+ }
+ }
+ }
+
+ // Note that edges to/from v will remain in all_edges.
+ clear_vertex(v, g);
+}
+
+static
u32 findMaxLiteralMatches(const NGHolder &h, const set<ue2_literal> &lits) {
- DEBUG_PRINTF("h=%p, %zu literals\n", &h, lits.size());
+ DEBUG_PRINTF("h=%p, %zu literals\n", &h, lits.size());
//dumpGraph("infix.dot", h);
-
- // Indices of vertices that could terminate any of the literals in 'lits'.
- set<u32> terms;
-
- for (const auto &s : lits) {
- DEBUG_PRINTF("lit s='%s'\n", escapeString(s).c_str());
- if (s.empty()) {
- // Likely an anchored case, be conservative here.
- return NO_MATCH_LIMIT;
- }
-
- for (auto v : vertices_range(h)) {
- if (is_special(v, h)) {
- continue;
- }
-
- if (couldEndLiteral(s, v, h)) {
- u32 idx = h[v].index;
- DEBUG_PRINTF("vertex %u could terminate lit\n", idx);
- terms.insert(idx);
- }
- }
- }
-
- if (terms.empty()) {
- DEBUG_PRINTF("literals cannot match inside infix\n");
- return 0;
- }
-
- NGHolder g;
- cloneHolder(g, h);
- vector<NFAVertex> dead;
-
+
+ // Indices of vertices that could terminate any of the literals in 'lits'.
+ set<u32> terms;
+
+ for (const auto &s : lits) {
+ DEBUG_PRINTF("lit s='%s'\n", escapeString(s).c_str());
+ if (s.empty()) {
+ // Likely an anchored case, be conservative here.
+ return NO_MATCH_LIMIT;
+ }
+
+ for (auto v : vertices_range(h)) {
+ if (is_special(v, h)) {
+ continue;
+ }
+
+ if (couldEndLiteral(s, v, h)) {
+ u32 idx = h[v].index;
+ DEBUG_PRINTF("vertex %u could terminate lit\n", idx);
+ terms.insert(idx);
+ }
+ }
+ }
+
+ if (terms.empty()) {
+ DEBUG_PRINTF("literals cannot match inside infix\n");
+ return 0;
+ }
+
+ NGHolder g;
+ cloneHolder(g, h);
+ vector<NFAVertex> dead;
+
// The set of all edges in the graph is used for existence checks in
// contractVertex.
EdgeCache all_edges;
- for (const auto &e : edges_range(g)) {
- all_edges.emplace(source(e, g), target(e, g));
- }
-
- for (auto v : vertices_range(g)) {
- if (is_special(v, g)) {
- continue;
- }
- if (contains(terms, g[v].index)) {
- continue;
- }
-
- contractVertex(g, v, all_edges);
- dead.push_back(v);
- }
-
- remove_vertices(dead, g);
+ for (const auto &e : edges_range(g)) {
+ all_edges.emplace(source(e, g), target(e, g));
+ }
+
+ for (auto v : vertices_range(g)) {
+ if (is_special(v, g)) {
+ continue;
+ }
+ if (contains(terms, g[v].index)) {
+ continue;
+ }
+
+ contractVertex(g, v, all_edges);
+ dead.push_back(v);
+ }
+
+ remove_vertices(dead, g);
//dumpGraph("relaxed.dot", g);
-
- depth maxWidth = findMaxWidth(g);
- DEBUG_PRINTF("maxWidth=%s\n", maxWidth.str().c_str());
- assert(maxWidth.is_reachable());
-
- if (maxWidth.is_infinite()) {
- // Cycle detected, so we can likely squeeze an unlimited number of
- // matches into this graph.
- return NO_MATCH_LIMIT;
- }
-
- assert(terms.size() >= maxWidth);
- return maxWidth;
-}
-
-namespace {
-struct ReachMismatch {
- explicit ReachMismatch(const CharReach &cr_in) : cr(cr_in) {}
- bool operator()(const CharReach &a) const { return !overlaps(cr, a); }
-
-private:
- CharReach cr;
-};
-}
-
-static
-u32 findMaxInfixMatches(const CastleProto &castle,
- const set<ue2_literal> &lits) {
- DEBUG_PRINTF("castle=%p, %zu literals\n", &castle, lits.size());
-
- if (castle.repeats.size() > 1) {
- DEBUG_PRINTF("more than one top!\n");
- return NO_MATCH_LIMIT;
- }
-
- assert(!castle.repeats.empty());
- const PureRepeat &pr = castle.repeats.begin()->second;
- DEBUG_PRINTF("repeat=%s reach=%s\n", pr.bounds.str().c_str(),
- describeClass(pr.reach).c_str());
-
- size_t max_count = 0;
-
- for (const auto &s : lits) {
- DEBUG_PRINTF("lit s='%s'\n", escapeString(s).c_str());
- if (s.empty()) {
- // Likely an anchored case, be conservative here.
- return NO_MATCH_LIMIT;
- }
-
- size_t count = 0;
-
- auto f = find_if(s.rbegin(), s.rend(), ReachMismatch(pr.reach));
-
- if (f == s.rbegin()) {
- DEBUG_PRINTF("lit can't terminate inside infix\n");
- count = 0;
- } else if (f != s.rend()) {
- size_t suffix_len = distance(s.rbegin(), f);
- DEBUG_PRINTF("suffix of len %zu matches at start\n", suffix_len);
- if (pr.bounds.max.is_finite()) {
- count = min(suffix_len, (size_t)pr.bounds.max);
- } else {
- count = suffix_len;
- }
- } else {
- DEBUG_PRINTF("whole lit can match inside infix (repeatedly)\n");
- if (pr.bounds.max.is_finite()) {
- count = pr.bounds.max;
- } else {
- DEBUG_PRINTF("inf bound\n");
- return NO_MATCH_LIMIT;
- }
- }
-
- DEBUG_PRINTF("count=%zu\n", count);
- max_count = max(max_count, count);
- }
-
- DEBUG_PRINTF("max_count %zu\n", max_count);
-
- if (max_count > NO_MATCH_LIMIT) {
- assert(0); // This would be a surprise.
- return NO_MATCH_LIMIT;
- }
-
- return (u32)max_count;
-}
-
-u32 findMaxInfixMatches(const left_id &left, const set<ue2_literal> &lits) {
- if (left.castle()) {
- return findMaxInfixMatches(*left.castle(), lits);
- }
- if (left.graph()) {
+
+ depth maxWidth = findMaxWidth(g);
+ DEBUG_PRINTF("maxWidth=%s\n", maxWidth.str().c_str());
+ assert(maxWidth.is_reachable());
+
+ if (maxWidth.is_infinite()) {
+ // Cycle detected, so we can likely squeeze an unlimited number of
+ // matches into this graph.
+ return NO_MATCH_LIMIT;
+ }
+
+ assert(terms.size() >= maxWidth);
+ return maxWidth;
+}
+
+namespace {
+struct ReachMismatch {
+ explicit ReachMismatch(const CharReach &cr_in) : cr(cr_in) {}
+ bool operator()(const CharReach &a) const { return !overlaps(cr, a); }
+
+private:
+ CharReach cr;
+};
+}
+
+static
+u32 findMaxInfixMatches(const CastleProto &castle,
+ const set<ue2_literal> &lits) {
+ DEBUG_PRINTF("castle=%p, %zu literals\n", &castle, lits.size());
+
+ if (castle.repeats.size() > 1) {
+ DEBUG_PRINTF("more than one top!\n");
+ return NO_MATCH_LIMIT;
+ }
+
+ assert(!castle.repeats.empty());
+ const PureRepeat &pr = castle.repeats.begin()->second;
+ DEBUG_PRINTF("repeat=%s reach=%s\n", pr.bounds.str().c_str(),
+ describeClass(pr.reach).c_str());
+
+ size_t max_count = 0;
+
+ for (const auto &s : lits) {
+ DEBUG_PRINTF("lit s='%s'\n", escapeString(s).c_str());
+ if (s.empty()) {
+ // Likely an anchored case, be conservative here.
+ return NO_MATCH_LIMIT;
+ }
+
+ size_t count = 0;
+
+ auto f = find_if(s.rbegin(), s.rend(), ReachMismatch(pr.reach));
+
+ if (f == s.rbegin()) {
+ DEBUG_PRINTF("lit can't terminate inside infix\n");
+ count = 0;
+ } else if (f != s.rend()) {
+ size_t suffix_len = distance(s.rbegin(), f);
+ DEBUG_PRINTF("suffix of len %zu matches at start\n", suffix_len);
+ if (pr.bounds.max.is_finite()) {
+ count = min(suffix_len, (size_t)pr.bounds.max);
+ } else {
+ count = suffix_len;
+ }
+ } else {
+ DEBUG_PRINTF("whole lit can match inside infix (repeatedly)\n");
+ if (pr.bounds.max.is_finite()) {
+ count = pr.bounds.max;
+ } else {
+ DEBUG_PRINTF("inf bound\n");
+ return NO_MATCH_LIMIT;
+ }
+ }
+
+ DEBUG_PRINTF("count=%zu\n", count);
+ max_count = max(max_count, count);
+ }
+
+ DEBUG_PRINTF("max_count %zu\n", max_count);
+
+ if (max_count > NO_MATCH_LIMIT) {
+ assert(0); // This would be a surprise.
+ return NO_MATCH_LIMIT;
+ }
+
+ return (u32)max_count;
+}
+
+u32 findMaxInfixMatches(const left_id &left, const set<ue2_literal> &lits) {
+ if (left.castle()) {
+ return findMaxInfixMatches(*left.castle(), lits);
+ }
+ if (left.graph()) {
if (!onlyOneTop(*left.graph())) {
DEBUG_PRINTF("more than one top!n");
return NO_MATCH_LIMIT;
}
return findMaxLiteralMatches(*left.graph(), lits);
- }
-
- return NO_MATCH_LIMIT;
-}
-
-void findCountingMiracleInfo(const left_id &left, const vector<u8> &stopTable,
- u8 *cm_count, CharReach *cm_cr) {
- DEBUG_PRINTF("hello\n");
- *cm_count = 0;
- cm_cr->clear();
- if (!left.graph()) {
- return;
- }
-
- const NGHolder &g = *left.graph();
-
+ }
+
+ return NO_MATCH_LIMIT;
+}
+
+void findCountingMiracleInfo(const left_id &left, const vector<u8> &stopTable,
+ u8 *cm_count, CharReach *cm_cr) {
+ DEBUG_PRINTF("hello\n");
+ *cm_count = 0;
+ cm_cr->clear();
+ if (!left.graph()) {
+ return;
+ }
+
+ const NGHolder &g = *left.graph();
+
auto cyclics = find_vertices_in_cycles(g);
-
- if (!proper_out_degree(g.startDs, g)) {
- cyclics.erase(g.startDs);
- }
-
- CharReach cyclic_cr;
- for (NFAVertex v : cyclics) {
+
+ if (!proper_out_degree(g.startDs, g)) {
+ cyclics.erase(g.startDs);
+ }
+
+ CharReach cyclic_cr;
+ for (NFAVertex v : cyclics) {
DEBUG_PRINTF("considering %zu ||=%zu\n", g[v].index,
- g[v].char_reach.count());
- cyclic_cr |= g[v].char_reach;
- }
-
- if (cyclic_cr.none() || cyclic_cr.all()) {
- DEBUG_PRINTF("cyclic cr width %zu\n", cyclic_cr.count());
- return; /* useless */
- }
-
- *cm_cr = ~cyclic_cr;
-
-    /* stop characters will be part of normal miracles, no need to look for them
- * here too */
- assert(stopTable.size() == N_CHARS);
- for (u32 i = 0; i < N_CHARS; i++) {
- if (stopTable[i]) {
- cm_cr->clear(i);
- }
- }
-
- set<ue2_literal> lits;
- for (size_t c = cm_cr->find_first(); c != CharReach::npos;
- c = cm_cr->find_next(c)) {
- DEBUG_PRINTF("considering %hhx as stop character\n", (u8)c);
- lits.insert(ue2_literal(c, false));
- }
-
+ g[v].char_reach.count());
+ cyclic_cr |= g[v].char_reach;
+ }
+
+ if (cyclic_cr.none() || cyclic_cr.all()) {
+ DEBUG_PRINTF("cyclic cr width %zu\n", cyclic_cr.count());
+ return; /* useless */
+ }
+
+ *cm_cr = ~cyclic_cr;
+
+    /* stop characters will be part of normal miracles, no need to look for them
+ * here too */
+ assert(stopTable.size() == N_CHARS);
+ for (u32 i = 0; i < N_CHARS; i++) {
+ if (stopTable[i]) {
+ cm_cr->clear(i);
+ }
+ }
+
+ set<ue2_literal> lits;
+ for (size_t c = cm_cr->find_first(); c != CharReach::npos;
+ c = cm_cr->find_next(c)) {
+ DEBUG_PRINTF("considering %hhx as stop character\n", (u8)c);
+ lits.insert(ue2_literal(c, false));
+ }
+
u32 count = findMaxLiteralMatches(*left.graph(), lits);
- DEBUG_PRINTF("counting miracle %u\n", count + 1);
- if (count && count < 50) {
- *cm_count = count + 1;
- }
-}
-
-} // namespace ue2
+ DEBUG_PRINTF("counting miracle %u\n", count + 1);
+ if (count && count < 50) {
+ *cm_count = count + 1;
+ }
+}
+
+} // namespace ue2
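findMaxLiteralMatches above bounds how many trigger literals can match inside an infix: every vertex that cannot terminate a literal is contracted away, and the maximum width (longest path, counted in vertices) of what remains caps the match count, with a cycle meaning no usable bound. A rough sketch of that final longest-path step on a plain DAG, standing in for findMaxWidth on the contracted NGHolder; the adjacency list and memoised recursion are assumptions of this sketch, not Hyperscan code:

    #include <cstdio>
    #include <vector>
    #include <algorithm>

    // Longest path (in vertices) from node v in a DAG; stands in for
    // findMaxWidth() on the contracted graph. The real code also detects
    // cycles and reports an infinite depth for them (-> NO_MATCH_LIMIT).
    static int maxWidth(const std::vector<std::vector<int>> &adj, int v,
                        std::vector<int> &memo) {
        if (memo[v] >= 0) return memo[v];
        int best = 1; // the vertex itself is one potential match
        for (int w : adj[v]) {
            best = std::max(best, 1 + maxWidth(adj, w, memo));
        }
        return memo[v] = best;
    }

    int main() {
        // 0 -> 1 -> 3 and 0 -> 2 -> 3: the longest path has 3 vertices,
        // so at most 3 literal matches fit on any run through the infix.
        std::vector<std::vector<int>> adj = {{1, 2}, {3}, {3}, {}};
        std::vector<int> memo(adj.size(), -1);
        std::printf("match limit: %d\n", maxWidth(adj, 0, memo));
    }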
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_infix.h b/contrib/libs/hyperscan/src/rose/rose_build_infix.h
index 4706cb8c19..dc2685065a 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_infix.h
+++ b/contrib/libs/hyperscan/src/rose/rose_build_infix.h
@@ -1,52 +1,52 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef ROSE_BUILD_INFIX_H
-#define ROSE_BUILD_INFIX_H
-
-#include "ue2common.h"
-
-#include <set>
-#include <vector>
-
-namespace ue2 {
-
-class CharReach;
-struct left_id;
-struct ue2_literal;
-
-static constexpr u32 NO_MATCH_LIMIT = 0xffffffff;
-
-u32 findMaxInfixMatches(const left_id &left, const std::set<ue2_literal> &lits);
-
-void findCountingMiracleInfo(const left_id &left, const std::vector<u8> &stop,
- u8 *cm_count, CharReach *cm_cr);
-
-} // namespace ue2
-
-#endif // ROSE_BUILD_INFIX_H
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ROSE_BUILD_INFIX_H
+#define ROSE_BUILD_INFIX_H
+
+#include "ue2common.h"
+
+#include <set>
+#include <vector>
+
+namespace ue2 {
+
+class CharReach;
+struct left_id;
+struct ue2_literal;
+
+static constexpr u32 NO_MATCH_LIMIT = 0xffffffff;
+
+u32 findMaxInfixMatches(const left_id &left, const std::set<ue2_literal> &lits);
+
+void findCountingMiracleInfo(const left_id &left, const std::vector<u8> &stop,
+ u8 *cm_count, CharReach *cm_cr);
+
+} // namespace ue2
+
+#endif // ROSE_BUILD_INFIX_H
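The NO_MATCH_LIMIT sentinel declared above is how the analyses in this file report "no provable bound"; callers only act when they get back a small finite count (note the `count < 50` guard in findCountingMiracleInfo). A hedged sketch of the caller-side pattern, with a stubbed analysis standing in for the real entry points:

    #include <cstdint>
    #include <cstdio>

    using u32 = std::uint32_t;
    static constexpr u32 NO_MATCH_LIMIT = 0xffffffff;

    // Stand-in for findMaxInfixMatches: returns a bound when one can be
    // proven, NO_MATCH_LIMIT otherwise.
    static u32 analyseInfix(bool provable, u32 bound) {
        return provable ? bound : NO_MATCH_LIMIT;
    }

    int main() {
        u32 limit = analyseInfix(true, 4);
        if (limit == NO_MATCH_LIMIT) {
            std::puts("infix may match triggers indefinitely; no optimisation");
        } else {
            std::printf("infix is dead after %u trigger matches\n", limit);
        }
    }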
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_lookaround.cpp b/contrib/libs/hyperscan/src/rose/rose_build_lookaround.cpp
index 10e1cbfa5f..d0540d79b0 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_lookaround.cpp
+++ b/contrib/libs/hyperscan/src/rose/rose_build_lookaround.cpp
@@ -1,68 +1,68 @@
-/*
+/*
* Copyright (c) 2015-2020, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Rose compile-time analysis for lookaround masks.
- */
-#include "rose_build_lookaround.h"
-
-#include "rose_build_impl.h"
-#include "nfa/castlecompile.h"
-#include "nfa/goughcompile.h"
-#include "nfa/rdfa.h"
-#include "nfagraph/ng_repeat.h"
-#include "nfagraph/ng_util.h"
-#include "util/container.h"
-#include "util/dump_charclass.h"
-#include "util/graph_range.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Rose compile-time analysis for lookaround masks.
+ */
+#include "rose_build_lookaround.h"
+
+#include "rose_build_impl.h"
+#include "nfa/castlecompile.h"
+#include "nfa/goughcompile.h"
+#include "nfa/rdfa.h"
+#include "nfagraph/ng_repeat.h"
+#include "nfagraph/ng_util.h"
+#include "util/container.h"
+#include "util/dump_charclass.h"
+#include "util/graph_range.h"
#include "util/flat_containers.h"
-#include "util/verify_types.h"
-
-#include <cstdlib>
-#include <queue>
+#include "util/verify_types.h"
+
+#include <cstdlib>
+#include <queue>
#include <sstream>
-
-using namespace std;
-
-namespace ue2 {
-
-/** \brief Max search distance for reachability in front of a role. */
-static const u32 MAX_FWD_LEN = 64;
-
-/** \brief Max search distance for reachability behind a role. */
-static const u32 MAX_BACK_LEN = 64;
-
-/** \brief Max lookaround entries for a role. */
+
+using namespace std;
+
+namespace ue2 {
+
+/** \brief Max search distance for reachability in front of a role. */
+static const u32 MAX_FWD_LEN = 64;
+
+/** \brief Max search distance for reachability behind a role. */
+static const u32 MAX_BACK_LEN = 64;
+
+/** \brief Max lookaround entries for a role. */
static const u32 MAX_LOOKAROUND_ENTRIES = 32;
-
-/** \brief We would rather have lookarounds with smaller reach than this. */
-static const u32 LOOKAROUND_WIDE_REACH = 200;
-
+
+/** \brief We would rather have lookarounds with smaller reach than this. */
+static const u32 LOOKAROUND_WIDE_REACH = 200;
+
#if defined(DEBUG) || defined(DUMP_SUPPORT)
static UNUSED
string dump(const map<s32, CharReach> &look) {
@@ -77,389 +77,389 @@ string dump(const map<s32, CharReach> &look) {
}
#endif
-static
-void getForwardReach(const NGHolder &g, u32 top, map<s32, CharReach> &look) {
+static
+void getForwardReach(const NGHolder &g, u32 top, map<s32, CharReach> &look) {
flat_set<NFAVertex> curr, next;
-
- // Consider only successors of start with the required top.
- for (const auto &e : out_edges_range(g.start, g)) {
- NFAVertex v = target(e, g);
- if (v == g.startDs) {
- continue;
- }
+
+ // Consider only successors of start with the required top.
+ for (const auto &e : out_edges_range(g.start, g)) {
+ NFAVertex v = target(e, g);
+ if (v == g.startDs) {
+ continue;
+ }
if (contains(g[e].tops, top)) {
- curr.insert(v);
- }
- }
-
- for (u32 i = 0; i < MAX_FWD_LEN; i++) {
- if (curr.empty() || contains(curr, g.accept) ||
- contains(curr, g.acceptEod)) {
- break;
- }
-
- next.clear();
- CharReach cr;
-
- for (auto v : curr) {
- assert(!is_special(v, g));
- cr |= g[v].char_reach;
- insert(&next, adjacent_vertices(v, g));
- }
-
- assert(cr.any());
- look[i] |= cr;
- curr.swap(next);
- }
-}
-
-static
-void getBackwardReach(const NGHolder &g, ReportID report, u32 lag,
- map<s32, CharReach> &look) {
+ curr.insert(v);
+ }
+ }
+
+ for (u32 i = 0; i < MAX_FWD_LEN; i++) {
+ if (curr.empty() || contains(curr, g.accept) ||
+ contains(curr, g.acceptEod)) {
+ break;
+ }
+
+ next.clear();
+ CharReach cr;
+
+ for (auto v : curr) {
+ assert(!is_special(v, g));
+ cr |= g[v].char_reach;
+ insert(&next, adjacent_vertices(v, g));
+ }
+
+ assert(cr.any());
+ look[i] |= cr;
+ curr.swap(next);
+ }
+}
+
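A note on the function above: getForwardReach is a bounded breadth-first sweep that, for each distance i from the engine's start states, unions the character reach of every state that can be alive at that distance, stopping early once an accept is reachable (reach past an accept is no longer a guarantee). A minimal standalone sketch of the same idea, using std::bitset<256> as a stand-in for CharReach and a plain adjacency list in place of an NGHolder; all names below are illustrative, not Hyperscan API:

    #include <bitset>
    #include <cstdint>
    #include <map>
    #include <set>
    #include <vector>

    using Reach = std::bitset<256>; // stand-in for CharReach

    struct Node {
        Reach reach;            // characters this state matches
        std::vector<int> succs; // successor state indices
    };

    // Union the reach of all states alive at each step, up to max_len steps.
    std::map<int32_t, Reach> forwardReach(const std::vector<Node> &g,
                                          const std::set<int> &starts,
                                          uint32_t max_len) {
        std::map<int32_t, Reach> look;
        std::set<int> curr = starts, next;
        for (uint32_t i = 0; i < max_len && !curr.empty(); i++) {
            Reach cr;
            next.clear();
            for (int v : curr) {
                cr |= g[v].reach;
                next.insert(g[v].succs.begin(), g[v].succs.end());
            }
            look[i] |= cr;
            curr.swap(next);
        }
        return look;
    }
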
+static
+void getBackwardReach(const NGHolder &g, ReportID report, u32 lag,
+ map<s32, CharReach> &look) {
flat_set<NFAVertex> curr, next;
-
- for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
- if (contains(g[v].reports, report)) {
- curr.insert(v);
- }
- }
-
- for (u32 i = lag + 1; i <= MAX_BACK_LEN; i++) {
- if (curr.empty() || contains(curr, g.start) ||
- contains(curr, g.startDs)) {
- break;
- }
-
- next.clear();
- CharReach cr;
-
- for (auto v : curr) {
- assert(!is_special(v, g));
- cr |= g[v].char_reach;
- insert(&next, inv_adjacent_vertices(v, g));
- }
-
- assert(cr.any());
- look[0 - i] |= cr;
- curr.swap(next);
- }
-}
-
-static
-void getForwardReach(const CastleProto &castle, u32 top,
- map<s32, CharReach> &look) {
- depth len = castle.repeats.at(top).bounds.min;
- len = min(len, depth(MAX_FWD_LEN));
- assert(len.is_finite());
-
- const CharReach &cr = castle.reach();
- for (u32 i = 0; i < len; i++) {
- look[i] |= cr;
- }
-}
-
-static
-void getBackwardReach(const CastleProto &castle, ReportID report, u32 lag,
- map<s32, CharReach> &look) {
- depth min_depth = depth::infinity();
- for (const auto &m : castle.repeats) {
- const PureRepeat &pr = m.second;
- if (contains(pr.reports, report)) {
- min_depth = min(min_depth, pr.bounds.min);
- }
- }
-
- if (!min_depth.is_finite()) {
- assert(0);
- return;
- }
-
- const CharReach &cr = castle.reach();
- for (u32 i = lag + 1; i <= min(lag + (u32)min_depth, MAX_BACK_LEN);
- i++) {
- look[0 - i] |= cr;
- }
-}
-
-static
-void getForwardReach(const raw_dfa &rdfa, map<s32, CharReach> &look) {
- if (rdfa.states.size() < 2) {
- return;
- }
-
+
+ for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
+ if (contains(g[v].reports, report)) {
+ curr.insert(v);
+ }
+ }
+
+ for (u32 i = lag + 1; i <= MAX_BACK_LEN; i++) {
+ if (curr.empty() || contains(curr, g.start) ||
+ contains(curr, g.startDs)) {
+ break;
+ }
+
+ next.clear();
+ CharReach cr;
+
+ for (auto v : curr) {
+ assert(!is_special(v, g));
+ cr |= g[v].char_reach;
+ insert(&next, inv_adjacent_vertices(v, g));
+ }
+
+ assert(cr.any());
+ look[0 - i] |= cr;
+ curr.swap(next);
+ }
+}
+
+static
+void getForwardReach(const CastleProto &castle, u32 top,
+ map<s32, CharReach> &look) {
+ depth len = castle.repeats.at(top).bounds.min;
+ len = min(len, depth(MAX_FWD_LEN));
+ assert(len.is_finite());
+
+ const CharReach &cr = castle.reach();
+ for (u32 i = 0; i < len; i++) {
+ look[i] |= cr;
+ }
+}
+
+static
+void getBackwardReach(const CastleProto &castle, ReportID report, u32 lag,
+ map<s32, CharReach> &look) {
+ depth min_depth = depth::infinity();
+ for (const auto &m : castle.repeats) {
+ const PureRepeat &pr = m.second;
+ if (contains(pr.reports, report)) {
+ min_depth = min(min_depth, pr.bounds.min);
+ }
+ }
+
+ if (!min_depth.is_finite()) {
+ assert(0);
+ return;
+ }
+
+ const CharReach &cr = castle.reach();
+ for (u32 i = lag + 1; i <= min(lag + (u32)min_depth, MAX_BACK_LEN);
+ i++) {
+ look[0 - i] |= cr;
+ }
+}
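The two castle overloads above rely on a castle (a bounded-repeat engine) having one uniform character reach; only the repeat bounds vary per top. The forward case therefore reduces to repeating that reach for the minimum bound, clamped to the search window. A standalone sketch under the same stand-in types as earlier (illustrative names):

    #include <algorithm>
    #include <bitset>
    #include <cstdint>
    #include <map>

    // A repeat of 'reach' with minimum bound 'min_bound' guarantees that
    // the first min(min_bound, max_len) bytes each match 'reach'.
    void castleForwardReach(const std::bitset<256> &reach, uint32_t min_bound,
                            uint32_t max_len,
                            std::map<int32_t, std::bitset<256>> &look) {
        uint32_t len = std::min(min_bound, max_len);
        for (uint32_t i = 0; i < len; i++) {
            look[i] |= reach;
        }
    }
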
+
+static
+void getForwardReach(const raw_dfa &rdfa, map<s32, CharReach> &look) {
+ if (rdfa.states.size() < 2) {
+ return;
+ }
+
flat_set<dstate_id_t> curr, next;
- curr.insert(rdfa.start_anchored);
-
- for (u32 i = 0; i < MAX_FWD_LEN && !curr.empty(); i++) {
- next.clear();
- CharReach cr;
-
- for (const auto state_id : curr) {
- const dstate &ds = rdfa.states[state_id];
-
- if (!ds.reports.empty() || !ds.reports_eod.empty()) {
- return;
- }
-
- for (unsigned c = 0; c < N_CHARS; c++) {
- dstate_id_t succ = ds.next[rdfa.alpha_remap[c]];
- if (succ != DEAD_STATE) {
- cr.set(c);
- next.insert(succ);
- }
- }
- }
-
- assert(cr.any());
- look[i] |= cr;
- curr.swap(next);
- }
-}
-
-static
-void getSuffixForwardReach(const suffix_id &suff, u32 top,
- map<s32, CharReach> &look) {
- if (suff.graph()) {
- getForwardReach(*suff.graph(), top, look);
- } else if (suff.castle()) {
- getForwardReach(*suff.castle(), top, look);
- } else if (suff.dfa()) {
- assert(top == 0); // DFA isn't multi-top capable.
- getForwardReach(*suff.dfa(), look);
- } else if (suff.haig()) {
- assert(top == 0); // DFA isn't multi-top capable.
- getForwardReach(*suff.haig(), look);
- }
-}
-
-static
-void getRoseForwardReach(const left_id &left, u32 top,
- map<s32, CharReach> &look) {
- if (left.graph()) {
- getForwardReach(*left.graph(), top, look);
- } else if (left.castle()) {
- getForwardReach(*left.castle(), top, look);
- } else if (left.dfa()) {
- assert(top == 0); // DFA isn't multi-top capable.
- getForwardReach(*left.dfa(), look);
- } else if (left.haig()) {
- assert(top == 0); // DFA isn't multi-top capable.
- getForwardReach(*left.haig(), look);
- }
-}
-
-static
-void combineForwardMasks(const vector<map<s32, CharReach> > &rose_look,
- map<s32, CharReach> &look) {
- for (u32 i = 0; i < MAX_FWD_LEN; i++) {
- for (const auto &rlook : rose_look) {
- if (contains(rlook, i)) {
- look[i] |= rlook.at(i);
- } else {
- look[i].setall();
- }
- }
- }
-}
-
-static
-void findForwardReach(const RoseGraph &g, const RoseVertex v,
- map<s32, CharReach> &look) {
- if (!g[v].reports.empty()) {
- DEBUG_PRINTF("acceptor\n");
- return;
- }
-
- // Non-leaf vertices can pick up a mask per successor prefix rose
- // engine.
- vector<map<s32, CharReach>> rose_look;
- for (const auto &e : out_edges_range(v, g)) {
- RoseVertex t = target(e, g);
- if (!g[t].left) {
+ curr.insert(rdfa.start_anchored);
+
+ for (u32 i = 0; i < MAX_FWD_LEN && !curr.empty(); i++) {
+ next.clear();
+ CharReach cr;
+
+ for (const auto state_id : curr) {
+ const dstate &ds = rdfa.states[state_id];
+
+ if (!ds.reports.empty() || !ds.reports_eod.empty()) {
+ return;
+ }
+
+ for (unsigned c = 0; c < N_CHARS; c++) {
+ dstate_id_t succ = ds.next[rdfa.alpha_remap[c]];
+ if (succ != DEAD_STATE) {
+ cr.set(c);
+ next.insert(succ);
+ }
+ }
+ }
+
+ assert(cr.any());
+ look[i] |= cr;
+ curr.swap(next);
+ }
+}
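The raw_dfa variant above runs the same sweep on a determinised transition table: for each live state set it records which input bytes lead anywhere other than the dead state, and bails out once any live state reports. A simplified standalone sketch over a byte-indexed table, without the alphabet remap (illustrative names):

    #include <bitset>
    #include <cstdint>
    #include <map>
    #include <set>
    #include <vector>

    constexpr uint16_t DEAD = 0; // state 0 is the dead state in this sketch

    struct DfaState {
        uint16_t next[256]; // successor per input byte
        bool accept;
    };

    void dfaForwardReach(const std::vector<DfaState> &dfa, uint16_t start,
                         uint32_t max_len,
                         std::map<int32_t, std::bitset<256>> &look) {
        std::set<uint16_t> curr{start}, next;
        for (uint32_t i = 0; i < max_len && !curr.empty(); i++) {
            std::bitset<256> cr;
            next.clear();
            for (uint16_t s : curr) {
                if (dfa[s].accept) {
                    return; // as above: no constraints past an accept
                }
                for (unsigned c = 0; c < 256; c++) {
                    uint16_t succ = dfa[s].next[c];
                    if (succ != DEAD) {
                        cr.set(c);
                        next.insert(succ);
                    }
                }
            }
            look[i] |= cr;
            curr.swap(next);
        }
    }
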
+
+static
+void getSuffixForwardReach(const suffix_id &suff, u32 top,
+ map<s32, CharReach> &look) {
+ if (suff.graph()) {
+ getForwardReach(*suff.graph(), top, look);
+ } else if (suff.castle()) {
+ getForwardReach(*suff.castle(), top, look);
+ } else if (suff.dfa()) {
+ assert(top == 0); // DFA isn't multi-top capable.
+ getForwardReach(*suff.dfa(), look);
+ } else if (suff.haig()) {
+ assert(top == 0); // DFA isn't multi-top capable.
+ getForwardReach(*suff.haig(), look);
+ }
+}
+
+static
+void getRoseForwardReach(const left_id &left, u32 top,
+ map<s32, CharReach> &look) {
+ if (left.graph()) {
+ getForwardReach(*left.graph(), top, look);
+ } else if (left.castle()) {
+ getForwardReach(*left.castle(), top, look);
+ } else if (left.dfa()) {
+ assert(top == 0); // DFA isn't multi-top capable.
+ getForwardReach(*left.dfa(), look);
+ } else if (left.haig()) {
+ assert(top == 0); // DFA isn't multi-top capable.
+ getForwardReach(*left.haig(), look);
+ }
+}
+
+static
+void combineForwardMasks(const vector<map<s32, CharReach> > &rose_look,
+ map<s32, CharReach> &look) {
+ for (u32 i = 0; i < MAX_FWD_LEN; i++) {
+ for (const auto &rlook : rose_look) {
+ if (contains(rlook, i)) {
+ look[i] |= rlook.at(i);
+ } else {
+ look[i].setall();
+ }
+ }
+ }
+}
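combineForwardMasks encodes a conservative combination of constraints across successors: an offset is only constrained if every successor engine constrains it, so any engine with no entry at an offset widens that offset to "all characters". Standalone sketch with the earlier stand-in types:

    #include <bitset>
    #include <cstdint>
    #include <map>
    #include <vector>

    using Look = std::map<int32_t, std::bitset<256>>;

    // Union per-successor masks; an offset absent from one successor's map
    // is treated as matching all characters, since that path places no
    // constraint there.
    void combineMasks(const std::vector<Look> &per_succ, uint32_t max_len,
                      Look &out) {
        for (uint32_t i = 0; i < max_len; i++) {
            for (const auto &l : per_succ) {
                auto it = l.find(i);
                if (it != l.end()) {
                    out[i] |= it->second;
                } else {
                    out[i].set(); // setall
                }
            }
        }
    }
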
+
+static
+void findForwardReach(const RoseGraph &g, const RoseVertex v,
+ map<s32, CharReach> &look) {
+ if (!g[v].reports.empty()) {
+ DEBUG_PRINTF("acceptor\n");
+ return;
+ }
+
+ // Non-leaf vertices can pick up a mask per successor prefix rose
+ // engine.
+ vector<map<s32, CharReach>> rose_look;
+ for (const auto &e : out_edges_range(v, g)) {
+ RoseVertex t = target(e, g);
+ if (!g[t].left) {
DEBUG_PRINTF("successor %zu has no leftfix\n", g[t].index);
- return;
- }
- rose_look.push_back(map<s32, CharReach>());
- getRoseForwardReach(g[t].left, g[e].rose_top, rose_look.back());
- }
-
- if (g[v].suffix) {
- DEBUG_PRINTF("suffix engine\n");
- rose_look.push_back(map<s32, CharReach>());
- getSuffixForwardReach(g[v].suffix, g[v].suffix.top, rose_look.back());
- }
-
- combineForwardMasks(rose_look, look);
-}
-
-static
-void findBackwardReach(const RoseGraph &g, const RoseVertex v,
- map<s32, CharReach> &look) {
- if (!g[v].left) {
- return;
- }
-
- DEBUG_PRINTF("leftfix, report=%u, lag=%u\n", g[v].left.leftfix_report,
- g[v].left.lag);
-
- if (g[v].left.graph) {
- getBackwardReach(*g[v].left.graph, g[v].left.leftfix_report,
- g[v].left.lag, look);
- } else if (g[v].left.castle) {
- getBackwardReach(*g[v].left.castle, g[v].left.leftfix_report,
- g[v].left.lag, look);
- }
-
- // TODO: implement DFA variants if necessary.
-}
-
-static
-void normalise(map<s32, CharReach> &look) {
- // We can erase entries where the reach is "all characters".
- vector<s32> dead;
- for (const auto &m : look) {
- if (m.second.all()) {
- dead.push_back(m.first);
- }
- }
- erase_all(&look, dead);
-}
-
-namespace {
-
-struct LookPriority {
- explicit LookPriority(const map<s32, CharReach> &look_in) : look(look_in) {}
-
- bool operator()(s32 a, s32 b) const {
- const CharReach &a_reach = look.at(a);
- const CharReach &b_reach = look.at(b);
- if (a_reach.count() != b_reach.count()) {
- return a_reach.count() < b_reach.count();
- }
- return abs(a) < abs(b);
- }
-
-private:
- const map<s32, CharReach> &look;
-};
-
-} // namespace
-
-static
-bool isFloodProne(const map<s32, CharReach> &look, const CharReach &flood_cr) {
- for (const auto &m : look) {
- const CharReach &look_cr = m.second;
- if (!overlaps(look_cr, flood_cr)) {
- return false;
- }
- }
- DEBUG_PRINTF("look can't escape flood on %s\n",
- describeClass(flood_cr).c_str());
- return true;
-}
-
-static
-bool isFloodProne(const map<s32, CharReach> &look,
- const set<CharReach> &flood_reach) {
- if (flood_reach.empty()) {
- return false;
- }
-
- for (const CharReach &flood_cr : flood_reach) {
- if (isFloodProne(look, flood_cr)) {
- return true;
- }
- }
-
- return false;
-}
-
-static
-void reduce(map<s32, CharReach> &look, set<CharReach> &flood_reach) {
- if (look.size() <= MAX_LOOKAROUND_ENTRIES) {
- return;
- }
-
- DEBUG_PRINTF("before reduce: %s\n", dump(look).c_str());
-
- // First, remove floods that we already can't escape; they shouldn't affect
- // the analysis below.
- for (auto it = flood_reach.begin(); it != flood_reach.end();) {
- if (isFloodProne(look, *it)) {
- DEBUG_PRINTF("removing inescapable flood on %s from analysis\n",
- describeClass(*it).c_str());
- flood_reach.erase(it++);
- } else {
- ++it;
- }
- }
-
- LookPriority cmp(look);
- priority_queue<s32, vector<s32>, LookPriority> pq(cmp);
- for (const auto &m : look) {
- pq.push(m.first);
- }
-
- while (!pq.empty() && look.size() > MAX_LOOKAROUND_ENTRIES) {
- s32 d = pq.top();
- assert(contains(look, d));
- const CharReach cr(look[d]); // copy
- pq.pop();
-
- DEBUG_PRINTF("erasing {%d: %s}\n", d, describeClass(cr).c_str());
- look.erase(d);
-
- // If removing this entry would result in us becoming flood_prone on a
- // particular flood_reach case, reinstate it and move on.
- if (isFloodProne(look, flood_reach)) {
- DEBUG_PRINTF("reinstating {%d: %s} due to flood-prone check\n", d,
- describeClass(cr).c_str());
- look.insert(make_pair(d, cr));
- }
- }
-
- while (!pq.empty()) {
- s32 d = pq.top();
- assert(contains(look, d));
- const CharReach cr(look[d]); // copy
- pq.pop();
-
- if (cr.count() < LOOKAROUND_WIDE_REACH) {
- continue;
- }
-
- DEBUG_PRINTF("erasing {%d: %s}\n", d, describeClass(cr).c_str());
- look.erase(d);
-
- // If removing this entry would result in us becoming flood_prone on a
- // particular flood_reach case, reinstate it and move on.
- if (isFloodProne(look, flood_reach)) {
- DEBUG_PRINTF("reinstating {%d: %s} due to flood-prone check\n", d,
- describeClass(cr).c_str());
- look.insert(make_pair(d, cr));
- }
- }
-
- DEBUG_PRINTF("after reduce: %s\n", dump(look).c_str());
-}
-
-static
-void findFloodReach(const RoseBuildImpl &tbi, const RoseVertex v,
- set<CharReach> &flood_reach) {
- for (u32 lit_id : tbi.g[v].literals) {
+ return;
+ }
+ rose_look.push_back(map<s32, CharReach>());
+ getRoseForwardReach(g[t].left, g[e].rose_top, rose_look.back());
+ }
+
+ if (g[v].suffix) {
+ DEBUG_PRINTF("suffix engine\n");
+ rose_look.push_back(map<s32, CharReach>());
+ getSuffixForwardReach(g[v].suffix, g[v].suffix.top, rose_look.back());
+ }
+
+ combineForwardMasks(rose_look, look);
+}
+
+static
+void findBackwardReach(const RoseGraph &g, const RoseVertex v,
+ map<s32, CharReach> &look) {
+ if (!g[v].left) {
+ return;
+ }
+
+ DEBUG_PRINTF("leftfix, report=%u, lag=%u\n", g[v].left.leftfix_report,
+ g[v].left.lag);
+
+ if (g[v].left.graph) {
+ getBackwardReach(*g[v].left.graph, g[v].left.leftfix_report,
+ g[v].left.lag, look);
+ } else if (g[v].left.castle) {
+ getBackwardReach(*g[v].left.castle, g[v].left.leftfix_report,
+ g[v].left.lag, look);
+ }
+
+ // TODO: implement DFA variants if necessary.
+}
+
+static
+void normalise(map<s32, CharReach> &look) {
+ // We can erase entries where the reach is "all characters".
+ vector<s32> dead;
+ for (const auto &m : look) {
+ if (m.second.all()) {
+ dead.push_back(m.first);
+ }
+ }
+ erase_all(&look, dead);
+}
+
+namespace {
+
+struct LookPriority {
+ explicit LookPriority(const map<s32, CharReach> &look_in) : look(look_in) {}
+
+ bool operator()(s32 a, s32 b) const {
+ const CharReach &a_reach = look.at(a);
+ const CharReach &b_reach = look.at(b);
+ if (a_reach.count() != b_reach.count()) {
+ return a_reach.count() < b_reach.count();
+ }
+ return abs(a) < abs(b);
+ }
+
+private:
+ const map<s32, CharReach> &look;
+};
+
+} // namespace
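LookPriority above orders offsets so the least valuable entries surface first: std::priority_queue pops the greatest element under its comparator, so entries with the widest reach (the weakest filters) pop first, with ties going to the entry farthest from the match point. A compilable sketch of the same ordering over precomputed popcounts (hypothetical shape, not the build code):

    #include <cstdlib>
    #include <map>
    #include <queue>
    #include <vector>

    // Pop order: widest reach first; ties broken by distance from the
    // match point, farthest first. 'counts' maps offset -> reach popcount.
    std::vector<int> popOrder(const std::map<int, size_t> &counts) {
        auto cmp = [&counts](int a, int b) {
            size_t ca = counts.at(a), cb = counts.at(b);
            if (ca != cb) {
                return ca < cb;
            }
            return std::abs(a) < std::abs(b);
        };
        std::priority_queue<int, std::vector<int>, decltype(cmp)> pq(cmp);
        for (const auto &m : counts) {
            pq.push(m.first);
        }
        std::vector<int> order;
        while (!pq.empty()) {
            order.push_back(pq.top());
            pq.pop();
        }
        return order;
    }
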
+
+static
+bool isFloodProne(const map<s32, CharReach> &look, const CharReach &flood_cr) {
+ for (const auto &m : look) {
+ const CharReach &look_cr = m.second;
+ if (!overlaps(look_cr, flood_cr)) {
+ return false;
+ }
+ }
+ DEBUG_PRINTF("look can't escape flood on %s\n",
+ describeClass(flood_cr).c_str());
+ return true;
+}
+
+static
+bool isFloodProne(const map<s32, CharReach> &look,
+ const set<CharReach> &flood_reach) {
+ if (flood_reach.empty()) {
+ return false;
+ }
+
+ for (const CharReach &flood_cr : flood_reach) {
+ if (isFloodProne(look, flood_cr)) {
+ return true;
+ }
+ }
+
+ return false;
+}
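The flood-prone test above asks whether an endless run of some flood character class could ever be rejected by the mask: it can only be rejected if at least one entry's reach is disjoint from the flood. Standalone sketch:

    #include <bitset>
    #include <map>

    // True if a flood of 'flood' characters satisfies every entry, i.e.
    // the lookaround can never fail during such a flood.
    bool floodProne(const std::map<int, std::bitset<256>> &look,
                    const std::bitset<256> &flood) {
        for (const auto &m : look) {
            if ((m.second & flood).none()) {
                return false; // this entry can reject the flood
            }
        }
        return true;
    }
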
+
+static
+void reduce(map<s32, CharReach> &look, set<CharReach> &flood_reach) {
+ if (look.size() <= MAX_LOOKAROUND_ENTRIES) {
+ return;
+ }
+
+ DEBUG_PRINTF("before reduce: %s\n", dump(look).c_str());
+
+ // First, remove floods that we already can't escape; they shouldn't affect
+ // the analysis below.
+ for (auto it = flood_reach.begin(); it != flood_reach.end();) {
+ if (isFloodProne(look, *it)) {
+ DEBUG_PRINTF("removing inescapable flood on %s from analysis\n",
+ describeClass(*it).c_str());
+ flood_reach.erase(it++);
+ } else {
+ ++it;
+ }
+ }
+
+ LookPriority cmp(look);
+ priority_queue<s32, vector<s32>, LookPriority> pq(cmp);
+ for (const auto &m : look) {
+ pq.push(m.first);
+ }
+
+ while (!pq.empty() && look.size() > MAX_LOOKAROUND_ENTRIES) {
+ s32 d = pq.top();
+ assert(contains(look, d));
+ const CharReach cr(look[d]); // copy
+ pq.pop();
+
+ DEBUG_PRINTF("erasing {%d: %s}\n", d, describeClass(cr).c_str());
+ look.erase(d);
+
+ // If removing this entry would result in us becoming flood_prone on a
+ // particular flood_reach case, reinstate it and move on.
+ if (isFloodProne(look, flood_reach)) {
+ DEBUG_PRINTF("reinstating {%d: %s} due to flood-prone check\n", d,
+ describeClass(cr).c_str());
+ look.insert(make_pair(d, cr));
+ }
+ }
+
+ while (!pq.empty()) {
+ s32 d = pq.top();
+ assert(contains(look, d));
+ const CharReach cr(look[d]); // copy
+ pq.pop();
+
+ if (cr.count() < LOOKAROUND_WIDE_REACH) {
+ continue;
+ }
+
+ DEBUG_PRINTF("erasing {%d: %s}\n", d, describeClass(cr).c_str());
+ look.erase(d);
+
+ // If removing this entry would result in us becoming flood_prone on a
+ // particular flood_reach case, reinstate it and move on.
+ if (isFloodProne(look, flood_reach)) {
+ DEBUG_PRINTF("reinstating {%d: %s} due to flood-prone check\n", d,
+ describeClass(cr).c_str());
+ look.insert(make_pair(d, cr));
+ }
+ }
+
+ DEBUG_PRINTF("after reduce: %s\n", dump(look).c_str());
+}
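reduce's two loops share one erase-then-maybe-reinstate step: remove the currently least valuable entry, and if the remainder could no longer escape some literal flood, put it back. The first loop runs until the entry-count cap is met; the second additionally drops any surviving entry with very wide reach (LOOKAROUND_WIDE_REACH or more). A standalone sketch of a single such pass, with floodProne as sketched earlier and illustrative names throughout:

    #include <bitset>
    #include <map>
    #include <vector>

    using Look = std::map<int, std::bitset<256>>;
    using Reach = std::bitset<256>;

    static bool floodProne(const Look &look, const Reach &flood) {
        for (const auto &m : look) {
            if ((m.second & flood).none()) {
                return false;
            }
        }
        return true;
    }

    // Drop entries in the given priority order until 'limit' is reached,
    // reinstating any whose removal would leave the mask unable to escape
    // one of the literal floods.
    void reducePass(Look &look, const std::vector<int> &order,
                    const std::vector<Reach> &floods, size_t limit) {
        for (int d : order) {
            if (look.size() <= limit) {
                break;
            }
            auto it = look.find(d);
            if (it == look.end()) {
                continue;
            }
            Reach saved = it->second;
            look.erase(it);
            for (const Reach &f : floods) {
                if (floodProne(look, f)) {
                    look.emplace(d, saved); // reinstate, stop checking
                    break;
                }
            }
        }
    }
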
+
+static
+void findFloodReach(const RoseBuildImpl &tbi, const RoseVertex v,
+ set<CharReach> &flood_reach) {
+ for (u32 lit_id : tbi.g[v].literals) {
const ue2_literal &s = tbi.literals.at(lit_id).s;
- if (s.empty()) {
- continue;
- }
- if (is_flood(s)) {
- CharReach cr(*s.begin());
- DEBUG_PRINTF("flood-prone with reach: %s\n",
- describeClass(cr).c_str());
- flood_reach.insert(cr);
- }
- }
-}
-
+ if (s.empty()) {
+ continue;
+ }
+ if (is_flood(s)) {
+ CharReach cr(*s.begin());
+ DEBUG_PRINTF("flood-prone with reach: %s\n",
+ describeClass(cr).c_str());
+ flood_reach.insert(cr);
+ }
+ }
+}
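findFloodReach collects the reach of single-character-run literals, since a stream flooded with that character re-triggers such a literal at every byte. The real is_flood operates on ue2_literal, whose characters carry case information; a simplified std::string rendering of the check, for illustration only:

    #include <string>

    // A literal is flood-prone when it is one character repeated.
    static bool isFlood(const std::string &s) {
        if (s.empty()) {
            return false;
        }
        for (char c : s) {
            if (c != s[0]) {
                return false;
            }
        }
        return true;
    }
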
+
namespace {
struct LookProto {
@@ -470,7 +470,7 @@ struct LookProto {
};
}
-static
+static
vector<LookProto> findLiteralReach(const rose_literal_id &lit) {
vector<LookProto> look;
look.reserve(lit.s.length());
@@ -490,15 +490,15 @@ vector<LookProto> findLiteralReach(const RoseBuildImpl &build,
bool first = true;
vector<LookProto> look;
- for (u32 lit_id : build.g[v].literals) {
+ for (u32 lit_id : build.g[v].literals) {
const rose_literal_id &lit = build.literals.at(lit_id);
auto lit_look = findLiteralReach(lit);
-
+
if (first) {
look = std::move(lit_look);
first = false;
continue;
- }
+ }
// Erase elements from look with keys not in lit_look. Where a key is
// in both maps, union its reach with the lookaround.
@@ -523,34 +523,34 @@ vector<LookProto> findLiteralReach(const RoseBuildImpl &build,
++jt;
}
}
- }
-
- return look;
-}
-
-/**
- * Trim lookaround checks from the prefix that overlap with the literals
- * themselves.
- */
-static
-void trimLiterals(const RoseBuildImpl &build, const RoseVertex v,
- map<s32, CharReach> &look) {
- DEBUG_PRINTF("pre-trim lookaround: %s\n", dump(look).c_str());
-
- for (const auto &m : findLiteralReach(build, v)) {
+ }
+
+ return look;
+}
+
+/**
+ * Trim lookaround checks from the prefix that overlap with the literals
+ * themselves.
+ */
+static
+void trimLiterals(const RoseBuildImpl &build, const RoseVertex v,
+ map<s32, CharReach> &look) {
+ DEBUG_PRINTF("pre-trim lookaround: %s\n", dump(look).c_str());
+
+ for (const auto &m : findLiteralReach(build, v)) {
auto it = look.find(m.offset);
- if (it == end(look)) {
- continue;
- }
+ if (it == end(look)) {
+ continue;
+ }
if (m.reach.isSubsetOf(it->second)) {
- DEBUG_PRINTF("can trim entry at %d\n", it->first);
- look.erase(it);
- }
- }
-
- DEBUG_PRINTF("post-trim lookaround: %s\n", dump(look).c_str());
-}
-
+ DEBUG_PRINTF("can trim entry at %d\n", it->first);
+ look.erase(it);
+ }
+ }
+
+ DEBUG_PRINTF("post-trim lookaround: %s\n", dump(look).c_str());
+}
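The subset test in trimLiterals is the crux: if the bytes the literal itself guarantees at an offset are a subset of what the lookaround would accept there, the lookaround entry proves nothing beyond the literal match and can be dropped. Standalone sketch of the subset-erase, using the bitset identity that A is a subset of B exactly when A & ~B is empty:

    #include <bitset>
    #include <map>
    #include <vector>

    struct LitReach {
        int offset;
        std::bitset<256> reach; // bytes the literal guarantees at 'offset'
    };

    // Erase lookaround entries already implied by the matched literal.
    void trimByLiteral(std::map<int, std::bitset<256>> &look,
                       const std::vector<LitReach> &lit) {
        for (const auto &m : lit) {
            auto it = look.find(m.offset);
            if (it == look.end()) {
                continue;
            }
            if ((m.reach & ~it->second).none()) {
                look.erase(it); // literal reach is a subset: redundant
            }
        }
    }
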
+
static
void normaliseLeftfix(map<s32, CharReach> &look) {
// We can erase entries where the reach is "all characters", except for the
@@ -621,44 +621,44 @@ void transToLookaround(const vector<map<s32, CharReach>> &looks,
}
}
-void findLookaroundMasks(const RoseBuildImpl &tbi, const RoseVertex v,
- vector<LookEntry> &lookaround) {
- lookaround.clear();
-
- const RoseGraph &g = tbi.g;
-
- map<s32, CharReach> look;
- findBackwardReach(g, v, look);
- findForwardReach(g, v, look);
- trimLiterals(tbi, v, look);
-
- if (look.empty()) {
- return;
- }
-
- normalise(look);
-
- if (look.empty()) {
- return;
- }
-
- set<CharReach> flood_reach;
- findFloodReach(tbi, v, flood_reach);
- reduce(look, flood_reach);
-
- if (look.empty()) {
- return;
- }
-
- DEBUG_PRINTF("lookaround: %s\n", dump(look).c_str());
- lookaround.reserve(look.size());
- for (const auto &m : look) {
- s8 offset = verify_s8(m.first);
- lookaround.emplace_back(offset, m.second);
- }
-}
-
-static
+void findLookaroundMasks(const RoseBuildImpl &tbi, const RoseVertex v,
+ vector<LookEntry> &lookaround) {
+ lookaround.clear();
+
+ const RoseGraph &g = tbi.g;
+
+ map<s32, CharReach> look;
+ findBackwardReach(g, v, look);
+ findForwardReach(g, v, look);
+ trimLiterals(tbi, v, look);
+
+ if (look.empty()) {
+ return;
+ }
+
+ normalise(look);
+
+ if (look.empty()) {
+ return;
+ }
+
+ set<CharReach> flood_reach;
+ findFloodReach(tbi, v, flood_reach);
+ reduce(look, flood_reach);
+
+ if (look.empty()) {
+ return;
+ }
+
+ DEBUG_PRINTF("lookaround: %s\n", dump(look).c_str());
+ lookaround.reserve(look.size());
+ for (const auto &m : look) {
+ s8 offset = verify_s8(m.first);
+ lookaround.emplace_back(offset, m.second);
+ }
+}
+
+static
bool checkShuftiBuckets(const vector<map<s32, CharReach>> &looks,
u32 bucket_size) {
set<u32> bucket;
@@ -685,25 +685,25 @@ bool checkShuftiBuckets(const vector<map<s32, CharReach>> &looks,
bucket.insert(hi_lo);
}
}
- }
+ }
DEBUG_PRINTF("shufti has %lu bucket(s)\n", bucket.size());
return bucket.size() <= bucket_size;
}
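For context on the bucket check that ends above: a shufti-style byte classifier distinguishes characters by (high nibble, low nibble) pairs and supports only a small number of distinct buckets, so the transform is rejected when the masks would exceed the budget (8 buckets when total length exceeds 32, else 16). One plausible simplified rendering of such a count, assuming one bucket per distinct pair of nibble sets per mask; this is a sketch, not the exact bucketing the build code performs:

    #include <bitset>
    #include <cstdint>
    #include <set>
    #include <vector>

    bool fitsInBuckets(const std::vector<std::bitset<256>> &masks,
                       size_t bucket_limit) {
        std::set<uint32_t> buckets;
        for (const auto &cr : masks) {
            uint16_t hi = 0, lo = 0;
            for (unsigned c = 0; c < 256; c++) {
                if (cr[c]) {
                    hi |= 1u << (c >> 4);  // high-nibble set
                    lo |= 1u << (c & 0xf); // low-nibble set
                }
            }
            buckets.insert((uint32_t)hi << 16 | lo);
        }
        return buckets.size() <= bucket_limit;
    }
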
-
+
static
bool getTransientPrefixReach(const NGHolder &g, ReportID report, u32 lag,
vector<map<s32, CharReach>> &looks) {
if (!isAcyclic(g)) {
DEBUG_PRINTF("contains back-edge\n");
- return false;
- }
-
+ return false;
+ }
+
// Must be floating chains wired to startDs.
if (!isFloating(g)) {
DEBUG_PRINTF("not a floating start\n");
- return false;
- }
-
+ return false;
+ }
+
vector<NFAVertex> curr;
for (auto v : inv_adjacent_vertices_range(g.accept, g)) {
if (v == g.start || v == g.startDs) {
@@ -737,16 +737,16 @@ bool getTransientPrefixReach(const NGHolder &g, ReportID report, u32 lag,
size_t curr_size = curr.size();
if (curr.size() > 1 && i > lag + MULTIPATH_MAX_LEN) {
DEBUG_PRINTF("range is larger than 16 in multi-path\n");
- return false;
- }
-
+ return false;
+ }
+
for (size_t idx = 0; idx < curr_size; idx++) {
NFAVertex v = curr[idx];
if (v == g.startDs) {
continue;
}
assert(!is_special(v, g));
-
+
for (auto u : inv_adjacent_vertices_range(v, g)) {
if (u == g.start || u == g.startDs) {
curr[idx] = g.startDs;
@@ -792,88 +792,88 @@ bool getTransientPrefixReach(const NGHolder &g, ReportID report, u32 lag,
u32 bucket_size = total_len > 32 ? 8 : 16;
if (!checkShuftiBuckets(looks, bucket_size)) {
DEBUG_PRINTF("shufti has too many buckets\n");
- return false;
- }
+ return false;
+ }
}
-
+
assert(!looks.empty());
if (looks.size() == 1) {
DEBUG_PRINTF("single lookaround\n");
} else {
DEBUG_PRINTF("multi-path lookaround\n");
- }
- DEBUG_PRINTF("done\n");
- return true;
-}
-
-bool makeLeftfixLookaround(const RoseBuildImpl &build, const RoseVertex v,
+ }
+ DEBUG_PRINTF("done\n");
+ return true;
+}
+
+bool makeLeftfixLookaround(const RoseBuildImpl &build, const RoseVertex v,
vector<vector<LookEntry>> &lookaround) {
- lookaround.clear();
-
- const RoseGraph &g = build.g;
- const left_id leftfix(g[v].left);
-
- if (!contains(build.transient, leftfix)) {
- DEBUG_PRINTF("not transient\n");
- return false;
- }
-
- if (!leftfix.graph()) {
- DEBUG_PRINTF("only supported for graphs so far\n");
- return false;
- }
-
+ lookaround.clear();
+
+ const RoseGraph &g = build.g;
+ const left_id leftfix(g[v].left);
+
+ if (!contains(build.transient, leftfix)) {
+ DEBUG_PRINTF("not transient\n");
+ return false;
+ }
+
+ if (!leftfix.graph()) {
+ DEBUG_PRINTF("only supported for graphs so far\n");
+ return false;
+ }
+
vector<map<s32, CharReach>> looks;
if (!getTransientPrefixReach(*leftfix.graph(), g[v].left.leftfix_report,
g[v].left.lag, looks)) {
DEBUG_PRINTF("graph has loop or too large\n");
- return false;
- }
-
+ return false;
+ }
+
if (!trimMultipathLeftfix(build, v, looks)) {
- return false;
- }
+ return false;
+ }
transToLookaround(looks, lookaround);
-
+
return !lookaround.empty();
-}
-
-void mergeLookaround(vector<LookEntry> &lookaround,
- const vector<LookEntry> &more_lookaround) {
- if (lookaround.size() >= MAX_LOOKAROUND_ENTRIES) {
- DEBUG_PRINTF("big enough!\n");
- return;
- }
-
- // Don't merge lookarounds at offsets we already have entries for.
+}
+
+void mergeLookaround(vector<LookEntry> &lookaround,
+ const vector<LookEntry> &more_lookaround) {
+ if (lookaround.size() >= MAX_LOOKAROUND_ENTRIES) {
+ DEBUG_PRINTF("big enough!\n");
+ return;
+ }
+
+ // Don't merge lookarounds at offsets we already have entries for.
flat_set<s8> offsets;
- for (const auto &e : lookaround) {
- offsets.insert(e.offset);
- }
-
- map<s32, CharReach> more;
- LookPriority cmp(more);
- priority_queue<s32, vector<s32>, LookPriority> pq(cmp);
- for (const auto &e : more_lookaround) {
- if (!contains(offsets, e.offset)) {
- more.emplace(e.offset, e.reach);
- pq.push(e.offset);
- }
- }
-
- while (!pq.empty() && lookaround.size() < MAX_LOOKAROUND_ENTRIES) {
- const s32 offset = pq.top();
- pq.pop();
- const auto &cr = more.at(offset);
- DEBUG_PRINTF("added {%d,%s}\n", offset, describeClass(cr).c_str());
- lookaround.emplace_back(verify_s8(offset), cr);
- }
-
- // Order by offset.
- sort(begin(lookaround), end(lookaround),
- [](const LookEntry &a, const LookEntry &b) {
- return a.offset < b.offset;
- });
-}
-
-} // namespace ue2
+ for (const auto &e : lookaround) {
+ offsets.insert(e.offset);
+ }
+
+ map<s32, CharReach> more;
+ LookPriority cmp(more);
+ priority_queue<s32, vector<s32>, LookPriority> pq(cmp);
+ for (const auto &e : more_lookaround) {
+ if (!contains(offsets, e.offset)) {
+ more.emplace(e.offset, e.reach);
+ pq.push(e.offset);
+ }
+ }
+
+ while (!pq.empty() && lookaround.size() < MAX_LOOKAROUND_ENTRIES) {
+ const s32 offset = pq.top();
+ pq.pop();
+ const auto &cr = more.at(offset);
+ DEBUG_PRINTF("added {%d,%s}\n", offset, describeClass(cr).c_str());
+ lookaround.emplace_back(verify_s8(offset), cr);
+ }
+
+ // Order by offset.
+ sort(begin(lookaround), end(lookaround),
+ [](const LookEntry &a, const LookEntry &b) {
+ return a.offset < b.offset;
+ });
+}
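mergeLookaround above only adds entries at offsets not already covered, stops at the entry cap, and re-sorts by offset so the runtime can scan the program in order. A standalone sketch of that shape, minus the reach-based prioritisation (stand-in types, illustrative names):

    #include <algorithm>
    #include <bitset>
    #include <cstdint>
    #include <set>
    #include <vector>

    struct Entry {                  // stand-in for LookEntry
        int8_t offset;
        std::bitset<256> reach;
    };

    // Append entries from 'more' at offsets we don't already cover, capped
    // at 'max_entries', then restore offset order.
    void mergeEntries(std::vector<Entry> &out, const std::vector<Entry> &more,
                      size_t max_entries) {
        std::set<int8_t> seen;
        for (const auto &e : out) {
            seen.insert(e.offset);
        }
        for (const auto &e : more) {
            if (out.size() >= max_entries) {
                break;
            }
            if (seen.insert(e.offset).second) {
                out.push_back(e);
            }
        }
        std::sort(out.begin(), out.end(),
                  [](const Entry &a, const Entry &b) {
                      return a.offset < b.offset;
                  });
    }
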
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_lookaround.h b/contrib/libs/hyperscan/src/rose/rose_build_lookaround.h
index 814f784ecf..70d4217ccc 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_lookaround.h
+++ b/contrib/libs/hyperscan/src/rose/rose_build_lookaround.h
@@ -1,81 +1,81 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Rose compile-time analysis for lookaround masks.
- */
-#ifndef ROSE_ROSE_BUILD_LOOKAROUND_H
-#define ROSE_ROSE_BUILD_LOOKAROUND_H
-
-#include "rose_graph.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Rose compile-time analysis for lookaround masks.
+ */
+#ifndef ROSE_ROSE_BUILD_LOOKAROUND_H
+#define ROSE_ROSE_BUILD_LOOKAROUND_H
+
+#include "rose_graph.h"
#include "util/hash.h"
-
-#include <vector>
-
+
+#include <vector>
+
/** \brief Max path number for multi-path lookaround. */
#define MAX_LOOKAROUND_PATHS 8
-namespace ue2 {
-
-class CharReach;
-class RoseBuildImpl;
-
-/** \brief Lookaround entry prototype, describing the reachability at a given
- * distance from the end of a role match. */
-struct LookEntry {
+namespace ue2 {
+
+class CharReach;
+class RoseBuildImpl;
+
+/** \brief Lookaround entry prototype, describing the reachability at a given
+ * distance from the end of a role match. */
+struct LookEntry {
LookEntry() : offset(0) {}
- LookEntry(s8 offset_in, const CharReach &reach_in)
- : offset(offset_in), reach(reach_in) {}
- s8 offset; //!< offset from role match location.
- CharReach reach; //!< reachability at given offset.
-
- bool operator==(const LookEntry &other) const {
- return offset == other.offset && reach == other.reach;
- }
-};
-
-void findLookaroundMasks(const RoseBuildImpl &tbi, const RoseVertex v,
+ LookEntry(s8 offset_in, const CharReach &reach_in)
+ : offset(offset_in), reach(reach_in) {}
+ s8 offset; //!< offset from role match location.
+ CharReach reach; //!< reachability at given offset.
+
+ bool operator==(const LookEntry &other) const {
+ return offset == other.offset && reach == other.reach;
+ }
+};
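LookEntry's operator== pairs with the std::hash specialisation near the bottom of this header so that lookaround programs can be pooled through unordered containers. A standalone analog with a stand-in struct; the real hash lives in util/hash.h and differs, this only shows the pattern:

    #include <bitset>
    #include <cstddef>
    #include <cstdint>
    #include <unordered_set>

    struct Look {                   // stand-in for ue2::LookEntry
        int8_t offset = 0;
        std::bitset<256> reach;
        bool operator==(const Look &o) const {
            return offset == o.offset && reach == o.reach;
        }
    };

    namespace std {
    template<>
    struct hash<Look> {
        size_t operator()(const Look &l) const {
            return hash<std::bitset<256>>()(l.reach) ^
                   static_cast<size_t>(static_cast<uint8_t>(l.offset));
        }
    };
    } // namespace std

    // Distinct entries can now be pooled: std::unordered_set<Look> pool;
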
+
+void findLookaroundMasks(const RoseBuildImpl &tbi, const RoseVertex v,
std::vector<LookEntry> &look_more);
-
-/**
- * \brief If possible, render the prefix of the given vertex as a lookaround.
- *
- * Given a prefix, returns true (and fills the lookaround vector) if
- * it can be satisfied with a lookaround alone.
- */
-bool makeLeftfixLookaround(const RoseBuildImpl &build, const RoseVertex v,
+
+/**
+ * \brief If possible, render the prefix of the given vertex as a lookaround.
+ *
+ * Given a prefix, returns true (and fills the lookaround vector) if
+ * it can be satisfied with a lookaround alone.
+ */
+bool makeLeftfixLookaround(const RoseBuildImpl &build, const RoseVertex v,
std::vector<std::vector<LookEntry>> &lookaround);
-
-void mergeLookaround(std::vector<LookEntry> &lookaround,
- const std::vector<LookEntry> &more_lookaround);
-
-} // namespace ue2
-
+
+void mergeLookaround(std::vector<LookEntry> &lookaround,
+ const std::vector<LookEntry> &more_lookaround);
+
+} // namespace ue2
+
namespace std {
template<>
@@ -87,4 +87,4 @@ struct hash<ue2::LookEntry> {
} // namespace std
-#endif // ROSE_ROSE_BUILD_LOOKAROUND_H
+#endif // ROSE_ROSE_BUILD_LOOKAROUND_H
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_merge.cpp b/contrib/libs/hyperscan/src/rose/rose_build_merge.cpp
index 2b92d83fb4..5066dbd578 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_merge.cpp
+++ b/contrib/libs/hyperscan/src/rose/rose_build_merge.cpp
@@ -1,490 +1,490 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Rose Build: functions for reducing the size of the Rose graph
- * through merging.
- */
-#include "rose_build_merge.h"
-
-#include "grey.h"
-#include "rose_build.h"
-#include "rose_build_impl.h"
-#include "rose_build_util.h"
-#include "ue2common.h"
-#include "nfa/castlecompile.h"
-#include "nfa/goughcompile.h"
-#include "nfa/limex_limits.h"
-#include "nfa/mcclellancompile.h"
-#include "nfa/nfa_build_util.h"
-#include "nfa/rdfa_merge.h"
-#include "nfagraph/ng_holder.h"
-#include "nfagraph/ng_haig.h"
-#include "nfagraph/ng_is_equal.h"
-#include "nfagraph/ng_lbr.h"
-#include "nfagraph/ng_limex.h"
-#include "nfagraph/ng_mcclellan.h"
-#include "nfagraph/ng_puff.h"
-#include "nfagraph/ng_redundancy.h"
-#include "nfagraph/ng_repeat.h"
-#include "nfagraph/ng_reports.h"
-#include "nfagraph/ng_stop.h"
-#include "nfagraph/ng_uncalc_components.h"
-#include "nfagraph/ng_util.h"
-#include "nfagraph/ng_width.h"
-#include "util/bitutils.h"
-#include "util/charreach.h"
-#include "util/compile_context.h"
-#include "util/container.h"
-#include "util/dump_charclass.h"
-#include "util/graph_range.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Rose Build: functions for reducing the size of the Rose graph
+ * through merging.
+ */
+#include "rose_build_merge.h"
+
+#include "grey.h"
+#include "rose_build.h"
+#include "rose_build_impl.h"
+#include "rose_build_util.h"
+#include "ue2common.h"
+#include "nfa/castlecompile.h"
+#include "nfa/goughcompile.h"
+#include "nfa/limex_limits.h"
+#include "nfa/mcclellancompile.h"
+#include "nfa/nfa_build_util.h"
+#include "nfa/rdfa_merge.h"
+#include "nfagraph/ng_holder.h"
+#include "nfagraph/ng_haig.h"
+#include "nfagraph/ng_is_equal.h"
+#include "nfagraph/ng_lbr.h"
+#include "nfagraph/ng_limex.h"
+#include "nfagraph/ng_mcclellan.h"
+#include "nfagraph/ng_puff.h"
+#include "nfagraph/ng_redundancy.h"
+#include "nfagraph/ng_repeat.h"
+#include "nfagraph/ng_reports.h"
+#include "nfagraph/ng_stop.h"
+#include "nfagraph/ng_uncalc_components.h"
+#include "nfagraph/ng_util.h"
+#include "nfagraph/ng_width.h"
+#include "util/bitutils.h"
+#include "util/charreach.h"
+#include "util/compile_context.h"
+#include "util/container.h"
+#include "util/dump_charclass.h"
+#include "util/graph_range.h"
#include "util/hash.h"
#include "util/insertion_ordered.h"
-#include "util/order_check.h"
-#include "util/report_manager.h"
-#include "util/ue2string.h"
+#include "util/order_check.h"
+#include "util/report_manager.h"
+#include "util/ue2string.h"
#include "util/unordered.h"
-
-#include <algorithm>
-#include <functional>
-#include <list>
-#include <map>
-#include <queue>
-#include <set>
-#include <string>
-#include <vector>
-#include <utility>
-
-#include <boost/range/adaptor/map.hpp>
-
-using namespace std;
-using boost::adaptors::map_values;
+
+#include <algorithm>
+#include <functional>
+#include <list>
+#include <map>
+#include <queue>
+#include <set>
+#include <string>
+#include <vector>
+#include <utility>
+
+#include <boost/range/adaptor/map.hpp>
+
+using namespace std;
+using boost::adaptors::map_values;
using boost::adaptors::map_keys;
-
-namespace ue2 {
-
-static const size_t NARROW_START_MAX = 10;
-static const size_t SMALL_MERGE_MAX_VERTICES_STREAM = 128;
-static const size_t SMALL_MERGE_MAX_VERTICES_BLOCK = 64;
-static const size_t SMALL_ROSE_THRESHOLD_STREAM = 32;
-static const size_t SMALL_ROSE_THRESHOLD_BLOCK = 10;
-static const size_t MERGE_GROUP_SIZE_MAX = 200;
+
+namespace ue2 {
+
+static const size_t NARROW_START_MAX = 10;
+static const size_t SMALL_MERGE_MAX_VERTICES_STREAM = 128;
+static const size_t SMALL_MERGE_MAX_VERTICES_BLOCK = 64;
+static const size_t SMALL_ROSE_THRESHOLD_STREAM = 32;
+static const size_t SMALL_ROSE_THRESHOLD_BLOCK = 10;
+static const size_t MERGE_GROUP_SIZE_MAX = 200;
static const size_t MERGE_CASTLE_GROUP_SIZE_MAX = 1000;
-
-/** \brief Max number of DFAs (McClellan, Haig) to pairwise merge together. */
-static const size_t DFA_CHUNK_SIZE_MAX = 200;
-
-/** \brief Max DFA states in a merged DFA. */
-static const size_t DFA_MERGE_MAX_STATES = 8000;
-
+
+/** \brief Max number of DFAs (McClellan, Haig) to pairwise merge together. */
+static const size_t DFA_CHUNK_SIZE_MAX = 200;
+
+/** \brief Max DFA states in a merged DFA. */
+static const size_t DFA_MERGE_MAX_STATES = 8000;
+
/** \brief In block mode, merge two prefixes even if they don't have identical
* literal sets if they have fewer than this many states and the merged graph
* is also small. */
static constexpr size_t MAX_BLOCK_PREFIX_MERGE_VERTICES = 32;
-
-static
-size_t small_merge_max_vertices(const CompileContext &cc) {
- return cc.streaming ? SMALL_MERGE_MAX_VERTICES_STREAM
- : SMALL_MERGE_MAX_VERTICES_BLOCK;
-}
-
-static
-size_t small_rose_threshold(const CompileContext &cc) {
- return cc.streaming ? SMALL_ROSE_THRESHOLD_STREAM
- : SMALL_ROSE_THRESHOLD_BLOCK;
-}
-
-/**
- * Returns a loose hash of a leftfix for use in dedupeLeftfixes. Note that
- * reports should not contribute to the hash.
- */
-static
+
+static
+size_t small_merge_max_vertices(const CompileContext &cc) {
+ return cc.streaming ? SMALL_MERGE_MAX_VERTICES_STREAM
+ : SMALL_MERGE_MAX_VERTICES_BLOCK;
+}
+
+static
+size_t small_rose_threshold(const CompileContext &cc) {
+ return cc.streaming ? SMALL_ROSE_THRESHOLD_STREAM
+ : SMALL_ROSE_THRESHOLD_BLOCK;
+}
+
+/**
+ * Returns a loose hash of a leftfix for use in dedupeLeftfixes. Note that
+ * reports should not contribute to the hash.
+ */
+static
size_t hashLeftfix(const left_id &left) {
- size_t val = 0;
-
+ size_t val = 0;
+
if (left.castle()) {
hash_combine(val, left.castle()->reach());
for (const auto &pr : left.castle()->repeats) {
- hash_combine(val, pr.first); // top
- hash_combine(val, pr.second.bounds);
- }
+ hash_combine(val, pr.first); // top
+ hash_combine(val, pr.second.bounds);
+ }
} else if (left.graph()) {
hash_combine(val, hash_holder(*left.graph()));
- }
-
- return val;
-}
-
-namespace {
-
-/** Key used to group sets of leftfixes by the dedupeLeftfixes path. */
-struct RoseGroup {
- RoseGroup(const RoseBuildImpl &build, RoseVertex v)
- : left_hash(hashLeftfix(build.g[v].left)),
- lag(build.g[v].left.lag), eod_table(build.isInETable(v)) {
- const RoseGraph &g = build.g;
- assert(in_degree(v, g) == 1);
- RoseVertex u = *inv_adjacent_vertices(v, g).first;
+ }
+
+ return val;
+}
+
+namespace {
+
+/** Key used to group sets of leftfixes by the dedupeLeftfixes path. */
+struct RoseGroup {
+ RoseGroup(const RoseBuildImpl &build, RoseVertex v)
+ : left_hash(hashLeftfix(build.g[v].left)),
+ lag(build.g[v].left.lag), eod_table(build.isInETable(v)) {
+ const RoseGraph &g = build.g;
+ assert(in_degree(v, g) == 1);
+ RoseVertex u = *inv_adjacent_vertices(v, g).first;
parent = g[u].index;
- }
-
- bool operator<(const RoseGroup &b) const {
- const RoseGroup &a = *this;
- ORDER_CHECK(parent);
- ORDER_CHECK(left_hash);
- ORDER_CHECK(lag);
- ORDER_CHECK(eod_table);
- return false;
- }
-
-private:
- /** Parent vertex index. We must use the index, rather than the descriptor,
- * for compile determinism. */
- size_t parent;
-
- /** Quick hash of the leftfix itself. Must be identical for a given pair of
- * graphs if is_equal would return true. */
- size_t left_hash;
-
- /** Leftfix lag value. */
- u32 lag;
-
- /** True if associated vertex (successor) is in the EOD table. We don't
- * allow sharing of leftfix engines between "normal" and EOD operation. */
- bool eod_table;
-};
-
-/**
+ }
+
+ bool operator<(const RoseGroup &b) const {
+ const RoseGroup &a = *this;
+ ORDER_CHECK(parent);
+ ORDER_CHECK(left_hash);
+ ORDER_CHECK(lag);
+ ORDER_CHECK(eod_table);
+ return false;
+ }
+
+private:
+ /** Parent vertex index. We must use the index, rather than the descriptor,
+ * for compile determinism. */
+ size_t parent;
+
+ /** Quick hash of the leftfix itself. Must be identical for a given pair of
+ * graphs if is_equal would return true. */
+ size_t left_hash;
+
+ /** Leftfix lag value. */
+ u32 lag;
+
+ /** True if associated vertex (successor) is in the EOD table. We don't
+ * allow sharing of leftfix engines between "normal" and EOD operation. */
+ bool eod_table;
+};
+
+/**
* Intended to find graphs that are identical except for their report
* IDs. Relies on vertex and edge indices to pick up graphs that have been
* messily put together in different orderings. Only implemented for castles and
* holders.
- */
+ */
static
bool is_equal(const left_id &u_left, ReportID u_report,
const left_id &v_left, ReportID v_report) {
if (u_left.castle() && v_left.castle()) {
return is_equal(*u_left.castle(), u_report, *v_left.castle(), v_report);
}
-
+
if (!u_left.graph() || !v_left.graph()) {
return false;
- }
-
+ }
+
return is_equal(*u_left.graph(), u_report, *v_left.graph(), v_report);
}
-
-} // namespace
-
-/**
- * This pass performs work similar to \ref dedupeSuffixes - it removes
- * duplicate prefix/infixes (that is, leftfixes) which are identical graphs and
- * share the same trigger vertex and lag. Leftfixes are first grouped by
- * parent role and lag to reduce the number of candidates to be inspected
- * for each leftfix. The graphs in each cluster are then compared with each
- * other and the graph is updated to only refer to a canonical version of each
- * graph.
- *
- * Note: only roles with a single predecessor vertex are considered for this
- * transform - it should probably be generalised to work for roles which share
+
+} // namespace
+
+/**
+ * This pass performs work similar to \ref dedupeSuffixes - it removes
+ * duplicate prefix/infixes (that is, leftfixes) which are identical graphs and
+ * share the same trigger vertex and lag. Leftfixes are first grouped by
+ * parent role and lag to reduce the number of candidates to be inspected
+ * for each leftfix. The graphs in each cluster are then compared with each
+ * other and the graph is updated to only refer to a canonical version of each
+ * graph.
+ *
+ * Note: only roles with a single predecessor vertex are considered for this
+ * transform - it should probably be generalised to work for roles which share
* the same set of predecessor roles as for \ref dedupeLeftfixesVariableLag or
* it should be retired entirely.
- */
-bool dedupeLeftfixes(RoseBuildImpl &tbi) {
- DEBUG_PRINTF("deduping leftfixes\n");
- map<RoseGroup, deque<RoseVertex>> roses;
- bool work_done = false;
-
- /* Note: a leftfix's transientness will not be altered by deduping */
-
- // Collect leftfixes into groups.
- RoseGraph &g = tbi.g;
- for (auto v : vertices_range(g)) {
- if (!g[v].left) {
- continue;
- }
- const left_id left(g[v].left);
-
- if (left.haig()) {
- /* TODO: allow merging of identical haigs */
- continue;
- }
-
- if (in_degree(v, g) != 1) {
- continue;
- }
-
- roses[RoseGroup(tbi, v)].push_back(v);
- }
-
- DEBUG_PRINTF("collected %zu rose groups\n", roses.size());
-
- // Walk groups and dedupe the roses therein.
- for (deque<RoseVertex> &verts : roses | map_values) {
- DEBUG_PRINTF("group has %zu vertices\n", verts.size());
-
+ */
+bool dedupeLeftfixes(RoseBuildImpl &tbi) {
+ DEBUG_PRINTF("deduping leftfixes\n");
+ map<RoseGroup, deque<RoseVertex>> roses;
+ bool work_done = false;
+
+ /* Note: a leftfix's transientness will not be altered by deduping */
+
+ // Collect leftfixes into groups.
+ RoseGraph &g = tbi.g;
+ for (auto v : vertices_range(g)) {
+ if (!g[v].left) {
+ continue;
+ }
+ const left_id left(g[v].left);
+
+ if (left.haig()) {
+ /* TODO: allow merging of identical haigs */
+ continue;
+ }
+
+ if (in_degree(v, g) != 1) {
+ continue;
+ }
+
+ roses[RoseGroup(tbi, v)].push_back(v);
+ }
+
+ DEBUG_PRINTF("collected %zu rose groups\n", roses.size());
+
+ // Walk groups and dedupe the roses therein.
+ for (deque<RoseVertex> &verts : roses | map_values) {
+ DEBUG_PRINTF("group has %zu vertices\n", verts.size());
+
unordered_set<left_id> seen;
-
- for (auto jt = verts.begin(), jte = verts.end(); jt != jte; ++jt) {
- RoseVertex v = *jt;
- left_id left(g[v].left);
-
- // Skip cases we've already handled, and mark as seen otherwise.
- if (!seen.insert(left).second) {
- continue;
- }
-
- // Scan the rest of the list for dupes.
+
+ for (auto jt = verts.begin(), jte = verts.end(); jt != jte; ++jt) {
+ RoseVertex v = *jt;
+ left_id left(g[v].left);
+
+ // Skip cases we've already handled, and mark as seen otherwise.
+ if (!seen.insert(left).second) {
+ continue;
+ }
+
+ // Scan the rest of the list for dupes.
for (auto kt = std::next(jt); kt != jte; ++kt) {
if (g[v].left == g[*kt].left
|| !is_equal(g[v].left, g[v].left.leftfix_report,
g[*kt].left, g[*kt].left.leftfix_report)) {
- continue;
- }
-
- // Dupe found.
- DEBUG_PRINTF("rose at vertex %zu is a dupe of %zu\n",
+ continue;
+ }
+
+ // Dupe found.
+ DEBUG_PRINTF("rose at vertex %zu is a dupe of %zu\n",
g[*kt].index, g[v].index);
- assert(g[v].left.lag == g[*kt].left.lag);
- g[*kt].left = g[v].left;
- work_done = true;
- }
- }
- }
-
- return work_done;
-}
-
-/**
- * \brief Returns a numeric key that can be used to group this suffix with
- * others that may be its duplicate.
- */
-static
-size_t suffix_size_key(const suffix_id &s) {
- if (s.graph()) {
- return num_vertices(*s.graph());
- }
- if (s.castle()) {
- return s.castle()->repeats.size();
- }
- return 0;
-}
-
-static
-bool is_equal(const suffix_id &s1, const suffix_id &s2) {
- if (s1.graph() && s2.graph()) {
- return is_equal(*s1.graph(), *s2.graph());
- } else if (s1.castle() && s2.castle()) {
- return is_equal(*s1.castle(), *s2.castle());
- }
- return false;
-}
-
-/**
- * This function simply looks for suffix NGHolder graphs which are identical
- * and updates the roles in the RoseGraph to refer to only a single copy. This
- * obviously has benefits in terms of both performance (as we don't run
- * multiple engines doing the same work) and stream state. This function first
- * groups all suffixes by number of vertices and report set to restrict the set
- * of possible candidates. Each group is then walked to find duplicates using
- * the \ref is_equal comparator for NGHolders and updating the RoseGraph as it
- * goes.
- *
- * Note: does not dedupe suffixes of vertices in the EOD table.
- */
-void dedupeSuffixes(RoseBuildImpl &tbi) {
- DEBUG_PRINTF("deduping suffixes\n");
-
+ assert(g[v].left.lag == g[*kt].left.lag);
+ g[*kt].left = g[v].left;
+ work_done = true;
+ }
+ }
+ }
+
+ return work_done;
+}
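The shape of dedupeLeftfixes is the classic group-then-compare dedupe: bucket by a cheap composite key (parent index, leftfix hash, lag, EOD table membership) so the quadratic is_equal comparison only runs within small groups, then point each duplicate at a canonical representative. A generic standalone sketch of that shape; illustrative only, and the real pass additionally preserves lag and transientness invariants:

    #include <map>
    #include <type_traits>
    #include <vector>

    // Bucket items by a cheap key (K must be ordered), then run the
    // expensive equality check only within each bucket, redirecting dupes
    // at the first (canonical) element.
    template <typename T, typename KeyFn, typename EqFn>
    size_t dedupeByGroup(std::vector<T> &items, KeyFn key, EqFn equal) {
        using K = std::decay_t<decltype(key(items[0]))>;
        std::map<K, std::vector<T *>> groups;
        for (auto &item : items) {
            groups[key(item)].push_back(&item);
        }
        size_t dupes = 0;
        for (auto &g : groups) {
            auto &vec = g.second;
            for (size_t i = 0; i < vec.size(); i++) {
                for (size_t j = i + 1; j < vec.size(); j++) {
                    if (equal(*vec[i], *vec[j])) {
                        *vec[j] = *vec[i]; // redirect at canonical copy
                        ++dupes;
                    }
                }
            }
        }
        return dupes;
    }
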
+
+/**
+ * \brief Returns a numeric key that can be used to group this suffix with
+ * others that may be its duplicate.
+ */
+static
+size_t suffix_size_key(const suffix_id &s) {
+ if (s.graph()) {
+ return num_vertices(*s.graph());
+ }
+ if (s.castle()) {
+ return s.castle()->repeats.size();
+ }
+ return 0;
+}
+
+static
+bool is_equal(const suffix_id &s1, const suffix_id &s2) {
+ if (s1.graph() && s2.graph()) {
+ return is_equal(*s1.graph(), *s2.graph());
+ } else if (s1.castle() && s2.castle()) {
+ return is_equal(*s1.castle(), *s2.castle());
+ }
+ return false;
+}
+
+/**
+ * This function simply looks for suffix NGHolder graphs which are identical
+ * and updates the roles in the RoseGraph to refer to only a single copy. This
+ * obviously has benefits in terms of both performance (as we don't run
+ * multiple engines doing the same work) and stream state. This function first
+ * groups all suffixes by number of vertices and report set to restrict the set
+ * of possible candidates. Each group is then walked to find duplicates using
+ * the \ref is_equal comparator for NGHolders and updating the RoseGraph as it
+ * goes.
+ *
+ * Note: does not dedupe suffixes of vertices in the EOD table.
+ */
+void dedupeSuffixes(RoseBuildImpl &tbi) {
+ DEBUG_PRINTF("deduping suffixes\n");
+
unordered_map<suffix_id, set<RoseVertex>> suffix_map;
- map<pair<size_t, set<ReportID>>, vector<suffix_id>> part;
-
- // Collect suffixes into groups.
- RoseGraph &g = tbi.g;
- for (auto v : vertices_range(g)) {
- if (!g[v].suffix || tbi.isInETable(v)) {
- continue;
- }
-
- const suffix_id s(g[v].suffix);
-
- if (!(s.graph() || s.castle())) {
- continue; // e.g. Haig
- }
-
- set<RoseVertex> &verts = suffix_map[s];
- if (verts.empty()) {
- part[make_pair(suffix_size_key(s), all_reports(s))].push_back(s);
- }
- verts.insert(v);
- }
-
- DEBUG_PRINTF("collected %zu groups\n", part.size());
-
- for (const auto &cand : part | map_values) {
- if (cand.size() <= 1) {
- continue;
- }
- DEBUG_PRINTF("deduping cand set of size %zu\n", cand.size());
-
- for (auto jt = cand.begin(); jt != cand.end(); ++jt) {
- if (suffix_map[*jt].empty()) {
- continue;
- }
- for (auto kt = next(jt); kt != cand.end(); ++kt) {
- if (suffix_map[*kt].empty() || !is_equal(*jt, *kt)) {
- continue;
- }
- DEBUG_PRINTF("found dupe\n");
- for (auto v : suffix_map[*kt]) {
- RoseVertex dupe = *suffix_map[*jt].begin();
- assert(dupe != v);
- g[v].suffix.graph = g[dupe].suffix.graph;
- g[v].suffix.castle = g[dupe].suffix.castle;
- assert(suffix_id(g[v].suffix) ==
- suffix_id(g[dupe].suffix));
- suffix_map[*jt].insert(v);
- }
- suffix_map[*kt].clear();
- }
- }
- }
-}
-
-namespace {
-
-/**
- * This class stores a mapping from an engine reference (left_id, suffix_id,
- * etc) to a list of vertices, and also allows us to iterate over the set of
- * engine references in insertion order -- we add to the mapping in vertex
- * iteration order, so this allows us to provide a consistent ordering.
- */
-template<class EngineRef>
-class Bouquet {
-private:
- list<EngineRef> ordering; // Unique list in insert order.
+ map<pair<size_t, set<ReportID>>, vector<suffix_id>> part;
+
+ // Collect suffixes into groups.
+ RoseGraph &g = tbi.g;
+ for (auto v : vertices_range(g)) {
+ if (!g[v].suffix || tbi.isInETable(v)) {
+ continue;
+ }
+
+ const suffix_id s(g[v].suffix);
+
+ if (!(s.graph() || s.castle())) {
+ continue; // e.g. Haig
+ }
+
+ set<RoseVertex> &verts = suffix_map[s];
+ if (verts.empty()) {
+ part[make_pair(suffix_size_key(s), all_reports(s))].push_back(s);
+ }
+ verts.insert(v);
+ }
+
+ DEBUG_PRINTF("collected %zu groups\n", part.size());
+
+ for (const auto &cand : part | map_values) {
+ if (cand.size() <= 1) {
+ continue;
+ }
+ DEBUG_PRINTF("deduping cand set of size %zu\n", cand.size());
+
+ for (auto jt = cand.begin(); jt != cand.end(); ++jt) {
+ if (suffix_map[*jt].empty()) {
+ continue;
+ }
+ for (auto kt = next(jt); kt != cand.end(); ++kt) {
+ if (suffix_map[*kt].empty() || !is_equal(*jt, *kt)) {
+ continue;
+ }
+ DEBUG_PRINTF("found dupe\n");
+ for (auto v : suffix_map[*kt]) {
+ RoseVertex dupe = *suffix_map[*jt].begin();
+ assert(dupe != v);
+ g[v].suffix.graph = g[dupe].suffix.graph;
+ g[v].suffix.castle = g[dupe].suffix.castle;
+ assert(suffix_id(g[v].suffix) ==
+ suffix_id(g[dupe].suffix));
+ suffix_map[*jt].insert(v);
+ }
+ suffix_map[*kt].clear();
+ }
+ }
+ }
+}
+
+namespace {
+
+/**
+ * This class stores a mapping from an engine reference (left_id, suffix_id,
+ * etc) to a list of vertices, and also allows us to iterate over the set of
+ * engine references in insertion order -- we add to the mapping in vertex
+ * iteration order, so this allows us to provide a consistent ordering.
+ */
+template<class EngineRef>
+class Bouquet {
+private:
+ list<EngineRef> ordering; // Unique list in insert order.
using BouquetMap = ue2_unordered_map<EngineRef, deque<RoseVertex>>;
- BouquetMap bouquet;
-public:
- void insert(const EngineRef &h, RoseVertex v) {
- typename BouquetMap::iterator f = bouquet.find(h);
- if (f == bouquet.end()) {
- ordering.push_back(h);
- bouquet[h].push_back(v);
- } else {
- f->second.push_back(v);
- }
- }
-
- void insert(const EngineRef &h, const deque<RoseVertex> &verts) {
- typename BouquetMap::iterator f = bouquet.find(h);
- if (f == bouquet.end()) {
- ordering.push_back(h);
- bouquet.insert(make_pair(h, verts));
- } else {
- f->second.insert(f->second.end(), verts.begin(), verts.end());
- }
- }
-
- const deque<RoseVertex> &vertices(const EngineRef &h) const {
- typename BouquetMap::const_iterator it = bouquet.find(h);
- assert(it != bouquet.end()); // must be present
- return it->second;
- }
-
- void erase(const EngineRef &h) {
- assert(bouquet.find(h) != bouquet.end());
- bouquet.erase(h);
- ordering.remove(h);
- }
-
- /** Remove all the elements in the given iterator range. */
- template <class Iter>
- void erase_all(Iter erase_begin, Iter erase_end) {
- for (Iter it = erase_begin; it != erase_end; ++it) {
- bouquet.erase(*it);
- }
-
- // Use a quick-lookup container so that we only have to traverse the
- // 'ordering' list once.
- const set<EngineRef> dead(erase_begin, erase_end);
- for (iterator it = begin(); it != end(); /* incremented inside */) {
- if (contains(dead, *it)) {
- ordering.erase(it++);
- } else {
- ++it;
- }
- }
- }
-
- void clear() {
- ordering.clear();
- bouquet.clear();
- }
-
- size_t size() const { return bouquet.size(); }
-
- // iterate over holders in insert order
- typedef typename list<EngineRef>::iterator iterator;
- iterator begin() { return ordering.begin(); }
- iterator end() { return ordering.end(); }
-
- // const iterate over holders in insert order
- typedef typename list<EngineRef>::const_iterator const_iterator;
- const_iterator begin() const { return ordering.begin(); }
- const_iterator end() const { return ordering.end(); }
-};
-
+ BouquetMap bouquet;
+public:
+ void insert(const EngineRef &h, RoseVertex v) {
+ typename BouquetMap::iterator f = bouquet.find(h);
+ if (f == bouquet.end()) {
+ ordering.push_back(h);
+ bouquet[h].push_back(v);
+ } else {
+ f->second.push_back(v);
+ }
+ }
+
+ void insert(const EngineRef &h, const deque<RoseVertex> &verts) {
+ typename BouquetMap::iterator f = bouquet.find(h);
+ if (f == bouquet.end()) {
+ ordering.push_back(h);
+ bouquet.insert(make_pair(h, verts));
+ } else {
+ f->second.insert(f->second.end(), verts.begin(), verts.end());
+ }
+ }
+
+ const deque<RoseVertex> &vertices(const EngineRef &h) const {
+ typename BouquetMap::const_iterator it = bouquet.find(h);
+ assert(it != bouquet.end()); // must be present
+ return it->second;
+ }
+
+ void erase(const EngineRef &h) {
+ assert(bouquet.find(h) != bouquet.end());
+ bouquet.erase(h);
+ ordering.remove(h);
+ }
+
+ /** Remove all the elements in the given iterator range. */
+ template <class Iter>
+ void erase_all(Iter erase_begin, Iter erase_end) {
+ for (Iter it = erase_begin; it != erase_end; ++it) {
+ bouquet.erase(*it);
+ }
+
+ // Use a quick-lookup container so that we only have to traverse the
+ // 'ordering' list once.
+ const set<EngineRef> dead(erase_begin, erase_end);
+ for (iterator it = begin(); it != end(); /* incremented inside */) {
+ if (contains(dead, *it)) {
+ ordering.erase(it++);
+ } else {
+ ++it;
+ }
+ }
+ }
+
+ void clear() {
+ ordering.clear();
+ bouquet.clear();
+ }
+
+ size_t size() const { return bouquet.size(); }
+
+ // iterate over holders in insert order
+ typedef typename list<EngineRef>::iterator iterator;
+ iterator begin() { return ordering.begin(); }
+ iterator end() { return ordering.end(); }
+
+ // const iterate over holders in insert order
+ typedef typename list<EngineRef>::const_iterator const_iterator;
+ const_iterator begin() const { return ordering.begin(); }
+ const_iterator end() const { return ordering.end(); }
+};
+
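+// Usage sketch (editorial illustration, not part of the original source):
+//
+//     Bouquet<left_id> roses;
+//     for (auto v : vertices_range(g)) {
+//         if (g[v].left) {
+//             roses.insert(left_id(g[v].left), v); // vertex iteration order
+//         }
+//     }
+//     for (const left_id &r : roses) {
+//         // engines are visited in first-insertion order
+//     }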
typedef Bouquet<left_id> LeftfixBouquet;
-typedef Bouquet<suffix_id> SuffixBouquet;
-
-} // namespace
-
-/**
- * Split a \ref Bouquet of some type into several smaller ones.
- */
-template <class EngineRef>
-static void chunkBouquets(const Bouquet<EngineRef> &in,
- deque<Bouquet<EngineRef>> &out,
- const size_t chunk_size) {
- if (in.size() <= chunk_size) {
- out.push_back(in);
- return;
- }
-
- out.push_back(Bouquet<EngineRef>());
- for (const auto &engine : in) {
- if (out.back().size() >= chunk_size) {
- out.push_back(Bouquet<EngineRef>());
- }
- out.back().insert(engine, in.vertices(engine));
- }
-}
-
+typedef Bouquet<suffix_id> SuffixBouquet;
+
+} // namespace
+
+/**
+ * Split a \ref Bouquet of some type into several smaller ones.
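+ *
+ * For example (editorial illustration): splitting a bouquet of 130 engines
+ * with chunk_size 64 yields chunks of sizes 64, 64 and 2; each engine's
+ * vertex list travels with it.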
+ */
+template <class EngineRef>
+static void chunkBouquets(const Bouquet<EngineRef> &in,
+ deque<Bouquet<EngineRef>> &out,
+ const size_t chunk_size) {
+ if (in.size() <= chunk_size) {
+ out.push_back(in);
+ return;
+ }
+
+ out.push_back(Bouquet<EngineRef>());
+ for (const auto &engine : in) {
+ if (out.back().size() >= chunk_size) {
+ out.push_back(Bouquet<EngineRef>());
+ }
+ out.back().insert(engine, in.vertices(engine));
+ }
+}
+
static
bool stringsCanFinishAtSameSpot(const ue2_literal &u,
ue2_literal::const_iterator v_b,
@@ -504,31 +504,31 @@ bool stringsCanFinishAtSameSpot(const ue2_literal &u,
return true;
}
-/**
+/**
 * Check that, after u has been seen, it is impossible for the arrival of
 * v to require the inspection of an engine earlier than u did.
- *
+ *
* Let delta be the earliest that v can be seen after u (may be zero)
*
* ie, we require u_loc - ulag <= v_loc - vlag (v_loc = u_loc + delta)
* ==> - ulag <= delta - vlag
* ==> vlag - ulag <= delta
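 *
 * Worked example (editorial note): with ulag == 1 and vlag == 3 the
 * requirement is delta >= 2, i.e. the merge is only safe if v can never
 * finish within one byte of u.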
- */
-static
-bool checkPrefix(const rose_literal_id &ul, const u32 ulag,
- const rose_literal_id &vl, const u32 vlag) {
+ */
+static
+bool checkPrefix(const rose_literal_id &ul, const u32 ulag,
+ const rose_literal_id &vl, const u32 vlag) {
DEBUG_PRINTF("'%s'-%u '%s'-%u\n", escapeString(ul.s).c_str(), ulag,
escapeString(vl.s).c_str(), vlag);
if (vl.delay || ul.delay) {
/* engine related literals should not be delayed anyway */
- return false;
- }
-
+ return false;
+ }
+
if (ulag >= vlag) {
assert(maxOverlap(ul, vl) <= vl.elength() - vlag + ulag);
return true;
- }
+ }
size_t min_allowed_delta = vlag - ulag;
DEBUG_PRINTF("min allow distace %zu\n", min_allowed_delta);
@@ -542,20 +542,20 @@ bool checkPrefix(const rose_literal_id &ul, const u32 ulag,
DEBUG_PRINTF("OK\n");
return true;
-}
-
+}
+
static
bool hasSameEngineType(const RoseVertexProps &u_prop,
const RoseVertexProps &v_prop) {
const left_id u_left = u_prop.left;
const left_id v_left = v_prop.left;
-
+
return !u_left.haig() == !v_left.haig()
&& !u_left.dfa() == !v_left.dfa()
&& !u_left.castle() == !v_left.castle()
&& !u_left.graph() == !v_left.graph();
}
-
+
/**
* Verifies that merging the leftfix of vertices does not cause conflicts due
* to the literals on the right.
@@ -577,25 +577,25 @@ bool compatibleLiteralsForMerge(
// We cannot merge engines that prefix literals in different tables.
if (ulits[0].first->table != vlits[0].first->table) {
- DEBUG_PRINTF("literals in different tables\n");
- return false;
- }
-
+ DEBUG_PRINTF("literals in different tables\n");
+ return false;
+ }
+
// We don't handle delayed cases yet.
for (const auto &ue : ulits) {
const rose_literal_id &ul = *ue.first;
if (ul.delay) {
- return false;
- }
- }
-
+ return false;
+ }
+ }
+
for (const auto &ve : vlits) {
const rose_literal_id &vl = *ve.first;
if (vl.delay) {
- return false;
- }
- }
-
+ return false;
+ }
+ }
+
/* An engine requires that all accesses to it are ordered by offsets. (ie,
       we cannot check an engine's state at offset Y if we have already
       checked its status at offset X and X > Y). If we cannot establish that
@@ -614,9 +614,9 @@ bool compatibleLiteralsForMerge(
DEBUG_PRINTF("prefix check failed\n");
return false;
}
- }
- }
-
+ }
+ }
+
return true;
}
@@ -647,8 +647,8 @@ bool safeBlockModeMerge(const RoseBuildImpl &build, RoseVertex u,
// mergeableRoseVertices).
if (!build.isRootSuccessor(u)) {
return true;
- }
-
+ }
+
const RoseGraph &g = build.g;
// Merge prefixes with identical literal sets (as we'd have to run them
@@ -725,43 +725,43 @@ bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u,
return false;
}
- /* We cannot merge prefixes/vertices if they are successors of different
- * root vertices */
- if (tbi.isRootSuccessor(u)) {
- assert(tbi.isRootSuccessor(v));
- set<RoseVertex> u_preds;
- set<RoseVertex> v_preds;
- insert(&u_preds, inv_adjacent_vertices(u, tbi.g));
- insert(&v_preds, inv_adjacent_vertices(v, tbi.g));
-
- if (u_preds != v_preds) {
- return false;
- }
- }
-
+ /* We cannot merge prefixes/vertices if they are successors of different
+ * root vertices */
+ if (tbi.isRootSuccessor(u)) {
+ assert(tbi.isRootSuccessor(v));
+ set<RoseVertex> u_preds;
+ set<RoseVertex> v_preds;
+ insert(&u_preds, inv_adjacent_vertices(u, tbi.g));
+ insert(&v_preds, inv_adjacent_vertices(v, tbi.g));
+
+ if (u_preds != v_preds) {
+ return false;
+ }
+ }
+
u32 ulag = tbi.g[u].left.lag;
vector<pair<const rose_literal_id *, u32>> ulits;
ulits.reserve(tbi.g[u].literals.size());
for (u32 id : tbi.g[u].literals) {
ulits.emplace_back(&tbi.literals.at(id), ulag);
}
-
+
u32 vlag = tbi.g[v].left.lag;
vector<pair<const rose_literal_id *, u32>> vlits;
vlits.reserve(tbi.g[v].literals.size());
for (u32 id : tbi.g[v].literals) {
vlits.emplace_back(&tbi.literals.at(id), vlag);
}
-
+
if (!compatibleLiteralsForMerge(ulits, vlits)) {
return false;
- }
-
+ }
+
DEBUG_PRINTF("roses on %zu and %zu are mergeable\n", tbi.g[u].index,
tbi.g[v].index);
- return true;
-}
-
+ return true;
+}
+
/* We cannot merge an engine if a trigger literal and a post literal overlap
 * in such a way that engine status needs to be checked at a point before the
 * engine's current location.
@@ -773,32 +773,32 @@ bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u,
* ==> delta >= v_lag
*
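 * For example (editorial note): if ul.s and vl.s are both "ab" and
 * v_lag == 1, both literals can finish at the same spot (delta == 0), so
 * the merge must be rejected.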
*/
-static
+static
bool checkPredDelay(const rose_literal_id &ul, const rose_literal_id &vl,
u32 vlag) {
DEBUG_PRINTF("%s %s (lag %u)\n", escapeString(ul.s).c_str(),
escapeString(vl.s).c_str(), vlag);
-
+
for (size_t i = 0; i < vlag; i++) {
if (stringsCanFinishAtSameSpot(ul.s, vl.s.begin(), vl.s.end() - i)) {
DEBUG_PRINTF("v can follow u at a (too close) distance of %zu\n", i);
return false;
- }
- }
+ }
+ }
DEBUG_PRINTF("OK\n");
- return true;
-}
-
+ return true;
+}
+
template<typename VertexCont>
static never_inline
bool checkPredDelays(const RoseBuildImpl &build, const VertexCont &v1,
const VertexCont &v2) {
flat_set<RoseVertex> preds;
- for (auto v : v1) {
+ for (auto v : v1) {
insert(&preds, inv_adjacent_vertices(v, build.g));
- }
-
+ }
+
flat_set<u32> pred_lits;
/* No need to examine delays of a common pred - as it must already have
@@ -811,7 +811,7 @@ bool checkPredDelays(const RoseBuildImpl &build, const VertexCont &v1,
insert(&known_good_preds, inv_adjacent_vertices(v, build.g));
}
- for (auto u : preds) {
+ for (auto u : preds) {
if (!contains(known_good_preds, u)) {
insert(&pred_lits, build.g[u].literals);
}
@@ -838,17 +838,17 @@ bool checkPredDelays(const RoseBuildImpl &build, const VertexCont &v1,
if (!checkPredDelay(*ul, vl, vlag)) {
return false;
}
- }
- }
- }
-
- return true;
-}
-
-static
-bool mergeableRoseVertices(const RoseBuildImpl &tbi,
- const deque<RoseVertex> &verts1,
- const deque<RoseVertex> &verts2) {
+ }
+ }
+ }
+
+ return true;
+}
+
+static
+bool mergeableRoseVertices(const RoseBuildImpl &tbi,
+ const deque<RoseVertex> &verts1,
+ const deque<RoseVertex> &verts2) {
assert(!verts1.empty());
assert(!verts2.empty());
@@ -874,9 +874,9 @@ bool mergeableRoseVertices(const RoseBuildImpl &tbi,
if (u_preds != v_preds) {
return false;
- }
- }
-
+ }
+ }
+
vector<pair<const rose_literal_id *, u32>> ulits; /* lit + lag pairs */
for (auto a : verts1) {
if (!tbi.cc.streaming && !safeBlockModeMerge(tbi, v_front, a)) {
@@ -905,90 +905,90 @@ bool mergeableRoseVertices(const RoseBuildImpl &tbi,
return false;
}
- // Check preds are compatible as well.
+ // Check preds are compatible as well.
if (!checkPredDelays(tbi, verts1, verts2)
|| !checkPredDelays(tbi, verts2, verts1)) {
- return false;
- }
-
- DEBUG_PRINTF("vertex sets are mergeable\n");
- return true;
-}
-
-bool mergeableRoseVertices(const RoseBuildImpl &tbi, const set<RoseVertex> &v1,
- const set<RoseVertex> &v2) {
- const deque<RoseVertex> vv1(v1.begin(), v1.end());
- const deque<RoseVertex> vv2(v2.begin(), v2.end());
- return mergeableRoseVertices(tbi, vv1, vv2);
-}
-
-/** \brief Priority queue element for Rose merges. */
-namespace {
-struct RoseMergeCandidate {
- RoseMergeCandidate(const left_id &r1_in, const left_id &r2_in, u32 cpl_in,
- u32 tb)
- : r1(r1_in), r2(r2_in), stopxor(0), cpl(cpl_in), states(0),
- tie_breaker(tb) {
- if (r1.graph() && r2.graph()) {
- const NGHolder &h1 = *r1.graph(), &h2 = *r2.graph();
-            /* som_none as haigs don't merge; this is just a guiding heuristic */
- CharReach stop1 = findStopAlphabet(h1, SOM_NONE);
- CharReach stop2 = findStopAlphabet(h2, SOM_NONE);
- stopxor = (stop1 ^ stop2).count();
-
- // We use the number of vertices as an approximation of the state
- // count here, as this is just feeding a comparison.
- u32 vertex_count = num_vertices(h1) + num_vertices(h2);
- states = vertex_count - min(vertex_count, cpl);
- } else if (r1.castle() && r2.castle()) {
- // FIXME
- }
- }
-
- bool operator<(const RoseMergeCandidate &a) const {
- if (stopxor != a.stopxor) {
- return stopxor > a.stopxor;
- }
- if (cpl != a.cpl) {
- return cpl < a.cpl;
- }
- if (states != a.states) {
- return states > a.states;
- }
- return tie_breaker < a.tie_breaker;
- }
-
- left_id r1;
- left_id r2;
- u32 stopxor;
- u32 cpl; //!< common prefix length
- u32 states;
- u32 tie_breaker; //!< determinism
-};
-}
-
-static
+ return false;
+ }
+
+ DEBUG_PRINTF("vertex sets are mergeable\n");
+ return true;
+}
+
+bool mergeableRoseVertices(const RoseBuildImpl &tbi, const set<RoseVertex> &v1,
+ const set<RoseVertex> &v2) {
+ const deque<RoseVertex> vv1(v1.begin(), v1.end());
+ const deque<RoseVertex> vv2(v2.begin(), v2.end());
+ return mergeableRoseVertices(tbi, vv1, vv2);
+}
+
+/** \brief Priority queue element for Rose merges. */
+namespace {
+struct RoseMergeCandidate {
+ RoseMergeCandidate(const left_id &r1_in, const left_id &r2_in, u32 cpl_in,
+ u32 tb)
+ : r1(r1_in), r2(r2_in), stopxor(0), cpl(cpl_in), states(0),
+ tie_breaker(tb) {
+ if (r1.graph() && r2.graph()) {
+ const NGHolder &h1 = *r1.graph(), &h2 = *r2.graph();
+            /* som_none as haigs don't merge; this is just a guiding heuristic */
+ CharReach stop1 = findStopAlphabet(h1, SOM_NONE);
+ CharReach stop2 = findStopAlphabet(h2, SOM_NONE);
+ stopxor = (stop1 ^ stop2).count();
+
+ // We use the number of vertices as an approximation of the state
+ // count here, as this is just feeding a comparison.
+ u32 vertex_count = num_vertices(h1) + num_vertices(h2);
+ states = vertex_count - min(vertex_count, cpl);
+ } else if (r1.castle() && r2.castle()) {
+ // FIXME
+ }
+ }
+
+ bool operator<(const RoseMergeCandidate &a) const {
+ if (stopxor != a.stopxor) {
+ return stopxor > a.stopxor;
+ }
+ if (cpl != a.cpl) {
+ return cpl < a.cpl;
+ }
+ if (states != a.states) {
+ return states > a.states;
+ }
+ return tie_breaker < a.tie_breaker;
+ }
+
+ left_id r1;
+ left_id r2;
+ u32 stopxor;
+ u32 cpl; //!< common prefix length
+ u32 states;
+ u32 tie_breaker; //!< determinism
+};
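+// Editorial note: std::priority_queue pops the greatest element first, so
+// operator< above is arranged to prefer candidates with a smaller stop
+// alphabet xor, then a longer common prefix, then a smaller estimated
+// state count.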
+}
+
+static
bool mergeLeftfixPair(RoseBuildImpl &build, left_id &r1, left_id &r2,
const vector<RoseVertex> &verts1,
const vector<RoseVertex> &verts2) {
- assert(!verts1.empty() && !verts2.empty());
-
+ assert(!verts1.empty() && !verts2.empty());
+
DEBUG_PRINTF("merging pair of leftfixes:\n");
DEBUG_PRINTF(" A:%016zx: tops %s\n", r1.hash(),
as_string_list(all_tops(r1)).c_str());
DEBUG_PRINTF(" B:%016zx: tops %s\n", r2.hash(),
as_string_list(all_tops(r2)).c_str());
-
+
RoseGraph &g = build.g;
- if (r1.graph()) {
- assert(r2.graph());
- assert(r1.graph()->kind == r2.graph()->kind);
+ if (r1.graph()) {
+ assert(r2.graph());
+ assert(r1.graph()->kind == r2.graph()->kind);
if (!mergeNfaPair(*r1.graph(), *r2.graph(), nullptr, build.cc)) {
- DEBUG_PRINTF("nfa merge failed\n");
- return false;
- }
-
+ DEBUG_PRINTF("nfa merge failed\n");
+ return false;
+ }
+
/* The graph in r1 has been merged into the graph in r2. Update r1's
* vertices with the new graph ptr. mergeNfaPair() does not alter the
* tops from the input graph so no need to update top values.
@@ -997,38 +997,38 @@ bool mergeLeftfixPair(RoseBuildImpl &build, left_id &r1, left_id &r2,
* distinct when they have different trigger conditions.
* [Note: mergeLeftfixesVariableLag() should have a common parent set]
*/
- shared_ptr<NGHolder> &h = g[verts2.front()].left.graph;
- for (RoseVertex v : verts1) {
- g[v].left.graph = h;
- }
-
- return true;
- } else if (r1.castle()) {
- assert(r2.castle());
+ shared_ptr<NGHolder> &h = g[verts2.front()].left.graph;
+ for (RoseVertex v : verts1) {
+ g[v].left.graph = h;
+ }
+
+ return true;
+ } else if (r1.castle()) {
+ assert(r2.castle());
assert(build.cc.grey.allowCastle);
-
- map<u32, u32> top_map;
- if (!mergeCastle(*r2.castle(), *r1.castle(), top_map)) {
- DEBUG_PRINTF("castle merge failed\n");
- return false;
- }
-
- // The castle in r1 has been merged into the castle in r2, with tops
- // remapped as per top_map.
- const shared_ptr<CastleProto> &c = g[verts2.front()].left.castle;
- for (RoseVertex v : verts1) {
- g[v].left.castle = c;
- for (const auto &e : in_edges_range(v, g)) {
- g[e].rose_top = top_map.at(g[e].rose_top);
- }
- }
- return true;
- }
-
- assert(0);
- return false;
-}
-
+
+ map<u32, u32> top_map;
+ if (!mergeCastle(*r2.castle(), *r1.castle(), top_map)) {
+ DEBUG_PRINTF("castle merge failed\n");
+ return false;
+ }
+
+ // The castle in r1 has been merged into the castle in r2, with tops
+ // remapped as per top_map.
+ const shared_ptr<CastleProto> &c = g[verts2.front()].left.castle;
+ for (RoseVertex v : verts1) {
+ g[v].left.castle = c;
+ for (const auto &e : in_edges_range(v, g)) {
+ g[e].rose_top = top_map.at(g[e].rose_top);
+ }
+ }
+ return true;
+ }
+
+ assert(0);
+ return false;
+}
+
/**
* Checks that there is no problem due to the involved vertices if we merge two
* leftfix engines.
@@ -1039,13 +1039,13 @@ bool mergeLeftfixPair(RoseBuildImpl &build, left_id &r1, left_id &r2,
* - check that engines themselves can be merged
* - use heuristics to find out if merging the engines is wise.
*/
-static
+static
bool checkVerticesOkForLeftfixMerge(const RoseBuildImpl &build,
const vector<RoseVertex> &targets_1,
const vector<RoseVertex> &targets_2) {
assert(!targets_1.empty());
assert(!targets_2.empty());
-
+
vector<pair<const rose_literal_id *, u32>> ulits; /* lit + lag pairs */
for (auto a : targets_1) {
u32 ulag = build.g[a].left.lag;
@@ -1053,7 +1053,7 @@ bool checkVerticesOkForLeftfixMerge(const RoseBuildImpl &build,
ulits.emplace_back(&build.literals.at(id), ulag);
}
}
-
+
vector<pair<const rose_literal_id *, u32>> vlits;
for (auto a : targets_2) {
u32 vlag = build.g[a].left.lag;
@@ -1061,21 +1061,21 @@ bool checkVerticesOkForLeftfixMerge(const RoseBuildImpl &build,
vlits.emplace_back(&build.literals.at(id), vlag);
}
}
-
+
if (!compatibleLiteralsForMerge(ulits, vlits)) {
return false;
}
-
+
// Check preds are compatible as well.
if (!checkPredDelays(build, targets_1, targets_2)
|| !checkPredDelays(build, targets_2, targets_1)) {
return false;
}
-
+
DEBUG_PRINTF("vertex sets are mergeable\n");
return true;
}
-
+
/**
* In block mode, we want to be a little more selective -- we will only merge
* prefix engines when the literal sets are the same or if the merged graph
@@ -1087,13 +1087,13 @@ bool goodBlockModeMerge(const RoseBuildImpl &build,
const vector<RoseVertex> &v_verts,
const left_id &v_eng) {
assert(!build.cc.streaming);
-
+
// Always merge infixes if we can (subject to the other criteria in
// mergeableRoseVertices).
if (!build.isRootSuccessor(u_verts.front())) {
return true;
}
-
+
const RoseGraph &g = build.g;
flat_set<u32> u_lits;
@@ -1197,20 +1197,20 @@ bool mergeLeftVL_tryMergeCandidate(RoseBuildImpl &build, left_id &r1,
&& (stop1.count() > 10 || stop2.count() > 10)) {
DEBUG_PRINTF("skip merge, would kill stop alphabet\n");
return false;
- }
+ }
size_t maxstop = max(stop1.count(), stop2.count());
if (maxstop > 200 && stopboth.count() < 200) {
DEBUG_PRINTF("skip merge, would reduce stop alphabet\n");
return false;
}
}
-
+
/* Rechecking that the targets are compatible, as we may have already
* merged new states into r1 or r2 and we need to verify that this
* candidate is still ok. */
if (!checkVerticesOkForLeftfixMerge(build, targets_1, targets_2)) {
return false;
- }
+ }
if (!build.cc.streaming
&& !goodBlockModeMerge(build, targets_1, r1, targets_2, r2)) {
@@ -1218,62 +1218,62 @@ bool mergeLeftVL_tryMergeCandidate(RoseBuildImpl &build, left_id &r1,
}
return mergeLeftfixPair(build, r1, r2, targets_1, targets_2);
-}
-
-static
-bool nfaHasNarrowStart(const NGHolder &g) {
+}
+
+static
+bool nfaHasNarrowStart(const NGHolder &g) {
if (out_degree(g.startDs, g) > 1) {
- return false; // unanchored
- }
-
- CharReach cr;
-
- for (auto v : adjacent_vertices_range(g.start, g)) {
- if (v == g.startDs) {
- continue;
- }
- cr |= g[v].char_reach;
- }
- return cr.count() <= NARROW_START_MAX;
-}
-
-static
-bool nfaHasFiniteMaxWidth(const NGHolder &g) {
- return findMaxWidth(g).is_finite();
-}
-
-static
-bool hasReformedStartDotStar(const NGHolder &h, const Grey &grey) {
- if (!proper_out_degree(h.startDs, h)) {
- return false;
- }
-
- assert(!is_triggered(h));
-
- NGHolder h_temp;
- cloneHolder(h_temp, h);
-
- vector<BoundedRepeatData> repeats;
- bool suitable_for_sds_reforming = false;
- const map<u32, u32> fixed_depth_tops; /* not relevant for cfa check */
- const map<u32, vector<vector<CharReach>>> triggers; /* not for cfa check */
- const bool simple_model_selection = true; // FIRST is considered simple
- analyseRepeats(h_temp, nullptr, fixed_depth_tops, triggers, &repeats, true,
- simple_model_selection, grey, &suitable_for_sds_reforming);
-
- return suitable_for_sds_reforming;
-}
-
-static
-u32 commonPrefixLength(left_id &r1, left_id &r2) {
- if (r1.graph() && r2.graph()) {
+ return false; // unanchored
+ }
+
+ CharReach cr;
+
+ for (auto v : adjacent_vertices_range(g.start, g)) {
+ if (v == g.startDs) {
+ continue;
+ }
+ cr |= g[v].char_reach;
+ }
+ return cr.count() <= NARROW_START_MAX;
+}
+
+static
+bool nfaHasFiniteMaxWidth(const NGHolder &g) {
+ return findMaxWidth(g).is_finite();
+}
+
+static
+bool hasReformedStartDotStar(const NGHolder &h, const Grey &grey) {
+ if (!proper_out_degree(h.startDs, h)) {
+ return false;
+ }
+
+ assert(!is_triggered(h));
+
+ NGHolder h_temp;
+ cloneHolder(h_temp, h);
+
+ vector<BoundedRepeatData> repeats;
+ bool suitable_for_sds_reforming = false;
+ const map<u32, u32> fixed_depth_tops; /* not relevant for cfa check */
+ const map<u32, vector<vector<CharReach>>> triggers; /* not for cfa check */
+ const bool simple_model_selection = true; // FIRST is considered simple
+ analyseRepeats(h_temp, nullptr, fixed_depth_tops, triggers, &repeats, true,
+ simple_model_selection, grey, &suitable_for_sds_reforming);
+
+ return suitable_for_sds_reforming;
+}
+
+static
+u32 commonPrefixLength(left_id &r1, left_id &r2) {
+ if (r1.graph() && r2.graph()) {
return commonPrefixLength(*r1.graph(), *r2.graph());
- } else if (r1.castle() && r2.castle()) {
- return min(findMinWidth(*r1.castle()), findMinWidth(*r2.castle()));
- }
- return 0;
-}
-
+ } else if (r1.castle() && r2.castle()) {
+ return min(findMinWidth(*r1.castle()), findMinWidth(*r2.castle()));
+ }
+ return 0;
+}
+
namespace {
struct MergeKey {
MergeKey(const left_id &left, flat_set<RoseVertex> parents_in) :
@@ -1352,89 +1352,89 @@ insertion_ordered_map<left_id, vector<RoseVertex>> get_eng_verts(RoseGraph &g) {
return eng_verts;
}
-/**
- * This pass attempts to merge prefix/infix engines which share a common set of
- * parent vertices.
- *
- * Engines are greedily merged pairwise by this process based on a priority
- * queue keyed off the common prefix length.
- *
- * Engines are not merged if the lags are not compatible or if it would damage
- * the stop alphabet.
- *
- * Infixes:
+/**
+ * This pass attempts to merge prefix/infix engines which share a common set of
+ * parent vertices.
+ *
+ * Engines are greedily merged pairwise by this process based on a priority
+ * queue keyed off the common prefix length.
+ *
+ * Engines are not merged if the lags are not compatible or if it would damage
+ * the stop alphabet.
+ *
+ * Infixes:
* - It is expected that when this is run all infixes are still at the single
* top stage as we have not yet merged unrelated infixes together. After
* execution, castles may have multiple (but equivalent) tops.
- *
- * Prefixes:
- * - transient prefixes are not considered.
- * - prefixes with a max width or a narrow start are kept segregated by
- *   this phase and can only be merged with similar prefixes.
- * - in block mode, merges are only performed if literal sets are the same.
- * - merges are not considered in cases where dot star start state will be
- * reformed to optimise a leading repeat.
- */
+ *
+ * Prefixes:
+ * - transient prefixes are not considered.
+ * - prefixes with a max width or a narrow start are kept segregated by
+ *   this phase and can only be merged with similar prefixes.
+ * - in block mode, merges are only performed if literal sets are the same.
+ * - merges are not considered in cases where dot star start state will be
+ * reformed to optimise a leading repeat.
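+ *
+ * For example (editorial illustration): two castle infixes with identical
+ * reach that hang off the same parent set form a candidate pair; the pair
+ * is queued keyed on its common prefix length and merged only if the lag
+ * and stop-alphabet checks pass.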
+ */
void mergeLeftfixesVariableLag(RoseBuildImpl &build) {
if (!build.cc.grey.mergeRose) {
- return;
- }
+ return;
+ }
assert(!hasOrphanedTops(build));
-
+
RoseGraph &g = build.g;
-
- DEBUG_PRINTF("-----\n");
- DEBUG_PRINTF("entry\n");
- DEBUG_PRINTF("-----\n");
-
+
+ DEBUG_PRINTF("-----\n");
+ DEBUG_PRINTF("entry\n");
+ DEBUG_PRINTF("-----\n");
+
auto eng_verts = get_eng_verts(g);
-
+
map<MergeKey, vector<left_id>> engine_groups;
for (const auto &e : eng_verts) {
const left_id &left = e.first;
const auto &verts = e.second;
- // Only non-transient for the moment.
+ // Only non-transient for the moment.
if (contains(build.transient, left)) {
- continue;
- }
-
- // No forced McClellan or Haig infix merges.
+ continue;
+ }
+
+ // No forced McClellan or Haig infix merges.
if (left.dfa() || left.haig()) {
- continue;
- }
+ continue;
+ }
assert(left.graph() || left.castle());
-
+
if (left.graph()) {
const NGHolder &h = *left.graph();
/* we should not have merged yet */
assert(!is_triggered(h) || onlyOneTop(h));
-
+
if (hasReformedStartDotStar(h, build.cc.grey)) {
- continue; // preserve the optimisation of the leading repeat
- }
+ continue; // preserve the optimisation of the leading repeat
+ }
} else {
assert(left.castle());
-
+
if (!build.cc.grey.allowCastle) {
DEBUG_PRINTF("castle merging disallowed by greybox\n");
- continue;
- }
- }
-
- // We collapse the anchored root into the root vertex when calculating
- // parents, so that we can merge differently-anchored prefix roses
- // together. (Prompted by UE-2100)
-
+ continue;
+ }
+ }
+
+ // We collapse the anchored root into the root vertex when calculating
+ // parents, so that we can merge differently-anchored prefix roses
+ // together. (Prompted by UE-2100)
+
flat_set<RoseVertex> parents;
for (RoseVertex v : verts) {
insert(&parents, inv_adjacent_vertices_range(v, g));
- }
-
+ }
+
if (contains(parents, build.anchored_root)) {
parents.erase(build.anchored_root);
parents.insert(build.root);
- }
-
+ }
+
assert(!parents.empty());
#ifndef _WIN32
@@ -1450,8 +1450,8 @@ void mergeLeftfixesVariableLag(RoseBuildImpl &build) {
        MergeKey mk(left, parents);
        engine_groups[mk].push_back(left);
#endif
- }
-
+ }
+
vector<vector<left_id>> chunks;
for (auto &raw_group : engine_groups | map_values) {
chunk(move(raw_group), &chunks, MERGE_GROUP_SIZE_MAX);
@@ -1462,37 +1462,37 @@ void mergeLeftfixesVariableLag(RoseBuildImpl &build) {
for (auto &roses : chunks) {
if (roses.size() < 2) {
- continue;
- }
+ continue;
+ }
// All pairs on the prio queue.
u32 tie_breaker = 0;
priority_queue<RoseMergeCandidate> pq;
for (auto it = roses.begin(), ite = roses.end(); it != ite; ++it) {
left_id r1 = *it;
const vector<RoseVertex> &targets_1 = eng_verts[r1];
-
+
for (auto jt = next(it); jt != ite; ++jt) {
left_id r2 = *jt;
-
+
/* we should have already split on engine types and reach */
assert(!r1.castle() == !r2.castle());
assert(!r1.graph() == !r2.graph());
assert(!r1.castle()
|| r1.castle()->reach() == r2.castle()->reach());
-
+
const vector<RoseVertex> &targets_2 = eng_verts[r2];
if (!checkVerticesOkForLeftfixMerge(build, targets_1,
targets_2)) {
continue; // No point queueing unmergeable cases.
}
-
+
u32 cpl = commonPrefixLength(r1, r2);
pq.push(RoseMergeCandidate(r1, r2, cpl, tie_breaker++));
}
}
-
+
DEBUG_PRINTF("merge queue has %zu entries\n", pq.size());
-
+
while (!pq.empty()) {
left_id r1 = pq.top().r1;
left_id r2 = pq.top().r2;
@@ -1505,47 +1505,47 @@ void mergeLeftfixesVariableLag(RoseBuildImpl &build) {
targets_2)) {
insert(&targets_2, targets_2.end(), targets_1);
targets_1.clear();
- }
- }
- }
-
- DEBUG_PRINTF("-----\n");
- DEBUG_PRINTF("exit\n");
- DEBUG_PRINTF("-----\n");
+ }
+ }
+ }
+
+ DEBUG_PRINTF("-----\n");
+ DEBUG_PRINTF("exit\n");
+ DEBUG_PRINTF("-----\n");
assert(!hasOrphanedTops(build));
-}
-
-namespace {
-
-/**
- * Key used to group sets of leftfixes for the dedupeLeftfixesVariableLag path.
- */
-struct DedupeLeftKey {
+}
+
+namespace {
+
+/**
+ * Key used to group sets of leftfixes for the dedupeLeftfixesVariableLag path.
+ */
+struct DedupeLeftKey {
DedupeLeftKey(const RoseBuildImpl &build,
flat_set<pair<size_t, u32>> preds_in, const left_id &left)
: left_hash(hashLeftfix(left)), preds(move(preds_in)),
transient(contains(build.transient, left)) {
- }
-
- bool operator<(const DedupeLeftKey &b) const {
+ }
+
+ bool operator<(const DedupeLeftKey &b) const {
return tie(left_hash, preds, transient)
< tie(b.left_hash, b.preds, b.transient);
- }
-
-private:
- /** Quick hash of the leftfix itself. Must be identical for a given pair of
- * graphs if is_equal would return true. */
- size_t left_hash;
-
- /** For each in-edge, the pair of (parent index, edge top). */
+ }
+
+private:
+ /** Quick hash of the leftfix itself. Must be identical for a given pair of
+ * graphs if is_equal would return true. */
+ size_t left_hash;
+
+ /** For each in-edge, the pair of (parent index, edge top). */
flat_set<pair<size_t, u32>> preds;
/** We don't want to combine transient with non-transient. */
bool transient;
-};
-
-} // namespace
-
+};
+
+} // namespace
+
static
flat_set<pair<size_t, u32>> get_pred_tops(RoseVertex v, const RoseGraph &g) {
flat_set<pair<size_t, u32>> preds;
@@ -1555,50 +1555,50 @@ flat_set<pair<size_t, u32>> get_pred_tops(RoseVertex v, const RoseGraph &g) {
return preds;
}
-/**
- * This is a generalisation of \ref dedupeLeftfixes which relaxes two
- * restrictions: multiple predecessor roles are allowed and the delay used by
- * each vertex may not be the same for each vertex. Like \ref dedupeLeftfixes,
- * the leftfixes' successor vertices are first grouped to reduce the number of
- * potential candidates - the grouping in this case is by the set of
- * predecessor roles with their associated top events. For the dedupe to be
- * possible, it is required that:
- *
- * 1. the nfa graphs with respect to the relevant reports are identical
- * 2. the nfa graphs are triggered by the same roles with same events (ensured
- * by the initial grouping pass)
- * 3. all the successor roles of either graph can inspect the combined leftfix
- * without advancing the state of the leftfix past the point that another
- * successor may want to inspect it; the overlap relationships between the
- * involved literals are examined to ensure that this property holds.
- *
+/**
+ * This is a generalisation of \ref dedupeLeftfixes which relaxes two
+ * restrictions: multiple predecessor roles are allowed and the delay used by
+ * each vertex may not be the same for each vertex. Like \ref dedupeLeftfixes,
+ * the leftfixes' successor vertices are first grouped to reduce the number of
+ * potential candidates - the grouping in this case is by the set of
+ * predecessor roles with their associated top events. For the dedupe to be
+ * possible, it is required that:
+ *
+ * 1. the nfa graphs with respect to the relevant reports are identical
+ * 2. the nfa graphs are triggered by the same roles with same events (ensured
+ * by the initial grouping pass)
+ * 3. all the successor roles of either graph can inspect the combined leftfix
+ * without advancing the state of the leftfix past the point that another
+ * successor may want to inspect it; the overlap relationships between the
+ * involved literals are examined to ensure that this property holds.
+ *
 * Note: unlike dedupeLeftfixes, this is unable to dedupe when delayed
 * literals are involved.
- */
+ */
void dedupeLeftfixesVariableLag(RoseBuildImpl &build) {
- DEBUG_PRINTF("entry\n");
-
+ DEBUG_PRINTF("entry\n");
+
RoseGraph &g = build.g;
auto eng_verts = get_eng_verts(g);
-
+
map<DedupeLeftKey, vector<left_id>> engine_groups;
for (const auto &e : eng_verts) {
const left_id &left = e.first;
const auto &verts = e.second;
-
+
/* There should only be one report on an engine as no merges have
* happened yet. (aside from eod prefixes) */
if (all_reports(left).size() != 1) {
assert(any_of_in(adjacent_vertices_range(verts.front(), g),
[&](RoseVertex w) { return g[w].eod_accept; }));
- continue;
- }
-
+ continue;
+ }
+
if (left.haig()) {
/* TODO: allow deduping of identical haigs */
- continue;
- }
-
+ continue;
+ }
+
if (left.graph()) {
/* we should not have merged yet */
assert(!is_triggered(*left.graph()) || onlyOneTop(*left.graph()));
@@ -1612,52 +1612,52 @@ void dedupeLeftfixesVariableLag(RoseBuildImpl &build) {
}
}
engine_groups[DedupeLeftKey(build, move(preds), left)].push_back(left);
- }
-
+ }
+
/* We don't bother chunking as we expect deduping to be successful if the
* hashes match */
-
+
for (auto &group : engine_groups | map_values) {
DEBUG_PRINTF("group of %zu roses\n", group.size());
if (group.size() < 2) {
- continue;
- }
-
+ continue;
+ }
+
for (auto it = group.begin(); it != group.end(); ++it) {
- left_id r1 = *it;
+ left_id r1 = *it;
vector<RoseVertex> &verts1 = eng_verts[r1];
assert(!verts1.empty()); /* cleared engines should be behind us */
-
+
assert(all_reports(r1).size() == 1);
ReportID r1_report = *all_reports(r1).begin();
for (auto jt = next(it); jt != group.end(); ++jt) {
- left_id r2 = *jt;
+ left_id r2 = *jt;
vector<RoseVertex> &verts2 = eng_verts[r2];
assert(!verts2.empty());
assert(all_reports(r2).size() == 1);
ReportID r2_report = *all_reports(r2).begin();
-
+
if (!is_equal(r1, r1_report, r2, r2_report)) {
- continue;
- }
-
+ continue;
+ }
+
if (!checkVerticesOkForLeftfixMerge(build, verts1, verts2)) {
- continue;
- }
-
- DEBUG_PRINTF("%p and %p are dupes\n", r1.graph(), r2.graph());
-
+ continue;
+ }
+
+ DEBUG_PRINTF("%p and %p are dupes\n", r1.graph(), r2.graph());
+
// Replace r1 with r2.
-
- for (auto v : verts1) {
- DEBUG_PRINTF("replacing report %u with %u on %zu\n",
+
+ for (auto v : verts1) {
+ DEBUG_PRINTF("replacing report %u with %u on %zu\n",
r2_report, r1_report, g[v].index);
- u32 orig_lag = g[v].left.lag;
+ u32 orig_lag = g[v].left.lag;
g[v].left = g[verts2.front()].left;
- g[v].left.lag = orig_lag;
- }
+ g[v].left.lag = orig_lag;
+ }
insert(&verts2, verts2.end(), verts1);
verts1.clear();
@@ -1665,306 +1665,306 @@ void dedupeLeftfixesVariableLag(RoseBuildImpl &build) {
/* remove stale entry from transient set, if present */
build.transient.erase(r1);
- break;
- }
- }
- }
-}
-
-static
+ break;
+ }
+ }
+ }
+}
+
+static
u32 findUnusedTop(const flat_set<u32> &tops) {
- u32 i = 0;
- while (contains(tops, i)) {
- i++;
- }
- return i;
-}
-
-// Replace each top on the start edges with its image under top_mapping.
-static
-void replaceTops(NGHolder &h, const map<u32, u32> &top_mapping) {
- for (const auto &e : out_edges_range(h.start, h)) {
- NFAVertex v = target(e, h);
- if (v == h.startDs) {
- continue;
- }
+ u32 i = 0;
+ while (contains(tops, i)) {
+ i++;
+ }
+ return i;
+}
+
+// Replace each top on the start edges with its image under top_mapping.
+static
+void replaceTops(NGHolder &h, const map<u32, u32> &top_mapping) {
+ for (const auto &e : out_edges_range(h.start, h)) {
+ NFAVertex v = target(e, h);
+ if (v == h.startDs) {
+ continue;
+ }
flat_set<u32> new_tops;
for (u32 t : h[e].tops) {
DEBUG_PRINTF("vertex %zu has top %u\n", h[v].index, t);
new_tops.insert(top_mapping.at(t));
}
h[e].tops = std::move(new_tops);
- }
-}
-
-static
-bool setDistinctTops(NGHolder &h1, const NGHolder &h2,
- map<u32, u32> &top_mapping) {
+ }
+}
+
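+// Worked example (editorial note): if tops1 == {0, 1} and tops2 == {0, 2},
+// the sets intersect, so h1's tops are renumbered via findUnusedTop():
+// 0 -> 1 (1 is free in tops2), then 1 -> 3 (tops2 has grown to {0, 1, 2}),
+// leaving the two top sets disjoint.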
+static
+bool setDistinctTops(NGHolder &h1, const NGHolder &h2,
+ map<u32, u32> &top_mapping) {
flat_set<u32> tops1 = getTops(h1), tops2 = getTops(h2);
-
- DEBUG_PRINTF("before: h1 has %zu tops, h2 has %zu tops\n", tops1.size(),
- tops2.size());
-
- // If our tops don't intersect, we're OK to merge with no changes.
- if (!has_intersection(tops1, tops2)) {
- DEBUG_PRINTF("tops don't intersect\n");
- return true;
- }
-
- // Otherwise, we have to renumber the tops in h1 so that they don't overlap
- // with the tops in h2.
- top_mapping.clear();
- for (u32 t : tops1) {
- u32 u = findUnusedTop(tops2);
- DEBUG_PRINTF("replacing top %u with %u in h1\n", t, u);
- top_mapping.insert(make_pair(t, u));
- assert(!contains(tops2, u));
- tops2.insert(u);
- }
-
- replaceTops(h1, top_mapping);
- return true;
-}
-
-bool setDistinctRoseTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2,
- const deque<RoseVertex> &verts1) {
- map<u32, u32> top_mapping;
- if (!setDistinctTops(h1, h2, top_mapping)) {
- return false;
- }
-
- if (top_mapping.empty()) {
- return true; // No remapping necessary.
- }
-
- for (auto v : verts1) {
+
+ DEBUG_PRINTF("before: h1 has %zu tops, h2 has %zu tops\n", tops1.size(),
+ tops2.size());
+
+ // If our tops don't intersect, we're OK to merge with no changes.
+ if (!has_intersection(tops1, tops2)) {
+ DEBUG_PRINTF("tops don't intersect\n");
+ return true;
+ }
+
+ // Otherwise, we have to renumber the tops in h1 so that they don't overlap
+ // with the tops in h2.
+ top_mapping.clear();
+ for (u32 t : tops1) {
+ u32 u = findUnusedTop(tops2);
+ DEBUG_PRINTF("replacing top %u with %u in h1\n", t, u);
+ top_mapping.insert(make_pair(t, u));
+ assert(!contains(tops2, u));
+ tops2.insert(u);
+ }
+
+ replaceTops(h1, top_mapping);
+ return true;
+}
+
+bool setDistinctRoseTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2,
+ const deque<RoseVertex> &verts1) {
+ map<u32, u32> top_mapping;
+ if (!setDistinctTops(h1, h2, top_mapping)) {
+ return false;
+ }
+
+ if (top_mapping.empty()) {
+ return true; // No remapping necessary.
+ }
+
+ for (auto v : verts1) {
DEBUG_PRINTF("vertex %zu\n", g[v].index);
- assert(!g[v].left.haig);
- assert(!g[v].left.dfa);
- for (const auto &e : in_edges_range(v, g)) {
- u32 t = g[e].rose_top;
- DEBUG_PRINTF("t=%u\n", t);
- assert(contains(top_mapping, t));
- g[e].rose_top = top_mapping[t];
- DEBUG_PRINTF("edge (%zu,%zu) went from top %u to %u\n",
+ assert(!g[v].left.haig);
+ assert(!g[v].left.dfa);
+ for (const auto &e : in_edges_range(v, g)) {
+ u32 t = g[e].rose_top;
+ DEBUG_PRINTF("t=%u\n", t);
+ assert(contains(top_mapping, t));
+ g[e].rose_top = top_mapping[t];
+ DEBUG_PRINTF("edge (%zu,%zu) went from top %u to %u\n",
g[source(e, g)].index, g[target(e, g)].index, t,
- top_mapping[t]);
- }
- }
-
- return true;
-}
-
-static
-bool setDistinctSuffixTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2,
- const deque<RoseVertex> &verts1) {
- map<u32, u32> top_mapping;
- if (!setDistinctTops(h1, h2, top_mapping)) {
- return false;
- }
-
- if (top_mapping.empty()) {
- return true; // No remapping necessary.
- }
-
- for (auto v : verts1) {
+ top_mapping[t]);
+ }
+ }
+
+ return true;
+}
+
+static
+bool setDistinctSuffixTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2,
+ const deque<RoseVertex> &verts1) {
+ map<u32, u32> top_mapping;
+ if (!setDistinctTops(h1, h2, top_mapping)) {
+ return false;
+ }
+
+ if (top_mapping.empty()) {
+ return true; // No remapping necessary.
+ }
+
+ for (auto v : verts1) {
DEBUG_PRINTF("vertex %zu\n", g[v].index);
- u32 t = g[v].suffix.top;
- assert(contains(top_mapping, t));
- g[v].suffix.top = top_mapping[t];
- }
-
- return true;
-}
-
-/** \brief Estimate the number of accel states in the given graph when built as
- * an NFA.
- *
- * (The easiest way to estimate something like this is to actually build it:
- * the criteria for NFA acceleration are quite complicated and buried in
- * limex_compile.)
- */
-static
-u32 estimatedAccelStates(const RoseBuildImpl &tbi, const NGHolder &h) {
- return countAccelStates(h, &tbi.rm, tbi.cc);
-}
-
-static
+ u32 t = g[v].suffix.top;
+ assert(contains(top_mapping, t));
+ g[v].suffix.top = top_mapping[t];
+ }
+
+ return true;
+}
+
+/** \brief Estimate the number of accel states in the given graph when built as
+ * an NFA.
+ *
+ * (The easiest way to estimate something like this is to actually build it:
+ * the criteria for NFA acceleration are quite complicated and buried in
+ * limex_compile.)
+ */
+static
+u32 estimatedAccelStates(const RoseBuildImpl &tbi, const NGHolder &h) {
+ return countAccelStates(h, &tbi.rm, tbi.cc);
+}
+
+static
void mergeNfaLeftfixes(RoseBuildImpl &tbi, LeftfixBouquet &roses) {
- RoseGraph &g = tbi.g;
- DEBUG_PRINTF("%zu nfa rose merge candidates\n", roses.size());
-
- // We track the number of accelerable states for each graph in a map and
- // only recompute them when the graph is modified.
+ RoseGraph &g = tbi.g;
+ DEBUG_PRINTF("%zu nfa rose merge candidates\n", roses.size());
+
+ // We track the number of accelerable states for each graph in a map and
+ // only recompute them when the graph is modified.
unordered_map<left_id, u32> accel_count;
- for (const auto &rose : roses) {
- assert(rose.graph()->kind == NFA_INFIX);
- accel_count[rose] = estimatedAccelStates(tbi, *rose.graph());
- }
-
- for (auto it = roses.begin(); it != roses.end(); ++it) {
- left_id r1 = *it;
- const deque<RoseVertex> &verts1 = roses.vertices(r1);
-
- deque<left_id> merged;
- for (auto jt = next(it); jt != roses.end(); ++jt) {
- left_id r2 = *jt;
- const deque<RoseVertex> &verts2 = roses.vertices(r2);
-
- DEBUG_PRINTF("consider merging rose %p (%zu verts) "
- "with %p (%zu verts)\n",
- r1.graph(), verts1.size(), r2.graph(), verts2.size());
-
- u32 accel1 = accel_count[r1];
- if (accel1 >= NFA_MAX_ACCEL_STATES) {
- DEBUG_PRINTF("h1 has hit max accel\n");
- break; // next h1
- }
-
- u32 accel2 = accel_count[r2];
- if (accel1 + accel2 > NFA_MAX_ACCEL_STATES) {
- DEBUG_PRINTF("not merging, might make unaccel (accel1=%u, "
- "accel2=%u)\n",
- accel1, accel2);
- continue; // next h2
- }
-
- if (!mergeableRoseVertices(tbi, verts1, verts2)) {
- DEBUG_PRINTF("not mergeable\n");
- continue; // next h2
- }
-
- // Attempt to merge h2 into h1.
-
- NGHolder victim;
- cloneHolder(victim, *r2.graph());
-
- // Store a copy of the in-edge properties in case we have to roll
- // back.
- map<RoseEdge, RoseEdgeProps> edge_props;
- for (auto v : verts2) {
- for (const auto &e : in_edges_range(v, g)) {
- edge_props[e] = g[e];
- }
- }
-
- if (!setDistinctRoseTops(g, victim, *r1.graph(), verts2)) {
- DEBUG_PRINTF("can't set distinct tops\n");
- continue; // next h2
- }
-
- assert(victim.kind == r1.graph()->kind);
- assert(!generates_callbacks(*r1.graph()));
- if (!mergeNfaPair(victim, *r1.graph(), nullptr, tbi.cc)) {
- DEBUG_PRINTF("merge failed\n");
- // Roll back in-edge properties.
- for (const auto &m : edge_props) {
- g[m.first] = m.second;
- }
- continue; // next h2
- }
-
- // Update h2's roses to point to h1 now
- shared_ptr<NGHolder> winner = g[verts1.front()].left.graph;
- for (auto v : verts2) {
- g[v].left.graph = winner;
- }
- roses.insert(r1, verts2);
-
- merged.push_back(r2);
-
- if (num_vertices(*winner) >= small_merge_max_vertices(tbi.cc)) {
- DEBUG_PRINTF("h1 now has %zu vertices, proceeding to next\n",
- num_vertices(*winner));
- break; // next h1
- }
-
- // Update h1's accel count estimate.
- accel_count[r1] = estimatedAccelStates(tbi, *winner);
- }
-
- DEBUG_PRINTF("%zu roses merged\n", merged.size());
- roses.erase_all(merged.begin(), merged.end());
- }
-}
-
-/**
- * This pass attempts to merge prefix/infix engines with a small number of
- * vertices together into larger engines. The engines must not be have a
- * reformed start dot star (due to a leading repeat) nor an infix LBR. Engines
- * that have compatible lag are greedily grouped such that they remain
- * accelerable and only have a small number of states. Note: if a role has an
- * infix with multiple trigger vertices, the role will be left unchanged by this
- * pass and will remain using an unmerged graph.
- */
-void mergeSmallLeftfixes(RoseBuildImpl &tbi) {
- DEBUG_PRINTF("entry\n");
-
- if (!tbi.cc.grey.mergeRose || !tbi.cc.grey.roseMultiTopRoses) {
- return;
- }
-
- RoseGraph &g = tbi.g;
-
+ for (const auto &rose : roses) {
+ assert(rose.graph()->kind == NFA_INFIX);
+ accel_count[rose] = estimatedAccelStates(tbi, *rose.graph());
+ }
+
+ for (auto it = roses.begin(); it != roses.end(); ++it) {
+ left_id r1 = *it;
+ const deque<RoseVertex> &verts1 = roses.vertices(r1);
+
+ deque<left_id> merged;
+ for (auto jt = next(it); jt != roses.end(); ++jt) {
+ left_id r2 = *jt;
+ const deque<RoseVertex> &verts2 = roses.vertices(r2);
+
+ DEBUG_PRINTF("consider merging rose %p (%zu verts) "
+ "with %p (%zu verts)\n",
+ r1.graph(), verts1.size(), r2.graph(), verts2.size());
+
+ u32 accel1 = accel_count[r1];
+ if (accel1 >= NFA_MAX_ACCEL_STATES) {
+ DEBUG_PRINTF("h1 has hit max accel\n");
+ break; // next h1
+ }
+
+ u32 accel2 = accel_count[r2];
+ if (accel1 + accel2 > NFA_MAX_ACCEL_STATES) {
+ DEBUG_PRINTF("not merging, might make unaccel (accel1=%u, "
+ "accel2=%u)\n",
+ accel1, accel2);
+ continue; // next h2
+ }
+
+ if (!mergeableRoseVertices(tbi, verts1, verts2)) {
+ DEBUG_PRINTF("not mergeable\n");
+ continue; // next h2
+ }
+
+ // Attempt to merge h2 into h1.
+
+ NGHolder victim;
+ cloneHolder(victim, *r2.graph());
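+            // Editorial note: the merge below is attempted on this clone,
+            // so r2's original graph is left untouched if the attempt
+            // fails.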
+
+ // Store a copy of the in-edge properties in case we have to roll
+ // back.
+ map<RoseEdge, RoseEdgeProps> edge_props;
+ for (auto v : verts2) {
+ for (const auto &e : in_edges_range(v, g)) {
+ edge_props[e] = g[e];
+ }
+ }
+
+ if (!setDistinctRoseTops(g, victim, *r1.graph(), verts2)) {
+ DEBUG_PRINTF("can't set distinct tops\n");
+ continue; // next h2
+ }
+
+ assert(victim.kind == r1.graph()->kind);
+ assert(!generates_callbacks(*r1.graph()));
+ if (!mergeNfaPair(victim, *r1.graph(), nullptr, tbi.cc)) {
+ DEBUG_PRINTF("merge failed\n");
+ // Roll back in-edge properties.
+ for (const auto &m : edge_props) {
+ g[m.first] = m.second;
+ }
+ continue; // next h2
+ }
+
+ // Update h2's roses to point to h1 now
+ shared_ptr<NGHolder> winner = g[verts1.front()].left.graph;
+ for (auto v : verts2) {
+ g[v].left.graph = winner;
+ }
+ roses.insert(r1, verts2);
+
+ merged.push_back(r2);
+
+ if (num_vertices(*winner) >= small_merge_max_vertices(tbi.cc)) {
+ DEBUG_PRINTF("h1 now has %zu vertices, proceeding to next\n",
+ num_vertices(*winner));
+ break; // next h1
+ }
+
+ // Update h1's accel count estimate.
+ accel_count[r1] = estimatedAccelStates(tbi, *winner);
+ }
+
+ DEBUG_PRINTF("%zu roses merged\n", merged.size());
+ roses.erase_all(merged.begin(), merged.end());
+ }
+}
+
+/**
+ * This pass attempts to merge prefix/infix engines with a small number of
+ * vertices together into larger engines. The engines must not have a
+ * reformed start dot star (due to a leading repeat) nor an infix LBR. Engines
+ * that have compatible lag are greedily grouped such that they remain
+ * accelerable and only have a small number of states. Note: if a role has an
+ * infix with multiple trigger vertices, the role will be left unchanged by this
+ * pass and will remain using an unmerged graph.
+ */
+void mergeSmallLeftfixes(RoseBuildImpl &tbi) {
+ DEBUG_PRINTF("entry\n");
+
+ if (!tbi.cc.grey.mergeRose || !tbi.cc.grey.roseMultiTopRoses) {
+ return;
+ }
+
+ RoseGraph &g = tbi.g;
+
LeftfixBouquet nfa_leftfixes;
-
- for (auto v : vertices_range(g)) {
- if (!g[v].left) {
- continue;
- }
-
- // Handle single-parent infixes only.
- if (tbi.isRootSuccessor(v)) {
- continue;
- }
-
- left_id left(g[v].left);
-
- // Only non-transient for the moment.
- if (contains(tbi.transient, left)) {
- continue;
- }
-
- // No DFAs or Haigs right now.
- if (left.dfa() || left.haig()) {
- continue;
- }
-
- // Castles are handled by a different pass.
- if (left.castle()) {
- continue;
- }
-
- assert(left.graph());
- NGHolder &h = *left.graph();
-
- /* Ensure that kind on the graph is correct */
- assert(h.kind == (tbi.isRootSuccessor(v) ? NFA_PREFIX : NFA_INFIX));
-
- if (hasReformedStartDotStar(h, tbi.cc.grey)) {
- /* We would lose optimisations of the leading repeat by merging. */
- continue;
- }
-
- // Small roses only.
- if (num_vertices(h) > small_rose_threshold(tbi.cc)) {
- continue;
- }
-
+
+ for (auto v : vertices_range(g)) {
+ if (!g[v].left) {
+ continue;
+ }
+
+ // Handle single-parent infixes only.
+ if (tbi.isRootSuccessor(v)) {
+ continue;
+ }
+
+ left_id left(g[v].left);
+
+ // Only non-transient for the moment.
+ if (contains(tbi.transient, left)) {
+ continue;
+ }
+
+ // No DFAs or Haigs right now.
+ if (left.dfa() || left.haig()) {
+ continue;
+ }
+
+ // Castles are handled by a different pass.
+ if (left.castle()) {
+ continue;
+ }
+
+ assert(left.graph());
+ NGHolder &h = *left.graph();
+
+ /* Ensure that kind on the graph is correct */
+ assert(h.kind == (tbi.isRootSuccessor(v) ? NFA_PREFIX : NFA_INFIX));
+
+ if (hasReformedStartDotStar(h, tbi.cc.grey)) {
+ /* We would lose optimisations of the leading repeat by merging. */
+ continue;
+ }
+
+ // Small roses only.
+ if (num_vertices(h) > small_rose_threshold(tbi.cc)) {
+ continue;
+ }
+
nfa_leftfixes.insert(left, v);
- }
-
+ }
+
deque<LeftfixBouquet> leftfix_groups;
chunkBouquets(nfa_leftfixes, leftfix_groups, MERGE_GROUP_SIZE_MAX);
nfa_leftfixes.clear();
DEBUG_PRINTF("chunked nfa leftfixes into %zu groups\n",
leftfix_groups.size());
-
+
for (auto &group : leftfix_groups) {
- mergeNfaLeftfixes(tbi, group);
- }
-}
-
+ mergeNfaLeftfixes(tbi, group);
+ }
+}
+
static
void mergeCastleChunk(RoseBuildImpl &build, vector<left_id> &cands,
insertion_ordered_map<left_id, vector<RoseVertex>> &eng_verts) {
@@ -2029,514 +2029,514 @@ void mergeCastleChunk(RoseBuildImpl &build, vector<left_id> &cands,
* mainly depends on the reach being scanned.
*/
void mergeCastleLeftfixes(RoseBuildImpl &build) {
- DEBUG_PRINTF("entry\n");
-
+ DEBUG_PRINTF("entry\n");
+
if (!build.cc.grey.mergeRose || !build.cc.grey.roseMultiTopRoses
|| !build.cc.grey.allowCastle) {
- return;
- }
-
+ return;
+ }
+
RoseGraph &g = build.g;
-
+
insertion_ordered_map<left_id, vector<RoseVertex>> eng_verts;
-
- for (auto v : vertices_range(g)) {
+
+ for (auto v : vertices_range(g)) {
if (!g[v].left.castle) {
- continue;
- }
-
+ continue;
+ }
+
// Handle infixes only.
if (build.isRootSuccessor(v)) {
- continue;
- }
-
+ continue;
+ }
+
eng_verts[g[v].left].push_back(v);
}
-
+
map<CharReach, vector<left_id>> by_reach;
for (const auto &left : eng_verts | map_keys) {
by_reach[left.castle()->reach()].push_back(left);
}
-
+
vector<vector<left_id>> chunks;
for (auto &raw_group : by_reach | map_values) {
chunk(move(raw_group), &chunks, MERGE_CASTLE_GROUP_SIZE_MAX);
- }
+ }
by_reach.clear();
-
+
DEBUG_PRINTF("chunked castles into %zu groups\n", chunks.size());
-
+
for (auto &chunk : chunks) {
mergeCastleChunk(build, chunk, eng_verts);
- }
-}
-
-static
-void mergeSuffixes(RoseBuildImpl &tbi, SuffixBouquet &suffixes,
- const bool acyclic) {
- RoseGraph &g = tbi.g;
-
- DEBUG_PRINTF("group has %zu suffixes\n", suffixes.size());
-
- // If this isn't an acyclic case, we track the number of accelerable states
- // for each graph in a map and only recompute them when the graph is
- // modified.
+ }
+}
+
+static
+void mergeSuffixes(RoseBuildImpl &tbi, SuffixBouquet &suffixes,
+ const bool acyclic) {
+ RoseGraph &g = tbi.g;
+
+ DEBUG_PRINTF("group has %zu suffixes\n", suffixes.size());
+
+ // If this isn't an acyclic case, we track the number of accelerable states
+ // for each graph in a map and only recompute them when the graph is
+ // modified.
unordered_map<suffix_id, u32> accel_count;
- if (!acyclic) {
- for (const auto &suffix : suffixes) {
- assert(suffix.graph() && suffix.graph()->kind == NFA_SUFFIX);
- accel_count[suffix] = estimatedAccelStates(tbi, *suffix.graph());
- }
- }
-
- for (auto it = suffixes.begin(); it != suffixes.end(); ++it) {
- suffix_id s1 = *it;
- const deque<RoseVertex> &verts1 = suffixes.vertices(s1);
- assert(s1.graph() && s1.graph()->kind == NFA_SUFFIX);
+ if (!acyclic) {
+ for (const auto &suffix : suffixes) {
+ assert(suffix.graph() && suffix.graph()->kind == NFA_SUFFIX);
+ accel_count[suffix] = estimatedAccelStates(tbi, *suffix.graph());
+ }
+ }
+
+ for (auto it = suffixes.begin(); it != suffixes.end(); ++it) {
+ suffix_id s1 = *it;
+ const deque<RoseVertex> &verts1 = suffixes.vertices(s1);
+ assert(s1.graph() && s1.graph()->kind == NFA_SUFFIX);
// Caller should ensure that we don't propose merges of graphs that are
// already too big.
assert(num_vertices(*s1.graph()) < small_merge_max_vertices(tbi.cc));
- deque<suffix_id> merged;
- for (auto jt = next(it); jt != suffixes.end(); ++jt) {
- suffix_id s2 = *jt;
- const deque<RoseVertex> &verts2 = suffixes.vertices(s2);
- assert(s2.graph() && s2.graph()->kind == NFA_SUFFIX);
-
- if (!acyclic) {
- u32 accel1 = accel_count[s1];
- if (accel1 >= NFA_MAX_ACCEL_STATES) {
- DEBUG_PRINTF("h1 has hit max accel\n");
- break; // next h1
- }
-
- u32 accel2 = accel_count[s2];
- if (accel1 + accel2 > NFA_MAX_ACCEL_STATES) {
- DEBUG_PRINTF("not merging, might make unaccel (accel1=%u, "
- "accel2=%u)\n",
- accel1, accel2);
- continue; // next h2
- }
- }
-
- // Attempt to merge h2 into h1.
-
- NGHolder victim;
- cloneHolder(victim, *s2.graph());
-
- // Store a copy of the suffix tops in case we have to roll back.
- map<RoseVertex, u32> old_tops;
- for (auto v : verts2) {
- old_tops[v] = g[v].suffix.top;
- }
-
- if (!setDistinctSuffixTops(g, victim, *s1.graph(), verts2)) {
- DEBUG_PRINTF("can't set distinct tops\n");
- continue; // next h2
- }
-
- if (!mergeNfaPair(victim, *s1.graph(), &tbi.rm, tbi.cc)) {
- DEBUG_PRINTF("merge failed\n");
-                // Roll back the suffix tops.
- for (const auto &m : old_tops) {
- g[m.first].suffix.top = m.second;
- }
- continue; // next h2
- }
-
- // Update h2's roses to point to h1 now
- shared_ptr<NGHolder> winner = g[verts1.front()].suffix.graph;
- for (auto v : verts2) {
- g[v].suffix.graph = winner;
- }
- suffixes.insert(s1, verts2);
- merged.push_back(s2);
-
- if (num_vertices(*s1.graph()) >= small_merge_max_vertices(tbi.cc)) {
- DEBUG_PRINTF("h1 now has %zu vertices, proceeding to next\n",
- num_vertices(*s1.graph()));
- break; // next h1
- }
-
- if (!acyclic) {
- // Update h1's accel count estimate.
- accel_count[s1] = estimatedAccelStates(tbi, *s1.graph());
- }
- }
-
- DEBUG_PRINTF("%zu suffixes merged\n", merged.size());
- suffixes.erase_all(merged.begin(), merged.end());
- }
-}
-
-/**
- * This merge pass combines suffixes from unrelated roles into a single
- * suffix with multiple top events in order to distinguish the triggers
- * from differing roles. mergeAcyclicSuffixes only considers acyclic suffixes
- * while mergeSmallSuffixes only considers small suffixes. The merges will
- * group roles with suffixes in the graph into clusters of at most
- * \ref MERGE_GROUP_SIZE_MAX. Each cluster is processed by iterating over the
- * suffixes and attempting to pairwise merge each with another member. Merges
- * will fail if the result is not implementable, requires too many distinct top
- * events, or if it loses the ability to be accelerated. The merge will modify
- * the existing suffix graph of one member (g1); the other member updates its
- * graph to refer to g1 instead of its previous graph (g2) and uses the new
- * tops created. Other roles may have been sharing g1 - these are unaffected by
- * the change as the existing top events are left untouched. Other roles using
- * g2 are also unaffected as g2 will continue to exist while it has any
- * roles triggering it.
- *
- * Note: suffixes destined for the LBR are not considered for these merges as
- * the LBR can only handle a single repeat and this type of repeat is ideally
- * handled outside of an NFA or DFA.
- */
-void mergeAcyclicSuffixes(RoseBuildImpl &tbi) {
- DEBUG_PRINTF("entry\n");
-
- if (!tbi.cc.grey.mergeSuffixes) {
- return;
- }
-
- SuffixBouquet suffixes;
-
- RoseGraph &g = tbi.g;
-
- for (auto v : vertices_range(g)) {
- shared_ptr<NGHolder> h = g[v].suffix.graph;
- if (!h || tbi.isInETable(v)) {
- continue;
- }
-
- assert(!g[v].suffix.haig);
-
+ deque<suffix_id> merged;
+ for (auto jt = next(it); jt != suffixes.end(); ++jt) {
+ suffix_id s2 = *jt;
+ const deque<RoseVertex> &verts2 = suffixes.vertices(s2);
+ assert(s2.graph() && s2.graph()->kind == NFA_SUFFIX);
+
+ if (!acyclic) {
+ u32 accel1 = accel_count[s1];
+ if (accel1 >= NFA_MAX_ACCEL_STATES) {
+ DEBUG_PRINTF("h1 has hit max accel\n");
+ break; // next h1
+ }
+
+ u32 accel2 = accel_count[s2];
+ if (accel1 + accel2 > NFA_MAX_ACCEL_STATES) {
+ DEBUG_PRINTF("not merging, might make unaccel (accel1=%u, "
+ "accel2=%u)\n",
+ accel1, accel2);
+ continue; // next h2
+ }
+ }
+
+ // Attempt to merge h2 into h1.
+
+ NGHolder victim;
+ cloneHolder(victim, *s2.graph());
+
+ // Store a copy of the suffix tops in case we have to roll back.
+ map<RoseVertex, u32> old_tops;
+ for (auto v : verts2) {
+ old_tops[v] = g[v].suffix.top;
+ }
+
+ if (!setDistinctSuffixTops(g, victim, *s1.graph(), verts2)) {
+ DEBUG_PRINTF("can't set distinct tops\n");
+ continue; // next h2
+ }
+
+ if (!mergeNfaPair(victim, *s1.graph(), &tbi.rm, tbi.cc)) {
+ DEBUG_PRINTF("merge failed\n");
+ // Roll back in-edge properties.
+ for (const auto &m : old_tops) {
+ g[m.first].suffix.top = m.second;
+ }
+ continue; // next h2
+ }
+
+ // Update h2's roses to point to h1 now
+ shared_ptr<NGHolder> winner = g[verts1.front()].suffix.graph;
+ for (auto v : verts2) {
+ g[v].suffix.graph = winner;
+ }
+ suffixes.insert(s1, verts2);
+ merged.push_back(s2);
+
+ if (num_vertices(*s1.graph()) >= small_merge_max_vertices(tbi.cc)) {
+ DEBUG_PRINTF("h1 now has %zu vertices, proceeding to next\n",
+ num_vertices(*s1.graph()));
+ break; // next h1
+ }
+
+ if (!acyclic) {
+ // Update h1's accel count estimate.
+ accel_count[s1] = estimatedAccelStates(tbi, *s1.graph());
+ }
+ }
+
+ DEBUG_PRINTF("%zu suffixes merged\n", merged.size());
+ suffixes.erase_all(merged.begin(), merged.end());
+ }
+}
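The rolled-back merge in mergeSuffixes() above follows a common pattern: clone
the victim, stash the mutable per-vertex state (old_tops), attempt the merge,
and restore the stash on failure. A minimal, self-contained sketch of that
pattern; the names are illustrative and not part of the Hyperscan API:

    #include <iterator>
    #include <list>
    #include <utility>

    template <typename T, typename MergeFunc>
    void pairwiseMergeWithRollback(std::list<T> &items, MergeFunc tryMerge) {
        for (auto it = items.begin(); it != items.end(); ++it) {
            for (auto jt = std::next(it); jt != items.end();) {
                T snapshot = *jt; // like old_tops: kept in case the merge fails
                if (tryMerge(*it, *jt)) {
                    jt = items.erase(jt); // victim absorbed into the winner
                } else {
                    *jt = std::move(snapshot); // roll back partial mutation
                    ++jt;
                }
            }
        }
    }

Snapshotting only the state a failed merge can touch (here the whole element,
in mergeSuffixes() just the per-vertex tops) keeps unsuccessful attempts free
of side effects.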
+
+/**
+ * This merge pass combines suffixes from unrelated roles into a single
+ * suffix with multiple top events in order to distinguish the triggers
+ * from differing roles. mergeAcyclicSuffixes only considers acyclic suffixes
+ * while mergeSmallSuffixes only considers small suffixes. The merges will
+ * group roles with suffixes in the graph into clusters of at most
+ * \ref MERGE_GROUP_SIZE_MAX. Each cluster is processed by iterating over the
+ * suffixes and attempting to merge each one pairwise with another member.
+ * Merges will fail if the result is not implementable, requires too many
+ * distinct top events, or if it loses the ability to be accelerated. The merge
+ * modifies the existing suffix graph of one member (g1); the other member
+ * updates its graph to refer to g1 instead of its previous graph (g2) and uses
+ * the newly created tops. Other roles may have been sharing g1 - these are
+ * unaffected by the change as the existing top events are left untouched.
+ * Other roles using g2 are also unaffected as g2 will continue to exist while
+ * it has any roles triggering it.
+ *
+ * Note: suffixes destined for the LBR are not considered for these merges as
+ * the LBR can only handle a single repeat and this type of repeat is ideally
+ * handled outside of an NFA or DFA.
+ */
+void mergeAcyclicSuffixes(RoseBuildImpl &tbi) {
+ DEBUG_PRINTF("entry\n");
+
+ if (!tbi.cc.grey.mergeSuffixes) {
+ return;
+ }
+
+ SuffixBouquet suffixes;
+
+ RoseGraph &g = tbi.g;
+
+ for (auto v : vertices_range(g)) {
+ shared_ptr<NGHolder> h = g[v].suffix.graph;
+ if (!h || tbi.isInETable(v)) {
+ continue;
+ }
+
+ assert(!g[v].suffix.haig);
+
if (num_vertices(*h) >= small_merge_max_vertices(tbi.cc)) {
- continue;
- }
-
+ continue;
+ }
+
if (!isAcyclic(*h)) {
- continue;
- }
-
- suffixes.insert(g[v].suffix, v);
- }
-
- deque<SuffixBouquet> suff_groups;
- chunkBouquets(suffixes, suff_groups, MERGE_GROUP_SIZE_MAX);
- DEBUG_PRINTF("chunked %zu suffixes into %zu groups\n", suffixes.size(),
- suff_groups.size());
- suffixes.clear();
-
- for (auto &group : suff_groups) {
- mergeSuffixes(tbi, group, true);
- }
-}
-
-/**
- * This merge pass combines suffixes from unrelated roles into a single
- * suffix with multiple top events in order to distinguish the triggers
- * from differing roles. mergeAcyclicSuffixes only considers acyclic suffixes
- * while mergeSmallSuffixes only considers small suffixes. The merges will
- * group roles with suffixes in the graph into clusters of at most
- * \ref MERGE_GROUP_SIZE_MAX. Each cluster is processed by iterating over the
- * suffixes and attempting to merge each one pairwise with another member.
- * Merges will fail if the result is not implementable, requires too many
- * distinct top events, or if it loses the ability to be accelerated. The merge
- * modifies the existing suffix graph of one member (g1); the other member
- * updates its graph to refer to g1 instead of its previous graph (g2) and uses
- * the newly created tops. Other roles may have been sharing g1 - these are
- * unaffected by the change as the existing top events are left untouched.
- * Other roles using g2 are also unaffected as g2 will continue to exist while
- * it has any roles triggering it.
- *
- * Note: suffixes destined for the LBR are not considered for these merges as
- * the LBR can only handle a single repeat and this type of repeat is ideally
- * handled outside of an NFA or DFA.
- */
-void mergeSmallSuffixes(RoseBuildImpl &tbi) {
- DEBUG_PRINTF("entry\n");
-
- if (!tbi.cc.grey.mergeSuffixes) {
- return;
- }
-
- RoseGraph &g = tbi.g;
- SuffixBouquet suffixes;
-
- for (auto v : vertices_range(g)) {
- shared_ptr<NGHolder> h = g[v].suffix.graph;
- if (!h || tbi.isInETable(v)) {
- continue;
- }
- assert(!g[v].suffix.haig);
-
- // Leave acyclics out for the moment.
- if (isAcyclic(*h)) {
- continue;
- }
-
- // Small-ish suffixes only.
- if (num_vertices(*h) > 32) {
- continue;
- }
-
- suffixes.insert(g[v].suffix, v);
- }
-
- deque<SuffixBouquet> suff_groups;
- chunkBouquets(suffixes, suff_groups, MERGE_GROUP_SIZE_MAX);
- DEBUG_PRINTF("chunked %zu suffixes into %zu groups\n", suffixes.size(),
- suff_groups.size());
- suffixes.clear();
-
- for (auto &group : suff_groups) {
- mergeSuffixes(tbi, group, false);
- }
-}
-
-static
-void removeDeadOutfixes(vector<OutfixInfo> &outfixes) {
- auto is_dead = [](const OutfixInfo &outfix) { return outfix.is_dead(); };
- outfixes.erase(remove_if(begin(outfixes), end(outfixes), is_dead),
- end(outfixes));
-}
-
-static
-void mergeOutfixInfo(OutfixInfo &winner, const OutfixInfo &victim) {
- assert(!winner.is_dead());
-
- winner.maxBAWidth = max(winner.maxBAWidth, victim.maxBAWidth);
- winner.minWidth = min(winner.minWidth, victim.minWidth);
- winner.maxWidth = max(winner.maxWidth, victim.maxWidth);
- winner.maxOffset = max(winner.maxOffset, victim.maxOffset);
- mergeReverseAccelerationInfo(winner.rev_info, victim.rev_info);
-
- // This outfix can be ignored in small block mode if both were. The dedupe
- // layer at runtime will protect us from extra matches if only one was in
- // the small block matcher.
- winner.in_sbmatcher &= victim.in_sbmatcher;
-}
-
-static
-map<NGHolder *, NGHolder *> chunkedNfaMerge(RoseBuildImpl &build,
- const vector<NGHolder *> &nfas) {
- map<NGHolder *, NGHolder *> merged;
-
- vector<NGHolder *> batch;
- for (auto it = begin(nfas), ite = end(nfas); it != ite; ++it) {
- batch.push_back(*it);
- assert((*it)->kind == NFA_OUTFIX);
- if (batch.size() == MERGE_GROUP_SIZE_MAX || next(it) == ite) {
+ continue;
+ }
+
+ suffixes.insert(g[v].suffix, v);
+ }
+
+ deque<SuffixBouquet> suff_groups;
+ chunkBouquets(suffixes, suff_groups, MERGE_GROUP_SIZE_MAX);
+ DEBUG_PRINTF("chunked %zu suffixes into %zu groups\n", suffixes.size(),
+ suff_groups.size());
+ suffixes.clear();
+
+ for (auto &group : suff_groups) {
+ mergeSuffixes(tbi, group, true);
+ }
+}
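chunkBouquets() above caps each group at MERGE_GROUP_SIZE_MAX before any
pairwise work is attempted, which bounds the quadratic cost of the merge loop.
A rough sketch of that chunking step on a plain vector, with illustrative
names only:

    #include <algorithm>
    #include <deque>
    #include <vector>

    template <typename T>
    std::deque<std::vector<T>> chunkIntoGroups(const std::vector<T> &in,
                                               size_t max_group) {
        std::deque<std::vector<T>> groups;
        for (size_t i = 0; i < in.size(); i += max_group) {
            size_t end = std::min(in.size(), i + max_group);
            // Each group is then merged independently of the others.
            groups.emplace_back(in.begin() + i, in.begin() + end);
        }
        return groups;
    }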
+
+/**
+ * This merge pass combines suffixes from unrelated roles into a single
+ * suffix with multiple top events in order to distinguish the triggers
+ * from differing roles. mergeAcyclicSuffixes only considers acyclic suffixes
+ * while mergeSmallSuffixes only considers small suffixes. The merges will
+ * group roles with suffixes in the graph into clusters of at most
+ * \ref MERGE_GROUP_SIZE_MAX. Each cluster is processed by iterating over the
+ * suffixes and attempting to merge each one pairwise with another member.
+ * Merges will fail if the result is not implementable, requires too many
+ * distinct top events, or if it loses the ability to be accelerated. The merge
+ * modifies the existing suffix graph of one member (g1); the other member
+ * updates its graph to refer to g1 instead of its previous graph (g2) and uses
+ * the newly created tops. Other roles may have been sharing g1 - these are
+ * unaffected by the change as the existing top events are left untouched.
+ * Other roles using g2 are also unaffected as g2 will continue to exist while
+ * it has any roles triggering it.
+ *
+ * Note: suffixes destined for the LBR are not considered for these merges as
+ * the LBR can only handle a single repeat and this type of repeat is ideally
+ * handled outside of an NFA or DFA.
+ */
+void mergeSmallSuffixes(RoseBuildImpl &tbi) {
+ DEBUG_PRINTF("entry\n");
+
+ if (!tbi.cc.grey.mergeSuffixes) {
+ return;
+ }
+
+ RoseGraph &g = tbi.g;
+ SuffixBouquet suffixes;
+
+ for (auto v : vertices_range(g)) {
+ shared_ptr<NGHolder> h = g[v].suffix.graph;
+ if (!h || tbi.isInETable(v)) {
+ continue;
+ }
+ assert(!g[v].suffix.haig);
+
+ // Leave acyclics out for the moment.
+ if (isAcyclic(*h)) {
+ continue;
+ }
+
+ // Small-ish suffixes only.
+ if (num_vertices(*h) > 32) {
+ continue;
+ }
+
+ suffixes.insert(g[v].suffix, v);
+ }
+
+ deque<SuffixBouquet> suff_groups;
+ chunkBouquets(suffixes, suff_groups, MERGE_GROUP_SIZE_MAX);
+ DEBUG_PRINTF("chunked %zu suffixes into %zu groups\n", suffixes.size(),
+ suff_groups.size());
+ suffixes.clear();
+
+ for (auto &group : suff_groups) {
+ mergeSuffixes(tbi, group, false);
+ }
+}
+
+static
+void removeDeadOutfixes(vector<OutfixInfo> &outfixes) {
+ auto is_dead = [](const OutfixInfo &outfix) { return outfix.is_dead(); };
+ outfixes.erase(remove_if(begin(outfixes), end(outfixes), is_dead),
+ end(outfixes));
+}
+
+static
+void mergeOutfixInfo(OutfixInfo &winner, const OutfixInfo &victim) {
+ assert(!winner.is_dead());
+
+ winner.maxBAWidth = max(winner.maxBAWidth, victim.maxBAWidth);
+ winner.minWidth = min(winner.minWidth, victim.minWidth);
+ winner.maxWidth = max(winner.maxWidth, victim.maxWidth);
+ winner.maxOffset = max(winner.maxOffset, victim.maxOffset);
+ mergeReverseAccelerationInfo(winner.rev_info, victim.rev_info);
+
+ // This outfix can be ignored in small block mode if both were. The dedupe
+ // layer at runtime will protect us from extra matches if only one was in
+ // the small block matcher.
+ winner.in_sbmatcher &= victim.in_sbmatcher;
+}
+
+static
+map<NGHolder *, NGHolder *> chunkedNfaMerge(RoseBuildImpl &build,
+ const vector<NGHolder *> &nfas) {
+ map<NGHolder *, NGHolder *> merged;
+
+ vector<NGHolder *> batch;
+ for (auto it = begin(nfas), ite = end(nfas); it != ite; ++it) {
+ batch.push_back(*it);
+ assert((*it)->kind == NFA_OUTFIX);
+ if (batch.size() == MERGE_GROUP_SIZE_MAX || next(it) == ite) {
auto batch_merged = mergeNfaCluster(batch, &build.rm, build.cc);
insert(&merged, batch_merged);
- batch.clear();
- }
- }
-
- return merged;
-}
-
-static
-void mergeOutfixNfas(RoseBuildImpl &tbi, vector<NGHolder *> &nfas) {
- DEBUG_PRINTF("merging %zu nfas\n", nfas.size());
- if (nfas.size() < 2) {
- return;
- }
-
- vector<OutfixInfo> &outfixes = tbi.outfixes;
-
- map<NGHolder *, size_t> nfa_mapping;
- for (size_t i = 0; i < outfixes.size(); i++) {
+ batch.clear();
+ }
+ }
+
+ return merged;
+}
+
+static
+void mergeOutfixNfas(RoseBuildImpl &tbi, vector<NGHolder *> &nfas) {
+ DEBUG_PRINTF("merging %zu nfas\n", nfas.size());
+ if (nfas.size() < 2) {
+ return;
+ }
+
+ vector<OutfixInfo> &outfixes = tbi.outfixes;
+
+ map<NGHolder *, size_t> nfa_mapping;
+ for (size_t i = 0; i < outfixes.size(); i++) {
auto *holder = outfixes[i].holder();
if (holder) {
nfa_mapping[holder] = i;
- }
- }
-
- map<NGHolder *, NGHolder *> merged = chunkedNfaMerge(tbi, nfas);
- if (merged.empty()) {
- return;
- }
-
- DEBUG_PRINTF("%zu nfas merged\n", merged.size());
-
- // Update the outfix info for merged holders.
- for (const auto &m : merged) {
- OutfixInfo &victim = outfixes.at(nfa_mapping[m.first]);
- OutfixInfo &winner = outfixes.at(nfa_mapping[m.second]);
- mergeOutfixInfo(winner, victim);
- victim.clear();
- }
-
- removeDeadOutfixes(outfixes);
-}
-
-namespace {
-struct MergeMcClellan {
- MergeMcClellan(const ReportManager &rm_in, const Grey &grey_in)
- : rm(rm_in), grey(grey_in) {}
-
- unique_ptr<raw_dfa> operator()(const raw_dfa *d1, const raw_dfa *d2) const {
- assert(d1 && d2);
- return mergeTwoDfas(d1, d2, DFA_MERGE_MAX_STATES, &rm, grey);
- }
-
-private:
- const ReportManager &rm;
- const Grey &grey;
-};
-
-struct MergeHaig {
- explicit MergeHaig(u32 limit_in) : limit(limit_in) {}
-
- unique_ptr<raw_som_dfa> operator()(const raw_som_dfa *d1,
- const raw_som_dfa *d2) const {
- assert(d1 && d2);
- return attemptToMergeHaig({d1, d2}, limit);
- }
-
-private:
- const u32 limit; //!< state limit for merged result.
-};
-}
-
-/**
- * Generic pairwise merge algorithm that can be used for either McClellan
- * (RawDfa=raw_dfa) or Haig (RawDfa=raw_som_dfa). Delegates the actual merge
- * operation to a merge functor, which allows the caller to set some policy
- * (state limits, etc).
- *
- * This is currently astonishingly simple and just considers every pair of
- * DFAs, slow and steady. We may wish to actually apply a merge ordering
- * strategy in the future.
- */
-template<class RawDfa, class MergeFunctor>
-static
-void pairwiseDfaMerge(vector<RawDfa *> &dfas,
+ }
+ }
+
+ map<NGHolder *, NGHolder *> merged = chunkedNfaMerge(tbi, nfas);
+ if (merged.empty()) {
+ return;
+ }
+
+ DEBUG_PRINTF("%zu nfas merged\n", merged.size());
+
+ // Update the outfix info for merged holders.
+ for (const auto &m : merged) {
+ OutfixInfo &victim = outfixes.at(nfa_mapping[m.first]);
+ OutfixInfo &winner = outfixes.at(nfa_mapping[m.second]);
+ mergeOutfixInfo(winner, victim);
+ victim.clear();
+ }
+
+ removeDeadOutfixes(outfixes);
+}
+
+namespace {
+struct MergeMcClellan {
+ MergeMcClellan(const ReportManager &rm_in, const Grey &grey_in)
+ : rm(rm_in), grey(grey_in) {}
+
+ unique_ptr<raw_dfa> operator()(const raw_dfa *d1, const raw_dfa *d2) const {
+ assert(d1 && d2);
+ return mergeTwoDfas(d1, d2, DFA_MERGE_MAX_STATES, &rm, grey);
+ }
+
+private:
+ const ReportManager &rm;
+ const Grey &grey;
+};
+
+struct MergeHaig {
+ explicit MergeHaig(u32 limit_in) : limit(limit_in) {}
+
+ unique_ptr<raw_som_dfa> operator()(const raw_som_dfa *d1,
+ const raw_som_dfa *d2) const {
+ assert(d1 && d2);
+ return attemptToMergeHaig({d1, d2}, limit);
+ }
+
+private:
+ const u32 limit; //!< state limit for merged result.
+};
+}
+
+/**
+ * Generic pairwise merge algorithm that can be used for either McClellan
+ * (RawDfa=raw_dfa) or Haig (RawDfa=raw_som_dfa). Delegates the actual merge
+ * operation to a merge functor, which allows the caller to set some policy
+ * (state limits, etc).
+ *
+ * This is currently astonishingly simple and just considers every pair of
+ * DFAs, slow and steady. We may wish to actually apply a merge ordering
+ * strategy in the future.
+ */
+template<class RawDfa, class MergeFunctor>
+static
+void pairwiseDfaMerge(vector<RawDfa *> &dfas,
unordered_map<RawDfa *, size_t> &dfa_mapping,
- vector<OutfixInfo> &outfixes,
- MergeFunctor merge_func) {
- DEBUG_PRINTF("merging group of size %zu\n", dfas.size());
-
- for (auto it = dfas.begin(), ite = dfas.end(); it != ite; ++it) {
- if (!*it) {
- continue;
- }
- for (auto jt = next(it); jt != ite; ++jt) {
- if (!*jt) {
- continue;
- }
-
- DEBUG_PRINTF("try merge %p and %p\n", *it, *jt);
- unique_ptr<RawDfa> rdfa = merge_func(*it, *jt);
- if (!rdfa) {
- continue; // Merge failed.
- }
-
- DEBUG_PRINTF("merge succeeded, built %p\n", rdfa.get());
- OutfixInfo &winner = outfixes.at(dfa_mapping[*it]);
- OutfixInfo &victim = outfixes.at(dfa_mapping[*jt]);
- assert(!winner.is_dead() && !victim.is_dead());
-
- RawDfa *dfa_ptr = rdfa.get();
- dfa_mapping[dfa_ptr] = dfa_mapping[*it];
- dfa_mapping.erase(*it);
+ vector<OutfixInfo> &outfixes,
+ MergeFunctor merge_func) {
+ DEBUG_PRINTF("merging group of size %zu\n", dfas.size());
+
+ for (auto it = dfas.begin(), ite = dfas.end(); it != ite; ++it) {
+ if (!*it) {
+ continue;
+ }
+ for (auto jt = next(it); jt != ite; ++jt) {
+ if (!*jt) {
+ continue;
+ }
+
+ DEBUG_PRINTF("try merge %p and %p\n", *it, *jt);
+ unique_ptr<RawDfa> rdfa = merge_func(*it, *jt);
+ if (!rdfa) {
+ continue; // Merge failed.
+ }
+
+ DEBUG_PRINTF("merge succeeded, built %p\n", rdfa.get());
+ OutfixInfo &winner = outfixes.at(dfa_mapping[*it]);
+ OutfixInfo &victim = outfixes.at(dfa_mapping[*jt]);
+ assert(!winner.is_dead() && !victim.is_dead());
+
+ RawDfa *dfa_ptr = rdfa.get();
+ dfa_mapping[dfa_ptr] = dfa_mapping[*it];
+ dfa_mapping.erase(*it);
winner.proto = move(rdfa);
-
- mergeOutfixInfo(winner, victim);
-
- victim.clear();
- *jt = nullptr; // to be deleted.
- *it = dfa_ptr;
- }
- }
-}
-
-template<class RawDfa, class MergeFunctor>
-static
-void chunkedDfaMerge(vector<RawDfa *> &dfas,
+
+ mergeOutfixInfo(winner, victim);
+
+ victim.clear();
+ *jt = nullptr; // to be deleted.
+ *it = dfa_ptr;
+ }
+ }
+}
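The merge functor is the policy hook: MergeMcClellan and MergeHaig above both
return a freshly built merged object on success and nullptr on failure, which
is the only contract pairwiseDfaMerge() relies on. A toy functor with the same
shape, using assumed stand-in types for illustration only:

    #include <memory>
    #include <vector>

    struct MergeCapped {
        explicit MergeCapped(size_t limit_in) : limit(limit_in) {}

        // Same contract as MergeMcClellan/MergeHaig: merged result or nullptr.
        std::unique_ptr<std::vector<int>>
        operator()(const std::vector<int> *a, const std::vector<int> *b) const {
            if (a->size() + b->size() > limit) {
                return nullptr; // over budget; caller keeps both inputs
            }
            auto out = std::make_unique<std::vector<int>>(*a);
            out->insert(out->end(), b->begin(), b->end());
            return out;
        }

    private:
        const size_t limit; // stand-in for a cap such as DFA_MERGE_MAX_STATES
    };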
+
+template<class RawDfa, class MergeFunctor>
+static
+void chunkedDfaMerge(vector<RawDfa *> &dfas,
unordered_map<RawDfa *, size_t> &dfa_mapping,
- vector<OutfixInfo> &outfixes,
- MergeFunctor merge_func) {
- DEBUG_PRINTF("begin merge of %zu dfas\n", dfas.size());
-
- vector<RawDfa *> out_dfas;
- vector<RawDfa *> chunk;
- for (auto it = begin(dfas), ite = end(dfas); it != ite; ++it) {
- chunk.push_back(*it);
- if (chunk.size() >= DFA_CHUNK_SIZE_MAX || next(it) == ite) {
- pairwiseDfaMerge(chunk, dfa_mapping, outfixes, merge_func);
- out_dfas.insert(end(out_dfas), begin(chunk), end(chunk));
- chunk.clear();
- }
- }
-
- // Remove null (merged) DFAs and update vector for subsequent use.
- out_dfas.erase(remove(out_dfas.begin(), out_dfas.end(), nullptr),
- out_dfas.end());
- dfas.swap(out_dfas);
- DEBUG_PRINTF("after merge there are %zu dfas\n", dfas.size());
-}
-
-static
-void mergeOutfixDfas(RoseBuildImpl &tbi, vector<raw_dfa *> &dfas) {
-    DEBUG_PRINTF("merging %zu dfas\n", dfas.size());
- if (dfas.size() < 2) {
- return;
- }
-
- vector<OutfixInfo> &outfixes = tbi.outfixes;
-
- /* key is index into outfix array as iterators, etc may be invalidated by
- * element addition. */
+ vector<OutfixInfo> &outfixes,
+ MergeFunctor merge_func) {
+ DEBUG_PRINTF("begin merge of %zu dfas\n", dfas.size());
+
+ vector<RawDfa *> out_dfas;
+ vector<RawDfa *> chunk;
+ for (auto it = begin(dfas), ite = end(dfas); it != ite; ++it) {
+ chunk.push_back(*it);
+ if (chunk.size() >= DFA_CHUNK_SIZE_MAX || next(it) == ite) {
+ pairwiseDfaMerge(chunk, dfa_mapping, outfixes, merge_func);
+ out_dfas.insert(end(out_dfas), begin(chunk), end(chunk));
+ chunk.clear();
+ }
+ }
+
+ // Remove null (merged) DFAs and update vector for subsequent use.
+ out_dfas.erase(remove(out_dfas.begin(), out_dfas.end(), nullptr),
+ out_dfas.end());
+ dfas.swap(out_dfas);
+ DEBUG_PRINTF("after merge there are %zu dfas\n", dfas.size());
+}
+
+static
+void mergeOutfixDfas(RoseBuildImpl &tbi, vector<raw_dfa *> &dfas) {
+    DEBUG_PRINTF("merging %zu dfas\n", dfas.size());
+ if (dfas.size() < 2) {
+ return;
+ }
+
+ vector<OutfixInfo> &outfixes = tbi.outfixes;
+
+ /* key is index into outfix array as iterators, etc may be invalidated by
+ * element addition. */
unordered_map<raw_dfa *, size_t> dfa_mapping;
- for (size_t i = 0; i < outfixes.size(); i++) {
+ for (size_t i = 0; i < outfixes.size(); i++) {
auto *rdfa = outfixes[i].rdfa();
if (rdfa) {
dfa_mapping[rdfa] = i;
- }
- }
-
- chunkedDfaMerge(dfas, dfa_mapping, outfixes,
- MergeMcClellan(tbi.rm, tbi.cc.grey));
- removeDeadOutfixes(outfixes);
-}
-
-static
-void mergeOutfixCombo(RoseBuildImpl &tbi, const ReportManager &rm,
- const Grey &grey) {
- if (!grey.roseMcClellanOutfix) {
- return;
- }
-
- DEBUG_PRINTF("merge combo\n");
-
- bool seen_dfa = false;
- u32 nfa_count = 0;
- for (const auto &outfix : tbi.outfixes) {
+ }
+ }
+
+ chunkedDfaMerge(dfas, dfa_mapping, outfixes,
+ MergeMcClellan(tbi.rm, tbi.cc.grey));
+ removeDeadOutfixes(outfixes);
+}
+
+static
+void mergeOutfixCombo(RoseBuildImpl &tbi, const ReportManager &rm,
+ const Grey &grey) {
+ if (!grey.roseMcClellanOutfix) {
+ return;
+ }
+
+ DEBUG_PRINTF("merge combo\n");
+
+ bool seen_dfa = false;
+ u32 nfa_count = 0;
+ for (const auto &outfix : tbi.outfixes) {
if (outfix.holder()) {
- DEBUG_PRINTF("nfa\n");
- nfa_count++;
+ DEBUG_PRINTF("nfa\n");
+ nfa_count++;
} else if (outfix.rdfa()) {
- DEBUG_PRINTF("dfa\n");
- seen_dfa = true;
- }
- }
-
- DEBUG_PRINTF("nfa %u dfas present %d\n", nfa_count,
- (int)seen_dfa);
- if (!nfa_count || (nfa_count == 1 && !seen_dfa)) {
- DEBUG_PRINTF("no combo merges possible\n");
- return;
- }
-
- /* key is index into outfix array as iterators, etc may be invalidated by
- * element addition. */
- size_t new_dfas = 0;
+ DEBUG_PRINTF("dfa\n");
+ seen_dfa = true;
+ }
+ }
+
+ DEBUG_PRINTF("nfa %u dfas present %d\n", nfa_count,
+ (int)seen_dfa);
+ if (!nfa_count || (nfa_count == 1 && !seen_dfa)) {
+ DEBUG_PRINTF("no combo merges possible\n");
+ return;
+ }
+
+ /* key is index into outfix array as iterators, etc may be invalidated by
+ * element addition. */
+ size_t new_dfas = 0;
unordered_map<raw_dfa *, size_t> dfa_mapping;
- vector<raw_dfa *> dfas;
-
- for (auto it = tbi.outfixes.begin(); it != tbi.outfixes.end(); ++it) {
+ vector<raw_dfa *> dfas;
+
+ for (auto it = tbi.outfixes.begin(); it != tbi.outfixes.end(); ++it) {
auto &outfix = *it;
assert(!outfix.is_dead());
@@ -2544,75 +2544,75 @@ void mergeOutfixCombo(RoseBuildImpl &tbi, const ReportManager &rm,
auto *rdfa = outfix.rdfa();
dfas.push_back(rdfa);
dfa_mapping[rdfa] = it - tbi.outfixes.begin();
- continue;
- }
-
+ continue;
+ }
+
if (!outfix.holder()) {
- continue;
- }
-
+ continue;
+ }
+
NGHolder *h = outfix.holder();
- assert(h->kind == NFA_OUTFIX);
- auto rdfa = buildMcClellan(*h, &rm, grey);
- if (rdfa) {
- // Transform this outfix into a DFA and add it to the merge set.
- dfa_mapping[rdfa.get()] = it - tbi.outfixes.begin();
- dfas.push_back(rdfa.get());
+ assert(h->kind == NFA_OUTFIX);
+ auto rdfa = buildMcClellan(*h, &rm, grey);
+ if (rdfa) {
+ // Transform this outfix into a DFA and add it to the merge set.
+ dfa_mapping[rdfa.get()] = it - tbi.outfixes.begin();
+ dfas.push_back(rdfa.get());
outfix.proto = move(rdfa);
- new_dfas++;
- }
- }
-
- DEBUG_PRINTF("constructed %zu new dfas\n", new_dfas);
-
- if (!new_dfas) {
- /* assumes normal dfas have already been fully merged */
- return;
- }
-
- chunkedDfaMerge(dfas, dfa_mapping, tbi.outfixes,
- MergeMcClellan(tbi.rm, tbi.cc.grey));
- removeDeadOutfixes(tbi.outfixes);
-}
-
-static
-void mergeOutfixHaigs(RoseBuildImpl &tbi, vector<raw_som_dfa *> &dfas,
- u32 limit) {
- if (dfas.size() < 2) {
- return;
- }
-
- vector<OutfixInfo> &outfixes = tbi.outfixes;
-
+ new_dfas++;
+ }
+ }
+
+ DEBUG_PRINTF("constructed %zu new dfas\n", new_dfas);
+
+ if (!new_dfas) {
+ /* assumes normal dfas have already been fully merged */
+ return;
+ }
+
+ chunkedDfaMerge(dfas, dfa_mapping, tbi.outfixes,
+ MergeMcClellan(tbi.rm, tbi.cc.grey));
+ removeDeadOutfixes(tbi.outfixes);
+}
+
+static
+void mergeOutfixHaigs(RoseBuildImpl &tbi, vector<raw_som_dfa *> &dfas,
+ u32 limit) {
+ if (dfas.size() < 2) {
+ return;
+ }
+
+ vector<OutfixInfo> &outfixes = tbi.outfixes;
+
unordered_map<raw_som_dfa *, size_t> dfa_mapping;
- for (size_t i = 0; i < outfixes.size(); i++) {
+ for (size_t i = 0; i < outfixes.size(); i++) {
auto *haig = outfixes[i].haig();
if (haig) {
dfa_mapping[haig] = i;
- }
- }
-
- chunkedDfaMerge(dfas, dfa_mapping, outfixes, MergeHaig(limit));
- removeDeadOutfixes(outfixes);
-}
-
-/**
- * This pass attempts to merge outfix engines together. At this point in time,
- * the engine type (NFA, DFA, Haig) has already been decided for each outfix
- * and outfixes can only be merged with others of the same type. NFAs are merged
- * in a priority order based on common prefix length. The other types are
- * merged blindly. Engines are merged to the extent that they can still be
- * implemented efficiently.
- */
-void mergeOutfixes(RoseBuildImpl &tbi) {
- if (!tbi.cc.grey.mergeOutfixes) {
- return;
- }
-
- vector<NGHolder *> nfas;
- vector<raw_dfa *> dfas;
- vector<raw_som_dfa *> som_dfas;
-
+ }
+ }
+
+ chunkedDfaMerge(dfas, dfa_mapping, outfixes, MergeHaig(limit));
+ removeDeadOutfixes(outfixes);
+}
+
+/**
+ * This pass attempts to merge outfix engines together. At this point in time,
+ * the engine type (NFA, DFA, Haig) has already been decided for each outfix
+ * and outfixes can only be merged with others of the same type. NFAs are merged
+ * in a priority order based on common prefix length. The other types are
+ * merged blindly. Engines are merged to the extent that they can still be
+ * implemented efficiently.
+ */
+void mergeOutfixes(RoseBuildImpl &tbi) {
+ if (!tbi.cc.grey.mergeOutfixes) {
+ return;
+ }
+
+ vector<NGHolder *> nfas;
+ vector<raw_dfa *> dfas;
+ vector<raw_som_dfa *> som_dfas;
+
for (auto &outfix : tbi.outfixes) {
if (outfix.rdfa()) {
dfas.push_back(outfix.rdfa());
@@ -2620,199 +2620,199 @@ void mergeOutfixes(RoseBuildImpl &tbi) {
nfas.push_back(outfix.holder());
} else if (outfix.haig()) {
som_dfas.push_back(outfix.haig());
- }
- }
-
- DEBUG_PRINTF("merging %zu dfas, %zu nfas\n",
- dfas.size(), nfas.size());
-
- mergeOutfixNfas(tbi, nfas);
- mergeOutfixDfas(tbi, dfas);
- mergeOutfixHaigs(tbi, som_dfas, 255);
- mergeOutfixHaigs(tbi, som_dfas, 8192);
- mergeOutfixCombo(tbi, tbi.rm, tbi.cc.grey);
-}
-
-static
-u32 allowedSquashDistance(const CharReach &cr, u32 min_width,
- const RoseBuildImpl &tbi,
- RoseVertex tv) {
- CharReach accept_cr;
- DEBUG_PRINTF("hello |cr|=%zu\n", cr.count());
-
- const RoseGraph &g = tbi.g;
-
- /* TODO: inspect further back in the pattern */
- for (u32 lit_id : g[tv].literals) {
+ }
+ }
+
+ DEBUG_PRINTF("merging %zu dfas, %zu nfas\n",
+ dfas.size(), nfas.size());
+
+ mergeOutfixNfas(tbi, nfas);
+ mergeOutfixDfas(tbi, dfas);
+ mergeOutfixHaigs(tbi, som_dfas, 255);
+ mergeOutfixHaigs(tbi, som_dfas, 8192);
+ mergeOutfixCombo(tbi, tbi.rm, tbi.cc.grey);
+}
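The "priority order based on common prefix length" mentioned above is applied
when clustering NFAs (inside mergeNfaCluster(), not shown in this diff). A
hedged sketch of that ordering idea: enumerate candidate pairs and try the
pairs sharing the longest prefix first, since they are the most likely to
merge compactly. All names below are illustrative:

    #include <algorithm>
    #include <string>
    #include <vector>

    struct Candidate {
        size_t idx1, idx2; // pair of engines to try merging
        size_t prefix;     // shared prefix length; longer merges first
    };

    static size_t commonPrefixLen(const std::string &a, const std::string &b) {
        size_t n = std::min(a.size(), b.size());
        size_t i = 0;
        while (i < n && a[i] == b[i]) {
            i++;
        }
        return i;
    }

    static std::vector<Candidate> orderPairs(const std::vector<std::string> &strs) {
        std::vector<Candidate> out;
        for (size_t i = 0; i < strs.size(); i++) {
            for (size_t j = i + 1; j < strs.size(); j++) {
                out.push_back({i, j, commonPrefixLen(strs[i], strs[j])});
            }
        }
        std::sort(out.begin(), out.end(),
                  [](const Candidate &a, const Candidate &b) {
                      return a.prefix > b.prefix;
                  });
        return out;
    }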
+
+static
+u32 allowedSquashDistance(const CharReach &cr, u32 min_width,
+ const RoseBuildImpl &tbi,
+ RoseVertex tv) {
+ CharReach accept_cr;
+ DEBUG_PRINTF("hello |cr|=%zu\n", cr.count());
+
+ const RoseGraph &g = tbi.g;
+
+ /* TODO: inspect further back in the pattern */
+ for (u32 lit_id : g[tv].literals) {
const rose_literal_id &lit = tbi.literals.at(lit_id);
- if (lit.delay) {
- return 0; /* TODO: better */
- }
- if (lit.table != ROSE_FLOATING && lit.table != ROSE_EOD_ANCHORED) {
- return 0;
- }
- assert(!lit.s.empty());
- accept_cr |= *lit.s.rbegin();
- }
-
- DEBUG_PRINTF("|accept_cr|=%zu\n", accept_cr.count());
-
- if ((accept_cr & cr).any()) {
- DEBUG_PRINTF("no squash\n");
- return 0; /* the accept byte doesn't always kill the puffette. TODO:
- * maybe if we look further back we could find something that
- * would kill the puffette... */
- }
-
- DEBUG_PRINTF("allowed to squash %u\n", min_width);
- return min_width;
-}
-
-void mergePuffixes(RoseBuildImpl &tbi) {
- DEBUG_PRINTF("entry\n");
-
- if (!tbi.cc.grey.mergeSuffixes) {
- return;
- }
-
- RoseGraph &g = tbi.g;
-
- for (auto v : vertices_range(g)) {
- shared_ptr<NGHolder> h = g[v].suffix.graph;
- if (!h) {
- continue;
- }
- assert(!g[v].suffix.haig);
- assert(!g[v].eod_accept);
-
- assert(onlyOneTop(*h)); /* we should not have merged yet */
- bool fixed_depth = g[v].min_offset == g[v].max_offset;
-
- if (!isPuffable(*h, fixed_depth, tbi.rm, tbi.cc.grey)) {
- continue;
- }
-
- PureRepeat repeat;
- if (!isPureRepeat(*h, repeat)) {
- assert(0);
- continue;
- }
-
- if (repeat.bounds.min == depth(0)) {
- assert(0); // No vacuous puffs allowed.
- continue;
- }
-
- assert(repeat.bounds.min.is_finite() &&
- repeat.bounds.max.is_reachable());
- assert(repeat.bounds.max == repeat.bounds.min ||
- repeat.bounds.max.is_infinite());
-
- const bool unbounded = repeat.bounds.max.is_infinite();
- const set<ReportID> reports = all_reports(*h);
- assert(reports.size() == 1);
- ReportID report = *reports.begin();
-
- DEBUG_PRINTF("got puffette candidate %u:%s\n", report,
- repeat.bounds.str().c_str());
-
- raw_puff rp(repeat.bounds.min, unbounded, report, repeat.reach);
-
- u32 queue;
- u32 event;
- tbi.addChainTail(rp, &queue, &event);
- u32 squashDistance =
- allowedSquashDistance(repeat.reach, repeat.bounds.min, tbi, v);
-
+ if (lit.delay) {
+ return 0; /* TODO: better */
+ }
+ if (lit.table != ROSE_FLOATING && lit.table != ROSE_EOD_ANCHORED) {
+ return 0;
+ }
+ assert(!lit.s.empty());
+ accept_cr |= *lit.s.rbegin();
+ }
+
+ DEBUG_PRINTF("|accept_cr|=%zu\n", accept_cr.count());
+
+ if ((accept_cr & cr).any()) {
+ DEBUG_PRINTF("no squash\n");
+ return 0; /* the accept byte doesn't always kill the puffette. TODO:
+ * maybe if we look further back we could find something that
+ * would kill the puffette... */
+ }
+
+ DEBUG_PRINTF("allowed to squash %u\n", min_width);
+ return min_width;
+}
+
+void mergePuffixes(RoseBuildImpl &tbi) {
+ DEBUG_PRINTF("entry\n");
+
+ if (!tbi.cc.grey.mergeSuffixes) {
+ return;
+ }
+
+ RoseGraph &g = tbi.g;
+
+ for (auto v : vertices_range(g)) {
+ shared_ptr<NGHolder> h = g[v].suffix.graph;
+ if (!h) {
+ continue;
+ }
+ assert(!g[v].suffix.haig);
+ assert(!g[v].eod_accept);
+
+ assert(onlyOneTop(*h)); /* we should not have merged yet */
+ bool fixed_depth = g[v].min_offset == g[v].max_offset;
+
+ if (!isPuffable(*h, fixed_depth, tbi.rm, tbi.cc.grey)) {
+ continue;
+ }
+
+ PureRepeat repeat;
+ if (!isPureRepeat(*h, repeat)) {
+ assert(0);
+ continue;
+ }
+
+ if (repeat.bounds.min == depth(0)) {
+ assert(0); // No vacuous puffs allowed.
+ continue;
+ }
+
+ assert(repeat.bounds.min.is_finite() &&
+ repeat.bounds.max.is_reachable());
+ assert(repeat.bounds.max == repeat.bounds.min ||
+ repeat.bounds.max.is_infinite());
+
+ const bool unbounded = repeat.bounds.max.is_infinite();
+ const set<ReportID> reports = all_reports(*h);
+ assert(reports.size() == 1);
+ ReportID report = *reports.begin();
+
+ DEBUG_PRINTF("got puffette candidate %u:%s\n", report,
+ repeat.bounds.str().c_str());
+
+ raw_puff rp(repeat.bounds.min, unbounded, report, repeat.reach);
+
+ u32 queue;
+ u32 event;
+ tbi.addChainTail(rp, &queue, &event);
+ u32 squashDistance =
+ allowedSquashDistance(repeat.reach, repeat.bounds.min, tbi, v);
+
Report ir = makeMpvTrigger(event, squashDistance);
- ReportID id = tbi.rm.getInternalId(ir);
-
- DEBUG_PRINTF("puffette event q%u t%u\n", queue, event);
- g[v].suffix.reset();
- g[v].reports.insert(id);
- }
-}
-
-static
-void updateCastleSuffix(RoseGraph &g, const shared_ptr<CastleProto> &m,
- u32 top, const vector<RoseVertex> &verts) {
+ ReportID id = tbi.rm.getInternalId(ir);
+
+ DEBUG_PRINTF("puffette event q%u t%u\n", queue, event);
+ g[v].suffix.reset();
+ g[v].reports.insert(id);
+ }
+}
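A puffette replaces a suffix that is a pure repeat of a single character class
(e.g. /[^x]{16,}/) with a simple counter, which is why isPuffable() and
isPureRepeat() gate the conversion above. A toy model of the runtime idea,
under heavily simplified and assumed semantics:

    #include <bitset>
    #include <cstddef>
    #include <string>

    struct Puffette {
        std::bitset<256> reach; // bytes that extend the repeat
        size_t min_repeats;     // bounds.min
        bool unbounded;         // bounds.max is infinite
    };

    static size_t countMatches(const Puffette &p, const std::string &buf) {
        size_t run = 0, matches = 0;
        for (unsigned char c : buf) {
            if (!p.reach.test(c)) {
                run = 0; // a byte outside the reach kills the repeat
                continue;
            }
            run++;
            if (run == p.min_repeats || (p.unbounded && run > p.min_repeats)) {
                matches++; // report at this offset
            }
        }
        return matches;
    }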
+
+static
+void updateCastleSuffix(RoseGraph &g, const shared_ptr<CastleProto> &m,
+ u32 top, const vector<RoseVertex> &verts) {
DEBUG_PRINTF("merged in as top %u of %p, updating %zu vertices\n", top,
m.get(), verts.size());
-
- for (auto v : verts) {
- assert(g[v].suffix.castle);
- g[v].suffix.castle = m;
- g[v].suffix.top = top;
- }
-}
-
-static
+
+ for (auto v : verts) {
+ assert(g[v].suffix.castle);
+ g[v].suffix.castle = m;
+ g[v].suffix.top = top;
+ }
+}
+
+static
void mergeCastleSuffixChunk(RoseGraph &g, const vector<CastleProto *> &castles,
const unordered_map<CastleProto *, vector<RoseVertex>> &eng_verts) {
- if (castles.size() <= 1) {
- return;
- }
-
+ if (castles.size() <= 1) {
+ return;
+ }
+
DEBUG_PRINTF("merging reach %s, %zu elements\n",
describeClass(castles[0]->reach()).c_str(), castles.size());
-
+
CastleProto *m = nullptr;
-
+
for (CastleProto *c : castles) {
- assert(c->repeats.size() == 1); // Not yet merged.
+ assert(c->repeats.size() == 1); // Not yet merged.
assert(g[eng_verts.at(c).front()].suffix.castle.get() == c);
if (!m) {
m = c;
- continue;
- }
-
+ continue;
+ }
+
u32 top = m->merge(c->repeats[0]);
if (top == CastleProto::max_occupancy) {
- // No room left to merge into 'm'. This one becomes the new 'm'.
- DEBUG_PRINTF("next mergee\n");
- m = c;
+ // No room left to merge into 'm'. This one becomes the new 'm'.
+ DEBUG_PRINTF("next mergee\n");
+ m = c;
continue;
- }
+ }
updateCastleSuffix(g, g[eng_verts.at(m).front()].suffix.castle, top,
eng_verts.at(c));
DEBUG_PRINTF("added to %p, top %u\n", m, top);
- }
-}
-
+ }
+}
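The chunk merge above folds single-repeat castles into one proto until its
occupancy cap is reached, then starts a new accumulation target. A rough
stand-in sketch (CastleProto's real merge logic differs; the names and the cap
value are assumptions):

    #include <vector>

    struct Proto {
        std::vector<int> repeats; // stand-in for repeats sharing one reach
    };

    constexpr size_t kMaxOccupancy = 8; // assumed, like CastleProto::max_occupancy

    static void mergeChunk(std::vector<Proto *> &protos) {
        Proto *m = nullptr; // current accumulation target
        for (Proto *c : protos) {
            if (!m || m->repeats.size() >= kMaxOccupancy) {
                m = c; // full (or first element): c becomes the new target
                continue;
            }
            // Absorb c's single repeat as a new "top" of m; the real code then
            // re-points c's vertices at m via updateCastleSuffix().
            m->repeats.push_back(c->repeats.front());
            c->repeats.clear();
        }
    }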
+
void mergeCastleSuffixes(RoseBuildImpl &build) {
- DEBUG_PRINTF("entry\n");
-
+ DEBUG_PRINTF("entry\n");
+
if (!build.cc.grey.allowCastle || !build.cc.grey.mergeSuffixes) {
- return;
- }
-
+ return;
+ }
+
unordered_map<CastleProto *, vector<RoseVertex>> eng_verts;
map<CharReach, vector<CastleProto *>> by_reach;
-
+
RoseGraph &g = build.g;
-
- for (auto v : vertices_range(g)) {
- if (!g[v].suffix.castle) {
- continue;
- }
-
+
+ for (auto v : vertices_range(g)) {
+ if (!g[v].suffix.castle) {
+ continue;
+ }
+
CastleProto *c = g[v].suffix.castle.get();
-
- if (c->repeats.size() != 1) {
- // This code assumes it's the only place merging is being done.
- assert(0);
- continue;
- }
-
+
+ if (c->repeats.size() != 1) {
+ // This code assumes it's the only place merging is being done.
+ assert(0);
+ continue;
+ }
+
if (!contains(eng_verts, c)) {
- by_reach[c->reach()].push_back(c);
- }
+ by_reach[c->reach()].push_back(c);
+ }
eng_verts[c].push_back(v);
- }
-
+ }
+
for (auto &chunk : by_reach | map_values) {
mergeCastleSuffixChunk(g, chunk, eng_verts);
- }
-}
-
-} // namespace ue2
+ }
+}
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_merge.h b/contrib/libs/hyperscan/src/rose/rose_build_merge.h
index c0f0d65c8b..6de6c7786a 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_merge.h
+++ b/contrib/libs/hyperscan/src/rose/rose_build_merge.h
@@ -1,70 +1,70 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
* \brief Rose Build: functions for reducing the number of engines in a Rose
* graph through merging or deduplicating engines.
- */
-
-#ifndef ROSE_BUILD_MERGE_H
-#define ROSE_BUILD_MERGE_H
-
-#include "rose_graph.h"
-
-#include <deque>
-#include <set>
-
-namespace ue2 {
-
-class NGHolder;
-class RoseBuildImpl;
-
-bool dedupeLeftfixes(RoseBuildImpl &tbi);
-void mergeLeftfixesVariableLag(RoseBuildImpl &tbi);
-void dedupeLeftfixesVariableLag(RoseBuildImpl &tbi);
-void dedupeSuffixes(RoseBuildImpl &tbi);
-
-void mergeAcyclicSuffixes(RoseBuildImpl &tbi);
-void mergeSmallSuffixes(RoseBuildImpl &tbi);
-void mergeSmallLeftfixes(RoseBuildImpl &tbi);
-void mergeCastleLeftfixes(RoseBuildImpl &tbi);
-void mergeOutfixes(RoseBuildImpl &tbi);
-void mergePuffixes(RoseBuildImpl &tbi);
-void mergeCastleSuffixes(RoseBuildImpl &tbi);
-
-bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u,
- RoseVertex v);
-bool mergeableRoseVertices(const RoseBuildImpl &tbi,
- const std::set<RoseVertex> &v1,
- const std::set<RoseVertex> &v2);
-bool setDistinctRoseTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2,
- const std::deque<RoseVertex> &verts1);
-
-} // namespace ue2
-
-#endif // ROSE_BUILD_MERGE_H
+ */
+
+#ifndef ROSE_BUILD_MERGE_H
+#define ROSE_BUILD_MERGE_H
+
+#include "rose_graph.h"
+
+#include <deque>
+#include <set>
+
+namespace ue2 {
+
+class NGHolder;
+class RoseBuildImpl;
+
+bool dedupeLeftfixes(RoseBuildImpl &tbi);
+void mergeLeftfixesVariableLag(RoseBuildImpl &tbi);
+void dedupeLeftfixesVariableLag(RoseBuildImpl &tbi);
+void dedupeSuffixes(RoseBuildImpl &tbi);
+
+void mergeAcyclicSuffixes(RoseBuildImpl &tbi);
+void mergeSmallSuffixes(RoseBuildImpl &tbi);
+void mergeSmallLeftfixes(RoseBuildImpl &tbi);
+void mergeCastleLeftfixes(RoseBuildImpl &tbi);
+void mergeOutfixes(RoseBuildImpl &tbi);
+void mergePuffixes(RoseBuildImpl &tbi);
+void mergeCastleSuffixes(RoseBuildImpl &tbi);
+
+bool mergeableRoseVertices(const RoseBuildImpl &tbi, RoseVertex u,
+ RoseVertex v);
+bool mergeableRoseVertices(const RoseBuildImpl &tbi,
+ const std::set<RoseVertex> &v1,
+ const std::set<RoseVertex> &v2);
+bool setDistinctRoseTops(RoseGraph &g, NGHolder &h1, const NGHolder &h2,
+ const std::deque<RoseVertex> &verts1);
+
+} // namespace ue2
+
+#endif // ROSE_BUILD_MERGE_H
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_misc.cpp b/contrib/libs/hyperscan/src/rose/rose_build_misc.cpp
index ca7a131910..0b0e689c99 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_misc.cpp
+++ b/contrib/libs/hyperscan/src/rose/rose_build_misc.cpp
@@ -1,297 +1,297 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
#include "rose_build_misc.h"
-#include "rose_build_impl.h"
-
+#include "rose_build_impl.h"
+
#include "rose_build_resources.h"
#include "hwlm/hwlm_literal.h"
-#include "nfa/castlecompile.h"
-#include "nfa/goughcompile.h"
-#include "nfa/mcclellancompile_util.h"
-#include "nfa/nfa_api.h"
-#include "nfa/rdfa.h"
+#include "nfa/castlecompile.h"
+#include "nfa/goughcompile.h"
+#include "nfa/mcclellancompile_util.h"
+#include "nfa/nfa_api.h"
+#include "nfa/rdfa.h"
#include "nfa/tamaramacompile.h"
-#include "nfagraph/ng_holder.h"
-#include "nfagraph/ng_limex.h"
-#include "nfagraph/ng_reports.h"
-#include "nfagraph/ng_repeat.h"
-#include "nfagraph/ng_util.h"
-#include "nfagraph/ng_width.h"
-#include "smallwrite/smallwrite_build.h"
-#include "util/alloc.h"
-#include "util/boundary_reports.h"
-#include "util/compile_context.h"
-#include "util/container.h"
-#include "util/graph.h"
-#include "util/graph_range.h"
-#include "util/make_unique.h"
-#include "util/order_check.h"
-#include "util/report_manager.h"
-#include "util/ue2string.h"
-#include "util/verify_types.h"
-#include "ue2common.h"
-#include "grey.h"
-
-#include <boost/graph/breadth_first_search.hpp>
-
-using namespace std;
-
-namespace ue2 {
-
-// just to get it out of the header
-RoseBuild::~RoseBuild() { }
-
+#include "nfagraph/ng_holder.h"
+#include "nfagraph/ng_limex.h"
+#include "nfagraph/ng_reports.h"
+#include "nfagraph/ng_repeat.h"
+#include "nfagraph/ng_util.h"
+#include "nfagraph/ng_width.h"
+#include "smallwrite/smallwrite_build.h"
+#include "util/alloc.h"
+#include "util/boundary_reports.h"
+#include "util/compile_context.h"
+#include "util/container.h"
+#include "util/graph.h"
+#include "util/graph_range.h"
+#include "util/make_unique.h"
+#include "util/order_check.h"
+#include "util/report_manager.h"
+#include "util/ue2string.h"
+#include "util/verify_types.h"
+#include "ue2common.h"
+#include "grey.h"
+
+#include <boost/graph/breadth_first_search.hpp>
+
+using namespace std;
+
+namespace ue2 {
+
+// just to get it out of the header
+RoseBuild::~RoseBuild() { }
+
RoseBuildImpl::RoseBuildImpl(ReportManager &rm_in,
SomSlotManager &ssm_in,
SmallWriteBuild &smwr_in,
- const CompileContext &cc_in,
- const BoundaryReports &boundary_in)
- : cc(cc_in),
- root(add_vertex(g)),
- anchored_root(add_vertex(g)),
- hasSom(false),
- group_end(0),
- ematcher_region_size(0),
- eod_event_literal_id(MO_INVALID_IDX),
- max_rose_anchored_floating_overlap(0),
- rm(rm_in),
- ssm(ssm_in),
+ const CompileContext &cc_in,
+ const BoundaryReports &boundary_in)
+ : cc(cc_in),
+ root(add_vertex(g)),
+ anchored_root(add_vertex(g)),
+ hasSom(false),
+ group_end(0),
+ ematcher_region_size(0),
+ eod_event_literal_id(MO_INVALID_IDX),
+ max_rose_anchored_floating_overlap(0),
+ rm(rm_in),
+ ssm(ssm_in),
smwr(smwr_in),
- boundary(boundary_in),
- next_nfa_report(0) {
- // add root vertices to graph
- g[root].min_offset = 0;
- g[root].max_offset = 0;
-
- g[anchored_root].min_offset = 0;
- g[anchored_root].max_offset = 0;
-}
-
-RoseBuildImpl::~RoseBuildImpl() {
- // empty
-}
-
-bool RoseVertexProps::isBoring(void) const {
- return !suffix && !left;
-}
-
-bool RoseVertexProps::fixedOffset(void) const {
- assert(min_offset <= max_offset); /* ensure offsets calculated */
- return max_offset == min_offset && max_offset != ROSE_BOUND_INF;
-}
-
-bool RoseBuildImpl::isRootSuccessor(const RoseVertex &v) const {
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (isAnyStart(u)) {
- return true;
- }
- }
- return false;
-}
-
-bool RoseBuildImpl::isNonRootSuccessor(const RoseVertex &v) const {
- for (auto u : inv_adjacent_vertices_range(v, g)) {
- if (!isAnyStart(u)) {
- return true;
- }
- }
- return false;
-}
-
-bool hasAnchHistorySucc(const RoseGraph &g, RoseVertex v) {
- for (const auto &e : out_edges_range(v, g)) {
- if (g[e].history == ROSE_ROLE_HISTORY_ANCH) {
- return true;
- }
- }
-
- return false;
-}
-
-bool hasLastByteHistorySucc(const RoseGraph &g, RoseVertex v) {
- for (const auto &e : out_edges_range(v, g)) {
- if (g[e].history == ROSE_ROLE_HISTORY_LAST_BYTE) {
- return true;
- }
- }
-
- return false;
-}
-
-static
-bool isInTable(const RoseBuildImpl &tbi, RoseVertex v,
- rose_literal_table table) {
- const auto &lit_ids = tbi.g[v].literals;
- if (lit_ids.empty()) {
- return false; // special role with no literals
- }
-
- // All literals for a given vertex will be in the same table, so we need
- // only inspect the first one.
+ boundary(boundary_in),
+ next_nfa_report(0) {
+ // add root vertices to graph
+ g[root].min_offset = 0;
+ g[root].max_offset = 0;
+
+ g[anchored_root].min_offset = 0;
+ g[anchored_root].max_offset = 0;
+}
+
+RoseBuildImpl::~RoseBuildImpl() {
+ // empty
+}
+
+bool RoseVertexProps::isBoring(void) const {
+ return !suffix && !left;
+}
+
+bool RoseVertexProps::fixedOffset(void) const {
+ assert(min_offset <= max_offset); /* ensure offsets calculated */
+ return max_offset == min_offset && max_offset != ROSE_BOUND_INF;
+}
+
+bool RoseBuildImpl::isRootSuccessor(const RoseVertex &v) const {
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (isAnyStart(u)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+bool RoseBuildImpl::isNonRootSuccessor(const RoseVertex &v) const {
+ for (auto u : inv_adjacent_vertices_range(v, g)) {
+ if (!isAnyStart(u)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+bool hasAnchHistorySucc(const RoseGraph &g, RoseVertex v) {
+ for (const auto &e : out_edges_range(v, g)) {
+ if (g[e].history == ROSE_ROLE_HISTORY_ANCH) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool hasLastByteHistorySucc(const RoseGraph &g, RoseVertex v) {
+ for (const auto &e : out_edges_range(v, g)) {
+ if (g[e].history == ROSE_ROLE_HISTORY_LAST_BYTE) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static
+bool isInTable(const RoseBuildImpl &tbi, RoseVertex v,
+ rose_literal_table table) {
+ const auto &lit_ids = tbi.g[v].literals;
+ if (lit_ids.empty()) {
+ return false; // special role with no literals
+ }
+
+ // All literals for a given vertex will be in the same table, so we need
+ // only inspect the first one.
const auto lit_table = tbi.literals.at(*lit_ids.begin()).table;
-
- // Verify that all literals for this vertex are in the same table.
+
+ // Verify that all literals for this vertex are in the same table.
assert(all_of_in(lit_ids, [&](u32 lit_id) {
return tbi.literals.at(lit_id).table == lit_table;
}));
-
- return lit_table == table;
-}
-
-bool RoseBuildImpl::isAnchored(RoseVertex v) const {
- return isInTable(*this, v, ROSE_ANCHORED);
-}
-
-bool RoseBuildImpl::isFloating(RoseVertex v) const {
- return isInTable(*this, v, ROSE_FLOATING);
-}
-
-bool RoseBuildImpl::isInETable(RoseVertex v) const {
- return isInTable(*this, v, ROSE_EOD_ANCHORED);
-}
-
-bool RoseBuildImpl::hasLiteralInTable(RoseVertex v,
- enum rose_literal_table t) const {
- return isInTable(*this, v, t);
-}
-
-/* Indicates that the floating table (if it exists) will only be run
- conditionally based on matches from the anchored table. */
-bool RoseBuildImpl::hasNoFloatingRoots() const {
- for (auto v : adjacent_vertices_range(root, g)) {
- if (isFloating(v)) {
+
+ return lit_table == table;
+}
+
+bool RoseBuildImpl::isAnchored(RoseVertex v) const {
+ return isInTable(*this, v, ROSE_ANCHORED);
+}
+
+bool RoseBuildImpl::isFloating(RoseVertex v) const {
+ return isInTable(*this, v, ROSE_FLOATING);
+}
+
+bool RoseBuildImpl::isInETable(RoseVertex v) const {
+ return isInTable(*this, v, ROSE_EOD_ANCHORED);
+}
+
+bool RoseBuildImpl::hasLiteralInTable(RoseVertex v,
+ enum rose_literal_table t) const {
+ return isInTable(*this, v, t);
+}
+
+/* Indicates that the floating table (if it exists) will only be run
+ conditionally based on matches from the anchored table. */
+bool RoseBuildImpl::hasNoFloatingRoots() const {
+ for (auto v : adjacent_vertices_range(root, g)) {
+ if (isFloating(v)) {
DEBUG_PRINTF("direct floating root %zu\n", g[v].index);
- return false;
- }
- }
-
- /* need to check if the anchored_root has any literals which are too deep */
- for (auto v : adjacent_vertices_range(anchored_root, g)) {
- if (isFloating(v)) {
+ return false;
+ }
+ }
+
+ /* need to check if the anchored_root has any literals which are too deep */
+ for (auto v : adjacent_vertices_range(anchored_root, g)) {
+ if (isFloating(v)) {
DEBUG_PRINTF("indirect floating root %zu\n", g[v].index);
- return false;
- }
- }
-
- return true;
-}
-
-size_t RoseBuildImpl::maxLiteralLen(RoseVertex v) const {
- const auto &lit_ids = g[v].literals;
- assert(!lit_ids.empty());
-
- size_t maxlen = 0;
-
- for (const auto &lit_id : lit_ids) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+size_t RoseBuildImpl::maxLiteralLen(RoseVertex v) const {
+ const auto &lit_ids = g[v].literals;
+ assert(!lit_ids.empty());
+
+ size_t maxlen = 0;
+
+ for (const auto &lit_id : lit_ids) {
maxlen = max(maxlen, literals.at(lit_id).elength());
- }
-
- return maxlen;
-}
-
-size_t RoseBuildImpl::minLiteralLen(RoseVertex v) const {
- const auto &lit_ids = g[v].literals;
- assert(!lit_ids.empty());
-
- size_t minlen = ROSE_BOUND_INF;
-
- for (const auto &lit_id : lit_ids) {
+ }
+
+ return maxlen;
+}
+
+size_t RoseBuildImpl::minLiteralLen(RoseVertex v) const {
+ const auto &lit_ids = g[v].literals;
+ assert(!lit_ids.empty());
+
+ size_t minlen = ROSE_BOUND_INF;
+
+ for (const auto &lit_id : lit_ids) {
minlen = min(minlen, literals.at(lit_id).elength());
- }
-
- return minlen;
-}
-
-// RoseBuild factory
+ }
+
+ return minlen;
+}
+
+// RoseBuild factory
unique_ptr<RoseBuild> makeRoseBuilder(ReportManager &rm,
SomSlotManager &ssm,
SmallWriteBuild &smwr,
- const CompileContext &cc,
- const BoundaryReports &boundary) {
+ const CompileContext &cc,
+ const BoundaryReports &boundary) {
return ue2::make_unique<RoseBuildImpl>(rm, ssm, smwr, cc, boundary);
-}
-
-bool roseIsPureLiteral(const RoseEngine *t) {
- return t->runtimeImpl == ROSE_RUNTIME_PURE_LITERAL;
-}
-
-// Returns non-zero max overlap len if a suffix of the literal 'a' overlaps
-// with a prefix of the literal 'b' or 'a' can be contained in 'b'.
-size_t maxOverlap(const ue2_literal &a, const ue2_literal &b, u32 b_delay) {
- /* overly conservative if only part of the string is nocase */
- bool nocase = a.any_nocase() || b.any_nocase();
- DEBUG_PRINTF("max overlap %s %s+%u %d\n", dumpString(a).c_str(),
- dumpString(b).c_str(), b_delay, (int)nocase);
- size_t a_len = a.length();
- size_t b_len = b.length();
- const char *a_end = a.c_str() + a_len;
- const char *b_end = b.c_str() + b_len;
- if (b_delay >= a_len) {
- return b_len + b_delay;
- } else if (b_delay) {
- /* a can be a substring of b which overlaps some of the end dots
- * OR b can be a substring near the end of a */
- /* ignore overlap due to the final trailing dot as delayed literals
- * are delivered before undelayed */
- for (u32 j = b_delay - 1; j > 0; j--) {
- if (b_len + j >= a_len) {
- if (!cmp(a.c_str(), b_end + j - a_len, a_len - j, nocase)) {
- return b_len + j;
- }
- } else {
- if (!cmp(a_end - j - b_len, b.c_str(), b_len, nocase)) {
- return b_len + j;
- }
- }
- }
- }
-
- return maxStringOverlap(a.get_string(), b.get_string(), nocase);
-}
-
-// Returns non-zero max overlap len if a suffix of the literal ID 'a' overlaps
-// with a prefix of the literal ID 'b' or 'a' can be contained in 'b'.
-size_t maxOverlap(const rose_literal_id &a, const rose_literal_id &b) {
- assert(!a.delay);
- return maxOverlap(a.s, b.s, b.delay);
-}
-
-static
-const rose_literal_id &getOverlapLiteral(const RoseBuildImpl &tbi,
- u32 literal_id) {
+}
+
+bool roseIsPureLiteral(const RoseEngine *t) {
+ return t->runtimeImpl == ROSE_RUNTIME_PURE_LITERAL;
+}
+
+// Returns non-zero max overlap len if a suffix of the literal 'a' overlaps
+// with a prefix of the literal 'b' or 'a' can be contained in 'b'.
+size_t maxOverlap(const ue2_literal &a, const ue2_literal &b, u32 b_delay) {
+ /* overly conservative if only part of the string is nocase */
+ bool nocase = a.any_nocase() || b.any_nocase();
+ DEBUG_PRINTF("max overlap %s %s+%u %d\n", dumpString(a).c_str(),
+ dumpString(b).c_str(), b_delay, (int)nocase);
+ size_t a_len = a.length();
+ size_t b_len = b.length();
+ const char *a_end = a.c_str() + a_len;
+ const char *b_end = b.c_str() + b_len;
+ if (b_delay >= a_len) {
+ return b_len + b_delay;
+ } else if (b_delay) {
+ /* a can be a substring of b which overlaps some of the end dots
+ * OR b can be a substring near the end of a */
+ /* ignore overlap due to the final trailing dot as delayed literals
+ * are delivered before undelayed */
+ for (u32 j = b_delay - 1; j > 0; j--) {
+ if (b_len + j >= a_len) {
+ if (!cmp(a.c_str(), b_end + j - a_len, a_len - j, nocase)) {
+ return b_len + j;
+ }
+ } else {
+ if (!cmp(a_end - j - b_len, b.c_str(), b_len, nocase)) {
+ return b_len + j;
+ }
+ }
+ }
+ }
+
+ return maxStringOverlap(a.get_string(), b.get_string(), nocase);
+}
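Setting the delay handling aside, the final step is the classic "longest
suffix of a that is a prefix of b" computation. A case-sensitive illustration
on plain std::string (the real maxStringOverlap also handles nocase and the
containment case):

    #include <algorithm>
    #include <string>

    static size_t maxStringOverlapSimple(const std::string &a,
                                         const std::string &b) {
        size_t len = std::min(a.size(), b.size());
        for (size_t k = len; k > 0; k--) {
            if (a.compare(a.size() - k, k, b, 0, k) == 0) {
                return k; // longest suffix of a matching a prefix of b
            }
        }
        return 0;
    }

    // e.g. maxStringOverlapSimple("abcde", "cdefg") == 3 ("cde")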
+
+// Returns non-zero max overlap len if a suffix of the literal ID 'a' overlaps
+// with a prefix of the literal ID 'b' or 'a' can be contained in 'b'.
+size_t maxOverlap(const rose_literal_id &a, const rose_literal_id &b) {
+ assert(!a.delay);
+ return maxOverlap(a.s, b.s, b.delay);
+}
+
+static
+const rose_literal_id &getOverlapLiteral(const RoseBuildImpl &tbi,
+ u32 literal_id) {
auto it = tbi.anchoredLitSuffix.find(literal_id);
- if (it != tbi.anchoredLitSuffix.end()) {
- return it->second;
- }
+ if (it != tbi.anchoredLitSuffix.end()) {
+ return it->second;
+ }
return tbi.literals.at(literal_id);
-}
-
+}
+
ue2_literal findNonOverlappingTail(const set<ue2_literal> &lits,
const ue2_literal &s) {
size_t max_overlap = 0;
@@ -309,236 +309,236 @@ ue2_literal findNonOverlappingTail(const set<ue2_literal> &lits,
return tail;
}
-size_t RoseBuildImpl::maxLiteralOverlap(RoseVertex u, RoseVertex v) const {
- size_t overlap = 0;
- for (auto u_lit_id : g[u].literals) {
- const rose_literal_id &ul = getOverlapLiteral(*this, u_lit_id);
- for (auto v_lit_id : g[v].literals) {
- const rose_literal_id &vl = getOverlapLiteral(*this, v_lit_id);
- overlap = max(overlap, maxOverlap(ul, vl));
- }
- }
- return overlap;
-}
-
-void RoseBuildImpl::removeVertices(const vector<RoseVertex> &dead) {
- for (auto v : dead) {
- assert(!isAnyStart(v));
+size_t RoseBuildImpl::maxLiteralOverlap(RoseVertex u, RoseVertex v) const {
+ size_t overlap = 0;
+ for (auto u_lit_id : g[u].literals) {
+ const rose_literal_id &ul = getOverlapLiteral(*this, u_lit_id);
+ for (auto v_lit_id : g[v].literals) {
+ const rose_literal_id &vl = getOverlapLiteral(*this, v_lit_id);
+ overlap = max(overlap, maxOverlap(ul, vl));
+ }
+ }
+ return overlap;
+}
+
+void RoseBuildImpl::removeVertices(const vector<RoseVertex> &dead) {
+ for (auto v : dead) {
+ assert(!isAnyStart(v));
DEBUG_PRINTF("removing vertex %zu\n", g[v].index);
- for (auto lit_id : g[v].literals) {
- literal_info[lit_id].vertices.erase(v);
- }
+ for (auto lit_id : g[v].literals) {
+ literal_info[lit_id].vertices.erase(v);
+ }
clear_vertex(v, g);
- remove_vertex(v, g);
- }
+ remove_vertex(v, g);
+ }
renumber_vertices(g);
-}
-
-// Find the maximum bound on the edges to this vertex's successors ignoring
-// those via infixes.
-u32 RoseBuildImpl::calcSuccMaxBound(RoseVertex u) const {
- u32 maxBound = 0;
- for (const auto &e : out_edges_range(u, g)) {
- RoseVertex v = target(e, g);
-
- if (g[v].left) {
- continue;
- }
-
- u32 thisBound = g[e].maxBound;
-
- if (thisBound == ROSE_BOUND_INF) {
- return ROSE_BOUND_INF;
- }
-
- if (!g[v].eod_accept) {
- // Add the length of the longest of our literals.
- thisBound += maxLiteralLen(v);
- }
-
- maxBound = max(maxBound, thisBound);
- }
-
- assert(maxBound <= ROSE_BOUND_INF);
- return maxBound;
-}
-
-u32 RoseBuildImpl::getLiteralId(const ue2_literal &s, u32 delay,
- rose_literal_table table) {
+}
+
+// Find the maximum bound on the edges to this vertex's successors ignoring
+// those via infixes.
+u32 RoseBuildImpl::calcSuccMaxBound(RoseVertex u) const {
+ u32 maxBound = 0;
+ for (const auto &e : out_edges_range(u, g)) {
+ RoseVertex v = target(e, g);
+
+ if (g[v].left) {
+ continue;
+ }
+
+ u32 thisBound = g[e].maxBound;
+
+ if (thisBound == ROSE_BOUND_INF) {
+ return ROSE_BOUND_INF;
+ }
+
+ if (!g[v].eod_accept) {
+ // Add the length of the longest of our literals.
+ thisBound += maxLiteralLen(v);
+ }
+
+ maxBound = max(maxBound, thisBound);
+ }
+
+ assert(maxBound <= ROSE_BOUND_INF);
+ return maxBound;
+}
+
+u32 RoseBuildImpl::getLiteralId(const ue2_literal &s, u32 delay,
+ rose_literal_table table) {
DEBUG_PRINTF("getting id for %s in table %d\n", dumpString(s).c_str(),
table);
- assert(table != ROSE_ANCHORED);
- rose_literal_id key(s, table, delay);
-
+ assert(table != ROSE_ANCHORED);
+ rose_literal_id key(s, table, delay);
+
auto m = literals.insert(key);
u32 id = m.first;
bool inserted = m.second;
-
- if (inserted) {
- literal_info.push_back(rose_literal_info());
- assert(literal_info.size() == id + 1);
-
- if (delay) {
- u32 undelayed_id = getLiteralId(s, 0, table);
- literal_info[id].undelayed_id = undelayed_id;
- literal_info[undelayed_id].delayed_ids.insert(id);
- } else {
- literal_info[id].undelayed_id = id;
- }
- }
- return id;
-}
-
-// Operates on a msk/cmp pair and a literal, as used in hwlmLiteral, and
-// zeroes msk elements that don't constrain matches beyond what the literal
-// itself already guarantees.
-void normaliseLiteralMask(const ue2_literal &s_in, vector<u8> &msk,
- vector<u8> &cmp) {
- assert(msk.size() == cmp.size());
- assert(msk.size() <= HWLM_MASKLEN);
-
- if (msk.empty()) {
- return;
- }
-
- // Work over a caseless copy if the string contains nocase chars. This will
- // ensure that we treat masks designed to handle mixed-sensitivity literals
- // correctly: these will be matched by the literal matcher in caseless
- // mode, with the mask used to narrow the matches.
- ue2_literal s(s_in);
- if (s.any_nocase()) {
- make_nocase(&s);
- }
-
- ue2_literal::const_reverse_iterator it = s.rbegin(), ite = s.rend();
- size_t i = msk.size();
- while (i-- != 0 && it != ite) {
- const CharReach &cr = *it;
- for (size_t c = cr.find_first(); c != CharReach::npos;
- c = cr.find_next(c)) {
- if (((u8)c & msk[i]) != cmp[i]) {
- goto skip;
- }
- }
-
- // If we didn't jump out of the loop to skip, then this mask position
- // doesn't further narrow the set of acceptable literals from those
- // accepted by s. So we can zero this element.
- msk[i] = 0;
- cmp[i] = 0;
- skip:
- ++it;
- }
-
- // Wipe out prefix zeroes.
- while (!msk.empty() && msk[0] == 0) {
- msk.erase(msk.begin());
- cmp.erase(cmp.begin());
- }
-}
-
-rose_literal_id::rose_literal_id(const ue2_literal &s_in,
- const vector<u8> &msk_in, const vector<u8> &cmp_in,
- rose_literal_table table_in, u32 delay_in)
- : s(s_in), msk(msk_in), cmp(cmp_in), table(table_in),
- delay(delay_in), distinctiveness(0) {
- assert(msk.size() == cmp.size());
- assert(msk.size() <= HWLM_MASKLEN);
- assert(delay <= MAX_DELAY);
-
- normaliseLiteralMask(s, msk, cmp);
-}
-
-u32 RoseBuildImpl::getLiteralId(const ue2_literal &s, const vector<u8> &msk,
- const vector<u8> &cmp, u32 delay,
- rose_literal_table table) {
+
+ if (inserted) {
+ literal_info.push_back(rose_literal_info());
+ assert(literal_info.size() == id + 1);
+
+ if (delay) {
+ u32 undelayed_id = getLiteralId(s, 0, table);
+ literal_info[id].undelayed_id = undelayed_id;
+ literal_info[undelayed_id].delayed_ids.insert(id);
+ } else {
+ literal_info[id].undelayed_id = id;
+ }
+ }
+ return id;
+}
+
+// Operates on a msk/cmp pair and a literal, as used in hwlmLiteral, and
+// zeroes msk elements that don't constrain matches beyond what the literal
+// itself already guarantees.
+void normaliseLiteralMask(const ue2_literal &s_in, vector<u8> &msk,
+ vector<u8> &cmp) {
+ assert(msk.size() == cmp.size());
+ assert(msk.size() <= HWLM_MASKLEN);
+
+ if (msk.empty()) {
+ return;
+ }
+
+ // Work over a caseless copy if the string contains nocase chars. This will
+ // ensure that we treat masks designed to handle mixed-sensitivity literals
+ // correctly: these will be matched by the literal matcher in caseless
+ // mode, with the mask used to narrow the matches.
+ ue2_literal s(s_in);
+ if (s.any_nocase()) {
+ make_nocase(&s);
+ }
+
+ ue2_literal::const_reverse_iterator it = s.rbegin(), ite = s.rend();
+ size_t i = msk.size();
+ while (i-- != 0 && it != ite) {
+ const CharReach &cr = *it;
+ for (size_t c = cr.find_first(); c != CharReach::npos;
+ c = cr.find_next(c)) {
+ if (((u8)c & msk[i]) != cmp[i]) {
+ goto skip;
+ }
+ }
+
+ // If we didn't jump out of the loop to skip, then this mask position
+ // doesn't further narrow the set of acceptable literals from those
+ // accepted by s. So we can zero this element.
+ msk[i] = 0;
+ cmp[i] = 0;
+ skip:
+ ++it;
+ }
+
+ // Wipe out prefix zeroes.
+ while (!msk.empty() && msk[0] == 0) {
+ msk.erase(msk.begin());
+ cmp.erase(cmp.begin());
+ }
+}
+
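To make the zeroing rule concrete: a mask byte that every character the literal can match at that position already satisfies adds no constraint and can be cleared, and leading cleared entries are then trimmed. A minimal sketch for a fully case-sensitive literal (no CharReach; the function name is hypothetical):

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

static void normaliseSketch(const std::string &lit,
                            std::vector<uint8_t> &msk,
                            std::vector<uint8_t> &cmp) {
    assert(msk.size() == cmp.size());
    std::size_t i = msk.size();
    auto it = lit.rbegin(); // masks align with the end of the literal
    while (i-- != 0 && it != lit.rend()) {
        uint8_t c = (uint8_t)*it++;
        if ((c & msk[i]) == cmp[i]) {
            msk[i] = 0; // the literal already guarantees this byte
            cmp[i] = 0;
        }
    }
    while (!msk.empty() && msk[0] == 0) { // trim leading no-op entries
        msk.erase(msk.begin());
        cmp.erase(cmp.begin());
    }
}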
+rose_literal_id::rose_literal_id(const ue2_literal &s_in,
+ const vector<u8> &msk_in, const vector<u8> &cmp_in,
+ rose_literal_table table_in, u32 delay_in)
+ : s(s_in), msk(msk_in), cmp(cmp_in), table(table_in),
+ delay(delay_in), distinctiveness(0) {
+ assert(msk.size() == cmp.size());
+ assert(msk.size() <= HWLM_MASKLEN);
+ assert(delay <= MAX_DELAY);
+
+ normaliseLiteralMask(s, msk, cmp);
+}
+
+u32 RoseBuildImpl::getLiteralId(const ue2_literal &s, const vector<u8> &msk,
+ const vector<u8> &cmp, u32 delay,
+ rose_literal_table table) {
DEBUG_PRINTF("getting id for %s in table %d\n", dumpString(s).c_str(),
table);
- assert(table != ROSE_ANCHORED);
- rose_literal_id key(s, msk, cmp, table, delay);
-
- /* ue2_literals are always uppercased if nocase and must have an
- * alpha char */
-
+ assert(table != ROSE_ANCHORED);
+ rose_literal_id key(s, msk, cmp, table, delay);
+
+ /* ue2_literals are always uppercased if nocase and must have an
+ * alpha char */
+
auto m = literals.insert(key);
u32 id = m.first;
bool inserted = m.second;
-
- if (inserted) {
- literal_info.push_back(rose_literal_info());
- assert(literal_info.size() == id + 1);
-
- if (delay) {
- u32 undelayed_id = getLiteralId(s, msk, cmp, 0, table);
- literal_info[id].undelayed_id = undelayed_id;
- literal_info[undelayed_id].delayed_ids.insert(id);
- } else {
- literal_info[id].undelayed_id = id;
- }
- }
- return id;
-}
-
-u32 RoseBuildImpl::getNewLiteralId() {
- rose_literal_id key(ue2_literal(), ROSE_ANCHORED, 0);
+
+ if (inserted) {
+ literal_info.push_back(rose_literal_info());
+ assert(literal_info.size() == id + 1);
+
+ if (delay) {
+ u32 undelayed_id = getLiteralId(s, msk, cmp, 0, table);
+ literal_info[id].undelayed_id = undelayed_id;
+ literal_info[undelayed_id].delayed_ids.insert(id);
+ } else {
+ literal_info[id].undelayed_id = id;
+ }
+ }
+ return id;
+}
+
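The delayed-literal bookkeeping above follows a small interning pattern: a delayed literal recursively registers its zero-delay base form, and the two ids are cross-linked. A reduced sketch under hypothetical names (keying on string and delay only; the real key also carries the table and msk/cmp):

#include <cstdint>
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>

struct LitInfoSketch {
    uint32_t undelayed_id = 0;
    std::set<uint32_t> delayed_ids;
};

struct LitTableSketch {
    std::map<std::pair<std::string, uint32_t>, uint32_t> ids;
    std::vector<LitInfoSketch> info;

    uint32_t get(const std::string &s, uint32_t delay) {
        auto key = std::make_pair(s, delay);
        auto it = ids.find(key);
        if (it != ids.end()) {
            return it->second; // already interned
        }
        uint32_t id = (uint32_t)info.size();
        ids.emplace(key, id);
        info.emplace_back();
        if (delay) {
            uint32_t base = get(s, 0); // ensure the undelayed form exists
            info[id].undelayed_id = base;
            info[base].delayed_ids.insert(id);
        } else {
            info[id].undelayed_id = id;
        }
        return id;
    }
};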
+u32 RoseBuildImpl::getNewLiteralId() {
+ rose_literal_id key(ue2_literal(), ROSE_ANCHORED, 0);
u32 numLiterals = verify_u32(literals.size());
- key.distinctiveness = numLiterals;
-
+ key.distinctiveness = numLiterals;
+
auto m = literals.insert(key);
assert(m.second);
u32 id = m.first;
-
- literal_info.push_back(rose_literal_info());
- assert(literal_info.size() == id + 1);
-
- literal_info[id].undelayed_id = id;
-
- return id;
-}
-
-bool operator<(const RoseEdgeProps &a, const RoseEdgeProps &b) {
- ORDER_CHECK(minBound);
- ORDER_CHECK(maxBound);
- ORDER_CHECK(history);
- return false;
-}
-
-#ifndef NDEBUG
+
+ literal_info.push_back(rose_literal_info());
+ assert(literal_info.size() == id + 1);
+
+ literal_info[id].undelayed_id = id;
+
+ return id;
+}
+
+bool operator<(const RoseEdgeProps &a, const RoseEdgeProps &b) {
+ ORDER_CHECK(minBound);
+ ORDER_CHECK(maxBound);
+ ORDER_CHECK(history);
+ return false;
+}
+
+#ifndef NDEBUG
bool roseHasTops(const RoseBuildImpl &build, RoseVertex v) {
const RoseGraph &g = build.g;
- assert(g[v].left);
-
- set<u32> graph_tops;
+ assert(g[v].left);
+
+ set<u32> graph_tops;
if (!build.isRootSuccessor(v)) {
for (const auto &e : in_edges_range(v, g)) {
graph_tops.insert(g[e].rose_top);
}
- }
-
- return is_subset_of(graph_tops, all_tops(g[v].left));
-}
-#endif
-
-u32 OutfixInfo::get_queue(QueueIndexFactory &qif) {
- if (queue == ~0U) {
- queue = qif.get_queue();
- }
-
- return queue;
-}
-
+ }
+
+ return is_subset_of(graph_tops, all_tops(g[v].left));
+}
+#endif
+
+u32 OutfixInfo::get_queue(QueueIndexFactory &qif) {
+ if (queue == ~0U) {
+ queue = qif.get_queue();
+ }
+
+ return queue;
+}
+
namespace {
class OutfixAllReports : public boost::static_visitor<set<ReportID>> {
public:
set<ReportID> operator()(const boost::blank &) const {
return set<ReportID>();
- }
+ }
template<class T>
set<ReportID> operator()(const unique_ptr<T> &x) const {
return all_reports(*x);
- }
-
+ }
+
set<ReportID> operator()(const MpvProto &mpv) const {
set<ReportID> reports;
for (const auto &puff : mpv.puffettes) {
@@ -548,160 +548,160 @@ public:
reports.insert(puff.report);
}
return reports;
- }
+ }
};
}
-
+
set<ReportID> all_reports(const OutfixInfo &outfix) {
auto reports = boost::apply_visitor(OutfixAllReports(), outfix.proto);
- assert(!reports.empty());
- return reports;
-}
-
-bool RoseSuffixInfo::operator==(const RoseSuffixInfo &b) const {
- return top == b.top && graph == b.graph && castle == b.castle &&
+ assert(!reports.empty());
+ return reports;
+}
+
+bool RoseSuffixInfo::operator==(const RoseSuffixInfo &b) const {
+ return top == b.top && graph == b.graph && castle == b.castle &&
rdfa == b.rdfa && haig == b.haig && tamarama == b.tamarama;
-}
-
-bool RoseSuffixInfo::operator<(const RoseSuffixInfo &b) const {
- const RoseSuffixInfo &a = *this;
- ORDER_CHECK(top);
- ORDER_CHECK(graph);
- ORDER_CHECK(castle);
- ORDER_CHECK(haig);
- ORDER_CHECK(rdfa);
+}
+
+bool RoseSuffixInfo::operator<(const RoseSuffixInfo &b) const {
+ const RoseSuffixInfo &a = *this;
+ ORDER_CHECK(top);
+ ORDER_CHECK(graph);
+ ORDER_CHECK(castle);
+ ORDER_CHECK(haig);
+ ORDER_CHECK(rdfa);
ORDER_CHECK(tamarama);
- assert(a.dfa_min_width == b.dfa_min_width);
- assert(a.dfa_max_width == b.dfa_max_width);
- return false;
-}
-
+ assert(a.dfa_min_width == b.dfa_min_width);
+ assert(a.dfa_max_width == b.dfa_max_width);
+ return false;
+}
+
size_t RoseSuffixInfo::hash() const {
return hash_all(top, graph, castle, rdfa, haig, tamarama);
}
-
-void RoseSuffixInfo::reset(void) {
- top = 0;
- graph.reset();
- castle.reset();
- rdfa.reset();
- haig.reset();
+
+void RoseSuffixInfo::reset(void) {
+ top = 0;
+ graph.reset();
+ castle.reset();
+ rdfa.reset();
+ haig.reset();
tamarama.reset();
dfa_min_width = depth(0);
- dfa_max_width = depth::infinity();
-}
-
-std::set<ReportID> all_reports(const suffix_id &s) {
- assert(s.graph() || s.castle() || s.haig() || s.dfa());
+ dfa_max_width = depth::infinity();
+}
+
+std::set<ReportID> all_reports(const suffix_id &s) {
+ assert(s.graph() || s.castle() || s.haig() || s.dfa());
if (s.tamarama()) {
return all_reports(*s.tamarama());
} else if (s.graph()) {
- return all_reports(*s.graph());
- } else if (s.castle()) {
- return all_reports(*s.castle());
- } else if (s.dfa()) {
- return all_reports(*s.dfa());
- } else {
- return all_reports(*s.haig());
- }
-}
-
-depth findMinWidth(const suffix_id &s) {
- assert(s.graph() || s.castle() || s.haig() || s.dfa());
- if (s.graph()) {
- return findMinWidth(*s.graph());
- } else if (s.castle()) {
- return findMinWidth(*s.castle());
- } else {
- return s.dfa_min_width;
- }
-}
-
-depth findMinWidth(const suffix_id &s, u32 top) {
- assert(s.graph() || s.castle() || s.haig() || s.dfa());
- if (s.graph()) {
- return findMinWidth(*s.graph(), top);
- } else if (s.castle()) {
- return findMinWidth(*s.castle(), top);
- } else {
- return s.dfa_min_width;
- }
-}
-
-depth findMaxWidth(const suffix_id &s) {
- assert(s.graph() || s.castle() || s.haig() || s.dfa());
- if (s.graph()) {
- return findMaxWidth(*s.graph());
- } else if (s.castle()) {
- return findMaxWidth(*s.castle());
- } else {
- return s.dfa_max_width;
- }
-}
-
-depth findMaxWidth(const suffix_id &s, u32 top) {
- assert(s.graph() || s.castle() || s.haig() || s.dfa());
- if (s.graph()) {
- return findMaxWidth(*s.graph(), top);
- } else if (s.castle()) {
- return findMaxWidth(*s.castle(), top);
- } else {
- return s.dfa_max_width;
- }
-}
-
-bool has_eod_accepts(const suffix_id &s) {
- assert(s.graph() || s.castle() || s.haig() || s.dfa());
- if (s.graph()) {
- /* ignore accept -> eod edge */
- return in_degree(s.graph()->acceptEod, *s.graph()) > 1;
- } else if (s.castle()) {
- return false;
- } else if (s.dfa()) {
- return has_eod_accepts(*s.dfa());
- } else {
- return has_eod_accepts(*s.haig());
- }
-}
-
-bool has_non_eod_accepts(const suffix_id &s) {
- assert(s.graph() || s.castle() || s.haig() || s.dfa());
- if (s.graph()) {
- return in_degree(s.graph()->accept, *s.graph());
- } else if (s.castle()) {
- return true;
- } else if (s.dfa()) {
- return has_non_eod_accepts(*s.dfa());
- } else {
- return has_non_eod_accepts(*s.haig());
- }
-}
-
-set<u32> all_tops(const suffix_id &s) {
- assert(s.graph() || s.castle() || s.haig() || s.dfa());
- if (s.graph()) {
+ return all_reports(*s.graph());
+ } else if (s.castle()) {
+ return all_reports(*s.castle());
+ } else if (s.dfa()) {
+ return all_reports(*s.dfa());
+ } else {
+ return all_reports(*s.haig());
+ }
+}
+
+depth findMinWidth(const suffix_id &s) {
+ assert(s.graph() || s.castle() || s.haig() || s.dfa());
+ if (s.graph()) {
+ return findMinWidth(*s.graph());
+ } else if (s.castle()) {
+ return findMinWidth(*s.castle());
+ } else {
+ return s.dfa_min_width;
+ }
+}
+
+depth findMinWidth(const suffix_id &s, u32 top) {
+ assert(s.graph() || s.castle() || s.haig() || s.dfa());
+ if (s.graph()) {
+ return findMinWidth(*s.graph(), top);
+ } else if (s.castle()) {
+ return findMinWidth(*s.castle(), top);
+ } else {
+ return s.dfa_min_width;
+ }
+}
+
+depth findMaxWidth(const suffix_id &s) {
+ assert(s.graph() || s.castle() || s.haig() || s.dfa());
+ if (s.graph()) {
+ return findMaxWidth(*s.graph());
+ } else if (s.castle()) {
+ return findMaxWidth(*s.castle());
+ } else {
+ return s.dfa_max_width;
+ }
+}
+
+depth findMaxWidth(const suffix_id &s, u32 top) {
+ assert(s.graph() || s.castle() || s.haig() || s.dfa());
+ if (s.graph()) {
+ return findMaxWidth(*s.graph(), top);
+ } else if (s.castle()) {
+ return findMaxWidth(*s.castle(), top);
+ } else {
+ return s.dfa_max_width;
+ }
+}
+
+bool has_eod_accepts(const suffix_id &s) {
+ assert(s.graph() || s.castle() || s.haig() || s.dfa());
+ if (s.graph()) {
+ /* ignore accept -> eod edge */
+ return in_degree(s.graph()->acceptEod, *s.graph()) > 1;
+ } else if (s.castle()) {
+ return false;
+ } else if (s.dfa()) {
+ return has_eod_accepts(*s.dfa());
+ } else {
+ return has_eod_accepts(*s.haig());
+ }
+}
+
+bool has_non_eod_accepts(const suffix_id &s) {
+ assert(s.graph() || s.castle() || s.haig() || s.dfa());
+ if (s.graph()) {
+ return in_degree(s.graph()->accept, *s.graph());
+ } else if (s.castle()) {
+ return true;
+ } else if (s.dfa()) {
+ return has_non_eod_accepts(*s.dfa());
+ } else {
+ return has_non_eod_accepts(*s.haig());
+ }
+}
+
+set<u32> all_tops(const suffix_id &s) {
+ assert(s.graph() || s.castle() || s.haig() || s.dfa());
+ if (s.graph()) {
flat_set<u32> tops = getTops(*s.graph());
assert(!tops.empty());
return {tops.begin(), tops.end()};
- }
-
- if (s.castle()) {
- return assoc_keys(s.castle()->repeats);
- }
-
- // Other types of suffix are not multi-top.
- return {0};
-}
-
-size_t suffix_id::hash() const {
+ }
+
+ if (s.castle()) {
+ return assoc_keys(s.castle()->repeats);
+ }
+
+ // Other types of suffix are not multi-top.
+ return {0};
+}
+
+size_t suffix_id::hash() const {
return hash_all(g, c, d, h, t);
-}
-
-bool isAnchored(const left_id &r) {
- assert(r.graph() || r.castle() || r.haig() || r.dfa());
- if (r.graph()) {
- return isAnchored(*r.graph());
- }
+}
+
+bool isAnchored(const left_id &r) {
+ assert(r.graph() || r.castle() || r.haig() || r.dfa());
+ if (r.graph()) {
+ return isAnchored(*r.graph());
+ }
if (r.dfa()) {
return r.dfa()->start_anchored == DEAD_STATE;
}
@@ -709,47 +709,47 @@ bool isAnchored(const left_id &r) {
return r.haig()->start_anchored == DEAD_STATE;
}
- // All other types are explicitly anchored.
- return true;
-}
-
-depth findMinWidth(const left_id &r) {
- assert(r.graph() || r.castle() || r.haig() || r.dfa());
- if (r.graph()) {
- return findMinWidth(*r.graph());
- } else if (r.castle()) {
- return findMinWidth(*r.castle());
- } else {
- return r.dfa_min_width;
- }
-}
-
-depth findMaxWidth(const left_id &r) {
- assert(r.graph() || r.castle() || r.haig() || r.dfa());
- if (r.graph()) {
- return findMaxWidth(*r.graph());
- } else if (r.castle()) {
- return findMaxWidth(*r.castle());
- } else {
- return r.dfa_max_width;
- }
-}
-
-set<u32> all_tops(const left_id &r) {
- assert(r.graph() || r.castle() || r.haig() || r.dfa());
- if (r.graph()) {
+ // All other types are explicitly anchored.
+ return true;
+}
+
+depth findMinWidth(const left_id &r) {
+ assert(r.graph() || r.castle() || r.haig() || r.dfa());
+ if (r.graph()) {
+ return findMinWidth(*r.graph());
+ } else if (r.castle()) {
+ return findMinWidth(*r.castle());
+ } else {
+ return r.dfa_min_width;
+ }
+}
+
+depth findMaxWidth(const left_id &r) {
+ assert(r.graph() || r.castle() || r.haig() || r.dfa());
+ if (r.graph()) {
+ return findMaxWidth(*r.graph());
+ } else if (r.castle()) {
+ return findMaxWidth(*r.castle());
+ } else {
+ return r.dfa_max_width;
+ }
+}
+
+set<u32> all_tops(const left_id &r) {
+ assert(r.graph() || r.castle() || r.haig() || r.dfa());
+ if (r.graph()) {
flat_set<u32> tops = getTops(*r.graph());
return {tops.begin(), tops.end()};
- }
-
- if (r.castle()) {
- return assoc_keys(r.castle()->repeats);
- }
-
- // Other types of rose are not multi-top.
- return {0};
-}
-
+ }
+
+ if (r.castle()) {
+ return assoc_keys(r.castle()->repeats);
+ }
+
+ // Other types of rose are not multi-top.
+ return {0};
+}
+
set<u32> all_reports(const left_id &left) {
assert(left.graph() || left.castle() || left.haig() || left.dfa());
if (left.graph()) {
@@ -763,142 +763,142 @@ set<u32> all_reports(const left_id &left) {
}
}
-u32 num_tops(const left_id &r) {
- return all_tops(r).size();
-}
-
-size_t left_id::hash() const {
+u32 num_tops(const left_id &r) {
+ return all_tops(r).size();
+}
+
+size_t left_id::hash() const {
return hash_all(g, c, d, h);
-}
-
-u64a findMaxOffset(const set<ReportID> &reports, const ReportManager &rm) {
- assert(!reports.empty());
- u64a maxOffset = 0;
- for (const auto &report_id : reports) {
- const Report &ir = rm.getReport(report_id);
- if (ir.hasBounds()) {
- maxOffset = max(maxOffset, ir.maxOffset);
- } else {
- return MAX_OFFSET;
- }
- }
- return maxOffset;
-}
-
+}
+
+u64a findMaxOffset(const set<ReportID> &reports, const ReportManager &rm) {
+ assert(!reports.empty());
+ u64a maxOffset = 0;
+ for (const auto &report_id : reports) {
+ const Report &ir = rm.getReport(report_id);
+ if (ir.hasBounds()) {
+ maxOffset = max(maxOffset, ir.maxOffset);
+ } else {
+ return MAX_OFFSET;
+ }
+ }
+ return maxOffset;
+}
+
size_t LeftEngInfo::hash() const {
return hash_all(graph, castle, dfa, haig, tamarama, lag, leftfix_report);
}
-void LeftEngInfo::reset(void) {
- graph.reset();
- castle.reset();
- dfa.reset();
- haig.reset();
+void LeftEngInfo::reset(void) {
+ graph.reset();
+ castle.reset();
+ dfa.reset();
+ haig.reset();
tamarama.reset();
- lag = 0;
- leftfix_report = MO_INVALID_IDX;
+ lag = 0;
+ leftfix_report = MO_INVALID_IDX;
dfa_min_width = depth(0);
- dfa_max_width = depth::infinity();
-}
-
-LeftEngInfo::operator bool() const {
- assert((int)!!castle + (int)!!dfa + (int)!!haig <= 1);
- assert(!castle || !graph);
- assert(!dfa || graph); /* dfas always have the graph as well */
- assert(!haig || graph);
- return graph || castle || dfa || haig;
-}
-
+ dfa_max_width = depth::infinity();
+}
+
+LeftEngInfo::operator bool() const {
+ assert((int)!!castle + (int)!!dfa + (int)!!haig <= 1);
+ assert(!castle || !graph);
+ assert(!dfa || graph); /* dfas always have the graph as well */
+ assert(!haig || graph);
+ return graph || castle || dfa || haig;
+}
+
u32 roseQuality(const RoseResources &res, const RoseEngine *t) {
-    /* Rose is low quality if the atable is a McClellan 16 or has multiple
-     * DFAs */
+    /* Rose is low quality if the atable is a McClellan 16 or has multiple
+     * DFAs */
if (res.has_anchored) {
if (res.has_anchored_multiple) {
- DEBUG_PRINTF("multiple atable engines\n");
- return 0;
- }
-
+ DEBUG_PRINTF("multiple atable engines\n");
+ return 0;
+ }
+
if (res.has_anchored_large) {
- DEBUG_PRINTF("m16 atable engine\n");
- return 0;
- }
- }
-
- /* if we always run multiple engines then we are slow */
- u32 always_run = 0;
-
+ DEBUG_PRINTF("m16 atable engine\n");
+ return 0;
+ }
+ }
+
+ /* if we always run multiple engines then we are slow */
+ u32 always_run = 0;
+
if (res.has_anchored) {
- always_run++;
- }
-
+ always_run++;
+ }
+
if (t->eagerIterOffset) {
/* eager prefixes are always run */
always_run++;
}
if (res.has_floating) {
- /* TODO: ignore conditional ftables, or ftables beyond smwr region */
- always_run++;
- }
-
- if (t->ematcherOffset) {
- always_run++;
- }
-
-    /* ignore mpv outfixes as they are very good; mpv outfixes are before begin */
- if (t->outfixBeginQueue != t->outfixEndQueue) {
- /* TODO: ignore outfixes > smwr region */
- always_run++;
- }
-
- bool eod_prefix = false;
-
- const LeftNfaInfo *left = getLeftTable(t);
- for (u32 i = 0; i < t->activeLeftCount; i++) {
-        if (left[i].eod_check) {
- eod_prefix = true;
- break;
- }
- }
-
- if (eod_prefix) {
- always_run++;
-        DEBUG_PRINTF("eod prefixes are slow\n");
- return 0;
- }
-
- if (always_run > 1) {
- DEBUG_PRINTF("we always run %u engines\n", always_run);
- return 0;
- }
-
- return 1;
-}
-
+ /* TODO: ignore conditional ftables, or ftables beyond smwr region */
+ always_run++;
+ }
+
+ if (t->ematcherOffset) {
+ always_run++;
+ }
+
+    /* ignore mpv outfixes as they are very good; mpv outfixes are before begin */
+ if (t->outfixBeginQueue != t->outfixEndQueue) {
+ /* TODO: ignore outfixes > smwr region */
+ always_run++;
+ }
+
+ bool eod_prefix = false;
+
+ const LeftNfaInfo *left = getLeftTable(t);
+ for (u32 i = 0; i < t->activeLeftCount; i++) {
+        if (left[i].eod_check) {
+ eod_prefix = true;
+ break;
+ }
+ }
+
+ if (eod_prefix) {
+ always_run++;
+        DEBUG_PRINTF("eod prefixes are slow\n");
+ return 0;
+ }
+
+ if (always_run > 1) {
+ DEBUG_PRINTF("we always run %u engines\n", always_run);
+ return 0;
+ }
+
+ return 1;
+}
+
u32 findMinOffset(const RoseBuildImpl &build, u32 lit_id) {
const auto &lit_vertices = build.literal_info.at(lit_id).vertices;
assert(!lit_vertices.empty());
-
+
u32 min_offset = UINT32_MAX;
for (const auto &v : lit_vertices) {
min_offset = min(min_offset, build.g[v].min_offset);
}
-
+
return min_offset;
}
-
+
u32 findMaxOffset(const RoseBuildImpl &build, u32 lit_id) {
const auto &lit_vertices = build.literal_info.at(lit_id).vertices;
assert(!lit_vertices.empty());
-
+
u32 max_offset = 0;
for (const auto &v : lit_vertices) {
max_offset = max(max_offset, build.g[v].max_offset);
}
-
+
return max_offset;
-}
-
+}
+
bool canEagerlyReportAtEod(const RoseBuildImpl &build, const RoseEdge &e) {
const auto &g = build.g;
const auto v = target(e, g);
@@ -928,70 +928,70 @@ bool canEagerlyReportAtEod(const RoseBuildImpl &build, const RoseEdge &e) {
return true;
}
-#ifndef NDEBUG
-/** \brief Returns true if all the graphs (NFA, DFA, Haig, etc.) in this Rose
- * graph are implementable. */
-bool canImplementGraphs(const RoseBuildImpl &tbi) {
- const RoseGraph &g = tbi.g;
-
- // First, check the Rose leftfixes.
-
- for (auto v : vertices_range(g)) {
+#ifndef NDEBUG
+/** \brief Returns true if all the graphs (NFA, DFA, Haig, etc.) in this Rose
+ * graph are implementable. */
+bool canImplementGraphs(const RoseBuildImpl &tbi) {
+ const RoseGraph &g = tbi.g;
+
+ // First, check the Rose leftfixes.
+
+ for (auto v : vertices_range(g)) {
DEBUG_PRINTF("leftfix: check vertex %zu\n", g[v].index);
-
- if (g[v].left.castle) {
- DEBUG_PRINTF("castle ok\n");
- continue;
- }
- if (g[v].left.dfa) {
- DEBUG_PRINTF("dfa ok\n");
- continue;
- }
- if (g[v].left.haig) {
- DEBUG_PRINTF("haig ok\n");
- continue;
- }
- if (g[v].left.graph) {
- assert(g[v].left.graph->kind
+
+ if (g[v].left.castle) {
+ DEBUG_PRINTF("castle ok\n");
+ continue;
+ }
+ if (g[v].left.dfa) {
+ DEBUG_PRINTF("dfa ok\n");
+ continue;
+ }
+ if (g[v].left.haig) {
+ DEBUG_PRINTF("haig ok\n");
+ continue;
+ }
+ if (g[v].left.graph) {
+ assert(g[v].left.graph->kind
== (tbi.isRootSuccessor(v) ? NFA_PREFIX : NFA_INFIX));
- if (!isImplementableNFA(*g[v].left.graph, nullptr, tbi.cc)) {
+ if (!isImplementableNFA(*g[v].left.graph, nullptr, tbi.cc)) {
DEBUG_PRINTF("nfa prefix %zu failed (%zu vertices)\n",
g[v].index, num_vertices(*g[v].left.graph));
- return false;
- }
- }
- }
-
- // Suffix graphs.
-
- for (auto v : vertices_range(g)) {
+ return false;
+ }
+ }
+ }
+
+ // Suffix graphs.
+
+ for (auto v : vertices_range(g)) {
DEBUG_PRINTF("suffix: check vertex %zu\n", g[v].index);
-
- const RoseSuffixInfo &suffix = g[v].suffix;
- if (suffix.castle) {
- DEBUG_PRINTF("castle suffix ok\n");
- continue;
- }
- if (suffix.rdfa) {
- DEBUG_PRINTF("dfa suffix ok\n");
- continue;
- }
- if (suffix.haig) {
- DEBUG_PRINTF("haig suffix ok\n");
- continue;
- }
- if (suffix.graph) {
- assert(suffix.graph->kind == NFA_SUFFIX);
- if (!isImplementableNFA(*suffix.graph, &tbi.rm, tbi.cc)) {
+
+ const RoseSuffixInfo &suffix = g[v].suffix;
+ if (suffix.castle) {
+ DEBUG_PRINTF("castle suffix ok\n");
+ continue;
+ }
+ if (suffix.rdfa) {
+ DEBUG_PRINTF("dfa suffix ok\n");
+ continue;
+ }
+ if (suffix.haig) {
+ DEBUG_PRINTF("haig suffix ok\n");
+ continue;
+ }
+ if (suffix.graph) {
+ assert(suffix.graph->kind == NFA_SUFFIX);
+ if (!isImplementableNFA(*suffix.graph, &tbi.rm, tbi.cc)) {
DEBUG_PRINTF("nfa suffix %zu failed (%zu vertices)\n",
g[v].index, num_vertices(*suffix.graph));
- return false;
- }
- }
- }
-
- return true;
-}
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
/**
* \brief True if there is an engine with a top that is not triggered by a
@@ -1039,6 +1039,6 @@ bool hasOrphanedTops(const RoseBuildImpl &build) {
return false;
}
-#endif // NDEBUG
-
-} // namespace ue2
+#endif // NDEBUG
+
+} // namespace ue2
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_role_aliasing.cpp b/contrib/libs/hyperscan/src/rose/rose_build_role_aliasing.cpp
index 3d2af35dc6..359550e118 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_role_aliasing.cpp
+++ b/contrib/libs/hyperscan/src/rose/rose_build_role_aliasing.cpp
@@ -1,180 +1,180 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "rose_build_role_aliasing.h"
-
-#include "ue2common.h"
-#include "rose_build_impl.h"
-#include "rose_build_merge.h"
-#include "rose_build_util.h"
-#include "grey.h"
-#include "nfa/castlecompile.h"
-#include "nfa/goughcompile.h"
-#include "nfa/mcclellancompile_util.h"
-#include "nfagraph/ng_holder.h"
-#include "nfagraph/ng_is_equal.h"
-#include "nfagraph/ng_limex.h"
-#include "nfagraph/ng_prune.h"
-#include "nfagraph/ng_uncalc_components.h"
-#include "nfagraph/ng_util.h"
-#include "util/bitutils.h"
-#include "util/compile_context.h"
-#include "util/container.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "rose_build_role_aliasing.h"
+
+#include "ue2common.h"
+#include "rose_build_impl.h"
+#include "rose_build_merge.h"
+#include "rose_build_util.h"
+#include "grey.h"
+#include "nfa/castlecompile.h"
+#include "nfa/goughcompile.h"
+#include "nfa/mcclellancompile_util.h"
+#include "nfagraph/ng_holder.h"
+#include "nfagraph/ng_is_equal.h"
+#include "nfagraph/ng_limex.h"
+#include "nfagraph/ng_prune.h"
+#include "nfagraph/ng_uncalc_components.h"
+#include "nfagraph/ng_util.h"
+#include "util/bitutils.h"
+#include "util/compile_context.h"
+#include "util/container.h"
#include "util/flat_containers.h"
-#include "util/graph.h"
-#include "util/graph_range.h"
+#include "util/graph.h"
+#include "util/graph_range.h"
#include "util/hash.h"
-#include "util/order_check.h"
-
-#include <algorithm>
-#include <numeric>
-#include <vector>
-#include <boost/graph/adjacency_iterator.hpp>
-#include <boost/range/adaptor/map.hpp>
-
-using namespace std;
-using boost::adaptors::map_values;
-
-namespace ue2 {
-
+#include "util/order_check.h"
+
+#include <algorithm>
+#include <numeric>
+#include <vector>
+#include <boost/graph/adjacency_iterator.hpp>
+#include <boost/range/adaptor/map.hpp>
+
+using namespace std;
+using boost::adaptors::map_values;
+
+namespace ue2 {
+
static constexpr size_t MERGE_GROUP_SIZE_MAX = 200;
-namespace {
-// Used for checking edge sets (both in- and out-) against each other.
-struct EdgeAndVertex {
- EdgeAndVertex(const RoseEdge &e, const RoseVertex v_,
- const RoseGraph &g) : v(v_), eprops(g[e]) {}
- virtual ~EdgeAndVertex() {}
-
- virtual bool operator<(const EdgeAndVertex &a) const {
- if (v != a.v) {
- return v < a.v;
- }
- if (eprops.minBound != a.eprops.minBound) {
- return eprops.minBound < a.eprops.minBound;
- }
- if (eprops.maxBound != a.eprops.maxBound) {
- return eprops.maxBound < a.eprops.maxBound;
- }
- if (eprops.rose_top != a.eprops.rose_top) {
- return eprops.rose_top < a.eprops.rose_top;
-
- }
- return eprops.history < a.eprops.history;
- }
-
- virtual bool operator==(const EdgeAndVertex &a) const {
- return v == a.v &&
- eprops.minBound == a.eprops.minBound &&
- eprops.maxBound == a.eprops.maxBound &&
- eprops.rose_top == a.eprops.rose_top &&
- eprops.history == a.eprops.history;
- }
-
-private:
- RoseVertex v;
- const RoseEdgeProps &eprops;
-};
-
-struct AliasOutEdge : EdgeAndVertex {
- AliasOutEdge(const RoseEdge &e, const RoseGraph &g) :
- EdgeAndVertex(e, target(e, g), g) {}
-};
-
-struct AliasInEdge : EdgeAndVertex {
- AliasInEdge(const RoseEdge &e, const RoseGraph &g) :
- EdgeAndVertex(e, source(e, g), g) {}
-};
-
-class CandidateSet {
-public:
+namespace {
+// Used for checking edge sets (both in- and out-) against each other.
+struct EdgeAndVertex {
+ EdgeAndVertex(const RoseEdge &e, const RoseVertex v_,
+ const RoseGraph &g) : v(v_), eprops(g[e]) {}
+ virtual ~EdgeAndVertex() {}
+
+ virtual bool operator<(const EdgeAndVertex &a) const {
+ if (v != a.v) {
+ return v < a.v;
+ }
+ if (eprops.minBound != a.eprops.minBound) {
+ return eprops.minBound < a.eprops.minBound;
+ }
+ if (eprops.maxBound != a.eprops.maxBound) {
+ return eprops.maxBound < a.eprops.maxBound;
+ }
+ if (eprops.rose_top != a.eprops.rose_top) {
+ return eprops.rose_top < a.eprops.rose_top;
+
+ }
+ return eprops.history < a.eprops.history;
+ }
+
+ virtual bool operator==(const EdgeAndVertex &a) const {
+ return v == a.v &&
+ eprops.minBound == a.eprops.minBound &&
+ eprops.maxBound == a.eprops.maxBound &&
+ eprops.rose_top == a.eprops.rose_top &&
+ eprops.history == a.eprops.history;
+ }
+
+private:
+ RoseVertex v;
+ const RoseEdgeProps &eprops;
+};
+
+struct AliasOutEdge : EdgeAndVertex {
+ AliasOutEdge(const RoseEdge &e, const RoseGraph &g) :
+ EdgeAndVertex(e, target(e, g), g) {}
+};
+
+struct AliasInEdge : EdgeAndVertex {
+ AliasInEdge(const RoseEdge &e, const RoseGraph &g) :
+ EdgeAndVertex(e, source(e, g), g) {}
+};
+
+class CandidateSet {
+public:
using key_type = RoseVertex;
using iterator = set<RoseVertex>::iterator;
using const_iterator = set<RoseVertex>::const_iterator;
-
- iterator begin() { return main_cont.begin(); }
- iterator end() { return main_cont.end(); }
+
+ iterator begin() { return main_cont.begin(); }
+ iterator end() { return main_cont.end(); }
const_iterator begin() const { return main_cont.begin(); }
const_iterator end() const { return main_cont.end(); }
-
- bool contains(RoseVertex a) const {
- return hash_cont.find(a) != hash_cont.end();
- }
-
- void insert(RoseVertex a) {
- main_cont.insert(a);
- hash_cont.insert(a);
- }
-
- void erase(iterator aa) {
- RoseVertex a = *aa;
- main_cont.erase(aa);
- hash_cont.erase(a);
- }
-
- void erase(RoseVertex a) {
- main_cont.erase(a);
- hash_cont.erase(a);
- }
-
- size_t size() const {
- assert(hash_cont.size() == main_cont.size());
- return main_cont.size();
- }
-
- bool empty() const {
- assert(hash_cont.size() == main_cont.size());
- return main_cont.empty();
- }
-
-private:
- /* if a vertex is worth storing, it is worth storing twice */
+
+ bool contains(RoseVertex a) const {
+ return hash_cont.find(a) != hash_cont.end();
+ }
+
+ void insert(RoseVertex a) {
+ main_cont.insert(a);
+ hash_cont.insert(a);
+ }
+
+ void erase(iterator aa) {
+ RoseVertex a = *aa;
+ main_cont.erase(aa);
+ hash_cont.erase(a);
+ }
+
+ void erase(RoseVertex a) {
+ main_cont.erase(a);
+ hash_cont.erase(a);
+ }
+
+ size_t size() const {
+ assert(hash_cont.size() == main_cont.size());
+ return main_cont.size();
+ }
+
+ bool empty() const {
+ assert(hash_cont.size() == main_cont.size());
+ return main_cont.empty();
+ }
+
+private:
+ /* if a vertex is worth storing, it is worth storing twice */
set<RoseVertex> main_cont; /* deterministic iterator */
unordered_set<RoseVertex> hash_cont; /* member checks */
-};
-
+};
+
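The comment in CandidateSet above ("worth storing twice") names the idiom: an ordered set gives deterministic iteration (keeping compiles reproducible), while a hash set gives cheap membership tests. A reduced sketch of the same pattern, with hypothetical names:

#include <cstddef>
#include <set>
#include <unordered_set>

template<typename T>
class DualIndexedSet {
public:
    typedef typename std::set<T>::const_iterator const_iterator;
    const_iterator begin() const { return ordered.begin(); }
    const_iterator end() const { return ordered.end(); }
    bool contains(const T &x) const { return fast.count(x) != 0; }
    void insert(const T &x) { ordered.insert(x); fast.insert(x); }
    void erase(const T &x) { ordered.erase(x); fast.erase(x); }
    std::size_t size() const { return ordered.size(); }
private:
    std::set<T> ordered;          // deterministic iteration order
    std::unordered_set<T> fast;   // expected O(1) membership checks
};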
struct RoseAliasingInfo {
RoseAliasingInfo(const RoseBuildImpl &build) {
const auto &g = build.g;
-
+
// Populate reverse leftfix map.
for (auto v : vertices_range(g)) {
if (g[v].left) {
rev_leftfix[g[v].left].insert(v);
}
}
-
+
// Populate reverse ghost vertex map.
for (const auto &m : build.ghost) {
rev_ghost[m.second].insert(m.first);
- }
- }
-
+ }
+ }
+
/** \brief Mapping from leftfix to vertices. */
unordered_map<left_id, set<RoseVertex>> rev_leftfix;
@@ -184,179 +184,179 @@ struct RoseAliasingInfo {
} // namespace
-// Check successor set: must lead to the same vertices via edges with the
-// same properties.
-static
-bool sameSuccessors(RoseVertex a, RoseVertex b, const RoseGraph &g) {
- if (out_degree(a, g) != out_degree(b, g)) {
- return false;
- }
-
- set<AliasOutEdge> succs_a, succs_b;
-
- for (const auto &e : out_edges_range(a, g)) {
- succs_a.insert(AliasOutEdge(e, g));
- }
-
- for (const auto &e : out_edges_range(b, g)) {
- succs_b.insert(AliasOutEdge(e, g));
- }
-
- return (succs_a == succs_b);
-}
-
-/* unlike LeftEngInfo::==, this does a deep check to see if the leftfixes are
- * equivalent rather than checking for pointer equality. */
-static
-bool hasEqualLeftfixes(RoseVertex a, RoseVertex b, const RoseGraph &g) {
- assert(g[a].left || g[b].left);
- if (!g[a].left || !g[b].left) {
- return false;
- }
- const LeftEngInfo &a_left = g[a].left;
- const LeftEngInfo &b_left = g[b].left;
-
- if (a_left.castle && b_left.castle) {
- return is_equal(*a_left.castle, a_left.leftfix_report,
- *b_left.castle, b_left.leftfix_report);
- }
-
- if (a_left.graph && b_left.graph) {
- /* non-castle engines have graphs */
- return is_equal(*a_left.graph, a_left.leftfix_report, *b_left.graph,
- b_left.leftfix_report);
- }
-
- /* graph <-> castle cases are not equal */
- return false;
-}
-
-// Check predecessor set: must come from the same vertices via edges with
-// the same properties.
-static
-bool samePredecessors(RoseVertex a, RoseVertex b, const RoseGraph &g) {
- if (in_degree(a, g) != in_degree(b, g)) {
- return false;
- }
-
- set<AliasInEdge> preds_a, preds_b;
-
- for (const auto &e : in_edges_range(a, g)) {
- preds_a.insert(AliasInEdge(e, g));
- }
-
- for (const auto &e : in_edges_range(b, g)) {
- preds_b.insert(AliasInEdge(e, g));
- }
-
- if (preds_a != preds_b) {
- return false;
- }
-
- if (g[a].left || g[b].left) {
- if (!hasEqualLeftfixes(a, b, g)) {
- return false;
- }
-
- for (const auto &e_a : in_edges_range(a, g)) {
+// Check successor set: must lead to the same vertices via edges with the
+// same properties.
+static
+bool sameSuccessors(RoseVertex a, RoseVertex b, const RoseGraph &g) {
+ if (out_degree(a, g) != out_degree(b, g)) {
+ return false;
+ }
+
+ set<AliasOutEdge> succs_a, succs_b;
+
+ for (const auto &e : out_edges_range(a, g)) {
+ succs_a.insert(AliasOutEdge(e, g));
+ }
+
+ for (const auto &e : out_edges_range(b, g)) {
+ succs_b.insert(AliasOutEdge(e, g));
+ }
+
+ return (succs_a == succs_b);
+}
+
+/* unlike LeftEngInfo::==, this does a deep check to see if the leftfixes are
+ * equivalent rather than checking for pointer equality. */
+static
+bool hasEqualLeftfixes(RoseVertex a, RoseVertex b, const RoseGraph &g) {
+ assert(g[a].left || g[b].left);
+ if (!g[a].left || !g[b].left) {
+ return false;
+ }
+ const LeftEngInfo &a_left = g[a].left;
+ const LeftEngInfo &b_left = g[b].left;
+
+ if (a_left.castle && b_left.castle) {
+ return is_equal(*a_left.castle, a_left.leftfix_report,
+ *b_left.castle, b_left.leftfix_report);
+ }
+
+ if (a_left.graph && b_left.graph) {
+ /* non-castle engines have graphs */
+ return is_equal(*a_left.graph, a_left.leftfix_report, *b_left.graph,
+ b_left.leftfix_report);
+ }
+
+ /* graph <-> castle cases are not equal */
+ return false;
+}
+
+// Check predecessor set: must come from the same vertices via edges with
+// the same properties.
+static
+bool samePredecessors(RoseVertex a, RoseVertex b, const RoseGraph &g) {
+ if (in_degree(a, g) != in_degree(b, g)) {
+ return false;
+ }
+
+ set<AliasInEdge> preds_a, preds_b;
+
+ for (const auto &e : in_edges_range(a, g)) {
+ preds_a.insert(AliasInEdge(e, g));
+ }
+
+ for (const auto &e : in_edges_range(b, g)) {
+ preds_b.insert(AliasInEdge(e, g));
+ }
+
+ if (preds_a != preds_b) {
+ return false;
+ }
+
+ if (g[a].left || g[b].left) {
+ if (!hasEqualLeftfixes(a, b, g)) {
+ return false;
+ }
+
+ for (const auto &e_a : in_edges_range(a, g)) {
RoseEdge e = edge(source(e_a, g), b, g);
if (!e || g[e].rose_top != g[e_a].rose_top) {
- DEBUG_PRINTF("bad tops\n");
- return false;
- }
- }
- }
-
- return true;
-}
-
-static
+ DEBUG_PRINTF("bad tops\n");
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+static
bool hasCommonSuccWithBadBounds(RoseVertex a, RoseVertex b,
const RoseGraph &g) {
- for (const auto &e_a : out_edges_range(a, g)) {
+ for (const auto &e_a : out_edges_range(a, g)) {
if (RoseEdge e = edge(b, target(e_a, g), g)) {
- if (g[e_a].maxBound < g[e].minBound
- || g[e].maxBound < g[e_a].minBound) {
- return true;
- }
- if (g[e_a].rose_top != g[e].rose_top) {
- // Can't trigger two tops on the same leftfix, we can't merge
- // this.
- return true;
- }
- }
- }
- return false;
-}
-
-static
+ if (g[e_a].maxBound < g[e].minBound
+ || g[e].maxBound < g[e_a].minBound) {
+ return true;
+ }
+ if (g[e_a].rose_top != g[e].rose_top) {
+ // Can't trigger two tops on the same leftfix, we can't merge
+ // this.
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
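hasCommonSuccWithBadBounds above is, at heart, an interval test: two edges into a shared successor are mergeable only if their [minBound, maxBound] windows intersect (and their tops agree). The intersection test in isolation, with hypothetical types:

#include <cstdint>

struct BoundsSketch {
    uint32_t minBound;
    uint32_t maxBound;
};

// True iff the two bound windows admit at least one common offset.
static bool boundsIntersect(const BoundsSketch &x, const BoundsSketch &y) {
    return x.maxBound >= y.minBound && y.maxBound >= x.minBound;
}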
+static
bool hasCommonPredWithBadBounds(RoseVertex a, RoseVertex b,
const RoseGraph &g) {
- for (const auto &e_a : in_edges_range(a, g)) {
+ for (const auto &e_a : in_edges_range(a, g)) {
if (RoseEdge e = edge(source(e_a, g), b, g)) {
- if (g[e_a].maxBound < g[e].minBound
- || g[e].maxBound < g[e_a].minBound) {
- return true;
- }
-
- // XXX: if we're merging two vertices with different roses, we
- // cannot allow them to share a pred, as we would be unable to
- // merge the (necessarily different) tops on the in-edges. This
- // could be relaxed if we made the tops mergeable (by making
- // edge_top a bitfield, for example).
- if (g[a].left != g[b].left) {
- return true;
- }
-
- }
- }
- return false;
-}
-
-static
+ if (g[e_a].maxBound < g[e].minBound
+ || g[e].maxBound < g[e_a].minBound) {
+ return true;
+ }
+
+ // XXX: if we're merging two vertices with different roses, we
+ // cannot allow them to share a pred, as we would be unable to
+ // merge the (necessarily different) tops on the in-edges. This
+ // could be relaxed if we made the tops mergeable (by making
+ // edge_top a bitfield, for example).
+ if (g[a].left != g[b].left) {
+ return true;
+ }
+
+ }
+ }
+ return false;
+}
+
+static
bool canMergeLiterals(RoseVertex a, RoseVertex b, const RoseBuildImpl &build) {
const auto &lits_a = build.g[a].literals;
const auto &lits_b = build.g[b].literals;
- assert(!lits_a.empty() && !lits_b.empty());
-
- // If both vertices have only pseudo-dotstar in-edges, we can merge
- // literals of different lengths and can avoid the check below.
+ assert(!lits_a.empty() && !lits_b.empty());
+
+ // If both vertices have only pseudo-dotstar in-edges, we can merge
+ // literals of different lengths and can avoid the check below.
if (build.hasOnlyPseudoStarInEdges(a) &&
build.hasOnlyPseudoStarInEdges(b)) {
- DEBUG_PRINTF("both have pseudo-dotstar in-edges\n");
- return true;
- }
-
- // Otherwise, all the literals involved must have the same length.
- for (u32 a_id : lits_a) {
+ DEBUG_PRINTF("both have pseudo-dotstar in-edges\n");
+ return true;
+ }
+
+ // Otherwise, all the literals involved must have the same length.
+ for (u32 a_id : lits_a) {
const rose_literal_id &la = build.literals.at(a_id);
- for (u32 b_id : lits_b) {
+ for (u32 b_id : lits_b) {
const rose_literal_id &lb = build.literals.at(b_id);
-
- if (la.elength() != lb.elength()) {
- DEBUG_PRINTF("bad merge %zu!=%zu '%s', '%s'\n", la.elength(),
- lb.elength(), la.s.c_str(), lb.s.c_str());
- return false;
- }
- }
- }
-
- return true;
-}
-
-static
+
+ if (la.elength() != lb.elength()) {
+ DEBUG_PRINTF("bad merge %zu!=%zu '%s', '%s'\n", la.elength(),
+ lb.elength(), la.s.c_str(), lb.s.c_str());
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+static
bool isAliasingCandidate(RoseVertex v, const RoseBuildImpl &build) {
const RoseVertexProps &props = build.g[v];
-
- // Must have literals.
- if (props.literals.empty()) {
- return false;
- }
-
+
+ // Must have literals.
+ if (props.literals.empty()) {
+ return false;
+ }
+
assert(*props.literals.begin() != MO_INVALID_IDX);
return true;
}
-
+
static
bool sameGhostProperties(const RoseBuildImpl &build,
const RoseAliasingInfo &rai, RoseVertex a,
@@ -374,8 +374,8 @@ bool sameGhostProperties(const RoseBuildImpl &build,
}
DEBUG_PRINTF("ghost mappings ok\n");
return true;
- }
-
+ }
+
// If they are ghost vertices, then they must have the same literals.
if (contains(rai.rev_ghost, a) || contains(rai.rev_ghost, b)) {
if (!contains(rai.rev_ghost, a) || !contains(rai.rev_ghost, b)) {
@@ -384,158 +384,158 @@ bool sameGhostProperties(const RoseBuildImpl &build,
}
return build.g[a].literals == build.g[b].literals;
}
-
- return true;
-}
-
-static
+
+ return true;
+}
+
+static
bool sameRoleProperties(const RoseBuildImpl &build, const RoseAliasingInfo &rai,
RoseVertex a, RoseVertex b) {
- const RoseGraph &g = build.g;
- const RoseVertexProps &aprops = g[a], &bprops = g[b];
-
+ const RoseGraph &g = build.g;
+ const RoseVertexProps &aprops = g[a], &bprops = g[b];
+
if (aprops.eod_accept != bprops.eod_accept) {
- return false;
- }
-
- // We don't want to merge a role with LAST_BYTE history with one without,
- // as a role that can only be triggered at EOD cannot safely precede
- // "ordinary" roles.
- if (hasLastByteHistorySucc(g, a) != hasLastByteHistorySucc(g, b)) {
- return false;
- }
-
- // We certainly don't want to merge root roles with non-root roles.
- /* TODO: explain */
- if (build.isRootSuccessor(a) != build.isRootSuccessor(b)) {
- return false;
- }
-
- if (aprops.som_adjust != bprops.som_adjust) {
- return false;
- }
-
+ return false;
+ }
+
+ // We don't want to merge a role with LAST_BYTE history with one without,
+ // as a role that can only be triggered at EOD cannot safely precede
+ // "ordinary" roles.
+ if (hasLastByteHistorySucc(g, a) != hasLastByteHistorySucc(g, b)) {
+ return false;
+ }
+
+ // We certainly don't want to merge root roles with non-root roles.
+ /* TODO: explain */
+ if (build.isRootSuccessor(a) != build.isRootSuccessor(b)) {
+ return false;
+ }
+
+ if (aprops.som_adjust != bprops.som_adjust) {
+ return false;
+ }
+
if (!sameGhostProperties(build, rai, a, b)) {
return false;
}
-    /* "roses are mergeable" checks are handled elsewhere */
-
- return true;
-}
-
+    /* "roses are mergeable" checks are handled elsewhere */
+
+ return true;
+}
+
/* Checks compatibility of role properties if we require that two roles are
* right equiv. */
-static
-bool sameRightRoleProperties(const RoseBuildImpl &build, RoseVertex a,
- RoseVertex b) {
- const RoseGraph &g = build.g;
- const RoseVertexProps &aprops = g[a], &bprops = g[b];
-
- if (aprops.reports != bprops.reports) {
- return false;
- }
-
- if (hasAnchHistorySucc(g, a) != hasAnchHistorySucc(g, b)) {
- return false;
- }
-
- // If the history type is ANCH, then we need to be careful that we only
- // merge literals that occur at the same offsets.
- if (hasAnchHistorySucc(g, a) || hasAnchHistorySucc(g, b)) {
- if (aprops.min_offset != bprops.min_offset
- || aprops.max_offset != bprops.max_offset) {
- return false;
- }
- }
-
- if (aprops.suffix != bprops.suffix) {
- return false;
- }
-
- return true;
-}
-
-static
-void mergeEdgeAdd(RoseVertex u, RoseVertex v, const RoseEdge &from_edge,
- const RoseEdge *to_edge, RoseGraph &g) {
- const RoseEdgeProps &from_props = g[from_edge];
-
- if (!to_edge) {
+static
+bool sameRightRoleProperties(const RoseBuildImpl &build, RoseVertex a,
+ RoseVertex b) {
+ const RoseGraph &g = build.g;
+ const RoseVertexProps &aprops = g[a], &bprops = g[b];
+
+ if (aprops.reports != bprops.reports) {
+ return false;
+ }
+
+ if (hasAnchHistorySucc(g, a) != hasAnchHistorySucc(g, b)) {
+ return false;
+ }
+
+ // If the history type is ANCH, then we need to be careful that we only
+ // merge literals that occur at the same offsets.
+ if (hasAnchHistorySucc(g, a) || hasAnchHistorySucc(g, b)) {
+ if (aprops.min_offset != bprops.min_offset
+ || aprops.max_offset != bprops.max_offset) {
+ return false;
+ }
+ }
+
+ if (aprops.suffix != bprops.suffix) {
+ return false;
+ }
+
+ return true;
+}
+
+static
+void mergeEdgeAdd(RoseVertex u, RoseVertex v, const RoseEdge &from_edge,
+ const RoseEdge *to_edge, RoseGraph &g) {
+ const RoseEdgeProps &from_props = g[from_edge];
+
+ if (!to_edge) {
DEBUG_PRINTF("adding edge [%zu,%zu]\n", g[u].index, g[v].index);
- add_edge(u, v, from_props, g);
- } else {
- // union of the two edges.
+ add_edge(u, v, from_props, g);
+ } else {
+ // union of the two edges.
DEBUG_PRINTF("updating edge [%zu,%zu]\n", g[u].index, g[v].index);
- RoseEdgeProps &to_props = g[*to_edge];
- to_props.minBound = min(to_props.minBound, from_props.minBound);
- to_props.maxBound = max(to_props.maxBound, from_props.maxBound);
- assert(to_props.rose_top == from_props.rose_top);
- }
-}
-
-/* clone a's edges onto b */
-static
-void mergeEdges(RoseVertex a, RoseVertex b, RoseGraph &g) {
- // All the edges to or from b for quick lookup.
- typedef map<RoseVertex, RoseEdge> EdgeCache;
- EdgeCache b_edges;
-
- // Cache b's in-edges so we can look them up by source quickly.
- for (const auto &e : in_edges_range(b, g)) {
- RoseVertex u = source(e, g);
+ RoseEdgeProps &to_props = g[*to_edge];
+ to_props.minBound = min(to_props.minBound, from_props.minBound);
+ to_props.maxBound = max(to_props.maxBound, from_props.maxBound);
+ assert(to_props.rose_top == from_props.rose_top);
+ }
+}
+
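mergeEdgeAdd above unions colliding edges so the merged edge accepts any offset either original accepted, while the tops must already agree. The union rule in isolation, with hypothetical types:

#include <algorithm>
#include <cstdint>

struct EdgePropsSketch {
    uint32_t minBound;
    uint32_t maxBound;
    uint32_t rose_top;
};

static void unionEdgeProps(EdgePropsSketch &into,
                           const EdgePropsSketch &from) {
    into.minBound = std::min(into.minBound, from.minBound);
    into.maxBound = std::max(into.maxBound, from.maxBound);
    // rose_top is deliberately untouched: callers must ensure tops match.
}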
+/* clone a's edges onto b */
+static
+void mergeEdges(RoseVertex a, RoseVertex b, RoseGraph &g) {
+ // All the edges to or from b for quick lookup.
+ typedef map<RoseVertex, RoseEdge> EdgeCache;
+ EdgeCache b_edges;
+
+ // Cache b's in-edges so we can look them up by source quickly.
+ for (const auto &e : in_edges_range(b, g)) {
+ RoseVertex u = source(e, g);
b_edges.emplace(u, e);
- }
-
- // Add a's in-edges to b, merging them in where b already has the new edge.
- // Once handled, the in-edges to a are removed.
- RoseGraph::in_edge_iterator ei, ee;
- tie(ei, ee) = in_edges(a, g);
- while (ei != ee) {
- RoseVertex u = source(*ei, g);
- EdgeCache::const_iterator it = b_edges.find(u);
- const RoseEdge *to_edge = (it == b_edges.end() ? nullptr : &it->second);
- mergeEdgeAdd(u, b, *ei, to_edge, g);
- remove_edge(*ei++, g);
- }
-
- // Cache b's out-edges so we can look them up by target quickly.
- b_edges.clear();
- for (const auto &e : out_edges_range(b, g)) {
- RoseVertex v = target(e, g);
+ }
+
+ // Add a's in-edges to b, merging them in where b already has the new edge.
+ // Once handled, the in-edges to a are removed.
+ RoseGraph::in_edge_iterator ei, ee;
+ tie(ei, ee) = in_edges(a, g);
+ while (ei != ee) {
+ RoseVertex u = source(*ei, g);
+ EdgeCache::const_iterator it = b_edges.find(u);
+ const RoseEdge *to_edge = (it == b_edges.end() ? nullptr : &it->second);
+ mergeEdgeAdd(u, b, *ei, to_edge, g);
+ remove_edge(*ei++, g);
+ }
+
+ // Cache b's out-edges so we can look them up by target quickly.
+ b_edges.clear();
+ for (const auto &e : out_edges_range(b, g)) {
+ RoseVertex v = target(e, g);
b_edges.emplace(v, e);
- }
-
- // Add a's out-edges to b, merging them in where b already has the new edge.
- // Once handled, the out-edges to a are removed.
- RoseGraph::out_edge_iterator oi, oe;
- tie(oi, oe) = out_edges(a, g);
- while (oi != oe) {
- RoseVertex v = target(*oi, g);
- EdgeCache::const_iterator it = b_edges.find(v);
- const RoseEdge *to_edge = (it == b_edges.end() ? nullptr : &it->second);
- mergeEdgeAdd(b, v, *oi, to_edge, g);
- remove_edge(*oi++, g);
- }
-
- // Vertex a should no longer have any in- or out-edges.
- assert(degree(a, g) == 0);
-}
-
-static
+ }
+
+ // Add a's out-edges to b, merging them in where b already has the new edge.
+ // Once handled, the out-edges to a are removed.
+ RoseGraph::out_edge_iterator oi, oe;
+ tie(oi, oe) = out_edges(a, g);
+ while (oi != oe) {
+ RoseVertex v = target(*oi, g);
+ EdgeCache::const_iterator it = b_edges.find(v);
+ const RoseEdge *to_edge = (it == b_edges.end() ? nullptr : &it->second);
+ mergeEdgeAdd(b, v, *oi, to_edge, g);
+ remove_edge(*oi++, g);
+ }
+
+ // Vertex a should no longer have any in- or out-edges.
+ assert(degree(a, g) == 0);
+}
+
+static
void mergeLiteralSets(RoseVertex a, RoseVertex b, RoseBuildImpl &build) {
RoseGraph &g = build.g;
- const auto &a_literals = g[a].literals;
- for (u32 lit_id : a_literals) {
+ const auto &a_literals = g[a].literals;
+ for (u32 lit_id : a_literals) {
auto &lit_vertices = build.literal_info[lit_id].vertices;
- lit_vertices.erase(a);
- lit_vertices.insert(b);
- }
-
- insert(&g[b].literals, a_literals);
-}
-
-static
+ lit_vertices.erase(a);
+ lit_vertices.insert(b);
+ }
+
+ insert(&g[b].literals, a_literals);
+}
+
+static
void updateAliasingInfo(RoseBuildImpl &build, RoseAliasingInfo &rai,
RoseVertex a, RoseVertex b) {
if (build.g[a].left) {
@@ -549,7 +549,7 @@ void updateAliasingInfo(RoseBuildImpl &build, RoseAliasingInfo &rai,
build.ghost.erase(a);
rai.rev_ghost[ghost].erase(a);
}
-
+
if (contains(rai.rev_ghost, a)) {
for (const auto &v : rai.rev_ghost[a]) {
build.ghost[v] = b;
@@ -565,21 +565,21 @@ void mergeCommon(RoseBuildImpl &build, RoseAliasingInfo &rai, RoseVertex a,
RoseVertex b) {
RoseGraph &g = build.g;
- assert(g[a].eod_accept == g[b].eod_accept);
- assert(g[a].left == g[b].left);
+ assert(g[a].eod_accept == g[b].eod_accept);
+ assert(g[a].left == g[b].left);
assert(!g[a].suffix || g[a].suffix == g[b].suffix);
-
- // In some situations (ghost roles etc), we can have different groups.
- assert(!g[a].groups && !g[b].groups); /* current structure means groups
- * haven't been assigned yet */
- g[b].groups |= g[a].groups;
-
+
+ // In some situations (ghost roles etc), we can have different groups.
+ assert(!g[a].groups && !g[b].groups); /* current structure means groups
+ * haven't been assigned yet */
+ g[b].groups |= g[a].groups;
+
mergeLiteralSets(a, b, build);
updateAliasingInfo(build, rai, a, b);
-
+
// Our min and max_offsets should be sane.
assert(g[b].min_offset <= g[b].max_offset);
-
+
// Safety check: we should not have created through a merge a vertex that
// has an out-edge with ANCH history but is not fixed-offset.
assert(!hasAnchHistorySucc(g, b) || g[b].fixedOffset());
@@ -599,14 +599,14 @@ void mergeVerticesLeft(RoseVertex a, RoseVertex b, RoseBuildImpl &build,
g[b].min_offset = max(g[a].min_offset, g[b].min_offset);
g[b].max_offset = min(g[a].max_offset, g[b].max_offset);
- if (!g[b].suffix) {
- g[b].suffix = g[a].suffix;
- }
-
- mergeEdges(a, b, g);
+ if (!g[b].suffix) {
+ g[b].suffix = g[a].suffix;
+ }
+
+ mergeEdges(a, b, g);
mergeCommon(build, rai, a, b);
-}
-
+}
+
/** \brief Merge role 'a' into 'b', right merge path. */
static
void mergeVerticesRight(RoseVertex a, RoseVertex b, RoseBuildImpl &build,
@@ -622,201 +622,201 @@ void mergeVerticesRight(RoseVertex a, RoseVertex b, RoseBuildImpl &build,
mergeCommon(build, rai, a, b);
}
-/**
- * Faster version of \ref mergeVertices for diamond merges, for which we know
- * that the in- and out-edge sets, reports and suffixes are identical.
- */
-static
+/**
+ * Faster version of \ref mergeVertices for diamond merges, for which we know
+ * that the in- and out-edge sets, reports and suffixes are identical.
+ */
+static
void mergeVerticesDiamond(RoseVertex a, RoseVertex b, RoseBuildImpl &build,
RoseAliasingInfo &rai) {
RoseGraph &g = build.g;
DEBUG_PRINTF("merging vertex %zu into %zu\n", g[a].index, g[b].index);
-
+
// For a diamond merge, most properties are already the same (with the
// notable exception of the literal set).
- assert(g[a].reports == g[b].reports);
- assert(g[a].suffix == g[b].suffix);
-
- g[b].min_offset = min(g[a].min_offset, g[b].min_offset);
- g[b].max_offset = max(g[a].max_offset, g[b].max_offset);
-
+ assert(g[a].reports == g[b].reports);
+ assert(g[a].suffix == g[b].suffix);
+
+ g[b].min_offset = min(g[a].min_offset, g[b].min_offset);
+ g[b].max_offset = max(g[a].max_offset, g[b].max_offset);
+
mergeCommon(build, rai, a, b);
-}
-
-static never_inline
+}
+
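
For intuition, the diamond precondition can be restated over a toy vertex type; a minimal sketch, assuming a hypothetical ToyVertex rather than the Rose graph API:

    #include <set>

    // Toy stand-in for a Rose role: neighbours held as vertex-id sets.
    struct ToyVertex {
        std::set<int> preds, succs;
    };

    // a and b form a "diamond" when they share all predecessors and
    // successors; only their literal sets may differ, so the merge above
    // only needs to union literals and widen the offset bounds.
    static bool isDiamondPair(const ToyVertex &a, const ToyVertex &b) {
        return a.preds == b.preds && a.succs == b.succs;
    }
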
+static never_inline
void findCandidates(const RoseBuildImpl &build, CandidateSet *candidates) {
for (auto v : vertices_range(build.g)) {
if (isAliasingCandidate(v, build)) {
DEBUG_PRINTF("candidate %zu\n", build.g[v].index);
DEBUG_PRINTF("lits: %u\n", *build.g[v].literals.begin());
- candidates->insert(v);
- }
- }
-
+ candidates->insert(v);
+ }
+ }
+
assert(candidates->size() <= num_vertices(build.g));
- DEBUG_PRINTF("found %zu/%zu candidates\n", candidates->size(),
+ DEBUG_PRINTF("found %zu/%zu candidates\n", candidates->size(),
num_vertices(build.g));
-}
-
-static
-RoseVertex pickPred(const RoseVertex v, const RoseGraph &g,
+}
+
+static
+RoseVertex pickPred(const RoseVertex v, const RoseGraph &g,
const RoseBuildImpl &build) {
- RoseGraph::in_edge_iterator ei, ee;
- tie(ei, ee) = in_edges(v, g);
- if (ei == ee) {
- assert(0); // every candidate should have in-degree!
- return RoseGraph::null_vertex();
- }
-
- // Avoid roots if we have other options, since it doesn't matter to the
- // merge pass which predecessor we pick.
- RoseVertex u = source(*ei, g);
+ RoseGraph::in_edge_iterator ei, ee;
+ tie(ei, ee) = in_edges(v, g);
+ if (ei == ee) {
+ assert(0); // every candidate should have in-degree!
+ return RoseGraph::null_vertex();
+ }
+
+ // Avoid roots if we have other options, since it doesn't matter to the
+ // merge pass which predecessor we pick.
+ RoseVertex u = source(*ei, g);
while (build.isAnyStart(u) && ++ei != ee) {
- u = source(*ei, g);
- }
- return u;
-}
-
-template<>
-bool contains<>(const CandidateSet &container, const RoseVertex &key) {
- return container.contains(key);
-}
-
-// Simplified version of hasCommonPredWithBadBounds for diamond merges.
-static
-bool hasCommonPredWithDiffRoses(RoseVertex a, RoseVertex b,
- const RoseGraph &g) {
- if (!g[a].left || !g[b].left) {
- DEBUG_PRINTF("one of (a, b) doesn't have a prefix\n");
- return true;
- }
-
- // XXX: if we're merging two vertices with different leftfixes, we
- // cannot allow them to share a pred, as we would be unable to
- // merge the (necessarily different) tops on the in-edges. This
- // could be relaxed if we made the tops mergeable (by making
- // edge_top a bitfield, for example).
-
- const bool equal_roses = hasEqualLeftfixes(a, b, g);
-
- for (const auto &e_a : in_edges_range(a, g)) {
+ u = source(*ei, g);
+ }
+ return u;
+}
+
+template<>
+bool contains<>(const CandidateSet &container, const RoseVertex &key) {
+ return container.contains(key);
+}
+
+// Simplified version of hasCommonPredWithBadBounds for diamond merges.
+static
+bool hasCommonPredWithDiffRoses(RoseVertex a, RoseVertex b,
+ const RoseGraph &g) {
+ if (!g[a].left || !g[b].left) {
+ DEBUG_PRINTF("one of (a, b) doesn't have a prefix\n");
+ return true;
+ }
+
+ // XXX: if we're merging two vertices with different leftfixes, we
+ // cannot allow them to share a pred, as we would be unable to
+ // merge the (necessarily different) tops on the in-edges. This
+ // could be relaxed if we made the tops mergeable (by making
+ // edge_top a bitfield, for example).
+
+ const bool equal_roses = hasEqualLeftfixes(a, b, g);
+
+ for (const auto &e_a : in_edges_range(a, g)) {
if (RoseEdge e = edge(source(e_a, g), b, g)) {
- DEBUG_PRINTF("common pred, e_r=%d r_t %u,%u\n",
- (int)equal_roses, g[e].rose_top, g[e_a].rose_top);
- if (!equal_roses) {
- DEBUG_PRINTF("different roses\n");
- return true;
- }
- if (g[e].rose_top != g[e_a].rose_top) {
- DEBUG_PRINTF("bad tops\n");
- return true;
- }
- }
- }
- DEBUG_PRINTF("ok\n");
- return false;
-}
-
-static
+ DEBUG_PRINTF("common pred, e_r=%d r_t %u,%u\n",
+ (int)equal_roses, g[e].rose_top, g[e_a].rose_top);
+ if (!equal_roses) {
+ DEBUG_PRINTF("different roses\n");
+ return true;
+ }
+ if (g[e].rose_top != g[e_a].rose_top) {
+ DEBUG_PRINTF("bad tops\n");
+ return true;
+ }
+ }
+ }
+ DEBUG_PRINTF("ok\n");
+ return false;
+}
+
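
The guard above reduces to: any predecessor shared by a and b must drive the same top of the same leftfix. A minimal sketch under assumed toy data (InEdges and hasBadSharedPred are hypothetical stand-ins for the rose_top edge property and the check):

    #include <map>

    // Toy stand-in for the rose_top edge property: pred id -> top value.
    using InEdges = std::map<int, unsigned>;

    // True if some shared predecessor would need two different tops (or two
    // different leftfixes), which a single merged role cannot express.
    static bool hasBadSharedPred(const InEdges &a_in, const InEdges &b_in,
                                 bool equal_leftfixes) {
        for (const auto &e : a_in) {
            auto it = b_in.find(e.first);
            if (it == b_in.end()) {
                continue; // not a shared predecessor
            }
            if (!equal_leftfixes || it->second != e.second) {
                return true;
            }
        }
        return false;
    }
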
+static
void pruneReportIfUnused(const RoseBuildImpl &build, shared_ptr<NGHolder> h,
- const set<RoseVertex> &verts, ReportID report) {
- DEBUG_PRINTF("trying to prune %u from %p (v %zu)\n", report, h.get(),
- verts.size());
- for (RoseVertex v : verts) {
+ const set<RoseVertex> &verts, ReportID report) {
+ DEBUG_PRINTF("trying to prune %u from %p (v %zu)\n", report, h.get(),
+ verts.size());
+ for (RoseVertex v : verts) {
if (build.g[v].left.graph == h &&
build.g[v].left.leftfix_report == report) {
- DEBUG_PRINTF("report %u still in use\n", report);
- return;
- }
- }
-
- if (!verts.empty()) {
- // Report no longer in use, but graph h is still alive: we should prune
- // the report if we can do so without rendering the graph
- // unimplementable.
-
- DEBUG_PRINTF("report %u has been merged away, pruning\n", report);
+ DEBUG_PRINTF("report %u still in use\n", report);
+ return;
+ }
+ }
+
+ if (!verts.empty()) {
+ // Report no longer in use, but graph h is still alive: we should prune
+ // the report if we can do so without rendering the graph
+ // unimplementable.
+
+ DEBUG_PRINTF("report %u has been merged away, pruning\n", report);
assert(h->kind == (build.isRootSuccessor(*verts.begin()) ? NFA_PREFIX
: NFA_INFIX));
- unique_ptr<NGHolder> h_new = cloneHolder(*h);
- pruneReport(*h_new, report);
-
+ unique_ptr<NGHolder> h_new = cloneHolder(*h);
+ pruneReport(*h_new, report);
+
if (isImplementableNFA(*h_new, nullptr, build.cc)) {
- clear_graph(*h);
- cloneHolder(*h, *h_new);
- } else {
- DEBUG_PRINTF("prune produced unimplementable graph, "
- "leaving as-is\n");
- }
- }
-}
-
-/** \brief Remove any tops that don't lead to the given report from this
- * Castle. */
-static
-void pruneCastle(CastleProto &castle, ReportID report) {
- unordered_set<u32> dead; // tops to remove.
- for (const auto &m : castle.repeats) {
- if (!contains(m.second.reports, report)) {
- dead.insert(m.first);
- }
- }
-
- for (const auto &top : dead) {
- castle.erase(top);
- }
-
- assert(!castle.repeats.empty());
-}
-
-/** \brief Set all reports to the given one. */
-static
-void setReports(CastleProto &castle, ReportID report) {
+ clear_graph(*h);
+ cloneHolder(*h, *h_new);
+ } else {
+ DEBUG_PRINTF("prune produced unimplementable graph, "
+ "leaving as-is\n");
+ }
+ }
+}
+
+/** \brief Remove any tops that don't lead to the given report from this
+ * Castle. */
+static
+void pruneCastle(CastleProto &castle, ReportID report) {
+ unordered_set<u32> dead; // tops to remove.
+ for (const auto &m : castle.repeats) {
+ if (!contains(m.second.reports, report)) {
+ dead.insert(m.first);
+ }
+ }
+
+ for (const auto &top : dead) {
+ castle.erase(top);
+ }
+
+ assert(!castle.repeats.empty());
+}
+
+/** \brief Set all reports to the given one. */
+static
+void setReports(CastleProto &castle, ReportID report) {
castle.report_map.clear();
for (auto &e : castle.repeats) {
u32 top = e.first;
auto &repeat = e.second;
- repeat.reports.clear();
- repeat.reports.insert(report);
+ repeat.reports.clear();
+ repeat.reports.insert(report);
castle.report_map[report].insert(top);
- }
-}
-
-static
-void updateEdgeTops(RoseGraph &g, RoseVertex v, const map<u32, u32> &top_map) {
- for (const auto &e : in_edges_range(v, g)) {
- g[e].rose_top = top_map.at(g[e].rose_top);
- }
-}
-
-static
-void pruneUnusedTops(CastleProto &castle, const RoseGraph &g,
- const set<RoseVertex> &verts) {
+ }
+}
+
+static
+void updateEdgeTops(RoseGraph &g, RoseVertex v, const map<u32, u32> &top_map) {
+ for (const auto &e : in_edges_range(v, g)) {
+ g[e].rose_top = top_map.at(g[e].rose_top);
+ }
+}
+
+static
+void pruneUnusedTops(CastleProto &castle, const RoseGraph &g,
+ const set<RoseVertex> &verts) {
unordered_set<u32> used_tops;
- for (auto v : verts) {
- assert(g[v].left.castle.get() == &castle);
-
- for (const auto &e : in_edges_range(v, g)) {
- u32 top = g[e].rose_top;
- assert(contains(castle.repeats, top));
- used_tops.insert(top);
- }
- }
-
- DEBUG_PRINTF("castle has %zu tops, graph has %zu tops\n",
- castle.repeats.size(), used_tops.size());
-
- for (u32 top : assoc_keys(castle.repeats)) {
- if (!contains(used_tops, top)) {
- DEBUG_PRINTF("removing unused top %u\n", top);
- castle.erase(top);
- }
- }
-}
-
-static
-void pruneUnusedTops(NGHolder &h, const RoseGraph &g,
- const set<RoseVertex> &verts) {
+ for (auto v : verts) {
+ assert(g[v].left.castle.get() == &castle);
+
+ for (const auto &e : in_edges_range(v, g)) {
+ u32 top = g[e].rose_top;
+ assert(contains(castle.repeats, top));
+ used_tops.insert(top);
+ }
+ }
+
+ DEBUG_PRINTF("castle has %zu tops, graph has %zu tops\n",
+ castle.repeats.size(), used_tops.size());
+
+ for (u32 top : assoc_keys(castle.repeats)) {
+ if (!contains(used_tops, top)) {
+ DEBUG_PRINTF("removing unused top %u\n", top);
+ castle.erase(top);
+ }
+ }
+}
+
+static
+void pruneUnusedTops(NGHolder &h, const RoseGraph &g,
+ const set<RoseVertex> &verts) {
if (!is_triggered(h)) {
DEBUG_PRINTF("not triggered, no tops\n");
return;
@@ -824,21 +824,21 @@ void pruneUnusedTops(NGHolder &h, const RoseGraph &g,
assert(isCorrectlyTopped(h));
DEBUG_PRINTF("pruning unused tops\n");
flat_set<u32> used_tops;
- for (auto v : verts) {
- assert(g[v].left.graph.get() == &h);
-
- for (const auto &e : in_edges_range(v, g)) {
- u32 top = g[e].rose_top;
- used_tops.insert(top);
- }
- }
-
- vector<NFAEdge> dead;
- for (const auto &e : out_edges_range(h.start, h)) {
- NFAVertex v = target(e, h);
- if (v == h.startDs) {
- continue; // stylised edge, leave it alone.
- }
+ for (auto v : verts) {
+ assert(g[v].left.graph.get() == &h);
+
+ for (const auto &e : in_edges_range(v, g)) {
+ u32 top = g[e].rose_top;
+ used_tops.insert(top);
+ }
+ }
+
+ vector<NFAEdge> dead;
+ for (const auto &e : out_edges_range(h.start, h)) {
+ NFAVertex v = target(e, h);
+ if (v == h.startDs) {
+ continue; // stylised edge, leave it alone.
+ }
flat_set<u32> pruned_tops;
auto pt_inserter = inserter(pruned_tops, pruned_tops.end());
set_intersection(h[e].tops.begin(), h[e].tops.end(),
@@ -846,567 +846,567 @@ void pruneUnusedTops(NGHolder &h, const RoseGraph &g,
h[e].tops = std::move(pruned_tops);
if (h[e].tops.empty()) {
DEBUG_PRINTF("edge (start,%zu) has only unused tops\n", h[v].index);
- dead.push_back(e);
- }
- }
-
- if (dead.empty()) {
- return;
- }
-
- remove_edges(dead, h);
- pruneUseless(h);
- clearReports(h); // As we may have removed vacuous edges.
-}
-
-static
+ dead.push_back(e);
+ }
+ }
+
+ if (dead.empty()) {
+ return;
+ }
+
+ remove_edges(dead, h);
+ pruneUseless(h);
+ clearReports(h); // As we may have removed vacuous edges.
+}
+
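
The per-edge step above is a plain set intersection; a minimal standalone sketch with assumed toy data:

    #include <algorithm>
    #include <iterator>
    #include <set>
    #include <utility>

    int main() {
        std::set<unsigned> edge_tops = {0, 1, 3}; // tops carried by one edge
        std::set<unsigned> used_tops = {1, 2, 3}; // tops seen on in-edges

        // Keep only the tops some in-edge actually uses; an edge left with
        // no tops is dead, as in pruneUnusedTops() above.
        std::set<unsigned> pruned;
        std::set_intersection(edge_tops.begin(), edge_tops.end(),
                              used_tops.begin(), used_tops.end(),
                              std::inserter(pruned, pruned.end()));
        edge_tops = std::move(pruned); // now {1, 3}
        return edge_tops.empty() ? 1 : 0;
    }
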
+static
bool mergeSameCastle(RoseBuildImpl &build, RoseVertex a, RoseVertex b,
RoseAliasingInfo &rai) {
RoseGraph &g = build.g;
- LeftEngInfo &a_left = g[a].left;
- LeftEngInfo &b_left = g[b].left;
- CastleProto &castle = *a_left.castle;
-
- DEBUG_PRINTF("a report=%u, b report=%u\n", a_left.leftfix_report,
- b_left.leftfix_report);
-
- u32 merge_count = 0;
- for (const auto &c : castle.repeats) {
- DEBUG_PRINTF("top %u -> %s report %u\n", c.first,
- c.second.bounds.str().c_str(), *c.second.reports.begin());
- if (contains(c.second.reports, a_left.leftfix_report) ||
- contains(c.second.reports, b_left.leftfix_report)) {
- merge_count++;
- }
- }
-
- if (castle.repeats.size() + merge_count > castle.max_occupancy) {
- DEBUG_PRINTF("too big to merge\n");
- return false;
- }
-
+ LeftEngInfo &a_left = g[a].left;
+ LeftEngInfo &b_left = g[b].left;
+ CastleProto &castle = *a_left.castle;
+
+ DEBUG_PRINTF("a report=%u, b report=%u\n", a_left.leftfix_report,
+ b_left.leftfix_report);
+
+ u32 merge_count = 0;
+ for (const auto &c : castle.repeats) {
+ DEBUG_PRINTF("top %u -> %s report %u\n", c.first,
+ c.second.bounds.str().c_str(), *c.second.reports.begin());
+ if (contains(c.second.reports, a_left.leftfix_report) ||
+ contains(c.second.reports, b_left.leftfix_report)) {
+ merge_count++;
+ }
+ }
+
+ if (castle.repeats.size() + merge_count > castle.max_occupancy) {
+ DEBUG_PRINTF("too big to merge\n");
+ return false;
+ }
+
const ReportID new_report = build.getNewNfaReport();
- map<u32, u32> a_top_map, b_top_map;
-
- for (const auto &c : castle.repeats) {
- u32 old_top = c.first;
- if (contains(c.second.reports, a_left.leftfix_report)) {
- PureRepeat pr = c.second;
- pr.reports.clear();
- pr.reports.insert(new_report);
- u32 new_top = castle.merge(pr);
- assert(new_top < castle.max_occupancy);
- a_top_map[old_top] = new_top;
- } else if (contains(c.second.reports, b_left.leftfix_report)) {
- PureRepeat pr = c.second;
- pr.reports.clear();
- pr.reports.insert(new_report);
- u32 new_top = castle.merge(pr);
- assert(new_top < castle.max_occupancy);
- b_top_map[old_top] = new_top;
- }
- }
-
+ map<u32, u32> a_top_map, b_top_map;
+
+ for (const auto &c : castle.repeats) {
+ u32 old_top = c.first;
+ if (contains(c.second.reports, a_left.leftfix_report)) {
+ PureRepeat pr = c.second;
+ pr.reports.clear();
+ pr.reports.insert(new_report);
+ u32 new_top = castle.merge(pr);
+ assert(new_top < castle.max_occupancy);
+ a_top_map[old_top] = new_top;
+ } else if (contains(c.second.reports, b_left.leftfix_report)) {
+ PureRepeat pr = c.second;
+ pr.reports.clear();
+ pr.reports.insert(new_report);
+ u32 new_top = castle.merge(pr);
+ assert(new_top < castle.max_occupancy);
+ b_top_map[old_top] = new_top;
+ }
+ }
+
assert(contains(rai.rev_leftfix[b_left], b));
rai.rev_leftfix[b_left].erase(b);
rai.rev_leftfix[a_left].insert(b);
-
- a_left.leftfix_report = new_report;
- b_left.leftfix_report = new_report;
- assert(a_left == b_left);
-
- updateEdgeTops(g, a, a_top_map);
- updateEdgeTops(g, b, b_top_map);
-
+
+ a_left.leftfix_report = new_report;
+ b_left.leftfix_report = new_report;
+ assert(a_left == b_left);
+
+ updateEdgeTops(g, a, a_top_map);
+ updateEdgeTops(g, b, b_top_map);
+
pruneUnusedTops(castle, g, rai.rev_leftfix[a_left]);
- return true;
-}
-
-static
+ return true;
+}
+
+static
bool attemptRoseCastleMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a,
- RoseVertex b, bool trivialCasesOnly,
+ RoseVertex b, bool trivialCasesOnly,
RoseAliasingInfo &rai) {
RoseGraph &g = build.g;
- LeftEngInfo &a_left = g[a].left;
- LeftEngInfo &b_left = g[b].left;
- left_id a_left_id(a_left);
- left_id b_left_id(b_left);
- CastleProto &a_castle = *a_left_id.castle();
- CastleProto &b_castle = *b_left_id.castle();
-
- if (a_castle.reach() != b_castle.reach()) {
- DEBUG_PRINTF("different reach\n");
- return false;
- }
-
- DEBUG_PRINTF("a castle=%p, report=%u\n", &a_castle, a_left.leftfix_report);
- DEBUG_PRINTF("b castle=%p, report=%u\n", &b_castle, b_left.leftfix_report);
-
- if (&a_castle == &b_castle) {
- DEBUG_PRINTF("castles are the same\n");
+ LeftEngInfo &a_left = g[a].left;
+ LeftEngInfo &b_left = g[b].left;
+ left_id a_left_id(a_left);
+ left_id b_left_id(b_left);
+ CastleProto &a_castle = *a_left_id.castle();
+ CastleProto &b_castle = *b_left_id.castle();
+
+ if (a_castle.reach() != b_castle.reach()) {
+ DEBUG_PRINTF("different reach\n");
+ return false;
+ }
+
+ DEBUG_PRINTF("a castle=%p, report=%u\n", &a_castle, a_left.leftfix_report);
+ DEBUG_PRINTF("b castle=%p, report=%u\n", &b_castle, b_left.leftfix_report);
+
+ if (&a_castle == &b_castle) {
+ DEBUG_PRINTF("castles are the same\n");
return mergeSameCastle(build, a, b, rai);
- }
-
- if (is_equal(a_castle, a_left.leftfix_report, b_castle,
- b_left.leftfix_report)) {
- DEBUG_PRINTF("castles are equiv with respect to reports\n");
+ }
+
+ if (is_equal(a_castle, a_left.leftfix_report, b_castle,
+ b_left.leftfix_report)) {
+ DEBUG_PRINTF("castles are equiv with respect to reports\n");
if (rai.rev_leftfix[a_left_id].size() == 1) {
- /* nobody else is using a_castle */
+ /* nobody else is using a_castle */
rai.rev_leftfix[b_left_id].erase(b);
rai.rev_leftfix[a_left_id].insert(b);
pruneUnusedTops(b_castle, g, rai.rev_leftfix[b_left_id]);
- b_left.castle = a_left.castle;
- b_left.leftfix_report = a_left.leftfix_report;
- DEBUG_PRINTF("OK -> only user of a_castle\n");
- return true;
- }
-
+ b_left.castle = a_left.castle;
+ b_left.leftfix_report = a_left.leftfix_report;
+ DEBUG_PRINTF("OK -> only user of a_castle\n");
+ return true;
+ }
+
if (rai.rev_leftfix[b_left_id].size() == 1) {
- /* nobody else is using b_castle */
+ /* nobody else is using b_castle */
rai.rev_leftfix[a_left_id].erase(a);
rai.rev_leftfix[b_left_id].insert(a);
pruneUnusedTops(a_castle, g, rai.rev_leftfix[a_left_id]);
- a_left.castle = b_left.castle;
- a_left.leftfix_report = b_left.leftfix_report;
- DEBUG_PRINTF("OK -> only user of b_castle\n");
- return true;
- }
-
- if (preds_same) {
-        /* preds are the same anyway in diamond/left merges; we just need to
+ a_left.castle = b_left.castle;
+ a_left.leftfix_report = b_left.leftfix_report;
+ DEBUG_PRINTF("OK -> only user of b_castle\n");
+ return true;
+ }
+
+ if (preds_same) {
+        /* preds are the same anyway in diamond/left merges; we just need to
* check that all the literals in rev_leftfix[b_h] can handle a_h */
for (auto v : rai.rev_leftfix[b_left_id]) {
if (!mergeableRoseVertices(build, a, v)) {
- goto literal_mismatch_1;
- }
- }
-
+ goto literal_mismatch_1;
+ }
+ }
+
rai.rev_leftfix[a_left_id].erase(a);
rai.rev_leftfix[b_left_id].insert(a);
pruneUnusedTops(a_castle, g, rai.rev_leftfix[a_left_id]);
- a_left.castle = b_left.castle;
- a_left.leftfix_report = b_left.leftfix_report;
- DEBUG_PRINTF("OK -> same preds ???\n");
- return true;
- literal_mismatch_1:
-        /* preds are the same anyway in diamond/left merges; we just need to
+ a_left.castle = b_left.castle;
+ a_left.leftfix_report = b_left.leftfix_report;
+ DEBUG_PRINTF("OK -> same preds ???\n");
+ return true;
+ literal_mismatch_1:
+        /* preds are the same anyway in diamond/left merges; we just need to
* check that all the literals in rev_leftfix[a_h] can handle b_h */
for (auto v : rai.rev_leftfix[a_left_id]) {
if (!mergeableRoseVertices(build, v, b)) {
- goto literal_mismatch_2;
- }
- }
-
+ goto literal_mismatch_2;
+ }
+ }
+
rai.rev_leftfix[b_left_id].erase(b);
rai.rev_leftfix[a_left_id].insert(b);
pruneUnusedTops(b_castle, g, rai.rev_leftfix[b_left_id]);
- b_left.castle = a_left.castle;
- b_left.leftfix_report = a_left.leftfix_report;
- DEBUG_PRINTF("OK -> same preds ???\n");
- return true;
- literal_mismatch_2:;
- }
- DEBUG_PRINTF("OK -> create new\n");
- /* we need to create a new graph as there may be other people
- * using b_left and it would be bad if a's preds started triggering it
- */
+ b_left.castle = a_left.castle;
+ b_left.leftfix_report = a_left.leftfix_report;
+ DEBUG_PRINTF("OK -> same preds ???\n");
+ return true;
+ literal_mismatch_2:;
+ }
+ DEBUG_PRINTF("OK -> create new\n");
+ /* we need to create a new graph as there may be other people
+ * using b_left and it would be bad if a's preds started triggering it
+ */
ReportID new_report = build.getNewNfaReport();
- shared_ptr<CastleProto> new_castle = make_shared<CastleProto>(a_castle);
- pruneCastle(*new_castle, a_left.leftfix_report);
- setReports(*new_castle, new_report);
-
+ shared_ptr<CastleProto> new_castle = make_shared<CastleProto>(a_castle);
+ pruneCastle(*new_castle, a_left.leftfix_report);
+ setReports(*new_castle, new_report);
+
rai.rev_leftfix[a_left_id].erase(a);
rai.rev_leftfix[b_left_id].erase(b);
pruneUnusedTops(*a_left.castle, g, rai.rev_leftfix[a_left_id]);
pruneUnusedTops(*b_left.castle, g, rai.rev_leftfix[b_left_id]);
-
- a_left.leftfix_report = new_report;
- b_left.leftfix_report = new_report;
- a_left.castle = new_castle;
- b_left.castle = new_castle;
-
- assert(a_left == b_left);
+
+ a_left.leftfix_report = new_report;
+ b_left.leftfix_report = new_report;
+ a_left.castle = new_castle;
+ b_left.castle = new_castle;
+
+ assert(a_left == b_left);
rai.rev_leftfix[a_left].insert(a);
rai.rev_leftfix[a_left].insert(b);
pruneUnusedTops(*new_castle, g, rai.rev_leftfix[a_left]);
- return true;
- }
-
- // Everything after this point requires more work, so we guard it with the
-    // trivial cases argument.
- if (trivialCasesOnly) {
- return false;
- }
-
- // Only infixes. Prefixes require special care when doing non-trivial
- // merges.
+ return true;
+ }
+
+ // Everything after this point requires more work, so we guard it with the
+    // trivial cases argument.
+ if (trivialCasesOnly) {
+ return false;
+ }
+
+ // Only infixes. Prefixes require special care when doing non-trivial
+ // merges.
if (!build.isNonRootSuccessor(a) || !build.isNonRootSuccessor(b)) {
- return false;
- }
-
+ return false;
+ }
+
set<RoseVertex> &b_verts = rai.rev_leftfix[b_left_id];
- set<RoseVertex> aa;
- aa.insert(a);
-
+ set<RoseVertex> aa;
+ aa.insert(a);
+
if (!mergeableRoseVertices(build, aa, b_verts)) {
- DEBUG_PRINTF("vertices not mergeable\n");
- return false;
- }
-
+ DEBUG_PRINTF("vertices not mergeable\n");
+ return false;
+ }
+
if (!build.cc.grey.roseMultiTopRoses || !build.cc.grey.allowCastle) {
- return false;
- }
-
- DEBUG_PRINTF("merging into new castle\n");
-
- // Clone new castle with a's repeats in it, set to a new report.
+ return false;
+ }
+
+ DEBUG_PRINTF("merging into new castle\n");
+
+ // Clone new castle with a's repeats in it, set to a new report.
ReportID new_report = build.getNewNfaReport();
- shared_ptr<CastleProto> m_castle = make_shared<CastleProto>(a_castle);
- pruneCastle(*m_castle, a_left.leftfix_report);
- setReports(*m_castle, new_report);
-
- // Merge in the relevant repeats from b with the new report. Note that
- // we'll have to remap tops appropriately.
- map<u32, u32> b_top_map;
- for (const auto &e : in_edges_range(b, g)) {
- u32 top = g[e].rose_top;
- assert(contains(b_castle.repeats, top));
-
- PureRepeat pr = b_castle.repeats[top]; // mutable copy
- pr.reports.clear();
- pr.reports.insert(new_report);
-
-        // Earlier checks should protect us from merging common preds with
-        // tops leading to completely different repeats, but just in case...
+ shared_ptr<CastleProto> m_castle = make_shared<CastleProto>(a_castle);
+ pruneCastle(*m_castle, a_left.leftfix_report);
+ setReports(*m_castle, new_report);
+
+ // Merge in the relevant repeats from b with the new report. Note that
+ // we'll have to remap tops appropriately.
+ map<u32, u32> b_top_map;
+ for (const auto &e : in_edges_range(b, g)) {
+ u32 top = g[e].rose_top;
+ assert(contains(b_castle.repeats, top));
+
+ PureRepeat pr = b_castle.repeats[top]; // mutable copy
+ pr.reports.clear();
+ pr.reports.insert(new_report);
+
+        // Earlier checks should protect us from merging common preds with
+        // tops leading to completely different repeats, but just in case...
if (RoseEdge a_edge = edge(source(e, g), a, g)) {
- u32 a_top = g[a_edge].rose_top;
- const PureRepeat &a_pr = m_castle->repeats[a_top]; // new report
- if (pr != a_pr) {
- DEBUG_PRINTF("merge failed, common pred with diff repeat\n");
- return false;
- }
- }
-
- u32 new_top = m_castle->merge(pr);
- if (new_top == CastleProto::max_occupancy) {
- DEBUG_PRINTF("merge failed\n");
- return false;
- }
- b_top_map[top] = new_top;
- }
-
- updateEdgeTops(g, b, b_top_map);
-
- DEBUG_PRINTF("merged into castle containing %zu repeats\n",
- m_castle->repeats.size());
-
+ u32 a_top = g[a_edge].rose_top;
+ const PureRepeat &a_pr = m_castle->repeats[a_top]; // new report
+ if (pr != a_pr) {
+ DEBUG_PRINTF("merge failed, common pred with diff repeat\n");
+ return false;
+ }
+ }
+
+ u32 new_top = m_castle->merge(pr);
+ if (new_top == CastleProto::max_occupancy) {
+ DEBUG_PRINTF("merge failed\n");
+ return false;
+ }
+ b_top_map[top] = new_top;
+ }
+
+ updateEdgeTops(g, b, b_top_map);
+
+ DEBUG_PRINTF("merged into castle containing %zu repeats\n",
+ m_castle->repeats.size());
+
rai.rev_leftfix[a_left_id].erase(a);
rai.rev_leftfix[b_left_id].erase(b);
pruneUnusedTops(*a_left.castle, g, rai.rev_leftfix[a_left_id]);
pruneUnusedTops(*b_left.castle, g, rai.rev_leftfix[b_left_id]);
-
- a_left.castle = m_castle;
- a_left.leftfix_report = new_report;
- b_left.castle = m_castle;
- b_left.leftfix_report = new_report;
-
- assert(a_left == b_left);
+
+ a_left.castle = m_castle;
+ a_left.leftfix_report = new_report;
+ b_left.castle = m_castle;
+ b_left.leftfix_report = new_report;
+
+ assert(a_left == b_left);
rai.rev_leftfix[a_left].insert(a);
rai.rev_leftfix[a_left].insert(b);
pruneUnusedTops(*m_castle, g, rai.rev_leftfix[a_left]);
- return true;
-}
-
-static
+ return true;
+}
+
+static
bool attemptRoseGraphMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a,
- RoseVertex b, bool trivialCasesOnly,
+ RoseVertex b, bool trivialCasesOnly,
RoseAliasingInfo &rai) {
RoseGraph &g = build.g;
- LeftEngInfo &a_left = g[a].left;
- LeftEngInfo &b_left = g[b].left;
- left_id a_left_id(a_left);
- left_id b_left_id(b_left);
- shared_ptr<NGHolder> a_h = a_left.graph;
- shared_ptr<NGHolder> b_h = b_left.graph;
- assert(a_h && b_h);
+ LeftEngInfo &a_left = g[a].left;
+ LeftEngInfo &b_left = g[b].left;
+ left_id a_left_id(a_left);
+ left_id b_left_id(b_left);
+ shared_ptr<NGHolder> a_h = a_left.graph;
+ shared_ptr<NGHolder> b_h = b_left.graph;
+ assert(a_h && b_h);
assert(isImplementableNFA(*a_h, nullptr, build.cc));
assert(isImplementableNFA(*b_h, nullptr, build.cc));
-
- // If we only differ in reports, this is a very easy merge. Just use b's
- // report for both.
- /* Actually not so easy, there may be other poor suckers using a and/or b's
- * reports who will be surprised by this change */
- if (a_h == b_h) {
- DEBUG_PRINTF("OK -> same actual holder\n");
- ReportID a_oldreport = a_left.leftfix_report;
- ReportID b_oldreport = b_left.leftfix_report;
+
+ // If we only differ in reports, this is a very easy merge. Just use b's
+ // report for both.
+ /* Actually not so easy, there may be other poor suckers using a and/or b's
+ * reports who will be surprised by this change */
+ if (a_h == b_h) {
+ DEBUG_PRINTF("OK -> same actual holder\n");
+ ReportID a_oldreport = a_left.leftfix_report;
+ ReportID b_oldreport = b_left.leftfix_report;
ReportID new_report = build.getNewNfaReport();
- duplicateReport(*a_h, a_left.leftfix_report, new_report);
- duplicateReport(*b_h, b_left.leftfix_report, new_report);
- a_left.leftfix_report = new_report;
- b_left.leftfix_report = new_report;
+ duplicateReport(*a_h, a_left.leftfix_report, new_report);
+ duplicateReport(*b_h, b_left.leftfix_report, new_report);
+ a_left.leftfix_report = new_report;
+ b_left.leftfix_report = new_report;
pruneReportIfUnused(build, b_h, rai.rev_leftfix[b_left_id],
a_oldreport);
pruneReportIfUnused(build, b_h, rai.rev_leftfix[b_left_id],
b_oldreport);
pruneUnusedTops(*b_h, g, rai.rev_leftfix[b_left_id]);
- assert(a_left == b_left);
- return true;
- }
-
- /* if it is the same graph, it is also fairly easy */
- if (is_equal(*a_h, a_left.leftfix_report, *b_h, b_left.leftfix_report)) {
+ assert(a_left == b_left);
+ return true;
+ }
+
+ /* if it is the same graph, it is also fairly easy */
+ if (is_equal(*a_h, a_left.leftfix_report, *b_h, b_left.leftfix_report)) {
if (rai.rev_leftfix[a_left_id].size() == 1) {
- /* nobody else is using a_h */
+ /* nobody else is using a_h */
rai.rev_leftfix[b_left_id].erase(b);
rai.rev_leftfix[a_left_id].insert(b);
- b_left.graph = a_h;
- b_left.leftfix_report = a_left.leftfix_report;
+ b_left.graph = a_h;
+ b_left.leftfix_report = a_left.leftfix_report;
pruneUnusedTops(*b_h, g, rai.rev_leftfix[b_left_id]);
- DEBUG_PRINTF("OK -> only user of a_h\n");
- return true;
- }
-
+ DEBUG_PRINTF("OK -> only user of a_h\n");
+ return true;
+ }
+
if (rai.rev_leftfix[b_left_id].size() == 1) {
- /* nobody else is using b_h */
+ /* nobody else is using b_h */
rai.rev_leftfix[a_left_id].erase(a);
rai.rev_leftfix[b_left_id].insert(a);
- a_left.graph = b_h;
- a_left.leftfix_report = b_left.leftfix_report;
+ a_left.graph = b_h;
+ a_left.leftfix_report = b_left.leftfix_report;
pruneUnusedTops(*a_h, g, rai.rev_leftfix[a_left_id]);
- DEBUG_PRINTF("OK -> only user of b_h\n");
- return true;
- }
-
- if (preds_same) {
-        /* preds are the same anyway in diamond/left merges; we just need to
+ DEBUG_PRINTF("OK -> only user of b_h\n");
+ return true;
+ }
+
+ if (preds_same) {
+        /* preds are the same anyway in diamond/left merges; we just need to
* check that all the literals in rev_leftfix[b_h] can handle a_h */
for (auto v : rai.rev_leftfix[b_left_id]) {
if (!mergeableRoseVertices(build, a, v)) {
- goto literal_mismatch_1;
- }
- }
-
+ goto literal_mismatch_1;
+ }
+ }
+
rai.rev_leftfix[a_left_id].erase(a);
rai.rev_leftfix[b_left_id].insert(a);
- a_left.graph = b_h;
- a_left.leftfix_report = b_left.leftfix_report;
+ a_left.graph = b_h;
+ a_left.leftfix_report = b_left.leftfix_report;
pruneUnusedTops(*a_h, g, rai.rev_leftfix[a_left_id]);
- DEBUG_PRINTF("OK -> same preds ???\n");
- return true;
- literal_mismatch_1:
-        /* preds are the same anyway in diamond/left merges; we just need to
+ DEBUG_PRINTF("OK -> same preds ???\n");
+ return true;
+ literal_mismatch_1:
+        /* preds are the same anyway in diamond/left merges; we just need to
* check that all the literals in rev_leftfix[a_h] can handle b_h */
for (auto v : rai.rev_leftfix[a_left_id]) {
if (!mergeableRoseVertices(build, v, b)) {
- goto literal_mismatch_2;
- }
- }
-
+ goto literal_mismatch_2;
+ }
+ }
+
rai.rev_leftfix[b_left_id].erase(b);
rai.rev_leftfix[a_left_id].insert(b);
- b_left.graph = a_h;
- b_left.leftfix_report = a_left.leftfix_report;
+ b_left.graph = a_h;
+ b_left.leftfix_report = a_left.leftfix_report;
pruneUnusedTops(*b_h, g, rai.rev_leftfix[b_left_id]);
- DEBUG_PRINTF("OK -> same preds ???\n");
- return true;
- literal_mismatch_2:;
- }
- DEBUG_PRINTF("OK -> create new\n");
- /* we need to create a new graph as there may be other people
- * using b_left and it would be bad if a's preds started triggering it
- */
+ DEBUG_PRINTF("OK -> same preds ???\n");
+ return true;
+ literal_mismatch_2:;
+ }
+ DEBUG_PRINTF("OK -> create new\n");
+ /* we need to create a new graph as there may be other people
+ * using b_left and it would be bad if a's preds started triggering it
+ */
ReportID new_report = build.getNewNfaReport();
- shared_ptr<NGHolder> new_graph = cloneHolder(*b_h);
- duplicateReport(*new_graph, b_left.leftfix_report, new_report);
+ shared_ptr<NGHolder> new_graph = cloneHolder(*b_h);
+ duplicateReport(*new_graph, b_left.leftfix_report, new_report);
pruneAllOtherReports(*new_graph, new_report);
-
+
if (!isImplementableNFA(*new_graph, nullptr, build.cc)) {
DEBUG_PRINTF("new graph not implementable\n");
return false;
}
-
+
rai.rev_leftfix[a_left_id].erase(a);
rai.rev_leftfix[b_left_id].erase(b);
pruneUnusedTops(*a_h, g, rai.rev_leftfix[a_left_id]);
pruneUnusedTops(*b_h, g, rai.rev_leftfix[b_left_id]);
- a_left.leftfix_report = new_report;
- b_left.leftfix_report = new_report;
- a_left.graph = new_graph;
- b_left.graph = new_graph;
-
+ a_left.leftfix_report = new_report;
+ b_left.leftfix_report = new_report;
+ a_left.graph = new_graph;
+ b_left.graph = new_graph;
+
rai.rev_leftfix[a_left].insert(a);
rai.rev_leftfix[a_left].insert(b);
pruneUnusedTops(*new_graph, g, rai.rev_leftfix[a_left]);
- return true;
- }
-
- // Everything after this point requires merging via the uncalc code, so we
- // guard it with the trivial cases arg.
- if (trivialCasesOnly) {
- return false;
- }
-
- // Only infixes. Prefixes require special care when doing non-trivial
- // merges.
+ return true;
+ }
+
+ // Everything after this point requires merging via the uncalc code, so we
+ // guard it with the trivial cases arg.
+ if (trivialCasesOnly) {
+ return false;
+ }
+
+ // Only infixes. Prefixes require special care when doing non-trivial
+ // merges.
if (!build.isNonRootSuccessor(a) || !build.isNonRootSuccessor(b)) {
- return false;
- }
-
- DEBUG_PRINTF("attempting merge of roses on vertices %zu and %zu\n",
+ return false;
+ }
+
+ DEBUG_PRINTF("attempting merge of roses on vertices %zu and %zu\n",
g[a].index, g[b].index);
-
+
set<RoseVertex> &b_verts = rai.rev_leftfix[b_left];
- set<RoseVertex> aa;
- aa.insert(a);
-
+ set<RoseVertex> aa;
+ aa.insert(a);
+
if (!mergeableRoseVertices(build, aa, b_verts)) {
- DEBUG_PRINTF("vertices not mergeable\n");
- return false;
- }
-
+ DEBUG_PRINTF("vertices not mergeable\n");
+ return false;
+ }
+
if (!build.cc.grey.roseMultiTopRoses) {
- return false;
- }
-
-    // Clone a's NFA to operate on, and store a copy of its in-edge
-    // properties.
-
-    /* We need to allocate a new report id because the old report ids may
-     * still be in use by other roles. */
- ReportID a_oldreport = a_left.leftfix_report;
- ReportID b_oldreport = b_left.leftfix_report;
+ return false;
+ }
+
+    // Clone a's NFA to operate on, and store a copy of its in-edge
+    // properties.
+
+    /* We need to allocate a new report id because the old report ids may
+     * still be in use by other roles. */
+ ReportID a_oldreport = a_left.leftfix_report;
+ ReportID b_oldreport = b_left.leftfix_report;
ReportID new_report = build.getNewNfaReport();
- duplicateReport(*b_h, b_left.leftfix_report, new_report);
- b_left.leftfix_report = new_report;
+ duplicateReport(*b_h, b_left.leftfix_report, new_report);
+ b_left.leftfix_report = new_report;
pruneReportIfUnused(build, b_h, rai.rev_leftfix[b_left_id], b_oldreport);
-
- NGHolder victim;
- cloneHolder(victim, *a_h);
- duplicateReport(victim, a_left.leftfix_report, new_report);
- pruneAllOtherReports(victim, new_report);
-
- map<RoseVertex, RoseEdgeProps> a_props;
- for (const auto &e : in_edges_range(a, g)) {
- a_props[source(e, g)] = g[e];
- }
-
- DEBUG_PRINTF("victim %zu states\n", num_vertices(*a_h));
- DEBUG_PRINTF("winner %zu states\n", num_vertices(*b_h));
-
- if (!setDistinctRoseTops(g, victim, *b_h, deque<RoseVertex>(1, a))) {
+
+ NGHolder victim;
+ cloneHolder(victim, *a_h);
+ duplicateReport(victim, a_left.leftfix_report, new_report);
+ pruneAllOtherReports(victim, new_report);
+
+ map<RoseVertex, RoseEdgeProps> a_props;
+ for (const auto &e : in_edges_range(a, g)) {
+ a_props[source(e, g)] = g[e];
+ }
+
+ DEBUG_PRINTF("victim %zu states\n", num_vertices(*a_h));
+ DEBUG_PRINTF("winner %zu states\n", num_vertices(*b_h));
+
+ if (!setDistinctRoseTops(g, victim, *b_h, deque<RoseVertex>(1, a))) {
assert(roseHasTops(build, a));
assert(roseHasTops(build, b));
- return false;
- }
-
- assert(victim.kind == b_h->kind);
- assert(!generates_callbacks(*b_h));
-
+ return false;
+ }
+
+ assert(victim.kind == b_h->kind);
+ assert(!generates_callbacks(*b_h));
+
if (!mergeNfaPair(victim, *b_h, nullptr, build.cc)) {
- DEBUG_PRINTF("merge failed\n");
- // Restore in-edge properties.
- for (const auto &e : in_edges_range(a, g)) {
- g[e] = a_props[source(e, g)];
- }
+ DEBUG_PRINTF("merge failed\n");
+ // Restore in-edge properties.
+ for (const auto &e : in_edges_range(a, g)) {
+ g[e] = a_props[source(e, g)];
+ }
assert(roseHasTops(build, a));
assert(roseHasTops(build, b));
- return false;
- }
-
- DEBUG_PRINTF("merge succeeded -> %zu vertices\n", num_vertices(*b_h));
-
- // update A's rose data to point to the merged graph.
- a_left.graph = b_h;
- a_left.leftfix_report = new_report;
-
+ return false;
+ }
+
+ DEBUG_PRINTF("merge succeeded -> %zu vertices\n", num_vertices(*b_h));
+
+ // update A's rose data to point to the merged graph.
+ a_left.graph = b_h;
+ a_left.leftfix_report = new_report;
+
assert(contains(rai.rev_leftfix[a_left_id], a));
assert(contains(rai.rev_leftfix[b_left_id], b));
rai.rev_leftfix[a_left_id].erase(a);
rai.rev_leftfix[b_left_id].insert(a);
-
+
pruneUnusedTops(*a_h, g, rai.rev_leftfix[a_left_id]);
pruneUnusedTops(*b_h, g, rai.rev_leftfix[b_left_id]);
-
- // Prune A's report from its old prefix if it was only used by A.
+
+ // Prune A's report from its old prefix if it was only used by A.
pruneReportIfUnused(build, a_h, rai.rev_leftfix[a_left_id], a_oldreport);
-
+
reduceImplementableGraph(*b_h, SOM_NONE, nullptr, build.cc);
-
+
assert(roseHasTops(build, a));
assert(roseHasTops(build, b));
assert(isImplementableNFA(*b_h, nullptr, build.cc));
- return true;
-}
-
-// Called by the role aliasing pass: Attempt to merge rose a into b, updating
-// the two LeftEngInfo structures to be the same. Returns false if the merge
-// is not possible.
-static
+ return true;
+}
+
+// Called by the role aliasing pass: Attempt to merge rose a into b, updating
+// the two LeftEngInfo structures to be the same. Returns false if the merge
+// is not possible.
+static
bool attemptRoseMerge(RoseBuildImpl &build, bool preds_same, RoseVertex a,
RoseVertex b, bool trivialCasesOnly,
RoseAliasingInfo &rai) {
- DEBUG_PRINTF("attempting rose merge, vertices a=%zu, b=%zu\n",
+ DEBUG_PRINTF("attempting rose merge, vertices a=%zu, b=%zu\n",
build.g[a].index, build.g[b].index);
- assert(a != b);
-
+ assert(a != b);
+
RoseGraph &g = build.g;
- LeftEngInfo &a_left = g[a].left;
- LeftEngInfo &b_left = g[b].left;
-
- // Trivial case.
- if (a_left == b_left) {
- DEBUG_PRINTF("roses are identical, no leftfix or already merged\n");
- return true;
- }
-
- const left_id a_left_id(a_left);
- const left_id b_left_id(b_left);
-
- /* Haig merges not supported at the moment */
- if (a_left.haig || b_left.haig) {
- return false;
- }
-
- /* dfa merges not supported at the moment (no multitop) */
- if (a_left.dfa || b_left.dfa) {
- return false;
- }
-
- // Only non-transients for the moment.
+ LeftEngInfo &a_left = g[a].left;
+ LeftEngInfo &b_left = g[b].left;
+
+ // Trivial case.
+ if (a_left == b_left) {
+ DEBUG_PRINTF("roses are identical, no leftfix or already merged\n");
+ return true;
+ }
+
+ const left_id a_left_id(a_left);
+ const left_id b_left_id(b_left);
+
+ /* Haig merges not supported at the moment */
+ if (a_left.haig || b_left.haig) {
+ return false;
+ }
+
+ /* dfa merges not supported at the moment (no multitop) */
+ if (a_left.dfa || b_left.dfa) {
+ return false;
+ }
+
+ // Only non-transients for the moment.
if (contains(build.transient, a_left_id) ||
contains(build.transient, b_left_id)) {
- return false;
- }
-
- /* It is not possible to merge roles with different lags as we can only
- * test the leftfix at one location relative to the literal match */
- if (a_left.lag != b_left.lag) {
- return false;
- }
-
+ return false;
+ }
+
+ /* It is not possible to merge roles with different lags as we can only
+ * test the leftfix at one location relative to the literal match */
+ if (a_left.lag != b_left.lag) {
+ return false;
+ }
+
assert(roseHasTops(build, a));
assert(roseHasTops(build, b));
-
- if (a_left_id.graph() && b_left_id.graph()) {
+
+ if (a_left_id.graph() && b_left_id.graph()) {
return attemptRoseGraphMerge(build, preds_same, a, b, trivialCasesOnly,
rai);
- }
-
- if (a_left_id.castle() && b_left_id.castle()) {
+ }
+
+ if (a_left_id.castle() && b_left_id.castle()) {
return attemptRoseCastleMerge(build, preds_same, a, b, trivialCasesOnly,
rai);
- }
-
- return false;
-}
-
+ }
+
+ return false;
+}
+
/**
* \brief Buckets that only contain one vertex are never going to lead to a
* merge.
*/
-static
+static
void removeSingletonBuckets(vector<vector<RoseVertex>> &buckets) {
auto it = remove_if(
begin(buckets), end(buckets),
@@ -1415,10 +1415,10 @@ void removeSingletonBuckets(vector<vector<RoseVertex>> &buckets) {
DEBUG_PRINTF("deleting %zu singleton buckets\n",
distance(it, end(buckets)));
buckets.erase(it, end(buckets));
- }
-}
-
-static
+ }
+}
+
+static
void buildInvBucketMap(const vector<vector<RoseVertex>> &buckets,
unordered_map<RoseVertex, size_t> &inv) {
inv.clear();
@@ -1429,7 +1429,7 @@ void buildInvBucketMap(const vector<vector<RoseVertex>> &buckets,
}
}
}
-
+
/**
* \brief Generic splitter that will use the given split function to partition
* the vector of buckets, then remove buckets with <= 1 entry.
@@ -1441,35 +1441,35 @@ void splitAndFilterBuckets(vector<vector<RoseVertex>> &buckets,
return;
}
- vector<vector<RoseVertex>> out;
-
+ vector<vector<RoseVertex>> out;
+
// Mapping from split key value to new bucket index.
using key_type = decltype(make_split_key(RoseGraph::null_vertex()));
unordered_map<key_type, size_t> dest_map;
dest_map.reserve(buckets.front().size());
- for (const auto &bucket : buckets) {
- assert(!bucket.empty());
+ for (const auto &bucket : buckets) {
+ assert(!bucket.empty());
dest_map.clear();
- for (RoseVertex v : bucket) {
+ for (RoseVertex v : bucket) {
auto p = dest_map.emplace(make_split_key(v), out.size());
if (p.second) { // New key, add a bucket.
out.emplace_back();
- }
+ }
auto out_bucket = p.first->second;
- out[out_bucket].push_back(v);
- }
- }
-
+ out[out_bucket].push_back(v);
+ }
+ }
+
if (out.size() == buckets.size()) {
return; // No new buckets created.
}
buckets = std::move(out);
removeSingletonBuckets(buckets);
-}
-
-static
+}
+
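
splitAndFilterBuckets() is generic over the key functor; a minimal sketch of the same partition-then-filter idiom, with the key fixed to a size_t hash as the hash_all-based splitters produce (Vertex and splitAndFilter are toy stand-ins):

    #include <algorithm>
    #include <cstddef>
    #include <unordered_map>
    #include <utility>
    #include <vector>

    using Vertex = int; // toy stand-in for RoseVertex

    template <typename KeyFn> // KeyFn: Vertex -> std::size_t
    void splitAndFilter(std::vector<std::vector<Vertex>> &buckets,
                        KeyFn key) {
        std::vector<std::vector<Vertex>> out;
        std::unordered_map<std::size_t, std::size_t> dest; // key -> index
        for (const auto &bucket : buckets) {
            dest.clear(); // keys only partition within one input bucket
            for (Vertex v : bucket) {
                auto p = dest.emplace(key(v), out.size());
                if (p.second) {
                    out.emplace_back(); // first vertex with this key
                }
                out[p.first->second].push_back(v);
            }
        }
        // Singleton buckets can never yield a merge pair; drop them.
        out.erase(std::remove_if(out.begin(), out.end(),
                                 [](const std::vector<Vertex> &b) {
                                     return b.size() <= 1;
                                 }),
                  out.end());
        buckets = std::move(out);
    }
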
+static
void splitByReportSuffixBehaviour(const RoseGraph &g,
vector<vector<RoseVertex>> &buckets) {
// Split by report set and suffix info.
@@ -1495,74 +1495,74 @@ void splitByLiteralTable(const RoseBuildImpl &build,
}
static
-void splitByNeighbour(const RoseGraph &g, vector<vector<RoseVertex>> &buckets,
+void splitByNeighbour(const RoseGraph &g, vector<vector<RoseVertex>> &buckets,
unordered_map<RoseVertex, size_t> &inv, bool succ) {
- vector<vector<RoseVertex>> extras;
- map<size_t, vector<RoseVertex>> neighbours_by_bucket;
- set<RoseVertex> picked;
- vector<RoseVertex> leftovers;
-
- for (RoseVertex u : vertices_range(g)) {
- /* once split by v, stays split. also keeps iterator in buckets valid */
- extras.clear();
- neighbours_by_bucket.clear();
- if (succ) {
- /* forward pass */
- for (RoseVertex v : adjacent_vertices_range(u, g)) {
- auto it = inv.find(v);
- if (it != end(inv)) {
- neighbours_by_bucket[it->second].push_back(v);
- }
- }
- } else {
- /* backward pass */
- for (RoseVertex v : inv_adjacent_vertices_range(u, g)) {
- auto it = inv.find(v);
- if (it != end(inv)) {
- neighbours_by_bucket[it->second].push_back(v);
- }
- }
- }
- for (const auto &e : neighbours_by_bucket) {
- size_t old_key = e.first;
- if (buckets[old_key].size() == e.second.size()) {
- /* did not split */
- continue;
- }
- assert(!e.second.empty());
-
- picked.clear();
- picked.insert(begin(e.second), end(e.second));
-
- size_t new_key = buckets.size() + extras.size();
- leftovers.clear();
- for (RoseVertex v : buckets[old_key]) {
- if (contains(picked, v)) {
- inv[v] = new_key;
- } else {
- leftovers.push_back(v);
- }
- }
-
- assert(!leftovers.empty());
- assert(e.second.size() + leftovers.size()
- == buckets[old_key].size());
- extras.push_back(e.second);
- buckets[old_key].swap(leftovers);
- }
- insert(&buckets, buckets.end(), extras);
- }
+ vector<vector<RoseVertex>> extras;
+ map<size_t, vector<RoseVertex>> neighbours_by_bucket;
+ set<RoseVertex> picked;
+ vector<RoseVertex> leftovers;
+
+ for (RoseVertex u : vertices_range(g)) {
+ /* once split by v, stays split. also keeps iterator in buckets valid */
+ extras.clear();
+ neighbours_by_bucket.clear();
+ if (succ) {
+ /* forward pass */
+ for (RoseVertex v : adjacent_vertices_range(u, g)) {
+ auto it = inv.find(v);
+ if (it != end(inv)) {
+ neighbours_by_bucket[it->second].push_back(v);
+ }
+ }
+ } else {
+ /* backward pass */
+ for (RoseVertex v : inv_adjacent_vertices_range(u, g)) {
+ auto it = inv.find(v);
+ if (it != end(inv)) {
+ neighbours_by_bucket[it->second].push_back(v);
+ }
+ }
+ }
+ for (const auto &e : neighbours_by_bucket) {
+ size_t old_key = e.first;
+ if (buckets[old_key].size() == e.second.size()) {
+ /* did not split */
+ continue;
+ }
+ assert(!e.second.empty());
+
+ picked.clear();
+ picked.insert(begin(e.second), end(e.second));
+
+ size_t new_key = buckets.size() + extras.size();
+ leftovers.clear();
+ for (RoseVertex v : buckets[old_key]) {
+ if (contains(picked, v)) {
+ inv[v] = new_key;
+ } else {
+ leftovers.push_back(v);
+ }
+ }
+
+ assert(!leftovers.empty());
+ assert(e.second.size() + leftovers.size()
+ == buckets[old_key].size());
+ extras.push_back(e.second);
+ buckets[old_key].swap(leftovers);
+ }
+ insert(&buckets, buckets.end(), extras);
+ }
removeSingletonBuckets(buckets);
buildInvBucketMap(buckets, inv);
-}
-
-static
+}
+
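
splitByNeighbour() is one round of partition refinement: a bucket splits whenever some vertex u is adjacent to only part of it. A single refinement step over assumed toy data:

    #include <set>
    #include <vector>

    int main() {
        // Bucket {1, 2, 3}; some vertex u has successors {1, 2} only.
        const std::vector<int> bucket = {1, 2, 3};
        const std::set<int> picked = {1, 2}; // bucket members adjacent to u

        // Members adjacent to u move to a fresh bucket; the rest stay.
        std::vector<int> split_out, leftovers;
        for (int v : bucket) {
            (picked.count(v) ? split_out : leftovers).push_back(v);
        }
        // split_out == {1, 2}, leftovers == {3}: vertex 3 can no longer be
        // diamond-merged with 1 or 2, as their neighbour sets differ by u.
        return (split_out.size() == 2 && leftovers.size() == 1) ? 0 : 1;
    }
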
+static
vector<vector<RoseVertex>>
splitDiamondMergeBuckets(CandidateSet &candidates, const RoseBuildImpl &build) {
- const RoseGraph &g = build.g;
-
- vector<vector<RoseVertex>> buckets(1);
+ const RoseGraph &g = build.g;
+
+ vector<vector<RoseVertex>> buckets(1);
buckets[0].reserve(candidates.size());
insert(&buckets[0], buckets[0].end(), candidates);
@@ -1572,8 +1572,8 @@ splitDiamondMergeBuckets(CandidateSet &candidates, const RoseBuildImpl &build) {
DEBUG_PRINTF("split by report/suffix, %zu buckets\n", buckets.size());
if (buckets.empty()) {
return buckets;
- }
-
+ }
+
splitByLiteralTable(build, buckets);
DEBUG_PRINTF("split by lit table, %zu buckets\n", buckets.size());
if (buckets.empty()) {
@@ -1584,130 +1584,130 @@ splitDiamondMergeBuckets(CandidateSet &candidates, const RoseBuildImpl &build) {
unordered_map<RoseVertex, size_t> inv;
buildInvBucketMap(buckets, inv);
- splitByNeighbour(g, buckets, inv, true);
+ splitByNeighbour(g, buckets, inv, true);
DEBUG_PRINTF("split by successor, %zu buckets\n", buckets.size());
if (buckets.empty()) {
return buckets;
}
- splitByNeighbour(g, buckets, inv, false);
+ splitByNeighbour(g, buckets, inv, false);
DEBUG_PRINTF("split by predecessor, %zu buckets\n", buckets.size());
-
- return buckets;
-}
-static never_inline
+ return buckets;
+}
+
+static never_inline
void diamondMergePass(CandidateSet &candidates, RoseBuildImpl &build,
- vector<RoseVertex> *dead, bool mergeRoses,
+ vector<RoseVertex> *dead, bool mergeRoses,
RoseAliasingInfo &rai) {
- DEBUG_PRINTF("begin\n");
+ DEBUG_PRINTF("begin\n");
RoseGraph &g = build.g;
-
- if (candidates.empty()) {
- return;
- }
-
- /* Vertices may only be diamond merged with others in the same bucket */
+
+ if (candidates.empty()) {
+ return;
+ }
+
+ /* Vertices may only be diamond merged with others in the same bucket */
auto cand_buckets = splitDiamondMergeBuckets(candidates, build);
-
- for (const vector<RoseVertex> &siblings : cand_buckets) {
- for (auto it = siblings.begin(); it != siblings.end();) {
- RoseVertex a = *it;
- ++it;
-
- assert(contains(candidates, a));
-
+
+ for (const vector<RoseVertex> &siblings : cand_buckets) {
+ for (auto it = siblings.begin(); it != siblings.end();) {
+ RoseVertex a = *it;
+ ++it;
+
+ assert(contains(candidates, a));
+
DEBUG_PRINTF("trying to merge %zu into somebody\n", g[a].index);
- for (auto jt = it; jt != siblings.end(); ++jt) {
- RoseVertex b = *jt;
- assert(contains(candidates, b));
-
+ for (auto jt = it; jt != siblings.end(); ++jt) {
+ RoseVertex b = *jt;
+ assert(contains(candidates, b));
+
if (!sameRoleProperties(build, rai, a, b)) {
- DEBUG_PRINTF("diff role prop\n");
- continue;
- }
-
- // Check "diamond" requirements: must have same right side
- // (successors, reports) and left side (predecessors).
- /* Note: bucketing does not check edge properties (bounds, tops)
-             * so we still have to check successors and predecessors. */
-
- if (!sameSuccessors(a, b, g)
+ DEBUG_PRINTF("diff role prop\n");
+ continue;
+ }
+
+ // Check "diamond" requirements: must have same right side
+ // (successors, reports) and left side (predecessors).
+ /* Note: bucketing does not check edge properties (bounds, tops)
+             * so we still have to check successors and predecessors. */
+
+ if (!sameSuccessors(a, b, g)
|| !sameRightRoleProperties(build, a, b)
- || !samePredecessors(a, b, g)) {
- DEBUG_PRINTF("not diamond\n");
- continue;
- }
-
+ || !samePredecessors(a, b, g)) {
+ DEBUG_PRINTF("not diamond\n");
+ continue;
+ }
+
if (!canMergeLiterals(a, b, build)) {
- DEBUG_PRINTF("incompatible lits\n");
- continue;
- }
-
+ DEBUG_PRINTF("incompatible lits\n");
+ continue;
+ }
+
if (!attemptRoseMerge(build, true, a, b, !mergeRoses, rai)) {
- DEBUG_PRINTF("rose fail\n");
- continue;
- }
-
+ DEBUG_PRINTF("rose fail\n");
+ continue;
+ }
+
mergeVerticesDiamond(a, b, build, rai);
- dead->push_back(a);
- candidates.erase(a);
- break; // next a
- }
- }
- }
-
- DEBUG_PRINTF("%zu candidates remaining\n", candidates.size());
-}
-
-static
-vector<RoseVertex>::iterator findLeftMergeSibling(
- vector<RoseVertex>::iterator it,
- const vector<RoseVertex>::iterator &end,
- const RoseVertex a, const RoseBuildImpl &build,
+ dead->push_back(a);
+ candidates.erase(a);
+ break; // next a
+ }
+ }
+ }
+
+ DEBUG_PRINTF("%zu candidates remaining\n", candidates.size());
+}
+
+static
+vector<RoseVertex>::iterator findLeftMergeSibling(
+ vector<RoseVertex>::iterator it,
+ const vector<RoseVertex>::iterator &end,
+ const RoseVertex a, const RoseBuildImpl &build,
const RoseAliasingInfo &rai,
- const CandidateSet &candidates) {
- const RoseGraph &g = build.g;
-
- for (; it != end; ++it) {
- RoseVertex b = *it;
- if (a == b) {
- continue;
- }
-
- if (!contains(candidates, b)) {
- continue;
- }
-
+ const CandidateSet &candidates) {
+ const RoseGraph &g = build.g;
+
+ for (; it != end; ++it) {
+ RoseVertex b = *it;
+ if (a == b) {
+ continue;
+ }
+
+ if (!contains(candidates, b)) {
+ continue;
+ }
+
if (!sameRoleProperties(build, rai, a, b)) {
- continue;
- }
-
- // Check left-equivalence: must have same predecessors and same
- // literals.
-
- if (g[a].literals != g[b].literals) {
- continue;
- }
-
- if (!samePredecessors(a, b, g)) {
- continue;
- }
-
- if (hasCommonSuccWithBadBounds(a, b, g)) {
- continue;
- }
-
- if (g[a].suffix && g[b].suffix && g[a].suffix != g[b].suffix) {
- continue; /* we can only trigger one suffix */
- }
-
- return it;
- }
-
- return end;
-}
-
+ continue;
+ }
+
+ // Check left-equivalence: must have same predecessors and same
+ // literals.
+
+ if (g[a].literals != g[b].literals) {
+ continue;
+ }
+
+ if (!samePredecessors(a, b, g)) {
+ continue;
+ }
+
+ if (hasCommonSuccWithBadBounds(a, b, g)) {
+ continue;
+ }
+
+ if (g[a].suffix && g[b].suffix && g[a].suffix != g[b].suffix) {
+ continue; /* we can only trigger one suffix */
+ }
+
+ return it;
+ }
+
+ return end;
+}
+
static
void getLeftMergeSiblings(const RoseBuildImpl &build, RoseVertex a,
vector<RoseVertex> &siblings) {
@@ -1734,20 +1734,20 @@ void getLeftMergeSiblings(const RoseBuildImpl &build, RoseVertex a,
}
}
-static never_inline
+static never_inline
void leftMergePass(CandidateSet &candidates, RoseBuildImpl &build,
vector<RoseVertex> *dead, RoseAliasingInfo &rai) {
- DEBUG_PRINTF("begin (%zu)\n", candidates.size());
- vector<RoseVertex> siblings;
-
+ DEBUG_PRINTF("begin (%zu)\n", candidates.size());
+ vector<RoseVertex> siblings;
+
auto it = candidates.begin();
- while (it != candidates.end()) {
- RoseVertex a = *it;
- CandidateSet::iterator ait = it;
- ++it;
-
+ while (it != candidates.end()) {
+ RoseVertex a = *it;
+ CandidateSet::iterator ait = it;
+ ++it;
+
getLeftMergeSiblings(build, a, siblings);
-
+
auto jt = siblings.begin();
while (jt != siblings.end()) {
jt = findLeftMergeSibling(jt, siblings.end(), a, build, rai,
@@ -1763,98 +1763,98 @@ void leftMergePass(CandidateSet &candidates, RoseBuildImpl &build,
break; // consider next a
}
++jt;
- }
- }
-
- DEBUG_PRINTF("%zu candidates remaining\n", candidates.size());
+ }
+ }
+
+ DEBUG_PRINTF("%zu candidates remaining\n", candidates.size());
assert(!hasOrphanedTops(build));
-}
-
-// Can't merge vertices with different root predecessors.
-static
-bool safeRootPreds(RoseVertex a, RoseVertex b, const RoseGraph &g) {
- set<RoseVertex> a_roots, b_roots;
-
- for (auto u : inv_adjacent_vertices_range(a, g)) {
+}
+
+// Can't merge vertices with different root predecessors.
+static
+bool safeRootPreds(RoseVertex a, RoseVertex b, const RoseGraph &g) {
+ set<RoseVertex> a_roots, b_roots;
+
+ for (auto u : inv_adjacent_vertices_range(a, g)) {
if (!in_degree(u, g)) {
- a_roots.insert(u);
- }
- }
- for (auto u : inv_adjacent_vertices_range(b, g)) {
+ a_roots.insert(u);
+ }
+ }
+ for (auto u : inv_adjacent_vertices_range(b, g)) {
if (!in_degree(u, g)) {
- b_roots.insert(u);
- }
- }
-
- assert(a_roots.size() <= 1);
- assert(b_roots.size() <= 1);
-
- return a_roots == b_roots;
-}
-
-static never_inline
-vector<RoseVertex>::const_iterator findRightMergeSibling(
- vector<RoseVertex>::const_iterator it,
- const vector<RoseVertex>::const_iterator &end,
- const RoseVertex a, const RoseBuildImpl &build,
+ b_roots.insert(u);
+ }
+ }
+
+ assert(a_roots.size() <= 1);
+ assert(b_roots.size() <= 1);
+
+ return a_roots == b_roots;
+}
+
+static never_inline
+vector<RoseVertex>::const_iterator findRightMergeSibling(
+ vector<RoseVertex>::const_iterator it,
+ const vector<RoseVertex>::const_iterator &end,
+ const RoseVertex a, const RoseBuildImpl &build,
const RoseAliasingInfo &rai,
- const CandidateSet &candidates) {
- const RoseGraph &g = build.g;
-
- for (; it != end; ++it) {
- RoseVertex b = *it;
- if (a == b) {
- continue;
- }
-
- if (!contains(candidates, b)) {
- continue;
- }
-
+ const CandidateSet &candidates) {
+ const RoseGraph &g = build.g;
+
+ for (; it != end; ++it) {
+ RoseVertex b = *it;
+ if (a == b) {
+ continue;
+ }
+
+ if (!contains(candidates, b)) {
+ continue;
+ }
+
if (!sameRoleProperties(build, rai, a, b)) {
- continue;
- }
-
- // Check right-equivalence: must have same successors, reports and same
- // literals.
-
- if (g[a].literals != g[b].literals) {
- continue;
- }
-
- if (!sameSuccessors(a, b, g)
- || !sameRightRoleProperties(build, a, b)) {
- continue;
- }
-
- // An extra wrinkle: we cannot merge two vertices that are root
- // successors if their preds are different. (e.g. one is anchored and
- // one is not)
- if (!safeRootPreds(a, b, g)) {
- continue;
- }
-
- if (hasCommonPredWithBadBounds(a, b, g)) {
- continue;
- }
-
- if (hasCommonPredWithDiffRoses(a, b, g)) {
- continue;
- }
-
- return it;
- }
-
- return end;
-}
-
-static
+ continue;
+ }
+
+ // Check right-equivalence: must have same successors, reports and same
+ // literals.
+
+ if (g[a].literals != g[b].literals) {
+ continue;
+ }
+
+ if (!sameSuccessors(a, b, g)
+ || !sameRightRoleProperties(build, a, b)) {
+ continue;
+ }
+
+ // An extra wrinkle: we cannot merge two vertices that are root
+ // successors if their preds are different. (e.g. one is anchored and
+ // one is not)
+ if (!safeRootPreds(a, b, g)) {
+ continue;
+ }
+
+ if (hasCommonPredWithBadBounds(a, b, g)) {
+ continue;
+ }
+
+ if (hasCommonPredWithDiffRoses(a, b, g)) {
+ continue;
+ }
+
+ return it;
+ }
+
+ return end;
+}
+
+static
void splitByRightProps(const RoseGraph &g,
vector<vector<RoseVertex>> &buckets) {
// Successor vector used in make_split_key. We declare it here so we can
// reuse storage.
vector<RoseVertex> succ;
-
+
// Split by {successors, literals, reports}.
auto make_split_key = [&](RoseVertex v) {
succ.clear();
@@ -1863,48 +1863,48 @@ void splitByRightProps(const RoseGraph &g,
return hash_all(g[v].literals, g[v].reports, succ);
};
splitAndFilterBuckets(buckets, make_split_key);
-}
-
-static never_inline
+}
+
+static never_inline
vector<vector<RoseVertex>>
splitRightMergeBuckets(const CandidateSet &candidates,
const RoseBuildImpl &build) {
const RoseGraph &g = build.g;
-
+
vector<vector<RoseVertex>> buckets(1);
buckets[0].reserve(candidates.size());
insert(&buckets[0], buckets[0].end(), candidates);
-
+
DEBUG_PRINTF("at start, %zu candidates in 1 bucket\n", candidates.size());
-
+
splitByReportSuffixBehaviour(g, buckets);
DEBUG_PRINTF("split by report/suffix, %zu buckets\n", buckets.size());
if (buckets.empty()) {
return buckets;
- }
-
+ }
+
splitByRightProps(g, buckets);
DEBUG_PRINTF("split by right-merge properties, %zu buckets\n",
buckets.size());
if (buckets.empty()) {
return buckets;
- }
-
+ }
+
return buckets;
-}
-
-static never_inline
+}
+
+static never_inline
void rightMergePass(CandidateSet &candidates, RoseBuildImpl &build,
- vector<RoseVertex> *dead, bool mergeRoses,
+ vector<RoseVertex> *dead, bool mergeRoses,
RoseAliasingInfo &rai) {
- DEBUG_PRINTF("begin\n");
-
+ DEBUG_PRINTF("begin\n");
+
if (candidates.empty()) {
return;
}
-
+
auto buckets = splitRightMergeBuckets(candidates, build);
-
+
for (const auto &bucket : buckets) {
assert(!bucket.empty());
for (auto it = bucket.begin(); it != bucket.end(); it++) {
@@ -1922,116 +1922,116 @@ void rightMergePass(CandidateSet &candidates, RoseBuildImpl &build,
candidates.erase(a);
break; // consider next a
}
- }
- }
- }
-
- DEBUG_PRINTF("%zu candidates remaining\n", candidates.size());
+ }
+ }
+ }
+
+ DEBUG_PRINTF("%zu candidates remaining\n", candidates.size());
assert(!hasOrphanedTops(build));
-}
-
-/**
- * \brief True if the given vertex has no siblings for the purposes of a
- * diamond merge.
- *
- * This is the case if it has no successors with more than one predecessor
- * (itself), or no predecessors with more than one successor (itself).
- */
-static
-bool hasNoDiamondSiblings(const RoseGraph &g, RoseVertex v) {
- if (has_successor(v, g)) {
- bool only_succ = true;
- for (const auto &w : adjacent_vertices_range(v, g)) {
+}
+
+/**
+ * \brief True if the given vertex has no siblings for the purposes of a
+ * diamond merge.
+ *
+ * This is the case if it has no successors with more than one predecessor
+ * (itself), or no predecessors with more than one successor (itself).
+ */
+static
+bool hasNoDiamondSiblings(const RoseGraph &g, RoseVertex v) {
+ if (has_successor(v, g)) {
+ bool only_succ = true;
+ for (const auto &w : adjacent_vertices_range(v, g)) {
if (in_degree(w, g) > 1) {
- only_succ = false;
- break;
- }
- }
- if (only_succ) {
- return true;
- }
- }
-
- // Any candidate vertex will have a predecessor; the only vertices without
- // preds are the root vertices.
- assert(in_edges(v, g).first != in_edges(v, g).second);
-
- bool only_pred = true;
- for (const auto &u : inv_adjacent_vertices_range(v, g)) {
+ only_succ = false;
+ break;
+ }
+ }
+ if (only_succ) {
+ return true;
+ }
+ }
+
+ // Any candidate vertex will have a predecessor; the only vertices without
+ // preds are the root vertices.
+ assert(in_edges(v, g).first != in_edges(v, g).second);
+
+ bool only_pred = true;
+ for (const auto &u : inv_adjacent_vertices_range(v, g)) {
if (out_degree(u, g) > 1) {
- only_pred = false;
- break;
- }
- }
-
- return only_pred;
-}
-
-/**
- * \brief Filter out some merge candidates that are not mergeable by a diamond
- * merge.
- */
-static
-void filterDiamondCandidates(RoseGraph &g, CandidateSet &candidates) {
- DEBUG_PRINTF("%zu candidates enter\n", candidates.size());
-
- vector<RoseVertex> dead;
- for (const auto &v : candidates) {
- if (hasNoDiamondSiblings(g, v)) {
- dead.push_back(v);
- }
- }
-
- for (const auto &v : dead) {
- candidates.erase(v);
- }
-
- DEBUG_PRINTF("pruned %zu candidates, leaving %zu\n", dead.size(),
- candidates.size());
-}
-
-void aliasRoles(RoseBuildImpl &build, bool mergeRoses) {
- const CompileContext &cc = build.cc;
- RoseGraph &g = build.g;
+ only_pred = false;
+ break;
+ }
+ }
+
+ return only_pred;
+}
+
+/**
+ * \brief Filter out some merge candidates that are not mergeable by a diamond
+ * merge.
+ */
+static
+void filterDiamondCandidates(RoseGraph &g, CandidateSet &candidates) {
+ DEBUG_PRINTF("%zu candidates enter\n", candidates.size());
+
+ vector<RoseVertex> dead;
+ for (const auto &v : candidates) {
+ if (hasNoDiamondSiblings(g, v)) {
+ dead.push_back(v);
+ }
+ }
+
+ for (const auto &v : dead) {
+ candidates.erase(v);
+ }
+
+ DEBUG_PRINTF("pruned %zu candidates, leaving %zu\n", dead.size(),
+ candidates.size());
+}
+
+void aliasRoles(RoseBuildImpl &build, bool mergeRoses) {
+ const CompileContext &cc = build.cc;
+ RoseGraph &g = build.g;
assert(!hasOrphanedTops(build));
assert(canImplementGraphs(build));
-
- if (!cc.grey.roseRoleAliasing || !cc.grey.roseGraphReduction) {
- return;
- }
-
- DEBUG_PRINTF("doing role aliasing mr=%d\n", (int)mergeRoses);
-
+
+ if (!cc.grey.roseRoleAliasing || !cc.grey.roseGraphReduction) {
+ return;
+ }
+
+ DEBUG_PRINTF("doing role aliasing mr=%d\n", (int)mergeRoses);
+
RoseAliasingInfo rai(build);
- mergeRoses &= cc.grey.mergeRose & cc.grey.roseMergeRosesDuringAliasing;
-
+ mergeRoses &= cc.grey.mergeRose & cc.grey.roseMergeRosesDuringAliasing;
+
CandidateSet candidates;
- findCandidates(build, &candidates);
-
- DEBUG_PRINTF("candidates %zu\n", candidates.size());
-
- vector<RoseVertex> dead;
- size_t old_dead_size = 0;
- do {
- old_dead_size = dead.size();
+ findCandidates(build, &candidates);
+
+ DEBUG_PRINTF("candidates %zu\n", candidates.size());
+
+ vector<RoseVertex> dead;
+ size_t old_dead_size = 0;
+ do {
+ old_dead_size = dead.size();
leftMergePass(candidates, build, &dead, rai);
rightMergePass(candidates, build, &dead, mergeRoses, rai);
- } while (old_dead_size != dead.size());
-
- /* Diamond merge passes cannot create extra merges as they require the same
-     * succ and preds before merging, so if a succ/pred was ineligible due
-     * to a merge to a different pred/succ before a diamond merge, it will
-     * still be ineligible afterwards. */
- filterDiamondCandidates(g, candidates);
+ } while (old_dead_size != dead.size());
+
+ /* Diamond merge passes cannot create extra merges as they require the same
+     * succ and preds before merging, so if a succ/pred was ineligible due
+     * to a merge to a different pred/succ before a diamond merge, it will
+     * still be ineligible afterwards. */
+ filterDiamondCandidates(g, candidates);
diamondMergePass(candidates, build, &dead, mergeRoses, rai);
-
- DEBUG_PRINTF("killed %zu vertices\n", dead.size());
- build.removeVertices(dead);
+
+ DEBUG_PRINTF("killed %zu vertices\n", dead.size());
+ build.removeVertices(dead);
assert(!hasOrphanedTops(build));
assert(canImplementGraphs(build));
-}
-
+}
+
namespace {
struct DupeLeafKey {
explicit DupeLeafKey(const RoseVertexProps &litv)
@@ -2332,4 +2332,4 @@ void uncalcLeaves(RoseBuildImpl &build) {
build.removeVertices(dead);
}
-} // namespace ue2
+} // namespace ue2
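
To make the control flow of aliasRoles() above easier to follow: the left and right merge passes are repeated until a whole round kills no further vertices, and only then is the one-shot diamond pass run. A minimal standalone C++ sketch of that fixed-point loop, with toy erase passes standing in for the real leftMergePass/rightMergePass (names and behaviour here are illustrative only, not the Hyperscan code):

    #include <cstddef>
    #include <cstdio>
    #include <set>
    #include <vector>

    // Toy pass: erases multiples of 'modulus' and records them in 'dead',
    // the way the real passes record merged-away vertices.
    static void erasePass(std::set<int> &candidates, std::vector<int> &dead,
                          int modulus) {
        for (auto it = candidates.begin(); it != candidates.end();) {
            if (*it % modulus == 0) {
                dead.push_back(*it);
                it = candidates.erase(it);
            } else {
                ++it;
            }
        }
    }

    int main() {
        std::set<int> candidates = {1, 2, 3, 4, 5, 6, 7, 8, 9};
        std::vector<int> dead;
        std::size_t old_dead_size;
        do { // iterate to a fixed point, as aliasRoles() does
            old_dead_size = dead.size();
            erasePass(candidates, dead, 2); // stands in for leftMergePass
            erasePass(candidates, dead, 3); // stands in for rightMergePass
        } while (old_dead_size != dead.size());
        std::printf("killed %zu, %zu left\n", dead.size(), candidates.size());
        return 0;
    }

The loop terminates because each iteration either kills a vertex (so dead grows) or changes nothing; the same argument bounds the real pass sequence by the vertex count.
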
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_role_aliasing.h b/contrib/libs/hyperscan/src/rose/rose_build_role_aliasing.h
index 33f0bf2ddd..4655f10d52 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_role_aliasing.h
+++ b/contrib/libs/hyperscan/src/rose/rose_build_role_aliasing.h
@@ -1,48 +1,48 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
#ifndef ROSE_BUILD_ROLE_ALIASING_H
#define ROSE_BUILD_ROLE_ALIASING_H
-
+
/** \file
* \brief Rose Build: functions for reducing the size of the Rose graph
* through merging roles (RoseVertices) together.
*/
-namespace ue2 {
-
-class RoseBuildImpl;
-
-void aliasRoles(RoseBuildImpl &build, bool mergeRoses);
-
+namespace ue2 {
+
+class RoseBuildImpl;
+
+void aliasRoles(RoseBuildImpl &build, bool mergeRoses);
+
void mergeDupeLeaves(RoseBuildImpl &build);
void uncalcLeaves(RoseBuildImpl &build);
-} // namespace ue2
-
-#endif
+} // namespace ue2
+
+#endif
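
The aliasRoles() entry point declared here relies on the diamond sibling test documented in the .cpp above. A standalone approximation of that test on a plain Boost adjacency_list (an assumption: the RoseGraph behaves like any bidirectional BGL graph for degree queries; this is a sketch, not the Hyperscan source):

    #include <boost/graph/adjacency_list.hpp>

    using Graph = boost::adjacency_list<boost::vecS, boost::vecS,
                                        boost::bidirectionalS>;
    using Vertex = Graph::vertex_descriptor;

    // A vertex only has a potential diamond partner if some successor has
    // another predecessor or some predecessor has another successor.
    static bool noDiamondSiblings(const Graph &g, Vertex v) {
        if (boost::out_degree(v, g) > 0) {
            bool only_succ = true;
            for (auto [ai, ae] = boost::adjacent_vertices(v, g);
                 ai != ae; ++ai) {
                if (boost::in_degree(*ai, g) > 1) {
                    only_succ = false;
                    break;
                }
            }
            if (only_succ) {
                return true; // v is the only pred of all its successors
            }
        }
        for (auto [ii, ie] = boost::inv_adjacent_vertices(v, g);
             ii != ie; ++ii) {
            if (boost::out_degree(*ii, g) > 1) {
                return false; // a predecessor also leads somewhere else
            }
        }
        return true; // v is the only succ of all its predecessors
    }

    int main() {
        Graph g(4);
        boost::add_edge(0, 1, g); // diamond: 0 -> {1,2} -> 3
        boost::add_edge(0, 2, g);
        boost::add_edge(1, 3, g);
        boost::add_edge(2, 3, g);
        return noDiamondSiblings(g, 1) ? 1 : 0; // 1 and 2 are siblings: 0
    }
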
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_scatter.cpp b/contrib/libs/hyperscan/src/rose/rose_build_scatter.cpp
index 35d66df9d2..87085ae9a8 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_scatter.cpp
+++ b/contrib/libs/hyperscan/src/rose/rose_build_scatter.cpp
@@ -1,131 +1,131 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "rose_build_scatter.h"
-#include "ue2common.h"
-#include "util/container.h"
-#include "util/multibit_build.h"
-
-#include <cstring> // memset
-#include <set>
-
-using namespace std;
-
-namespace ue2 {
-
-template<typename T>
-static
-void rebase(vector<T> *p, u32 adj) {
- for (typename vector<T>::iterator it = p->begin(); it != p->end(); ++it) {
- DEBUG_PRINTF("=%u+%u\n", it->offset, adj);
- it->offset += adj;
- }
-}
-
-static
-void rebase(scatter_plan_raw *raw, u32 adj) {
- rebase(&raw->p_u64a, adj);
- rebase(&raw->p_u32, adj);
- rebase(&raw->p_u16, adj);
- rebase(&raw->p_u8, adj);
-}
-
-static
-void merge_in(scatter_plan_raw *out, const scatter_plan_raw &in) {
- insert(&out->p_u64a, out->p_u64a.end(), in.p_u64a);
- insert(&out->p_u32, out->p_u32.end(), in.p_u32);
- insert(&out->p_u16, out->p_u16.end(), in.p_u16);
- insert(&out->p_u8, out->p_u8.end(), in.p_u8);
-}
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "rose_build_scatter.h"
+#include "ue2common.h"
+#include "util/container.h"
+#include "util/multibit_build.h"
+
+#include <cstring> // memset
+#include <set>
+
+using namespace std;
+
+namespace ue2 {
+
+template<typename T>
+static
+void rebase(vector<T> *p, u32 adj) {
+ for (typename vector<T>::iterator it = p->begin(); it != p->end(); ++it) {
+ DEBUG_PRINTF("=%u+%u\n", it->offset, adj);
+ it->offset += adj;
+ }
+}
+
+static
+void rebase(scatter_plan_raw *raw, u32 adj) {
+ rebase(&raw->p_u64a, adj);
+ rebase(&raw->p_u32, adj);
+ rebase(&raw->p_u16, adj);
+ rebase(&raw->p_u8, adj);
+}
+
+static
+void merge_in(scatter_plan_raw *out, const scatter_plan_raw &in) {
+ insert(&out->p_u64a, out->p_u64a.end(), in.p_u64a);
+ insert(&out->p_u32, out->p_u32.end(), in.p_u32);
+ insert(&out->p_u16, out->p_u16.end(), in.p_u16);
+ insert(&out->p_u8, out->p_u8.end(), in.p_u8);
+}
+
scatter_plan_raw buildStateScatterPlan(u32 role_state_offset,
u32 role_state_count, u32 left_array_count, u32 left_prefix_count,
const RoseStateOffsets &stateOffsets, bool streaming,
u32 leaf_array_count, u32 outfix_begin, u32 outfix_end) {
scatter_plan_raw out;
- /* init role array */
- scatter_plan_raw spr_role;
- mmbBuildClearPlan(role_state_count, &spr_role);
- rebase(&spr_role, role_state_offset);
+ /* init role array */
+ scatter_plan_raw spr_role;
+ mmbBuildClearPlan(role_state_count, &spr_role);
+ rebase(&spr_role, role_state_offset);
merge_in(&out, spr_role);
-
- /* init rose array: turn on prefixes */
- u32 rose_array_offset = stateOffsets.activeLeftArray;
- scatter_plan_raw spr_rose;
- mmbBuildInitRangePlan(left_array_count, 0, left_prefix_count, &spr_rose);
- rebase(&spr_rose, rose_array_offset);
+
+ /* init rose array: turn on prefixes */
+ u32 rose_array_offset = stateOffsets.activeLeftArray;
+ scatter_plan_raw spr_rose;
+ mmbBuildInitRangePlan(left_array_count, 0, left_prefix_count, &spr_rose);
+ rebase(&spr_rose, rose_array_offset);
merge_in(&out, spr_rose);
-
- /* suffix/outfix array */
- scatter_plan_raw spr_leaf;
- if (streaming) {
- mmbBuildInitRangePlan(leaf_array_count, outfix_begin, outfix_end,
- &spr_leaf);
- } else {
- mmbBuildClearPlan(leaf_array_count, &spr_leaf);
- }
- rebase(&spr_leaf, stateOffsets.activeLeafArray);
+
+ /* suffix/outfix array */
+ scatter_plan_raw spr_leaf;
+ if (streaming) {
+ mmbBuildInitRangePlan(leaf_array_count, outfix_begin, outfix_end,
+ &spr_leaf);
+ } else {
+ mmbBuildClearPlan(leaf_array_count, &spr_leaf);
+ }
+ rebase(&spr_leaf, stateOffsets.activeLeafArray);
merge_in(&out, spr_leaf);
return out;
-}
-
-u32 aux_size(const scatter_plan_raw &raw) {
- u32 rv = 0;
-
- rv += byte_length(raw.p_u64a);
- rv += byte_length(raw.p_u32);
- rv += byte_length(raw.p_u16);
- rv += byte_length(raw.p_u8);
-
- return rv;
-}
-
-void write_out(scatter_full_plan *plan_out, void *aux_out,
- const scatter_plan_raw &raw, u32 aux_base_offset) {
- memset(plan_out, 0, sizeof(*plan_out));
-
-#define DO_CASE(t) \
- if (!raw.p_##t.empty()) { \
- plan_out->s_##t##_offset = aux_base_offset; \
- plan_out->s_##t##_count = raw.p_##t.size(); \
- assert(ISALIGNED_N((char *)aux_out + aux_base_offset, \
- alignof(scatter_unit_##t))); \
- memcpy((char *)aux_out + aux_base_offset, raw.p_##t.data(), \
- byte_length(raw.p_##t)); \
- aux_base_offset += byte_length(raw.p_##t); \
- }
-
- DO_CASE(u64a);
- DO_CASE(u32);
- DO_CASE(u16);
- DO_CASE(u8);
-}
-
-} // namespace ue2
+}
+
+u32 aux_size(const scatter_plan_raw &raw) {
+ u32 rv = 0;
+
+ rv += byte_length(raw.p_u64a);
+ rv += byte_length(raw.p_u32);
+ rv += byte_length(raw.p_u16);
+ rv += byte_length(raw.p_u8);
+
+ return rv;
+}
+
+void write_out(scatter_full_plan *plan_out, void *aux_out,
+ const scatter_plan_raw &raw, u32 aux_base_offset) {
+ memset(plan_out, 0, sizeof(*plan_out));
+
+#define DO_CASE(t) \
+ if (!raw.p_##t.empty()) { \
+ plan_out->s_##t##_offset = aux_base_offset; \
+ plan_out->s_##t##_count = raw.p_##t.size(); \
+ assert(ISALIGNED_N((char *)aux_out + aux_base_offset, \
+ alignof(scatter_unit_##t))); \
+ memcpy((char *)aux_out + aux_base_offset, raw.p_##t.data(), \
+ byte_length(raw.p_##t)); \
+ aux_base_offset += byte_length(raw.p_##t); \
+ }
+
+ DO_CASE(u64a);
+ DO_CASE(u32);
+ DO_CASE(u16);
+ DO_CASE(u8);
+}
+
+} // namespace ue2
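
The pattern in buildStateScatterPlan() above is: build each region's multibit init plan at offset zero, rebase() it to the region's real offset in the stream state, then merge_in() appends it to the combined plan. A toy, self-contained version of that composition (a simplified single-width type; names are illustrative, not the real scatter structs):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    struct Unit { uint32_t offset; uint8_t value; };
    struct Plan { std::vector<Unit> p_u8; };

    // Shift every write in the plan by the region's base offset.
    static void rebase(Plan &p, uint32_t adj) {
        for (auto &u : p.p_u8) {
            u.offset += adj;
        }
    }

    // Append one region's writes to the combined plan.
    static void mergeIn(Plan &out, const Plan &in) {
        out.p_u8.insert(out.p_u8.end(), in.p_u8.begin(), in.p_u8.end());
    }

    int main() {
        Plan role{{{0, 0}, {1, 0}}}; // clear two bytes of role state
        Plan leaf{{{0, 1}}};         // set one byte of leaf state
        rebase(role, 16);            // role region lives at state offset 16
        rebase(leaf, 32);            // leaf region lives at state offset 32
        Plan out;
        mergeIn(out, role);
        mergeIn(out, leaf);
        for (const auto &u : out.p_u8) {
            std::printf("write %u at offset %u\n", (unsigned)u.value,
                        (unsigned)u.offset);
        }
        return 0;
    }

Keeping every sub-plan zero-based until the final rebase is what lets mmbBuildClearPlan() and mmbBuildInitRangePlan() stay ignorant of the overall state layout.
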
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_scatter.h b/contrib/libs/hyperscan/src/rose/rose_build_scatter.h
index 7ce1c034ab..67a82b9937 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_scatter.h
+++ b/contrib/libs/hyperscan/src/rose/rose_build_scatter.h
@@ -1,60 +1,60 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef ROSE_BUILD_SCATTER_H
-#define ROSE_BUILD_SCATTER_H
-
-#include "rose_internal.h"
-#include "util/scatter.h"
-
-#include <vector>
-
-namespace ue2 {
-
-class RoseBuildImpl;
-
-struct scatter_plan_raw {
- std::vector<scatter_unit_u64a> p_u64a;
- std::vector<scatter_unit_u32> p_u32;
- std::vector<scatter_unit_u16> p_u16;
- std::vector<scatter_unit_u8> p_u8;
-};
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ROSE_BUILD_SCATTER_H
+#define ROSE_BUILD_SCATTER_H
+
+#include "rose_internal.h"
+#include "util/scatter.h"
+
+#include <vector>
+
+namespace ue2 {
+
+class RoseBuildImpl;
+
+struct scatter_plan_raw {
+ std::vector<scatter_unit_u64a> p_u64a;
+ std::vector<scatter_unit_u32> p_u32;
+ std::vector<scatter_unit_u16> p_u16;
+ std::vector<scatter_unit_u8> p_u8;
+};
+
scatter_plan_raw buildStateScatterPlan(u32 role_state_offset,
u32 role_state_count, u32 left_array_count, u32 left_prefix_count,
const RoseStateOffsets &stateOffsets, bool streaming,
u32 leaf_array_count, u32 outfix_begin, u32 outfix_end);
-
-u32 aux_size(const scatter_plan_raw &raw);
-
-void write_out(scatter_full_plan *plan_out, void *aux_out,
- const scatter_plan_raw &raw, u32 aux_base_offset);
-
-} // namespace ue2
-
-#endif
+
+u32 aux_size(const scatter_plan_raw &raw);
+
+void write_out(scatter_full_plan *plan_out, void *aux_out,
+ const scatter_plan_raw &raw, u32 aux_base_offset);
+
+} // namespace ue2
+
+#endif
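
At runtime the four vectors in scatter_plan_raw are flattened into one auxiliary byte region, which is what write_out() in the .cpp above does per width class. A simplified single-class sketch of that packing (a hypothetical Unit type; the real code repeats this for u64a/u32/u16/u8 via the DO_CASE macro):

    #include <cstddef>
    #include <cstdio>
    #include <cstring>
    #include <vector>

    struct Unit { unsigned offset; unsigned val; };

    int main() {
        std::vector<Unit> p_u32 = {{0, 5}, {8, 7}};
        std::vector<unsigned char> aux(64, 0);

        unsigned aux_base = 0;
        unsigned section_offset = aux_base;           // where this class starts
        unsigned section_count = (unsigned)p_u32.size();
        std::size_t bytes = p_u32.size() * sizeof(Unit);
        std::memcpy(aux.data() + aux_base, p_u32.data(), bytes);
        aux_base += (unsigned)bytes;                  // next class follows

        std::printf("u32 section at %u, %u units, %u aux bytes used\n",
                    section_offset, section_count, aux_base);
        return 0;
    }

aux_size() above is just the sum of those per-class byte lengths, so the caller can allocate the region before write_out() fills it in.
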
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_util.h b/contrib/libs/hyperscan/src/rose/rose_build_util.h
index 2318bee856..81bb68459b 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_util.h
+++ b/contrib/libs/hyperscan/src/rose/rose_build_util.h
@@ -1,62 +1,62 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef ROSE_BUILD_UTIL_H
-#define ROSE_BUILD_UTIL_H
-
-#include "rose_graph.h"
-#include "util/graph.h"
-
-#include <algorithm>
-
-namespace ue2 {
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ROSE_BUILD_UTIL_H
+#define ROSE_BUILD_UTIL_H
+
+#include "rose_graph.h"
+#include "util/graph.h"
+
+#include <algorithm>
+
+namespace ue2 {
+
/** Max allowed width for transient graphs in block mode */
#define ROSE_BLOCK_TRANSIENT_MAX_WIDTH 255U
-
-/**
- * \brief Add two Rose depths together, coping correctly with infinity at
- * ROSE_BOUND_INF.
- */
-static inline
-u32 add_rose_depth(u32 a, u32 b) {
- assert(a <= ROSE_BOUND_INF);
- assert(b <= ROSE_BOUND_INF);
-
- if (a == ROSE_BOUND_INF || b == ROSE_BOUND_INF) {
- return ROSE_BOUND_INF;
- }
-
- u32 rv = a + b;
- assert(rv >= a && rv >= b);
- return rv;
-}
-
-} // namespace ue2
-
-#endif // ROSE_BUILD_UTIL_H
+
+/**
+ * \brief Add two Rose depths together, coping correctly with infinity at
+ * ROSE_BOUND_INF.
+ */
+static inline
+u32 add_rose_depth(u32 a, u32 b) {
+ assert(a <= ROSE_BOUND_INF);
+ assert(b <= ROSE_BOUND_INF);
+
+ if (a == ROSE_BOUND_INF || b == ROSE_BOUND_INF) {
+ return ROSE_BOUND_INF;
+ }
+
+ u32 rv = a + b;
+ assert(rv >= a && rv >= b);
+ return rv;
+}
+
+} // namespace ue2
+
+#endif // ROSE_BUILD_UTIL_H
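
add_rose_depth() above is ordinary addition with an absorbing infinity: once either operand is ROSE_BOUND_INF (~0U), the sum is pinned there instead of wrapping. A standalone copy with a usage example (same arithmetic, minus the Rose types; purely illustrative):

    #include <cassert>
    #include <cstdio>

    #define ROSE_BOUND_INF (~0U)

    static unsigned add_rose_depth(unsigned a, unsigned b) {
        if (a == ROSE_BOUND_INF || b == ROSE_BOUND_INF) {
            return ROSE_BOUND_INF; // infinity absorbs everything
        }
        unsigned rv = a + b;
        assert(rv >= a && rv >= b); // catch silent wraparound in debug
        return rv;
    }

    int main() {
        std::printf("%u\n", add_rose_depth(3, 4));              // 7
        std::printf("%u\n", add_rose_depth(3, ROSE_BOUND_INF)); // 4294967295
        return 0;
    }

The assertions mirror the original: both inputs must already be valid depths, and a finite sum must not wrap the 32-bit range.
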
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_width.cpp b/contrib/libs/hyperscan/src/rose/rose_build_width.cpp
index 422e77d479..182b62ee6f 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_width.cpp
+++ b/contrib/libs/hyperscan/src/rose/rose_build_width.cpp
@@ -1,255 +1,255 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "rose_build_width.h"
-
-#include "nfagraph/ng_holder.h"
-#include "nfagraph/ng_dump.h"
-#include "nfagraph/ng_width.h"
-#include "rose_build_impl.h"
-#include "ue2common.h"
-#include "util/graph.h"
-#include "util/graph_range.h"
-
-#include <algorithm>
-
-using namespace std;
-
-namespace ue2 {
-
-static
-bool is_end_anchored(const RoseGraph &g, RoseVertex v) {
- for (auto w : adjacent_vertices_range(v, g)) {
- if (g[w].eod_accept) {
- return true;
- }
- }
-
- return false;
-}
-
-u32 findMinWidth(const RoseBuildImpl &tbi, enum rose_literal_table table) {
- if (table != ROSE_FLOATING && table != ROSE_ANCHORED &&
- table != ROSE_EOD_ANCHORED) {
- /* handle other tables if ever required */
- assert(0);
- return 0;
- }
-
- const RoseGraph &g = tbi.g;
-
- vector<RoseVertex> table_verts;
-
- for (auto v : vertices_range(g)) {
- if (tbi.hasLiteralInTable(v, table)) {
- table_verts.push_back(v);
- }
- }
-
- set<RoseVertex> reachable;
- find_reachable(g, table_verts, &reachable);
-
- u32 minWidth = ROSE_BOUND_INF;
- for (auto v : reachable) {
- if (g[v].eod_accept) {
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "rose_build_width.h"
+
+#include "nfagraph/ng_holder.h"
+#include "nfagraph/ng_dump.h"
+#include "nfagraph/ng_width.h"
+#include "rose_build_impl.h"
+#include "ue2common.h"
+#include "util/graph.h"
+#include "util/graph_range.h"
+
+#include <algorithm>
+
+using namespace std;
+
+namespace ue2 {
+
+static
+bool is_end_anchored(const RoseGraph &g, RoseVertex v) {
+ for (auto w : adjacent_vertices_range(v, g)) {
+ if (g[w].eod_accept) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+u32 findMinWidth(const RoseBuildImpl &tbi, enum rose_literal_table table) {
+ if (table != ROSE_FLOATING && table != ROSE_ANCHORED &&
+ table != ROSE_EOD_ANCHORED) {
+ /* handle other tables if ever required */
+ assert(0);
+ return 0;
+ }
+
+ const RoseGraph &g = tbi.g;
+
+ vector<RoseVertex> table_verts;
+
+ for (auto v : vertices_range(g)) {
+ if (tbi.hasLiteralInTable(v, table)) {
+ table_verts.push_back(v);
+ }
+ }
+
+ set<RoseVertex> reachable;
+ find_reachable(g, table_verts, &reachable);
+
+ u32 minWidth = ROSE_BOUND_INF;
+ for (auto v : reachable) {
+ if (g[v].eod_accept) {
DEBUG_PRINTF("skipping %zu - not a real vertex\n", g[v].index);
- continue;
- }
-
- const u32 w = g[v].min_offset;
-
- if (!g[v].reports.empty()) {
+ continue;
+ }
+
+ const u32 w = g[v].min_offset;
+
+ if (!g[v].reports.empty()) {
DEBUG_PRINTF("%zu can fire report at offset %u\n", g[v].index, w);
- minWidth = min(minWidth, w);
- }
-
- if (is_end_anchored(g, v)) {
+ minWidth = min(minWidth, w);
+ }
+
+ if (is_end_anchored(g, v)) {
DEBUG_PRINTF("%zu can fire eod report at offset %u\n", g[v].index,
w);
- minWidth = min(minWidth, w);
- }
-
- if (g[v].suffix) {
- depth suffix_width = findMinWidth(g[v].suffix, g[v].suffix.top);
- assert(suffix_width.is_reachable());
- DEBUG_PRINTF("%zu has suffix with top %u (width %s), can fire "
- "report at %u\n",
+ minWidth = min(minWidth, w);
+ }
+
+ if (g[v].suffix) {
+ depth suffix_width = findMinWidth(g[v].suffix, g[v].suffix.top);
+ assert(suffix_width.is_reachable());
+ DEBUG_PRINTF("%zu has suffix with top %u (width %s), can fire "
+ "report at %u\n",
g[v].index, g[v].suffix.top, suffix_width.str().c_str(),
- w + suffix_width);
- minWidth = min(minWidth, w + suffix_width);
- }
- }
-
- /* TODO: take into account the chain relationship between the mpv and other
- * engines */
- DEBUG_PRINTF("min width %u\n", minWidth);
- return minWidth;
-}
-
-u32 findMaxBAWidth(const RoseBuildImpl &tbi) {
- const RoseGraph &g = tbi.g;
- if (!isLeafNode(tbi.root, g)) {
- DEBUG_PRINTF("floating literal -> no max width\n");
- return ROSE_BOUND_INF;
- }
-
- u64a maxWidth = 0;
-
- for (const auto &outfix : tbi.outfixes) {
- maxWidth = max(maxWidth, (u64a)outfix.maxBAWidth);
- if (maxWidth >= ROSE_BOUND_INF) {
- DEBUG_PRINTF("outfix with no max ba width\n");
- return ROSE_BOUND_INF;
- }
- }
-
- // Everyone's anchored, so the max width can be taken from the max
- // max_offset on our vertices (so long as all accepts are EOD).
- for (auto v : vertices_range(g)) {
- if (!g[v].reports.empty() && !g[v].eod_accept) {
- DEBUG_PRINTF("accept not at eod\n");
- return ROSE_BOUND_INF;
- }
-
- if (g[v].reports.empty() && !g[v].suffix) {
- continue;
- }
-
- assert(g[v].eod_accept || g[v].suffix);
-
- u64a w = g[v].max_offset;
-
- if (g[v].suffix) {
- if (has_non_eod_accepts(g[v].suffix)) {
- return ROSE_BOUND_INF;
- }
- depth suffix_width = findMaxWidth(g[v].suffix, g[v].suffix.top);
- DEBUG_PRINTF("suffix max width for top %u is %s\n", g[v].suffix.top,
- suffix_width.str().c_str());
- assert(suffix_width.is_reachable());
- if (!suffix_width.is_finite()) {
- DEBUG_PRINTF("suffix too wide\n");
- return ROSE_BOUND_INF;
- }
-
- w += suffix_width;
- }
-
- maxWidth = max(maxWidth, w);
- if (maxWidth >= ROSE_BOUND_INF) {
- DEBUG_PRINTF("too wide\n");
- return ROSE_BOUND_INF;
- }
- }
-
- DEBUG_PRINTF("max ba width %llu\n", maxWidth);
- assert(maxWidth < ROSE_BOUND_INF);
- return maxWidth;
-}
-
-u32 findMaxBAWidth(const RoseBuildImpl &tbi, enum rose_literal_table table) {
- const RoseGraph &g = tbi.g;
- if (!isLeafNode(tbi.root, g) && table == ROSE_FLOATING) {
- DEBUG_PRINTF("floating literal -> no max width\n");
- return ROSE_BOUND_INF;
- }
-
- if (table != ROSE_FLOATING && table != ROSE_ANCHORED) {
- /* handle other tables if ever required */
- assert(0);
- return ROSE_BOUND_INF;
- }
-
- DEBUG_PRINTF("looking for a max ba width for %s\n",
- table == ROSE_FLOATING ? "floating" : "anchored");
-
- vector<RoseVertex> table_verts;
-
- for (auto v : vertices_range(g)) {
- if ((table == ROSE_FLOATING && tbi.isFloating(v))
- || (table == ROSE_ANCHORED && tbi.isAnchored(v))) {
- table_verts.push_back(v);
- }
- }
-
- set<RoseVertex> reachable;
- find_reachable(g, table_verts, &reachable);
-
- u64a maxWidth = 0;
- // Everyone's anchored, so the max width can be taken from the max
- // max_offset on our vertices (so long as all accepts are ACCEPT_EOD).
- for (auto v : reachable) {
+ w + suffix_width);
+ minWidth = min(minWidth, w + suffix_width);
+ }
+ }
+
+ /* TODO: take into account the chain relationship between the mpv and other
+ * engines */
+ DEBUG_PRINTF("min width %u\n", minWidth);
+ return minWidth;
+}
+
+u32 findMaxBAWidth(const RoseBuildImpl &tbi) {
+ const RoseGraph &g = tbi.g;
+ if (!isLeafNode(tbi.root, g)) {
+ DEBUG_PRINTF("floating literal -> no max width\n");
+ return ROSE_BOUND_INF;
+ }
+
+ u64a maxWidth = 0;
+
+ for (const auto &outfix : tbi.outfixes) {
+ maxWidth = max(maxWidth, (u64a)outfix.maxBAWidth);
+ if (maxWidth >= ROSE_BOUND_INF) {
+ DEBUG_PRINTF("outfix with no max ba width\n");
+ return ROSE_BOUND_INF;
+ }
+ }
+
+ // Everyone's anchored, so the max width can be taken from the max
+ // max_offset on our vertices (so long as all accepts are EOD).
+ for (auto v : vertices_range(g)) {
+ if (!g[v].reports.empty() && !g[v].eod_accept) {
+ DEBUG_PRINTF("accept not at eod\n");
+ return ROSE_BOUND_INF;
+ }
+
+ if (g[v].reports.empty() && !g[v].suffix) {
+ continue;
+ }
+
+ assert(g[v].eod_accept || g[v].suffix);
+
+ u64a w = g[v].max_offset;
+
+ if (g[v].suffix) {
+ if (has_non_eod_accepts(g[v].suffix)) {
+ return ROSE_BOUND_INF;
+ }
+ depth suffix_width = findMaxWidth(g[v].suffix, g[v].suffix.top);
+ DEBUG_PRINTF("suffix max width for top %u is %s\n", g[v].suffix.top,
+ suffix_width.str().c_str());
+ assert(suffix_width.is_reachable());
+ if (!suffix_width.is_finite()) {
+ DEBUG_PRINTF("suffix too wide\n");
+ return ROSE_BOUND_INF;
+ }
+
+ w += suffix_width;
+ }
+
+ maxWidth = max(maxWidth, w);
+ if (maxWidth >= ROSE_BOUND_INF) {
+ DEBUG_PRINTF("too wide\n");
+ return ROSE_BOUND_INF;
+ }
+ }
+
+ DEBUG_PRINTF("max ba width %llu\n", maxWidth);
+ assert(maxWidth < ROSE_BOUND_INF);
+ return maxWidth;
+}
+
+u32 findMaxBAWidth(const RoseBuildImpl &tbi, enum rose_literal_table table) {
+ const RoseGraph &g = tbi.g;
+ if (!isLeafNode(tbi.root, g) && table == ROSE_FLOATING) {
+ DEBUG_PRINTF("floating literal -> no max width\n");
+ return ROSE_BOUND_INF;
+ }
+
+ if (table != ROSE_FLOATING && table != ROSE_ANCHORED) {
+ /* handle other tables if ever required */
+ assert(0);
+ return ROSE_BOUND_INF;
+ }
+
+ DEBUG_PRINTF("looking for a max ba width for %s\n",
+ table == ROSE_FLOATING ? "floating" : "anchored");
+
+ vector<RoseVertex> table_verts;
+
+ for (auto v : vertices_range(g)) {
+ if ((table == ROSE_FLOATING && tbi.isFloating(v))
+ || (table == ROSE_ANCHORED && tbi.isAnchored(v))) {
+ table_verts.push_back(v);
+ }
+ }
+
+ set<RoseVertex> reachable;
+ find_reachable(g, table_verts, &reachable);
+
+ u64a maxWidth = 0;
+ // Everyone's anchored, so the max width can be taken from the max
+ // max_offset on our vertices (so long as all accepts are ACCEPT_EOD).
+ for (auto v : reachable) {
DEBUG_PRINTF("inspecting vert %zu\n", g[v].index);
-
- if (g[v].eod_accept) {
+
+ if (g[v].eod_accept) {
DEBUG_PRINTF("skipping %zu - not a real vertex\n", g[v].index);
- continue;
- }
-
- if (!g[v].reports.empty()) {
- DEBUG_PRINTF("accept not at eod\n");
- return ROSE_BOUND_INF;
- }
-
- u64a w = g[v].max_offset;
-
- u64a follow_max = tbi.calcSuccMaxBound(v); /* may have a long bound to
- accept_eod node */
-
- if (g[v].suffix) {
- if (has_non_eod_accepts(g[v].suffix)) {
- DEBUG_PRINTF("has accept\n");
- return ROSE_BOUND_INF;
- }
- depth suffix_width = findMaxWidth(g[v].suffix);
- DEBUG_PRINTF("suffix max width %s\n", suffix_width.str().c_str());
- assert(suffix_width.is_reachable());
- if (!suffix_width.is_finite()) {
- DEBUG_PRINTF("suffix too wide\n");
- return ROSE_BOUND_INF;
- }
- follow_max = max(follow_max, (u64a)suffix_width);
- }
-
- w += follow_max;
-
- DEBUG_PRINTF("w %llu\n", w);
-
- maxWidth = max(maxWidth, w);
- if (maxWidth >= ROSE_BOUND_INF) {
- DEBUG_PRINTF("too wide\n");
- return ROSE_BOUND_INF;
- }
- }
-
- DEBUG_PRINTF("max ba width %llu\n", maxWidth);
- assert(maxWidth < ROSE_BOUND_INF);
- return maxWidth;
-}
-
-} // namespace ue2
+ continue;
+ }
+
+ if (!g[v].reports.empty()) {
+ DEBUG_PRINTF("accept not at eod\n");
+ return ROSE_BOUND_INF;
+ }
+
+ u64a w = g[v].max_offset;
+
+ u64a follow_max = tbi.calcSuccMaxBound(v); /* may have a long bound to
+ accept_eod node */
+
+ if (g[v].suffix) {
+ if (has_non_eod_accepts(g[v].suffix)) {
+ DEBUG_PRINTF("has accept\n");
+ return ROSE_BOUND_INF;
+ }
+ depth suffix_width = findMaxWidth(g[v].suffix);
+ DEBUG_PRINTF("suffix max width %s\n", suffix_width.str().c_str());
+ assert(suffix_width.is_reachable());
+ if (!suffix_width.is_finite()) {
+ DEBUG_PRINTF("suffix too wide\n");
+ return ROSE_BOUND_INF;
+ }
+ follow_max = max(follow_max, (u64a)suffix_width);
+ }
+
+ w += follow_max;
+
+ DEBUG_PRINTF("w %llu\n", w);
+
+ maxWidth = max(maxWidth, w);
+ if (maxWidth >= ROSE_BOUND_INF) {
+ DEBUG_PRINTF("too wide\n");
+ return ROSE_BOUND_INF;
+ }
+ }
+
+ DEBUG_PRINTF("max ba width %llu\n", maxWidth);
+ assert(maxWidth < ROSE_BOUND_INF);
+ return maxWidth;
+}
+
+} // namespace ue2
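
The scan in findMinWidth() above reduces to: start at infinity and take the minimum over every reachable vertex that can produce a report, counting a suffix engine's own minimum width on top of the vertex's min_offset. A toy version over a flat vertex list (simplified fields; an assumption here is that a suffix_min_width of 0 means "no suffix"):

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    #define ROSE_BOUND_INF (~0U)

    struct Vert {
        unsigned min_offset;       // earliest offset this role can match at
        bool fires_report;         // has direct reports
        unsigned suffix_min_width; // extra bytes before a suffix can report
    };

    int main() {
        std::vector<Vert> reachable = {
            {5, true, 0},  // direct report at offset 5
            {2, false, 4}, // suffix completes 4 bytes later: offset 6
            {9, false, 0}, // neither: contributes nothing
        };
        unsigned minWidth = ROSE_BOUND_INF;
        for (const auto &v : reachable) {
            if (v.fires_report) {
                minWidth = std::min(minWidth, v.min_offset);
            }
            if (v.suffix_min_width != 0) {
                minWidth = std::min(minWidth,
                                    v.min_offset + v.suffix_min_width);
            }
        }
        std::printf("min width %u\n", minWidth); // prints 5
        return 0;
    }

findMaxBAWidth() is the mirror image: a maximum that bails out to ROSE_BOUND_INF as soon as any vertex, suffix or outfix is unbounded or can accept before EOD.
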
diff --git a/contrib/libs/hyperscan/src/rose/rose_build_width.h b/contrib/libs/hyperscan/src/rose/rose_build_width.h
index 1cbc1f1e12..a395b62daf 100644
--- a/contrib/libs/hyperscan/src/rose/rose_build_width.h
+++ b/contrib/libs/hyperscan/src/rose/rose_build_width.h
@@ -1,66 +1,66 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef ROSE_BUILD_WIDTH_H
-#define ROSE_BUILD_WIDTH_H
-
-#include "rose_build_impl.h"
-#include "ue2common.h"
-
-namespace ue2 {
-
-class RoseBuildImpl;
-
-/* returns a lower bound on the minimum number of bytes needed before a match
- * that requires the given literal table can be raised to the user
- *
- * returns ROSE_BOUND_INF if the table can never produce matches */
-u32 findMinWidth(const RoseBuildImpl &tbi, enum rose_literal_table table);
-
-/* returns an upper bound on the maximum length of a buffer that can result in
- * matches. If there are any patterns which are not bianchored (start and end
- * anchored), then there is no such limit and ROSE_BOUND_INF is returned.
- */
-u32 findMaxBAWidth(const RoseBuildImpl &tbi);
-
-/* returns an upper bound on the maximum length of a buffer that can result in
- * matches and requires the given table to be used. If there are any
- * patterns which are not bianchored (start and end anchored), then there is no
- * such limit and ROSE_BOUND_INF is returned.
- */
-u32 findMaxBAWidth(const RoseBuildImpl &tbi, enum rose_literal_table table);
-
-/**
- * Note: there is no function for determining the min width of the whole rose
- * as this is more easily done by the NG layer which has access to the full
- * nfagraphs before they are chopped into little pieces.
- */
-
-} // namespace ue2
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ROSE_BUILD_WIDTH_H
+#define ROSE_BUILD_WIDTH_H
+
+#include "rose_build_impl.h"
+#include "ue2common.h"
+
+namespace ue2 {
+
+class RoseBuildImpl;
+
+/* returns a lower bound on the minimum number of bytes needed before a match
+ * that requires the given literal table can be raised to the user
+ *
+ * returns ROSE_BOUND_INF if the table can never produce matches */
+u32 findMinWidth(const RoseBuildImpl &tbi, enum rose_literal_table table);
+
+/* returns an upper bound on the maximum length of a buffer that can result in
+ * matches. If there are any patterns which are not bianchored (start and end
+ * anchored), then there is no such limit and ROSE_BOUND_INF is returned.
+ */
+u32 findMaxBAWidth(const RoseBuildImpl &tbi);
+
+/* returns an upper bound on the maximum length of a buffer that can result in
+ * matches and requires the given table to be used. If there are any
+ * patterns which are not bianchored (start and end anchored), then there is no
+ * such limit and ROSE_BOUND_INF is returned.
+ */
+u32 findMaxBAWidth(const RoseBuildImpl &tbi, enum rose_literal_table table);
+
+/**
+ * Note: there is no function for determining the min width of the whole rose
+ * as this is more easily done by the NG layer which has access to the full
+ * nfagraphs before they are chopped into little pieces.
+ */
+
+} // namespace ue2
+
+#endif
diff --git a/contrib/libs/hyperscan/src/rose/rose_common.h b/contrib/libs/hyperscan/src/rose/rose_common.h
index 16347b1985..34678b8fcc 100644
--- a/contrib/libs/hyperscan/src/rose/rose_common.h
+++ b/contrib/libs/hyperscan/src/rose/rose_common.h
@@ -1,46 +1,46 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef ROSE_COMMON_H
-#define ROSE_COMMON_H
-
-// Common defs available to build-time clients as well as runtime.
-
-#define ROSE_BOUND_INF (~0U)
-#define MAX_MASK2_WIDTH 32
-
-// Max block width to use the combined small-block matcher on, instead of
-// running the floating and anchored tables.
-#define ROSE_SMALL_BLOCK_LEN 32
-
-/** \brief Length in bytes of a reach bitvector, used by the lookaround code. */
-#define REACH_BITVECTOR_LEN 32
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ROSE_COMMON_H
+#define ROSE_COMMON_H
+
+// Common defs available to build-time clients as well as runtime.
+
+#define ROSE_BOUND_INF (~0U)
+#define MAX_MASK2_WIDTH 32
+
+// Max block width to use the combined small-block matcher on, instead of
+// running the floating and anchored tables.
+#define ROSE_SMALL_BLOCK_LEN 32
+
+/** \brief Length in bytes of a reach bitvector, used by the lookaround code. */
+#define REACH_BITVECTOR_LEN 32
+
/** \brief Length in bytes of a reach bitvector for multi-path lookaround. */
#define MULTI_REACH_BITVECTOR_LEN 256
@@ -53,4 +53,4 @@
/** \brief Value used to represent an invalid Rose program offset. */
#define ROSE_INVALID_PROG_OFFSET 0
-#endif // ROSE_COMMON_H
+#endif // ROSE_COMMON_H
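
For context on REACH_BITVECTOR_LEN: a lookaround "reach" is one bit per possible byte value, so 256 bits fit in 32 bytes and membership is a shift-and-mask. A standalone illustration of that layout (the bit order here is an assumption for demonstration, not necessarily the engine's exact encoding):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    #define REACH_BITVECTOR_LEN 32

    static void set_reach(uint8_t *reach, uint8_t c) {
        reach[c / 8] |= (uint8_t)(1U << (c % 8));
    }

    static int in_reach(const uint8_t *reach, uint8_t c) {
        return (reach[c / 8] >> (c % 8)) & 1U;
    }

    int main() {
        uint8_t reach[REACH_BITVECTOR_LEN];
        std::memset(reach, 0, sizeof(reach));
        set_reach(reach, 'a');
        set_reach(reach, 'b');
        std::printf("'a': %d, 'z': %d\n", in_reach(reach, 'a'),
                    in_reach(reach, 'z')); // 'a': 1, 'z': 0
        return 0;
    }

MULTI_REACH_BITVECTOR_LEN above (256 bytes) extends this to multi-path lookaround, which needs more than one bit per byte value.
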
diff --git a/contrib/libs/hyperscan/src/rose/rose_graph.h b/contrib/libs/hyperscan/src/rose/rose_graph.h
index d526463934..b5bf1985d8 100644
--- a/contrib/libs/hyperscan/src/rose/rose_graph.h
+++ b/contrib/libs/hyperscan/src/rose/rose_graph.h
@@ -1,69 +1,69 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief BGL graph structures used internally by the Rose build process.
- *
- * BGL graph structures used internally by the build-time portion of Rose. The
- * graph used for input is in rose_in_graph.h since it's part of the RoseBuild
- * external API.
- */
-
-#ifndef ROSE_GRAPH_H
-#define ROSE_GRAPH_H
-
-#include "ue2common.h"
-#include "rose_build.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief BGL graph structures used internally by the Rose build process.
+ *
+ * BGL graph structures used internally by the build-time portion of Rose. The
+ * graph used for input is in rose_in_graph.h since it's part of the RoseBuild
+ * external API.
+ */
+
+#ifndef ROSE_GRAPH_H
+#define ROSE_GRAPH_H
+
+#include "ue2common.h"
+#include "rose_build.h"
#include "rose_internal.h"
-#include "nfa/nfa_internal.h" // for MO_INVALID_IDX
-#include "util/depth.h"
+#include "nfa/nfa_internal.h" // for MO_INVALID_IDX
+#include "util/depth.h"
#include "util/flat_containers.h"
#include "util/ue2_graph.h"
-
-#include <memory>
-#include <set>
-
-namespace ue2 {
-
-struct CastleProto;
-struct raw_dfa;
-struct raw_som_dfa;
+
+#include <memory>
+#include <set>
+
+namespace ue2 {
+
+struct CastleProto;
+struct raw_dfa;
+struct raw_som_dfa;
struct TamaProto;
-
-/** \brief Table type for a literal. */
-enum rose_literal_table {
- ROSE_ANCHORED, //!< literals anchored to start
- ROSE_FLOATING, //!< general floating literals
- ROSE_EOD_ANCHORED, //!< literals that match near EOD
- ROSE_ANCHORED_SMALL_BLOCK, //!< anchored literals for small block table
- ROSE_EVENT //!< "literal-like" events, such as EOD
-};
-
+
+/** \brief Table type for a literal. */
+enum rose_literal_table {
+ ROSE_ANCHORED, //!< literals anchored to start
+ ROSE_FLOATING, //!< general floating literals
+ ROSE_EOD_ANCHORED, //!< literals that match near EOD
+ ROSE_ANCHORED_SMALL_BLOCK, //!< anchored literals for small block table
+ ROSE_EVENT //!< "literal-like" events, such as EOD
+};
+
/** \brief Edge history types. */
enum RoseRoleHistory {
ROSE_ROLE_HISTORY_NONE, //!< no special history
@@ -72,159 +72,159 @@ enum RoseRoleHistory {
ROSE_ROLE_HISTORY_INVALID //!< history not yet assigned
};
-#include "util/order_check.h"
-
-/** \brief Provides information about the (pre|in)fix engine to the left of a
- * role. */
-struct LeftEngInfo {
- std::shared_ptr<NGHolder> graph;
- std::shared_ptr<CastleProto> castle;
- std::shared_ptr<raw_dfa> dfa;
- std::shared_ptr<raw_som_dfa> haig;
+#include "util/order_check.h"
+
+/** \brief Provides information about the (pre|in)fix engine to the left of a
+ * role. */
+struct LeftEngInfo {
+ std::shared_ptr<NGHolder> graph;
+ std::shared_ptr<CastleProto> castle;
+ std::shared_ptr<raw_dfa> dfa;
+ std::shared_ptr<raw_som_dfa> haig;
std::shared_ptr<TamaProto> tamarama;
- u32 lag = 0U;
- ReportID leftfix_report = MO_INVALID_IDX;
+ u32 lag = 0U;
+ ReportID leftfix_report = MO_INVALID_IDX;
depth dfa_min_width{0};
- depth dfa_max_width = depth::infinity();
-
- bool operator==(const LeftEngInfo &other) const {
- return other.graph == graph
- && other.castle == castle
- && other.dfa == dfa
- && other.haig == haig
+ depth dfa_max_width = depth::infinity();
+
+ bool operator==(const LeftEngInfo &other) const {
+ return other.graph == graph
+ && other.castle == castle
+ && other.dfa == dfa
+ && other.haig == haig
&& other.tamarama == tamarama
- && other.lag == lag
- && other.leftfix_report == leftfix_report;
- }
- bool operator!=(const LeftEngInfo &other) const {
- return !(*this == other);
- }
- bool operator<(const LeftEngInfo &b) const {
- const LeftEngInfo &a = *this;
- ORDER_CHECK(graph);
- ORDER_CHECK(castle);
- ORDER_CHECK(dfa);
- ORDER_CHECK(haig);
+ && other.lag == lag
+ && other.leftfix_report == leftfix_report;
+ }
+ bool operator!=(const LeftEngInfo &other) const {
+ return !(*this == other);
+ }
+ bool operator<(const LeftEngInfo &b) const {
+ const LeftEngInfo &a = *this;
+ ORDER_CHECK(graph);
+ ORDER_CHECK(castle);
+ ORDER_CHECK(dfa);
+ ORDER_CHECK(haig);
ORDER_CHECK(tamarama);
- ORDER_CHECK(lag);
- ORDER_CHECK(leftfix_report);
- return false;
- }
+ ORDER_CHECK(lag);
+ ORDER_CHECK(leftfix_report);
+ return false;
+ }
size_t hash() const;
- void reset(void);
+ void reset(void);
explicit operator bool() const;
- bool tracksSom() const { return !!haig; }
-};
-
-/** \brief Provides information about the suffix engine to the right of a
- * role. */
-struct RoseSuffixInfo {
- u32 top = 0;
- std::shared_ptr<NGHolder> graph; /* if triggers a trailing nfa */
- std::shared_ptr<CastleProto> castle;
- std::shared_ptr<raw_som_dfa> haig;
- std::shared_ptr<raw_dfa> rdfa;
+ bool tracksSom() const { return !!haig; }
+};
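The operator< above builds a lexicographic ordering out of per-field comparisons. ORDER_CHECK relies on locals named a and b being in scope, which is why the method first binds const LeftEngInfo &a = *this;. A minimal sketch of what the macro in util/order_check.h is assumed to expand to (an illustration, not a quotation of that header):

    #define ORDER_CHECK(field)          \
        do {                            \
            if (a.field < b.field) {    \
                return true;            \
            }                           \
            if (b.field < a.field) {    \
                return false;           \
            }                           \
        } while (0)

Each check returns early on the first field that differs, so fields listed first dominate the ordering.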
+
+/** \brief Provides information about the suffix engine to the right of a
+ * role. */
+struct RoseSuffixInfo {
+ u32 top = 0;
+ std::shared_ptr<NGHolder> graph; /* if triggers a trailing nfa */
+ std::shared_ptr<CastleProto> castle;
+ std::shared_ptr<raw_som_dfa> haig;
+ std::shared_ptr<raw_dfa> rdfa;
std::shared_ptr<TamaProto> tamarama;
depth dfa_min_width{0};
- depth dfa_max_width = depth::infinity();
-
- bool operator==(const RoseSuffixInfo &b) const;
- bool operator!=(const RoseSuffixInfo &b) const { return !(*this == b); }
- bool operator<(const RoseSuffixInfo &b) const;
+ depth dfa_max_width = depth::infinity();
+
+ bool operator==(const RoseSuffixInfo &b) const;
+ bool operator!=(const RoseSuffixInfo &b) const { return !(*this == b); }
+ bool operator<(const RoseSuffixInfo &b) const;
size_t hash() const;
- void reset(void);
+ void reset(void);
explicit operator bool() const { return graph || castle || haig || rdfa || tamarama; }
-};
-
-/** \brief Properties attached to each Rose graph vertex. */
-struct RoseVertexProps {
- /** \brief Unique dense vertex index. Used for BGL algorithms. */
+};
+
+/** \brief Properties attached to each Rose graph vertex. */
+struct RoseVertexProps {
+ /** \brief Unique dense vertex index. Used for BGL algorithms. */
size_t index = ~size_t{0};
-
- /** \brief IDs of literals in the Rose literal map. */
- flat_set<u32> literals;
-
- /**
- * \brief If true, this vertex is a virtual vertex for firing reports at
- * EOD. These vertices must have reports and have no associated literals.
- */
- bool eod_accept = false;
-
- /** \brief Report IDs to fire. */
- flat_set<ReportID> reports;
-
- /** \brief Bitmask of groups that this role sets. */
- rose_group groups = 0;
-
- /** \brief Minimum role (end of literal) offset depth in bytes. */
- u32 min_offset = ~u32{0};
-
- /** \brief Maximum role (end of literal) offset depth in bytes */
- u32 max_offset = 0;
-
- /** \brief SOM for the role is offset from end match offset */
- u32 som_adjust = 0;
-
- /** \brief Prefix/infix engine to the left of this role. */
- LeftEngInfo left;
-
- /**
- * \brief Suffix engine to the right of this role.
- *
- * Note: information about triggered infixes is associated with the left of
- * the destination role.
- */
- RoseSuffixInfo suffix;
-
- bool isBoring(void) const;
- bool fixedOffset(void) const;
-};
-
-/** \brief Properties attached to each Rose graph edge. */
-/* bounds are distance from end of prev to start of the next */
-struct RoseEdgeProps {
+
+ /** \brief IDs of literals in the Rose literal map. */
+ flat_set<u32> literals;
+
+ /**
+ * \brief If true, this vertex is a virtual vertex for firing reports at
+ * EOD. These vertices must have reports and have no associated literals.
+ */
+ bool eod_accept = false;
+
+ /** \brief Report IDs to fire. */
+ flat_set<ReportID> reports;
+
+ /** \brief Bitmask of groups that this role sets. */
+ rose_group groups = 0;
+
+ /** \brief Minimum role (end of literal) offset depth in bytes. */
+ u32 min_offset = ~u32{0};
+
+ /** \brief Maximum role (end of literal) offset depth in bytes. */
+ u32 max_offset = 0;
+
+ /** \brief SOM for the role is offset from the end-of-match offset. */
+ u32 som_adjust = 0;
+
+ /** \brief Prefix/infix engine to the left of this role. */
+ LeftEngInfo left;
+
+ /**
+ * \brief Suffix engine to the right of this role.
+ *
+ * Note: information about triggered infixes is associated with the left of
+ * the destination role.
+ */
+ RoseSuffixInfo suffix;
+
+ bool isBoring(void) const;
+ bool fixedOffset(void) const;
+};
+
+/** \brief Properties attached to each Rose graph edge. */
+/* bounds are distance from end of prev to start of the next */
+struct RoseEdgeProps {
/** \brief Unique dense edge index. Used for BGL algorithms. */
size_t index = ~size_t{0};
- /**
- * \brief Minimum distance from the end of the source role's match to the
- * start of the target role's match.
- *
- * Not used when the target has a left engine (as the engine represents
- * bounds).
- */
- u32 minBound = 0;
-
- /**
- * \brief Maximum distance from the end of the source role's match to the
- * start of the target role's match.
- *
- * Not used when the target has a left engine (as the engine represents
- * bounds).
- */
- u32 maxBound = 0;
-
- /** \brief Which top to trigger on the target role's left engine. */
- u32 rose_top = 0;
-
- /** \brief True if the rose_top can clear all other previous tops. */
- u8 rose_cancel_prev_top = false;
-
- /** \brief History required by this edge. */
- RoseRoleHistory history = ROSE_ROLE_HISTORY_INVALID;
-};
-
-bool operator<(const RoseEdgeProps &a, const RoseEdgeProps &b);
-
-/**
- * \brief Core Rose graph structure.
- */
+ /**
+ * \brief Minimum distance from the end of the source role's match to the
+ * start of the target role's match.
+ *
+ * Not used when the target has a left engine (as the engine represents
+ * bounds).
+ */
+ u32 minBound = 0;
+
+ /**
+ * \brief Maximum distance from the end of the source role's match to the
+ * start of the target role's match.
+ *
+ * Not used when the target has a left engine (as the engine represents
+ * bounds).
+ */
+ u32 maxBound = 0;
+
+ /** \brief Which top to trigger on the target role's left engine. */
+ u32 rose_top = 0;
+
+ /** \brief True if the rose_top can clear all other previous tops. */
+ u8 rose_cancel_prev_top = false;
+
+ /** \brief History required by this edge. */
+ RoseRoleHistory history = ROSE_ROLE_HISTORY_INVALID;
+};
+
+bool operator<(const RoseEdgeProps &a, const RoseEdgeProps &b);
+
+/**
+ * \brief Core Rose graph structure.
+ */
struct RoseGraph : public ue2_graph<RoseGraph, RoseVertexProps, RoseEdgeProps> {
friend class RoseBuildImpl; /* to allow index renumbering */
};
-using RoseVertex = RoseGraph::vertex_descriptor;
-using RoseEdge = RoseGraph::edge_descriptor;
-
-} // namespace ue2
-
-#endif // ROSE_GRAPH_H
+using RoseVertex = RoseGraph::vertex_descriptor;
+using RoseEdge = RoseGraph::edge_descriptor;
+
+} // namespace ue2
+
+#endif // ROSE_GRAPH_H
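For orientation, a minimal sketch of how build-time code could populate a RoseGraph (assumed usage for illustration only; in the real build, RoseBuildImpl owns construction and index renumbering, per the friend declaration above). ue2_graph follows the usual BGL free-function interface:

    RoseGraph g;

    RoseVertex u = add_vertex(g);
    g[u].literals.insert(0);   /* id into the Rose literal map */
    g[u].min_offset = 0;
    g[u].max_offset = 8;

    RoseVertex v = add_vertex(g);
    g[v].reports.insert(0);    /* fire report 0 when this role matches */

    RoseEdge e = add_edge(u, v, g).first;
    g[e].minBound = 0;         /* end of u's literal to start of v's */
    g[e].maxBound = 10;
    g[e].history = ROSE_ROLE_HISTORY_NONE;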
diff --git a/contrib/libs/hyperscan/src/rose/rose_in_dump.h b/contrib/libs/hyperscan/src/rose/rose_in_dump.h
index 8fe43fa1e8..22c9fefd49 100644
--- a/contrib/libs/hyperscan/src/rose/rose_in_dump.h
+++ b/contrib/libs/hyperscan/src/rose/rose_in_dump.h
@@ -1,49 +1,49 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef ROSE_IN_DUMP_H
-#define ROSE_IN_DUMP_H
-
-#include "rose_in_graph.h"
-
-namespace ue2 {
-
-struct Grey;
-
-#ifdef DUMP_SUPPORT
-void dumpPreRoseGraph(const RoseInGraph &ig, const Grey &grey,
- const char *filename = nullptr);
-#else
-static UNUSED
-void dumpPreRoseGraph(const RoseInGraph &, const Grey &,
- const char * = nullptr) { }
-#endif
-
-}
-
-#endif
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ROSE_IN_DUMP_H
+#define ROSE_IN_DUMP_H
+
+#include "rose_in_graph.h"
+
+namespace ue2 {
+
+struct Grey;
+
+#ifdef DUMP_SUPPORT
+void dumpPreRoseGraph(const RoseInGraph &ig, const Grey &grey,
+ const char *filename = nullptr);
+#else
+static UNUSED
+void dumpPreRoseGraph(const RoseInGraph &, const Grey &,
+ const char * = nullptr) { }
+#endif
+
+}
+
+#endif
diff --git a/contrib/libs/hyperscan/src/rose/rose_in_graph.h b/contrib/libs/hyperscan/src/rose/rose_in_graph.h
index f99928f147..da0ea08da1 100644
--- a/contrib/libs/hyperscan/src/rose/rose_in_graph.h
+++ b/contrib/libs/hyperscan/src/rose/rose_in_graph.h
@@ -1,197 +1,197 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
* \brief Rose Input Graph: Used for ng_violet -> rose_build_add communication.
- *
- * The input graph MUST be a DAG.
- * There MUST be exactly 1 START or ANCHORED_START vertex.
- * The edges MUST be of the form START->LITERAL, LITERAL->LITERAL,
- * LITERAL->ACCEPT or LITERAL->ACCEPT_EOD.
- * Every non START/ANCHORED_START vertex MUST have an in-edge.
- * Every non ACCEPT/ACCEPT_EOD vertex MUST have an out-edge.
- *
- * Edges are either a graph or have bounds associated with them.
- * Graphs on edges to accepts use their internal report ids.
- */
-
-#ifndef ROSE_IN_GRAPH_H
-#define ROSE_IN_GRAPH_H
-
-#include "ue2common.h"
-#include "rose/rose_common.h"
+ *
+ * The input graph MUST be a DAG.
+ * There MUST be exactly 1 START or ANCHORED_START vertex.
+ * The edges MUST be of the form START->LITERAL, LITERAL->LITERAL,
+ * LITERAL->ACCEPT or LITERAL->ACCEPT_EOD.
+ * Every non-START/ANCHORED_START vertex MUST have an in-edge.
+ * Every non-ACCEPT/ACCEPT_EOD vertex MUST have an out-edge.
+ *
+ * Edges are either a graph or have bounds associated with them.
+ * Graphs on edges to accepts use their internal report ids.
+ */
+
+#ifndef ROSE_IN_GRAPH_H
+#define ROSE_IN_GRAPH_H
+
+#include "ue2common.h"
+#include "rose/rose_common.h"
#include "util/flat_containers.h"
#include "util/ue2_graph.h"
-#include "util/ue2string.h"
-
-#include <memory>
-
-namespace ue2 {
-
-class NGHolder;
-struct raw_som_dfa;
+#include "util/ue2string.h"
+
+#include <memory>
+
+namespace ue2 {
+
+class NGHolder;
+struct raw_som_dfa;
struct raw_dfa;
-
-enum RoseInVertexType {
- RIV_LITERAL,
- RIV_START,
- RIV_ANCHORED_START,
- RIV_ACCEPT,
- RIV_ACCEPT_EOD
-};
-
-struct RoseInVertexProps {
- RoseInVertexProps()
- : type(RIV_LITERAL), delay(0), min_offset(0),
- max_offset(ROSE_BOUND_INF) {}
-
-private:
- template <class ReportContainer>
- RoseInVertexProps(RoseInVertexType type_in, const ue2_literal &s_in,
- const ReportContainer &reports_in, u32 min_offset_in,
- u32 max_offset_in)
- : type(type_in), s(s_in), delay(0),
- reports(begin(reports_in), end(reports_in)),
- min_offset(min_offset_in), max_offset(max_offset_in) {}
-
- // Constructor for a vertex with no reports.
- RoseInVertexProps(RoseInVertexType type_in, const ue2_literal &s_in,
- u32 min_offset_in, u32 max_offset_in)
- : type(type_in), s(s_in), delay(0), min_offset(min_offset_in),
- max_offset(max_offset_in) {}
-
-public:
- static RoseInVertexProps makeLiteral(const ue2_literal &lit) {
- DEBUG_PRINTF("making literal %s\n", dumpString(lit).c_str());
- return RoseInVertexProps(RIV_LITERAL, lit, 0, ROSE_BOUND_INF);
- }
-
- template <class ReportContainer>
- static RoseInVertexProps makeAccept(const ReportContainer &rep) {
- DEBUG_PRINTF("making accept for %zu reports\n", rep.size());
- return RoseInVertexProps(RIV_ACCEPT, ue2_literal(), rep, 0,
- ROSE_BOUND_INF);
- }
-
- template <class ReportContainer>
- static RoseInVertexProps makeAcceptEod(const ReportContainer &rep) {
- DEBUG_PRINTF("making accept-eod for %zu reports\n", rep.size());
- return RoseInVertexProps(RIV_ACCEPT_EOD, ue2_literal(), rep, 0,
- ROSE_BOUND_INF);
- }
-
+
+enum RoseInVertexType {
+ RIV_LITERAL,
+ RIV_START,
+ RIV_ANCHORED_START,
+ RIV_ACCEPT,
+ RIV_ACCEPT_EOD
+};
+
+struct RoseInVertexProps {
+ RoseInVertexProps()
+ : type(RIV_LITERAL), delay(0), min_offset(0),
+ max_offset(ROSE_BOUND_INF) {}
+
+private:
+ template <class ReportContainer>
+ RoseInVertexProps(RoseInVertexType type_in, const ue2_literal &s_in,
+ const ReportContainer &reports_in, u32 min_offset_in,
+ u32 max_offset_in)
+ : type(type_in), s(s_in), delay(0),
+ reports(begin(reports_in), end(reports_in)),
+ min_offset(min_offset_in), max_offset(max_offset_in) {}
+
+ // Constructor for a vertex with no reports.
+ RoseInVertexProps(RoseInVertexType type_in, const ue2_literal &s_in,
+ u32 min_offset_in, u32 max_offset_in)
+ : type(type_in), s(s_in), delay(0), min_offset(min_offset_in),
+ max_offset(max_offset_in) {}
+
+public:
+ static RoseInVertexProps makeLiteral(const ue2_literal &lit) {
+ DEBUG_PRINTF("making literal %s\n", dumpString(lit).c_str());
+ return RoseInVertexProps(RIV_LITERAL, lit, 0, ROSE_BOUND_INF);
+ }
+
+ template <class ReportContainer>
+ static RoseInVertexProps makeAccept(const ReportContainer &rep) {
+ DEBUG_PRINTF("making accept for %zu reports\n", rep.size());
+ return RoseInVertexProps(RIV_ACCEPT, ue2_literal(), rep, 0,
+ ROSE_BOUND_INF);
+ }
+
+ template <class ReportContainer>
+ static RoseInVertexProps makeAcceptEod(const ReportContainer &rep) {
+ DEBUG_PRINTF("making accept-eod for %zu reports\n", rep.size());
+ return RoseInVertexProps(RIV_ACCEPT_EOD, ue2_literal(), rep, 0,
+ ROSE_BOUND_INF);
+ }
+
/* for when there is a suffix graph which handles the reports */
static RoseInVertexProps makeAcceptEod() {
return RoseInVertexProps(RIV_ACCEPT_EOD, ue2_literal(), 0,
ROSE_BOUND_INF);
}
- static RoseInVertexProps makeStart(bool anchored) {
- DEBUG_PRINTF("making %s\n", anchored ? "anchored start" : "start");
- if (anchored) {
- return RoseInVertexProps(RIV_ANCHORED_START, ue2_literal(), 0, 0);
- } else {
- return RoseInVertexProps(RIV_START, ue2_literal(), 0,
- ROSE_BOUND_INF);
- }
- }
-
- RoseInVertexType type; /* polymorphic vertices are probably a bad idea */
- ue2_literal s; /**< for RIV_LITERAL */
- u32 delay; /**< for RIV_LITERAL, delay applied to literal. */
- flat_set<ReportID> reports; /**< for RIV_ACCEPT/RIV_ACCEPT_EOD */
- u32 min_offset; /**< Minimum offset at which this vertex can match. */
- u32 max_offset; /**< Maximum offset at which this vertex can match. */
+ static RoseInVertexProps makeStart(bool anchored) {
+ DEBUG_PRINTF("making %s\n", anchored ? "anchored start" : "start");
+ if (anchored) {
+ return RoseInVertexProps(RIV_ANCHORED_START, ue2_literal(), 0, 0);
+ } else {
+ return RoseInVertexProps(RIV_START, ue2_literal(), 0,
+ ROSE_BOUND_INF);
+ }
+ }
+
+ RoseInVertexType type; /* polymorphic vertices are probably a bad idea */
+ ue2_literal s; /**< for RIV_LITERAL */
+ u32 delay; /**< for RIV_LITERAL, delay applied to literal. */
+ flat_set<ReportID> reports; /**< for RIV_ACCEPT/RIV_ACCEPT_EOD */
+ u32 min_offset; /**< Minimum offset at which this vertex can match. */
+ u32 max_offset; /**< Maximum offset at which this vertex can match. */
size_t index = 0; /**< \brief Unique vertex index. */
-};
-
-struct RoseInEdgeProps {
- RoseInEdgeProps()
- : minBound(0), maxBound(0), graph(), haig(), graph_lag(0) {}
-
- RoseInEdgeProps(u32 min_in, u32 max_in)
- : minBound(min_in), maxBound(max_in), graph(), graph_lag(0) {
- assert(minBound <= maxBound);
- assert(minBound != ROSE_BOUND_INF);
- }
-
- /* haig rosefixes (prefix/infix) require their corresponding holders */
- RoseInEdgeProps(std::shared_ptr<NGHolder> g, std::shared_ptr<raw_som_dfa> h,
- u32 lag)
- : minBound(0), maxBound(ROSE_BOUND_INF), graph(g), haig(h),
- graph_lag(lag) {
- assert(graph);
- assert(haig);
- }
-
- /* haig suffixes do not require their corresponding holders */
- explicit RoseInEdgeProps(std::shared_ptr<raw_som_dfa> h)
- : minBound(0), maxBound(ROSE_BOUND_INF), haig(h), graph_lag(0) {
- assert(haig);
- }
-
- RoseInEdgeProps(std::shared_ptr<NGHolder> g, u32 lag)
- : minBound(0), maxBound(ROSE_BOUND_INF), graph(g), graph_lag(lag) {
- assert(graph);
- }
-
- /** \brief Minimum bound on 'dot' repeat between literals. ie pred end ->
- * succ begin. */
- u32 minBound;
-
- /** \brief Maximum bound on 'dot' repeat between literals. */
- u32 maxBound;
-
+};
+
+struct RoseInEdgeProps {
+ RoseInEdgeProps()
+ : minBound(0), maxBound(0), graph(), haig(), graph_lag(0) {}
+
+ RoseInEdgeProps(u32 min_in, u32 max_in)
+ : minBound(min_in), maxBound(max_in), graph(), graph_lag(0) {
+ assert(minBound <= maxBound);
+ assert(minBound != ROSE_BOUND_INF);
+ }
+
+ /* haig rosefixes (prefix/infix) require their corresponding holders */
+ RoseInEdgeProps(std::shared_ptr<NGHolder> g, std::shared_ptr<raw_som_dfa> h,
+ u32 lag)
+ : minBound(0), maxBound(ROSE_BOUND_INF), graph(g), haig(h),
+ graph_lag(lag) {
+ assert(graph);
+ assert(haig);
+ }
+
+ /* haig suffixes do not require their corresponding holders */
+ explicit RoseInEdgeProps(std::shared_ptr<raw_som_dfa> h)
+ : minBound(0), maxBound(ROSE_BOUND_INF), haig(h), graph_lag(0) {
+ assert(haig);
+ }
+
+ RoseInEdgeProps(std::shared_ptr<NGHolder> g, u32 lag)
+ : minBound(0), maxBound(ROSE_BOUND_INF), graph(g), graph_lag(lag) {
+ assert(graph);
+ }
+
+ /** \brief Minimum bound on 'dot' repeat between literals, i.e. pred end ->
+ * succ begin. */
+ u32 minBound;
+
+ /** \brief Maximum bound on 'dot' repeat between literals. */
+ u32 maxBound;
+
/** \brief Graph on edge. Graph is end to (end - lag). */
- std::shared_ptr<NGHolder> graph;
-
+ std::shared_ptr<NGHolder> graph;
+
/** \brief DFA version of graph, if we have already determinised. */
std::shared_ptr<raw_dfa> dfa;
- /** \brief Haig version of graph, if required. */
- std::shared_ptr<raw_som_dfa> haig;
-
+ /** \brief Haig version of graph, if required. */
+ std::shared_ptr<raw_som_dfa> haig;
+
/**
* \brief Distance behind the match offset for the literal in the target
* vertex that the leftfix needs to be checked at.
*/
- u32 graph_lag;
+ u32 graph_lag;
/** \brief Unique edge index. */
size_t index = 0;
};
-
+
struct RoseInGraph
: public ue2_graph<RoseInGraph, RoseInVertexProps, RoseInEdgeProps> {
-};
-typedef RoseInGraph::vertex_descriptor RoseInVertex;
-typedef RoseInGraph::edge_descriptor RoseInEdge;
-
-} // namespace ue2
-
-#endif
+};
+typedef RoseInGraph::vertex_descriptor RoseInVertex;
+typedef RoseInGraph::edge_descriptor RoseInEdge;
+
+} // namespace ue2
+
+#endif
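A minimal sketch of constructing a legal input graph under the constraints in the file comment (START -> LITERAL -> ACCEPT; the literal text, bounds and report id below are illustrative assumptions):

    RoseInGraph ig;
    RoseInVertex start = add_vertex(RoseInVertexProps::makeStart(false), ig);
    RoseInVertex lit = add_vertex(
        RoseInVertexProps::makeLiteral(ue2_literal("abc", false)), ig);
    std::set<ReportID> reports = {0};
    RoseInVertex acc = add_vertex(RoseInVertexProps::makeAccept(reports), ig);

    add_edge(start, lit, RoseInEdgeProps(0, ROSE_BOUND_INF), ig); /* floating */
    add_edge(lit, acc, RoseInEdgeProps(0, 0), ig); /* accept at literal end */

calcVertexOffsets() from rose_in_util.cpp can then fill in each vertex's min_offset/max_offset.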
diff --git a/contrib/libs/hyperscan/src/rose/rose_in_util.cpp b/contrib/libs/hyperscan/src/rose/rose_in_util.cpp
index 0de66411b2..cb531017e3 100644
--- a/contrib/libs/hyperscan/src/rose/rose_in_util.cpp
+++ b/contrib/libs/hyperscan/src/rose/rose_in_util.cpp
@@ -1,251 +1,251 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "rose_in_util.h"
-
-#include "rose_build_util.h"
-#include "nfa/goughcompile.h"
-#include "nfagraph/ng_depth.h"
-#include "nfagraph/ng_util.h"
-#include "nfagraph/ng_width.h"
-#include "util/container.h"
-#include "util/graph_range.h"
-#include "util/make_unique.h"
-
-#include <vector>
-
-#include <boost/graph/copy.hpp>
-#include <boost/graph/reverse_graph.hpp>
-#include <boost/graph/topological_sort.hpp>
-
-using namespace std;
-
-namespace ue2 {
-
-/* Returns a topological ordering of the vertices in g. That is the starts are
- * at the front and all the predecessors of a vertex occur earlier in the list
- * than the vertex. */
-vector<RoseInVertex> topo_order(const RoseInGraph &g) {
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "rose_in_util.h"
+
+#include "rose_build_util.h"
+#include "nfa/goughcompile.h"
+#include "nfagraph/ng_depth.h"
+#include "nfagraph/ng_util.h"
+#include "nfagraph/ng_width.h"
+#include "util/container.h"
+#include "util/graph_range.h"
+#include "util/make_unique.h"
+
+#include <vector>
+
+#include <boost/graph/copy.hpp>
+#include <boost/graph/reverse_graph.hpp>
+#include <boost/graph/topological_sort.hpp>
+
+using namespace std;
+
+namespace ue2 {
+
+/* Returns a topological ordering of the vertices in g. That is the starts are
+ * at the front and all the predecessors of a vertex occur earlier in the list
+ * than the vertex. */
+vector<RoseInVertex> topo_order(const RoseInGraph &g) {
assert(hasCorrectlyNumberedVertices(g));
- vector<RoseInVertex> v_order;
+ vector<RoseInVertex> v_order;
v_order.reserve(num_vertices(g));
-
+
boost::topological_sort(g, back_inserter(v_order));
-
- reverse(v_order.begin(), v_order.end()); /* put starts at the front */
-
- return v_order;
-}
-
-namespace {
-struct RoseEdgeCopier {
- typedef unordered_map<const NGHolder *, shared_ptr<NGHolder>> GraphMap;
- typedef unordered_map<const raw_som_dfa *, shared_ptr<raw_som_dfa>> HaigMap;
-
- RoseEdgeCopier(const RoseInGraph &g1, RoseInGraph &g2,
- const GraphMap &graph_map_in, const HaigMap &haig_map_in)
- : ig(g1), out(g2), graph_map(graph_map_in), haig_map(haig_map_in) {}
-
- void operator()(const RoseInEdge &e1, RoseInEdge &e2) {
- // Clone all properties.
- put(boost::edge_all, out, e2, get(boost::edge_all, ig, e1));
- // Substitute in cloned graphs.
- if (ig[e1].graph) {
- out[e2].graph = graph_map.at(ig[e1].graph.get());
- }
- if (ig[e1].haig) {
- out[e2].haig = haig_map.at(ig[e1].haig.get());
- }
- }
-
-private:
- const RoseInGraph &ig;
- RoseInGraph &out;
- const GraphMap &graph_map;
- const HaigMap &haig_map;
-};
-}
-
-unique_ptr<RoseInGraph> cloneRoseGraph(const RoseInGraph &ig) {
+
+ reverse(v_order.begin(), v_order.end()); /* put starts at the front */
+
+ return v_order;
+}
+
+namespace {
+struct RoseEdgeCopier {
+ typedef unordered_map<const NGHolder *, shared_ptr<NGHolder>> GraphMap;
+ typedef unordered_map<const raw_som_dfa *, shared_ptr<raw_som_dfa>> HaigMap;
+
+ RoseEdgeCopier(const RoseInGraph &g1, RoseInGraph &g2,
+ const GraphMap &graph_map_in, const HaigMap &haig_map_in)
+ : ig(g1), out(g2), graph_map(graph_map_in), haig_map(haig_map_in) {}
+
+ void operator()(const RoseInEdge &e1, RoseInEdge &e2) {
+ // Clone all properties.
+ put(boost::edge_all, out, e2, get(boost::edge_all, ig, e1));
+ // Substitute in cloned graphs.
+ if (ig[e1].graph) {
+ out[e2].graph = graph_map.at(ig[e1].graph.get());
+ }
+ if (ig[e1].haig) {
+ out[e2].haig = haig_map.at(ig[e1].haig.get());
+ }
+ }
+
+private:
+ const RoseInGraph &ig;
+ RoseInGraph &out;
+ const GraphMap &graph_map;
+ const HaigMap &haig_map;
+};
+}
+
+unique_ptr<RoseInGraph> cloneRoseGraph(const RoseInGraph &ig) {
assert(hasCorrectlyNumberedVertices(ig));
unique_ptr<RoseInGraph> out = std::make_unique<RoseInGraph>();
-
- unordered_map<const NGHolder *, shared_ptr<NGHolder>> graph_map;
- unordered_map<const raw_som_dfa *, shared_ptr<raw_som_dfa>> haig_map;
-
- for (const auto &e : edges_range(ig)) {
- const RoseInEdgeProps &ep = ig[e];
- if (ep.graph && !contains(graph_map, ep.graph.get())) {
- graph_map[ep.graph.get()] = cloneHolder(*ep.graph);
- }
- if (ep.haig && !contains(haig_map, ep.haig.get())) {
- haig_map[ep.haig.get()] = make_shared<raw_som_dfa>(*ep.haig);
- }
- }
-
- copy_graph(ig, *out,
+
+ unordered_map<const NGHolder *, shared_ptr<NGHolder>> graph_map;
+ unordered_map<const raw_som_dfa *, shared_ptr<raw_som_dfa>> haig_map;
+
+ for (const auto &e : edges_range(ig)) {
+ const RoseInEdgeProps &ep = ig[e];
+ if (ep.graph && !contains(graph_map, ep.graph.get())) {
+ graph_map[ep.graph.get()] = cloneHolder(*ep.graph);
+ }
+ if (ep.haig && !contains(haig_map, ep.haig.get())) {
+ haig_map[ep.haig.get()] = make_shared<raw_som_dfa>(*ep.haig);
+ }
+ }
+
+ copy_graph(ig, *out,
boost::edge_copy(RoseEdgeCopier(ig, *out, graph_map, haig_map)));
- return out;
-}
-
-void calcVertexOffsets(RoseInGraph &g) {
- vector<RoseInVertex> v_order = topo_order(g);
-
- for (RoseInVertex v : v_order) {
- if (g[v].type == RIV_START) {
- g[v].min_offset = 0;
- g[v].max_offset = ROSE_BOUND_INF;
- continue;
- } else if (g[v].type == RIV_ANCHORED_START) {
- g[v].min_offset = 0;
- g[v].max_offset = 0;
- continue;
- }
-
- DEBUG_PRINTF("vertex '%s'\n", dumpString(g[v].s).c_str());
-
- // Min and max predecessor depths.
- u32 min_d = ROSE_BOUND_INF;
- u32 max_d = 0;
-
- for (const auto &e : in_edges_range(v, g)) {
- RoseInVertex u = source(e, g);
- u32 e_min = g[u].min_offset;
- u32 e_max = g[u].max_offset;
-
- DEBUG_PRINTF("in-edge from u with offsets [%u,%u]\n", e_min, e_max);
-
- if (g[e].graph) {
- const NGHolder &h = *g[e].graph;
- depth g_min_width = findMinWidth(h);
- depth g_max_width =
- isAnchored(h) ? findMaxWidth(h) : depth::infinity();
- u32 graph_lag = g[e].graph_lag;
-
- DEBUG_PRINTF("edge has graph, depths [%s,%s] and lag %u\n",
- g_min_width.str().c_str(),
- g_max_width.str().c_str(), graph_lag);
- g_min_width += graph_lag;
- g_max_width += graph_lag;
- e_min = add_rose_depth(e_min, g_min_width);
- if (g_max_width.is_finite()) {
- e_max = add_rose_depth(e_max, g_max_width);
- } else {
- e_max = ROSE_BOUND_INF;
- }
- } else {
- DEBUG_PRINTF("edge has bounds [%u,%u]\n", g[e].minBound,
- g[e].maxBound);
- e_min = add_rose_depth(e_min, g[e].minBound);
- e_max = add_rose_depth(e_max, g[e].maxBound);
- if (g[v].type == RIV_LITERAL) {
- u32 len = g[v].s.length();
- DEBUG_PRINTF("lit len %u\n", len);
- e_min = add_rose_depth(e_min, len);
- e_max = add_rose_depth(e_max, len);
- }
- }
-
- min_d = min(min_d, e_min);
- max_d = max(max_d, e_max);
- }
-
- DEBUG_PRINTF("vertex depths [%u,%u]\n", min_d, max_d);
-
- assert(max_d >= min_d);
- g[v].min_offset = min_d;
- g[v].max_offset = max_d;
- }
-
- // It's possible that we may have literal delays assigned to vertices here
- // as well. If so, these need to be added to the min/max offsets.
- for (RoseInVertex v : v_order) {
- const u32 delay = g[v].delay;
- g[v].min_offset = add_rose_depth(g[v].min_offset, delay);
- g[v].max_offset = add_rose_depth(g[v].max_offset, delay);
- }
-}
-
-nfa_kind whatRoseIsThis(const RoseInGraph &in, const RoseInEdge &e) {
- RoseInVertex u = source(e, in);
- RoseInVertex v = target(e, in);
-
- bool start = in[u].type == RIV_START || in[u].type == RIV_ANCHORED_START;
- bool end = in[v].type == RIV_ACCEPT || in[v].type == RIV_ACCEPT_EOD;
-
- if (start && !end) {
- return NFA_PREFIX;
- } else if (!start && end) {
- return NFA_SUFFIX;
- } else if (!start && !end) {
- return NFA_INFIX;
- } else {
- assert(in[v].type == RIV_ACCEPT_EOD);
- return NFA_OUTFIX;
- }
-}
-
-void pruneUseless(RoseInGraph &g) {
- DEBUG_PRINTF("pruning useless vertices\n");
-
- set<RoseInVertex> dead;
- RoseInVertex dummy_start
- = add_vertex(RoseInVertexProps::makeStart(true), g);
- RoseInVertex dummy_end
- = add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), g);
- dead.insert(dummy_start);
- dead.insert(dummy_end);
- for (auto v : vertices_range(g)) {
- if (v == dummy_start || v == dummy_end) {
- continue;
- }
- switch (g[v].type) {
- case RIV_ANCHORED_START:
- case RIV_START:
- add_edge(dummy_start, v, g);
- break;
- case RIV_ACCEPT:
- case RIV_ACCEPT_EOD:
- add_edge(v, dummy_end, g);
- break;
- default:
- break;
- }
- }
-
- find_unreachable(g, vector<RoseInVertex>(1, dummy_start), &dead);
- find_unreachable(boost::reverse_graph<RoseInGraph, RoseInGraph &>(g),
- vector<RoseInVertex>(1, dummy_end), &dead);
-
- for (auto v : dead) {
- clear_vertex(v, g);
- remove_vertex(v, g);
- }
-}
-
-}
+ return out;
+}
+
+void calcVertexOffsets(RoseInGraph &g) {
+ vector<RoseInVertex> v_order = topo_order(g);
+
+ for (RoseInVertex v : v_order) {
+ if (g[v].type == RIV_START) {
+ g[v].min_offset = 0;
+ g[v].max_offset = ROSE_BOUND_INF;
+ continue;
+ } else if (g[v].type == RIV_ANCHORED_START) {
+ g[v].min_offset = 0;
+ g[v].max_offset = 0;
+ continue;
+ }
+
+ DEBUG_PRINTF("vertex '%s'\n", dumpString(g[v].s).c_str());
+
+ // Min and max predecessor depths.
+ u32 min_d = ROSE_BOUND_INF;
+ u32 max_d = 0;
+
+ for (const auto &e : in_edges_range(v, g)) {
+ RoseInVertex u = source(e, g);
+ u32 e_min = g[u].min_offset;
+ u32 e_max = g[u].max_offset;
+
+ DEBUG_PRINTF("in-edge from u with offsets [%u,%u]\n", e_min, e_max);
+
+ if (g[e].graph) {
+ const NGHolder &h = *g[e].graph;
+ depth g_min_width = findMinWidth(h);
+ depth g_max_width =
+ isAnchored(h) ? findMaxWidth(h) : depth::infinity();
+ u32 graph_lag = g[e].graph_lag;
+
+ DEBUG_PRINTF("edge has graph, depths [%s,%s] and lag %u\n",
+ g_min_width.str().c_str(),
+ g_max_width.str().c_str(), graph_lag);
+ g_min_width += graph_lag;
+ g_max_width += graph_lag;
+ e_min = add_rose_depth(e_min, g_min_width);
+ if (g_max_width.is_finite()) {
+ e_max = add_rose_depth(e_max, g_max_width);
+ } else {
+ e_max = ROSE_BOUND_INF;
+ }
+ } else {
+ DEBUG_PRINTF("edge has bounds [%u,%u]\n", g[e].minBound,
+ g[e].maxBound);
+ e_min = add_rose_depth(e_min, g[e].minBound);
+ e_max = add_rose_depth(e_max, g[e].maxBound);
+ if (g[v].type == RIV_LITERAL) {
+ u32 len = g[v].s.length();
+ DEBUG_PRINTF("lit len %u\n", len);
+ e_min = add_rose_depth(e_min, len);
+ e_max = add_rose_depth(e_max, len);
+ }
+ }
+
+ min_d = min(min_d, e_min);
+ max_d = max(max_d, e_max);
+ }
+
+ DEBUG_PRINTF("vertex depths [%u,%u]\n", min_d, max_d);
+
+ assert(max_d >= min_d);
+ g[v].min_offset = min_d;
+ g[v].max_offset = max_d;
+ }
+
+ // It's possible that we may have literal delays assigned to vertices here
+ // as well. If so, these need to be added to the min/max offsets.
+ for (RoseInVertex v : v_order) {
+ const u32 delay = g[v].delay;
+ g[v].min_offset = add_rose_depth(g[v].min_offset, delay);
+ g[v].max_offset = add_rose_depth(g[v].max_offset, delay);
+ }
+}
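A worked example of the propagation above, for a plain bounds edge (no graph attached): given ANCHORED_START --[minBound=0, maxBound=2]--> "abc" (an RIV_LITERAL of length 3), the predecessor contributes offsets [0,0], the edge bounds add [0,2] and the literal length adds 3, so e_min = 3 and e_max = 5. The literal vertex therefore ends up with min_offset = 3 and max_offset = 5, and a literal delay d would later shift both bounds by d.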
+
+nfa_kind whatRoseIsThis(const RoseInGraph &in, const RoseInEdge &e) {
+ RoseInVertex u = source(e, in);
+ RoseInVertex v = target(e, in);
+
+ bool start = in[u].type == RIV_START || in[u].type == RIV_ANCHORED_START;
+ bool end = in[v].type == RIV_ACCEPT || in[v].type == RIV_ACCEPT_EOD;
+
+ if (start && !end) {
+ return NFA_PREFIX;
+ } else if (!start && end) {
+ return NFA_SUFFIX;
+ } else if (!start && !end) {
+ return NFA_INFIX;
+ } else {
+ assert(in[v].type == RIV_ACCEPT_EOD);
+ return NFA_OUTFIX;
+ }
+}
+
+void pruneUseless(RoseInGraph &g) {
+ DEBUG_PRINTF("pruning useless vertices\n");
+
+ set<RoseInVertex> dead;
+ RoseInVertex dummy_start
+ = add_vertex(RoseInVertexProps::makeStart(true), g);
+ RoseInVertex dummy_end
+ = add_vertex(RoseInVertexProps::makeAccept(set<ReportID>()), g);
+ dead.insert(dummy_start);
+ dead.insert(dummy_end);
+ for (auto v : vertices_range(g)) {
+ if (v == dummy_start || v == dummy_end) {
+ continue;
+ }
+ switch (g[v].type) {
+ case RIV_ANCHORED_START:
+ case RIV_START:
+ add_edge(dummy_start, v, g);
+ break;
+ case RIV_ACCEPT:
+ case RIV_ACCEPT_EOD:
+ add_edge(v, dummy_end, g);
+ break;
+ default:
+ break;
+ }
+ }
+
+ find_unreachable(g, vector<RoseInVertex>(1, dummy_start), &dead);
+ find_unreachable(boost::reverse_graph<RoseInGraph, RoseInGraph &>(g),
+ vector<RoseInVertex>(1, dummy_end), &dead);
+
+ for (auto v : dead) {
+ clear_vertex(v, g);
+ remove_vertex(v, g);
+ }
+}
+
+}
diff --git a/contrib/libs/hyperscan/src/rose/rose_in_util.h b/contrib/libs/hyperscan/src/rose/rose_in_util.h
index e6aaa042de..1f3c4ef78a 100644
--- a/contrib/libs/hyperscan/src/rose/rose_in_util.h
+++ b/contrib/libs/hyperscan/src/rose/rose_in_util.h
@@ -1,56 +1,56 @@
-/*
- * Copyright (c) 2015, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef ROSE_IN_UTIL_H
-#define ROSE_IN_UTIL_H
-
-#include "rose_in_graph.h"
-#include "nfa/nfa_kind.h"
-
-#include <vector>
-
-namespace ue2 {
-
-/* Returns a topological ordering of the vertices in g. That is the starts are
- * at the front and all the predecessors of a vertex occur earlier in the list
- * than the vertex. */
-std::vector<RoseInVertex> topo_order(const RoseInGraph &g);
-
-std::unique_ptr<RoseInGraph> cloneRoseGraph(const RoseInGraph &ig);
-void calcVertexOffsets(RoseInGraph &ig);
-enum nfa_kind whatRoseIsThis(const RoseInGraph &in, const RoseInEdge &e);
-void pruneUseless(RoseInGraph &g);
-
+/*
+ * Copyright (c) 2015, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ROSE_IN_UTIL_H
+#define ROSE_IN_UTIL_H
+
+#include "rose_in_graph.h"
+#include "nfa/nfa_kind.h"
+
+#include <vector>
+
+namespace ue2 {
+
+/* Returns a topological ordering of the vertices in g. That is the starts are
+ * at the front and all the predecessors of a vertex occur earlier in the list
+ * than the vertex. */
+std::vector<RoseInVertex> topo_order(const RoseInGraph &g);
+
+std::unique_ptr<RoseInGraph> cloneRoseGraph(const RoseInGraph &ig);
+void calcVertexOffsets(RoseInGraph &ig);
+enum nfa_kind whatRoseIsThis(const RoseInGraph &in, const RoseInEdge &e);
+void pruneUseless(RoseInGraph &g);
+
inline
bool is_any_accept(RoseInVertex v, const RoseInGraph &g) {
return g[v].type == RIV_ACCEPT || g[v].type == RIV_ACCEPT_EOD;
-}
-
}
-#endif
+}
+
+#endif
diff --git a/contrib/libs/hyperscan/src/rose/rose_internal.h b/contrib/libs/hyperscan/src/rose/rose_internal.h
index dbbb43ee92..7bd6779c3d 100644
--- a/contrib/libs/hyperscan/src/rose/rose_internal.h
+++ b/contrib/libs/hyperscan/src/rose/rose_internal.h
@@ -1,207 +1,207 @@
-/*
+/*
* Copyright (c) 2015-2019, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Rose data structures.
- */
-
-#ifndef ROSE_INTERNAL_H
-#define ROSE_INTERNAL_H
-
-#include "ue2common.h"
-#include "rose_common.h"
-#include "util/scatter.h"
-
-#define ROSE_OFFSET_INVALID 0xffffffff
-
-// Group constants
-typedef u64a rose_group;
-
-// Delayed literal stuff
-#define DELAY_BITS 5
-#define DELAY_SLOT_COUNT (1U << DELAY_BITS)
-#define MAX_DELAY (DELAY_SLOT_COUNT - 1)
-#define DELAY_MASK (DELAY_SLOT_COUNT - 1)
-
-/* Allocation of Rose literal ids
- *
- * The rose literal id space is segmented:
- *
- * ---- 0
- * | | 'Normal' undelayed literals in either e or f tables
- * | |
- * | |
- * | |
- * ---- anchored_base_id
- * | | literals from the a table
- * | |
- * ---- delay_base_id
- * | | Delayed version of normal literals
- * | |
- * ---- literalCount
- */
-
-/* Rose Literal Sources
- *
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Rose data structures.
+ */
+
+#ifndef ROSE_INTERNAL_H
+#define ROSE_INTERNAL_H
+
+#include "ue2common.h"
+#include "rose_common.h"
+#include "util/scatter.h"
+
+#define ROSE_OFFSET_INVALID 0xffffffff
+
+// Group constants
+typedef u64a rose_group;
+
+// Delayed literal stuff
+#define DELAY_BITS 5
+#define DELAY_SLOT_COUNT (1U << DELAY_BITS)
+#define MAX_DELAY (DELAY_SLOT_COUNT - 1)
+#define DELAY_MASK (DELAY_SLOT_COUNT - 1)
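To make the delay arithmetic concrete: with DELAY_BITS = 5, DELAY_SLOT_COUNT is 1 << 5 = 32, so a literal can be delayed by at most MAX_DELAY = 31 bytes, and DELAY_MASK = 31 wraps a delay-slot index modulo the slot count.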
+
+/* Allocation of Rose literal ids
+ *
+ * The rose literal id space is segmented:
+ *
+ * ---- 0
+ * | | 'Normal' undelayed literals in either e or f tables
+ * | |
+ * | |
+ * | |
+ * ---- anchored_base_id
+ * | | literals from the a table
+ * | |
+ * ---- delay_base_id
+ * | | Delayed version of normal literals
+ * | |
+ * ---- literalCount
+ */
+
+/* Rose Literal Sources
+ *
* Rose currently gets events (mainly roseProcessMatch calls) from a number of
* sources:
- * 1) The floating table
- * 2) The anchored table
- * 3) Delayed literals
+ * 1) The floating table
+ * 2) The anchored table
+ * 3) Delayed literals
* 4) Suffix NFAs
* 5) Literal masks
* 6) End anchored table
* 7) Prefix / Infix nfas
- *
- * Care is required to ensure that events appear to come into Rose in order
- * (or sufficiently ordered for Rose to cope). Generally the progress of the
- * floating table is considered the canonical position in the buffer.
- *
- * Anchored table:
- * The anchored table is run before the floating table as nothing in it can
- * depend on a floating literal. Order is achieved by two measures:
- * a) user matches^1 are logged and held until the floating matcher passes that
- * point;
- * b) any floating role with an anchored predecessor has a history relationship
- * to enforce the ordering.
- *
- * Delayed literals:
- * Delayed literal ordering is handled by delivering any pending delayed
- * literals before processing any floating match.
- *
- * Suffix:
- * Suffixes are always pure terminal roles. Prior to raising a match^2, pending
- * NFA queues are run to the current point (floating or delayed literal) as
- * appropriate.
- *
+ *
+ * Care is required to ensure that events appear to come into Rose in order
+ * (or sufficiently ordered for Rose to cope). Generally the progress of the
+ * floating table is considered the canonical position in the buffer.
+ *
+ * Anchored table:
+ * The anchored table is run before the floating table as nothing in it can
+ * depend on a floating literal. Order is achieved by two measures:
+ * a) user matches^1 are logged and held until the floating matcher passes that
+ * point;
+ * b) any floating role with an anchored predecessor has a history relationship
+ * to enforce the ordering.
+ *
+ * Delayed literals:
+ * Delayed literal ordering is handled by delivering any pending delayed
+ * literals before processing any floating match.
+ *
+ * Suffix:
+ * Suffixes are always pure terminal roles. Prior to raising a match^2, pending
+ * NFA queues are run to the current point (floating or delayed literal) as
+ * appropriate.
+ *
* Literal Masks:
- * These are triggered from either floating literals or delayed literals and
- * inspect the data behind them. Matches are raised at the same location as the
- * trigger literal so there are no ordering issues. Masks are always pure
- * terminal roles.
- *
- * Lookaround:
- * These are tests run on receipt of a role that "look around" the match,
- * checking characters at nearby offsets against reachability masks. Each role
- * can have a list of these lookaround offset/reach pairs, ordered in offset
- * order, and any failure will prevent the role from being switched on. Offsets
- * are relative to the byte after a literal match, and can be negative.
- *
- * Prefix / Infix:
- * TODO: remember / discuss
- *
- * End anchored table:
- * All user matches occur at the last byte. We do this last, so no problems
- * (yippee)
- *
- * ^1 User matches which occur before any possible match from the other tables
- * are not delayed.
- * ^2 Queues may also be run to the current location if a queue is full and
- * needs to be emptied.
- * ^3 There is no need to catch up at the end of a block scan as it contains no
- * terminals.
- */
-
-struct RoseCountingMiracle {
- char shufti; /** 1: count shufti class; 0: count a single character */
- u8 count; /** minimum number of occurrences for the counting
- * miracle char to kill the leftfix. */
- u8 c; /** character to look for if not shufti */
- u8 poison; /** character not in the shufti mask */
- m128 lo; /** shufti lo mask */
- m128 hi; /** shufti hi mask */
-};
-
-struct LeftNfaInfo {
- u32 maxQueueLen;
- u32 maxLag; // maximum of successor roles' lag
- u32 lagIndex; // iff lag != 0, index into leftfixLagTable
- u32 stopTable; // stop table index, or ROSE_OFFSET_INVALID
- u8 transient; /**< 0 if not transient, else max width of transient prefix */
- char infix; /* TODO: make flags */
+ * These are triggered from either floating literals or delayed literals and
+ * inspect the data behind them. Matches are raised at the same location as the
+ * trigger literal so there are no ordering issues. Masks are always pure
+ * terminal roles.
+ *
+ * Lookaround:
+ * These are tests run on receipt of a role that "look around" the match,
+ * checking characters at nearby offsets against reachability masks. Each role
+ * can have a list of these lookaround offset/reach pairs, ordered in offset
+ * order, and any failure will prevent the role from being switched on. Offsets
+ * are relative to the byte after a literal match, and can be negative.
+ *
+ * Prefix / Infix:
+ * TODO: remember / discuss
+ *
+ * End anchored table:
+ * All user matches occur at the last byte. We do this last, so no problems
+ * (yippee)
+ *
+ * ^1 User matches which occur before any possible match from the other tables
+ * are not delayed.
+ * ^2 Queues may also be run to the current location if a queue is full and
+ * needs to be emptied.
+ * ^3 There is no need to catch up at the end of a block scan as it contains no
+ * terminals.
+ */
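To make the lookaround description above concrete, here is a minimal scalar sketch. The struct layout and helper name are hypothetical (the real encoding and its runtime live in rose_build_lookaround.cpp and the Rose program interpreter), but it shows the offset/reach contract: every entry must accept, or the role stays off.

    /* Hypothetical illustration of the lookaround test described above. */
    struct LookEntry {
        s8 offset;    /* relative to the byte after the literal match */
        u8 reach[32]; /* 256-bit set of acceptable byte values */
    };

    static int lookaroundPasses(const struct LookEntry *look, u32 count,
                                const u8 *buf, size_t match_end) {
        /* bounds and history-buffer handling elided for clarity */
        for (u32 i = 0; i < count; i++) {
            u8 c = buf[match_end + look[i].offset]; /* offset may be negative */
            if (!(look[i].reach[c / 8] & (1U << (c % 8)))) {
                return 0; /* any failing entry blocks the role */
            }
        }
        return 1; /* every offset/reach pair accepted */
    }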
+
+struct RoseCountingMiracle {
+    char shufti; /**< 1: count shufti class; 0: count a single character */
+    u8 count; /**< minimum number of occurrences for the counting
+               * miracle char to kill the leftfix. */
+    u8 c; /**< character to look for if not shufti */
+    u8 poison; /**< character not in the shufti mask */
+    m128 lo; /**< shufti lo mask */
+    m128 hi; /**< shufti hi mask */
+};
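A rough scalar sketch of how the non-shufti case of this structure could be consumed; the helper name is hypothetical, and the shipped implementation in counting_miracle.h is vectorised:

    /* Does the counting-miracle character occur at least `count` times in
     * [begin, end)? If so, the leftfix can be killed. Scalar sketch only. */
    static int countingMiracleSeen(const struct RoseCountingMiracle *cm,
                                   const u8 *begin, const u8 *end) {
        u32 seen = 0;
        for (const u8 *p = begin; p < end; p++) {
            if (*p == cm->c && ++seen >= cm->count) {
                return 1;
            }
        }
        return 0;
    }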
+
+struct LeftNfaInfo {
+ u32 maxQueueLen;
+ u32 maxLag; // maximum of successor roles' lag
+ u32 lagIndex; // iff lag != 0, index into leftfixLagTable
+ u32 stopTable; // stop table index, or ROSE_OFFSET_INVALID
+ u8 transient; /**< 0 if not transient, else max width of transient prefix */
+ char infix; /* TODO: make flags */
char eager; /**< nfa should be run eagerly to first match or death */
- char eod_check; /**< nfa is used by the event eod literal */
- u32 countingMiracleOffset; /** if not 0, offset to RoseCountingMiracle. */
- rose_group squash_mask; /* & mask applied when rose nfa dies */
-};
-
-struct NfaInfo {
- u32 nfaOffset;
- u32 stateOffset;
- u32 fullStateOffset; /* offset in scratch, relative to ??? */
- u32 ekeyListOffset; /* suffix, relative to base of rose, 0 if no ekeys */
- u8 no_retrigger; /* TODO */
- u8 in_sbmatcher; /**< this outfix should not be run in small-block
- * execution, as it will be handled by the sbmatcher
- * HWLM table. */
- u8 eod; /* suffix is triggered by the etable --> can only produce eod
- * matches */
-};
-
-#define MAX_STORED_LEFTFIX_LAG 127 /* max leftfix lag that we can store in one
- * whole byte (OWB) (streaming only). Other
- * values in OWB are reserved for zombie
- * status */
-#define OWB_ZOMBIE_ALWAYS_YES 128 /* nfa will always answer yes to any rose
- * prefix checks */
-
+ char eod_check; /**< nfa is used by the event eod literal */
+    u32 countingMiracleOffset; /**< if not 0, offset to RoseCountingMiracle. */
+ rose_group squash_mask; /* & mask applied when rose nfa dies */
+};
+
+struct NfaInfo {
+ u32 nfaOffset;
+ u32 stateOffset;
+ u32 fullStateOffset; /* offset in scratch, relative to ??? */
+ u32 ekeyListOffset; /* suffix, relative to base of rose, 0 if no ekeys */
+ u8 no_retrigger; /* TODO */
+ u8 in_sbmatcher; /**< this outfix should not be run in small-block
+ * execution, as it will be handled by the sbmatcher
+ * HWLM table. */
+ u8 eod; /* suffix is triggered by the etable --> can only produce eod
+ * matches */
+};
+
+#define MAX_STORED_LEFTFIX_LAG 127 /* max leftfix lag that we can store in one
+ * whole byte (OWB) (streaming only). Other
+ * values in OWB are reserved for zombie
+ * status */
+#define OWB_ZOMBIE_ALWAYS_YES 128 /* nfa will always answer yes to any rose
+ * prefix checks */
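A small sketch, under the encoding above, of writing and classifying an OWB value (helper names are hypothetical):

    static really_inline
    u8 owbEncodeLag(u32 lag) {
        assert(lag <= MAX_STORED_LEFTFIX_LAG); /* must fit the literal range */
        return (u8)lag;
    }

    static really_inline
    int owbIsZombie(u8 owb) {
        return owb == OWB_ZOMBIE_ALWAYS_YES; /* reserved value, not a lag */
    }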
+
/* offset of the status flags in the stream state. */
#define ROSE_STATE_OFFSET_STATUS_FLAGS 0
-
+
/* offset of role mmbit in stream state (just after the status flag byte). */
#define ROSE_STATE_OFFSET_ROLE_MMBIT sizeof(u8)
-
-/**
- * \brief Rose state offsets.
- *
- * Stores pre-calculated offsets (in bytes) to MOST of the state structures
- * used by Rose, relative to the start of stream state.
- *
- * State not covered by this structure includes:
- *
+
+/**
+ * \brief Rose state offsets.
+ *
+ * Stores pre-calculated offsets (in bytes) to MOST of the state structures
+ * used by Rose, relative to the start of stream state.
+ *
+ * State not covered by this structure includes:
+ *
* -# the first byte, containing the status bitmask
- * -# the role state multibit
- */
-struct RoseStateOffsets {
- /** History buffer.
- *
+ * -# the role state multibit
+ */
+struct RoseStateOffsets {
+ /** History buffer.
+ *
* Max size of history is RoseEngine::historyRequired. */
- u32 history;
-
+ u32 history;
+
/** Exhausted multibit.
- *
+ *
* entry per exhaustible key (used by Highlander mode). If a bit is set,
- * reports with that ekey should not be delivered to the user. */
- u32 exhausted;
-
+ * reports with that ekey should not be delivered to the user. */
+ u32 exhausted;
+
/** size in bytes of exhausted multibit */
u32 exhausted_size;
-
+
/** Logical multibit.
*
     * entry per logical key (operand/operator) (used by Logical Combination). */
@@ -218,46 +218,46 @@ struct RoseStateOffsets {
/** size in bytes of combination multibit */
u32 combVec_size;
- /** Multibit for active suffix/outfix engines. */
- u32 activeLeafArray;
-
+ /** Multibit for active suffix/outfix engines. */
+ u32 activeLeafArray;
+
/** Size of multibit for active suffix/outfix engines in bytes. */
u32 activeLeafArray_size;
/** Multibit for active leftfix (prefix/infix) engines. */
- u32 activeLeftArray;
-
+ u32 activeLeftArray;
+
/** Size of multibit for active leftfix (prefix/infix) engines in bytes. */
- u32 activeLeftArray_size;
-
- /** Table of lag information (stored as one byte per engine) for active
- * Rose leftfix engines. */
- u32 leftfixLagTable;
-
- /** State for anchored matchers (McClellan DFAs). */
- u32 anchorState;
-
- /** Packed Rose groups value. */
- u32 groups;
-
- /** Size of packed Rose groups value, in bytes. */
- u32 groups_size;
-
+ u32 activeLeftArray_size;
+
+ /** Table of lag information (stored as one byte per engine) for active
+ * Rose leftfix engines. */
+ u32 leftfixLagTable;
+
+ /** State for anchored matchers (McClellan DFAs). */
+ u32 anchorState;
+
+ /** Packed Rose groups value. */
+ u32 groups;
+
+ /** Size of packed Rose groups value, in bytes. */
+ u32 groups_size;
+
/** State for long literal support. */
u32 longLitState;
-
+
/** Size of the long literal state. */
u32 longLitState_size;
- /** Packed SOM location slots. */
- u32 somLocation;
-
- /** Multibit guarding SOM location slots. */
- u32 somValid;
-
- /** Multibit guarding SOM location slots. */
- u32 somWritable;
-
+ /** Packed SOM location slots. */
+ u32 somLocation;
+
+ /** Multibit guarding SOM location slots. */
+ u32 somValid;
+
+ /** Multibit guarding SOM location slots. */
+ u32 somWritable;
+
/** Size of each of the somValid and somWritable multibits, in bytes. */
u32 somMultibit_size;
@@ -265,11 +265,11 @@ struct RoseStateOffsets {
* The NFA state region extends to end. */
u32 nfaStateBegin;
- /** Total size of Rose state, in bytes. */
- u32 end;
-};
-
-struct RoseBoundaryReports {
+ /** Total size of Rose state, in bytes. */
+ u32 end;
+};
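Every member is a byte offset from the start of stream state, so a region is recovered with plain pointer arithmetic, e.g. this hypothetical history accessor (the real accessors of this shape live in runtime.h):

    static really_inline
    char *getHistoryRegion(const struct RoseEngine *t, char *state) {
        return state + t->stateOffsets.history; /* offset in bytes */
    }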
+
+struct RoseBoundaryReports {
/** \brief 0 if no reports list, otherwise offset of program to run to
* deliver reports at EOD. */
u32 reportEodOffset;
@@ -281,35 +281,35 @@ struct RoseBoundaryReports {
/** \brief 0 if no reports list, otherwise offset of program to run to
* deliver reports if EOD is at offset 0. Superset of other programs. */
u32 reportZeroEodOffset;
-};
-
-/* NFA Queue Assignment
- *
- * --- 0
- * (|) chained mpv (if present)
- * #
- * --- outfixBeginQueue -
- * | outfixes. enabled at offset 0.
- * |
- * #
- * --- outfixEndQueue -
- * | suffixes. enabled by rose roles.
- * |
- * #
- * --- leftfixBeginQueue -
- * | prefixes
- * |
- * #
- * --- ?
- * | infixes
- * |
- * #
- */
-
-#define ROSE_RUNTIME_FULL_ROSE 0
-#define ROSE_RUNTIME_PURE_LITERAL 1
-#define ROSE_RUNTIME_SINGLE_OUTFIX 2
-
+};
+
+/* NFA Queue Assignment
+ *
+ * --- 0
+ * (|) chained mpv (if present)
+ * #
+ * --- outfixBeginQueue -
+ * | outfixes. enabled at offset 0.
+ * |
+ * #
+ * --- outfixEndQueue -
+ * | suffixes. enabled by rose roles.
+ * |
+ * #
+ * --- leftfixBeginQueue -
+ * | prefixes
+ * |
+ * #
+ * --- ?
+ * | infixes
+ * |
+ * #
+ */
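Expressed as code, the boundaries in the diagram partition queue indices as in the following sketch; the helper is illustrative only, though the three fields are real members of RoseEngine below:

    static really_inline
    const char *queueClass(const struct RoseEngine *t, u32 qi) {
        if (qi < t->outfixBeginQueue) {
            return "chained mpv";
        }
        if (qi < t->outfixEndQueue) {
            return "outfix";
        }
        if (qi < t->leftfixBeginQueue) {
            return "suffix";
        }
        return "leftfix (prefix/infix)";
    }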
+
+#define ROSE_RUNTIME_FULL_ROSE 0
+#define ROSE_RUNTIME_PURE_LITERAL 1
+#define ROSE_RUNTIME_SINGLE_OUTFIX 2
+
/**
* \brief Runtime structure header for Rose.
*
@@ -323,67 +323,67 @@ struct RoseBoundaryReports {
* -# array of NFA offsets, one per queue
* -# array of state offsets, one per queue (+)
*
- * (+) stateOffset array note: Offsets in the array are either into the stream
- * state (normal case) or into the tstate region of scratch (for transient rose
- * nfas). Rose nfa info table can distinguish the cases.
- */
-struct RoseEngine {
+ * (+) stateOffset array note: Offsets in the array are either into the stream
+ * state (normal case) or into the tstate region of scratch (for transient rose
+ * nfas). Rose nfa info table can distinguish the cases.
+ */
+struct RoseEngine {
u8 pureLiteral; /* Indicator of pure literal API */
- u8 noFloatingRoots; /* only need to run the anchored table if something
- * matched in the anchored table */
- u8 requiresEodCheck; /* stuff happens at eod time */
- u8 hasOutfixesInSmallBlock; /**< has at least one outfix that must run even
- in small block scans. */
- u8 runtimeImpl; /**< can we just run the floating table or a single outfix?
- * or do we need a full rose? */
- u8 mpvTriggeredByLeaf; /**< need to check (suf|out)fixes for mpv trigger */
- u8 canExhaust; /**< every pattern has an exhaustion key */
- u8 hasSom; /**< has at least one pattern which tracks SOM. */
- u8 somHorizon; /**< width in bytes of SOM offset storage (governed by
- SOM precision) */
- u32 mode; /**< scanning mode, one of HS_MODE_{BLOCK,STREAM,VECTORED} */
- u32 historyRequired; /**< max amount of history required for streaming */
- u32 ekeyCount; /**< number of exhaustion keys */
+ u8 noFloatingRoots; /* only need to run the anchored table if something
+ * matched in the anchored table */
+    u8 requiresEodCheck; /* processing is required at EOD */
+ u8 hasOutfixesInSmallBlock; /**< has at least one outfix that must run even
+ in small block scans. */
+ u8 runtimeImpl; /**< can we just run the floating table or a single outfix?
+ * or do we need a full rose? */
+ u8 mpvTriggeredByLeaf; /**< need to check (suf|out)fixes for mpv trigger */
+ u8 canExhaust; /**< every pattern has an exhaustion key */
+ u8 hasSom; /**< has at least one pattern which tracks SOM. */
+ u8 somHorizon; /**< width in bytes of SOM offset storage (governed by
+ SOM precision) */
+ u32 mode; /**< scanning mode, one of HS_MODE_{BLOCK,STREAM,VECTORED} */
+ u32 historyRequired; /**< max amount of history required for streaming */
+ u32 ekeyCount; /**< number of exhaustion keys */
u32 lkeyCount; /**< number of logical keys */
u32 lopCount; /**< number of logical ops */
u32 ckeyCount; /**< number of combination keys */
u32 logicalTreeOffset; /**< offset to mapping from lkey to LogicalOp */
u32 combInfoMapOffset; /**< offset to mapping from ckey to combInfo */
- u32 dkeyCount; /**< number of dedupe keys */
+ u32 dkeyCount; /**< number of dedupe keys */
u32 dkeyLogSize; /**< size of fatbit for storing dkey log (bytes) */
- u32 invDkeyOffset; /**< offset to table mapping from dkeys to the external
- * report ids */
- u32 somLocationCount; /**< number of som locations required */
+ u32 invDkeyOffset; /**< offset to table mapping from dkeys to the external
+ * report ids */
+ u32 somLocationCount; /**< number of som locations required */
u32 somLocationFatbitSize; /**< size of SOM location fatbit (bytes) */
- u32 rolesWithStateCount; // number of roles with entries in state bitset
- u32 stateSize; /* size of the state bitset
- * WARNING: not the size of the rose state */
- u32 anchorStateSize; /* size of the state for the anchor dfas */
- u32 tStateSize; /* total size of the state for transient rose nfas */
- u32 scratchStateSize; /**< uncompressed state req'd for NFAs in scratch;
- * used for sizing scratch only. */
- u32 smallWriteOffset; /**< offset of small-write matcher */
- u32 amatcherOffset; // offset of the anchored literal matcher (bytes)
- u32 ematcherOffset; // offset of the eod-anchored literal matcher (bytes)
- u32 fmatcherOffset; // offset of the floating literal matcher (bytes)
+ u32 rolesWithStateCount; // number of roles with entries in state bitset
+ u32 stateSize; /* size of the state bitset
+ * WARNING: not the size of the rose state */
+ u32 anchorStateSize; /* size of the state for the anchor dfas */
+ u32 tStateSize; /* total size of the state for transient rose nfas */
+ u32 scratchStateSize; /**< uncompressed state req'd for NFAs in scratch;
+ * used for sizing scratch only. */
+ u32 smallWriteOffset; /**< offset of small-write matcher */
+ u32 amatcherOffset; // offset of the anchored literal matcher (bytes)
+ u32 ematcherOffset; // offset of the eod-anchored literal matcher (bytes)
+ u32 fmatcherOffset; // offset of the floating literal matcher (bytes)
u32 drmatcherOffset; // offset of the delayed rebuild table (bytes)
- u32 sbmatcherOffset; // offset of the small-block literal matcher (bytes)
+ u32 sbmatcherOffset; // offset of the small-block literal matcher (bytes)
u32 longLitTableOffset; // offset of the long literal table
- u32 amatcherMinWidth; /**< minimum number of bytes required for a pattern
- * involved with the anchored table to produce a full
- * match. */
- u32 fmatcherMinWidth; /**< minimum number of bytes required for a pattern
- * involved with the floating table to produce a full
- * match. */
- u32 eodmatcherMinWidth; /**< minimum number of bytes required for a pattern
- * involved with the eod table to produce a full
- * match. */
- u32 amatcherMaxBiAnchoredWidth; /**< maximum number of bytes that can still
- * produce a match for a pattern involved
- * with the anchored table. */
- u32 fmatcherMaxBiAnchoredWidth; /**< maximum number of bytes that can still
- * produce a match for a pattern involved
- * with the anchored table. */
+ u32 amatcherMinWidth; /**< minimum number of bytes required for a pattern
+ * involved with the anchored table to produce a full
+ * match. */
+ u32 fmatcherMinWidth; /**< minimum number of bytes required for a pattern
+ * involved with the floating table to produce a full
+ * match. */
+ u32 eodmatcherMinWidth; /**< minimum number of bytes required for a pattern
+ * involved with the eod table to produce a full
+ * match. */
+ u32 amatcherMaxBiAnchoredWidth; /**< maximum number of bytes that can still
+ * produce a match for a pattern involved
+ * with the anchored table. */
+ u32 fmatcherMaxBiAnchoredWidth; /**< maximum number of bytes that can still
+ * produce a match for a pattern involved
+                                     * with the floating table. */
/**
* \brief Offset of u32 array of program offsets for reports used by
@@ -407,11 +407,11 @@ struct RoseEngine {
*/
u32 anchoredProgramOffset;
- u32 activeArrayCount; //number of nfas tracked in the active array
- u32 activeLeftCount; //number of nfas tracked in the active rose array
- u32 queueCount; /**< number of nfa queues */
+    u32 activeArrayCount; // number of nfas tracked in the active array
+    u32 activeLeftCount; // number of nfas tracked in the active rose array
+ u32 queueCount; /**< number of nfa queues */
u32 activeQueueArraySize; //!< size of fatbit for active queues (bytes)
-
+
u32 eagerIterOffset; /**< offset to sparse iter for eager prefixes or 0 if
* none */
@@ -422,72 +422,72 @@ struct RoseEngine {
/** \brief Size of the handled keys fatbit in scratch (bytes). */
u32 handledKeyFatbitSize;
- u32 leftOffset;
- u32 roseCount;
-
+ u32 leftOffset;
+ u32 roseCount;
+
u32 eodProgramOffset; //!< EOD program, otherwise 0.
u32 flushCombProgramOffset; /**< FlushCombination program, otherwise 0 */
u32 lastFlushCombProgramOffset; /**< LastFlushCombination program,
* otherwise 0 */
-
- u32 lastByteHistoryIterOffset; // if non-zero
-
- /** \brief Minimum number of bytes required to match. */
- u32 minWidth;
-
- /** \brief Minimum number of bytes required to match, excluding boundary
- * reports. */
- u32 minWidthExcludingBoundaries;
-
- u32 maxBiAnchoredWidth; /* ROSE_BOUND_INF if any non bianchored patterns
- * present */
- u32 anchoredDistance; // region to run the anchored table over
- u32 anchoredMinDistance; /* start of region to run anchored table over */
- u32 floatingDistance; /* end of region to run the floating table over
- ROSE_BOUND_INF if not bounded */
- u32 floatingMinDistance; /* start of region to run floating table over */
- u32 smallBlockDistance; /* end of region to run the floating table over
- ROSE_BOUND_INF if not bounded */
- u32 floatingMinLiteralMatchOffset; /* the minimum offset that we can get a
- * 'valid' match from the floating
- * table */
- u32 nfaInfoOffset; /* offset to the nfa info offset array */
- rose_group initialGroups;
+
+ u32 lastByteHistoryIterOffset; // if non-zero
+
+ /** \brief Minimum number of bytes required to match. */
+ u32 minWidth;
+
+ /** \brief Minimum number of bytes required to match, excluding boundary
+ * reports. */
+ u32 minWidthExcludingBoundaries;
+
+    u32 maxBiAnchoredWidth; /* ROSE_BOUND_INF if any non-bianchored patterns
+ * present */
+ u32 anchoredDistance; // region to run the anchored table over
+ u32 anchoredMinDistance; /* start of region to run anchored table over */
+ u32 floatingDistance; /* end of region to run the floating table over
+ ROSE_BOUND_INF if not bounded */
+ u32 floatingMinDistance; /* start of region to run floating table over */
+ u32 smallBlockDistance; /* end of region to run the floating table over
+ ROSE_BOUND_INF if not bounded */
+ u32 floatingMinLiteralMatchOffset; /* the minimum offset that we can get a
+ * 'valid' match from the floating
+ * table */
+ u32 nfaInfoOffset; /* offset to the nfa info offset array */
+ rose_group initialGroups;
rose_group floating_group_mask; /* groups that are used by the ftable */
- u32 size; // (bytes)
- u32 delay_count; /* number of delayed literal ids. */
+ u32 size; // (bytes)
+ u32 delay_count; /* number of delayed literal ids. */
u32 delay_fatbit_size; //!< size of each delay fatbit in scratch (bytes)
- u32 anchored_count; /* number of anchored literal ids */
+ u32 anchored_count; /* number of anchored literal ids */
u32 anchored_fatbit_size; //!< size of each anch fatbit in scratch (bytes)
- u32 maxFloatingDelayedMatch; /* max offset that a delayed literal can
- * usefully be reported */
- u32 delayRebuildLength; /* length of the history region which needs to be
- * rescanned when we are doing a delayed literal
- * rebuild scan. */
- struct RoseStateOffsets stateOffsets;
- struct RoseBoundaryReports boundary;
- u32 totalNumLiterals; /* total number of literals including dr */
- u32 asize; /* size of the atable */
- u32 outfixBeginQueue; /* first outfix queue */
- u32 outfixEndQueue; /* one past the last outfix queue */
- u32 leftfixBeginQueue; /* first prefix/infix queue */
- u32 initMpvNfa; /* (allegedly chained) mpv to force on at init */
- u32 rosePrefixCount; /* number of rose prefixes */
- u32 activeLeftIterOffset; /* mmbit_sparse_iter over non-transient roses */
- u32 ematcherRegionSize; /* max region size to pass to ematcher */
- u32 somRevCount; /**< number of som reverse nfas */
- u32 somRevOffsetOffset; /**< offset to array of offsets to som rev nfas */
+ u32 maxFloatingDelayedMatch; /* max offset that a delayed literal can
+ * usefully be reported */
+ u32 delayRebuildLength; /* length of the history region which needs to be
+ * rescanned when we are doing a delayed literal
+ * rebuild scan. */
+ struct RoseStateOffsets stateOffsets;
+ struct RoseBoundaryReports boundary;
+ u32 totalNumLiterals; /* total number of literals including dr */
+ u32 asize; /* size of the atable */
+ u32 outfixBeginQueue; /* first outfix queue */
+ u32 outfixEndQueue; /* one past the last outfix queue */
+ u32 leftfixBeginQueue; /* first prefix/infix queue */
+ u32 initMpvNfa; /* (allegedly chained) mpv to force on at init */
+ u32 rosePrefixCount; /* number of rose prefixes */
+ u32 activeLeftIterOffset; /* mmbit_sparse_iter over non-transient roses */
+ u32 ematcherRegionSize; /* max region size to pass to ematcher */
+ u32 somRevCount; /**< number of som reverse nfas */
+ u32 somRevOffsetOffset; /**< offset to array of offsets to som rev nfas */
u32 longLitStreamState; // size in bytes
-
- struct scatter_full_plan state_init;
-};
-
-struct ALIGN_CL_DIRECTIVE anchored_matcher_info {
- u32 next_offset; /* relative to this, 0 for end */
- u32 state_offset; /* relative to anchorState */
- u32 anchoredMinDistance; /* start of region to run anchored table over */
-};
-
+
+ struct scatter_full_plan state_init;
+};
+
+struct ALIGN_CL_DIRECTIVE anchored_matcher_info {
+ u32 next_offset; /* relative to this, 0 for end */
+ u32 state_offset; /* relative to anchorState */
+ u32 anchoredMinDistance; /* start of region to run anchored table over */
+};
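anchored_matcher_info records form an intrusive singly linked list chained by next_offset, with 0 terminating the list (runAnchoredTableStream in stream.c below walks it this way); a minimal step function (hypothetical helper) looks like:

    static really_inline
    const struct anchored_matcher_info *
    nextAnchoredMatcher(const struct anchored_matcher_info *curr) {
        if (!curr->next_offset) {
            return NULL; /* end of chain */
        }
        return (const struct anchored_matcher_info *)
                   ((const char *)curr + curr->next_offset);
    }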
+
/**
* \brief Long literal subtable for a particular mode (caseful or nocase).
*/
@@ -554,106 +554,106 @@ struct RoseLongLitHashEntry {
u32 str_len;
};
-static really_inline
-const struct anchored_matcher_info *getALiteralMatcher(
- const struct RoseEngine *t) {
- if (!t->amatcherOffset) {
- return NULL;
- }
-
- const char *lt = (const char *)t + t->amatcherOffset;
- assert(ISALIGNED_CL(lt));
- return (const struct anchored_matcher_info *)lt;
-}
-
-struct HWLM;
-
-static really_inline
-const struct HWLM *getFLiteralMatcher(const struct RoseEngine *t) {
- if (!t->fmatcherOffset) {
- return NULL;
- }
-
- const char *lt = (const char *)t + t->fmatcherOffset;
- assert(ISALIGNED_CL(lt));
- return (const struct HWLM *)lt;
-}
-
-static really_inline
-const void *getSBLiteralMatcher(const struct RoseEngine *t) {
- if (!t->sbmatcherOffset) {
- return NULL;
- }
-
- const char *matcher = (const char *)t + t->sbmatcherOffset;
- assert(ISALIGNED_N(matcher, 8));
- return matcher;
-}
-
-static really_inline
-const struct LeftNfaInfo *getLeftTable(const struct RoseEngine *t) {
- const struct LeftNfaInfo *r
- = (const struct LeftNfaInfo *)((const char *)t + t->leftOffset);
- assert(ISALIGNED_N(r, 4));
- return r;
-}
-
-struct mmbit_sparse_iter; // forward decl
-
-static really_inline
-const struct mmbit_sparse_iter *getActiveLeftIter(const struct RoseEngine *t) {
- assert(t->activeLeftIterOffset);
- const struct mmbit_sparse_iter *it = (const struct mmbit_sparse_iter *)
- ((const char *)t + t->activeLeftIterOffset);
- assert(ISALIGNED_N(it, 4));
- return it;
-}
-
-static really_inline
-const struct NfaInfo *getNfaInfoByQueue(const struct RoseEngine *t, u32 qi) {
- const struct NfaInfo *infos
- = (const struct NfaInfo *)((const char *)t + t->nfaInfoOffset);
- assert(ISALIGNED_N(infos, sizeof(u32)));
-
- return &infos[qi];
-}
-
-static really_inline
-const struct NFA *getNfaByInfo(const struct RoseEngine *t,
- const struct NfaInfo *info) {
- return (const struct NFA *)((const char *)t + info->nfaOffset);
-}
-
-static really_inline
-const struct NFA *getNfaByQueue(const struct RoseEngine *t, u32 qi) {
- const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
- return getNfaByInfo(t, info);
-}
-
-static really_inline
-u32 queueToLeftIndex(const struct RoseEngine *t, u32 qi) {
- assert(qi >= t->leftfixBeginQueue);
- return qi - t->leftfixBeginQueue;
-}
-
-static really_inline
-const struct LeftNfaInfo *getLeftInfoByQueue(const struct RoseEngine *t,
- u32 qi) {
- const struct LeftNfaInfo *infos = getLeftTable(t);
- return &infos[queueToLeftIndex(t, qi)];
-}
-
-struct SmallWriteEngine;
-
-static really_inline
-const struct SmallWriteEngine *getSmallWrite(const struct RoseEngine *t) {
- if (!t->smallWriteOffset) {
- return NULL;
- }
-
- const struct SmallWriteEngine *smwr =
- (const struct SmallWriteEngine *)((const char *)t + t->smallWriteOffset);
- return smwr;
-}
-
-#endif // ROSE_INTERNAL_H
+static really_inline
+const struct anchored_matcher_info *getALiteralMatcher(
+ const struct RoseEngine *t) {
+ if (!t->amatcherOffset) {
+ return NULL;
+ }
+
+ const char *lt = (const char *)t + t->amatcherOffset;
+ assert(ISALIGNED_CL(lt));
+ return (const struct anchored_matcher_info *)lt;
+}
+
+struct HWLM;
+
+static really_inline
+const struct HWLM *getFLiteralMatcher(const struct RoseEngine *t) {
+ if (!t->fmatcherOffset) {
+ return NULL;
+ }
+
+ const char *lt = (const char *)t + t->fmatcherOffset;
+ assert(ISALIGNED_CL(lt));
+ return (const struct HWLM *)lt;
+}
+
+static really_inline
+const void *getSBLiteralMatcher(const struct RoseEngine *t) {
+ if (!t->sbmatcherOffset) {
+ return NULL;
+ }
+
+ const char *matcher = (const char *)t + t->sbmatcherOffset;
+ assert(ISALIGNED_N(matcher, 8));
+ return matcher;
+}
+
+static really_inline
+const struct LeftNfaInfo *getLeftTable(const struct RoseEngine *t) {
+ const struct LeftNfaInfo *r
+ = (const struct LeftNfaInfo *)((const char *)t + t->leftOffset);
+ assert(ISALIGNED_N(r, 4));
+ return r;
+}
+
+struct mmbit_sparse_iter; // forward decl
+
+static really_inline
+const struct mmbit_sparse_iter *getActiveLeftIter(const struct RoseEngine *t) {
+ assert(t->activeLeftIterOffset);
+ const struct mmbit_sparse_iter *it = (const struct mmbit_sparse_iter *)
+ ((const char *)t + t->activeLeftIterOffset);
+ assert(ISALIGNED_N(it, 4));
+ return it;
+}
+
+static really_inline
+const struct NfaInfo *getNfaInfoByQueue(const struct RoseEngine *t, u32 qi) {
+ const struct NfaInfo *infos
+ = (const struct NfaInfo *)((const char *)t + t->nfaInfoOffset);
+ assert(ISALIGNED_N(infos, sizeof(u32)));
+
+ return &infos[qi];
+}
+
+static really_inline
+const struct NFA *getNfaByInfo(const struct RoseEngine *t,
+ const struct NfaInfo *info) {
+ return (const struct NFA *)((const char *)t + info->nfaOffset);
+}
+
+static really_inline
+const struct NFA *getNfaByQueue(const struct RoseEngine *t, u32 qi) {
+ const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
+ return getNfaByInfo(t, info);
+}
+
+static really_inline
+u32 queueToLeftIndex(const struct RoseEngine *t, u32 qi) {
+ assert(qi >= t->leftfixBeginQueue);
+ return qi - t->leftfixBeginQueue;
+}
+
+static really_inline
+const struct LeftNfaInfo *getLeftInfoByQueue(const struct RoseEngine *t,
+ u32 qi) {
+ const struct LeftNfaInfo *infos = getLeftTable(t);
+ return &infos[queueToLeftIndex(t, qi)];
+}
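Usage sketch tying these accessors together for a leftfix queue index; the function is illustrative, not part of the header:

    static really_inline
    void exampleLeftfixLookup(const struct RoseEngine *t, u32 qi) {
        assert(qi >= t->leftfixBeginQueue); /* must be a leftfix queue */
        const struct LeftNfaInfo *left = getLeftInfoByQueue(t, qi);
        const struct NFA *nfa = getNfaByQueue(t, qi);
        DEBUG_PRINTF("queue %u: maxLag=%u, nfa at %p\n", qi, left->maxLag,
                     (const void *)nfa);
    }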
+
+struct SmallWriteEngine;
+
+static really_inline
+const struct SmallWriteEngine *getSmallWrite(const struct RoseEngine *t) {
+ if (!t->smallWriteOffset) {
+ return NULL;
+ }
+
+ const struct SmallWriteEngine *smwr =
+ (const struct SmallWriteEngine *)((const char *)t + t->smallWriteOffset);
+ return smwr;
+}
+
+#endif // ROSE_INTERNAL_H
diff --git a/contrib/libs/hyperscan/src/rose/rose_types.h b/contrib/libs/hyperscan/src/rose/rose_types.h
index 5e22191aec..9dcef1cef0 100644
--- a/contrib/libs/hyperscan/src/rose/rose_types.h
+++ b/contrib/libs/hyperscan/src/rose/rose_types.h
@@ -1,42 +1,42 @@
-/*
+/*
* Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/** \file
* \brief Rose runtime types (callbacks, etc).
*/
-#ifndef ROSE_TYPES_H
-#define ROSE_TYPES_H
-
-#include "ue2common.h"
-
+#ifndef ROSE_TYPES_H
+#define ROSE_TYPES_H
+
+#include "ue2common.h"
+
struct hs_scratch;
-
+
/**
* \brief Continue without checking for exhaustion.
*
@@ -65,7 +65,7 @@ typedef int (*RoseCallback)(u64a offset, ReportID id,
*
* \see RoseCallback
*/
-typedef int (*RoseCallbackSom)(u64a from_offset, u64a to_offset, ReportID id,
+typedef int (*RoseCallbackSom)(u64a from_offset, u64a to_offset, ReportID id,
struct hs_scratch *scratch);
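A conforming handler might look like the following sketch; the function name is hypothetical, and MO_CONTINUE_MATCHING is the usual non-zero continue value from ue2common.h:

    static int exampleSomCallback(u64a from_offset, u64a to_offset,
                                  ReportID id, struct hs_scratch *scratch) {
        DEBUG_PRINTF("match [%llu,%llu] for report %u\n", from_offset,
                     to_offset, id);
        return MO_CONTINUE_MATCHING; /* keep scanning */
    }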
-
-#endif
+
+#endif
diff --git a/contrib/libs/hyperscan/src/rose/runtime.h b/contrib/libs/hyperscan/src/rose/runtime.h
index 36508c0d67..5fbb2b7416 100644
--- a/contrib/libs/hyperscan/src/rose/runtime.h
+++ b/contrib/libs/hyperscan/src/rose/runtime.h
@@ -1,44 +1,44 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Runtime functions shared between various Rose runtime code.
- */
-
-#ifndef ROSE_RUNTIME_H
-#define ROSE_RUNTIME_H
-
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Runtime functions shared between various Rose runtime code.
+ */
+
+#ifndef ROSE_RUNTIME_H
+#define ROSE_RUNTIME_H
+
#include "rose_internal.h"
-#include "scratch.h"
-#include "util/partial_store.h"
-
-/*
- * ROSE STATE LAYOUT:
+#include "scratch.h"
+#include "util/partial_store.h"
+
+/*
+ * ROSE STATE LAYOUT:
*
* - runtime status byte (halt status, delay rebuild dirty, etc)
* - rose state multibit
@@ -51,83 +51,83 @@
* - exhausted bitvector
* - som slots, som multibit arrays
* - nfa stream state (for each nfa)
- */
-
-#define rose_inline really_inline
-
+ */
+
+#define rose_inline really_inline
+
/* Maximum offset that we will eagerly run prefixes to. Beyond this point, eager
* prefixes are always run in exactly the same way as normal prefixes. */
#define EAGER_STOP_OFFSET 64
-
-
-static really_inline
-const void *getByOffset(const struct RoseEngine *t, u32 offset) {
- assert(offset < t->size);
- return (const u8 *)t + offset;
-}
-
-static really_inline
+
+
+static really_inline
+const void *getByOffset(const struct RoseEngine *t, u32 offset) {
+ assert(offset < t->size);
+ return (const u8 *)t + offset;
+}
+
+static really_inline
void *getRoleState(char *state) {
return state + ROSE_STATE_OFFSET_ROLE_MMBIT;
-}
-
-/** \brief Fetch the active array for suffix nfas. */
-static really_inline
+}
+
+/** \brief Fetch the active array for suffix nfas. */
+static really_inline
u8 *getActiveLeafArray(const struct RoseEngine *t, char *state) {
return (u8 *)(state + t->stateOffsets.activeLeafArray);
-}
-
-/** \brief Fetch the active array for rose nfas. */
-static really_inline
+}
+
+/** \brief Fetch the active array for rose nfas. */
+static really_inline
u8 *getActiveLeftArray(const struct RoseEngine *t, char *state) {
return (u8 *)(state + t->stateOffsets.activeLeftArray);
-}
-
-static really_inline
+}
+
+static really_inline
rose_group loadGroups(const struct RoseEngine *t, const char *state) {
- return partial_load_u64a(state + t->stateOffsets.groups,
- t->stateOffsets.groups_size);
-
-}
-
-static really_inline
+ return partial_load_u64a(state + t->stateOffsets.groups,
+ t->stateOffsets.groups_size);
+
+}
+
+static really_inline
void storeGroups(const struct RoseEngine *t, char *state, rose_group groups) {
- partial_store_u64a(state + t->stateOffsets.groups, groups,
- t->stateOffsets.groups_size);
-}
-
-static really_inline
+ partial_store_u64a(state + t->stateOffsets.groups, groups,
+ t->stateOffsets.groups_size);
+}
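Only stateOffsets.groups_size bytes of the 64-bit group mask are held in stream state, which is what the partial load/store pair above implements; a round trip might look like this sketch (helper name hypothetical):

    static really_inline
    void exampleGroupToggle(const struct RoseEngine *t, char *state) {
        rose_group groups = loadGroups(t, state);
        groups |= (rose_group)1 << 2;  /* switch group 2 on */
        storeGroups(t, state, groups); /* writes groups_size bytes only */
    }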
+
+static really_inline
u8 *getLongLitState(const struct RoseEngine *t, char *state) {
return (u8 *)(state + t->stateOffsets.longLitState);
-}
-
-static really_inline
+}
+
+static really_inline
u8 *getLeftfixLagTable(const struct RoseEngine *t, char *state) {
return (u8 *)(state + t->stateOffsets.leftfixLagTable);
-}
-
-static really_inline
+}
+
+static really_inline
const u8 *getLeftfixLagTableConst(const struct RoseEngine *t,
const char *state) {
return (const u8 *)(state + t->stateOffsets.leftfixLagTable);
-}
-
-static really_inline
-u32 has_chained_nfas(const struct RoseEngine *t) {
- return t->outfixBeginQueue;
-}
-
-static really_inline
-void updateLastMatchOffset(struct RoseContext *tctxt, u64a offset) {
- DEBUG_PRINTF("match @%llu, last match @%llu\n", offset,
- tctxt->lastMatchOffset);
-
- assert(offset >= tctxt->minMatchOffset);
- assert(offset >= tctxt->lastMatchOffset);
- tctxt->lastMatchOffset = offset;
-}
-
-static really_inline
+}
+
+static really_inline
+u32 has_chained_nfas(const struct RoseEngine *t) {
+ return t->outfixBeginQueue;
+}
+
+static really_inline
+void updateLastMatchOffset(struct RoseContext *tctxt, u64a offset) {
+ DEBUG_PRINTF("match @%llu, last match @%llu\n", offset,
+ tctxt->lastMatchOffset);
+
+ assert(offset >= tctxt->minMatchOffset);
+ assert(offset >= tctxt->lastMatchOffset);
+ tctxt->lastMatchOffset = offset;
+}
+
+static really_inline
void updateLastCombMatchOffset(struct RoseContext *tctxt, u64a offset) {
DEBUG_PRINTF("match @%llu, last match @%llu\n", offset,
tctxt->lastCombMatchOffset);
@@ -137,24 +137,24 @@ void updateLastCombMatchOffset(struct RoseContext *tctxt, u64a offset) {
}
static really_inline
-void updateMinMatchOffset(struct RoseContext *tctxt, u64a offset) {
- DEBUG_PRINTF("min match now @%llu, was @%llu\n", offset,
- tctxt->minMatchOffset);
-
- assert(offset >= tctxt->minMatchOffset);
- assert(offset >= tctxt->minNonMpvMatchOffset);
- tctxt->minMatchOffset = offset;
- tctxt->minNonMpvMatchOffset = offset;
-}
-
-static really_inline
-void updateMinMatchOffsetFromMpv(struct RoseContext *tctxt, u64a offset) {
- DEBUG_PRINTF("min match now @%llu, was @%llu\n", offset,
- tctxt->minMatchOffset);
-
- assert(offset >= tctxt->minMatchOffset);
- assert(tctxt->minNonMpvMatchOffset >= tctxt->minMatchOffset);
- tctxt->minMatchOffset = offset;
- tctxt->minNonMpvMatchOffset = MAX(tctxt->minNonMpvMatchOffset, offset);
-}
-#endif
+void updateMinMatchOffset(struct RoseContext *tctxt, u64a offset) {
+ DEBUG_PRINTF("min match now @%llu, was @%llu\n", offset,
+ tctxt->minMatchOffset);
+
+ assert(offset >= tctxt->minMatchOffset);
+ assert(offset >= tctxt->minNonMpvMatchOffset);
+ tctxt->minMatchOffset = offset;
+ tctxt->minNonMpvMatchOffset = offset;
+}
+
+static really_inline
+void updateMinMatchOffsetFromMpv(struct RoseContext *tctxt, u64a offset) {
+ DEBUG_PRINTF("min match now @%llu, was @%llu\n", offset,
+ tctxt->minMatchOffset);
+
+ assert(offset >= tctxt->minMatchOffset);
+ assert(tctxt->minNonMpvMatchOffset >= tctxt->minMatchOffset);
+ tctxt->minMatchOffset = offset;
+ tctxt->minNonMpvMatchOffset = MAX(tctxt->minNonMpvMatchOffset, offset);
+}
+#endif
diff --git a/contrib/libs/hyperscan/src/rose/stream.c b/contrib/libs/hyperscan/src/rose/stream.c
index 3fa8e2bd46..26268dd574 100644
--- a/contrib/libs/hyperscan/src/rose/stream.c
+++ b/contrib/libs/hyperscan/src/rose/stream.c
@@ -1,433 +1,433 @@
-/*
+/*
* Copyright (c) 2015-2018, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "catchup.h"
-#include "counting_miracle.h"
-#include "infix.h"
-#include "match.h"
-#include "miracle.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "catchup.h"
+#include "counting_miracle.h"
+#include "infix.h"
+#include "match.h"
+#include "miracle.h"
#include "program_runtime.h"
#include "rose.h"
#include "rose_internal.h"
#include "stream_long_lit.h"
-#include "hwlm/hwlm.h"
-#include "nfa/mcclellan.h"
-#include "nfa/nfa_api.h"
-#include "nfa/nfa_api_queue.h"
-#include "nfa/nfa_internal.h"
-#include "util/fatbit.h"
-
-static rose_inline
-void runAnchoredTableStream(const struct RoseEngine *t, const void *atable,
- size_t alen, u64a offset,
- struct hs_scratch *scratch) {
+#include "hwlm/hwlm.h"
+#include "nfa/mcclellan.h"
+#include "nfa/nfa_api.h"
+#include "nfa/nfa_api_queue.h"
+#include "nfa/nfa_internal.h"
+#include "util/fatbit.h"
+
+static rose_inline
+void runAnchoredTableStream(const struct RoseEngine *t, const void *atable,
+ size_t alen, u64a offset,
+ struct hs_scratch *scratch) {
char *state_base = scratch->core_info.state + t->stateOffsets.anchorState;
- const struct anchored_matcher_info *curr = atable;
-
- do {
- DEBUG_PRINTF("--anchored nfa (+%u) no %u so %u\n",
- curr->anchoredMinDistance, curr->next_offset,
- curr->state_offset);
- const struct NFA *nfa
- = (const struct NFA *)((const char *)curr + sizeof(*curr));
- assert(ISALIGNED_CL(nfa));
- assert(isMcClellanType(nfa->type));
-
- char *state = state_base + curr->state_offset;
-
- char start = 0;
- size_t adj = 0;
-
- if (offset <= curr->anchoredMinDistance) {
- adj = curr->anchoredMinDistance - offset;
- if (adj >= alen) {
- goto next_nfa;
- }
-
- start = 1;
- } else {
- // (No state decompress necessary.)
- if (nfa->type == MCCLELLAN_NFA_8) {
- if (!*(u8 *)state) {
- goto next_nfa;
- }
- } else {
+ const struct anchored_matcher_info *curr = atable;
+
+ do {
+ DEBUG_PRINTF("--anchored nfa (+%u) no %u so %u\n",
+ curr->anchoredMinDistance, curr->next_offset,
+ curr->state_offset);
+ const struct NFA *nfa
+ = (const struct NFA *)((const char *)curr + sizeof(*curr));
+ assert(ISALIGNED_CL(nfa));
+ assert(isMcClellanType(nfa->type));
+
+ char *state = state_base + curr->state_offset;
+
+ char start = 0;
+ size_t adj = 0;
+
+ if (offset <= curr->anchoredMinDistance) {
+ adj = curr->anchoredMinDistance - offset;
+ if (adj >= alen) {
+ goto next_nfa;
+ }
+
+ start = 1;
+ } else {
+ // (No state decompress necessary.)
+ if (nfa->type == MCCLELLAN_NFA_8) {
+ if (!*(u8 *)state) {
+ goto next_nfa;
+ }
+ } else {
if (!unaligned_load_u16(state)) {
- goto next_nfa;
- }
- }
- }
-
- if (nfa->type == MCCLELLAN_NFA_8) {
- nfaExecMcClellan8_SimpStream(nfa, state, scratch->core_info.buf,
- start, adj, alen, roseAnchoredCallback,
+ goto next_nfa;
+ }
+ }
+ }
+
+ if (nfa->type == MCCLELLAN_NFA_8) {
+ nfaExecMcClellan8_SimpStream(nfa, state, scratch->core_info.buf,
+ start, adj, alen, roseAnchoredCallback,
scratch);
- } else {
- nfaExecMcClellan16_SimpStream(nfa, state, scratch->core_info.buf,
+ } else {
+ nfaExecMcClellan16_SimpStream(nfa, state, scratch->core_info.buf,
start, adj, alen,
roseAnchoredCallback, scratch);
- }
-
- next_nfa:
- if (!curr->next_offset) {
- break;
- }
-
- curr = (const void *)((const char *)curr + curr->next_offset);
- } while (1);
-}
-
-
-static really_inline
-void saveStreamState(const struct NFA *nfa, struct mq *q, s64a loc) {
- DEBUG_PRINTF("offset=%llu, length=%zu, hlength=%zu, loc=%lld\n",
- q->offset, q->length, q->hlength, loc);
- nfaQueueCompressState(nfa, q, loc);
-}
-
-static really_inline
-u8 getByteBefore(const struct core_info *ci, s64a sp) {
- if (sp > 0) { // in main buffer
- assert(sp <= (s64a)ci->len);
- return ci->buf[sp - 1];
- }
- // in history buffer
- assert(-sp < (s64a)ci->hlen);
- return ci->hbuf[ci->hlen + sp - 1];
-}
-
-/** \brief Return value for \ref roseScanForMiracles. */
-enum MiracleAction {
- MIRACLE_DEAD, //!< kill off this engine
- MIRACLE_SAVED, //!< engine has been caught up and state saved
- MIRACLE_CONTINUE //!< continue running and catch up engine
-};
-
-static really_inline
+ }
+
+ next_nfa:
+ if (!curr->next_offset) {
+ break;
+ }
+
+ curr = (const void *)((const char *)curr + curr->next_offset);
+ } while (1);
+}
+
+
+static really_inline
+void saveStreamState(const struct NFA *nfa, struct mq *q, s64a loc) {
+ DEBUG_PRINTF("offset=%llu, length=%zu, hlength=%zu, loc=%lld\n",
+ q->offset, q->length, q->hlength, loc);
+ nfaQueueCompressState(nfa, q, loc);
+}
+
+static really_inline
+u8 getByteBefore(const struct core_info *ci, s64a sp) {
+ if (sp > 0) { // in main buffer
+ assert(sp <= (s64a)ci->len);
+ return ci->buf[sp - 1];
+ }
+ // in history buffer
+ assert(-sp < (s64a)ci->hlen);
+ return ci->hbuf[ci->hlen + sp - 1];
+}
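Worked example of the addressing above: with sp == -2 and ci->hlen == 4, the byte before sp is hbuf[4 + (-2) - 1] == hbuf[1]; that is, negative stream positions index backwards from the end of the history buffer.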
+
+/** \brief Return value for \ref roseScanForMiracles. */
+enum MiracleAction {
+ MIRACLE_DEAD, //!< kill off this engine
+ MIRACLE_SAVED, //!< engine has been caught up and state saved
+ MIRACLE_CONTINUE //!< continue running and catch up engine
+};
+
+static really_inline
enum MiracleAction roseScanForMiracles(const struct RoseEngine *t, char *state,
- struct hs_scratch *scratch, u32 qi,
- const struct LeftNfaInfo *left,
- const struct NFA *nfa) {
- struct core_info *ci = &scratch->core_info;
- const u32 qCount = t->queueCount;
- struct mq *q = scratch->queues + qi;
-
- const char q_active = fatbit_isset(scratch->aqa, qCount, qi);
- DEBUG_PRINTF("q_active=%d\n", q_active);
-
- const s64a begin_loc = q_active ? q_cur_loc(q) : 0;
- const s64a end_loc = ci->len;
-
- s64a miracle_loc;
- if (roseMiracleOccurs(t, left, ci, begin_loc, end_loc, &miracle_loc)) {
- goto found_miracle;
- }
-
- if (roseCountingMiracleOccurs(t, left, ci, begin_loc, end_loc,
- &miracle_loc)) {
- goto found_miracle;
- }
-
- DEBUG_PRINTF("no miracle\n");
- return MIRACLE_CONTINUE;
-
-found_miracle:
- DEBUG_PRINTF("miracle at %lld\n", miracle_loc);
-
- if (left->infix) {
- if (!q_active) {
- DEBUG_PRINTF("killing infix\n");
- return MIRACLE_DEAD;
- }
-
- DEBUG_PRINTF("skip q forward, %lld to %lld\n", begin_loc, miracle_loc);
- q_skip_forward_to(q, miracle_loc);
- if (q_last_type(q) == MQE_START) {
- DEBUG_PRINTF("miracle caused infix to die\n");
- return MIRACLE_DEAD;
- }
-
- DEBUG_PRINTF("re-init infix state\n");
- assert(q->items[q->cur].type == MQE_START);
- q->items[q->cur].location = miracle_loc;
- nfaQueueInitState(q->nfa, q);
- } else {
- if (miracle_loc > end_loc - t->historyRequired) {
+ struct hs_scratch *scratch, u32 qi,
+ const struct LeftNfaInfo *left,
+ const struct NFA *nfa) {
+ struct core_info *ci = &scratch->core_info;
+ const u32 qCount = t->queueCount;
+ struct mq *q = scratch->queues + qi;
+
+ const char q_active = fatbit_isset(scratch->aqa, qCount, qi);
+ DEBUG_PRINTF("q_active=%d\n", q_active);
+
+ const s64a begin_loc = q_active ? q_cur_loc(q) : 0;
+ const s64a end_loc = ci->len;
+
+ s64a miracle_loc;
+ if (roseMiracleOccurs(t, left, ci, begin_loc, end_loc, &miracle_loc)) {
+ goto found_miracle;
+ }
+
+ if (roseCountingMiracleOccurs(t, left, ci, begin_loc, end_loc,
+ &miracle_loc)) {
+ goto found_miracle;
+ }
+
+ DEBUG_PRINTF("no miracle\n");
+ return MIRACLE_CONTINUE;
+
+found_miracle:
+ DEBUG_PRINTF("miracle at %lld\n", miracle_loc);
+
+ if (left->infix) {
+ if (!q_active) {
+ DEBUG_PRINTF("killing infix\n");
+ return MIRACLE_DEAD;
+ }
+
+ DEBUG_PRINTF("skip q forward, %lld to %lld\n", begin_loc, miracle_loc);
+ q_skip_forward_to(q, miracle_loc);
+ if (q_last_type(q) == MQE_START) {
+ DEBUG_PRINTF("miracle caused infix to die\n");
+ return MIRACLE_DEAD;
+ }
+
+ DEBUG_PRINTF("re-init infix state\n");
+ assert(q->items[q->cur].type == MQE_START);
+ q->items[q->cur].location = miracle_loc;
+ nfaQueueInitState(q->nfa, q);
+ } else {
+ if (miracle_loc > end_loc - t->historyRequired) {
char *streamState = state + getNfaInfoByQueue(t, qi)->stateOffset;
- u64a offset = ci->buf_offset + miracle_loc;
- u8 key = offset ? getByteBefore(ci, miracle_loc) : 0;
- DEBUG_PRINTF("init state, key=0x%02x, offset=%llu\n", key, offset);
- if (!nfaInitCompressedState(nfa, offset, streamState, key)) {
- return MIRACLE_DEAD;
- }
- storeRoseDelay(t, state, left, (s64a)ci->len - miracle_loc);
- return MIRACLE_SAVED;
- }
-
- DEBUG_PRINTF("re-init prefix (skip %lld->%lld)\n", begin_loc,
- miracle_loc);
- if (!q_active) {
- fatbit_set(scratch->aqa, qCount, qi);
+ u64a offset = ci->buf_offset + miracle_loc;
+ u8 key = offset ? getByteBefore(ci, miracle_loc) : 0;
+ DEBUG_PRINTF("init state, key=0x%02x, offset=%llu\n", key, offset);
+ if (!nfaInitCompressedState(nfa, offset, streamState, key)) {
+ return MIRACLE_DEAD;
+ }
+ storeRoseDelay(t, state, left, (s64a)ci->len - miracle_loc);
+ return MIRACLE_SAVED;
+ }
+
+ DEBUG_PRINTF("re-init prefix (skip %lld->%lld)\n", begin_loc,
+ miracle_loc);
+ if (!q_active) {
+ fatbit_set(scratch->aqa, qCount, qi);
initRoseQueue(t, qi, left, scratch);
- }
- q->cur = q->end = 0;
- pushQueueAt(q, 0, MQE_START, miracle_loc);
- pushQueueAt(q, 1, MQE_TOP, miracle_loc);
- nfaQueueInitState(q->nfa, q);
- }
-
- return MIRACLE_CONTINUE;
-}
-
-
-static really_inline
+ }
+ q->cur = q->end = 0;
+ pushQueueAt(q, 0, MQE_START, miracle_loc);
+ pushQueueAt(q, 1, MQE_TOP, miracle_loc);
+ nfaQueueInitState(q->nfa, q);
+ }
+
+ return MIRACLE_CONTINUE;
+}
+
+
+static really_inline
char roseCatchUpLeftfix(const struct RoseEngine *t, char *state,
- struct hs_scratch *scratch, u32 qi,
- const struct LeftNfaInfo *left) {
- assert(!left->transient); // active roses only
-
- struct core_info *ci = &scratch->core_info;
- const u32 qCount = t->queueCount;
- struct mq *q = scratch->queues + qi;
- const struct NFA *nfa = getNfaByQueue(t, qi);
-
- if (nfaSupportsZombie(nfa)
- && ci->buf_offset /* prefix can be alive with no q */
- && !fatbit_isset(scratch->aqa, qCount, qi)
- && isZombie(t, state, left)) {
- DEBUG_PRINTF("yawn - zombie\n");
- return 1;
- }
-
- if (left->stopTable) {
- enum MiracleAction mrv =
- roseScanForMiracles(t, state, scratch, qi, left, nfa);
- switch (mrv) {
- case MIRACLE_DEAD:
- return 0;
- case MIRACLE_SAVED:
- return 1;
- default:
- assert(mrv == MIRACLE_CONTINUE);
- break;
- }
- }
-
- if (!fatbit_set(scratch->aqa, qCount, qi)) {
+ struct hs_scratch *scratch, u32 qi,
+ const struct LeftNfaInfo *left) {
+ assert(!left->transient); // active roses only
+
+ struct core_info *ci = &scratch->core_info;
+ const u32 qCount = t->queueCount;
+ struct mq *q = scratch->queues + qi;
+ const struct NFA *nfa = getNfaByQueue(t, qi);
+
+ if (nfaSupportsZombie(nfa)
+ && ci->buf_offset /* prefix can be alive with no q */
+ && !fatbit_isset(scratch->aqa, qCount, qi)
+ && isZombie(t, state, left)) {
+ DEBUG_PRINTF("yawn - zombie\n");
+ return 1;
+ }
+
+ if (left->stopTable) {
+ enum MiracleAction mrv =
+ roseScanForMiracles(t, state, scratch, qi, left, nfa);
+ switch (mrv) {
+ case MIRACLE_DEAD:
+ return 0;
+ case MIRACLE_SAVED:
+ return 1;
+ default:
+ assert(mrv == MIRACLE_CONTINUE);
+ break;
+ }
+ }
+
+ if (!fatbit_set(scratch->aqa, qCount, qi)) {
initRoseQueue(t, qi, left, scratch);
-
- s32 sp;
- if (ci->buf_offset) {
- sp = -(s32)loadRoseDelay(t, state, left);
- } else {
- sp = 0;
- }
-
- DEBUG_PRINTF("ci->len=%zu, sp=%d, historyRequired=%u\n", ci->len, sp,
- t->historyRequired);
-
- if ( ci->len - sp + 1 < t->historyRequired) {
- // we'll end up safely in the history region.
- DEBUG_PRINTF("safely in history, skipping\n");
- storeRoseDelay(t, state, left, (s64a)ci->len - sp);
- return 1;
- }
-
- pushQueueAt(q, 0, MQE_START, sp);
- if (left->infix || ci->buf_offset + sp > 0) {
- loadStreamState(nfa, q, sp);
- } else {
- pushQueueAt(q, 1, MQE_TOP, sp);
- nfaQueueInitState(nfa, q);
- }
- } else {
- DEBUG_PRINTF("queue already active\n");
- if (q->end - q->cur == 1 && q_cur_type(q) == MQE_START) {
- DEBUG_PRINTF("empty queue, start loc=%lld\n", q_cur_loc(q));
- s64a last_loc = q_cur_loc(q);
- if (ci->len - last_loc + 1 < t->historyRequired) {
- // we'll end up safely in the history region.
- DEBUG_PRINTF("safely in history, saving state and skipping\n");
- saveStreamState(nfa, q, last_loc);
- storeRoseDelay(t, state, left, (s64a)ci->len - last_loc);
- return 1;
- }
- }
- }
-
- // Determine whether the byte before last_loc will be in the history
- // buffer on the next stream write.
- s64a last_loc = q_last_loc(q);
- s64a leftovers = ci->len - last_loc;
- if (leftovers + 1 >= t->historyRequired) {
- u32 catchup_offset = left->maxLag ? left->maxLag - 1 : 0;
- last_loc = (s64a)ci->len - catchup_offset;
- }
-
- if (left->infix) {
- if (infixTooOld(q, last_loc)) {
- DEBUG_PRINTF("infix died of old age\n");
- return 0;
- }
+
+ s32 sp;
+ if (ci->buf_offset) {
+ sp = -(s32)loadRoseDelay(t, state, left);
+ } else {
+ sp = 0;
+ }
+
+ DEBUG_PRINTF("ci->len=%zu, sp=%d, historyRequired=%u\n", ci->len, sp,
+ t->historyRequired);
+
+        if (ci->len - sp + 1 < t->historyRequired) {
+ // we'll end up safely in the history region.
+ DEBUG_PRINTF("safely in history, skipping\n");
+ storeRoseDelay(t, state, left, (s64a)ci->len - sp);
+ return 1;
+ }
+
+ pushQueueAt(q, 0, MQE_START, sp);
+ if (left->infix || ci->buf_offset + sp > 0) {
+ loadStreamState(nfa, q, sp);
+ } else {
+ pushQueueAt(q, 1, MQE_TOP, sp);
+ nfaQueueInitState(nfa, q);
+ }
+ } else {
+ DEBUG_PRINTF("queue already active\n");
+ if (q->end - q->cur == 1 && q_cur_type(q) == MQE_START) {
+ DEBUG_PRINTF("empty queue, start loc=%lld\n", q_cur_loc(q));
+ s64a last_loc = q_cur_loc(q);
+ if (ci->len - last_loc + 1 < t->historyRequired) {
+ // we'll end up safely in the history region.
+ DEBUG_PRINTF("safely in history, saving state and skipping\n");
+ saveStreamState(nfa, q, last_loc);
+ storeRoseDelay(t, state, left, (s64a)ci->len - last_loc);
+ return 1;
+ }
+ }
+ }
+
+ // Determine whether the byte before last_loc will be in the history
+ // buffer on the next stream write.
+ s64a last_loc = q_last_loc(q);
+ s64a leftovers = ci->len - last_loc;
+ if (leftovers + 1 >= t->historyRequired) {
+ u32 catchup_offset = left->maxLag ? left->maxLag - 1 : 0;
+ last_loc = (s64a)ci->len - catchup_offset;
+ }
+
+ if (left->infix) {
+ if (infixTooOld(q, last_loc)) {
+ DEBUG_PRINTF("infix died of old age\n");
+ return 0;
+ }
reduceInfixQueue(q, last_loc, left->maxQueueLen, q->nfa->maxWidth);
- }
-
- DEBUG_PRINTF("end scan at %lld\n", last_loc);
- pushQueueNoMerge(q, MQE_END, last_loc);
-
-#ifdef DEBUG
- debugQueue(q);
-#endif
-
- char rv = nfaQueueExecRose(nfa, q, MO_INVALID_IDX);
- if (!rv) { /* nfa is dead */
- DEBUG_PRINTF("died catching up to stream boundary\n");
- return 0;
- } else {
- DEBUG_PRINTF("alive, saving stream state\n");
- if (nfaSupportsZombie(nfa) &&
- nfaGetZombieStatus(nfa, q, last_loc) == NFA_ZOMBIE_ALWAYS_YES) {
- DEBUG_PRINTF("not so fast - zombie\n");
- setAsZombie(t, state, left);
- } else {
- saveStreamState(nfa, q, last_loc);
- storeRoseDelay(t, state, left, (s64a)ci->len - last_loc);
- }
- }
-
- return 1;
-}
-
-static rose_inline
+ }
+
+ DEBUG_PRINTF("end scan at %lld\n", last_loc);
+ pushQueueNoMerge(q, MQE_END, last_loc);
+
+#ifdef DEBUG
+ debugQueue(q);
+#endif
+
+ char rv = nfaQueueExecRose(nfa, q, MO_INVALID_IDX);
+ if (!rv) { /* nfa is dead */
+ DEBUG_PRINTF("died catching up to stream boundary\n");
+ return 0;
+ } else {
+ DEBUG_PRINTF("alive, saving stream state\n");
+ if (nfaSupportsZombie(nfa) &&
+ nfaGetZombieStatus(nfa, q, last_loc) == NFA_ZOMBIE_ALWAYS_YES) {
+ DEBUG_PRINTF("not so fast - zombie\n");
+ setAsZombie(t, state, left);
+ } else {
+ saveStreamState(nfa, q, last_loc);
+ storeRoseDelay(t, state, left, (s64a)ci->len - last_loc);
+ }
+ }
+
+ return 1;
+}
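
/*
 * A minimal, self-contained sketch of the "safely in history" test made in
 * roseCatchUpLeftfix above, under the simplifying assumption of plain stdint
 * types: buf_len, sp and history_required are illustrative stand-ins for
 * ci->len, the queue start position and t->historyRequired. If every byte
 * from the scan point to the end of this write (plus the byte before it,
 * needed to decompress stream state) will still be inside the history window
 * on the next write, catch-up can be deferred and only the delay back from
 * the stream boundary is recorded, as storeRoseDelay does above.
 */
#include <stdint.h>
#include <stdio.h>

static int safely_in_history(size_t buf_len, int32_t sp,
                             uint32_t history_required, int64_t *delay_out) {
    if ((int64_t)buf_len - sp + 1 < (int64_t)history_required) {
        *delay_out = (int64_t)buf_len - sp; /* distance back from boundary */
        return 1;
    }
    return 0;
}

int main(void) {
    int64_t delay;
    /* 100-byte write, scan point 8 bytes back into history, 128-byte window:
     * the whole region stays visible on the next write, so catch-up is
     * skipped and a delay of 108 is recorded. */
    if (safely_in_history(100, -8, 128, &delay)) {
        printf("skip catch-up, delay=%lld\n", (long long)delay);
    }
    return 0;
}
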
+
+static rose_inline
void roseCatchUpLeftfixes(const struct RoseEngine *t, char *state,
- struct hs_scratch *scratch) {
- if (!t->activeLeftIterOffset) {
- // No sparse iter, no non-transient roses.
- return;
- }
-
- // As per UE-1629, we catch up leftfix engines to:
- // * current position (last location in the queue, or last location we
- // executed to if the queue is empty) if that position (and the byte
- // before so we can decompress the stream state) will be in the history
- // buffer on the next stream write; OR
-    // * (stream_boundary - max_delay) otherwise.
-
- u8 *ara = getActiveLeftArray(t, state); /* indexed by offsets into
- * left_table */
- const u32 arCount = t->activeLeftCount;
- const struct LeftNfaInfo *left_table = getLeftTable(t);
- const struct mmbit_sparse_iter *it = getActiveLeftIter(t);
-
+ struct hs_scratch *scratch) {
+ if (!t->activeLeftIterOffset) {
+ // No sparse iter, no non-transient roses.
+ return;
+ }
+
+ // As per UE-1629, we catch up leftfix engines to:
+ // * current position (last location in the queue, or last location we
+ // executed to if the queue is empty) if that position (and the byte
+ // before so we can decompress the stream state) will be in the history
+ // buffer on the next stream write; OR
+    // * (stream_boundary - max_delay) otherwise.
+
+ u8 *ara = getActiveLeftArray(t, state); /* indexed by offsets into
+ * left_table */
+ const u32 arCount = t->activeLeftCount;
+ const struct LeftNfaInfo *left_table = getLeftTable(t);
+ const struct mmbit_sparse_iter *it = getActiveLeftIter(t);
+
struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES];
- u32 idx = 0;
+ u32 idx = 0;
u32 ri = mmbit_sparse_iter_begin(ara, arCount, &idx, it, si_state);
- for (; ri != MMB_INVALID;
+ for (; ri != MMB_INVALID;
ri = mmbit_sparse_iter_next(ara, arCount, ri, &idx, it, si_state)) {
- const struct LeftNfaInfo *left = left_table + ri;
- u32 qi = ri + t->leftfixBeginQueue;
- DEBUG_PRINTF("leftfix %u of %u, maxLag=%u, infix=%d\n", ri, arCount,
- left->maxLag, (int)left->infix);
- if (!roseCatchUpLeftfix(t, state, scratch, qi, left)) {
- DEBUG_PRINTF("removing rose %u from active list\n", ri);
- DEBUG_PRINTF("groups old=%016llx mask=%016llx\n",
- scratch->tctxt.groups, left->squash_mask);
- scratch->tctxt.groups &= left->squash_mask;
- mmbit_unset(ara, arCount, ri);
- }
- }
-}
-
-// Saves out stream state for all our active suffix NFAs.
-static rose_inline
+ const struct LeftNfaInfo *left = left_table + ri;
+ u32 qi = ri + t->leftfixBeginQueue;
+ DEBUG_PRINTF("leftfix %u of %u, maxLag=%u, infix=%d\n", ri, arCount,
+ left->maxLag, (int)left->infix);
+ if (!roseCatchUpLeftfix(t, state, scratch, qi, left)) {
+ DEBUG_PRINTF("removing rose %u from active list\n", ri);
+ DEBUG_PRINTF("groups old=%016llx mask=%016llx\n",
+ scratch->tctxt.groups, left->squash_mask);
+ scratch->tctxt.groups &= left->squash_mask;
+ mmbit_unset(ara, arCount, ri);
+ }
+ }
+}
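
/*
 * A simplified model of the walk in roseCatchUpLeftfixes above, assuming a
 * plain byte-per-engine active array in place of the real mmbit sparse
 * iterator. toy_catch_up, toy_left and the group mask are hypothetical
 * stand-ins: when an engine fails to catch up (it died), it is removed from
 * the active set and its squash mask is applied to the live groups,
 * mirroring the mmbit_unset/squash_mask handling above.
 */
#include <stdint.h>
#include <stdio.h>

struct toy_left { uint64_t squash_mask; };

/* Hypothetical per-engine catch-up; returns 0 if the engine is dead. */
static int toy_catch_up(uint32_t ri) { return ri != 1; }

static void toy_catch_up_all(uint8_t *active, uint32_t count,
                             const struct toy_left *table, uint64_t *groups) {
    for (uint32_t ri = 0; ri < count; ri++) {
        if (!active[ri]) {
            continue;
        }
        if (!toy_catch_up(ri)) {
            *groups &= table[ri].squash_mask; /* squash its groups */
            active[ri] = 0;                   /* drop from the active set */
        }
    }
}

int main(void) {
    uint8_t active[3] = { 1, 1, 1 };
    const struct toy_left table[3] = {
        { ~0ULL }, { ~2ULL /* clears group bit 1 */ }, { ~0ULL },
    };
    uint64_t groups = 0x7;
    toy_catch_up_all(active, 3, table, &groups);
    printf("groups=%#llx active1=%u\n", (unsigned long long)groups, active[1]);
    return 0;
}
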
+
+// Saves out stream state for all our active suffix NFAs.
+static rose_inline
void roseSaveNfaStreamState(const struct RoseEngine *t, char *state,
- struct hs_scratch *scratch) {
- struct mq *queues = scratch->queues;
- u8 *aa = getActiveLeafArray(t, state);
- u32 aaCount = t->activeArrayCount;
-
- if (scratch->tctxt.mpv_inactive) {
- DEBUG_PRINTF("mpv is dead as a doornail\n");
- /* mpv if it exists is queue 0 */
- mmbit_unset(aa, aaCount, 0);
- }
-
- for (u32 qi = mmbit_iterate(aa, aaCount, MMB_INVALID); qi != MMB_INVALID;
- qi = mmbit_iterate(aa, aaCount, qi)) {
- DEBUG_PRINTF("saving stream state for qi=%u\n", qi);
-
- struct mq *q = queues + qi;
-
- // If it's active, it should have an active queue (as we should have
- // done some work!)
- assert(fatbit_isset(scratch->aqa, t->queueCount, qi));
-
- const struct NFA *nfa = getNfaByQueue(t, qi);
- saveStreamState(nfa, q, q_cur_loc(q));
- }
-}
-
-static rose_inline
+ struct hs_scratch *scratch) {
+ struct mq *queues = scratch->queues;
+ u8 *aa = getActiveLeafArray(t, state);
+ u32 aaCount = t->activeArrayCount;
+
+ if (scratch->tctxt.mpv_inactive) {
+ DEBUG_PRINTF("mpv is dead as a doornail\n");
+ /* mpv if it exists is queue 0 */
+ mmbit_unset(aa, aaCount, 0);
+ }
+
+ for (u32 qi = mmbit_iterate(aa, aaCount, MMB_INVALID); qi != MMB_INVALID;
+ qi = mmbit_iterate(aa, aaCount, qi)) {
+ DEBUG_PRINTF("saving stream state for qi=%u\n", qi);
+
+ struct mq *q = queues + qi;
+
+ // If it's active, it should have an active queue (as we should have
+ // done some work!)
+ assert(fatbit_isset(scratch->aqa, t->queueCount, qi));
+
+ const struct NFA *nfa = getNfaByQueue(t, qi);
+ saveStreamState(nfa, q, q_cur_loc(q));
+ }
+}
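
/*
 * A toy restatement of roseSaveNfaStreamState above, with a flat active
 * array and a per-queue current location standing in for the real
 * mmbit/fatbit structures and saveStreamState; all names here are
 * illustrative. Queue 0 is reserved for the MPV, so a dead MPV is cleared
 * from the active set before the walk.
 */
#include <stdint.h>
#include <stdio.h>

struct toy_mq { int64_t cur_loc; };

/* Hypothetical stand-in for saveStreamState(nfa, q, q_cur_loc(q)). */
static void toy_save(uint32_t qi, int64_t loc) {
    printf("saving qi=%u at loc=%lld\n", qi, (long long)loc);
}

static void toy_save_all(uint8_t *active, uint32_t count,
                         const struct toy_mq *queues, int mpv_inactive) {
    if (mpv_inactive) {
        active[0] = 0; /* the MPV, if it exists, is queue 0 */
    }
    for (uint32_t qi = 0; qi < count; qi++) {
        if (active[qi]) { /* active => its queue must have been set up */
            toy_save(qi, queues[qi].cur_loc);
        }
    }
}

int main(void) {
    uint8_t active[2] = { 1, 1 };
    const struct toy_mq queues[2] = { { 10 }, { 42 } };
    toy_save_all(active, 2, queues, /*mpv_inactive=*/1);
    return 0;
}
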
+
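// ensureStreamNeatAndTidy (below) finalizes the write at the stream
// boundary: matches are caught up to the end of the block first; only if
// the scan is still alive are suffix NFA states saved, leftfixes caught up,
// last-byte history flushed, and group/long-literal state stored for the
// next write.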
+static rose_inline
void ensureStreamNeatAndTidy(const struct RoseEngine *t, char *state,
- struct hs_scratch *scratch, size_t length,
+ struct hs_scratch *scratch, size_t length,
u64a offset) {
- struct RoseContext *tctxt = &scratch->tctxt;
-
+ struct RoseContext *tctxt = &scratch->tctxt;
+
if (roseCatchUpTo(t, scratch, length + scratch->core_info.buf_offset) ==
HWLM_TERMINATE_MATCHING) {
- return; /* dead; no need to clean up state. */
- }
- roseSaveNfaStreamState(t, state, scratch);
- roseCatchUpLeftfixes(t, state, scratch);
+ return; /* dead; no need to clean up state. */
+ }
+ roseSaveNfaStreamState(t, state, scratch);
+ roseCatchUpLeftfixes(t, state, scratch);
roseFlushLastByteHistory(t, scratch, offset + length);
- tctxt->lastEndOffset = offset + length;
- storeGroups(t, state, tctxt->groups);
+ tctxt->lastEndOffset = offset + length;
+ storeGroups(t, state, tctxt->groups);
storeLongLiteralState(t, state, scratch);
-}
-
-static really_inline
+}
+
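// do_rebuild (below) re-scans the tail of the history buffer (at most
// t->delayRebuildLength bytes of it) with the delay-rebuild matcher so that
// delayed-literal state left dirty by the previous write is reconstructed
// before the new block is scanned.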
+static really_inline
void do_rebuild(const struct RoseEngine *t, struct hs_scratch *scratch) {
assert(t->drmatcherOffset);
- assert(!can_stop_matching(scratch));
+ assert(!can_stop_matching(scratch));
const struct HWLM *hwlm = getByOffset(t, t->drmatcherOffset);
- size_t len = MIN(scratch->core_info.hlen, t->delayRebuildLength);
- const u8 *buf = scratch->core_info.hbuf + scratch->core_info.hlen - len;
- DEBUG_PRINTF("BEGIN FLOATING REBUILD over %zu bytes\n", len);
-
+ size_t len = MIN(scratch->core_info.hlen, t->delayRebuildLength);
+ const u8 *buf = scratch->core_info.hbuf + scratch->core_info.hlen - len;
+ DEBUG_PRINTF("BEGIN FLOATING REBUILD over %zu bytes\n", len);
+
scratch->core_info.status &= ~STATUS_DELAY_DIRTY;
hwlmExec(hwlm, buf, len, 0, roseDelayRebuildCallback, scratch,
- scratch->tctxt.groups);
- assert(!can_stop_matching(scratch));
-}
-
+ scratch->tctxt.groups);
+ assert(!can_stop_matching(scratch));
+}
+
static rose_inline
void runEagerPrefixesStream(const struct RoseEngine *t,
struct hs_scratch *scratch) {
@@ -545,72 +545,72 @@ int can_never_match(const struct RoseEngine *t, char *state,
void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
DEBUG_PRINTF("OH HAI [%llu, %llu)\n", scratch->core_info.buf_offset,
scratch->core_info.buf_offset + (u64a)scratch->core_info.len);
- assert(t);
- assert(scratch->core_info.hbuf);
- assert(scratch->core_info.buf);
-
+ assert(t);
+ assert(scratch->core_info.hbuf);
+ assert(scratch->core_info.buf);
+
// We should not have been called if we've already been told to terminate
// matching.
assert(!told_to_stop_matching(scratch));
- assert(mmbit_sparse_iter_state_size(t->rolesWithStateCount)
- < MAX_SPARSE_ITER_STATES);
-
- size_t length = scratch->core_info.len;
- u64a offset = scratch->core_info.buf_offset;
-
- // We may have a maximum width (for engines constructed entirely
- // of bi-anchored patterns). If this write would result in us progressing
- // beyond this point, we cannot possibly match.
- if (t->maxBiAnchoredWidth != ROSE_BOUND_INF
- && offset + length > t->maxBiAnchoredWidth) {
- DEBUG_PRINTF("bailing, write would progress beyond maxBAWidth\n");
- return;
- }
-
+ assert(mmbit_sparse_iter_state_size(t->rolesWithStateCount)
+ < MAX_SPARSE_ITER_STATES);
+
+ size_t length = scratch->core_info.len;
+ u64a offset = scratch->core_info.buf_offset;
+
+ // We may have a maximum width (for engines constructed entirely
+ // of bi-anchored patterns). If this write would result in us progressing
+ // beyond this point, we cannot possibly match.
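    // (Each bi-anchored pattern requires the stream to end at or before
    // maxBiAnchoredWidth, so once the stream has grown past that width no
    // match can ever complete and the whole write can be skipped.)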
+ if (t->maxBiAnchoredWidth != ROSE_BOUND_INF
+ && offset + length > t->maxBiAnchoredWidth) {
+ DEBUG_PRINTF("bailing, write would progress beyond maxBAWidth\n");
+ return;
+ }
+
char *state = scratch->core_info.state;
-
- struct RoseContext *tctxt = &scratch->tctxt;
- tctxt->mpv_inactive = 0;
- tctxt->groups = loadGroups(t, state);
- tctxt->lit_offset_adjust = offset + 1; // index after last byte
- tctxt->delayLastEndOffset = offset;
- tctxt->lastEndOffset = offset;
- tctxt->filledDelayedSlots = 0;
- tctxt->lastMatchOffset = 0;
+
+ struct RoseContext *tctxt = &scratch->tctxt;
+ tctxt->mpv_inactive = 0;
+ tctxt->groups = loadGroups(t, state);
+ tctxt->lit_offset_adjust = offset + 1; // index after last byte
+ tctxt->delayLastEndOffset = offset;
+ tctxt->lastEndOffset = offset;
+ tctxt->filledDelayedSlots = 0;
+ tctxt->lastMatchOffset = 0;
tctxt->lastCombMatchOffset = offset;
- tctxt->minMatchOffset = offset;
- tctxt->minNonMpvMatchOffset = offset;
- tctxt->next_mpv_offset = 0;
-
+ tctxt->minMatchOffset = offset;
+ tctxt->minNonMpvMatchOffset = offset;
+ tctxt->next_mpv_offset = 0;
+
DEBUG_PRINTF("BEGIN: history len=%zu, buffer len=%zu groups=%016llx\n",
scratch->core_info.hlen, scratch->core_info.len, tctxt->groups);
-
- fatbit_clear(scratch->aqa);
- scratch->al_log_sum = 0;
- scratch->catchup_pq.qm_size = 0;
-
- if (t->outfixBeginQueue != t->outfixEndQueue) {
- streamInitSufPQ(t, state, scratch);
- }
-
+
+ fatbit_clear(scratch->aqa);
+ scratch->al_log_sum = 0;
+ scratch->catchup_pq.qm_size = 0;
+
+ if (t->outfixBeginQueue != t->outfixEndQueue) {
+ streamInitSufPQ(t, state, scratch);
+ }
+
runEagerPrefixesStream(t, scratch);
-
- u32 alen = t->anchoredDistance > offset ?
- MIN(length + offset, t->anchoredDistance) - offset : 0;
-
- const struct anchored_matcher_info *atable = getALiteralMatcher(t);
- if (atable && alen) {
- DEBUG_PRINTF("BEGIN ANCHORED %zu/%u\n", scratch->core_info.hlen, alen);
- runAnchoredTableStream(t, atable, alen, offset, scratch);
-
- if (can_stop_matching(scratch)) {
- goto exit;
- }
- }
-
- const struct HWLM *ftable = getFLiteralMatcher(t);
- if (ftable) {
+
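    // Anchored literals can only start within t->anchoredDistance of the
    // stream start; e.g. offset=10, length=100, anchoredDistance=40 gives
    // alen = min(110, 40) - 10 = 30 bytes to scan, and alen = 0 once the
    // anchored region is entirely behind us.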
+ u32 alen = t->anchoredDistance > offset ?
+ MIN(length + offset, t->anchoredDistance) - offset : 0;
+
+ const struct anchored_matcher_info *atable = getALiteralMatcher(t);
+ if (atable && alen) {
+ DEBUG_PRINTF("BEGIN ANCHORED %zu/%u\n", scratch->core_info.hlen, alen);
+ runAnchoredTableStream(t, atable, alen, offset, scratch);
+
+ if (can_stop_matching(scratch)) {
+ goto exit;
+ }
+ }
+
+ const struct HWLM *ftable = getFLiteralMatcher(t);
+ if (ftable) {
// Load in long literal table state and set up "fake history" buffers
// (ll_buf, etc, used by the CHECK_LONG_LIT instruction). Note that this
// must be done here in order to ensure that it happens before any path
@@ -618,63 +618,63 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
loadLongLiteralState(t, state, scratch);
if (t->noFloatingRoots && !roseHasInFlightMatches(t, state, scratch)) {
- DEBUG_PRINTF("skip FLOATING: no inflight matches\n");
- goto flush_delay_and_exit;
- }
-
- size_t flen = length;
- if (t->floatingDistance != ROSE_BOUND_INF) {
- flen = t->floatingDistance > offset ?
- MIN(t->floatingDistance, length + offset) - offset : 0;
- }
-
- size_t hlength = scratch->core_info.hlen;
-
+ DEBUG_PRINTF("skip FLOATING: no inflight matches\n");
+ goto flush_delay_and_exit;
+ }
+
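        // Clamp the floating scan to t->floatingDistance; e.g. offset=10,
        // length=100, floatingDistance=64 gives flen = min(64, 110) - 10 =
        // 54, and flen = 0 once offset is already past the floating
        // distance.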
+ size_t flen = length;
+ if (t->floatingDistance != ROSE_BOUND_INF) {
+ flen = t->floatingDistance > offset ?
+ MIN(t->floatingDistance, length + offset) - offset : 0;
+ }
+
+ size_t hlength = scratch->core_info.hlen;
+
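        // A rebuild pass is only needed when there is history to re-scan,
        // the previous write left delayed-literal state dirty, and delayed
        // matches can still occur at this offset.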
char rebuild = hlength &&
(scratch->core_info.status & STATUS_DELAY_DIRTY) &&
(t->maxFloatingDelayedMatch == ROSE_BOUND_INF ||
offset < t->maxFloatingDelayedMatch);
- DEBUG_PRINTF("**rebuild %hhd status %hhu mfdm %u, offset %llu\n",
+ DEBUG_PRINTF("**rebuild %hhd status %hhu mfdm %u, offset %llu\n",
rebuild, scratch->core_info.status,
t->maxFloatingDelayedMatch, offset);
-
+
if (rebuild) { /* rebuild floating delayed match stuff */
do_rebuild(t, scratch);
}
- if (!flen) {
- goto flush_delay_and_exit;
- }
-
- if (flen + offset <= t->floatingMinDistance) {
- DEBUG_PRINTF("skip FLOATING: before floating min\n");
- goto flush_delay_and_exit;
- }
-
- size_t start = 0;
- if (offset < t->floatingMinDistance) {
- // This scan crosses the floating min distance, so we can use that
- // to set HWLM's "start" offset.
- start = t->floatingMinDistance - offset;
- }
- DEBUG_PRINTF("start=%zu\n", start);
-
- DEBUG_PRINTF("BEGIN FLOATING (over %zu/%zu)\n", flen, length);
+ if (!flen) {
+ goto flush_delay_and_exit;
+ }
+
+ if (flen + offset <= t->floatingMinDistance) {
+ DEBUG_PRINTF("skip FLOATING: before floating min\n");
+ goto flush_delay_and_exit;
+ }
+
+ size_t start = 0;
+ if (offset < t->floatingMinDistance) {
+ // This scan crosses the floating min distance, so we can use that
+ // to set HWLM's "start" offset.
+ start = t->floatingMinDistance - offset;
+ }
+ DEBUG_PRINTF("start=%zu\n", start);
+
+ DEBUG_PRINTF("BEGIN FLOATING (over %zu/%zu)\n", flen, length);
hwlmExecStreaming(ftable, flen, start, roseFloatingCallback, scratch,
tctxt->groups & t->floating_group_mask);
- }
-
-flush_delay_and_exit:
- DEBUG_PRINTF("flushing floating\n");
+ }
+
+flush_delay_and_exit:
+ DEBUG_PRINTF("flushing floating\n");
if (cleanUpDelayed(t, scratch, length, offset) == HWLM_TERMINATE_MATCHING) {
- return;
- }
-
-exit:
- DEBUG_PRINTF("CLEAN UP TIME\n");
- if (!can_stop_matching(scratch)) {
+ return;
+ }
+
+exit:
+ DEBUG_PRINTF("CLEAN UP TIME\n");
+ if (!can_stop_matching(scratch)) {
ensureStreamNeatAndTidy(t, state, scratch, length, offset);
- }
+ }
if (!told_to_stop_matching(scratch)
&& can_never_match(t, state, scratch, length, offset)) {
@@ -685,8 +685,8 @@ exit:
DEBUG_PRINTF("DONE STREAMING SCAN, status = %u\n",
scratch->core_info.status);
- return;
-}
+ return;
+}
static rose_inline
void roseStreamInitEod(const struct RoseEngine *t, u64a offset,