aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/hyperscan/src/runtime.c
diff options
context:
space:
mode:
authorbnagaev <bnagaev@yandex-team.ru>2022-02-10 16:47:04 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:47:04 +0300
commitd6449ba66291ff0c0d352c82e6eb3efb4c8a7e8d (patch)
treed5dca6d44593f5e52556a1cc7b1ab0386e096ebe /contrib/libs/hyperscan/src/runtime.c
parent1861d4c1402bb2c67a3e6b43b51706081b74508a (diff)
downloadydb-d6449ba66291ff0c0d352c82e6eb3efb4c8a7e8d.tar.gz
Restoring authorship annotation for <bnagaev@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/hyperscan/src/runtime.c')
-rw-r--r--contrib/libs/hyperscan/src/runtime.c1730
1 files changed, 865 insertions, 865 deletions
diff --git a/contrib/libs/hyperscan/src/runtime.c b/contrib/libs/hyperscan/src/runtime.c
index a3659348c5..c5b16ab606 100644
--- a/contrib/libs/hyperscan/src/runtime.c
+++ b/contrib/libs/hyperscan/src/runtime.c
@@ -1,194 +1,194 @@
-/*
+/*
* Copyright (c) 2015-2019, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Runtime functions.
- */
-
-#include <stdlib.h>
-#include <string.h>
-
-#include "allocator.h"
-#include "hs_compile.h" /* for HS_MODE_* flags */
-#include "hs_runtime.h"
-#include "hs_internal.h"
-#include "hwlm/hwlm.h"
-#include "nfa/mcclellan.h"
-#include "nfa/nfa_api.h"
-#include "nfa/nfa_api_util.h"
-#include "nfa/nfa_internal.h"
-#include "nfa/nfa_rev_api.h"
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Runtime functions.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "allocator.h"
+#include "hs_compile.h" /* for HS_MODE_* flags */
+#include "hs_runtime.h"
+#include "hs_internal.h"
+#include "hwlm/hwlm.h"
+#include "nfa/mcclellan.h"
+#include "nfa/nfa_api.h"
+#include "nfa/nfa_api_util.h"
+#include "nfa/nfa_internal.h"
+#include "nfa/nfa_rev_api.h"
#include "nfa/sheng.h"
-#include "smallwrite/smallwrite_internal.h"
-#include "rose/rose.h"
-#include "rose/runtime.h"
-#include "database.h"
+#include "smallwrite/smallwrite_internal.h"
+#include "rose/rose.h"
+#include "rose/runtime.h"
+#include "database.h"
#include "report.h"
-#include "scratch.h"
-#include "som/som_runtime.h"
-#include "som/som_stream.h"
-#include "state.h"
+#include "scratch.h"
+#include "som/som_runtime.h"
+#include "som/som_stream.h"
+#include "state.h"
#include "stream_compress.h"
-#include "ue2common.h"
-#include "util/exhaust.h"
-#include "util/multibit.h"
-
-static really_inline
-void prefetch_data(const char *data, unsigned length) {
- __builtin_prefetch(data);
- __builtin_prefetch(data + length/2);
- __builtin_prefetch(data + length - 24);
-}
-
-/** dummy event handler for use when user does not provide one */
-static
+#include "ue2common.h"
+#include "util/exhaust.h"
+#include "util/multibit.h"
+
+static really_inline
+void prefetch_data(const char *data, unsigned length) {
+ __builtin_prefetch(data);
+ __builtin_prefetch(data + length/2);
+ __builtin_prefetch(data + length - 24);
+}
+
+/** dummy event handler for use when user does not provide one */
+static
int HS_CDECL null_onEvent(UNUSED unsigned id, UNUSED unsigned long long from,
UNUSED unsigned long long to, UNUSED unsigned flags,
UNUSED void *ctxt) {
- return 0;
-}
-
-static really_inline
-u32 getHistoryAmount(const struct RoseEngine *t, u64a offset) {
- return MIN(t->historyRequired, offset);
-}
-
-static really_inline
-u8 *getHistory(char *state, const struct RoseEngine *t, u64a offset) {
- return (u8 *)state + t->stateOffsets.history + t->historyRequired
- - MIN(t->historyRequired, offset);
-}
-
-/** \brief Sanity checks for scratch space.
- *
- * Although more at home in scratch.c, it is located here to be closer to its
- * callers.
- */
-static really_inline
-char validScratch(const struct RoseEngine *t, const struct hs_scratch *s) {
- if (!ISALIGNED_CL(s)) {
- DEBUG_PRINTF("bad alignment %p\n", s);
- return 0;
- }
-
- if (s->magic != SCRATCH_MAGIC) {
- DEBUG_PRINTF("bad magic 0x%x\n", s->magic);
- return 0;
- }
-
- if (t->mode == HS_MODE_BLOCK && t->stateOffsets.end > s->bStateSize) {
- DEBUG_PRINTF("bad state size\n");
- return 0;
- }
-
- if (t->queueCount > s->queueCount) {
- DEBUG_PRINTF("bad queue count\n");
- return 0;
- }
-
- /* TODO: add quick rose sanity checks */
-
- return 1;
-}
-
-static really_inline
-void populateCoreInfo(struct hs_scratch *s, const struct RoseEngine *rose,
- char *state, match_event_handler onEvent, void *userCtx,
- const char *data, size_t length, const u8 *history,
+ return 0;
+}
+
+static really_inline
+u32 getHistoryAmount(const struct RoseEngine *t, u64a offset) {
+ return MIN(t->historyRequired, offset);
+}
+
+static really_inline
+u8 *getHistory(char *state, const struct RoseEngine *t, u64a offset) {
+ return (u8 *)state + t->stateOffsets.history + t->historyRequired
+ - MIN(t->historyRequired, offset);
+}
+
+/** \brief Sanity checks for scratch space.
+ *
+ * Although more at home in scratch.c, it is located here to be closer to its
+ * callers.
+ */
+static really_inline
+char validScratch(const struct RoseEngine *t, const struct hs_scratch *s) {
+ if (!ISALIGNED_CL(s)) {
+ DEBUG_PRINTF("bad alignment %p\n", s);
+ return 0;
+ }
+
+ if (s->magic != SCRATCH_MAGIC) {
+ DEBUG_PRINTF("bad magic 0x%x\n", s->magic);
+ return 0;
+ }
+
+ if (t->mode == HS_MODE_BLOCK && t->stateOffsets.end > s->bStateSize) {
+ DEBUG_PRINTF("bad state size\n");
+ return 0;
+ }
+
+ if (t->queueCount > s->queueCount) {
+ DEBUG_PRINTF("bad queue count\n");
+ return 0;
+ }
+
+ /* TODO: add quick rose sanity checks */
+
+ return 1;
+}
+
+static really_inline
+void populateCoreInfo(struct hs_scratch *s, const struct RoseEngine *rose,
+ char *state, match_event_handler onEvent, void *userCtx,
+ const char *data, size_t length, const u8 *history,
size_t hlen, u64a offset, u8 status,
UNUSED unsigned int flags) {
- assert(rose);
- s->core_info.userContext = userCtx;
- s->core_info.userCallback = onEvent ? onEvent : null_onEvent;
- s->core_info.rose = rose;
- s->core_info.state = state; /* required for chained queues + evec */
-
- s->core_info.exhaustionVector = state + rose->stateOffsets.exhausted;
+ assert(rose);
+ s->core_info.userContext = userCtx;
+ s->core_info.userCallback = onEvent ? onEvent : null_onEvent;
+ s->core_info.rose = rose;
+ s->core_info.state = state; /* required for chained queues + evec */
+
+ s->core_info.exhaustionVector = state + rose->stateOffsets.exhausted;
s->core_info.status = status;
- s->core_info.buf = (const u8 *)data;
- s->core_info.len = length;
- s->core_info.hbuf = history;
- s->core_info.hlen = hlen;
- s->core_info.buf_offset = offset;
-
- /* and some stuff not actually in core info */
- s->som_set_now_offset = ~0ULL;
- s->deduper.current_report_offset = ~0ULL;
- s->deduper.som_log_dirty = 1; /* som logs have not been cleared */
+ s->core_info.buf = (const u8 *)data;
+ s->core_info.len = length;
+ s->core_info.hbuf = history;
+ s->core_info.hlen = hlen;
+ s->core_info.buf_offset = offset;
+
+ /* and some stuff not actually in core info */
+ s->som_set_now_offset = ~0ULL;
+ s->deduper.current_report_offset = ~0ULL;
+ s->deduper.som_log_dirty = 1; /* som logs have not been cleared */
s->fdr_conf = NULL;
-
+
// Rose program execution (used for some report paths) depends on these
// values being initialised.
s->tctxt.lastMatchOffset = 0;
s->tctxt.minMatchOffset = offset;
s->tctxt.minNonMpvMatchOffset = offset;
-}
-
+}
+
#define STATUS_VALID_BITS \
(STATUS_TERMINATED | STATUS_EXHAUSTED | STATUS_DELAY_DIRTY | STATUS_ERROR)
-
+
/** \brief Retrieve status bitmask from stream state. */
-static really_inline
+static really_inline
u8 getStreamStatus(const char *state) {
u8 status = *(const u8 *)(state + ROSE_STATE_OFFSET_STATUS_FLAGS);
assert((status & ~STATUS_VALID_BITS) == 0);
return status;
-}
-
+}
+
/** \brief Store status bitmask to stream state. */
-static really_inline
+static really_inline
void setStreamStatus(char *state, u8 status) {
assert((status & ~STATUS_VALID_BITS) == 0);
*(u8 *)(state + ROSE_STATE_OFFSET_STATUS_FLAGS) = status;
-}
-
-/** \brief Initialise SOM state. Used in both block and streaming mode. */
-static really_inline
+}
+
+/** \brief Initialise SOM state. Used in both block and streaming mode. */
+static really_inline
void initSomState(const struct RoseEngine *rose, char *state) {
- assert(rose && state);
- const u32 somCount = rose->somLocationCount;
+ assert(rose && state);
+ const u32 somCount = rose->somLocationCount;
mmbit_clear((u8 *)state + rose->stateOffsets.somValid, somCount);
mmbit_clear((u8 *)state + rose->stateOffsets.somWritable, somCount);
-}
-
-static really_inline
-void rawBlockExec(const struct RoseEngine *rose, struct hs_scratch *scratch) {
- assert(rose);
- assert(scratch);
-
+}
+
+static really_inline
+void rawBlockExec(const struct RoseEngine *rose, struct hs_scratch *scratch) {
+ assert(rose);
+ assert(scratch);
+
initSomState(rose, scratch->core_info.state);
-
- DEBUG_PRINTF("blockmode scan len=%zu\n", scratch->core_info.len);
-
+
+ DEBUG_PRINTF("blockmode scan len=%zu\n", scratch->core_info.len);
+
roseBlockExec(rose, scratch);
-}
-
-static really_inline
+}
+
+static really_inline
void pureLiteralInitScratch(struct hs_scratch *scratch, u64a offset) {
// Some init has already been done.
assert(offset == scratch->core_info.buf_offset);
@@ -201,160 +201,160 @@ void pureLiteralInitScratch(struct hs_scratch *scratch, u64a offset) {
}
static really_inline
-void pureLiteralBlockExec(const struct RoseEngine *rose,
- struct hs_scratch *scratch) {
- assert(rose);
- assert(scratch);
-
- const struct HWLM *ftable = getFLiteralMatcher(rose);
+void pureLiteralBlockExec(const struct RoseEngine *rose,
+ struct hs_scratch *scratch) {
+ assert(rose);
+ assert(scratch);
+
+ const struct HWLM *ftable = getFLiteralMatcher(rose);
initSomState(rose, scratch->core_info.state);
- const u8 *buffer = scratch->core_info.buf;
- size_t length = scratch->core_info.len;
- DEBUG_PRINTF("rose engine %d\n", rose->runtimeImpl);
-
+ const u8 *buffer = scratch->core_info.buf;
+ size_t length = scratch->core_info.len;
+ DEBUG_PRINTF("rose engine %d\n", rose->runtimeImpl);
+
pureLiteralInitScratch(scratch, 0);
scratch->tctxt.groups = rose->initialGroups;
hwlmExec(ftable, buffer, length, 0, roseCallback, scratch,
rose->initialGroups & rose->floating_group_mask);
-}
-
-static really_inline
+}
+
+static really_inline
void initOutfixQueue(struct mq *q, u32 qi, const struct RoseEngine *t,
struct hs_scratch *scratch) {
- const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
- q->nfa = getNfaByInfo(t, info);
- q->end = 0;
- q->cur = 0;
- q->state = scratch->fullState + info->fullStateOffset;
- q->streamState = (char *)scratch->core_info.state + info->stateOffset;
- q->offset = scratch->core_info.buf_offset;
- q->buffer = scratch->core_info.buf;
- q->length = scratch->core_info.len;
- q->history = scratch->core_info.hbuf;
- q->hlength = scratch->core_info.hlen;
+ const struct NfaInfo *info = getNfaInfoByQueue(t, qi);
+ q->nfa = getNfaByInfo(t, info);
+ q->end = 0;
+ q->cur = 0;
+ q->state = scratch->fullState + info->fullStateOffset;
+ q->streamState = (char *)scratch->core_info.state + info->stateOffset;
+ q->offset = scratch->core_info.buf_offset;
+ q->buffer = scratch->core_info.buf;
+ q->length = scratch->core_info.len;
+ q->history = scratch->core_info.hbuf;
+ q->hlength = scratch->core_info.hlen;
q->cb = roseReportAdaptor;
- q->context = scratch;
- q->report_current = 0;
-
- DEBUG_PRINTF("qi=%u, offset=%llu, fullState=%u, streamState=%u, "
- "state=%u\n", qi, q->offset, info->fullStateOffset,
- info->stateOffset, *(u32 *)q->state);
-}
-
-static never_inline
-void soleOutfixBlockExec(const struct RoseEngine *t,
- struct hs_scratch *scratch) {
- assert(t);
- assert(scratch);
-
+ q->context = scratch;
+ q->report_current = 0;
+
+ DEBUG_PRINTF("qi=%u, offset=%llu, fullState=%u, streamState=%u, "
+ "state=%u\n", qi, q->offset, info->fullStateOffset,
+ info->stateOffset, *(u32 *)q->state);
+}
+
+static never_inline
+void soleOutfixBlockExec(const struct RoseEngine *t,
+ struct hs_scratch *scratch) {
+ assert(t);
+ assert(scratch);
+
initSomState(t, scratch->core_info.state);
- assert(t->outfixEndQueue == 1);
- assert(!t->amatcherOffset);
- assert(!t->ematcherOffset);
- assert(!t->fmatcherOffset);
-
- const struct NFA *nfa = getNfaByQueue(t, 0);
-
- size_t len = nfaRevAccelCheck(nfa, scratch->core_info.buf,
- scratch->core_info.len);
- if (!len) {
- return;
- }
-
- struct mq *q = scratch->queues;
+ assert(t->outfixEndQueue == 1);
+ assert(!t->amatcherOffset);
+ assert(!t->ematcherOffset);
+ assert(!t->fmatcherOffset);
+
+ const struct NFA *nfa = getNfaByQueue(t, 0);
+
+ size_t len = nfaRevAccelCheck(nfa, scratch->core_info.buf,
+ scratch->core_info.len);
+ if (!len) {
+ return;
+ }
+
+ struct mq *q = scratch->queues;
initOutfixQueue(q, 0, t, scratch);
- q->length = len; /* adjust for rev_accel */
- nfaQueueInitState(nfa, q);
- pushQueueAt(q, 0, MQE_START, 0);
- pushQueueAt(q, 1, MQE_TOP, 0);
- pushQueueAt(q, 2, MQE_END, scratch->core_info.len);
-
- char rv = nfaQueueExec(q->nfa, q, scratch->core_info.len);
-
- if (rv && nfaAcceptsEod(nfa) && len == scratch->core_info.len) {
+ q->length = len; /* adjust for rev_accel */
+ nfaQueueInitState(nfa, q);
+ pushQueueAt(q, 0, MQE_START, 0);
+ pushQueueAt(q, 1, MQE_TOP, 0);
+ pushQueueAt(q, 2, MQE_END, scratch->core_info.len);
+
+ char rv = nfaQueueExec(q->nfa, q, scratch->core_info.len);
+
+ if (rv && nfaAcceptsEod(nfa) && len == scratch->core_info.len) {
nfaCheckFinalState(nfa, q->state, q->streamState, q->length, q->cb,
scratch);
- }
-}
-
-static rose_inline
-void runSmallWriteEngine(const struct SmallWriteEngine *smwr,
- struct hs_scratch *scratch) {
- assert(smwr);
- assert(scratch);
-
- const u8 *buffer = scratch->core_info.buf;
- size_t length = scratch->core_info.len;
-
- DEBUG_PRINTF("USING SMALL WRITE\n");
-
- if (length <= smwr->start_offset) {
- DEBUG_PRINTF("too short\n");
- return;
- }
-
- const struct NFA *nfa = getSmwrNfa(smwr);
-
- size_t local_alen = length - smwr->start_offset;
- const u8 *local_buffer = buffer + smwr->start_offset;
-
+ }
+}
+
+static rose_inline
+void runSmallWriteEngine(const struct SmallWriteEngine *smwr,
+ struct hs_scratch *scratch) {
+ assert(smwr);
+ assert(scratch);
+
+ const u8 *buffer = scratch->core_info.buf;
+ size_t length = scratch->core_info.len;
+
+ DEBUG_PRINTF("USING SMALL WRITE\n");
+
+ if (length <= smwr->start_offset) {
+ DEBUG_PRINTF("too short\n");
+ return;
+ }
+
+ const struct NFA *nfa = getSmwrNfa(smwr);
+
+ size_t local_alen = length - smwr->start_offset;
+ const u8 *local_buffer = buffer + smwr->start_offset;
+
assert(isDfaType(nfa->type));
- if (nfa->type == MCCLELLAN_NFA_8) {
- nfaExecMcClellan8_B(nfa, smwr->start_offset, local_buffer,
+ if (nfa->type == MCCLELLAN_NFA_8) {
+ nfaExecMcClellan8_B(nfa, smwr->start_offset, local_buffer,
local_alen, roseReportAdaptor, scratch);
} else if (nfa->type == MCCLELLAN_NFA_16) {
nfaExecMcClellan16_B(nfa, smwr->start_offset, local_buffer,
local_alen, roseReportAdaptor, scratch);
- } else {
+ } else {
nfaExecSheng_B(nfa, smwr->start_offset, local_buffer,
local_alen, roseReportAdaptor, scratch);
- }
-}
-
-HS_PUBLIC_API
+ }
+}
+
+HS_PUBLIC_API
hs_error_t HS_CDECL hs_scan(const hs_database_t *db, const char *data,
unsigned length, unsigned flags,
hs_scratch_t *scratch, match_event_handler onEvent,
void *userCtx) {
- if (unlikely(!scratch || !data)) {
- return HS_INVALID;
- }
-
- hs_error_t err = validDatabase(db);
- if (unlikely(err != HS_SUCCESS)) {
- return err;
- }
-
- const struct RoseEngine *rose = hs_get_bytecode(db);
- if (unlikely(!ISALIGNED_16(rose))) {
- return HS_INVALID;
- }
-
- if (unlikely(rose->mode != HS_MODE_BLOCK)) {
- return HS_DB_MODE_ERROR;
- }
-
- if (unlikely(!validScratch(rose, scratch))) {
- return HS_INVALID;
- }
-
+ if (unlikely(!scratch || !data)) {
+ return HS_INVALID;
+ }
+
+ hs_error_t err = validDatabase(db);
+ if (unlikely(err != HS_SUCCESS)) {
+ return err;
+ }
+
+ const struct RoseEngine *rose = hs_get_bytecode(db);
+ if (unlikely(!ISALIGNED_16(rose))) {
+ return HS_INVALID;
+ }
+
+ if (unlikely(rose->mode != HS_MODE_BLOCK)) {
+ return HS_DB_MODE_ERROR;
+ }
+
+ if (unlikely(!validScratch(rose, scratch))) {
+ return HS_INVALID;
+ }
+
if (unlikely(markScratchInUse(scratch))) {
return HS_SCRATCH_IN_USE;
}
- if (rose->minWidth > length) {
- DEBUG_PRINTF("minwidth=%u > length=%u\n", rose->minWidth, length);
+ if (rose->minWidth > length) {
+ DEBUG_PRINTF("minwidth=%u > length=%u\n", rose->minWidth, length);
unmarkScratchInUse(scratch);
- return HS_SUCCESS;
- }
-
- prefetch_data(data, length);
-
- /* populate core info in scratch */
- populateCoreInfo(scratch, rose, scratch->bstate, onEvent, userCtx, data,
+ return HS_SUCCESS;
+ }
+
+ prefetch_data(data, length);
+
+ /* populate core info in scratch */
+ populateCoreInfo(scratch, rose, scratch->bstate, onEvent, userCtx, data,
length, NULL, 0, 0, 0, flags);
-
+
clearEvec(rose, scratch->core_info.exhaustionVector);
if (rose->ckeyCount) {
scratch->core_info.logicalVector = scratch->bstate +
@@ -365,90 +365,90 @@ hs_error_t HS_CDECL hs_scan(const hs_database_t *db, const char *data,
clearLvec(rose, scratch->core_info.logicalVector,
scratch->core_info.combVector);
}
-
- if (!length) {
- if (rose->boundary.reportZeroEodOffset) {
+
+ if (!length) {
+ if (rose->boundary.reportZeroEodOffset) {
roseRunBoundaryProgram(rose, rose->boundary.reportZeroEodOffset, 0,
scratch);
- }
- goto set_retval;
- }
-
- if (rose->boundary.reportZeroOffset) {
+ }
+ goto set_retval;
+ }
+
+ if (rose->boundary.reportZeroOffset) {
int rv = roseRunBoundaryProgram(rose, rose->boundary.reportZeroOffset,
0, scratch);
if (rv == MO_HALT_MATCHING) {
goto set_retval;
}
- }
-
- if (rose->minWidthExcludingBoundaries > length) {
- DEBUG_PRINTF("minWidthExcludingBoundaries=%u > length=%u\n",
- rose->minWidthExcludingBoundaries, length);
- goto done_scan;
- }
-
- // Similarly, we may have a maximum width (for engines constructed entirely
- // of bi-anchored patterns).
- if (rose->maxBiAnchoredWidth != ROSE_BOUND_INF
- && length > rose->maxBiAnchoredWidth) {
- DEBUG_PRINTF("block len=%u longer than maxBAWidth=%u\n", length,
- rose->maxBiAnchoredWidth);
- goto done_scan;
- }
-
- // Is this a small write case?
- if (rose->smallWriteOffset) {
- const struct SmallWriteEngine *smwr = getSmallWrite(rose);
- assert(smwr);
-
- // Apply the small write engine if and only if the block (buffer) is
- // small enough. Otherwise, we allow rose &co to deal with it.
- if (length < smwr->largestBuffer) {
- DEBUG_PRINTF("Attempting small write of block %u bytes long.\n",
- length);
- runSmallWriteEngine(smwr, scratch);
- goto done_scan;
- }
- }
-
- switch (rose->runtimeImpl) {
- default:
- assert(0);
- case ROSE_RUNTIME_FULL_ROSE:
- rawBlockExec(rose, scratch);
- break;
- case ROSE_RUNTIME_PURE_LITERAL:
- pureLiteralBlockExec(rose, scratch);
- break;
- case ROSE_RUNTIME_SINGLE_OUTFIX:
- soleOutfixBlockExec(rose, scratch);
- break;
- }
-
-done_scan:
+ }
+
+ if (rose->minWidthExcludingBoundaries > length) {
+ DEBUG_PRINTF("minWidthExcludingBoundaries=%u > length=%u\n",
+ rose->minWidthExcludingBoundaries, length);
+ goto done_scan;
+ }
+
+ // Similarly, we may have a maximum width (for engines constructed entirely
+ // of bi-anchored patterns).
+ if (rose->maxBiAnchoredWidth != ROSE_BOUND_INF
+ && length > rose->maxBiAnchoredWidth) {
+ DEBUG_PRINTF("block len=%u longer than maxBAWidth=%u\n", length,
+ rose->maxBiAnchoredWidth);
+ goto done_scan;
+ }
+
+ // Is this a small write case?
+ if (rose->smallWriteOffset) {
+ const struct SmallWriteEngine *smwr = getSmallWrite(rose);
+ assert(smwr);
+
+ // Apply the small write engine if and only if the block (buffer) is
+ // small enough. Otherwise, we allow rose &co to deal with it.
+ if (length < smwr->largestBuffer) {
+ DEBUG_PRINTF("Attempting small write of block %u bytes long.\n",
+ length);
+ runSmallWriteEngine(smwr, scratch);
+ goto done_scan;
+ }
+ }
+
+ switch (rose->runtimeImpl) {
+ default:
+ assert(0);
+ case ROSE_RUNTIME_FULL_ROSE:
+ rawBlockExec(rose, scratch);
+ break;
+ case ROSE_RUNTIME_PURE_LITERAL:
+ pureLiteralBlockExec(rose, scratch);
+ break;
+ case ROSE_RUNTIME_SINGLE_OUTFIX:
+ soleOutfixBlockExec(rose, scratch);
+ break;
+ }
+
+done_scan:
if (unlikely(internal_matching_error(scratch))) {
unmarkScratchInUse(scratch);
return HS_UNKNOWN_ERROR;
} else if (told_to_stop_matching(scratch)) {
unmarkScratchInUse(scratch);
- return HS_SCAN_TERMINATED;
- }
-
- if (rose->hasSom) {
- int halt = flushStoredSomMatches(scratch, ~0ULL);
- if (halt) {
+ return HS_SCAN_TERMINATED;
+ }
+
+ if (rose->hasSom) {
+ int halt = flushStoredSomMatches(scratch, ~0ULL);
+ if (halt) {
unmarkScratchInUse(scratch);
- return HS_SCAN_TERMINATED;
- }
- }
-
- if (rose->boundary.reportEodOffset) {
+ return HS_SCAN_TERMINATED;
+ }
+ }
+
+ if (rose->boundary.reportEodOffset) {
roseRunBoundaryProgram(rose, rose->boundary.reportEodOffset, length,
scratch);
- }
-
-set_retval:
+ }
+
+set_retval:
if (unlikely(internal_matching_error(scratch))) {
unmarkScratchInUse(scratch);
return HS_UNKNOWN_ERROR;
@@ -466,48 +466,48 @@ set_retval:
}
}
- DEBUG_PRINTF("done. told_to_stop_matching=%d\n",
- told_to_stop_matching(scratch));
+ DEBUG_PRINTF("done. told_to_stop_matching=%d\n",
+ told_to_stop_matching(scratch));
hs_error_t rv = told_to_stop_matching(scratch) ? HS_SCAN_TERMINATED
: HS_SUCCESS;
unmarkScratchInUse(scratch);
return rv;
-}
-
-static really_inline
-void maintainHistoryBuffer(const struct RoseEngine *rose, char *state,
- const char *buffer, size_t length) {
- if (!rose->historyRequired) {
- return;
- }
-
- // Hopefully few of our users are scanning no data.
- if (unlikely(length == 0)) {
- DEBUG_PRINTF("zero-byte scan\n");
- return;
- }
-
- char *his_state = state + rose->stateOffsets.history;
-
- if (length < rose->historyRequired) {
- size_t shortfall = rose->historyRequired - length;
- memmove(his_state, his_state + rose->historyRequired - shortfall,
- shortfall);
- }
- size_t amount = MIN(rose->historyRequired, length);
-
- memcpy(his_state + rose->historyRequired - amount, buffer + length - amount,
- amount);
-#ifdef DEBUG_HISTORY
- printf("History [%u] : ", rose->historyRequired);
- for (size_t i = 0; i < rose->historyRequired; i++) {
- printf(" %02hhx", his_state[i]);
- }
- printf("\n");
-#endif
-}
-
-static really_inline
+}
+
+static really_inline
+void maintainHistoryBuffer(const struct RoseEngine *rose, char *state,
+ const char *buffer, size_t length) {
+ if (!rose->historyRequired) {
+ return;
+ }
+
+ // Hopefully few of our users are scanning no data.
+ if (unlikely(length == 0)) {
+ DEBUG_PRINTF("zero-byte scan\n");
+ return;
+ }
+
+ char *his_state = state + rose->stateOffsets.history;
+
+ if (length < rose->historyRequired) {
+ size_t shortfall = rose->historyRequired - length;
+ memmove(his_state, his_state + rose->historyRequired - shortfall,
+ shortfall);
+ }
+ size_t amount = MIN(rose->historyRequired, length);
+
+ memcpy(his_state + rose->historyRequired - amount, buffer + length - amount,
+ amount);
+#ifdef DEBUG_HISTORY
+ printf("History [%u] : ", rose->historyRequired);
+ for (size_t i = 0; i < rose->historyRequired; i++) {
+ printf(" %02hhx", his_state[i]);
+ }
+ printf("\n");
+#endif
+}
+
+static really_inline
void init_stream(struct hs_stream *s, const struct RoseEngine *rose,
char init_history) {
char *state = getMultiState(s);
@@ -522,131 +522,131 @@ void init_stream(struct hs_stream *s, const struct RoseEngine *rose,
memset(hist_end - 16, 0x5a, 16);
}
- s->rose = rose;
- s->offset = 0;
-
+ s->rose = rose;
+ s->offset = 0;
+
setStreamStatus(state, 0);
- roseInitState(rose, state);
-
+ roseInitState(rose, state);
+
clearEvec(rose, state + rose->stateOffsets.exhausted);
if (rose->ckeyCount) {
clearLvec(rose, state + rose->stateOffsets.logicalVec,
state + rose->stateOffsets.combVec);
}
-
- // SOM state multibit structures.
- initSomState(rose, state);
-}
-
-HS_PUBLIC_API
+
+ // SOM state multibit structures.
+ initSomState(rose, state);
+}
+
+HS_PUBLIC_API
hs_error_t HS_CDECL hs_open_stream(const hs_database_t *db,
UNUSED unsigned flags,
hs_stream_t **stream) {
- if (unlikely(!stream)) {
- return HS_INVALID;
- }
-
- *stream = NULL;
-
- hs_error_t err = validDatabase(db);
- if (unlikely(err != HS_SUCCESS)) {
- return err;
- }
-
- const struct RoseEngine *rose = hs_get_bytecode(db);
- if (unlikely(!ISALIGNED_16(rose))) {
- return HS_INVALID;
- }
-
- if (unlikely(rose->mode != HS_MODE_STREAM)) {
- return HS_DB_MODE_ERROR;
- }
-
- size_t stateSize = rose->stateOffsets.end;
- struct hs_stream *s = hs_stream_alloc(sizeof(struct hs_stream) + stateSize);
- if (unlikely(!s)) {
- return HS_NOMEM;
- }
-
+ if (unlikely(!stream)) {
+ return HS_INVALID;
+ }
+
+ *stream = NULL;
+
+ hs_error_t err = validDatabase(db);
+ if (unlikely(err != HS_SUCCESS)) {
+ return err;
+ }
+
+ const struct RoseEngine *rose = hs_get_bytecode(db);
+ if (unlikely(!ISALIGNED_16(rose))) {
+ return HS_INVALID;
+ }
+
+ if (unlikely(rose->mode != HS_MODE_STREAM)) {
+ return HS_DB_MODE_ERROR;
+ }
+
+ size_t stateSize = rose->stateOffsets.end;
+ struct hs_stream *s = hs_stream_alloc(sizeof(struct hs_stream) + stateSize);
+ if (unlikely(!s)) {
+ return HS_NOMEM;
+ }
+
init_stream(s, rose, 1);
-
- *stream = s;
- return HS_SUCCESS;
-}
-
-
-static really_inline
-void rawEodExec(hs_stream_t *id, hs_scratch_t *scratch) {
- const struct RoseEngine *rose = id->rose;
-
+
+ *stream = s;
+ return HS_SUCCESS;
+}
+
+
+static really_inline
+void rawEodExec(hs_stream_t *id, hs_scratch_t *scratch) {
+ const struct RoseEngine *rose = id->rose;
+
if (can_stop_matching(scratch)) {
- DEBUG_PRINTF("stream already broken\n");
- return;
- }
-
- if (isAllExhausted(rose, scratch->core_info.exhaustionVector)) {
- DEBUG_PRINTF("stream exhausted\n");
- return;
- }
-
+ DEBUG_PRINTF("stream already broken\n");
+ return;
+ }
+
+ if (isAllExhausted(rose, scratch->core_info.exhaustionVector)) {
+ DEBUG_PRINTF("stream exhausted\n");
+ return;
+ }
+
roseStreamEodExec(rose, id->offset, scratch);
-}
-
-static never_inline
-void soleOutfixEodExec(hs_stream_t *id, hs_scratch_t *scratch) {
- const struct RoseEngine *t = id->rose;
-
+}
+
+static never_inline
+void soleOutfixEodExec(hs_stream_t *id, hs_scratch_t *scratch) {
+ const struct RoseEngine *t = id->rose;
+
if (can_stop_matching(scratch)) {
- DEBUG_PRINTF("stream already broken\n");
- return;
- }
-
- if (isAllExhausted(t, scratch->core_info.exhaustionVector)) {
- DEBUG_PRINTF("stream exhausted\n");
- return;
- }
-
- assert(t->outfixEndQueue == 1);
- assert(!t->amatcherOffset);
- assert(!t->ematcherOffset);
- assert(!t->fmatcherOffset);
-
- const struct NFA *nfa = getNfaByQueue(t, 0);
-
- struct mq *q = scratch->queues;
+ DEBUG_PRINTF("stream already broken\n");
+ return;
+ }
+
+ if (isAllExhausted(t, scratch->core_info.exhaustionVector)) {
+ DEBUG_PRINTF("stream exhausted\n");
+ return;
+ }
+
+ assert(t->outfixEndQueue == 1);
+ assert(!t->amatcherOffset);
+ assert(!t->ematcherOffset);
+ assert(!t->fmatcherOffset);
+
+ const struct NFA *nfa = getNfaByQueue(t, 0);
+
+ struct mq *q = scratch->queues;
initOutfixQueue(q, 0, t, scratch);
- if (!scratch->core_info.buf_offset) {
- DEBUG_PRINTF("buf_offset is zero\n");
- return; /* no vacuous engines */
- }
-
- nfaExpandState(nfa, q->state, q->streamState, q->offset,
- queue_prev_byte(q, 0));
-
- assert(nfaAcceptsEod(nfa));
- nfaCheckFinalState(nfa, q->state, q->streamState, q->offset, q->cb,
+ if (!scratch->core_info.buf_offset) {
+ DEBUG_PRINTF("buf_offset is zero\n");
+ return; /* no vacuous engines */
+ }
+
+ nfaExpandState(nfa, q->state, q->streamState, q->offset,
+ queue_prev_byte(q, 0));
+
+ assert(nfaAcceptsEod(nfa));
+ nfaCheckFinalState(nfa, q->state, q->streamState, q->offset, q->cb,
scratch);
-}
-
-static really_inline
-void report_eod_matches(hs_stream_t *id, hs_scratch_t *scratch,
- match_event_handler onEvent, void *context) {
- DEBUG_PRINTF("--- report eod matches at offset %llu\n", id->offset);
- assert(onEvent);
-
- const struct RoseEngine *rose = id->rose;
- char *state = getMultiState(id);
+}
+
+static really_inline
+void report_eod_matches(hs_stream_t *id, hs_scratch_t *scratch,
+ match_event_handler onEvent, void *context) {
+ DEBUG_PRINTF("--- report eod matches at offset %llu\n", id->offset);
+ assert(onEvent);
+
+ const struct RoseEngine *rose = id->rose;
+ char *state = getMultiState(id);
u8 status = getStreamStatus(state);
-
+
if (status & (STATUS_TERMINATED | STATUS_EXHAUSTED | STATUS_ERROR)) {
- DEBUG_PRINTF("stream is broken, just freeing storage\n");
- return;
- }
-
- populateCoreInfo(scratch, rose, state, onEvent, context, NULL, 0,
- getHistory(state, rose, id->offset),
+ DEBUG_PRINTF("stream is broken, just freeing storage\n");
+ return;
+ }
+
+ populateCoreInfo(scratch, rose, state, onEvent, context, NULL, 0,
+ getHistory(state, rose, id->offset),
getHistoryAmount(rose, id->offset), id->offset, status, 0);
-
+
if (rose->ckeyCount) {
scratch->core_info.logicalVector = state +
rose->stateOffsets.logicalVec;
@@ -656,49 +656,49 @@ void report_eod_matches(hs_stream_t *id, hs_scratch_t *scratch,
}
}
- if (rose->somLocationCount) {
- loadSomFromStream(scratch, id->offset);
- }
-
- if (!id->offset) {
- if (rose->boundary.reportZeroEodOffset) {
+ if (rose->somLocationCount) {
+ loadSomFromStream(scratch, id->offset);
+ }
+
+ if (!id->offset) {
+ if (rose->boundary.reportZeroEodOffset) {
int rv = roseRunBoundaryProgram(
rose, rose->boundary.reportZeroEodOffset, 0, scratch);
if (rv == MO_HALT_MATCHING) {
return;
}
- }
- } else {
- if (rose->boundary.reportEodOffset) {
+ }
+ } else {
+ if (rose->boundary.reportEodOffset) {
int rv = roseRunBoundaryProgram(
rose, rose->boundary.reportEodOffset, id->offset, scratch);
if (rv == MO_HALT_MATCHING) {
return;
}
- }
-
- if (rose->requiresEodCheck) {
- switch (rose->runtimeImpl) {
- default:
- case ROSE_RUNTIME_PURE_LITERAL:
- assert(0);
- case ROSE_RUNTIME_FULL_ROSE:
- rawEodExec(id, scratch);
- break;
- case ROSE_RUNTIME_SINGLE_OUTFIX:
- soleOutfixEodExec(id, scratch);
- break;
- }
- }
- }
-
- if (rose->hasSom && !told_to_stop_matching(scratch)) {
- int halt = flushStoredSomMatches(scratch, ~0ULL);
- if (halt) {
- DEBUG_PRINTF("told to stop matching\n");
+ }
+
+ if (rose->requiresEodCheck) {
+ switch (rose->runtimeImpl) {
+ default:
+ case ROSE_RUNTIME_PURE_LITERAL:
+ assert(0);
+ case ROSE_RUNTIME_FULL_ROSE:
+ rawEodExec(id, scratch);
+ break;
+ case ROSE_RUNTIME_SINGLE_OUTFIX:
+ soleOutfixEodExec(id, scratch);
+ break;
+ }
+ }
+ }
+
+ if (rose->hasSom && !told_to_stop_matching(scratch)) {
+ int halt = flushStoredSomMatches(scratch, ~0ULL);
+ if (halt) {
+ DEBUG_PRINTF("told to stop matching\n");
scratch->core_info.status |= STATUS_TERMINATED;
- }
- }
+ }
+ }
if (rose->lastFlushCombProgramOffset && !told_to_stop_matching(scratch)) {
if (roseRunLastFlushCombProgram(rose, scratch, id->offset)
@@ -707,203 +707,203 @@ void report_eod_matches(hs_stream_t *id, hs_scratch_t *scratch,
scratch->core_info.status |= STATUS_TERMINATED;
}
}
-}
-
-HS_PUBLIC_API
+}
+
+HS_PUBLIC_API
hs_error_t HS_CDECL hs_copy_stream(hs_stream_t **to_id,
const hs_stream_t *from_id) {
- if (!to_id) {
- return HS_INVALID;
- }
-
- *to_id = NULL;
-
- if (!from_id || !from_id->rose) {
- return HS_INVALID;
- }
-
- const struct RoseEngine *rose = from_id->rose;
- size_t stateSize = sizeof(struct hs_stream) + rose->stateOffsets.end;
-
- struct hs_stream *s = hs_stream_alloc(stateSize);
- if (!s) {
- return HS_NOMEM;
- }
-
- memcpy(s, from_id, stateSize);
-
- *to_id = s;
-
- return HS_SUCCESS;
-}
-
-HS_PUBLIC_API
+ if (!to_id) {
+ return HS_INVALID;
+ }
+
+ *to_id = NULL;
+
+ if (!from_id || !from_id->rose) {
+ return HS_INVALID;
+ }
+
+ const struct RoseEngine *rose = from_id->rose;
+ size_t stateSize = sizeof(struct hs_stream) + rose->stateOffsets.end;
+
+ struct hs_stream *s = hs_stream_alloc(stateSize);
+ if (!s) {
+ return HS_NOMEM;
+ }
+
+ memcpy(s, from_id, stateSize);
+
+ *to_id = s;
+
+ return HS_SUCCESS;
+}
+
+HS_PUBLIC_API
hs_error_t HS_CDECL hs_reset_and_copy_stream(hs_stream_t *to_id,
const hs_stream_t *from_id,
hs_scratch_t *scratch,
match_event_handler onEvent,
void *context) {
- if (!from_id || !from_id->rose) {
- return HS_INVALID;
- }
-
- if (!to_id || to_id->rose != from_id->rose) {
- return HS_INVALID;
- }
-
- if (to_id == from_id) {
- return HS_INVALID;
- }
-
- if (onEvent) {
- if (!scratch || !validScratch(to_id->rose, scratch)) {
- return HS_INVALID;
- }
+ if (!from_id || !from_id->rose) {
+ return HS_INVALID;
+ }
+
+ if (!to_id || to_id->rose != from_id->rose) {
+ return HS_INVALID;
+ }
+
+ if (to_id == from_id) {
+ return HS_INVALID;
+ }
+
+ if (onEvent) {
+ if (!scratch || !validScratch(to_id->rose, scratch)) {
+ return HS_INVALID;
+ }
if (unlikely(markScratchInUse(scratch))) {
return HS_SCRATCH_IN_USE;
}
- report_eod_matches(to_id, scratch, onEvent, context);
+ report_eod_matches(to_id, scratch, onEvent, context);
if (unlikely(internal_matching_error(scratch))) {
unmarkScratchInUse(scratch);
return HS_UNKNOWN_ERROR;
}
unmarkScratchInUse(scratch);
- }
-
- size_t stateSize
- = sizeof(struct hs_stream) + from_id->rose->stateOffsets.end;
-
- memcpy(to_id, from_id, stateSize);
-
- return HS_SUCCESS;
-}
-
-static really_inline
-void rawStreamExec(struct hs_stream *stream_state, struct hs_scratch *scratch) {
- assert(stream_state);
- assert(scratch);
+ }
+
+ size_t stateSize
+ = sizeof(struct hs_stream) + from_id->rose->stateOffsets.end;
+
+ memcpy(to_id, from_id, stateSize);
+
+ return HS_SUCCESS;
+}
+
+static really_inline
+void rawStreamExec(struct hs_stream *stream_state, struct hs_scratch *scratch) {
+ assert(stream_state);
+ assert(scratch);
assert(!can_stop_matching(scratch));
-
- DEBUG_PRINTF("::: streaming rose ::: offset = %llu len = %zu\n",
- stream_state->offset, scratch->core_info.len);
-
- const struct RoseEngine *rose = stream_state->rose;
- assert(rose);
+
+ DEBUG_PRINTF("::: streaming rose ::: offset = %llu len = %zu\n",
+ stream_state->offset, scratch->core_info.len);
+
+ const struct RoseEngine *rose = stream_state->rose;
+ assert(rose);
roseStreamExec(rose, scratch);
-
- if (!told_to_stop_matching(scratch) &&
- isAllExhausted(rose, scratch->core_info.exhaustionVector)) {
- DEBUG_PRINTF("stream exhausted\n");
+
+ if (!told_to_stop_matching(scratch) &&
+ isAllExhausted(rose, scratch->core_info.exhaustionVector)) {
+ DEBUG_PRINTF("stream exhausted\n");
scratch->core_info.status |= STATUS_EXHAUSTED;
- }
-}
-
-static really_inline
-void pureLiteralStreamExec(struct hs_stream *stream_state,
- struct hs_scratch *scratch) {
- assert(stream_state);
- assert(scratch);
+ }
+}
+
+static really_inline
+void pureLiteralStreamExec(struct hs_stream *stream_state,
+ struct hs_scratch *scratch) {
+ assert(stream_state);
+ assert(scratch);
assert(!can_stop_matching(scratch));
-
- const struct RoseEngine *rose = stream_state->rose;
- const struct HWLM *ftable = getFLiteralMatcher(rose);
-
- size_t len2 = scratch->core_info.len;
-
- DEBUG_PRINTF("::: streaming rose ::: offset = %llu len = %zu\n",
- stream_state->offset, scratch->core_info.len);
-
+
+ const struct RoseEngine *rose = stream_state->rose;
+ const struct HWLM *ftable = getFLiteralMatcher(rose);
+
+ size_t len2 = scratch->core_info.len;
+
+ DEBUG_PRINTF("::: streaming rose ::: offset = %llu len = %zu\n",
+ stream_state->offset, scratch->core_info.len);
+
pureLiteralInitScratch(scratch, stream_state->offset);
scratch->tctxt.groups = loadGroups(rose, scratch->core_info.state);
- // Pure literal cases don't have floatingMinDistance set, so we always
- // start the match region at zero.
- const size_t start = 0;
-
+ // Pure literal cases don't have floatingMinDistance set, so we always
+ // start the match region at zero.
+ const size_t start = 0;
+
hwlmExecStreaming(ftable, len2, start, roseCallback, scratch,
rose->initialGroups & rose->floating_group_mask);
-
- if (!told_to_stop_matching(scratch) &&
- isAllExhausted(rose, scratch->core_info.exhaustionVector)) {
- DEBUG_PRINTF("stream exhausted\n");
+
+ if (!told_to_stop_matching(scratch) &&
+ isAllExhausted(rose, scratch->core_info.exhaustionVector)) {
+ DEBUG_PRINTF("stream exhausted\n");
scratch->core_info.status |= STATUS_EXHAUSTED;
- }
-}
-
-static never_inline
-void soleOutfixStreamExec(struct hs_stream *stream_state,
- struct hs_scratch *scratch) {
- assert(stream_state);
- assert(scratch);
+ }
+}
+
+static never_inline
+void soleOutfixStreamExec(struct hs_stream *stream_state,
+ struct hs_scratch *scratch) {
+ assert(stream_state);
+ assert(scratch);
assert(!can_stop_matching(scratch));
-
- const struct RoseEngine *t = stream_state->rose;
- assert(t->outfixEndQueue == 1);
- assert(!t->amatcherOffset);
- assert(!t->ematcherOffset);
- assert(!t->fmatcherOffset);
-
- const struct NFA *nfa = getNfaByQueue(t, 0);
-
- struct mq *q = scratch->queues;
+
+ const struct RoseEngine *t = stream_state->rose;
+ assert(t->outfixEndQueue == 1);
+ assert(!t->amatcherOffset);
+ assert(!t->ematcherOffset);
+ assert(!t->fmatcherOffset);
+
+ const struct NFA *nfa = getNfaByQueue(t, 0);
+
+ struct mq *q = scratch->queues;
initOutfixQueue(q, 0, t, scratch);
- if (!scratch->core_info.buf_offset) {
- nfaQueueInitState(nfa, q);
- pushQueueAt(q, 0, MQE_START, 0);
- pushQueueAt(q, 1, MQE_TOP, 0);
- pushQueueAt(q, 2, MQE_END, scratch->core_info.len);
- } else {
- nfaExpandState(nfa, q->state, q->streamState, q->offset,
- queue_prev_byte(q, 0));
- pushQueueAt(q, 0, MQE_START, 0);
- pushQueueAt(q, 1, MQE_END, scratch->core_info.len);
- }
-
- if (nfaQueueExec(q->nfa, q, scratch->core_info.len)) {
- nfaQueueCompressState(nfa, q, scratch->core_info.len);
- } else if (!told_to_stop_matching(scratch)) {
+ if (!scratch->core_info.buf_offset) {
+ nfaQueueInitState(nfa, q);
+ pushQueueAt(q, 0, MQE_START, 0);
+ pushQueueAt(q, 1, MQE_TOP, 0);
+ pushQueueAt(q, 2, MQE_END, scratch->core_info.len);
+ } else {
+ nfaExpandState(nfa, q->state, q->streamState, q->offset,
+ queue_prev_byte(q, 0));
+ pushQueueAt(q, 0, MQE_START, 0);
+ pushQueueAt(q, 1, MQE_END, scratch->core_info.len);
+ }
+
+ if (nfaQueueExec(q->nfa, q, scratch->core_info.len)) {
+ nfaQueueCompressState(nfa, q, scratch->core_info.len);
+ } else if (!told_to_stop_matching(scratch)) {
scratch->core_info.status |= STATUS_EXHAUSTED;
- }
-}
-
-static inline
-hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data,
- unsigned length, UNUSED unsigned flags,
- hs_scratch_t *scratch,
- match_event_handler onEvent, void *context) {
+ }
+}
+
+static inline
+hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data,
+ unsigned length, UNUSED unsigned flags,
+ hs_scratch_t *scratch,
+ match_event_handler onEvent, void *context) {
assert(id);
assert(scratch);
if (unlikely(!data)) {
- return HS_INVALID;
- }
-
- const struct RoseEngine *rose = id->rose;
- char *state = getMultiState(id);
-
+ return HS_INVALID;
+ }
+
+ const struct RoseEngine *rose = id->rose;
+ char *state = getMultiState(id);
+
u8 status = getStreamStatus(state);
if (status & (STATUS_TERMINATED | STATUS_EXHAUSTED | STATUS_ERROR)) {
- DEBUG_PRINTF("stream is broken, halting scan\n");
+ DEBUG_PRINTF("stream is broken, halting scan\n");
if (status & STATUS_ERROR) {
return HS_UNKNOWN_ERROR;
} else if (status & STATUS_TERMINATED) {
- return HS_SCAN_TERMINATED;
- } else {
- return HS_SUCCESS;
- }
- }
-
- // We avoid doing any work if the user has given us zero bytes of data to
- // scan. Arguably we should define some semantics for how we treat vacuous
- // cases here.
- if (unlikely(length == 0)) {
- DEBUG_PRINTF("zero length block\n");
- return HS_SUCCESS;
- }
-
- u32 historyAmount = getHistoryAmount(rose, id->offset);
- populateCoreInfo(scratch, rose, state, onEvent, context, data, length,
- getHistory(state, rose, id->offset), historyAmount,
+ return HS_SCAN_TERMINATED;
+ } else {
+ return HS_SUCCESS;
+ }
+ }
+
+ // We avoid doing any work if the user has given us zero bytes of data to
+ // scan. Arguably we should define some semantics for how we treat vacuous
+ // cases here.
+ if (unlikely(length == 0)) {
+ DEBUG_PRINTF("zero length block\n");
+ return HS_SUCCESS;
+ }
+
+ u32 historyAmount = getHistoryAmount(rose, id->offset);
+ populateCoreInfo(scratch, rose, state, onEvent, context, data, length,
+ getHistory(state, rose, id->offset), historyAmount,
id->offset, status, flags);
if (rose->ckeyCount) {
scratch->core_info.logicalVector = state +
@@ -913,17 +913,17 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data,
scratch->tctxt.lastCombMatchOffset = id->offset;
}
}
- assert(scratch->core_info.hlen <= id->offset
- && scratch->core_info.hlen <= rose->historyRequired);
-
- prefetch_data(data, length);
-
- if (rose->somLocationCount) {
- loadSomFromStream(scratch, id->offset);
- }
-
- if (!id->offset && rose->boundary.reportZeroOffset) {
- DEBUG_PRINTF("zero reports\n");
+ assert(scratch->core_info.hlen <= id->offset
+ && scratch->core_info.hlen <= rose->historyRequired);
+
+ prefetch_data(data, length);
+
+ if (rose->somLocationCount) {
+ loadSomFromStream(scratch, id->offset);
+ }
+
+ if (!id->offset && rose->boundary.reportZeroOffset) {
+ DEBUG_PRINTF("zero reports\n");
int rv = roseRunBoundaryProgram(rose, rose->boundary.reportZeroOffset,
0, scratch);
if (rv == MO_HALT_MATCHING) {
@@ -936,47 +936,47 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data,
return HS_SUCCESS;
}
}
- }
-
- switch (rose->runtimeImpl) {
- default:
- assert(0);
- case ROSE_RUNTIME_FULL_ROSE:
- rawStreamExec(id, scratch);
- break;
- case ROSE_RUNTIME_PURE_LITERAL:
- pureLiteralStreamExec(id, scratch);
- break;
- case ROSE_RUNTIME_SINGLE_OUTFIX:
- soleOutfixStreamExec(id, scratch);
- }
-
- if (rose->hasSom && !told_to_stop_matching(scratch)) {
- int halt = flushStoredSomMatches(scratch, ~0ULL);
- if (halt) {
+ }
+
+ switch (rose->runtimeImpl) {
+ default:
+ assert(0);
+ case ROSE_RUNTIME_FULL_ROSE:
+ rawStreamExec(id, scratch);
+ break;
+ case ROSE_RUNTIME_PURE_LITERAL:
+ pureLiteralStreamExec(id, scratch);
+ break;
+ case ROSE_RUNTIME_SINGLE_OUTFIX:
+ soleOutfixStreamExec(id, scratch);
+ }
+
+ if (rose->hasSom && !told_to_stop_matching(scratch)) {
+ int halt = flushStoredSomMatches(scratch, ~0ULL);
+ if (halt) {
scratch->core_info.status |= STATUS_TERMINATED;
- }
- }
-
+ }
+ }
+
setStreamStatus(state, scratch->core_info.status);
if (unlikely(internal_matching_error(scratch))) {
return HS_UNKNOWN_ERROR;
} else if (likely(!can_stop_matching(scratch))) {
maintainHistoryBuffer(rose, state, data, length);
- id->offset += length; /* maintain offset */
-
- if (rose->somLocationCount) {
- storeSomToStream(scratch, id->offset);
- }
- } else if (told_to_stop_matching(scratch)) {
- return HS_SCAN_TERMINATED;
- }
-
- return HS_SUCCESS;
-}
-
-HS_PUBLIC_API
+ id->offset += length; /* maintain offset */
+
+ if (rose->somLocationCount) {
+ storeSomToStream(scratch, id->offset);
+ }
+ } else if (told_to_stop_matching(scratch)) {
+ return HS_SCAN_TERMINATED;
+ }
+
+ return HS_SUCCESS;
+}
+
+HS_PUBLIC_API
hs_error_t HS_CDECL hs_scan_stream(hs_stream_t *id, const char *data,
unsigned length, unsigned flags,
hs_scratch_t *scratch,
@@ -993,115 +993,115 @@ hs_error_t HS_CDECL hs_scan_stream(hs_stream_t *id, const char *data,
onEvent, context);
unmarkScratchInUse(scratch);
return rv;
-}
-
-HS_PUBLIC_API
+}
+
+HS_PUBLIC_API
hs_error_t HS_CDECL hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch,
match_event_handler onEvent,
void *context) {
- if (!id) {
- return HS_INVALID;
- }
-
- if (onEvent) {
- if (!scratch || !validScratch(id->rose, scratch)) {
- return HS_INVALID;
- }
+ if (!id) {
+ return HS_INVALID;
+ }
+
+ if (onEvent) {
+ if (!scratch || !validScratch(id->rose, scratch)) {
+ return HS_INVALID;
+ }
if (unlikely(markScratchInUse(scratch))) {
return HS_SCRATCH_IN_USE;
}
- report_eod_matches(id, scratch, onEvent, context);
+ report_eod_matches(id, scratch, onEvent, context);
if (unlikely(internal_matching_error(scratch))) {
unmarkScratchInUse(scratch);
return HS_UNKNOWN_ERROR;
}
unmarkScratchInUse(scratch);
- }
-
- hs_stream_free(id);
-
- return HS_SUCCESS;
-}
-
-HS_PUBLIC_API
+ }
+
+ hs_stream_free(id);
+
+ return HS_SUCCESS;
+}
+
+HS_PUBLIC_API
hs_error_t HS_CDECL hs_reset_stream(hs_stream_t *id, UNUSED unsigned int flags,
hs_scratch_t *scratch,
match_event_handler onEvent,
void *context) {
- if (!id) {
- return HS_INVALID;
- }
-
- if (onEvent) {
- if (!scratch || !validScratch(id->rose, scratch)) {
- return HS_INVALID;
- }
+ if (!id) {
+ return HS_INVALID;
+ }
+
+ if (onEvent) {
+ if (!scratch || !validScratch(id->rose, scratch)) {
+ return HS_INVALID;
+ }
if (unlikely(markScratchInUse(scratch))) {
return HS_SCRATCH_IN_USE;
}
- report_eod_matches(id, scratch, onEvent, context);
+ report_eod_matches(id, scratch, onEvent, context);
if (unlikely(internal_matching_error(scratch))) {
unmarkScratchInUse(scratch);
return HS_UNKNOWN_ERROR;
}
unmarkScratchInUse(scratch);
- }
-
+ }
+
// history already initialised
init_stream(id, id->rose, 0);
-
- return HS_SUCCESS;
-}
-
-HS_PUBLIC_API
+
+ return HS_SUCCESS;
+}
+
+HS_PUBLIC_API
hs_error_t HS_CDECL hs_stream_size(const hs_database_t *db,
size_t *stream_size) {
- if (!stream_size) {
- return HS_INVALID;
- }
-
- hs_error_t ret = validDatabase(db);
- if (ret != HS_SUCCESS) {
- return ret;
- }
-
- const struct RoseEngine *rose = hs_get_bytecode(db);
- if (!ISALIGNED_16(rose)) {
- return HS_INVALID;
- }
-
- if (rose->mode != HS_MODE_STREAM) {
- return HS_DB_MODE_ERROR;
- }
-
- u32 base_stream_size = rose->stateOffsets.end;
-
- // stream state plus the hs_stream struct itself
- *stream_size = base_stream_size + sizeof(struct hs_stream);
-
- return HS_SUCCESS;
-}
-
-#if defined(DEBUG) || defined(DUMP_SUPPORT)
-#include "util/compare.h"
-// A debugging crutch: print a hex-escaped version of the match for our
-// perusal.
-static UNUSED
-void dumpData(const char *data, size_t len) {
- DEBUG_PRINTF("BUFFER:");
- for (size_t i = 0; i < len; i++) {
- u8 c = data[i];
- if (ourisprint(c) && c != '\'') {
- printf("%c", c);
- } else {
- printf("\\x%02x", c);
- }
- }
- printf("\n");
-}
-#endif
-
-HS_PUBLIC_API
+ if (!stream_size) {
+ return HS_INVALID;
+ }
+
+ hs_error_t ret = validDatabase(db);
+ if (ret != HS_SUCCESS) {
+ return ret;
+ }
+
+ const struct RoseEngine *rose = hs_get_bytecode(db);
+ if (!ISALIGNED_16(rose)) {
+ return HS_INVALID;
+ }
+
+ if (rose->mode != HS_MODE_STREAM) {
+ return HS_DB_MODE_ERROR;
+ }
+
+ u32 base_stream_size = rose->stateOffsets.end;
+
+ // stream state plus the hs_stream struct itself
+ *stream_size = base_stream_size + sizeof(struct hs_stream);
+
+ return HS_SUCCESS;
+}
+
+#if defined(DEBUG) || defined(DUMP_SUPPORT)
+#include "util/compare.h"
+// A debugging crutch: print a hex-escaped version of the match for our
+// perusal.
+static UNUSED
+void dumpData(const char *data, size_t len) {
+ DEBUG_PRINTF("BUFFER:");
+ for (size_t i = 0; i < len; i++) {
+ u8 c = data[i];
+ if (ourisprint(c) && c != '\'') {
+ printf("%c", c);
+ } else {
+ printf("\\x%02x", c);
+ }
+ }
+ printf("\n");
+}
+#endif
+
+HS_PUBLIC_API
hs_error_t HS_CDECL hs_scan_vector(const hs_database_t *db,
const char * const * data,
const unsigned int *length,
@@ -1109,68 +1109,68 @@ hs_error_t HS_CDECL hs_scan_vector(const hs_database_t *db,
UNUSED unsigned int flags,
hs_scratch_t *scratch,
match_event_handler onEvent, void *context) {
- if (unlikely(!scratch || !data || !length)) {
- return HS_INVALID;
- }
-
- hs_error_t err = validDatabase(db);
- if (unlikely(err != HS_SUCCESS)) {
- return err;
- }
-
- const struct RoseEngine *rose = hs_get_bytecode(db);
- if (unlikely(!ISALIGNED_16(rose))) {
- return HS_INVALID;
- }
-
- if (unlikely(rose->mode != HS_MODE_VECTORED)) {
- return HS_DB_MODE_ERROR;
- }
-
- if (unlikely(!validScratch(rose, scratch))) {
- return HS_INVALID;
- }
-
+ if (unlikely(!scratch || !data || !length)) {
+ return HS_INVALID;
+ }
+
+ hs_error_t err = validDatabase(db);
+ if (unlikely(err != HS_SUCCESS)) {
+ return err;
+ }
+
+ const struct RoseEngine *rose = hs_get_bytecode(db);
+ if (unlikely(!ISALIGNED_16(rose))) {
+ return HS_INVALID;
+ }
+
+ if (unlikely(rose->mode != HS_MODE_VECTORED)) {
+ return HS_DB_MODE_ERROR;
+ }
+
+ if (unlikely(!validScratch(rose, scratch))) {
+ return HS_INVALID;
+ }
+
if (unlikely(markScratchInUse(scratch))) {
return HS_SCRATCH_IN_USE;
}
- hs_stream_t *id = (hs_stream_t *)(scratch->bstate);
-
+ hs_stream_t *id = (hs_stream_t *)(scratch->bstate);
+
init_stream(id, rose, 1); /* open stream */
-
- for (u32 i = 0; i < count; i++) {
- DEBUG_PRINTF("block %u/%u offset=%llu len=%u\n", i, count, id->offset,
- length[i]);
-#ifdef DEBUG
- dumpData(data[i], length[i]);
-#endif
- hs_error_t ret
- = hs_scan_stream_internal(id, data[i], length[i], 0, scratch,
- onEvent, context);
- if (ret != HS_SUCCESS) {
+
+ for (u32 i = 0; i < count; i++) {
+ DEBUG_PRINTF("block %u/%u offset=%llu len=%u\n", i, count, id->offset,
+ length[i]);
+#ifdef DEBUG
+ dumpData(data[i], length[i]);
+#endif
+ hs_error_t ret
+ = hs_scan_stream_internal(id, data[i], length[i], 0, scratch,
+ onEvent, context);
+ if (ret != HS_SUCCESS) {
unmarkScratchInUse(scratch);
- return ret;
- }
- }
-
- /* close stream */
- if (onEvent) {
- report_eod_matches(id, scratch, onEvent, context);
-
+ return ret;
+ }
+ }
+
+ /* close stream */
+ if (onEvent) {
+ report_eod_matches(id, scratch, onEvent, context);
+
if (unlikely(internal_matching_error(scratch))) {
unmarkScratchInUse(scratch);
return HS_UNKNOWN_ERROR;
} else if (told_to_stop_matching(scratch)) {
unmarkScratchInUse(scratch);
- return HS_SCAN_TERMINATED;
- }
- }
-
+ return HS_SCAN_TERMINATED;
+ }
+ }
+
unmarkScratchInUse(scratch);
- return HS_SUCCESS;
-}
+ return HS_SUCCESS;
+}
HS_PUBLIC_API
hs_error_t HS_CDECL hs_compress_stream(const hs_stream_t *stream, char *buf,