aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/hyperscan/src/rose/stream_long_lit.h
diff options
context:
space:
mode:
authorIvan Blinkov <ivan@blinkov.ru>2022-02-10 16:47:11 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:47:11 +0300
commit5b283123c882433dafbaf6b338adeea16c1a0ea0 (patch)
tree339adc63bce23800021202ae4a8328a843dc447a /contrib/libs/hyperscan/src/rose/stream_long_lit.h
parent1aeb9a455974457866f78722ad98114bafc84e8a (diff)
downloadydb-5b283123c882433dafbaf6b338adeea16c1a0ea0.tar.gz
Restoring authorship annotation for Ivan Blinkov <ivan@blinkov.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/hyperscan/src/rose/stream_long_lit.h')
-rw-r--r--contrib/libs/hyperscan/src/rose/stream_long_lit.h738
1 files changed, 369 insertions, 369 deletions
diff --git a/contrib/libs/hyperscan/src/rose/stream_long_lit.h b/contrib/libs/hyperscan/src/rose/stream_long_lit.h
index 34bc00e286..df9b57f4e2 100644
--- a/contrib/libs/hyperscan/src/rose/stream_long_lit.h
+++ b/contrib/libs/hyperscan/src/rose/stream_long_lit.h
@@ -1,372 +1,372 @@
-/*
- * Copyright (c) 2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef STREAM_LONG_LIT_H
-#define STREAM_LONG_LIT_H
-
-#include "rose.h"
-#include "rose_common.h"
-#include "rose_internal.h"
-#include "stream_long_lit_hash.h"
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef STREAM_LONG_LIT_H
+#define STREAM_LONG_LIT_H
+
+#include "rose.h"
+#include "rose_common.h"
+#include "rose_internal.h"
+#include "stream_long_lit_hash.h"
#include "util/compare.h"
-#include "util/copybytes.h"
-
-static really_inline
-const struct RoseLongLitHashEntry *
-getHashTableBase(const struct RoseLongLitTable *ll_table,
- const struct RoseLongLitSubtable *ll_sub) {
- assert(ll_sub->hashOffset);
- return (const struct RoseLongLitHashEntry *)((const char *)ll_table +
- ll_sub->hashOffset);
-}
-
-// Reads from stream state and unpacks values into stream state table.
-static really_inline
-void loadLongLitStreamState(const struct RoseLongLitTable *ll_table,
- const u8 *ll_state, u32 *state_case,
- u32 *state_nocase) {
- assert(ll_table);
- assert(ll_state);
- assert(state_case && state_nocase);
-
- u8 ss_bytes = ll_table->streamStateBytes;
- u8 ssb = ll_table->caseful.streamStateBits;
- UNUSED u8 ssb_nc = ll_table->nocase.streamStateBits;
- assert(ss_bytes == (ssb + ssb_nc + 7) / 8);
-
-#if defined(ARCH_32_BIT)
- // On 32-bit hosts, we may be able to avoid having to do any u64a
- // manipulation at all.
- if (ss_bytes <= 4) {
- u32 ssb_mask = (1U << ssb) - 1;
- u32 streamVal = partial_load_u32(ll_state, ss_bytes);
- *state_case = (u32)(streamVal & ssb_mask);
- *state_nocase = (u32)(streamVal >> ssb);
- return;
- }
-#endif
-
- u64a ssb_mask = (1ULL << ssb) - 1;
- u64a streamVal = partial_load_u64a(ll_state, ss_bytes);
- *state_case = (u32)(streamVal & ssb_mask);
- *state_nocase = (u32)(streamVal >> ssb);
-}
-
-static rose_inline
-void loadLongLiteralStateMode(struct hs_scratch *scratch,
- const struct RoseLongLitTable *ll_table,
- const struct RoseLongLitSubtable *ll_sub,
- const u32 state, const char nocase) {
- if (!state) {
- DEBUG_PRINTF("no state for %s\n", nocase ? "caseless" : "caseful");
- return;
- }
-
- const struct RoseLongLitHashEntry *tab = getHashTableBase(ll_table, ll_sub);
- const struct RoseLongLitHashEntry *ent = tab + state - 1;
-
- assert(ent->str_offset + ent->str_len <= ll_table->size);
- const u8 *found_buf = (const u8 *)ll_table + ent->str_offset;
- size_t found_sz = ent->str_len;
-
- struct RoseContext *tctxt = &scratch->tctxt;
- if (nocase) {
- tctxt->ll_buf_nocase = found_buf;
- tctxt->ll_len_nocase = found_sz;
- } else {
- tctxt->ll_buf = found_buf;
- tctxt->ll_len = found_sz;
- }
-}
-
-static rose_inline
-void loadLongLiteralState(const struct RoseEngine *t, char *state,
- struct hs_scratch *scratch) {
- if (!t->longLitTableOffset) {
- return;
- }
-
- // If we don't have any long literals in play, these values must point to
- // the real history buffer so that CHECK_LONG_LIT instructions examine the
- // history buffer.
- scratch->tctxt.ll_buf = scratch->core_info.hbuf;
- scratch->tctxt.ll_len = scratch->core_info.hlen;
- scratch->tctxt.ll_buf_nocase = scratch->core_info.hbuf;
- scratch->tctxt.ll_len_nocase = scratch->core_info.hlen;
-
- if (!scratch->core_info.hlen) {
- return;
- }
-
- const struct RoseLongLitTable *ll_table =
- getByOffset(t, t->longLitTableOffset);
- const u8 *ll_state = getLongLitState(t, state);
-
- u32 state_case;
- u32 state_nocase;
- loadLongLitStreamState(ll_table, ll_state, &state_case, &state_nocase);
-
- DEBUG_PRINTF("loaded {%u, %u}\n", state_case, state_nocase);
-
- loadLongLiteralStateMode(scratch, ll_table, &ll_table->caseful,
- state_case, 0);
- loadLongLiteralStateMode(scratch, ll_table, &ll_table->nocase,
- state_nocase, 1);
-}
-
-static rose_inline
-char confirmLongLiteral(const struct RoseLongLitTable *ll_table,
- const struct hs_scratch *scratch,
- const struct RoseLongLitHashEntry *ent,
- const char nocase) {
- assert(ent->str_offset + ent->str_len <= ll_table->size);
- const u8 *s = (const u8 *)ll_table + ent->str_offset;
- size_t len = ent->str_len;
- const u8 *buf = scratch->core_info.buf;
- const size_t buf_len = scratch->core_info.len;
-
- if (len > buf_len) {
- const struct RoseContext *tctxt = &scratch->tctxt;
- const u8 *hist = nocase ? tctxt->ll_buf_nocase : tctxt->ll_buf;
- size_t hist_len = nocase ? tctxt->ll_len_nocase : tctxt->ll_len;
-
- if (len > buf_len + hist_len) {
- return 0; // Break out - not enough total history
- }
-
- size_t overhang = len - buf_len;
- assert(overhang <= hist_len);
-
- if (cmpForward(hist + hist_len - overhang, s, overhang, nocase)) {
- return 0;
- }
- s += overhang;
- len -= overhang;
- }
-
- // if we got here, we don't need history or we compared ok out of history
- assert(len <= buf_len);
-
- if (cmpForward(buf + buf_len - len, s, len, nocase)) {
- return 0;
- }
-
- return 1;
-}
-
-static rose_inline
-const u8 *prepScanBuffer(const struct core_info *ci,
- const struct RoseLongLitTable *ll_table, u8 *tempbuf) {
- const u8 hash_len = ll_table->maxLen;
- assert(hash_len >= LONG_LIT_HASH_LEN);
-
- // Our hash function operates over LONG_LIT_HASH_LEN bytes, starting from
- // location (end of buffer - hash_len). If this block can be satisfied
- // entirely from either the current buffer or the history buffer, we pass
- // in the pointer directly; otherwise we must make a copy.
-
- const u8 *base;
-
- if (hash_len > ci->len) {
- size_t overhang = hash_len - ci->len;
- if (overhang >= LONG_LIT_HASH_LEN) {
- // Can read enough to hash from inside the history buffer.
- assert(overhang <= ci->hlen);
- base = ci->hbuf + ci->hlen - overhang;
- } else {
- // Copy: first chunk from history buffer.
- assert(overhang <= ci->hlen);
+#include "util/copybytes.h"
+
+static really_inline
+const struct RoseLongLitHashEntry *
+getHashTableBase(const struct RoseLongLitTable *ll_table,
+ const struct RoseLongLitSubtable *ll_sub) {
+ assert(ll_sub->hashOffset);
+ return (const struct RoseLongLitHashEntry *)((const char *)ll_table +
+ ll_sub->hashOffset);
+}
+
+// Reads from stream state and unpacks values into stream state table.
+static really_inline
+void loadLongLitStreamState(const struct RoseLongLitTable *ll_table,
+ const u8 *ll_state, u32 *state_case,
+ u32 *state_nocase) {
+ assert(ll_table);
+ assert(ll_state);
+ assert(state_case && state_nocase);
+
+ u8 ss_bytes = ll_table->streamStateBytes;
+ u8 ssb = ll_table->caseful.streamStateBits;
+ UNUSED u8 ssb_nc = ll_table->nocase.streamStateBits;
+ assert(ss_bytes == (ssb + ssb_nc + 7) / 8);
+
+#if defined(ARCH_32_BIT)
+ // On 32-bit hosts, we may be able to avoid having to do any u64a
+ // manipulation at all.
+ if (ss_bytes <= 4) {
+ u32 ssb_mask = (1U << ssb) - 1;
+ u32 streamVal = partial_load_u32(ll_state, ss_bytes);
+ *state_case = (u32)(streamVal & ssb_mask);
+ *state_nocase = (u32)(streamVal >> ssb);
+ return;
+ }
+#endif
+
+ u64a ssb_mask = (1ULL << ssb) - 1;
+ u64a streamVal = partial_load_u64a(ll_state, ss_bytes);
+ *state_case = (u32)(streamVal & ssb_mask);
+ *state_nocase = (u32)(streamVal >> ssb);
+}
+
+static rose_inline
+void loadLongLiteralStateMode(struct hs_scratch *scratch,
+ const struct RoseLongLitTable *ll_table,
+ const struct RoseLongLitSubtable *ll_sub,
+ const u32 state, const char nocase) {
+ if (!state) {
+ DEBUG_PRINTF("no state for %s\n", nocase ? "caseless" : "caseful");
+ return;
+ }
+
+ const struct RoseLongLitHashEntry *tab = getHashTableBase(ll_table, ll_sub);
+ const struct RoseLongLitHashEntry *ent = tab + state - 1;
+
+ assert(ent->str_offset + ent->str_len <= ll_table->size);
+ const u8 *found_buf = (const u8 *)ll_table + ent->str_offset;
+ size_t found_sz = ent->str_len;
+
+ struct RoseContext *tctxt = &scratch->tctxt;
+ if (nocase) {
+ tctxt->ll_buf_nocase = found_buf;
+ tctxt->ll_len_nocase = found_sz;
+ } else {
+ tctxt->ll_buf = found_buf;
+ tctxt->ll_len = found_sz;
+ }
+}
+
+static rose_inline
+void loadLongLiteralState(const struct RoseEngine *t, char *state,
+ struct hs_scratch *scratch) {
+ if (!t->longLitTableOffset) {
+ return;
+ }
+
+ // If we don't have any long literals in play, these values must point to
+ // the real history buffer so that CHECK_LONG_LIT instructions examine the
+ // history buffer.
+ scratch->tctxt.ll_buf = scratch->core_info.hbuf;
+ scratch->tctxt.ll_len = scratch->core_info.hlen;
+ scratch->tctxt.ll_buf_nocase = scratch->core_info.hbuf;
+ scratch->tctxt.ll_len_nocase = scratch->core_info.hlen;
+
+ if (!scratch->core_info.hlen) {
+ return;
+ }
+
+ const struct RoseLongLitTable *ll_table =
+ getByOffset(t, t->longLitTableOffset);
+ const u8 *ll_state = getLongLitState(t, state);
+
+ u32 state_case;
+ u32 state_nocase;
+ loadLongLitStreamState(ll_table, ll_state, &state_case, &state_nocase);
+
+ DEBUG_PRINTF("loaded {%u, %u}\n", state_case, state_nocase);
+
+ loadLongLiteralStateMode(scratch, ll_table, &ll_table->caseful,
+ state_case, 0);
+ loadLongLiteralStateMode(scratch, ll_table, &ll_table->nocase,
+ state_nocase, 1);
+}
+
+static rose_inline
+char confirmLongLiteral(const struct RoseLongLitTable *ll_table,
+ const struct hs_scratch *scratch,
+ const struct RoseLongLitHashEntry *ent,
+ const char nocase) {
+ assert(ent->str_offset + ent->str_len <= ll_table->size);
+ const u8 *s = (const u8 *)ll_table + ent->str_offset;
+ size_t len = ent->str_len;
+ const u8 *buf = scratch->core_info.buf;
+ const size_t buf_len = scratch->core_info.len;
+
+ if (len > buf_len) {
+ const struct RoseContext *tctxt = &scratch->tctxt;
+ const u8 *hist = nocase ? tctxt->ll_buf_nocase : tctxt->ll_buf;
+ size_t hist_len = nocase ? tctxt->ll_len_nocase : tctxt->ll_len;
+
+ if (len > buf_len + hist_len) {
+ return 0; // Break out - not enough total history
+ }
+
+ size_t overhang = len - buf_len;
+ assert(overhang <= hist_len);
+
+ if (cmpForward(hist + hist_len - overhang, s, overhang, nocase)) {
+ return 0;
+ }
+ s += overhang;
+ len -= overhang;
+ }
+
+ // if we got here, we don't need history or we compared ok out of history
+ assert(len <= buf_len);
+
+ if (cmpForward(buf + buf_len - len, s, len, nocase)) {
+ return 0;
+ }
+
+ return 1;
+}
+
+static rose_inline
+const u8 *prepScanBuffer(const struct core_info *ci,
+ const struct RoseLongLitTable *ll_table, u8 *tempbuf) {
+ const u8 hash_len = ll_table->maxLen;
+ assert(hash_len >= LONG_LIT_HASH_LEN);
+
+ // Our hash function operates over LONG_LIT_HASH_LEN bytes, starting from
+ // location (end of buffer - hash_len). If this block can be satisfied
+ // entirely from either the current buffer or the history buffer, we pass
+ // in the pointer directly; otherwise we must make a copy.
+
+ const u8 *base;
+
+ if (hash_len > ci->len) {
+ size_t overhang = hash_len - ci->len;
+ if (overhang >= LONG_LIT_HASH_LEN) {
+ // Can read enough to hash from inside the history buffer.
+ assert(overhang <= ci->hlen);
+ base = ci->hbuf + ci->hlen - overhang;
+ } else {
+ // Copy: first chunk from history buffer.
+ assert(overhang <= ci->hlen);
copy_upto_64_bytes(tempbuf, ci->hbuf + ci->hlen - overhang,
- overhang);
- // Copy: second chunk from current buffer.
- size_t copy_buf_len = LONG_LIT_HASH_LEN - overhang;
- assert(copy_buf_len <= ci->len);
+ overhang);
+ // Copy: second chunk from current buffer.
+ size_t copy_buf_len = LONG_LIT_HASH_LEN - overhang;
+ assert(copy_buf_len <= ci->len);
copy_upto_64_bytes(tempbuf + overhang, ci->buf, copy_buf_len);
- // Read from our temporary buffer for the hash.
- base = tempbuf;
- }
- } else {
- // Can read enough to hash from inside the current buffer.
- base = ci->buf + ci->len - hash_len;
- }
-
- return base;
-}
-
-#ifndef NDEBUG
-// Defensive checking (used in assert) that these table values don't overflow
-// the range available.
-static really_inline
-char streamingTableOverflow(u32 state_case, u32 state_nocase, u8 ssb,
- u8 ssb_nc) {
- u32 ssb_mask = (1ULL << (ssb)) - 1;
- if (state_case & ~ssb_mask) {
- return 1;
- }
- u32 ssb_nc_mask = (1ULL << (ssb_nc)) - 1;
- if (state_nocase & ~ssb_nc_mask) {
- return 1;
- }
- return 0;
-}
-#endif
-
-// Reads from stream state table and packs values into stream state.
-static rose_inline
-void storeLongLitStreamState(const struct RoseLongLitTable *ll_table,
- u8 *ll_state, u32 state_case, u32 state_nocase) {
- assert(ll_table);
- assert(ll_state);
-
- u8 ss_bytes = ll_table->streamStateBytes;
- u8 ssb = ll_table->caseful.streamStateBits;
- UNUSED u8 ssb_nc = ll_table->nocase.streamStateBits;
- assert(ss_bytes == ROUNDUP_N(ssb + ssb_nc, 8) / 8);
- assert(!streamingTableOverflow(state_case, state_nocase, ssb, ssb_nc));
-
-#if defined(ARCH_32_BIT)
- // On 32-bit hosts, we may be able to avoid having to do any u64a
- // manipulation at all.
- if (ss_bytes <= 4) {
- u32 stagingStreamState = state_case;
- stagingStreamState |= (state_nocase << ssb);
- partial_store_u32(ll_state, stagingStreamState, ss_bytes);
- return;
- }
-#endif
-
- u64a stagingStreamState = (u64a)state_case;
- stagingStreamState |= (u64a)state_nocase << ssb;
- partial_store_u64a(ll_state, stagingStreamState, ss_bytes);
-}
-
-static really_inline
-char has_bit(const u8 *data, u32 bit) {
- return (data[bit / 8] >> (bit % 8)) & 1;
-}
-
-static rose_inline
-char bloomHasKey(const u8 *bloom, u32 bloom_mask, u32 hash) {
- return has_bit(bloom, hash & bloom_mask);
-}
-
-static rose_inline
-char checkBloomFilter(const struct RoseLongLitTable *ll_table,
- const struct RoseLongLitSubtable *ll_sub,
- const u8 *scan_buf, char nocase) {
- assert(ll_sub->bloomBits);
-
- const u8 *bloom = (const u8 *)ll_table + ll_sub->bloomOffset;
- const u32 bloom_mask = (1U << ll_sub->bloomBits) - 1;
-
- char v = 1;
- v &= bloomHasKey(bloom, bloom_mask, bloomHash_1(scan_buf, nocase));
- v &= bloomHasKey(bloom, bloom_mask, bloomHash_2(scan_buf, nocase));
- v &= bloomHasKey(bloom, bloom_mask, bloomHash_3(scan_buf, nocase));
- return v;
-}
-
-/**
- * \brief Look for a hit in the hash table.
- *
- * Returns zero if not found, otherwise returns (bucket + 1).
- */
-static rose_inline
-u32 checkHashTable(const struct RoseLongLitTable *ll_table,
- const struct RoseLongLitSubtable *ll_sub, const u8 *scan_buf,
- const struct hs_scratch *scratch, char nocase) {
- const u32 nbits = ll_sub->hashBits;
- assert(nbits && nbits < 32);
- const u32 num_entries = 1U << nbits;
-
- const struct RoseLongLitHashEntry *tab = getHashTableBase(ll_table, ll_sub);
-
- u32 hash = hashLongLiteral(scan_buf, LONG_LIT_HASH_LEN, nocase);
- u32 bucket = hash & ((1U << nbits) - 1);
-
- while (tab[bucket].str_offset != 0) {
- DEBUG_PRINTF("checking bucket %u\n", bucket);
- if (confirmLongLiteral(ll_table, scratch, &tab[bucket], nocase)) {
- DEBUG_PRINTF("found hit for bucket %u\n", bucket);
- return bucket + 1;
- }
-
- if (++bucket == num_entries) {
- bucket = 0;
- }
- }
-
- return 0;
-}
-
-static rose_inline
-void storeLongLiteralState(const struct RoseEngine *t, char *state,
- struct hs_scratch *scratch) {
- if (!t->longLitTableOffset) {
- DEBUG_PRINTF("no table\n");
- return;
- }
-
- struct core_info *ci = &scratch->core_info;
- const struct RoseLongLitTable *ll_table =
- getByOffset(t, t->longLitTableOffset);
- assert(ll_table->maxLen);
-
- DEBUG_PRINTF("maxLen=%u, len=%zu, hlen=%zu\n", ll_table->maxLen, ci->len,
- ci->hlen);
-
- u32 state_case = 0;
- u32 state_nocase = 0;
-
- // If we don't have enough history, we don't need to do anything.
- if (ll_table->maxLen <= ci->len + ci->hlen) {
- u8 tempbuf[LONG_LIT_HASH_LEN];
- const u8 *scan_buf = prepScanBuffer(ci, ll_table, tempbuf);
-
- if (ll_table->caseful.hashBits &&
- checkBloomFilter(ll_table, &ll_table->caseful, scan_buf, 0)) {
- state_case = checkHashTable(ll_table, &ll_table->caseful, scan_buf,
- scratch, 0);
- }
-
- if (ll_table->nocase.hashBits &&
- checkBloomFilter(ll_table, &ll_table->nocase, scan_buf, 1)) {
- state_nocase = checkHashTable(ll_table, &ll_table->nocase, scan_buf,
- scratch, 1);
- }
- } else {
- DEBUG_PRINTF("not enough history (%zu bytes)\n", ci->len + ci->hlen);
- }
-
- DEBUG_PRINTF("store {%u, %u}\n", state_case, state_nocase);
-
- u8 *ll_state = getLongLitState(t, state);
- storeLongLitStreamState(ll_table, ll_state, state_case, state_nocase);
-}
-
-#endif // STREAM_LONG_LIT_H
+ // Read from our temporary buffer for the hash.
+ base = tempbuf;
+ }
+ } else {
+ // Can read enough to hash from inside the current buffer.
+ base = ci->buf + ci->len - hash_len;
+ }
+
+ return base;
+}
+
+#ifndef NDEBUG
+// Defensive checking (used in assert) that these table values don't overflow
+// the range available.
+static really_inline
+char streamingTableOverflow(u32 state_case, u32 state_nocase, u8 ssb,
+ u8 ssb_nc) {
+ u32 ssb_mask = (1ULL << (ssb)) - 1;
+ if (state_case & ~ssb_mask) {
+ return 1;
+ }
+ u32 ssb_nc_mask = (1ULL << (ssb_nc)) - 1;
+ if (state_nocase & ~ssb_nc_mask) {
+ return 1;
+ }
+ return 0;
+}
+#endif
+
+// Reads from stream state table and packs values into stream state.
+static rose_inline
+void storeLongLitStreamState(const struct RoseLongLitTable *ll_table,
+ u8 *ll_state, u32 state_case, u32 state_nocase) {
+ assert(ll_table);
+ assert(ll_state);
+
+ u8 ss_bytes = ll_table->streamStateBytes;
+ u8 ssb = ll_table->caseful.streamStateBits;
+ UNUSED u8 ssb_nc = ll_table->nocase.streamStateBits;
+ assert(ss_bytes == ROUNDUP_N(ssb + ssb_nc, 8) / 8);
+ assert(!streamingTableOverflow(state_case, state_nocase, ssb, ssb_nc));
+
+#if defined(ARCH_32_BIT)
+ // On 32-bit hosts, we may be able to avoid having to do any u64a
+ // manipulation at all.
+ if (ss_bytes <= 4) {
+ u32 stagingStreamState = state_case;
+ stagingStreamState |= (state_nocase << ssb);
+ partial_store_u32(ll_state, stagingStreamState, ss_bytes);
+ return;
+ }
+#endif
+
+ u64a stagingStreamState = (u64a)state_case;
+ stagingStreamState |= (u64a)state_nocase << ssb;
+ partial_store_u64a(ll_state, stagingStreamState, ss_bytes);
+}
+
+static really_inline
+char has_bit(const u8 *data, u32 bit) {
+ return (data[bit / 8] >> (bit % 8)) & 1;
+}
+
+static rose_inline
+char bloomHasKey(const u8 *bloom, u32 bloom_mask, u32 hash) {
+ return has_bit(bloom, hash & bloom_mask);
+}
+
+static rose_inline
+char checkBloomFilter(const struct RoseLongLitTable *ll_table,
+ const struct RoseLongLitSubtable *ll_sub,
+ const u8 *scan_buf, char nocase) {
+ assert(ll_sub->bloomBits);
+
+ const u8 *bloom = (const u8 *)ll_table + ll_sub->bloomOffset;
+ const u32 bloom_mask = (1U << ll_sub->bloomBits) - 1;
+
+ char v = 1;
+ v &= bloomHasKey(bloom, bloom_mask, bloomHash_1(scan_buf, nocase));
+ v &= bloomHasKey(bloom, bloom_mask, bloomHash_2(scan_buf, nocase));
+ v &= bloomHasKey(bloom, bloom_mask, bloomHash_3(scan_buf, nocase));
+ return v;
+}
+
+/**
+ * \brief Look for a hit in the hash table.
+ *
+ * Returns zero if not found, otherwise returns (bucket + 1).
+ */
+static rose_inline
+u32 checkHashTable(const struct RoseLongLitTable *ll_table,
+ const struct RoseLongLitSubtable *ll_sub, const u8 *scan_buf,
+ const struct hs_scratch *scratch, char nocase) {
+ const u32 nbits = ll_sub->hashBits;
+ assert(nbits && nbits < 32);
+ const u32 num_entries = 1U << nbits;
+
+ const struct RoseLongLitHashEntry *tab = getHashTableBase(ll_table, ll_sub);
+
+ u32 hash = hashLongLiteral(scan_buf, LONG_LIT_HASH_LEN, nocase);
+ u32 bucket = hash & ((1U << nbits) - 1);
+
+ while (tab[bucket].str_offset != 0) {
+ DEBUG_PRINTF("checking bucket %u\n", bucket);
+ if (confirmLongLiteral(ll_table, scratch, &tab[bucket], nocase)) {
+ DEBUG_PRINTF("found hit for bucket %u\n", bucket);
+ return bucket + 1;
+ }
+
+ if (++bucket == num_entries) {
+ bucket = 0;
+ }
+ }
+
+ return 0;
+}
+
+static rose_inline
+void storeLongLiteralState(const struct RoseEngine *t, char *state,
+ struct hs_scratch *scratch) {
+ if (!t->longLitTableOffset) {
+ DEBUG_PRINTF("no table\n");
+ return;
+ }
+
+ struct core_info *ci = &scratch->core_info;
+ const struct RoseLongLitTable *ll_table =
+ getByOffset(t, t->longLitTableOffset);
+ assert(ll_table->maxLen);
+
+ DEBUG_PRINTF("maxLen=%u, len=%zu, hlen=%zu\n", ll_table->maxLen, ci->len,
+ ci->hlen);
+
+ u32 state_case = 0;
+ u32 state_nocase = 0;
+
+ // If we don't have enough history, we don't need to do anything.
+ if (ll_table->maxLen <= ci->len + ci->hlen) {
+ u8 tempbuf[LONG_LIT_HASH_LEN];
+ const u8 *scan_buf = prepScanBuffer(ci, ll_table, tempbuf);
+
+ if (ll_table->caseful.hashBits &&
+ checkBloomFilter(ll_table, &ll_table->caseful, scan_buf, 0)) {
+ state_case = checkHashTable(ll_table, &ll_table->caseful, scan_buf,
+ scratch, 0);
+ }
+
+ if (ll_table->nocase.hashBits &&
+ checkBloomFilter(ll_table, &ll_table->nocase, scan_buf, 1)) {
+ state_nocase = checkHashTable(ll_table, &ll_table->nocase, scan_buf,
+ scratch, 1);
+ }
+ } else {
+ DEBUG_PRINTF("not enough history (%zu bytes)\n", ci->len + ci->hlen);
+ }
+
+ DEBUG_PRINTF("store {%u, %u}\n", state_case, state_nocase);
+
+ u8 *ll_state = getLongLitState(t, state);
+ storeLongLitStreamState(ll_table, ll_state, state_case, state_nocase);
+}
+
+#endif // STREAM_LONG_LIT_H