path: root/contrib/libs/hyperscan/src/nfa/repeat.c
author    bnagaev <bnagaev@yandex-team.ru>  2022-02-10 16:47:04 +0300
committer Daniil Cherednik <dcherednik@yandex-team.ru>  2022-02-10 16:47:04 +0300
commit    c74559fb88da8adac0d9186cfa55a6b13c47695f (patch)
tree      b83306b6e37edeea782e9eed673d89286c4fef35  /contrib/libs/hyperscan/src/nfa/repeat.c
parent    d6449ba66291ff0c0d352c82e6eb3efb4c8a7e8d (diff)
download  ydb-c74559fb88da8adac0d9186cfa55a6b13c47695f.tar.gz
Restoring authorship annotation for <bnagaev@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/hyperscan/src/nfa/repeat.c')
-rw-r--r--  contrib/libs/hyperscan/src/nfa/repeat.c  3136
1 file changed, 1568 insertions(+), 1568 deletions(-)
diff --git a/contrib/libs/hyperscan/src/nfa/repeat.c b/contrib/libs/hyperscan/src/nfa/repeat.c
index 5ef76ac696..5b2e4df4ed 100644
--- a/contrib/libs/hyperscan/src/nfa/repeat.c
+++ b/contrib/libs/hyperscan/src/nfa/repeat.c
@@ -1,893 +1,893 @@
-/*
+/*
* Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief API for handling bounded repeats.
- *
- * This file provides an internal API for handling bounded repeats of character
- * classes, used by the Large Bounded Repeat (LBR) engine and by the bounded
- * repeat handling in the LimEx NFA engine.
- */
-#include "repeat.h"
-#include "util/bitutils.h"
-#include "util/multibit.h"
-#include "util/pack_bits.h"
-#include "util/partial_store.h"
-#include "util/unaligned.h"
-
-#include <stdint.h>
-#include <string.h>
-
-/** \brief Returns the total capacity of the ring.
- * Note that it's currently one greater than repeatMax so that we can handle
- * cases where the tug and pos triggers overlap. */
-static
-u32 ringCapacity(const struct RepeatInfo *info) {
- return info->repeatMax + 1;
-}
-
-/** \brief Returns the number of elements currently in the ring. Note that if
- * the first and last indices are equal, the ring is full. */
-static
-u32 ringOccupancy(const struct RepeatRingControl *xs, const u32 ringSize) {
- if (xs->last > xs->first) {
- return xs->last - xs->first;
- } else { // wrapped
- return ringSize - (xs->first - xs->last);
- }
-}
-
-/** \brief Returns the offset of the _last_ top stored in the ring. */
-static
-u64a ringLastTop(const struct RepeatRingControl *xs, const u32 ringSize) {
- return xs->offset + ringOccupancy(xs, ringSize) - 1;
-}
-
-#if !defined(NDEBUG) || defined(DUMP_SUPPORT)
-/** \brief For debugging: returns the total capacity of the range list. */
-static UNUSED
-u32 rangeListCapacity(const struct RepeatInfo *info) {
- u32 d = info->repeatMax - info->repeatMin;
- assert(d > 0); // should be in a RING model!
- return 2 * ((info->repeatMax / d) + 1);
-}
-#endif
-
-#ifdef DEBUG
-static
-void dumpRing(const struct RepeatInfo *info, const struct RepeatRingControl *xs,
- const u8 *ring) {
- const u32 ringSize = ringCapacity(info);
- DEBUG_PRINTF("ring (occ %u/%u, %u->%u): ", ringOccupancy(xs, ringSize),
- ringSize, xs->first, xs->last);
-
- u16 i = xs->first, n = 0;
- do {
- if (mmbit_isset(ring, ringSize, i)) {
- u64a ringOffset = xs->offset + n;
- printf("%llu ", ringOffset);
- }
- ++i, ++n;
- if (i == ringSize) {
- i = 0;
- }
- } while (i != xs->last);
- printf("\n");
-}
-
-static
-void dumpRange(const struct RepeatInfo *info,
- const struct RepeatRangeControl *xs, const u16 *ring) {
- const u32 ringSize = rangeListCapacity(info);
- DEBUG_PRINTF("ring (occ %u/%u): ", xs->num, ringSize);
-
- if (xs->num) {
- for (u32 i = 0; i < xs->num; i++) {
- printf("%llu ", xs->offset + unaligned_load_u16(ring + i));
- }
- } else {
- printf("empty");
- }
- printf("\n");
-}
-
-static
-void dumpBitmap(const struct RepeatBitmapControl *xs) {
- DEBUG_PRINTF("bitmap (base=%llu): ", xs->offset);
- u64a bitmap = xs->bitmap;
- while (bitmap) {
- printf("%llu ", xs->offset + findAndClearLSB_64(&bitmap));
- }
- printf("\n");
-}
-
-static
-void dumpTrailer(const struct RepeatInfo *info,
- const struct RepeatTrailerControl *xs) {
- const u64a m_width = info->repeatMax - info->repeatMin;
- DEBUG_PRINTF("trailer: current extent is [%llu,%llu]", xs->offset,
- xs->offset + m_width);
- u64a bitmap = xs->bitmap;
- if (bitmap) {
- printf(", also matches at: ");
- while (bitmap) {
- u32 idx = findAndClearMSB_64(&bitmap);
- printf("%llu ", xs->offset - idx - 1);
- }
- } else {
- printf(", no earlier matches");
- }
- printf("\n");
-}
-
-#endif // DEBUG
-
-#ifndef NDEBUG
-/** \brief For debugging: returns true if the range is ordered with no dupes. */
-static UNUSED
-int rangeListIsOrdered(const struct RepeatRangeControl *xs, const u16 *ring) {
- for (u32 i = 1; i < xs->num; i++) {
- u16 a = unaligned_load_u16(ring + i - 1);
- u16 b = unaligned_load_u16(ring + i);
- if (a >= b) {
- return 0;
- }
- }
- return 1;
-}
-#endif
-
-u64a repeatLastTopRing(const struct RepeatInfo *info,
- const union RepeatControl *ctrl) {
- const u32 ringSize = ringCapacity(info);
- return ringLastTop(&ctrl->ring, ringSize);
-}
-
-u64a repeatLastTopRange(const union RepeatControl *ctrl, const void *state) {
- const u16 *ring = (const u16 *)state;
- const struct RepeatRangeControl *xs = &ctrl->range;
- assert(xs->num);
- return xs->offset + unaligned_load_u16(ring + xs->num - 1);
-}
-
-u64a repeatLastTopBitmap(const union RepeatControl *ctrl) {
- const struct RepeatBitmapControl *xs = &ctrl->bitmap;
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief API for handling bounded repeats.
+ *
+ * This file provides an internal API for handling bounded repeats of character
+ * classes, used by the Large Bounded Repeat (LBR) engine and by the bounded
+ * repeat handling in the LimEx NFA engine.
+ */
+#include "repeat.h"
+#include "util/bitutils.h"
+#include "util/multibit.h"
+#include "util/pack_bits.h"
+#include "util/partial_store.h"
+#include "util/unaligned.h"
+
+#include <stdint.h>
+#include <string.h>
+
+/** \brief Returns the total capacity of the ring.
+ * Note that it's currently one greater than repeatMax so that we can handle
+ * cases where the tug and pos triggers overlap. */
+static
+u32 ringCapacity(const struct RepeatInfo *info) {
+ return info->repeatMax + 1;
+}
+
+/** \brief Returns the number of elements currently in the ring. Note that if
+ * the first and last indices are equal, the ring is full. */
+static
+u32 ringOccupancy(const struct RepeatRingControl *xs, const u32 ringSize) {
+ if (xs->last > xs->first) {
+ return xs->last - xs->first;
+ } else { // wrapped
+ return ringSize - (xs->first - xs->last);
+ }
+}
+
+/** \brief Returns the offset of the _last_ top stored in the ring. */
+static
+u64a ringLastTop(const struct RepeatRingControl *xs, const u32 ringSize) {
+ return xs->offset + ringOccupancy(xs, ringSize) - 1;
+}
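/*
 * A worked example of the ring arithmetic above, assuming a {2,4} repeat:
 * ringCapacity() returns repeatMax + 1 == 5. With xs->first == 3 and
 * xs->last == 1 the ring has wrapped, so ringOccupancy() returns
 * 5 - (3 - 1) == 3, and ringLastTop() reports xs->offset + 3 - 1, i.e. the
 * offset of the third stored top.
 */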
+
+#if !defined(NDEBUG) || defined(DUMP_SUPPORT)
+/** \brief For debugging: returns the total capacity of the range list. */
+static UNUSED
+u32 rangeListCapacity(const struct RepeatInfo *info) {
+ u32 d = info->repeatMax - info->repeatMin;
+ assert(d > 0); // should be in a RING model!
+ return 2 * ((info->repeatMax / d) + 1);
+}
+#endif
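/*
 * For example, a {2,4} repeat has d == 2, so rangeListCapacity() returns
 * 2 * ((4 / 2) + 1) == 6 list entries; the compression rule in
 * repeatStoreRange() further below is what keeps the live list within this
 * bound.
 */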
+
+#ifdef DEBUG
+static
+void dumpRing(const struct RepeatInfo *info, const struct RepeatRingControl *xs,
+ const u8 *ring) {
+ const u32 ringSize = ringCapacity(info);
+ DEBUG_PRINTF("ring (occ %u/%u, %u->%u): ", ringOccupancy(xs, ringSize),
+ ringSize, xs->first, xs->last);
+
+ u16 i = xs->first, n = 0;
+ do {
+ if (mmbit_isset(ring, ringSize, i)) {
+ u64a ringOffset = xs->offset + n;
+ printf("%llu ", ringOffset);
+ }
+ ++i, ++n;
+ if (i == ringSize) {
+ i = 0;
+ }
+ } while (i != xs->last);
+ printf("\n");
+}
+
+static
+void dumpRange(const struct RepeatInfo *info,
+ const struct RepeatRangeControl *xs, const u16 *ring) {
+ const u32 ringSize = rangeListCapacity(info);
+ DEBUG_PRINTF("ring (occ %u/%u): ", xs->num, ringSize);
+
+ if (xs->num) {
+ for (u32 i = 0; i < xs->num; i++) {
+ printf("%llu ", xs->offset + unaligned_load_u16(ring + i));
+ }
+ } else {
+ printf("empty");
+ }
+ printf("\n");
+}
+
+static
+void dumpBitmap(const struct RepeatBitmapControl *xs) {
+ DEBUG_PRINTF("bitmap (base=%llu): ", xs->offset);
+ u64a bitmap = xs->bitmap;
+ while (bitmap) {
+ printf("%llu ", xs->offset + findAndClearLSB_64(&bitmap));
+ }
+ printf("\n");
+}
+
+static
+void dumpTrailer(const struct RepeatInfo *info,
+ const struct RepeatTrailerControl *xs) {
+ const u64a m_width = info->repeatMax - info->repeatMin;
+ DEBUG_PRINTF("trailer: current extent is [%llu,%llu]", xs->offset,
+ xs->offset + m_width);
+ u64a bitmap = xs->bitmap;
+ if (bitmap) {
+ printf(", also matches at: ");
+ while (bitmap) {
+ u32 idx = findAndClearMSB_64(&bitmap);
+ printf("%llu ", xs->offset - idx - 1);
+ }
+ } else {
+ printf(", no earlier matches");
+ }
+ printf("\n");
+}
+
+#endif // DEBUG
+
+#ifndef NDEBUG
+/** \brief For debugging: returns true if the range is ordered with no dupes. */
+static UNUSED
+int rangeListIsOrdered(const struct RepeatRangeControl *xs, const u16 *ring) {
+ for (u32 i = 1; i < xs->num; i++) {
+ u16 a = unaligned_load_u16(ring + i - 1);
+ u16 b = unaligned_load_u16(ring + i);
+ if (a >= b) {
+ return 0;
+ }
+ }
+ return 1;
+}
+#endif
+
+u64a repeatLastTopRing(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl) {
+ const u32 ringSize = ringCapacity(info);
+ return ringLastTop(&ctrl->ring, ringSize);
+}
+
+u64a repeatLastTopRange(const union RepeatControl *ctrl, const void *state) {
+ const u16 *ring = (const u16 *)state;
+ const struct RepeatRangeControl *xs = &ctrl->range;
+ assert(xs->num);
+ return xs->offset + unaligned_load_u16(ring + xs->num - 1);
+}
+
+u64a repeatLastTopBitmap(const union RepeatControl *ctrl) {
+ const struct RepeatBitmapControl *xs = &ctrl->bitmap;
if (!xs->bitmap) {
/* last top was too long ago */
return 0;
}
- return xs->offset + 63 - clz64(xs->bitmap);
-}
-
-u64a repeatLastTopTrailer(const struct RepeatInfo *info,
- const union RepeatControl *ctrl) {
- const struct RepeatTrailerControl *xs = &ctrl->trailer;
- assert(xs->offset >= info->repeatMin);
- return xs->offset - info->repeatMin;
-}
-
-u64a repeatNextMatchRing(const struct RepeatInfo *info,
- const union RepeatControl *ctrl, const void *state,
- u64a offset) {
- const struct RepeatRingControl *xs = &ctrl->ring;
- const u8 *ring = (const u8 *)state;
- const u32 ringSize = ringCapacity(info);
-
- // We should have at least one top stored.
- assert(mmbit_any(ring, ringSize));
- assert(info->repeatMax < REPEAT_INF);
-
- // Increment offset, as we want the NEXT match.
- offset++;
-
- const u64a base_offset = xs->offset;
- DEBUG_PRINTF("offset=%llu, base_offset=%llu\n", offset, base_offset);
-
- u64a delta = offset - base_offset;
- if (offset < base_offset || delta < info->repeatMin) {
- DEBUG_PRINTF("before min repeat\n");
- return base_offset + info->repeatMin;
- }
- if (offset > ringLastTop(xs, ringSize) + info->repeatMax) {
- DEBUG_PRINTF("ring is stale\n");
- return 0; // no more matches
- }
-
- DEBUG_PRINTF("delta=%llu\n", delta);
- u64a lower = delta > info->repeatMax ? delta - info->repeatMax : 0;
- DEBUG_PRINTF("lower=%llu\n", lower);
-
- assert(lower < ringSize);
-
- // First scan, either to xs->last if there's no wrap-around or ringSize
- // (end of the underlying multibit) if we are wrapping.
-
- u32 begin = xs->first + lower;
- if (begin >= ringSize) {
-        // This branch and subtract tested much faster than using % (integer
-        // division).
- begin -= ringSize;
- }
- const u32 end = begin >= xs->last ? ringSize : xs->last;
- u32 i = mmbit_iterate_bounded(ring, ringSize, begin, end);
- if (i != MMB_INVALID) {
- u32 j = i - begin + lower;
- return MAX(offset, base_offset + j + info->repeatMin);
- }
-
- // A second scan is necessary if we need to cope with wrap-around in the
- // ring buffer.
-
- if (begin >= xs->last) {
- i = mmbit_iterate_bounded(ring, ringSize, 0, xs->last);
- if (i != MMB_INVALID) {
- u32 j = i + (ringSize - begin) + lower;
- return MAX(offset, base_offset + j + info->repeatMin);
- }
- }
-
- return 0;
-}
-
-u64a repeatNextMatchRange(const struct RepeatInfo *info,
- const union RepeatControl *ctrl, const void *state,
- u64a offset) {
- const struct RepeatRangeControl *xs = &ctrl->range;
- const u16 *ring = (const u16 *)state;
-
- assert(xs->num > 0);
- assert(xs->num <= rangeListCapacity(info));
- assert(rangeListIsOrdered(xs, ring));
- assert(info->repeatMax < REPEAT_INF);
-
- for (u32 i = 0; i < xs->num; i++) {
- u64a base = xs->offset + unaligned_load_u16(ring + i);
- u64a first = base + info->repeatMin;
- if (offset < first) {
- return first;
- }
- if (offset < base + info->repeatMax) {
- return offset + 1;
- }
- }
-
- return 0;
-}
-
-u64a repeatNextMatchBitmap(const struct RepeatInfo *info,
- const union RepeatControl *ctrl, u64a offset) {
- const struct RepeatBitmapControl *xs = &ctrl->bitmap;
- const u64a base = xs->offset;
- u64a bitmap = xs->bitmap;
-
- // FIXME: quick exit if there is no match, based on last top in bitmap?
-
- while (bitmap) {
- u64a top = base + findAndClearLSB_64(&bitmap);
- if (offset < top + info->repeatMin) {
- return top + info->repeatMin;
- }
- if (offset < top + info->repeatMax) {
- return offset + 1;
- }
- }
-
- return 0; // No more matches.
-}
-
-u64a repeatNextMatchTrailer(const struct RepeatInfo *info,
- const union RepeatControl *ctrl, u64a offset) {
- const struct RepeatTrailerControl *xs = &ctrl->trailer;
- const u32 m_width = info->repeatMax - info->repeatMin;
-
- DEBUG_PRINTF("offset=%llu, xs->offset=%llu\n", offset, xs->offset);
- DEBUG_PRINTF("{%u,%u} repeat, m_width=%u\n", info->repeatMin,
- info->repeatMax, m_width);
-
- assert(xs->offset >= info->repeatMin);
-
- if (offset >= xs->offset + m_width) {
- DEBUG_PRINTF("no more matches\n");
- return 0;
- }
-
- if (offset >= xs->offset) {
- DEBUG_PRINTF("inside most recent match window, next match %llu\n",
- offset + 1);
- return offset + 1;
- }
-
- // Offset is before the match window, we need to consult the bitmap of
- // earlier match offsets.
- u64a bitmap = xs->bitmap;
-
- u64a diff = xs->offset - offset;
- DEBUG_PRINTF("diff=%llu\n", diff);
- if (diff <= 64) {
- assert(diff);
- bitmap &= (1ULL << (diff - 1)) - 1;
- }
- DEBUG_PRINTF("bitmap = 0x%llx\n", bitmap);
- if (bitmap) {
- u32 idx = 63 - clz64(bitmap);
- DEBUG_PRINTF("clz=%u, idx = %u -> offset %llu\n", clz64(bitmap), idx,
- xs->offset - idx);
- DEBUG_PRINTF("next match at %llu\n", xs->offset - idx - 1);
- u64a next_match = xs->offset - idx - 1;
- assert(next_match > offset);
- return next_match;
- }
-
- DEBUG_PRINTF("next match is start of match window, %llu\n", xs->offset);
- return xs->offset;
-}
-
-/** \brief Store the first top in the ring buffer. */
-static
-void storeInitialRingTop(struct RepeatRingControl *xs, u8 *ring,
- u64a offset, const u32 ringSize) {
- DEBUG_PRINTF("ring=%p, ringSize=%u\n", ring, ringSize);
- xs->offset = offset;
- mmbit_clear(ring, ringSize);
- mmbit_set(ring, ringSize, 0);
- xs->first = 0;
- xs->last = 1;
-}
-
-static really_inline
-char ringIsStale(const struct RepeatRingControl *xs, const u32 ringSize,
- const u64a offset) {
- u64a finalMatch = ringLastTop(xs, ringSize);
- if (offset - finalMatch >= ringSize) {
- DEBUG_PRINTF("all matches in ring are stale\n");
- return 1;
- }
-
- return 0;
-}
-
-void repeatStoreRing(const struct RepeatInfo *info, union RepeatControl *ctrl,
- void *state, u64a offset, char is_alive) {
- struct RepeatRingControl *xs = &ctrl->ring;
- u8 *ring = (u8 *)state;
- const u32 ringSize = ringCapacity(info);
- assert(ringSize > 0);
-
- DEBUG_PRINTF("storing top for offset %llu in ring\n", offset);
-
- if (!is_alive || ringIsStale(xs, ringSize, offset)) {
- storeInitialRingTop(xs, ring, offset, ringSize);
- } else {
- assert(offset > ringLastTop(xs, ringSize)); // Dupe or out of order.
- u32 occ = ringOccupancy(xs, ringSize);
- u64a diff = offset - xs->offset;
- DEBUG_PRINTF("diff=%llu, occ=%u\n", diff, occ);
- if (diff >= ringSize) {
- u32 push = diff - ringSize + 1;
- DEBUG_PRINTF("push ring %u\n", push);
- xs->first += push;
- if (xs->first >= ringSize) {
- xs->first -= ringSize;
- }
- xs->offset += push;
- diff -= push;
- occ -= push;
- }
-
- // There's now room in the ring for this top, so we write a run of
- // zeroes, then a one.
- DEBUG_PRINTF("diff=%llu, occ=%u\n", diff, occ);
- assert(diff < ringSize);
- assert(diff >= occ);
- u32 n = diff - occ;
-
- u32 i = xs->last + n;
-
- mmbit_unset_range(ring, ringSize, xs->last, MIN(i, ringSize));
- if (i >= ringSize) {
- i -= ringSize;
- mmbit_unset_range(ring, ringSize, 0, i);
- }
-
- assert(i != xs->first);
- DEBUG_PRINTF("set bit %u\n", i);
- mmbit_set(ring, ringSize, i);
- xs->last = i + 1;
- if (xs->last == ringSize) {
- xs->last = 0;
- }
- }
-
- // Our ring indices shouldn't have spiraled off into uncharted space.
- assert(xs->first < ringSize);
- assert(xs->last < ringSize);
-
-#ifdef DEBUG
- DEBUG_PRINTF("post-store ring state\n");
- dumpRing(info, xs, ring);
-#endif
-
- // The final top stored in our ring should be the one we just wrote in.
- assert(ringLastTop(xs, ringSize) == offset);
-}
-
-static really_inline
-void storeInitialRangeTop(struct RepeatRangeControl *xs, u16 *ring,
- u64a offset) {
- xs->offset = offset;
- xs->num = 1;
- unaligned_store_u16(ring, 0);
-}
-
-void repeatStoreRange(const struct RepeatInfo *info, union RepeatControl *ctrl,
- void *state, u64a offset, char is_alive) {
- struct RepeatRangeControl *xs = &ctrl->range;
- u16 *ring = (u16 *)state;
-
- if (!is_alive) {
- DEBUG_PRINTF("storing initial top at %llu\n", offset);
- storeInitialRangeTop(xs, ring, offset);
- return;
- }
-
- DEBUG_PRINTF("storing top at %llu, list currently has %u/%u elements\n",
- offset, xs->num, rangeListCapacity(info));
-
-#ifdef DEBUG
- dumpRange(info, xs, ring);
-#endif
-
- // Walk ring from front. Identify the number of stale elements, and shift
- // the whole ring to delete them.
- u32 i = 0;
- for (; i < xs->num; i++) {
- u64a this_offset = xs->offset + unaligned_load_u16(ring + i);
- DEBUG_PRINTF("this_offset=%llu, diff=%llu\n", this_offset,
- offset - this_offset);
- if (offset - this_offset <= info->repeatMax) {
- break;
- }
- }
-
- if (i == xs->num) {
- DEBUG_PRINTF("whole ring is stale\n");
- storeInitialRangeTop(xs, ring, offset);
- return;
- } else if (i > 0) {
- DEBUG_PRINTF("expiring %u stale tops\n", i);
- u16 first_offset = unaligned_load_u16(ring + i); // first live top
- for (u32 j = 0; j < xs->num - i; j++) {
- u16 val = unaligned_load_u16(ring + i + j);
- assert(val >= first_offset);
- unaligned_store_u16(ring + j, val - first_offset);
- }
- xs->offset += first_offset;
- xs->num -= i;
- }
-
-#ifdef DEBUG
- DEBUG_PRINTF("post-expire:\n");
- dumpRange(info, xs, ring);
-#endif
-
- if (xs->num == 1) {
- goto append;
- }
-
- // Let d = repeatMax - repeatMin
- // Examine penultimate entry x[-2].
- // If (offset - x[-2] <= d), then last entry x[-1] can be replaced with
- // entry for offset.
- assert(xs->num >= 2);
- u32 d = info->repeatMax - info->repeatMin;
- u64a penultimate_offset =
- xs->offset + unaligned_load_u16(ring + xs->num - 2);
- if (offset - penultimate_offset <= d) {
- assert(offset - xs->offset <= (u16)-1);
- unaligned_store_u16(ring + xs->num - 1, offset - xs->offset);
- goto done;
- }
-
- // Otherwise, write a new entry for offset and return.
-
-append:
- assert(offset - xs->offset <= (u16)-1);
- assert(xs->num < rangeListCapacity(info));
- unaligned_store_u16(ring + xs->num, offset - xs->offset);
- xs->num++;
-
-done:
- assert(rangeListIsOrdered(xs, ring));
-}
-
-void repeatStoreBitmap(const struct RepeatInfo *info, union RepeatControl *ctrl,
- u64a offset, char is_alive) {
- DEBUG_PRINTF("{%u,%u} repeat, storing top at %llu\n", info->repeatMin,
- info->repeatMax, offset);
-
- struct RepeatBitmapControl *xs = &ctrl->bitmap;
- if (!is_alive || !xs->bitmap) {
- DEBUG_PRINTF("storing initial top at %llu\n", offset);
- xs->offset = offset;
- xs->bitmap = 1U;
- return;
- }
-
-#ifdef DEBUG
- DEBUG_PRINTF("pre-store:\n");
- dumpBitmap(xs);
-#endif
-
- assert(offset >= xs->offset);
-
- u64a last_top = xs->offset + 63 - clz64(xs->bitmap);
- if (offset > last_top + info->repeatMax) {
- DEBUG_PRINTF("bitmap stale, storing initial top\n");
- xs->offset = offset;
- xs->bitmap = 1U;
- return;
- }
-
- u64a diff = offset - xs->offset;
- if (diff >= info->repeatMax + 1) {
- DEBUG_PRINTF("need expire, diff=%llu\n", diff);
- u64a push = diff - info->repeatMax;
- xs->offset += push;
- xs->bitmap = push >= 64 ? 0 : xs->bitmap >> push;
- DEBUG_PRINTF("pushed xs->offset to %llu\n", xs->offset);
- }
-
- // Write a new entry.
- diff = offset - xs->offset;
- assert(diff < 64);
- xs->bitmap |= (1ULL << diff);
-
-#ifdef DEBUG
- DEBUG_PRINTF("post-store:\n");
- dumpBitmap(xs);
-#endif
-}
-
-/** \brief Returns 1 if the ring has a match between (logical) index \a lower
- * and \a upper, excluding \a upper. */
-static
-int ringHasMatch(const struct RepeatRingControl *xs, const u8 *ring,
- const u32 ringSize, u32 lower, u32 upper) {
- assert(lower < upper);
- assert(lower < ringSize);
- assert(upper <= ringSize);
-
- u32 i = xs->first + lower;
- if (i >= ringSize) {
- i -= ringSize;
- }
-
- // Performance tweak: if we're looking at a fixed repeat, we can just use
- // mmbit_isset.
- if (lower + 1 == upper) {
- return mmbit_isset(ring, ringSize, i);
- }
-
- u32 end = xs->first + upper;
- if (end >= ringSize) {
- end -= ringSize;
- }
-
- // First scan, either to end if there's no wrap-around or ringSize (end of
- // the underlying multibit) if we are wrapping.
-
- u32 scan_end = i < end ? end : ringSize;
- u32 m = mmbit_iterate_bounded(ring, ringSize, i, scan_end);
- if (m != MMB_INVALID) {
- return 1;
- }
-
- // A second scan is necessary if we need to cope with wrap-around in the
- // ring buffer.
-
- if (i >= end) {
- m = mmbit_iterate_bounded(ring, ringSize, 0, end);
- return m != MMB_INVALID;
- }
-
- return 0;
-}
-
-/** Return a mask of ones in bit positions [0..v]. */
-static really_inline
-u64a mask_ones_to(u32 v) {
- if (v < 63) {
- return (1ULL << (v + 1)) - 1;
- } else {
- return ~(0ULL);
- }
-}
-
-void repeatStoreTrailer(const struct RepeatInfo *info,
- union RepeatControl *ctrl, u64a offset, char is_alive) {
- DEBUG_PRINTF("{%u,%u} repeat, top at %llu\n", info->repeatMin,
- info->repeatMax, offset);
-
- struct RepeatTrailerControl *xs = &ctrl->trailer;
-
- /* The TRAILER repeat model stores the following data in its control block:
- *
- * 1. offset, which is the min extent of the most recent match window
- * (i.e. corresponding to the most recent top)
- * 2. bitmap, which is a bitmap of up to repeatMin matches before
- * the min extent offset.
- */
-
- const u64a next_extent = offset + info->repeatMin;
-
- if (!is_alive) {
- xs->offset = next_extent;
- xs->bitmap = 0;
- DEBUG_PRINTF("initial top, set extent to %llu\n", next_extent);
- return;
- }
-
-#ifdef DEBUG
- DEBUG_PRINTF("pre-store:\n");
- dumpTrailer(info, xs);
-#endif
-
- const u32 m_width = info->repeatMax - info->repeatMin;
- DEBUG_PRINTF("most recent match window is [%llu,%llu]\n", xs->offset,
- xs->offset + m_width);
-
- assert(next_extent > xs->offset);
- u64a diff = next_extent - xs->offset;
- DEBUG_PRINTF("diff=%llu, m_width=%u\n", diff, m_width);
-
- assert(diff);
- xs->bitmap = diff < 64 ? xs->bitmap << diff : 0;
-
- // Switch on bits in the bitmask corresponding to matches in the previous
- // match window.
- if (diff <= m_width) {
- u64a m = mask_ones_to(diff - 1);
- xs->bitmap |= m;
- } else {
- u64a shift = diff - m_width - 1;
- if (shift < 64) {
- u64a m = mask_ones_to(m_width);
- m <<= shift;
- xs->bitmap |= m;
- }
- }
-
- DEBUG_PRINTF("bitmap=0x%llx\n", xs->bitmap);
-
- // Update max extent.
- xs->offset = next_extent;
-
- // Trim stale history: we only need repeatMin bytes of history.
- if (info->repeatMin < 63) {
- u64a mask = (1ULL << (info->repeatMin + 1)) - 1;
- xs->bitmap &= mask;
- }
-
-#ifdef DEBUG
- DEBUG_PRINTF("post-store:\n");
- dumpTrailer(info, xs);
-#endif
-}
-
-enum RepeatMatch repeatHasMatchRing(const struct RepeatInfo *info,
- const union RepeatControl *ctrl,
- const void *state, u64a offset) {
- const struct RepeatRingControl *xs = &ctrl->ring;
- const u8 *ring = (const u8 *)state;
- const u32 ringSize = ringCapacity(info);
-
- assert(mmbit_any(ring, ringSize));
- assert(offset >= xs->offset);
-
- DEBUG_PRINTF("check: offset=%llu, repeat=[%u,%u]\n", offset,
- info->repeatMin, info->repeatMax);
-#ifdef DEBUG
- DEBUG_PRINTF("ring state\n");
- dumpRing(info, xs, ring);
-#endif
-
- if (offset - xs->offset < info->repeatMin) {
- DEBUG_PRINTF("haven't even seen repeatMin bytes yet!\n");
- return REPEAT_NOMATCH;
- }
-
- if (offset - ringLastTop(xs, ringSize) >= ringSize) {
- DEBUG_PRINTF("ring is stale\n");
- return REPEAT_STALE;
- }
-
- // If we're not stale, delta fits in the range [repeatMin, lastTop +
- // repeatMax], which fits in a u32.
- assert(offset - xs->offset < UINT32_MAX);
- u32 delta = (u32)(offset - xs->offset);
- DEBUG_PRINTF("delta=%u\n", delta);
-
- // Find the bounds on possible matches in the ring buffer.
- u32 lower = delta > info->repeatMax ? delta - info->repeatMax : 0;
- u32 upper = MIN(delta - info->repeatMin + 1, ringOccupancy(xs, ringSize));
-
- if (lower >= upper) {
- DEBUG_PRINTF("no matches to check\n");
- return REPEAT_NOMATCH;
- }
-
- DEBUG_PRINTF("possible match indices=[%u,%u]\n", lower, upper);
- if (ringHasMatch(xs, ring, ringSize, lower, upper)) {
- return REPEAT_MATCH;
- }
-
- return REPEAT_NOMATCH;
-}
-
-enum RepeatMatch repeatHasMatchRange(const struct RepeatInfo *info,
- const union RepeatControl *ctrl,
- const void *state, u64a offset) {
- const struct RepeatRangeControl *xs = &ctrl->range;
- const u16 *ring = (const u16 *)state;
-
- assert(xs->num > 0);
- assert(xs->num <= rangeListCapacity(info));
- assert(rangeListIsOrdered(xs, ring));
-
- // Walk the ring. For each entry x:
- // if (offset - x) falls inside repeat bounds, return success.
-
- // It may be worth doing tests on first and last elements first to bail
- // early if the whole ring is too young or stale.
-
- DEBUG_PRINTF("check %u (of %u) elements, offset %llu, bounds={%u,%u}\n",
- xs->num, rangeListCapacity(info), offset,
- info->repeatMin, info->repeatMax);
-#ifdef DEBUG
- dumpRange(info, xs, ring);
-#endif
-
- // Quick pre-check for minimum.
- assert(offset >= xs->offset);
- if (offset - xs->offset < info->repeatMin) {
- DEBUG_PRINTF("haven't even seen repeatMin bytes yet!\n");
- return REPEAT_NOMATCH;
- }
-
- // We check the most recent offset first, as we can establish staleness.
- u64a match = xs->offset + unaligned_load_u16(ring + xs->num - 1);
- assert(offset >= match);
- u64a diff = offset - match;
- if (diff > info->repeatMax) {
- DEBUG_PRINTF("range list is stale\n");
- return REPEAT_STALE;
- } else if (diff >= info->repeatMin && diff <= info->repeatMax) {
- return REPEAT_MATCH;
- }
-
- // Check the other offsets in the list.
- u32 count = xs->num - 1;
- for (u32 i = 0; i < count; i++) {
- match = xs->offset + unaligned_load_u16(ring + i);
- assert(offset >= match);
- diff = offset - match;
- if (diff >= info->repeatMin && diff <= info->repeatMax) {
- return REPEAT_MATCH;
- }
- }
-
- return REPEAT_NOMATCH;
-}
-
-enum RepeatMatch repeatHasMatchBitmap(const struct RepeatInfo *info,
- const union RepeatControl *ctrl,
- u64a offset) {
- const struct RepeatBitmapControl *xs = &ctrl->bitmap;
-
- DEBUG_PRINTF("checking if offset=%llu is a match\n", offset);
-
-#ifdef DEBUG
- dumpBitmap(xs);
-#endif
-
- u64a bitmap = xs->bitmap;
- if (!bitmap) {
- DEBUG_PRINTF("no tops; stale\n");
- return REPEAT_STALE;
- }
-
- // Quick pre-check for minimum.
- const u64a base = xs->offset;
- assert(offset >= base);
- if (offset - base < info->repeatMin) {
- DEBUG_PRINTF("haven't even seen repeatMin bytes yet!\n");
- return REPEAT_NOMATCH;
- }
-
- // We check the most recent offset first, as we can establish staleness.
- u64a match = base + findAndClearMSB_64(&bitmap);
- DEBUG_PRINTF("offset=%llu, last_match %llu\n", offset, match);
- assert(offset >= match);
- u64a diff = offset - match;
- if (diff > info->repeatMax) {
- DEBUG_PRINTF("stale\n");
- return REPEAT_STALE;
- } else if (diff >= info->repeatMin && diff <= info->repeatMax) {
- return REPEAT_MATCH;
- }
-
- while (bitmap) {
- match = base + findAndClearLSB_64(&bitmap);
- DEBUG_PRINTF("offset=%llu, last_match %llu\n", offset, match);
- assert(offset >= match);
- diff = offset - match;
- if (diff >= info->repeatMin && diff <= info->repeatMax) {
- return REPEAT_MATCH;
- }
- }
-
- return REPEAT_NOMATCH;
-}
-
-enum RepeatMatch repeatHasMatchTrailer(const struct RepeatInfo *info,
- const union RepeatControl *ctrl,
- u64a offset) {
- const struct RepeatTrailerControl *xs = &ctrl->trailer;
- const u32 m_width = info->repeatMax - info->repeatMin;
-
- DEBUG_PRINTF("offset=%llu, xs->offset=%llu, xs->bitmap=0x%llx\n", offset,
- xs->offset, xs->bitmap);
-
- if (offset > xs->offset + m_width) {
- DEBUG_PRINTF("stale\n");
- return REPEAT_STALE;
- }
-
- if (offset >= xs->offset) {
- DEBUG_PRINTF("in match window\n");
- return REPEAT_MATCH;
- }
-
- if (offset >= xs->offset - info->repeatMin) {
- u32 idx = xs->offset - offset - 1;
- DEBUG_PRINTF("check bitmap idx %u\n", idx);
- assert(idx < 64);
- if (xs->bitmap & (1ULL << idx)) {
- DEBUG_PRINTF("match in bitmap\n");
- return REPEAT_MATCH;
- }
- }
-
- DEBUG_PRINTF("no match\n");
- return REPEAT_NOMATCH;
-}
-
+ return xs->offset + 63 - clz64(xs->bitmap);
+}
+
+u64a repeatLastTopTrailer(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl) {
+ const struct RepeatTrailerControl *xs = &ctrl->trailer;
+ assert(xs->offset >= info->repeatMin);
+ return xs->offset - info->repeatMin;
+}
+
+u64a repeatNextMatchRing(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl, const void *state,
+ u64a offset) {
+ const struct RepeatRingControl *xs = &ctrl->ring;
+ const u8 *ring = (const u8 *)state;
+ const u32 ringSize = ringCapacity(info);
+
+ // We should have at least one top stored.
+ assert(mmbit_any(ring, ringSize));
+ assert(info->repeatMax < REPEAT_INF);
+
+ // Increment offset, as we want the NEXT match.
+ offset++;
+
+ const u64a base_offset = xs->offset;
+ DEBUG_PRINTF("offset=%llu, base_offset=%llu\n", offset, base_offset);
+
+ u64a delta = offset - base_offset;
+ if (offset < base_offset || delta < info->repeatMin) {
+ DEBUG_PRINTF("before min repeat\n");
+ return base_offset + info->repeatMin;
+ }
+ if (offset > ringLastTop(xs, ringSize) + info->repeatMax) {
+ DEBUG_PRINTF("ring is stale\n");
+ return 0; // no more matches
+ }
+
+ DEBUG_PRINTF("delta=%llu\n", delta);
+ u64a lower = delta > info->repeatMax ? delta - info->repeatMax : 0;
+ DEBUG_PRINTF("lower=%llu\n", lower);
+
+ assert(lower < ringSize);
+
+ // First scan, either to xs->last if there's no wrap-around or ringSize
+ // (end of the underlying multibit) if we are wrapping.
+
+ u32 begin = xs->first + lower;
+ if (begin >= ringSize) {
+        // This branch and subtract tested much faster than using % (integer
+        // division).
+ begin -= ringSize;
+ }
+ const u32 end = begin >= xs->last ? ringSize : xs->last;
+ u32 i = mmbit_iterate_bounded(ring, ringSize, begin, end);
+ if (i != MMB_INVALID) {
+ u32 j = i - begin + lower;
+ return MAX(offset, base_offset + j + info->repeatMin);
+ }
+
+ // A second scan is necessary if we need to cope with wrap-around in the
+ // ring buffer.
+
+ if (begin >= xs->last) {
+ i = mmbit_iterate_bounded(ring, ringSize, 0, xs->last);
+ if (i != MMB_INVALID) {
+ u32 j = i + (ringSize - begin) + lower;
+ return MAX(offset, base_offset + j + info->repeatMin);
+ }
+ }
+
+ return 0;
+}
+
+u64a repeatNextMatchRange(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl, const void *state,
+ u64a offset) {
+ const struct RepeatRangeControl *xs = &ctrl->range;
+ const u16 *ring = (const u16 *)state;
+
+ assert(xs->num > 0);
+ assert(xs->num <= rangeListCapacity(info));
+ assert(rangeListIsOrdered(xs, ring));
+ assert(info->repeatMax < REPEAT_INF);
+
+ for (u32 i = 0; i < xs->num; i++) {
+ u64a base = xs->offset + unaligned_load_u16(ring + i);
+ u64a first = base + info->repeatMin;
+ if (offset < first) {
+ return first;
+ }
+ if (offset < base + info->repeatMax) {
+ return offset + 1;
+ }
+ }
+
+ return 0;
+}
+
+u64a repeatNextMatchBitmap(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl, u64a offset) {
+ const struct RepeatBitmapControl *xs = &ctrl->bitmap;
+ const u64a base = xs->offset;
+ u64a bitmap = xs->bitmap;
+
+ // FIXME: quick exit if there is no match, based on last top in bitmap?
+
+ while (bitmap) {
+ u64a top = base + findAndClearLSB_64(&bitmap);
+ if (offset < top + info->repeatMin) {
+ return top + info->repeatMin;
+ }
+ if (offset < top + info->repeatMax) {
+ return offset + 1;
+ }
+ }
+
+ return 0; // No more matches.
+}
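/*
 * A small sketch of the bitmap walk above, assuming a {2,4} repeat with
 * base == 10 and bitmap == 0x1 (a single top at 10):
 *
 *   repeatNextMatchBitmap(info, ctrl, 11); // -> 12 (top + repeatMin)
 *   repeatNextMatchBitmap(info, ctrl, 13); // -> 14 (offset + 1)
 *   repeatNextMatchBitmap(info, ctrl, 14); // -> 0 (past top + repeatMax)
 */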
+
+u64a repeatNextMatchTrailer(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl, u64a offset) {
+ const struct RepeatTrailerControl *xs = &ctrl->trailer;
+ const u32 m_width = info->repeatMax - info->repeatMin;
+
+ DEBUG_PRINTF("offset=%llu, xs->offset=%llu\n", offset, xs->offset);
+ DEBUG_PRINTF("{%u,%u} repeat, m_width=%u\n", info->repeatMin,
+ info->repeatMax, m_width);
+
+ assert(xs->offset >= info->repeatMin);
+
+ if (offset >= xs->offset + m_width) {
+ DEBUG_PRINTF("no more matches\n");
+ return 0;
+ }
+
+ if (offset >= xs->offset) {
+ DEBUG_PRINTF("inside most recent match window, next match %llu\n",
+ offset + 1);
+ return offset + 1;
+ }
+
+ // Offset is before the match window, we need to consult the bitmap of
+ // earlier match offsets.
+ u64a bitmap = xs->bitmap;
+
+ u64a diff = xs->offset - offset;
+ DEBUG_PRINTF("diff=%llu\n", diff);
+ if (diff <= 64) {
+ assert(diff);
+ bitmap &= (1ULL << (diff - 1)) - 1;
+ }
+ DEBUG_PRINTF("bitmap = 0x%llx\n", bitmap);
+ if (bitmap) {
+ u32 idx = 63 - clz64(bitmap);
+ DEBUG_PRINTF("clz=%u, idx = %u -> offset %llu\n", clz64(bitmap), idx,
+ xs->offset - idx);
+ DEBUG_PRINTF("next match at %llu\n", xs->offset - idx - 1);
+ u64a next_match = xs->offset - idx - 1;
+ assert(next_match > offset);
+ return next_match;
+ }
+
+ DEBUG_PRINTF("next match is start of match window, %llu\n", xs->offset);
+ return xs->offset;
+}
+
+/** \brief Store the first top in the ring buffer. */
+static
+void storeInitialRingTop(struct RepeatRingControl *xs, u8 *ring,
+ u64a offset, const u32 ringSize) {
+ DEBUG_PRINTF("ring=%p, ringSize=%u\n", ring, ringSize);
+ xs->offset = offset;
+ mmbit_clear(ring, ringSize);
+ mmbit_set(ring, ringSize, 0);
+ xs->first = 0;
+ xs->last = 1;
+}
+
+static really_inline
+char ringIsStale(const struct RepeatRingControl *xs, const u32 ringSize,
+ const u64a offset) {
+ u64a finalMatch = ringLastTop(xs, ringSize);
+ if (offset - finalMatch >= ringSize) {
+ DEBUG_PRINTF("all matches in ring are stale\n");
+ return 1;
+ }
+
+ return 0;
+}
+
+void repeatStoreRing(const struct RepeatInfo *info, union RepeatControl *ctrl,
+ void *state, u64a offset, char is_alive) {
+ struct RepeatRingControl *xs = &ctrl->ring;
+ u8 *ring = (u8 *)state;
+ const u32 ringSize = ringCapacity(info);
+ assert(ringSize > 0);
+
+ DEBUG_PRINTF("storing top for offset %llu in ring\n", offset);
+
+ if (!is_alive || ringIsStale(xs, ringSize, offset)) {
+ storeInitialRingTop(xs, ring, offset, ringSize);
+ } else {
+ assert(offset > ringLastTop(xs, ringSize)); // Dupe or out of order.
+ u32 occ = ringOccupancy(xs, ringSize);
+ u64a diff = offset - xs->offset;
+ DEBUG_PRINTF("diff=%llu, occ=%u\n", diff, occ);
+ if (diff >= ringSize) {
+ u32 push = diff - ringSize + 1;
+ DEBUG_PRINTF("push ring %u\n", push);
+ xs->first += push;
+ if (xs->first >= ringSize) {
+ xs->first -= ringSize;
+ }
+ xs->offset += push;
+ diff -= push;
+ occ -= push;
+ }
+
+ // There's now room in the ring for this top, so we write a run of
+ // zeroes, then a one.
+ DEBUG_PRINTF("diff=%llu, occ=%u\n", diff, occ);
+ assert(diff < ringSize);
+ assert(diff >= occ);
+ u32 n = diff - occ;
+
+ u32 i = xs->last + n;
+
+ mmbit_unset_range(ring, ringSize, xs->last, MIN(i, ringSize));
+ if (i >= ringSize) {
+ i -= ringSize;
+ mmbit_unset_range(ring, ringSize, 0, i);
+ }
+
+ assert(i != xs->first);
+ DEBUG_PRINTF("set bit %u\n", i);
+ mmbit_set(ring, ringSize, i);
+ xs->last = i + 1;
+ if (xs->last == ringSize) {
+ xs->last = 0;
+ }
+ }
+
+ // Our ring indices shouldn't have spiraled off into uncharted space.
+ assert(xs->first < ringSize);
+ assert(xs->last < ringSize);
+
+#ifdef DEBUG
+ DEBUG_PRINTF("post-store ring state\n");
+ dumpRing(info, xs, ring);
+#endif
+
+ // The final top stored in our ring should be the one we just wrote in.
+ assert(ringLastTop(xs, ringSize) == offset);
+}
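/*
 * A worked example of the push logic above, assuming a {0,3} repeat
 * (ringSize == 4), xs->offset == 0, xs->first == 0, xs->last == 3 and tops
 * stored at offsets 0, 1 and 2. Storing a top at offset 5 gives
 * diff == 5 >= ringSize, so push == 2: xs->first and xs->offset advance to
 * 2, leaving diff == 3 and occ == 1. The run of zeroes then clears indices
 * 3 and 0, the new top is set at index 1, xs->last becomes 2, and
 * ringLastTop() again reports offset 5.
 */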
+
+static really_inline
+void storeInitialRangeTop(struct RepeatRangeControl *xs, u16 *ring,
+ u64a offset) {
+ xs->offset = offset;
+ xs->num = 1;
+ unaligned_store_u16(ring, 0);
+}
+
+void repeatStoreRange(const struct RepeatInfo *info, union RepeatControl *ctrl,
+ void *state, u64a offset, char is_alive) {
+ struct RepeatRangeControl *xs = &ctrl->range;
+ u16 *ring = (u16 *)state;
+
+ if (!is_alive) {
+ DEBUG_PRINTF("storing initial top at %llu\n", offset);
+ storeInitialRangeTop(xs, ring, offset);
+ return;
+ }
+
+ DEBUG_PRINTF("storing top at %llu, list currently has %u/%u elements\n",
+ offset, xs->num, rangeListCapacity(info));
+
+#ifdef DEBUG
+ dumpRange(info, xs, ring);
+#endif
+
+ // Walk ring from front. Identify the number of stale elements, and shift
+ // the whole ring to delete them.
+ u32 i = 0;
+ for (; i < xs->num; i++) {
+ u64a this_offset = xs->offset + unaligned_load_u16(ring + i);
+ DEBUG_PRINTF("this_offset=%llu, diff=%llu\n", this_offset,
+ offset - this_offset);
+ if (offset - this_offset <= info->repeatMax) {
+ break;
+ }
+ }
+
+ if (i == xs->num) {
+ DEBUG_PRINTF("whole ring is stale\n");
+ storeInitialRangeTop(xs, ring, offset);
+ return;
+ } else if (i > 0) {
+ DEBUG_PRINTF("expiring %u stale tops\n", i);
+ u16 first_offset = unaligned_load_u16(ring + i); // first live top
+ for (u32 j = 0; j < xs->num - i; j++) {
+ u16 val = unaligned_load_u16(ring + i + j);
+ assert(val >= first_offset);
+ unaligned_store_u16(ring + j, val - first_offset);
+ }
+ xs->offset += first_offset;
+ xs->num -= i;
+ }
+
+#ifdef DEBUG
+ DEBUG_PRINTF("post-expire:\n");
+ dumpRange(info, xs, ring);
+#endif
+
+ if (xs->num == 1) {
+ goto append;
+ }
+
+ // Let d = repeatMax - repeatMin
+ // Examine penultimate entry x[-2].
+ // If (offset - x[-2] <= d), then last entry x[-1] can be replaced with
+ // entry for offset.
+ assert(xs->num >= 2);
+ u32 d = info->repeatMax - info->repeatMin;
+ u64a penultimate_offset =
+ xs->offset + unaligned_load_u16(ring + xs->num - 2);
+ if (offset - penultimate_offset <= d) {
+ assert(offset - xs->offset <= (u16)-1);
+ unaligned_store_u16(ring + xs->num - 1, offset - xs->offset);
+ goto done;
+ }
+
+ // Otherwise, write a new entry for offset and return.
+
+append:
+ assert(offset - xs->offset <= (u16)-1);
+ assert(xs->num < rangeListCapacity(info));
+ unaligned_store_u16(ring + xs->num, offset - xs->offset);
+ xs->num++;
+
+done:
+ assert(rangeListIsOrdered(xs, ring));
+}
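/*
 * Why the replacement above is safe: since repeatMax - repeatMin == d,
 * offset - x[-2] <= d implies x[-2] + repeatMax >= offset + repeatMin, so
 * the match windows of x[-2] and offset overlap and together cover all of
 * x[-1]'s window [x[-1] + repeatMin, x[-1] + repeatMax]. Overwriting x[-1]
 * therefore loses no matches, and it is what bounds the list at
 * rangeListCapacity() entries.
 */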
+
+void repeatStoreBitmap(const struct RepeatInfo *info, union RepeatControl *ctrl,
+ u64a offset, char is_alive) {
+ DEBUG_PRINTF("{%u,%u} repeat, storing top at %llu\n", info->repeatMin,
+ info->repeatMax, offset);
+
+ struct RepeatBitmapControl *xs = &ctrl->bitmap;
+ if (!is_alive || !xs->bitmap) {
+ DEBUG_PRINTF("storing initial top at %llu\n", offset);
+ xs->offset = offset;
+ xs->bitmap = 1U;
+ return;
+ }
+
+#ifdef DEBUG
+ DEBUG_PRINTF("pre-store:\n");
+ dumpBitmap(xs);
+#endif
+
+ assert(offset >= xs->offset);
+
+ u64a last_top = xs->offset + 63 - clz64(xs->bitmap);
+ if (offset > last_top + info->repeatMax) {
+ DEBUG_PRINTF("bitmap stale, storing initial top\n");
+ xs->offset = offset;
+ xs->bitmap = 1U;
+ return;
+ }
+
+ u64a diff = offset - xs->offset;
+ if (diff >= info->repeatMax + 1) {
+ DEBUG_PRINTF("need expire, diff=%llu\n", diff);
+ u64a push = diff - info->repeatMax;
+ xs->offset += push;
+ xs->bitmap = push >= 64 ? 0 : xs->bitmap >> push;
+ DEBUG_PRINTF("pushed xs->offset to %llu\n", xs->offset);
+ }
+
+ // Write a new entry.
+ diff = offset - xs->offset;
+ assert(diff < 64);
+ xs->bitmap |= (1ULL << diff);
+
+#ifdef DEBUG
+ DEBUG_PRINTF("post-store:\n");
+ dumpBitmap(xs);
+#endif
+}
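/*
 * A worked example of the expire-then-store path above, assuming a {2,4}
 * repeat with tops at 10 and 13 (xs->offset == 10, xs->bitmap == 0x9).
 * Storing a top at offset 16 gives diff == 6 >= repeatMax + 1, so
 * push == 2: xs->offset moves to 12 and the bitmap shifts right to 0x2
 * (the top at 10 expires, 13 survives as bit 1). The new top is then set
 * at bit diff == 4, leaving xs->bitmap == 0x12.
 */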
+
+/** \brief Returns 1 if the ring has a match between (logical) index \a lower
+ * and \a upper, excluding \a upper. */
+static
+int ringHasMatch(const struct RepeatRingControl *xs, const u8 *ring,
+ const u32 ringSize, u32 lower, u32 upper) {
+ assert(lower < upper);
+ assert(lower < ringSize);
+ assert(upper <= ringSize);
+
+ u32 i = xs->first + lower;
+ if (i >= ringSize) {
+ i -= ringSize;
+ }
+
+ // Performance tweak: if we're looking at a fixed repeat, we can just use
+ // mmbit_isset.
+ if (lower + 1 == upper) {
+ return mmbit_isset(ring, ringSize, i);
+ }
+
+ u32 end = xs->first + upper;
+ if (end >= ringSize) {
+ end -= ringSize;
+ }
+
+ // First scan, either to end if there's no wrap-around or ringSize (end of
+ // the underlying multibit) if we are wrapping.
+
+ u32 scan_end = i < end ? end : ringSize;
+ u32 m = mmbit_iterate_bounded(ring, ringSize, i, scan_end);
+ if (m != MMB_INVALID) {
+ return 1;
+ }
+
+ // A second scan is necessary if we need to cope with wrap-around in the
+ // ring buffer.
+
+ if (i >= end) {
+ m = mmbit_iterate_bounded(ring, ringSize, 0, end);
+ return m != MMB_INVALID;
+ }
+
+ return 0;
+}
+
+/** Return a mask of ones in bit positions [0..v]. */
+static really_inline
+u64a mask_ones_to(u32 v) {
+ if (v < 63) {
+ return (1ULL << (v + 1)) - 1;
+ } else {
+ return ~(0ULL);
+ }
+}
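/*
 * Quick reference for mask_ones_to():
 *
 *   mask_ones_to(0)  == 0x1
 *   mask_ones_to(3)  == 0xf
 *   mask_ones_to(63) == ~0ULL  // the v < 63 guard avoids a shift by 64,
 *                              // which would be undefined behaviour
 */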
+
+void repeatStoreTrailer(const struct RepeatInfo *info,
+ union RepeatControl *ctrl, u64a offset, char is_alive) {
+ DEBUG_PRINTF("{%u,%u} repeat, top at %llu\n", info->repeatMin,
+ info->repeatMax, offset);
+
+ struct RepeatTrailerControl *xs = &ctrl->trailer;
+
+ /* The TRAILER repeat model stores the following data in its control block:
+ *
+ * 1. offset, which is the min extent of the most recent match window
+ * (i.e. corresponding to the most recent top)
+ * 2. bitmap, which is a bitmap of up to repeatMin matches before
+ * the min extent offset.
+ */
+
+ const u64a next_extent = offset + info->repeatMin;
+
+ if (!is_alive) {
+ xs->offset = next_extent;
+ xs->bitmap = 0;
+ DEBUG_PRINTF("initial top, set extent to %llu\n", next_extent);
+ return;
+ }
+
+#ifdef DEBUG
+ DEBUG_PRINTF("pre-store:\n");
+ dumpTrailer(info, xs);
+#endif
+
+ const u32 m_width = info->repeatMax - info->repeatMin;
+ DEBUG_PRINTF("most recent match window is [%llu,%llu]\n", xs->offset,
+ xs->offset + m_width);
+
+ assert(next_extent > xs->offset);
+ u64a diff = next_extent - xs->offset;
+ DEBUG_PRINTF("diff=%llu, m_width=%u\n", diff, m_width);
+
+ assert(diff);
+ xs->bitmap = diff < 64 ? xs->bitmap << diff : 0;
+
+ // Switch on bits in the bitmask corresponding to matches in the previous
+ // match window.
+ if (diff <= m_width) {
+ u64a m = mask_ones_to(diff - 1);
+ xs->bitmap |= m;
+ } else {
+ u64a shift = diff - m_width - 1;
+ if (shift < 64) {
+ u64a m = mask_ones_to(m_width);
+ m <<= shift;
+ xs->bitmap |= m;
+ }
+ }
+
+ DEBUG_PRINTF("bitmap=0x%llx\n", xs->bitmap);
+
+ // Update max extent.
+ xs->offset = next_extent;
+
+ // Trim stale history: we only need repeatMin bytes of history.
+ if (info->repeatMin < 63) {
+ u64a mask = (1ULL << (info->repeatMin + 1)) - 1;
+ xs->bitmap &= mask;
+ }
+
+#ifdef DEBUG
+ DEBUG_PRINTF("post-store:\n");
+ dumpTrailer(info, xs);
+#endif
+}
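/*
 * A worked example of the trailer store above, assuming a {3,5} repeat
 * (m_width == 2): a top at 10 initializes xs->offset to 13; a second top
 * at 12 gives next_extent == 15 and diff == 2 <= m_width, so
 * mask_ones_to(1) == 0x3 is ORed in. Bit idx of the bitmap records a match
 * at xs->offset - idx - 1, so bits 0 and 1 are the matches at 14 and 13
 * left over from the earlier window [13,15].
 */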
+
+enum RepeatMatch repeatHasMatchRing(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl,
+ const void *state, u64a offset) {
+ const struct RepeatRingControl *xs = &ctrl->ring;
+ const u8 *ring = (const u8 *)state;
+ const u32 ringSize = ringCapacity(info);
+
+ assert(mmbit_any(ring, ringSize));
+ assert(offset >= xs->offset);
+
+ DEBUG_PRINTF("check: offset=%llu, repeat=[%u,%u]\n", offset,
+ info->repeatMin, info->repeatMax);
+#ifdef DEBUG
+ DEBUG_PRINTF("ring state\n");
+ dumpRing(info, xs, ring);
+#endif
+
+ if (offset - xs->offset < info->repeatMin) {
+ DEBUG_PRINTF("haven't even seen repeatMin bytes yet!\n");
+ return REPEAT_NOMATCH;
+ }
+
+ if (offset - ringLastTop(xs, ringSize) >= ringSize) {
+ DEBUG_PRINTF("ring is stale\n");
+ return REPEAT_STALE;
+ }
+
+ // If we're not stale, delta fits in the range [repeatMin, lastTop +
+ // repeatMax], which fits in a u32.
+ assert(offset - xs->offset < UINT32_MAX);
+ u32 delta = (u32)(offset - xs->offset);
+ DEBUG_PRINTF("delta=%u\n", delta);
+
+ // Find the bounds on possible matches in the ring buffer.
+ u32 lower = delta > info->repeatMax ? delta - info->repeatMax : 0;
+ u32 upper = MIN(delta - info->repeatMin + 1, ringOccupancy(xs, ringSize));
+
+ if (lower >= upper) {
+ DEBUG_PRINTF("no matches to check\n");
+ return REPEAT_NOMATCH;
+ }
+
+ DEBUG_PRINTF("possible match indices=[%u,%u]\n", lower, upper);
+ if (ringHasMatch(xs, ring, ringSize, lower, upper)) {
+ return REPEAT_MATCH;
+ }
+
+ return REPEAT_NOMATCH;
+}
+
+enum RepeatMatch repeatHasMatchRange(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl,
+ const void *state, u64a offset) {
+ const struct RepeatRangeControl *xs = &ctrl->range;
+ const u16 *ring = (const u16 *)state;
+
+ assert(xs->num > 0);
+ assert(xs->num <= rangeListCapacity(info));
+ assert(rangeListIsOrdered(xs, ring));
+
+ // Walk the ring. For each entry x:
+ // if (offset - x) falls inside repeat bounds, return success.
+
+ // It may be worth doing tests on first and last elements first to bail
+ // early if the whole ring is too young or stale.
+
+ DEBUG_PRINTF("check %u (of %u) elements, offset %llu, bounds={%u,%u}\n",
+ xs->num, rangeListCapacity(info), offset,
+ info->repeatMin, info->repeatMax);
+#ifdef DEBUG
+ dumpRange(info, xs, ring);
+#endif
+
+ // Quick pre-check for minimum.
+ assert(offset >= xs->offset);
+ if (offset - xs->offset < info->repeatMin) {
+ DEBUG_PRINTF("haven't even seen repeatMin bytes yet!\n");
+ return REPEAT_NOMATCH;
+ }
+
+ // We check the most recent offset first, as we can establish staleness.
+ u64a match = xs->offset + unaligned_load_u16(ring + xs->num - 1);
+ assert(offset >= match);
+ u64a diff = offset - match;
+ if (diff > info->repeatMax) {
+ DEBUG_PRINTF("range list is stale\n");
+ return REPEAT_STALE;
+ } else if (diff >= info->repeatMin && diff <= info->repeatMax) {
+ return REPEAT_MATCH;
+ }
+
+ // Check the other offsets in the list.
+ u32 count = xs->num - 1;
+ for (u32 i = 0; i < count; i++) {
+ match = xs->offset + unaligned_load_u16(ring + i);
+ assert(offset >= match);
+ diff = offset - match;
+ if (diff >= info->repeatMin && diff <= info->repeatMax) {
+ return REPEAT_MATCH;
+ }
+ }
+
+ return REPEAT_NOMATCH;
+}
+
+enum RepeatMatch repeatHasMatchBitmap(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl,
+ u64a offset) {
+ const struct RepeatBitmapControl *xs = &ctrl->bitmap;
+
+ DEBUG_PRINTF("checking if offset=%llu is a match\n", offset);
+
+#ifdef DEBUG
+ dumpBitmap(xs);
+#endif
+
+ u64a bitmap = xs->bitmap;
+ if (!bitmap) {
+ DEBUG_PRINTF("no tops; stale\n");
+ return REPEAT_STALE;
+ }
+
+ // Quick pre-check for minimum.
+ const u64a base = xs->offset;
+ assert(offset >= base);
+ if (offset - base < info->repeatMin) {
+ DEBUG_PRINTF("haven't even seen repeatMin bytes yet!\n");
+ return REPEAT_NOMATCH;
+ }
+
+ // We check the most recent offset first, as we can establish staleness.
+ u64a match = base + findAndClearMSB_64(&bitmap);
+ DEBUG_PRINTF("offset=%llu, last_match %llu\n", offset, match);
+ assert(offset >= match);
+ u64a diff = offset - match;
+ if (diff > info->repeatMax) {
+ DEBUG_PRINTF("stale\n");
+ return REPEAT_STALE;
+ } else if (diff >= info->repeatMin && diff <= info->repeatMax) {
+ return REPEAT_MATCH;
+ }
+
+ while (bitmap) {
+ match = base + findAndClearLSB_64(&bitmap);
+ DEBUG_PRINTF("offset=%llu, last_match %llu\n", offset, match);
+ assert(offset >= match);
+ diff = offset - match;
+ if (diff >= info->repeatMin && diff <= info->repeatMax) {
+ return REPEAT_MATCH;
+ }
+ }
+
+ return REPEAT_NOMATCH;
+}
+
+enum RepeatMatch repeatHasMatchTrailer(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl,
+ u64a offset) {
+ const struct RepeatTrailerControl *xs = &ctrl->trailer;
+ const u32 m_width = info->repeatMax - info->repeatMin;
+
+ DEBUG_PRINTF("offset=%llu, xs->offset=%llu, xs->bitmap=0x%llx\n", offset,
+ xs->offset, xs->bitmap);
+
+ if (offset > xs->offset + m_width) {
+ DEBUG_PRINTF("stale\n");
+ return REPEAT_STALE;
+ }
+
+ if (offset >= xs->offset) {
+ DEBUG_PRINTF("in match window\n");
+ return REPEAT_MATCH;
+ }
+
+ if (offset >= xs->offset - info->repeatMin) {
+ u32 idx = xs->offset - offset - 1;
+ DEBUG_PRINTF("check bitmap idx %u\n", idx);
+ assert(idx < 64);
+ if (xs->bitmap & (1ULL << idx)) {
+ DEBUG_PRINTF("match in bitmap\n");
+ return REPEAT_MATCH;
+ }
+ }
+
+ DEBUG_PRINTF("no match\n");
+ return REPEAT_NOMATCH;
+}
+
/** \brief True if the given value can be packed into len bytes. */
-static really_inline
+static really_inline
int fits_in_len_bytes(u64a val, u32 len) {
if (len >= 8) {
return 1;
@@ -896,205 +896,205 @@ int fits_in_len_bytes(u64a val, u32 len) {
}
static really_inline
-void storePackedRelative(char *dest, u64a val, u64a offset, u64a max, u32 len) {
- assert(val <= offset);
+void storePackedRelative(char *dest, u64a val, u64a offset, u64a max, u32 len) {
+ assert(val <= offset);
assert(fits_in_len_bytes(max, len));
- u64a delta = offset - val;
- if (delta >= max) {
- delta = max;
- }
- DEBUG_PRINTF("delta %llu\n", delta);
+ u64a delta = offset - val;
+ if (delta >= max) {
+ delta = max;
+ }
+ DEBUG_PRINTF("delta %llu\n", delta);
assert(fits_in_len_bytes(delta, len));
- partial_store_u64a(dest, delta, len);
-}
-
-static
-void repeatPackRing(char *dest, const struct RepeatInfo *info,
- const union RepeatControl *ctrl, u64a offset) {
- const struct RepeatRingControl *xs = &ctrl->ring;
- const u32 ring_indices_len = info->repeatMax < 254 ? 2 : 4;
- const u32 offset_len = info->packedCtrlSize - ring_indices_len;
-
- // Write out packed relative base offset.
- assert(info->packedCtrlSize > ring_indices_len);
- storePackedRelative(dest, xs->offset, offset, info->horizon, offset_len);
-
- // Write out ring indices.
- if (ring_indices_len == 4) {
- unaligned_store_u16(dest + offset_len, xs->first);
- unaligned_store_u16(dest + offset_len + 2, xs->last);
- } else {
- assert(xs->first < 256 && xs->last < 256);
- u8 *indices = (u8 *)dest + offset_len;
- indices[0] = xs->first;
- indices[1] = xs->last;
- }
-}
-
-static
-void repeatPackOffset(char *dest, const struct RepeatInfo *info,
- const union RepeatControl *ctrl, u64a offset) {
- const struct RepeatOffsetControl *xs = &ctrl->offset;
- DEBUG_PRINTF("packing offset %llu [h %u]\n", xs->offset, info->horizon);
+ partial_store_u64a(dest, delta, len);
+}
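/*
 * A sketch of the relative packing above: the stored value is the delta
 * (offset - val), saturated at `max` (the repeat's horizon) and written
 * into `len` bytes with partial_store_u64a(). Saturating appears safe
 * because a base offset further back than the horizon can no longer
 * influence matching, which is what allows packedCtrlSize to stay small.
 */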
+
+static
+void repeatPackRing(char *dest, const struct RepeatInfo *info,
+ const union RepeatControl *ctrl, u64a offset) {
+ const struct RepeatRingControl *xs = &ctrl->ring;
+ const u32 ring_indices_len = info->repeatMax < 254 ? 2 : 4;
+ const u32 offset_len = info->packedCtrlSize - ring_indices_len;
+
+ // Write out packed relative base offset.
+ assert(info->packedCtrlSize > ring_indices_len);
+ storePackedRelative(dest, xs->offset, offset, info->horizon, offset_len);
+
+ // Write out ring indices.
+ if (ring_indices_len == 4) {
+ unaligned_store_u16(dest + offset_len, xs->first);
+ unaligned_store_u16(dest + offset_len + 2, xs->last);
+ } else {
+ assert(xs->first < 256 && xs->last < 256);
+ u8 *indices = (u8 *)dest + offset_len;
+ indices[0] = xs->first;
+ indices[1] = xs->last;
+ }
+}
+
+static
+void repeatPackOffset(char *dest, const struct RepeatInfo *info,
+ const union RepeatControl *ctrl, u64a offset) {
+ const struct RepeatOffsetControl *xs = &ctrl->offset;
+ DEBUG_PRINTF("packing offset %llu [h %u]\n", xs->offset, info->horizon);
if (!info->packedCtrlSize) {
assert(info->type == REPEAT_ALWAYS);
DEBUG_PRINTF("externally guarded .*\n");
return;
}
- storePackedRelative(dest, xs->offset, offset, info->horizon,
- info->packedCtrlSize);
-}
-
-static
-void repeatPackRange(char *dest, const struct RepeatInfo *info,
- const union RepeatControl *ctrl, u64a offset) {
- const struct RepeatRangeControl *xs = &ctrl->range;
-
- // Write out packed relative base offset.
- assert(info->packedCtrlSize > 1);
- storePackedRelative(dest, xs->offset, offset, info->horizon,
- info->packedCtrlSize - 1);
-
- // Write out range number of elements.
- dest[info->packedCtrlSize - 1] = xs->num;
-}
-
-static
-void repeatPackBitmap(char *dest, const struct RepeatInfo *info,
- const union RepeatControl *ctrl, u64a offset) {
- const struct RepeatBitmapControl *xs = &ctrl->bitmap;
- const u32 bound = info->repeatMax;
-
- assert(offset >= xs->offset);
- u64a new_base = offset > bound ? offset - bound : 0;
-
- // Shift bitmap to begin at new_base rather than xs->offset.
- u64a bitmap = xs->bitmap;
- if (new_base >= xs->offset) {
- u64a shift = new_base - xs->offset;
- bitmap = shift < 64 ? bitmap >> shift : 0;
- } else {
- u64a shift = xs->offset - new_base;
- bitmap = shift < 64 ? bitmap << shift : 0;
- }
-
- DEBUG_PRINTF("packing %llu into %u bytes\n", bitmap, info->packedCtrlSize);
-
- // Write out packed bitmap.
+ storePackedRelative(dest, xs->offset, offset, info->horizon,
+ info->packedCtrlSize);
+}
+
+static
+void repeatPackRange(char *dest, const struct RepeatInfo *info,
+ const union RepeatControl *ctrl, u64a offset) {
+ const struct RepeatRangeControl *xs = &ctrl->range;
+
+ // Write out packed relative base offset.
+ assert(info->packedCtrlSize > 1);
+ storePackedRelative(dest, xs->offset, offset, info->horizon,
+ info->packedCtrlSize - 1);
+
+ // Write out range number of elements.
+ dest[info->packedCtrlSize - 1] = xs->num;
+}
+
+static
+void repeatPackBitmap(char *dest, const struct RepeatInfo *info,
+ const union RepeatControl *ctrl, u64a offset) {
+ const struct RepeatBitmapControl *xs = &ctrl->bitmap;
+ const u32 bound = info->repeatMax;
+
+ assert(offset >= xs->offset);
+ u64a new_base = offset > bound ? offset - bound : 0;
+
+ // Shift bitmap to begin at new_base rather than xs->offset.
+ u64a bitmap = xs->bitmap;
+ if (new_base >= xs->offset) {
+ u64a shift = new_base - xs->offset;
+ bitmap = shift < 64 ? bitmap >> shift : 0;
+ } else {
+ u64a shift = xs->offset - new_base;
+ bitmap = shift < 64 ? bitmap << shift : 0;
+ }
+
+ DEBUG_PRINTF("packing %llu into %u bytes\n", bitmap, info->packedCtrlSize);
+
+ // Write out packed bitmap.
assert(fits_in_len_bytes(bitmap, info->packedCtrlSize));
- partial_store_u64a(dest, bitmap, info->packedCtrlSize);
-}
-
-static
-void repeatPackSparseOptimalP(char *dest, const struct RepeatInfo *info,
- const union RepeatControl *ctrl, u64a offset) {
- const struct RepeatRingControl *xs = &ctrl->ring;
-    // Ring index width (one or two bytes per index) depends on patch count.
- const u32 ring_indices_len = info->patchCount < 254 ? 2 : 4;
- const u32 offset_len = info->packedCtrlSize - ring_indices_len;
-
- // Write out packed relative base offset.
- assert(info->packedCtrlSize > ring_indices_len);
- storePackedRelative(dest, xs->offset, offset, info->horizon, offset_len);
-
- // Write out ring indices.
- if (ring_indices_len == 4) {
- unaligned_store_u16(dest + offset_len, xs->first);
- unaligned_store_u16(dest + offset_len + 2, xs->last);
- } else {
- assert(xs->first < 256 && xs->last < 256);
- u8 *indices = (u8 *)dest + offset_len;
- indices[0] = xs->first;
- indices[1] = xs->last;
- }
-}
-
-static
-void repeatPackTrailer(char *dest, const struct RepeatInfo *info,
- const union RepeatControl *ctrl, u64a offset) {
- const struct RepeatTrailerControl *xs = &ctrl->trailer;
-
- DEBUG_PRINTF("saving: offset=%llu, xs->offset=%llu, xs->bitmap=0x%llx\n",
- offset, xs->offset, xs->bitmap);
-
- // XXX: xs->offset may be zero in the NFA path (effectively uninitialized).
- u64a top;
- if (xs->offset) {
- assert(xs->offset >= info->repeatMin);
- top = xs->offset - info->repeatMin;
- } else {
- top = 0;
- }
-
- top = offset - top; // Pack top relative to offset.
-
- u64a v[2];
- v[0] = MIN(top, info->horizon);
- v[1] = xs->bitmap;
-
- pack_bits_64(dest, v, info->packedFieldSizes, 2);
-}
-
-void repeatPack(char *dest, const struct RepeatInfo *info,
- const union RepeatControl *ctrl, u64a offset) {
- assert(dest && info && ctrl);
-
- switch ((enum RepeatType)info->type) {
- case REPEAT_RING:
- repeatPackRing(dest, info, ctrl, offset);
- break;
- case REPEAT_FIRST:
- case REPEAT_LAST:
- repeatPackOffset(dest, info, ctrl, offset);
- break;
- case REPEAT_RANGE:
- repeatPackRange(dest, info, ctrl, offset);
- break;
- case REPEAT_BITMAP:
- repeatPackBitmap(dest, info, ctrl, offset);
- break;
- case REPEAT_SPARSE_OPTIMAL_P:
- repeatPackSparseOptimalP(dest, info, ctrl, offset);
- break;
- case REPEAT_TRAILER:
- repeatPackTrailer(dest, info, ctrl, offset);
- break;
+ partial_store_u64a(dest, bitmap, info->packedCtrlSize);
+}
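
Rebasing the bitmap before the store is the key step above: bit i must come to mean new_base + i rather than xs->offset + i. A small standalone sketch of that rebasing, guarding shifts of 64 or more (undefined behaviour in C) exactly as the code above does:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t u64a;

/* Shift `bitmap` (bit i marks a top at old_base + i) so that bit i
 * instead marks a top at new_base + i; tops falling outside the 64-bit
 * window are discarded. */
static u64a rebase_bitmap(u64a bitmap, u64a old_base, u64a new_base) {
    if (new_base >= old_base) {
        u64a shift = new_base - old_base;
        return shift < 64 ? bitmap >> shift : 0;
    }
    u64a shift = old_base - new_base;
    return shift < 64 ? bitmap << shift : 0;
}

int main(void) {
    u64a bm = (1ULL << 5) | (1ULL << 20);  /* tops at offsets 105 and 120 */
    u64a rebased = rebase_bitmap(bm, 100, 110);
    assert(rebased == (1ULL << 10));       /* only the top at 120 survives */
    printf("rebased bitmap: 0x%llx\n", (unsigned long long)rebased);
    return 0;
}
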
+
+static
+void repeatPackSparseOptimalP(char *dest, const struct RepeatInfo *info,
+ const union RepeatControl *ctrl, u64a offset) {
+ const struct RepeatRingControl *xs = &ctrl->ring;
+    // Ring index width (one or two bytes per index) depends on patch count.
+ const u32 ring_indices_len = info->patchCount < 254 ? 2 : 4;
+ const u32 offset_len = info->packedCtrlSize - ring_indices_len;
+
+ // Write out packed relative base offset.
+ assert(info->packedCtrlSize > ring_indices_len);
+ storePackedRelative(dest, xs->offset, offset, info->horizon, offset_len);
+
+ // Write out ring indices.
+ if (ring_indices_len == 4) {
+ unaligned_store_u16(dest + offset_len, xs->first);
+ unaligned_store_u16(dest + offset_len + 2, xs->last);
+ } else {
+ assert(xs->first < 256 && xs->last < 256);
+ u8 *indices = (u8 *)dest + offset_len;
+ indices[0] = xs->first;
+ indices[1] = xs->last;
+ }
+}
+
+static
+void repeatPackTrailer(char *dest, const struct RepeatInfo *info,
+ const union RepeatControl *ctrl, u64a offset) {
+ const struct RepeatTrailerControl *xs = &ctrl->trailer;
+
+ DEBUG_PRINTF("saving: offset=%llu, xs->offset=%llu, xs->bitmap=0x%llx\n",
+ offset, xs->offset, xs->bitmap);
+
+ // XXX: xs->offset may be zero in the NFA path (effectively uninitialized).
+ u64a top;
+ if (xs->offset) {
+ assert(xs->offset >= info->repeatMin);
+ top = xs->offset - info->repeatMin;
+ } else {
+ top = 0;
+ }
+
+ top = offset - top; // Pack top relative to offset.
+
+ u64a v[2];
+ v[0] = MIN(top, info->horizon);
+ v[1] = xs->bitmap;
+
+ pack_bits_64(dest, v, info->packedFieldSizes, 2);
+}
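
pack_bits_64() (util/pack_bits.h) packs the two variable-width fields back to back at the bit level. A simplified stand-in under the assumption of LSB-first packing (the real helper's exact bit order is an implementation detail of pack_bits.h):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t u64a;
typedef uint32_t u32;

/* Pack two values of the given bit widths, back to back, LSB first. */
static void pack2(uint8_t *dest, const u64a v[2], const u32 bits[2]) {
    u32 pos = 0;
    for (int f = 0; f < 2; f++) {
        assert(bits[f] == 64 || v[f] < (1ULL << bits[f]));
        for (u32 b = 0; b < bits[f]; b++, pos++) {
            if ((v[f] >> b) & 1) {
                dest[pos / 8] |= (uint8_t)(1u << (pos % 8));
            }
        }
    }
}

static void unpack2(u64a v[2], const uint8_t *src, const u32 bits[2]) {
    u32 pos = 0;
    for (int f = 0; f < 2; f++) {
        v[f] = 0;
        for (u32 b = 0; b < bits[f]; b++, pos++) {
            if ((src[pos / 8] >> (pos % 8)) & 1) {
                v[f] |= 1ULL << b;
            }
        }
    }
}

int main(void) {
    const u32 widths[2] = { 20, 33 };     /* e.g. top delta, tug bitmap */
    u64a in[2] = { 123456, 0x1deadbeefULL }, out[2];
    uint8_t buf[8] = { 0 };               /* 53 bits rounds up to 7 bytes */
    pack2(buf, in, widths);
    unpack2(out, buf, widths);
    assert(out[0] == in[0] && out[1] == in[1]);
    printf("round-trip ok\n");
    return 0;
}
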
+
+void repeatPack(char *dest, const struct RepeatInfo *info,
+ const union RepeatControl *ctrl, u64a offset) {
+ assert(dest && info && ctrl);
+
+ switch ((enum RepeatType)info->type) {
+ case REPEAT_RING:
+ repeatPackRing(dest, info, ctrl, offset);
+ break;
+ case REPEAT_FIRST:
+ case REPEAT_LAST:
+ repeatPackOffset(dest, info, ctrl, offset);
+ break;
+ case REPEAT_RANGE:
+ repeatPackRange(dest, info, ctrl, offset);
+ break;
+ case REPEAT_BITMAP:
+ repeatPackBitmap(dest, info, ctrl, offset);
+ break;
+ case REPEAT_SPARSE_OPTIMAL_P:
+ repeatPackSparseOptimalP(dest, info, ctrl, offset);
+ break;
+ case REPEAT_TRAILER:
+ repeatPackTrailer(dest, info, ctrl, offset);
+ break;
case REPEAT_ALWAYS:
/* nothing to do - no state */
break;
- }
-}
-
-static really_inline
-u64a loadPackedRelative(const char *src, u64a offset, u32 len) {
- u64a delta = partial_load_u64a(src, len);
- DEBUG_PRINTF("delta %llu\n", delta);
- assert(offset >= delta);
- return offset - delta;
-}
-
-static
-void repeatUnpackRing(const char *src, const struct RepeatInfo *info,
- u64a offset, union RepeatControl *ctrl) {
- struct RepeatRingControl *xs = &ctrl->ring;
- const u32 ring_indices_len = info->repeatMax < 254 ? 2 : 4;
- const u32 offset_len = info->packedCtrlSize - ring_indices_len;
- xs->offset = loadPackedRelative(src, offset, offset_len);
- if (ring_indices_len == 4) {
- xs->first = unaligned_load_u16(src + offset_len);
- xs->last = unaligned_load_u16(src + offset_len + 2);
- } else {
- const u8 *indices = (const u8 *)src + offset_len;
- xs->first = indices[0];
- xs->last = indices[1];
- }
-}
-
-static
-void repeatUnpackOffset(const char *src, const struct RepeatInfo *info,
- u64a offset, union RepeatControl *ctrl) {
- struct RepeatOffsetControl *xs = &ctrl->offset;
+ }
+}
+
+static really_inline
+u64a loadPackedRelative(const char *src, u64a offset, u32 len) {
+ u64a delta = partial_load_u64a(src, len);
+ DEBUG_PRINTF("delta %llu\n", delta);
+ assert(offset >= delta);
+ return offset - delta;
+}
+
+static
+void repeatUnpackRing(const char *src, const struct RepeatInfo *info,
+ u64a offset, union RepeatControl *ctrl) {
+ struct RepeatRingControl *xs = &ctrl->ring;
+ const u32 ring_indices_len = info->repeatMax < 254 ? 2 : 4;
+ const u32 offset_len = info->packedCtrlSize - ring_indices_len;
+ xs->offset = loadPackedRelative(src, offset, offset_len);
+ if (ring_indices_len == 4) {
+ xs->first = unaligned_load_u16(src + offset_len);
+ xs->last = unaligned_load_u16(src + offset_len + 2);
+ } else {
+ const u8 *indices = (const u8 *)src + offset_len;
+ xs->first = indices[0];
+ xs->last = indices[1];
+ }
+}
+
+static
+void repeatUnpackOffset(const char *src, const struct RepeatInfo *info,
+ u64a offset, union RepeatControl *ctrl) {
+ struct RepeatOffsetControl *xs = &ctrl->offset;
if (!info->packedCtrlSize) {
assert(info->type == REPEAT_ALWAYS);
DEBUG_PRINTF("externally guarded .*\n");
@@ -1102,503 +1102,503 @@ void repeatUnpackOffset(const char *src, const struct RepeatInfo *info,
} else {
xs->offset = loadPackedRelative(src, offset, info->packedCtrlSize);
}
- DEBUG_PRINTF("unpacking offset %llu [h%u]\n", xs->offset,
- info->horizon);
-}
-
-static
-void repeatUnpackRange(const char *src, const struct RepeatInfo *info,
- u64a offset, union RepeatControl *ctrl) {
- struct RepeatRangeControl *xs = &ctrl->range;
- xs->offset = loadPackedRelative(src, offset, info->packedCtrlSize - 1);
- xs->num = src[info->packedCtrlSize - 1];
-}
-
-static
-void repeatUnpackBitmap(const char *src, const struct RepeatInfo *info,
- u64a offset, union RepeatControl *ctrl) {
- struct RepeatBitmapControl *xs = &ctrl->bitmap;
- xs->offset = offset > info->repeatMax ? offset - info->repeatMax : 0;
- xs->bitmap = partial_load_u64a(src, info->packedCtrlSize);
-}
-
-static
-void repeatUnpackSparseOptimalP(const char *src, const struct RepeatInfo *info,
- u64a offset, union RepeatControl *ctrl) {
- struct RepeatRingControl *xs = &ctrl->ring;
- const u32 ring_indices_len = info->patchCount < 254 ? 2 : 4;
- const u32 offset_len = info->packedCtrlSize - ring_indices_len;
- xs->offset = loadPackedRelative(src, offset, offset_len);
- if (ring_indices_len == 4) {
- xs->first = unaligned_load_u16(src + offset_len);
- xs->last = unaligned_load_u16(src + offset_len + 2);
- } else {
- const u8 *indices = (const u8 *)src + offset_len;
- xs->first = indices[0];
- xs->last = indices[1];
- }
-}
-
-static
-void repeatUnpackTrailer(const char *src, const struct RepeatInfo *info,
- u64a offset, union RepeatControl *ctrl) {
- struct RepeatTrailerControl *xs = &ctrl->trailer;
-
- u64a v[2];
- unpack_bits_64(v, (const u8 *)src, info->packedFieldSizes, 2);
-
- xs->offset = offset - v[0] + info->repeatMin;
- xs->bitmap = v[1];
-
- DEBUG_PRINTF("loaded: xs->offset=%llu, xs->bitmap=0x%llx\n", xs->offset,
- xs->bitmap);
-}
-
-void repeatUnpack(const char *src, const struct RepeatInfo *info, u64a offset,
- union RepeatControl *ctrl) {
- assert(src && info && ctrl);
-
- switch ((enum RepeatType)info->type) {
- case REPEAT_RING:
- repeatUnpackRing(src, info, offset, ctrl);
- break;
- case REPEAT_FIRST:
- case REPEAT_LAST:
- repeatUnpackOffset(src, info, offset, ctrl);
- break;
- case REPEAT_RANGE:
- repeatUnpackRange(src, info, offset, ctrl);
- break;
- case REPEAT_BITMAP:
- repeatUnpackBitmap(src, info, offset, ctrl);
- break;
- case REPEAT_SPARSE_OPTIMAL_P:
- repeatUnpackSparseOptimalP(src, info, offset, ctrl);
- break;
- case REPEAT_TRAILER:
- repeatUnpackTrailer(src, info, offset, ctrl);
- break;
+ DEBUG_PRINTF("unpacking offset %llu [h%u]\n", xs->offset,
+ info->horizon);
+}
+
+static
+void repeatUnpackRange(const char *src, const struct RepeatInfo *info,
+ u64a offset, union RepeatControl *ctrl) {
+ struct RepeatRangeControl *xs = &ctrl->range;
+ xs->offset = loadPackedRelative(src, offset, info->packedCtrlSize - 1);
+ xs->num = src[info->packedCtrlSize - 1];
+}
+
+static
+void repeatUnpackBitmap(const char *src, const struct RepeatInfo *info,
+ u64a offset, union RepeatControl *ctrl) {
+ struct RepeatBitmapControl *xs = &ctrl->bitmap;
+ xs->offset = offset > info->repeatMax ? offset - info->repeatMax : 0;
+ xs->bitmap = partial_load_u64a(src, info->packedCtrlSize);
+}
+
+static
+void repeatUnpackSparseOptimalP(const char *src, const struct RepeatInfo *info,
+ u64a offset, union RepeatControl *ctrl) {
+ struct RepeatRingControl *xs = &ctrl->ring;
+ const u32 ring_indices_len = info->patchCount < 254 ? 2 : 4;
+ const u32 offset_len = info->packedCtrlSize - ring_indices_len;
+ xs->offset = loadPackedRelative(src, offset, offset_len);
+ if (ring_indices_len == 4) {
+ xs->first = unaligned_load_u16(src + offset_len);
+ xs->last = unaligned_load_u16(src + offset_len + 2);
+ } else {
+ const u8 *indices = (const u8 *)src + offset_len;
+ xs->first = indices[0];
+ xs->last = indices[1];
+ }
+}
+
+static
+void repeatUnpackTrailer(const char *src, const struct RepeatInfo *info,
+ u64a offset, union RepeatControl *ctrl) {
+ struct RepeatTrailerControl *xs = &ctrl->trailer;
+
+ u64a v[2];
+ unpack_bits_64(v, (const u8 *)src, info->packedFieldSizes, 2);
+
+ xs->offset = offset - v[0] + info->repeatMin;
+ xs->bitmap = v[1];
+
+ DEBUG_PRINTF("loaded: xs->offset=%llu, xs->bitmap=0x%llx\n", xs->offset,
+ xs->bitmap);
+}
+
+void repeatUnpack(const char *src, const struct RepeatInfo *info, u64a offset,
+ union RepeatControl *ctrl) {
+ assert(src && info && ctrl);
+
+ switch ((enum RepeatType)info->type) {
+ case REPEAT_RING:
+ repeatUnpackRing(src, info, offset, ctrl);
+ break;
+ case REPEAT_FIRST:
+ case REPEAT_LAST:
+ repeatUnpackOffset(src, info, offset, ctrl);
+ break;
+ case REPEAT_RANGE:
+ repeatUnpackRange(src, info, offset, ctrl);
+ break;
+ case REPEAT_BITMAP:
+ repeatUnpackBitmap(src, info, offset, ctrl);
+ break;
+ case REPEAT_SPARSE_OPTIMAL_P:
+ repeatUnpackSparseOptimalP(src, info, offset, ctrl);
+ break;
+ case REPEAT_TRAILER:
+ repeatUnpackTrailer(src, info, offset, ctrl);
+ break;
case REPEAT_ALWAYS:
/* nothing to do - no state */
break;
- }
-}
-
-static really_inline
-const u64a *getImplTable(const struct RepeatInfo *info) {
- const u64a *table = ((const u64a *)(ROUNDUP_PTR(
- ((const char *)(info) +
- sizeof(*info)),
- alignof(u64a))));
- return table;
-}
-
-static
-void storeInitialRingTopPatch(const struct RepeatInfo *info,
- struct RepeatRingControl *xs,
- u8 *state, u64a offset) {
- DEBUG_PRINTF("set the first patch, offset=%llu\n", offset);
- xs->offset = offset;
-
- u8 *active = state;
- u32 patch_count = info->patchCount;
- mmbit_clear(active, patch_count);
- mmbit_set(active, patch_count, 0);
-
- u8 *ring = active + info->patchesOffset;
- u32 encoding_size = info->encodingSize;
- partial_store_u64a(ring, 1ull, encoding_size);
- xs->first = 0;
- xs->last = 1;
-}
-
-static
-u32 getSparseOptimalTargetValue(const struct RepeatInfo *info,
- const u32 tval, u64a *val) {
- u32 patch_size = info->patchSize;
- const u64a *repeatTable = getImplTable(info);
- u32 loc = 0;
- DEBUG_PRINTF("val:%llu \n", *val);
- for (u32 i = 1; i <= patch_size - tval; i++) {
- u64a tmp = repeatTable[patch_size - i];
- if (*val >= tmp) {
- *val -= tmp;
- loc = i;
- i += (info->minPeriod - 1);
- }
- }
-
- return loc;
-}
-
-static
-u64a sparseLastTop(const struct RepeatInfo *info,
- const struct RepeatRingControl *xs, const u8 *state) {
- DEBUG_PRINTF("looking for last top\n");
- u32 patch_size = info->patchSize;
- u32 patch_count = info->patchCount;
- u32 encoding_size = info->encodingSize;
-
- u32 occ = ringOccupancy(xs, patch_count);
- u32 patch = xs->first + occ - 1;
- if (patch >= patch_count) {
- patch -= patch_count;
- }
-
- DEBUG_PRINTF("patch%u encoding_size%u occ%u\n", patch, encoding_size, occ);
- const u8 *ring = state + info->patchesOffset;
- u64a val = partial_load_u64a(ring + encoding_size * patch, encoding_size);
-
- DEBUG_PRINTF("val:%llu\n", val);
- const u64a *repeatTable = getImplTable(info);
- for (s32 i = patch_size - 1; i >= 0; i--) {
- if (val >= repeatTable[i]) {
- DEBUG_PRINTF("xs->offset%llu v%u p%llu\n",
- xs->offset, i, repeatTable[i]);
- return xs->offset + i + (occ - 1) * patch_size;
- }
- }
-
- assert(0);
- return 0;
-}
-
-u64a repeatLastTopSparseOptimalP(const struct RepeatInfo *info,
- const union RepeatControl *ctrl,
- const void *state) {
- return sparseLastTop(info, &ctrl->ring, state);
-}
-
-u64a repeatNextMatchSparseOptimalP(const struct RepeatInfo *info,
- const union RepeatControl *ctrl,
- const void *state, u64a offset) {
- const struct RepeatRingControl *xs = &ctrl->ring;
-
- DEBUG_PRINTF("repeat [%u, %u] looking for match after %llu\n",
- info->repeatMin, info->repeatMax, offset);
-
- assert(offset >= xs->offset);
-
- u64a nextOffset = offset + 1;
-
- u32 patch_size = info->patchSize;
- u32 patch;
- u32 tval;
- if (nextOffset <= xs->offset + info->repeatMin) {
- patch = xs->first;
- tval = 0;
- } else if (nextOffset > sparseLastTop(info, xs, state) + info->repeatMax) {
- DEBUG_PRINTF("ring is stale\n");
- return 0;
- } else {
- assert(nextOffset - xs->offset < UINT32_MAX); // ring is not stale
- u32 delta = (u32)(nextOffset - xs->offset);
- u32 lower = delta > info->repeatMax ? delta - info->repeatMax : 0;
- patch = lower / patch_size;
- tval = lower - patch * patch_size;
- }
-
- DEBUG_PRINTF("patch %u\n", patch);
- u32 patch_count = info->patchCount;
- if (patch >= patch_count) {
- return 0;
- }
-
- DEBUG_PRINTF("initial test for %u\n", tval);
-
- u32 begin = xs->first + patch;
- if (begin >= patch_count) {
- begin -= patch_count;
- }
-
- const u8 *active = (const u8 *)state;
- const u8 *ring = active + info->patchesOffset;
- u32 encoding_size = info->encodingSize;
- const u32 end = begin >= xs->last ? patch_count : xs->last;
- u32 low = tval;
- u64a diff = 0, loc = 0;
- DEBUG_PRINTF("begin %u end %u\n", begin, end);
- for (u32 p = mmbit_iterate_bounded(active, patch_count, begin, end);
- p != MMB_INVALID; p = mmbit_iterate_bounded(active, patch_count,
- p + 1, end)) {
- if (p != begin) {
- low = 0;
- }
-
- u64a val = partial_load_u64a(ring + encoding_size * p, encoding_size);
- u32 p1 = 0;
- if (p >= xs->first) {
- p1 = p - xs->first;
- } else {
- p1 = p + patch_count - xs->first;
- }
-
- if (val) {
- loc = getSparseOptimalTargetValue(info, low, &val);
- diff = (p1 + 1) * patch_size - loc;
- }
- if (loc) {
- u64a rv = MAX(nextOffset, xs->offset + info->repeatMin + diff);
- DEBUG_PRINTF("offset%llu next match at %llu\n", xs->offset, rv);
- return rv;
- }
- low = 0;
- }
-
- low = 0;
- if (begin >= xs->last) {
- for (u32 p = mmbit_iterate_bounded(active, patch_count, 0, xs->last);
- p != MMB_INVALID; p = mmbit_iterate_bounded(active, patch_count,
- p + 1, xs->last)) {
-
- u64a val = partial_load_u64a(ring + encoding_size * p,
- encoding_size);
- if (val) {
- loc = getSparseOptimalTargetValue(info, low, &val);
- diff = (p + 1) * patch_size - loc;
- }
- if (loc) {
- u64a rv = MAX(nextOffset, xs->offset + info->repeatMin +
- diff + (end - xs->first) * patch_size);
- DEBUG_PRINTF("next match at %llu\n", rv);
- return rv;
- }
- }
- }
-
- DEBUG_PRINTF("next match\n");
- return 0;
-}
-
-void repeatStoreSparseOptimalP(const struct RepeatInfo *info,
- union RepeatControl *ctrl, void *state,
- u64a offset, char is_alive) {
- struct RepeatRingControl *xs = &ctrl->ring;
- u8 *active = (u8 *)state;
-
- DEBUG_PRINTF("offset: %llu encoding_size: %u\n", offset,
- info->encodingSize);
-
- // If (a) this is the first top, or (b) the ring is stale, initialize the
- // ring and write this offset in as the first top.
- if (!is_alive ||
- offset > sparseLastTop(info, xs, state) + info->repeatMax) {
- storeInitialRingTopPatch(info, xs, active, offset);
- return;
- }
-
- // Tops should arrive in order, with no duplicates.
- assert(offset > sparseLastTop(info, xs, state));
-
- // As the ring is not stale, our delta should fit within a u32.
- assert(offset - xs->offset <= UINT32_MAX);
- u32 delta = (u32)(offset - xs->offset);
- u32 patch_size = info->patchSize;
- u32 patch_count = info->patchCount;
- u32 encoding_size = info->encodingSize;
- u32 patch = delta / patch_size;
-
- DEBUG_PRINTF("delta=%u, patch_size=%u, patch=%u\n", delta, patch_size,
- patch);
-
- u8 *ring = active + info->patchesOffset;
- u32 occ = ringOccupancy(xs, patch_count);
- u64a val = 0;
- u32 idx;
-
- DEBUG_PRINTF("patch: %u patch_count: %u occ: %u\n",
- patch, patch_count, occ);
- if (patch >= patch_count) {
- u32 patch_shift_count = patch - patch_count + 1;
- assert(patch >= patch_shift_count);
- DEBUG_PRINTF("shifting by %u\n", patch_shift_count);
- xs->offset += patch_size * patch_shift_count;
- xs->first += patch_shift_count;
- if (xs->first >= patch_count) {
- xs->first -= patch_count;
- }
- idx = xs->last + patch - occ;
- mmbit_unset_range(active, patch_count, xs->last,
- MIN(idx, patch_count));
- if (idx >= patch_count) {
- idx -= patch_count;
- mmbit_unset_range(active, patch_count, 0, idx + 1);
- }
- xs->last = idx + 1;
- if (xs->last == patch_count) {
- xs->last = 0;
- }
- } else if (patch < occ) {
- assert(patch == occ - 1);
- idx = xs->last == 0 ? patch_count - 1 : (u32)xs->last - 1;
- val = partial_load_u64a(ring + encoding_size * idx, encoding_size);
- } else {
- idx = xs->last + patch - occ;
- mmbit_unset_range(active, patch_count, xs->last,
- MIN(idx, patch_count));
- if (idx >= patch_count) {
- idx -= patch_count;
- mmbit_unset_range(active, patch_count, 0, idx + 1);
- }
- xs->last = idx + 1;
- if (xs->last == patch_count) {
- xs->last = 0;
- }
- }
-
- assert((u64a)patch * patch_size <= delta);
- u32 diff = delta - patch * patch_size;
- const u64a *repeatTable = getImplTable(info);
- val += repeatTable[diff];
-
- DEBUG_PRINTF("patch=%u, occ=%u\n", patch, occ);
- DEBUG_PRINTF("xs->first:%u xs->last:%u patch:%u\n",
- xs->first, xs->last, patch);
- DEBUG_PRINTF("value:%llu\n", val);
+ }
+}
+
+static really_inline
+const u64a *getImplTable(const struct RepeatInfo *info) {
+ const u64a *table = ((const u64a *)(ROUNDUP_PTR(
+ ((const char *)(info) +
+ sizeof(*info)),
+ alignof(u64a))));
+ return table;
+}
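
The pattern here is a variable-length table carried inline after its fixed-size header, found again by rounding the end of the header up to u64a alignment. A toy sketch of the same layout trick, with ROUNDUP_PTR open-coded and a hypothetical ToyInfo standing in for struct RepeatInfo:

#include <assert.h>
#include <stdalign.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

typedef uint64_t u64a;

struct ToyInfo {           /* stand-in for struct RepeatInfo */
    uint32_t patchSize;
    uint8_t encodingSize;  /* deliberately odd-sized to force padding */
};

/* The table lives directly after the header, rounded up to u64a
 * alignment (what ROUNDUP_PTR does in the real code). */
static u64a *toy_table(struct ToyInfo *info) {
    uintptr_t p = (uintptr_t)info + sizeof(*info);
    p = (p + alignof(u64a) - 1) & ~(uintptr_t)(alignof(u64a) - 1);
    return (u64a *)p;
}

int main(void) {
    size_t bytes = sizeof(struct ToyInfo) + alignof(u64a) + 4 * sizeof(u64a);
    struct ToyInfo *info = malloc(bytes);
    u64a *table = toy_table(info);
    for (u64a i = 0; i < 4; i++) {
        table[i] = (i + 1) * 10;   /* toy "repeat table" entries */
    }
    assert((uintptr_t)table % alignof(u64a) == 0);
    printf("table[2] = %llu\n", (unsigned long long)table[2]);
    free(info);
    return 0;
}
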
+
+static
+void storeInitialRingTopPatch(const struct RepeatInfo *info,
+ struct RepeatRingControl *xs,
+ u8 *state, u64a offset) {
+ DEBUG_PRINTF("set the first patch, offset=%llu\n", offset);
+ xs->offset = offset;
+
+ u8 *active = state;
+ u32 patch_count = info->patchCount;
+ mmbit_clear(active, patch_count);
+ mmbit_set(active, patch_count, 0);
+
+ u8 *ring = active + info->patchesOffset;
+ u32 encoding_size = info->encodingSize;
+ partial_store_u64a(ring, 1ull, encoding_size);
+ xs->first = 0;
+ xs->last = 1;
+}
+
+static
+u32 getSparseOptimalTargetValue(const struct RepeatInfo *info,
+ const u32 tval, u64a *val) {
+ u32 patch_size = info->patchSize;
+ const u64a *repeatTable = getImplTable(info);
+ u32 loc = 0;
+ DEBUG_PRINTF("val:%llu \n", *val);
+ for (u32 i = 1; i <= patch_size - tval; i++) {
+ u64a tmp = repeatTable[patch_size - i];
+ if (*val >= tmp) {
+ *val -= tmp;
+ loc = i;
+ i += (info->minPeriod - 1);
+ }
+ }
+
+ return loc;
+}
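
One way to see what the greedy loop above computes: if the table held table[i] = 1 << i (which the sparse-optimal table degenerates to when minPeriod == 1), each patch value would be a plain bitmap of top positions, and subtracting from the largest entry downward recovers the lowest top at position >= tval. A sketch under that assumption; the real loop additionally jumps i by minPeriod - 1 after each hit, a no-op here:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t u64a;
typedef uint32_t u32;

/* Greedy decode in the spirit of getSparseOptimalTargetValue(): strips
 * recognized tops out of *val and returns loc such that the lowest top at
 * or above position tval sits at patch position patch_size - loc. */
static u32 decode_lowest_top(const u64a *table, u32 patch_size, u32 tval,
                             u64a *val) {
    u32 loc = 0;
    for (u32 i = 1; i <= patch_size - tval; i++) {
        u64a tmp = table[patch_size - i];
        if (*val >= tmp) {
            *val -= tmp;
            loc = i;
        }
    }
    return loc;
}

int main(void) {
    enum { PATCH_SIZE = 8 };
    u64a table[PATCH_SIZE];
    for (u32 i = 0; i < PATCH_SIZE; i++) {
        table[i] = 1ULL << i;               /* minPeriod == 1 degenerate case */
    }
    u64a val = (1ULL << 2) | (1ULL << 5);   /* tops at positions 2 and 5 */
    u32 loc = decode_lowest_top(table, PATCH_SIZE, 0, &val);
    assert(PATCH_SIZE - loc == 2 && val == 0);
    printf("lowest top at position %u\n", PATCH_SIZE - loc);
    return 0;
}
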
+
+static
+u64a sparseLastTop(const struct RepeatInfo *info,
+ const struct RepeatRingControl *xs, const u8 *state) {
+ DEBUG_PRINTF("looking for last top\n");
+ u32 patch_size = info->patchSize;
+ u32 patch_count = info->patchCount;
+ u32 encoding_size = info->encodingSize;
+
+ u32 occ = ringOccupancy(xs, patch_count);
+ u32 patch = xs->first + occ - 1;
+ if (patch >= patch_count) {
+ patch -= patch_count;
+ }
+
+ DEBUG_PRINTF("patch%u encoding_size%u occ%u\n", patch, encoding_size, occ);
+ const u8 *ring = state + info->patchesOffset;
+ u64a val = partial_load_u64a(ring + encoding_size * patch, encoding_size);
+
+ DEBUG_PRINTF("val:%llu\n", val);
+ const u64a *repeatTable = getImplTable(info);
+ for (s32 i = patch_size - 1; i >= 0; i--) {
+ if (val >= repeatTable[i]) {
+ DEBUG_PRINTF("xs->offset%llu v%u p%llu\n",
+ xs->offset, i, repeatTable[i]);
+ return xs->offset + i + (occ - 1) * patch_size;
+ }
+ }
+
+ assert(0);
+ return 0;
+}
+
+u64a repeatLastTopSparseOptimalP(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl,
+ const void *state) {
+ return sparseLastTop(info, &ctrl->ring, state);
+}
+
+u64a repeatNextMatchSparseOptimalP(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl,
+ const void *state, u64a offset) {
+ const struct RepeatRingControl *xs = &ctrl->ring;
+
+ DEBUG_PRINTF("repeat [%u, %u] looking for match after %llu\n",
+ info->repeatMin, info->repeatMax, offset);
+
+ assert(offset >= xs->offset);
+
+ u64a nextOffset = offset + 1;
+
+ u32 patch_size = info->patchSize;
+ u32 patch;
+ u32 tval;
+ if (nextOffset <= xs->offset + info->repeatMin) {
+ patch = xs->first;
+ tval = 0;
+ } else if (nextOffset > sparseLastTop(info, xs, state) + info->repeatMax) {
+ DEBUG_PRINTF("ring is stale\n");
+ return 0;
+ } else {
+ assert(nextOffset - xs->offset < UINT32_MAX); // ring is not stale
+ u32 delta = (u32)(nextOffset - xs->offset);
+ u32 lower = delta > info->repeatMax ? delta - info->repeatMax : 0;
+ patch = lower / patch_size;
+ tval = lower - patch * patch_size;
+ }
+
+ DEBUG_PRINTF("patch %u\n", patch);
+ u32 patch_count = info->patchCount;
+ if (patch >= patch_count) {
+ return 0;
+ }
+
+ DEBUG_PRINTF("initial test for %u\n", tval);
+
+ u32 begin = xs->first + patch;
+ if (begin >= patch_count) {
+ begin -= patch_count;
+ }
+
+ const u8 *active = (const u8 *)state;
+ const u8 *ring = active + info->patchesOffset;
+ u32 encoding_size = info->encodingSize;
+ const u32 end = begin >= xs->last ? patch_count : xs->last;
+ u32 low = tval;
+ u64a diff = 0, loc = 0;
+ DEBUG_PRINTF("begin %u end %u\n", begin, end);
+ for (u32 p = mmbit_iterate_bounded(active, patch_count, begin, end);
+ p != MMB_INVALID; p = mmbit_iterate_bounded(active, patch_count,
+ p + 1, end)) {
+ if (p != begin) {
+ low = 0;
+ }
+
+ u64a val = partial_load_u64a(ring + encoding_size * p, encoding_size);
+ u32 p1 = 0;
+ if (p >= xs->first) {
+ p1 = p - xs->first;
+ } else {
+ p1 = p + patch_count - xs->first;
+ }
+
+ if (val) {
+ loc = getSparseOptimalTargetValue(info, low, &val);
+ diff = (p1 + 1) * patch_size - loc;
+ }
+ if (loc) {
+ u64a rv = MAX(nextOffset, xs->offset + info->repeatMin + diff);
+ DEBUG_PRINTF("offset%llu next match at %llu\n", xs->offset, rv);
+ return rv;
+ }
+ low = 0;
+ }
+
+ low = 0;
+ if (begin >= xs->last) {
+ for (u32 p = mmbit_iterate_bounded(active, patch_count, 0, xs->last);
+ p != MMB_INVALID; p = mmbit_iterate_bounded(active, patch_count,
+ p + 1, xs->last)) {
+
+ u64a val = partial_load_u64a(ring + encoding_size * p,
+ encoding_size);
+ if (val) {
+ loc = getSparseOptimalTargetValue(info, low, &val);
+ diff = (p + 1) * patch_size - loc;
+ }
+ if (loc) {
+ u64a rv = MAX(nextOffset, xs->offset + info->repeatMin +
+ diff + (end - xs->first) * patch_size);
+ DEBUG_PRINTF("next match at %llu\n", rv);
+ return rv;
+ }
+ }
+ }
+
+ DEBUG_PRINTF("next match\n");
+ return 0;
+}
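
The starting point of the scan above is plain div/mod arithmetic on the delta past the match window; a tiny worked example with assumed toy parameters:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t u64a;
typedef uint32_t u32;

int main(void) {
    const u32 patch_size = 32, repeatMax = 100;
    const u64a ring_base = 1000;        /* plays the role of xs->offset */
    u64a nextOffset = 1200;

    /* Earliest top that could still produce a match at nextOffset. */
    u32 delta = (u32)(nextOffset - ring_base);
    u32 lower = delta > repeatMax ? delta - repeatMax : 0;
    u32 patch = lower / patch_size;     /* which patch to start scanning */
    u32 tval  = lower % patch_size;     /* first in-patch position to test */

    assert(patch == 3 && tval == 4);    /* lower = 100 */
    printf("start at patch %u, position %u\n", patch, tval);
    return 0;
}
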
+
+void repeatStoreSparseOptimalP(const struct RepeatInfo *info,
+ union RepeatControl *ctrl, void *state,
+ u64a offset, char is_alive) {
+ struct RepeatRingControl *xs = &ctrl->ring;
+ u8 *active = (u8 *)state;
+
+ DEBUG_PRINTF("offset: %llu encoding_size: %u\n", offset,
+ info->encodingSize);
+
+ // If (a) this is the first top, or (b) the ring is stale, initialize the
+ // ring and write this offset in as the first top.
+ if (!is_alive ||
+ offset > sparseLastTop(info, xs, state) + info->repeatMax) {
+ storeInitialRingTopPatch(info, xs, active, offset);
+ return;
+ }
+
+ // Tops should arrive in order, with no duplicates.
+ assert(offset > sparseLastTop(info, xs, state));
+
+ // As the ring is not stale, our delta should fit within a u32.
+ assert(offset - xs->offset <= UINT32_MAX);
+ u32 delta = (u32)(offset - xs->offset);
+ u32 patch_size = info->patchSize;
+ u32 patch_count = info->patchCount;
+ u32 encoding_size = info->encodingSize;
+ u32 patch = delta / patch_size;
+
+ DEBUG_PRINTF("delta=%u, patch_size=%u, patch=%u\n", delta, patch_size,
+ patch);
+
+ u8 *ring = active + info->patchesOffset;
+ u32 occ = ringOccupancy(xs, patch_count);
+ u64a val = 0;
+ u32 idx;
+
+ DEBUG_PRINTF("patch: %u patch_count: %u occ: %u\n",
+ patch, patch_count, occ);
+ if (patch >= patch_count) {
+ u32 patch_shift_count = patch - patch_count + 1;
+ assert(patch >= patch_shift_count);
+ DEBUG_PRINTF("shifting by %u\n", patch_shift_count);
+ xs->offset += patch_size * patch_shift_count;
+ xs->first += patch_shift_count;
+ if (xs->first >= patch_count) {
+ xs->first -= patch_count;
+ }
+ idx = xs->last + patch - occ;
+ mmbit_unset_range(active, patch_count, xs->last,
+ MIN(idx, patch_count));
+ if (idx >= patch_count) {
+ idx -= patch_count;
+ mmbit_unset_range(active, patch_count, 0, idx + 1);
+ }
+ xs->last = idx + 1;
+ if (xs->last == patch_count) {
+ xs->last = 0;
+ }
+ } else if (patch < occ) {
+ assert(patch == occ - 1);
+ idx = xs->last == 0 ? patch_count - 1 : (u32)xs->last - 1;
+ val = partial_load_u64a(ring + encoding_size * idx, encoding_size);
+    } else {
+        // Top opens one or more fresh patches within the ring's capacity.
+ idx = xs->last + patch - occ;
+ mmbit_unset_range(active, patch_count, xs->last,
+ MIN(idx, patch_count));
+ if (idx >= patch_count) {
+ idx -= patch_count;
+ mmbit_unset_range(active, patch_count, 0, idx + 1);
+ }
+ xs->last = idx + 1;
+ if (xs->last == patch_count) {
+ xs->last = 0;
+ }
+ }
+
+ assert((u64a)patch * patch_size <= delta);
+ u32 diff = delta - patch * patch_size;
+ const u64a *repeatTable = getImplTable(info);
+ val += repeatTable[diff];
+
+ DEBUG_PRINTF("patch=%u, occ=%u\n", patch, occ);
+ DEBUG_PRINTF("xs->first:%u xs->last:%u patch:%u\n",
+ xs->first, xs->last, patch);
+ DEBUG_PRINTF("value:%llu\n", val);
assert(fits_in_len_bytes(val, encoding_size));
- partial_store_u64a(ring + encoding_size * idx, val, encoding_size);
- mmbit_set(active, patch_count, idx);
-}
-
-static
-char sparseHasMatch(const struct RepeatInfo *info, const u8 *state,
- u32 lower, u32 upper) {
- u32 patch_size = info->patchSize;
- u32 patch_count = info->patchCount;
- u32 encoding_size = info->encodingSize;
- u32 patch_lower = lower / patch_size;
- u32 patch_upper = upper / patch_size;
- u32 diff = lower - patch_lower * patch_size;
-
- DEBUG_PRINTF("lower=%u, upper=%u\n", lower, upper);
- const u64a *repeatTable = getImplTable(info);
-
- const u8 *ring = state + info->patchesOffset;
- const u8 *active = state;
- u64a val;
- // test the first patch
- if (mmbit_isset(active, patch_count, patch_lower)) {
- val = partial_load_u64a(ring + encoding_size * patch_lower,
- encoding_size);
- DEBUG_PRINTF("patch_size=%u, diff=%u, table=%llu\n",
- patch_size, diff, repeatTable[diff]);
- DEBUG_PRINTF("patch_lower=%u, patch_upper=%u\n",
- patch_lower, patch_upper);
- if (patch_upper == patch_lower) {
- u32 limit = upper - patch_lower * patch_size;
- getSparseOptimalTargetValue(info, limit + 1, &val);
- }
- if (val >= repeatTable[diff]) {
- return 1;
- }
- }
-
- if (patch_lower == patch_upper) {
- return 0;
- }
-
- // test the patches between first and last
- u32 m = mmbit_iterate_bounded(active, patch_count,
- patch_lower + 1, patch_upper);
- if (m != MMB_INVALID) {
- return 1;
- }
-
- if (patch_upper == patch_count) {
- return 0;
- }
-
- // test the last patch
- if (!mmbit_isset(active, patch_count, patch_upper)) {
- return 0;
- }
- diff = (patch_upper + 1) * patch_size - upper;
- DEBUG_PRINTF("diff=%u\n", diff);
- val = partial_load_u64a(ring + encoding_size * patch_upper, encoding_size);
- getSparseOptimalTargetValue(info, patch_size - diff + 1, &val);
- if (val) {
- DEBUG_PRINTF("last patch: val=%llu\n", val);
- return 1;
- }
-
- return 0;
-}
-
-enum RepeatMatch repeatHasMatchSparseOptimalP(const struct RepeatInfo *info,
- const union RepeatControl *ctrl,
- const void *state, u64a offset) {
- DEBUG_PRINTF("check for match at %llu corresponding to trigger "
- "at [%llu, %llu]\n", offset, offset - info->repeatMax,
- offset - info->repeatMin);
-
- const struct RepeatRingControl *xs = &ctrl->ring;
- const u8 *ring = (const u8 *)state;
-
- assert(offset >= xs->offset);
-
- if (offset < xs->offset + info->repeatMin) {
- DEBUG_PRINTF("too soon\n");
- return REPEAT_NOMATCH;
- } else if (offset > sparseLastTop(info, xs, state) + info->repeatMax) {
- DEBUG_PRINTF("stale\n");
- return REPEAT_STALE;
- }
-
- // Our delta between the base offset of the ring and the current offset
- // must fit within the range [repeatMin, lastPossibleTop + repeatMax]. This
- // range fits comfortably within a u32.
- assert(offset - xs->offset <= UINT32_MAX);
-
- u32 delta = (u32)(offset - xs->offset);
- u32 patch_size = info->patchSize;
- u32 patch_count = info->patchCount;
- u32 occ = ringOccupancy(xs, patch_count);
-
- u32 lower = delta > info->repeatMax ? delta - info->repeatMax : 0;
- u32 upper = MIN(delta - info->repeatMin, occ * patch_size - 1);
-
- DEBUG_PRINTF("lower=%u, upper=%u\n", lower, upper);
- u32 patch_lower = lower / patch_size;
- u32 patch_upper = upper / patch_size;
-
- if (patch_lower >= occ) {
- DEBUG_PRINTF("too late\n");
- return REPEAT_NOMATCH;
- }
-
- u32 remaining_lower = lower - patch_lower * patch_size;
- u32 remaining_upper = upper - patch_upper * patch_size;
- patch_lower += xs->first;
- patch_upper += xs->first;
- if (patch_lower >= patch_count) {
- patch_lower -= patch_count;
- patch_upper -= patch_count;
- } else if (patch_upper >= patch_count) {
- patch_upper -= patch_count;
- }
-
- DEBUG_PRINTF("xs->first:%u xs->last:%u patch_lower:%u, patch_upper:%u\n",
- xs->first, xs->last, patch_lower, patch_upper);
-
- u32 scan_end;
- const char is_not_wrapped = (patch_lower <= patch_upper);
- if (is_not_wrapped) {
- scan_end = patch_upper * patch_size + remaining_upper;
- } else {
- scan_end = patch_count * patch_size;
- }
-
- lower = patch_lower * patch_size + remaining_lower;
- if (sparseHasMatch(info, ring, lower, scan_end)) {
- return REPEAT_MATCH;
- }
-
- if (!is_not_wrapped) {
- upper -= (patch_count - xs->first) * patch_size;
- if (sparseHasMatch(info, ring, 0, upper)) {
- return REPEAT_MATCH;
- }
- }
-
- return REPEAT_NOMATCH;
-}
+ partial_store_u64a(ring + encoding_size * idx, val, encoding_size);
+ mmbit_set(active, patch_count, idx);
+}
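
The patch >= patch_count branch above slides the whole window forward. Isolated from the mmbit bookkeeping, the slide is simple modular arithmetic on the base offset and first index; a sketch with a toy ring:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t u64a;
typedef uint32_t u32;

struct Ring {
    u64a offset;       /* stream offset at the start of patch `first` */
    u32 first;         /* ring slot holding the oldest live patch */
    u32 patch_count;
    u32 patch_size;
};

/* Slide the ring so a top `patch` patches past the base fits: advance the
 * base offset and the first index, wrapping modulo patch_count. */
static void slide(struct Ring *r, u32 patch) {
    assert(patch >= r->patch_count);
    u32 shift = patch - r->patch_count + 1;   /* patches to drop */
    r->offset += (u64a)r->patch_size * shift;
    r->first = (r->first + shift) % r->patch_count;
}

int main(void) {
    struct Ring r = { .offset = 0, .first = 2, .patch_count = 4,
                      .patch_size = 10 };
    slide(&r, 5);   /* new top two patches past the end: drop two patches */
    assert(r.offset == 20 && r.first == 0);
    printf("base now %llu, first patch slot %u\n",
           (unsigned long long)r.offset, r.first);
    return 0;
}
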
+
+static
+char sparseHasMatch(const struct RepeatInfo *info, const u8 *state,
+ u32 lower, u32 upper) {
+ u32 patch_size = info->patchSize;
+ u32 patch_count = info->patchCount;
+ u32 encoding_size = info->encodingSize;
+ u32 patch_lower = lower / patch_size;
+ u32 patch_upper = upper / patch_size;
+ u32 diff = lower - patch_lower * patch_size;
+
+ DEBUG_PRINTF("lower=%u, upper=%u\n", lower, upper);
+ const u64a *repeatTable = getImplTable(info);
+
+ const u8 *ring = state + info->patchesOffset;
+ const u8 *active = state;
+ u64a val;
+ // test the first patch
+ if (mmbit_isset(active, patch_count, patch_lower)) {
+ val = partial_load_u64a(ring + encoding_size * patch_lower,
+ encoding_size);
+ DEBUG_PRINTF("patch_size=%u, diff=%u, table=%llu\n",
+ patch_size, diff, repeatTable[diff]);
+ DEBUG_PRINTF("patch_lower=%u, patch_upper=%u\n",
+ patch_lower, patch_upper);
+ if (patch_upper == patch_lower) {
+ u32 limit = upper - patch_lower * patch_size;
+ getSparseOptimalTargetValue(info, limit + 1, &val);
+ }
+ if (val >= repeatTable[diff]) {
+ return 1;
+ }
+ }
+
+ if (patch_lower == patch_upper) {
+ return 0;
+ }
+
+ // test the patches between first and last
+ u32 m = mmbit_iterate_bounded(active, patch_count,
+ patch_lower + 1, patch_upper);
+ if (m != MMB_INVALID) {
+ return 1;
+ }
+
+ if (patch_upper == patch_count) {
+ return 0;
+ }
+
+ // test the last patch
+ if (!mmbit_isset(active, patch_count, patch_upper)) {
+ return 0;
+ }
+ diff = (patch_upper + 1) * patch_size - upper;
+ DEBUG_PRINTF("diff=%u\n", diff);
+ val = partial_load_u64a(ring + encoding_size * patch_upper, encoding_size);
+ getSparseOptimalTargetValue(info, patch_size - diff + 1, &val);
+ if (val) {
+ DEBUG_PRINTF("last patch: val=%llu\n", val);
+ return 1;
+ }
+
+ return 0;
+}
+
+enum RepeatMatch repeatHasMatchSparseOptimalP(const struct RepeatInfo *info,
+ const union RepeatControl *ctrl,
+ const void *state, u64a offset) {
+ DEBUG_PRINTF("check for match at %llu corresponding to trigger "
+ "at [%llu, %llu]\n", offset, offset - info->repeatMax,
+ offset - info->repeatMin);
+
+ const struct RepeatRingControl *xs = &ctrl->ring;
+ const u8 *ring = (const u8 *)state;
+
+ assert(offset >= xs->offset);
+
+ if (offset < xs->offset + info->repeatMin) {
+ DEBUG_PRINTF("too soon\n");
+ return REPEAT_NOMATCH;
+ } else if (offset > sparseLastTop(info, xs, state) + info->repeatMax) {
+ DEBUG_PRINTF("stale\n");
+ return REPEAT_STALE;
+ }
+
+ // Our delta between the base offset of the ring and the current offset
+ // must fit within the range [repeatMin, lastPossibleTop + repeatMax]. This
+ // range fits comfortably within a u32.
+ assert(offset - xs->offset <= UINT32_MAX);
+
+ u32 delta = (u32)(offset - xs->offset);
+ u32 patch_size = info->patchSize;
+ u32 patch_count = info->patchCount;
+ u32 occ = ringOccupancy(xs, patch_count);
+
+ u32 lower = delta > info->repeatMax ? delta - info->repeatMax : 0;
+ u32 upper = MIN(delta - info->repeatMin, occ * patch_size - 1);
+
+ DEBUG_PRINTF("lower=%u, upper=%u\n", lower, upper);
+ u32 patch_lower = lower / patch_size;
+ u32 patch_upper = upper / patch_size;
+
+ if (patch_lower >= occ) {
+ DEBUG_PRINTF("too late\n");
+ return REPEAT_NOMATCH;
+ }
+
+ u32 remaining_lower = lower - patch_lower * patch_size;
+ u32 remaining_upper = upper - patch_upper * patch_size;
+ patch_lower += xs->first;
+ patch_upper += xs->first;
+ if (patch_lower >= patch_count) {
+ patch_lower -= patch_count;
+ patch_upper -= patch_count;
+ } else if (patch_upper >= patch_count) {
+ patch_upper -= patch_count;
+ }
+
+ DEBUG_PRINTF("xs->first:%u xs->last:%u patch_lower:%u, patch_upper:%u\n",
+ xs->first, xs->last, patch_lower, patch_upper);
+
+ u32 scan_end;
+ const char is_not_wrapped = (patch_lower <= patch_upper);
+ if (is_not_wrapped) {
+ scan_end = patch_upper * patch_size + remaining_upper;
+ } else {
+ scan_end = patch_count * patch_size;
+ }
+
+ lower = patch_lower * patch_size + remaining_lower;
+ if (sparseHasMatch(info, ring, lower, scan_end)) {
+ return REPEAT_MATCH;
+ }
+
+ if (!is_not_wrapped) {
+ upper -= (patch_count - xs->first) * patch_size;
+ if (sparseHasMatch(info, ring, 0, upper)) {
+ return REPEAT_MATCH;
+ }
+ }
+
+ return REPEAT_NOMATCH;
+}
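
The too-soon/stale guards at the top of repeatHasMatchSparseOptimalP() are worth seeing in isolation: they bracket the only offsets for which scanning the ring can pay off. A self-contained sketch with hypothetical names (the real function derives the two tops from xs->offset and sparseLastTop()):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t u64a;
typedef uint32_t u32;

enum ToyMatch { TOY_STALE, TOY_NOMATCH, TOY_POSSIBLE };

/* first_top: base offset of the ring; last_top: most recent top stored. */
static enum ToyMatch classify(u64a offset, u64a first_top, u64a last_top,
                              u32 repeatMin, u32 repeatMax) {
    if (offset < first_top + repeatMin) {
        return TOY_NOMATCH;                  /* too soon for any top */
    }
    if (offset > last_top + repeatMax) {
        return TOY_STALE;                    /* every stored top expired */
    }
    return TOY_POSSIBLE;                     /* worth scanning the ring */
}

int main(void) {
    assert(classify(104, 100, 150, 5, 20) == TOY_NOMATCH);
    assert(classify(171, 100, 150, 5, 20) == TOY_STALE);
    assert(classify(160, 100, 150, 5, 20) == TOY_POSSIBLE);
    printf("classification ok\n");
    return 0;
}
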