diff options
author | bnagaev <bnagaev@yandex-team.ru> | 2022-02-10 16:47:04 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:47:04 +0300 |
commit | c74559fb88da8adac0d9186cfa55a6b13c47695f (patch) | |
tree | b83306b6e37edeea782e9eed673d89286c4fef35 /contrib/libs/hyperscan/src/nfa/repeatcompile.cpp | |
parent | d6449ba66291ff0c0d352c82e6eb3efb4c8a7e8d (diff) | |
download | ydb-c74559fb88da8adac0d9186cfa55a6b13c47695f.tar.gz |
Restoring authorship annotation for <bnagaev@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/hyperscan/src/nfa/repeatcompile.cpp')
-rw-r--r-- | contrib/libs/hyperscan/src/nfa/repeatcompile.cpp | 740 |
1 files changed, 370 insertions, 370 deletions
diff --git a/contrib/libs/hyperscan/src/nfa/repeatcompile.cpp b/contrib/libs/hyperscan/src/nfa/repeatcompile.cpp index 48d96bfa06..934dd29e6b 100644 --- a/contrib/libs/hyperscan/src/nfa/repeatcompile.cpp +++ b/contrib/libs/hyperscan/src/nfa/repeatcompile.cpp @@ -1,211 +1,211 @@ -/* +/* * Copyright (c) 2015-2016, Intel Corporation - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Intel Corporation nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/** \file - * \brief Bounded repeat compile-time code. - */ -#include "repeatcompile.h" -#include "util/bitutils.h" -#include "util/charreach.h" -#include "util/depth.h" -#include "util/dump_charclass.h" + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Bounded repeat compile-time code. + */ +#include "repeatcompile.h" +#include "util/bitutils.h" +#include "util/charreach.h" +#include "util/depth.h" +#include "util/dump_charclass.h" #include "util/multibit_build.h" -#include "util/verify_types.h" - -#include <algorithm> -#include <cstring> // memset -#include <utility> - -using namespace std; - -namespace ue2 { - -/** \brief Calculate the number of slots required to store the given repeat in - * a RANGE model. */ -static -u32 numRangeSlots(u32 repeatMin, u32 repeatMax) { - assert(repeatMax > repeatMin); - - u32 d = repeatMax - repeatMin; - u32 slots = 2 * ((repeatMax / d) + 1); - return slots; -} - -static -u32 calcPackedBits(u64a val) { - assert(val); - if (val <= 1) { - return 1; - } - u32 bits = lg2_64(val - 1) + 1U; /* lg2 rounds down */ - DEBUG_PRINTF("packing %llu into %u bits\n", val, bits); - return bits; -} - -/* returns the min number of bytes required to represent val options */ -u32 calcPackedBytes(u64a val) { - u32 rv = (calcPackedBits(val) + 7U) / 8U; - DEBUG_PRINTF("packing %llu into %u bytes\n", val, rv); - return rv; -} - -static -u32 repeatRecurTable(struct RepeatStateInfo *info, const depth &repeatMax, - const u32 minPeriod) { - u32 repeatTmp = info->patchCount > 2 ? 64 : (u32)repeatMax; - u32 repeat_index = repeatTmp < minPeriod ? repeatTmp : minPeriod; - for (u32 i = 0; i <= repeat_index; i++) { - info->table.push_back(i + 1); - } - for (u32 i = minPeriod + 1; i <= repeatTmp; i++) { - info->table.push_back(info->table[i - 1] + info->table[i - minPeriod]); - if (info->table[i] < info->table[i - 1]) { - return i - 1; - } - } - return 0; -} - -static -u32 findOptimalPatchSize(struct RepeatStateInfo *info, const depth &repeatMax, - const u32 minPeriod, u32 rv) { - u32 cnt = 0; - u32 patch_bits = 0; - u32 total_size = 0; - u32 min = ~0U; - u32 patch_len = 0; - - if (!rv) { - rv = repeatMax; - } - - for (u32 i = minPeriod; i <= rv; i++) { - cnt = ((u32)repeatMax + (i - 1)) / i + 1; - - // no bit packing version - patch_bits = calcPackedBits(info->table[i]); - total_size = (patch_bits + 7U) / 8U * cnt; - - if (total_size < min) { - patch_len = i; - min = total_size; - info->patchCount = cnt; - } - } - return patch_len; -} - -RepeatStateInfo::RepeatStateInfo(enum RepeatType type, const depth &repeatMin, - const depth &repeatMax, u32 minPeriod) - : stateSize(0), packedCtrlSize(0), horizon(0), patchCount(0), - patchSize(0), encodingSize(0), patchesOffset(0) { - assert(repeatMin <= repeatMax); - assert(repeatMax.is_reachable()); - assert(minPeriod || type != REPEAT_SPARSE_OPTIMAL_P); - - switch (type) { - case REPEAT_FIRST: - assert(repeatMin.is_finite()); - stateSize = 0; // everything is in the control block. - horizon = repeatMin; - packedCtrlSize = calcPackedBytes(horizon + 1); - break; - case REPEAT_LAST: - assert(repeatMax.is_finite()); - stateSize = 0; // everything is in the control block. - horizon = repeatMax + 1; - packedCtrlSize = calcPackedBytes(horizon + 1); - break; - case REPEAT_RING: - assert(repeatMax.is_finite()); - stateSize = mmbit_size(repeatMax + 1); - horizon = repeatMax * 2 + 1; /* TODO: investigate tightening */ - // Packed offset member, plus two bytes for each ring index, reduced to - // one byte each if they'll fit in eight bits. - { - u32 offset_len = calcPackedBytes(horizon + 1); - u32 ring_indices_len = repeatMax < depth(254) ? 2 : 4; - packedCtrlSize = offset_len + ring_indices_len; - } - break; - case REPEAT_RANGE: - assert(repeatMax.is_finite()); - assert(repeatMin < repeatMax); - stateSize = numRangeSlots(repeatMin, repeatMax) * sizeof(u16); - horizon = repeatMax * 2 + 1; - // Packed offset member, plus one byte for the number of range - // elements. - packedCtrlSize = calcPackedBytes(horizon + 1) + 1; - break; - case REPEAT_BITMAP: - stateSize = 0; // everything is in the control block. - horizon = 0; // unused - packedCtrlSize = ROUNDUP_N(repeatMax + 1, 8) / 8; - break; - case REPEAT_SPARSE_OPTIMAL_P: - assert(minPeriod); - assert(repeatMax.is_finite()); - { - u32 rv = repeatRecurTable(this, repeatMax, minPeriod); - u32 repeatTmp = 0; - if ((u32)repeatMax < minPeriod) { - repeatTmp = repeatMax; - patchCount = 1; - } else { - // find optimal patch size - repeatTmp = - findOptimalPatchSize(this, repeatMax, minPeriod, rv); - assert(patchCount < 65536); - } - DEBUG_PRINTF("repeat[%u %u], period=%u\n", (u32)repeatMin, - (u32)repeatMax, minPeriod); - u64a maxVal = table[repeatTmp]; - encodingSize = calcPackedBytes(maxVal); - patchSize = repeatTmp; - assert(encodingSize <= 64); - - patchesOffset = mmbit_size(patchCount); - stateSize = patchesOffset + encodingSize * patchCount; - horizon = (repeatTmp * patchCount) * 2 + 1; - u32 ring_indices_len = patchCount < depth(254) ? 2 : 4; - packedCtrlSize = calcPackedBytes(horizon + 1) + ring_indices_len; - } - break; - case REPEAT_TRAILER: - assert(repeatMax.is_finite()); - assert(repeatMin <= depth(64)); - stateSize = 0; // everything is in the control block. - horizon = repeatMax + 1; - packedFieldSizes.resize(2); - packedFieldSizes[0] = calcPackedBits(horizon + 1); - packedFieldSizes[1] = repeatMin; - packedCtrlSize = (packedFieldSizes[0] + packedFieldSizes[1] + 7U) / 8U; - break; +#include "util/verify_types.h" + +#include <algorithm> +#include <cstring> // memset +#include <utility> + +using namespace std; + +namespace ue2 { + +/** \brief Calculate the number of slots required to store the given repeat in + * a RANGE model. */ +static +u32 numRangeSlots(u32 repeatMin, u32 repeatMax) { + assert(repeatMax > repeatMin); + + u32 d = repeatMax - repeatMin; + u32 slots = 2 * ((repeatMax / d) + 1); + return slots; +} + +static +u32 calcPackedBits(u64a val) { + assert(val); + if (val <= 1) { + return 1; + } + u32 bits = lg2_64(val - 1) + 1U; /* lg2 rounds down */ + DEBUG_PRINTF("packing %llu into %u bits\n", val, bits); + return bits; +} + +/* returns the min number of bytes required to represent val options */ +u32 calcPackedBytes(u64a val) { + u32 rv = (calcPackedBits(val) + 7U) / 8U; + DEBUG_PRINTF("packing %llu into %u bytes\n", val, rv); + return rv; +} + +static +u32 repeatRecurTable(struct RepeatStateInfo *info, const depth &repeatMax, + const u32 minPeriod) { + u32 repeatTmp = info->patchCount > 2 ? 64 : (u32)repeatMax; + u32 repeat_index = repeatTmp < minPeriod ? repeatTmp : minPeriod; + for (u32 i = 0; i <= repeat_index; i++) { + info->table.push_back(i + 1); + } + for (u32 i = minPeriod + 1; i <= repeatTmp; i++) { + info->table.push_back(info->table[i - 1] + info->table[i - minPeriod]); + if (info->table[i] < info->table[i - 1]) { + return i - 1; + } + } + return 0; +} + +static +u32 findOptimalPatchSize(struct RepeatStateInfo *info, const depth &repeatMax, + const u32 minPeriod, u32 rv) { + u32 cnt = 0; + u32 patch_bits = 0; + u32 total_size = 0; + u32 min = ~0U; + u32 patch_len = 0; + + if (!rv) { + rv = repeatMax; + } + + for (u32 i = minPeriod; i <= rv; i++) { + cnt = ((u32)repeatMax + (i - 1)) / i + 1; + + // no bit packing version + patch_bits = calcPackedBits(info->table[i]); + total_size = (patch_bits + 7U) / 8U * cnt; + + if (total_size < min) { + patch_len = i; + min = total_size; + info->patchCount = cnt; + } + } + return patch_len; +} + +RepeatStateInfo::RepeatStateInfo(enum RepeatType type, const depth &repeatMin, + const depth &repeatMax, u32 minPeriod) + : stateSize(0), packedCtrlSize(0), horizon(0), patchCount(0), + patchSize(0), encodingSize(0), patchesOffset(0) { + assert(repeatMin <= repeatMax); + assert(repeatMax.is_reachable()); + assert(minPeriod || type != REPEAT_SPARSE_OPTIMAL_P); + + switch (type) { + case REPEAT_FIRST: + assert(repeatMin.is_finite()); + stateSize = 0; // everything is in the control block. + horizon = repeatMin; + packedCtrlSize = calcPackedBytes(horizon + 1); + break; + case REPEAT_LAST: + assert(repeatMax.is_finite()); + stateSize = 0; // everything is in the control block. + horizon = repeatMax + 1; + packedCtrlSize = calcPackedBytes(horizon + 1); + break; + case REPEAT_RING: + assert(repeatMax.is_finite()); + stateSize = mmbit_size(repeatMax + 1); + horizon = repeatMax * 2 + 1; /* TODO: investigate tightening */ + // Packed offset member, plus two bytes for each ring index, reduced to + // one byte each if they'll fit in eight bits. + { + u32 offset_len = calcPackedBytes(horizon + 1); + u32 ring_indices_len = repeatMax < depth(254) ? 2 : 4; + packedCtrlSize = offset_len + ring_indices_len; + } + break; + case REPEAT_RANGE: + assert(repeatMax.is_finite()); + assert(repeatMin < repeatMax); + stateSize = numRangeSlots(repeatMin, repeatMax) * sizeof(u16); + horizon = repeatMax * 2 + 1; + // Packed offset member, plus one byte for the number of range + // elements. + packedCtrlSize = calcPackedBytes(horizon + 1) + 1; + break; + case REPEAT_BITMAP: + stateSize = 0; // everything is in the control block. + horizon = 0; // unused + packedCtrlSize = ROUNDUP_N(repeatMax + 1, 8) / 8; + break; + case REPEAT_SPARSE_OPTIMAL_P: + assert(minPeriod); + assert(repeatMax.is_finite()); + { + u32 rv = repeatRecurTable(this, repeatMax, minPeriod); + u32 repeatTmp = 0; + if ((u32)repeatMax < minPeriod) { + repeatTmp = repeatMax; + patchCount = 1; + } else { + // find optimal patch size + repeatTmp = + findOptimalPatchSize(this, repeatMax, minPeriod, rv); + assert(patchCount < 65536); + } + DEBUG_PRINTF("repeat[%u %u], period=%u\n", (u32)repeatMin, + (u32)repeatMax, minPeriod); + u64a maxVal = table[repeatTmp]; + encodingSize = calcPackedBytes(maxVal); + patchSize = repeatTmp; + assert(encodingSize <= 64); + + patchesOffset = mmbit_size(patchCount); + stateSize = patchesOffset + encodingSize * patchCount; + horizon = (repeatTmp * patchCount) * 2 + 1; + u32 ring_indices_len = patchCount < depth(254) ? 2 : 4; + packedCtrlSize = calcPackedBytes(horizon + 1) + ring_indices_len; + } + break; + case REPEAT_TRAILER: + assert(repeatMax.is_finite()); + assert(repeatMin <= depth(64)); + stateSize = 0; // everything is in the control block. + horizon = repeatMax + 1; + packedFieldSizes.resize(2); + packedFieldSizes[0] = calcPackedBits(horizon + 1); + packedFieldSizes[1] = repeatMin; + packedCtrlSize = (packedFieldSizes[0] + packedFieldSizes[1] + 7U) / 8U; + break; case REPEAT_ALWAYS: assert(repeatMin == 0ULL); assert(repeatMax.is_infinite()); @@ -213,174 +213,174 @@ RepeatStateInfo::RepeatStateInfo(enum RepeatType type, const depth &repeatMin, horizon = 0; packedCtrlSize = 0; break; - } - DEBUG_PRINTF("stateSize=%u, packedCtrlSize=%u, horizon=%u\n", stateSize, - packedCtrlSize, horizon); - - assert(packedCtrlSize <= sizeof(RepeatControl)); -} - -/** \brief Returns the packed control block size in bytes for a given bounded - * repeat. */ -static -u32 packedSize(enum RepeatType type, const depth &repeatMin, - const depth &repeatMax, u32 minPeriod) { - RepeatStateInfo rsi(type, repeatMin, repeatMax, minPeriod); - return rsi.packedCtrlSize; -} - -/** \brief Returns the stream state size in bytes for a given bounded - * repeat. */ -static -u32 streamStateSize(enum RepeatType type, const depth &repeatMin, - const depth &repeatMax, u32 minPeriod) { - RepeatStateInfo rsi(type, repeatMin, repeatMax, minPeriod); - return rsi.stateSize; -} - -enum RepeatType chooseRepeatType(const depth &repeatMin, const depth &repeatMax, + } + DEBUG_PRINTF("stateSize=%u, packedCtrlSize=%u, horizon=%u\n", stateSize, + packedCtrlSize, horizon); + + assert(packedCtrlSize <= sizeof(RepeatControl)); +} + +/** \brief Returns the packed control block size in bytes for a given bounded + * repeat. */ +static +u32 packedSize(enum RepeatType type, const depth &repeatMin, + const depth &repeatMax, u32 minPeriod) { + RepeatStateInfo rsi(type, repeatMin, repeatMax, minPeriod); + return rsi.packedCtrlSize; +} + +/** \brief Returns the stream state size in bytes for a given bounded + * repeat. */ +static +u32 streamStateSize(enum RepeatType type, const depth &repeatMin, + const depth &repeatMax, u32 minPeriod) { + RepeatStateInfo rsi(type, repeatMin, repeatMax, minPeriod); + return rsi.stateSize; +} + +enum RepeatType chooseRepeatType(const depth &repeatMin, const depth &repeatMax, u32 minPeriod, bool is_reset, bool has_external_guard) { - if (repeatMax.is_infinite()) { + if (repeatMax.is_infinite()) { if (has_external_guard && !repeatMin) { return REPEAT_ALWAYS; } else { return REPEAT_FIRST; } - } - - if (repeatMin == depth(0) || is_reset) { - return REPEAT_LAST; - } - - // Cases with max < 64 can be handled with either bitmap or trailer. We use - // whichever has smaller packed state. - - if (repeatMax < depth(64)) { - u32 bitmap_len = - packedSize(REPEAT_BITMAP, repeatMin, repeatMax, minPeriod); - u32 trailer_len = - packedSize(REPEAT_TRAILER, repeatMin, repeatMax, minPeriod); - return bitmap_len <= trailer_len ? REPEAT_BITMAP : REPEAT_TRAILER; - } - - if (repeatMin <= depth(64)) { - return REPEAT_TRAILER; - } - - u32 range_len = ~0U; - if (repeatMax > repeatMin && - numRangeSlots(repeatMin, repeatMax) <= REPEAT_RANGE_MAX_SLOTS) { - assert(numRangeSlots(repeatMin, repeatMax) < 256); // stored in u8 - range_len = - streamStateSize(REPEAT_RANGE, repeatMin, repeatMax, minPeriod); - } - - assert(repeatMax.is_finite()); - - u32 sparse_len = ~0U; - if (minPeriod > 6) { - sparse_len = - streamStateSize(REPEAT_SPARSE_OPTIMAL_P, repeatMin, repeatMax, minPeriod); - } - - if (range_len != ~0U || sparse_len != ~0U) { - return range_len < sparse_len ? REPEAT_RANGE : REPEAT_SPARSE_OPTIMAL_P; - } - - return REPEAT_RING; -} - -bool matches(vector<CharReach>::const_iterator a_it, - vector<CharReach>::const_iterator a_ite, - vector<CharReach>::const_iterator b_it, - UNUSED vector<CharReach>::const_iterator b_ite) { - for (; a_it != a_ite; ++a_it, ++b_it) { - assert(b_it != b_ite); - if ((*a_it & *b_it).none()) { - return false; - } - } - assert(b_it == b_ite); - return true; -} - -static -u32 minDistAfterA(const vector<CharReach> &a, const vector<CharReach> &b) { - /* we do not count the case where b can end at the same position as a */ - - for (u32 i = 1; i < b.size(); i++) { - u32 overlap_len = b.size() - i; - if (overlap_len <= a.size()) { - if (matches(a.end() - overlap_len, a.end(), - b.begin(), b.end() - i)) { - return i; - } - } else { - assert(overlap_len > a.size()); - if (matches(a.begin(), a.end(), - b.end() - i - a.size(), b.end() - i)) { - return i; - } - } - } - - return b.size(); -} - -vector<size_t> minResetDistToEnd(const vector<vector<CharReach>> &triggers, - const CharReach &cr) { - /* if a trigger does not reset the repeat, it gets a distance of trigger - length */ - vector<size_t> out; - for (const auto &trig : triggers) { - size_t size = trig.size(); - size_t i = 0; - for (; i < size; i++) { - if ((trig[size - i - 1] & cr).none()) { - break; - } - } - out.push_back(i); - } - - return out; -} - -#if defined(DEBUG) || defined(DUMP_SUPPORT) - -static UNUSED -string dumpTrigger(const vector<CharReach> &trigger) { - string s; - for (const auto &cr : trigger) { - s += describeClass(cr); - } - return s; -} - -#endif - -u32 minPeriod(const vector<vector<CharReach>> &triggers, const CharReach &cr, - bool *can_reset) { - assert(!triggers.empty()); - - u32 rv = ~0U; - *can_reset = true; - vector<size_t> min_reset_dist = minResetDistToEnd(triggers, cr); - - for (const auto &trigger : triggers) { - DEBUG_PRINTF("trigger: %s\n", dumpTrigger(trigger).c_str()); - for (size_t j = 0; j < triggers.size(); j++) { - u32 min_ext = minDistAfterA(trigger, triggers[j]); - rv = min(rv, min_ext); - if (min_ext <= min_reset_dist[j]) { - *can_reset = false; - } - } - } - - DEBUG_PRINTF("min period %u\n", rv); - return rv; -} - -} // namespace ue2 + } + + if (repeatMin == depth(0) || is_reset) { + return REPEAT_LAST; + } + + // Cases with max < 64 can be handled with either bitmap or trailer. We use + // whichever has smaller packed state. + + if (repeatMax < depth(64)) { + u32 bitmap_len = + packedSize(REPEAT_BITMAP, repeatMin, repeatMax, minPeriod); + u32 trailer_len = + packedSize(REPEAT_TRAILER, repeatMin, repeatMax, minPeriod); + return bitmap_len <= trailer_len ? REPEAT_BITMAP : REPEAT_TRAILER; + } + + if (repeatMin <= depth(64)) { + return REPEAT_TRAILER; + } + + u32 range_len = ~0U; + if (repeatMax > repeatMin && + numRangeSlots(repeatMin, repeatMax) <= REPEAT_RANGE_MAX_SLOTS) { + assert(numRangeSlots(repeatMin, repeatMax) < 256); // stored in u8 + range_len = + streamStateSize(REPEAT_RANGE, repeatMin, repeatMax, minPeriod); + } + + assert(repeatMax.is_finite()); + + u32 sparse_len = ~0U; + if (minPeriod > 6) { + sparse_len = + streamStateSize(REPEAT_SPARSE_OPTIMAL_P, repeatMin, repeatMax, minPeriod); + } + + if (range_len != ~0U || sparse_len != ~0U) { + return range_len < sparse_len ? REPEAT_RANGE : REPEAT_SPARSE_OPTIMAL_P; + } + + return REPEAT_RING; +} + +bool matches(vector<CharReach>::const_iterator a_it, + vector<CharReach>::const_iterator a_ite, + vector<CharReach>::const_iterator b_it, + UNUSED vector<CharReach>::const_iterator b_ite) { + for (; a_it != a_ite; ++a_it, ++b_it) { + assert(b_it != b_ite); + if ((*a_it & *b_it).none()) { + return false; + } + } + assert(b_it == b_ite); + return true; +} + +static +u32 minDistAfterA(const vector<CharReach> &a, const vector<CharReach> &b) { + /* we do not count the case where b can end at the same position as a */ + + for (u32 i = 1; i < b.size(); i++) { + u32 overlap_len = b.size() - i; + if (overlap_len <= a.size()) { + if (matches(a.end() - overlap_len, a.end(), + b.begin(), b.end() - i)) { + return i; + } + } else { + assert(overlap_len > a.size()); + if (matches(a.begin(), a.end(), + b.end() - i - a.size(), b.end() - i)) { + return i; + } + } + } + + return b.size(); +} + +vector<size_t> minResetDistToEnd(const vector<vector<CharReach>> &triggers, + const CharReach &cr) { + /* if a trigger does not reset the repeat, it gets a distance of trigger + length */ + vector<size_t> out; + for (const auto &trig : triggers) { + size_t size = trig.size(); + size_t i = 0; + for (; i < size; i++) { + if ((trig[size - i - 1] & cr).none()) { + break; + } + } + out.push_back(i); + } + + return out; +} + +#if defined(DEBUG) || defined(DUMP_SUPPORT) + +static UNUSED +string dumpTrigger(const vector<CharReach> &trigger) { + string s; + for (const auto &cr : trigger) { + s += describeClass(cr); + } + return s; +} + +#endif + +u32 minPeriod(const vector<vector<CharReach>> &triggers, const CharReach &cr, + bool *can_reset) { + assert(!triggers.empty()); + + u32 rv = ~0U; + *can_reset = true; + vector<size_t> min_reset_dist = minResetDistToEnd(triggers, cr); + + for (const auto &trigger : triggers) { + DEBUG_PRINTF("trigger: %s\n", dumpTrigger(trigger).c_str()); + for (size_t j = 0; j < triggers.size(); j++) { + u32 min_ext = minDistAfterA(trigger, triggers[j]); + rv = min(rv, min_ext); + if (min_ext <= min_reset_dist[j]) { + *can_reset = false; + } + } + } + + DEBUG_PRINTF("min period %u\n", rv); + return rv; +} + +} // namespace ue2 |