diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /contrib/libs/hyperscan/src/nfa/shufticompile.cpp | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'contrib/libs/hyperscan/src/nfa/shufticompile.cpp')
-rw-r--r-- | contrib/libs/hyperscan/src/nfa/shufticompile.cpp | 225 |
1 files changed, 225 insertions, 0 deletions
diff --git a/contrib/libs/hyperscan/src/nfa/shufticompile.cpp b/contrib/libs/hyperscan/src/nfa/shufticompile.cpp new file mode 100644 index 0000000000..f712ef94a4 --- /dev/null +++ b/contrib/libs/hyperscan/src/nfa/shufticompile.cpp @@ -0,0 +1,225 @@ +/* + * Copyright (c) 2015-2017, Intel Corporation + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Intel Corporation nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** \file + * \brief Shufti acceleration: compile code. + */ +#include "shufticompile.h" +#include "ue2common.h" +#include "util/charreach.h" +#include "util/container.h" +#include "util/flat_containers.h" + +#include <array> +#include <cassert> +#include <cstring> +#include <map> + +using namespace std; + +namespace ue2 { + +/** \brief Single-byte variant. + * + * Returns -1 if unable to construct masks, otherwise returns number of bits + * used in the mask. + * + * Note: always able to construct masks for 8 or fewer characters. + */ +int shuftiBuildMasks(const CharReach &c, u8 *lo, u8 *hi) { + /* Things could be packed much more optimally, but this should be able to + * handle any set of characters entirely in the lower half. */ + + assert(c.count() < 256); + assert(!c.none()); + + map<u8, CharReach> by_hi; /* hi nibble -> set of matching lo nibbles */ + /* group matching characters by high nibble */ + for (size_t i = c.find_first(); i != CharReach::npos; i = c.find_next(i)) { + u8 it_hi = i >> 4; + u8 it_lo = i & 0xf; + by_hi[it_hi].set(it_lo); + } + + map<CharReach, CharReach> by_lo_set; + /* group all hi nibbles with a common set of lo nibbles together */ + for (map<u8, CharReach>::const_iterator it = by_hi.begin(); + it != by_hi.end(); ++it) { + by_lo_set[it->second].set(it->first); + } + + if (by_lo_set.size() > 8) { + /* too many char classes on the dance floor */ + assert(c.size() > 8); + return -1; + } + + u8 bit_index = 0; + array<u8, 16> lo_a; lo_a.fill(0); + array<u8, 16> hi_a; hi_a.fill(0); + for (map<CharReach, CharReach>::const_iterator it = by_lo_set.begin(); + it != by_lo_set.end(); ++it) { + const CharReach &lo_nibbles = it->first; + const CharReach &hi_nibbles = it->second; + + /* set bits in low mask */ + for (size_t j = lo_nibbles.find_first(); j != CharReach::npos; + j = lo_nibbles.find_next(j)) { + lo_a[j] |= (1 << bit_index); + } + + /* set bits in high mask */ + for (size_t j = hi_nibbles.find_first(); j != CharReach::npos; + j = hi_nibbles.find_next(j)) { + hi_a[j] |= (1 << bit_index); + } + + bit_index++; + } + + memcpy(lo, lo_a.data(), sizeof(m128)); + memcpy(hi, hi_a.data(), sizeof(m128)); + + return bit_index; +} + +static +array<u16, 4> or_array(array<u16, 4> a, const array<u16, 4> &b) { + a[0] |= b[0]; + a[1] |= b[1]; + a[2] |= b[2]; + a[3] |= b[3]; + + return a; +} + + +#define MAX_BUCKETS 8 +static +void set_buckets_from_mask(u16 nibble_mask, u32 bucket, + array<u8, 16> &byte_mask) { + assert(bucket < MAX_BUCKETS); + + u32 mask = nibble_mask; + while (mask) { + u32 n = findAndClearLSB_32(&mask); + byte_mask[n] &= ~(1 << bucket); + } +} + +bool shuftiBuildDoubleMasks(const CharReach &onechar, + const flat_set<pair<u8, u8>> &twochar, + u8 *lo1, u8 *hi1, u8 *lo2, u8 *hi2) { + DEBUG_PRINTF("unibytes %zu dibytes %zu\n", onechar.size(), + twochar.size()); + array<u8, 16> lo1_a; + array<u8, 16> lo2_a; + array<u8, 16> hi1_a; + array<u8, 16> hi2_a; + + lo1_a.fill(0xff); + lo2_a.fill(0xff); + hi1_a.fill(0xff); + hi2_a.fill(0xff); + + // two-byte literals + vector<array<u16, 4>> nibble_masks; + for (const auto &p : twochar) { + DEBUG_PRINTF("%02hhx %02hhx\n", p.first, p.second); + u16 a_lo = 1U << (p.first & 0xf); + u16 a_hi = 1U << (p.first >> 4); + u16 b_lo = 1U << (p.second & 0xf); + u16 b_hi = 1U << (p.second >> 4); + nibble_masks.push_back({{a_lo, a_hi, b_lo, b_hi}}); + } + + // one-byte literals (second byte is a wildcard) + for (size_t it = onechar.find_first(); it != CharReach::npos; + it = onechar.find_next(it)) { + DEBUG_PRINTF("%02hhx\n", (u8)it); + u16 a_lo = 1U << (it & 0xf); + u16 a_hi = 1U << (it >> 4); + u16 wildcard = 0xffff; + nibble_masks.push_back({{a_lo, a_hi, wildcard, wildcard}}); + } + + // try to merge strings into shared buckets + for (u32 i = 0; i < 4; i++) { + map<array<u16, 4>, array<u16, 4>> new_masks; + for (const auto &a : nibble_masks) { + auto key = a; + key[i] = 0; + if (!contains(new_masks, key)) { + new_masks[key] = a; + } else { + new_masks[key] = or_array(new_masks[key], a); + } + } + nibble_masks.clear(); + for (const auto &e : new_masks) { + nibble_masks.push_back(e.second); + } + } + + if (nibble_masks.size() > MAX_BUCKETS) { + DEBUG_PRINTF("too many buckets needed (%zu)\n", nibble_masks.size()); + return false; + } + + u32 i = 0; + for (const auto &a : nibble_masks) { + set_buckets_from_mask(a[0], i, lo1_a); + set_buckets_from_mask(a[1], i, hi1_a); + set_buckets_from_mask(a[2], i, lo2_a); + set_buckets_from_mask(a[3], i, hi2_a); + i++; + } + + memcpy(lo1, lo1_a.data(), sizeof(m128)); + memcpy(lo2, lo2_a.data(), sizeof(m128)); + memcpy(hi1, hi1_a.data(), sizeof(m128)); + memcpy(hi2, hi2_a.data(), sizeof(m128)); + + return true; +} + +#ifdef DUMP_SUPPORT + +CharReach shufti2cr(const u8 *lo, const u8 *hi) { + CharReach cr; + for (u32 i = 0; i < 256; i++) { + if (lo[(u8)i & 0xf] & hi[(u8)i >> 4]) { + cr.set(i); + } + } + return cr; +} + +#endif // DUMP_SUPPORT + +} // namespace ue2 |