Restoring authorship annotation for Ivan Blinkov <ivan@blinkov.ru>. Commit 2 of 2.

author: Ivan Blinkov <ivan@blinkov.ru> 2022-02-10 16:47:11 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:47:11 +0300
commit: 5b283123c882433dafbaf6b338adeea16c1a0ea0 (patch)
tree: 339adc63bce23800021202ae4a8328a843dc447a /contrib/libs/hyperscan/src/fdr/teddy.c
parent: 1aeb9a455974457866f78722ad98114bafc84e8a (diff)
download: ydb-5b283123c882433dafbaf6b338adeea16c1a0ea0.tar.gz
1 files changed, 781 insertions, 781 deletions
diff --git a/contrib/libs/hyperscan/src/fdr/teddy.c b/contrib/libs/hyperscan/src/fdr/teddy.c
index c62d9fa77e..e6f5476198 100644
--- a/contrib/libs/hyperscan/src/fdr/teddy.c
+++ b/contrib/libs/hyperscan/src/fdr/teddy.c
@@ -26,52 +26,52 @@
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
-/** \file 
- * \brief Teddy literal matcher: SSSE3 engine runtime. 
- */ 
- 
-#include "fdr_internal.h" 
-#include "flood_runtime.h" 
-#include "teddy.h" 
-#include "teddy_internal.h" 
-#include "teddy_runtime_common.h" 
+/** \file
+ * \brief Teddy literal matcher: SSSE3 engine runtime.
+ */
+
+#include "fdr_internal.h"
+#include "flood_runtime.h"
+#include "teddy.h"
+#include "teddy_internal.h"
+#include "teddy_runtime_common.h"
 #include "util/simd_utils.h"
 
-const u8 ALIGN_DIRECTIVE p_mask_arr[17][32] = { 
-    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
-     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, 
-    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
-     0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, 
-    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
-     0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, 
-    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
-     0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, 
-    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
-     0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, 
-    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
-     0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, 
-    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
-     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, 
-    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
-     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, 
-    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
-     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, 
-    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
-     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, 
-    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
-     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, 
-    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
-     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff}, 
-    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
-     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff}, 
-    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
-     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff}, 
-    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
-     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff}, 
-    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
-     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff}, 
-    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
-     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00} 
+const u8 ALIGN_DIRECTIVE p_mask_arr[17][32] = {
+    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+     0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+     0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+     0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+     0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+     0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff},
+    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff},
+    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff},
+    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff},
+    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff},
+    {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}
 };
 
 #if defined(HAVE_AVX512VBMI) // VBMI strong teddy
@@ -98,31 +98,31 @@ do {                                                                        \
 
 #else
 
-#define CONF_CHUNK_64(chunk, bucket, off, reason, conf_fn)                  \ 
-do {                                                                        \ 
-    if (unlikely(chunk != ones_u64a)) {                                     \ 
-        chunk = ~chunk;                                                     \ 
-        conf_fn(&chunk, bucket, off, confBase, reason, a, ptr,              \ 
-                &control, &last_match);                                     \ 
-        CHECK_HWLM_TERMINATE_MATCHING;                                      \ 
-    }                                                                       \ 
-} while(0) 
-
-#define CONF_CHUNK_32(chunk, bucket, off, reason, conf_fn)                  \ 
-do {                                                                        \ 
-    if (unlikely(chunk != ones_u32)) {                                      \ 
-        chunk = ~chunk;                                                     \ 
-        conf_fn(&chunk, bucket, off, confBase, reason, a, ptr,              \ 
-                &control, &last_match);                                     \ 
-        CHECK_HWLM_TERMINATE_MATCHING;                                      \ 
-    }                                                                       \ 
-} while(0) 
+#define CONF_CHUNK_64(chunk, bucket, off, reason, conf_fn)                  \
+do {                                                                        \
+    if (unlikely(chunk != ones_u64a)) {                                     \
+        chunk = ~chunk;                                                     \
+        conf_fn(&chunk, bucket, off, confBase, reason, a, ptr,              \
+                &control, &last_match);                                     \
+        CHECK_HWLM_TERMINATE_MATCHING;                                      \
+    }                                                                       \
+} while(0)
+
+#define CONF_CHUNK_32(chunk, bucket, off, reason, conf_fn)                  \
+do {                                                                        \
+    if (unlikely(chunk != ones_u32)) {                                      \
+        chunk = ~chunk;                                                     \
+        conf_fn(&chunk, bucket, off, confBase, reason, a, ptr,              \
+                &control, &last_match);                                     \
+        CHECK_HWLM_TERMINATE_MATCHING;                                      \
+    }                                                                       \
+} while(0)
 
 #endif
- 
+
 #if defined(HAVE_AVX512VBMI) // VBMI strong teddy
 
-#ifdef ARCH_64_BIT 
+#ifdef ARCH_64_BIT
 #define CONFIRM_TEDDY(var, bucket, offset, reason, pt, conf_fn)             \
 do {                                                                        \
     if (unlikely(diff512(var, ones512()))) {                                \
@@ -390,725 +390,725 @@ do {                                                                          \
 #elif defined(HAVE_AVX512) // AVX512 reinforced teddy
 
 #ifdef ARCH_64_BIT
-#define CONFIRM_TEDDY(var, bucket, offset, reason, conf_fn)                 \ 
-do {                                                                        \ 
-    if (unlikely(diff512(var, ones512()))) {                                \ 
-        m128 p128_0 = extract128from512(var, 0);                            \ 
-        m128 p128_1 = extract128from512(var, 1);                            \ 
-        m128 p128_2 = extract128from512(var, 2);                            \ 
-        m128 p128_3 = extract128from512(var, 3);                            \ 
-        u64a part1 = movq(p128_0);                                          \ 
-        u64a part2 = movq(rshiftbyte_m128(p128_0, 8));                      \ 
-        u64a part3 = movq(p128_1);                                          \ 
-        u64a part4 = movq(rshiftbyte_m128(p128_1, 8));                      \ 
-        u64a part5 = movq(p128_2);                                          \ 
-        u64a part6 = movq(rshiftbyte_m128(p128_2, 8));                      \ 
-        u64a part7 = movq(p128_3);                                          \ 
-        u64a part8 = movq(rshiftbyte_m128(p128_3, 8));                      \ 
-        CONF_CHUNK_64(part1, bucket, offset, reason, conf_fn);              \ 
-        CONF_CHUNK_64(part2, bucket, offset + 8, reason, conf_fn);          \ 
-        CONF_CHUNK_64(part3, bucket, offset + 16, reason, conf_fn);         \ 
-        CONF_CHUNK_64(part4, bucket, offset + 24, reason, conf_fn);         \ 
-        CONF_CHUNK_64(part5, bucket, offset + 32, reason, conf_fn);         \ 
-        CONF_CHUNK_64(part6, bucket, offset + 40, reason, conf_fn);         \ 
-        CONF_CHUNK_64(part7, bucket, offset + 48, reason, conf_fn);         \ 
-        CONF_CHUNK_64(part8, bucket, offset + 56, reason, conf_fn);         \ 
-    }                                                                       \ 
-} while(0) 
-#else 
-#define CONFIRM_TEDDY(var, bucket, offset, reason, conf_fn)                 \ 
-do {                                                                        \ 
-    if (unlikely(diff512(var, ones512()))) {                                \ 
-        m128 p128_0 = extract128from512(var, 0);                            \ 
-        m128 p128_1 = extract128from512(var, 1);                            \ 
-        m128 p128_2 = extract128from512(var, 2);                            \ 
-        m128 p128_3 = extract128from512(var, 3);                            \ 
-        u32 part1 = movd(p128_0);                                           \ 
-        u32 part2 = movd(rshiftbyte_m128(p128_0, 4));                       \ 
-        u32 part3 = movd(rshiftbyte_m128(p128_0, 8));                       \ 
-        u32 part4 = movd(rshiftbyte_m128(p128_0, 12));                      \ 
-        u32 part5 = movd(p128_1);                                           \ 
-        u32 part6 = movd(rshiftbyte_m128(p128_1, 4));                       \ 
-        u32 part7 = movd(rshiftbyte_m128(p128_1, 8));                       \ 
-        u32 part8 = movd(rshiftbyte_m128(p128_1, 12));                      \ 
-        u32 part9 = movd(p128_2);                                           \ 
-        u32 part10 = movd(rshiftbyte_m128(p128_2, 4));                      \ 
-        u32 part11 = movd(rshiftbyte_m128(p128_2, 8));                      \ 
-        u32 part12 = movd(rshiftbyte_m128(p128_2, 12));                     \ 
-        u32 part13 = movd(p128_3);                                          \ 
-        u32 part14 = movd(rshiftbyte_m128(p128_3, 4));                      \ 
-        u32 part15 = movd(rshiftbyte_m128(p128_3, 8));                      \ 
-        u32 part16 = movd(rshiftbyte_m128(p128_3, 12));                     \ 
-        CONF_CHUNK_32(part1, bucket, offset, reason, conf_fn);              \ 
-        CONF_CHUNK_32(part2, bucket, offset + 4, reason, conf_fn);          \ 
-        CONF_CHUNK_32(part3, bucket, offset + 8, reason, conf_fn);          \ 
-        CONF_CHUNK_32(part4, bucket, offset + 12, reason, conf_fn);         \ 
-        CONF_CHUNK_32(part5, bucket, offset + 16, reason, conf_fn);         \ 
-        CONF_CHUNK_32(part6, bucket, offset + 20, reason, conf_fn);         \ 
-        CONF_CHUNK_32(part7, bucket, offset + 24, reason, conf_fn);         \ 
-        CONF_CHUNK_32(part8, bucket, offset + 28, reason, conf_fn);         \ 
-        CONF_CHUNK_32(part9, bucket, offset + 32, reason, conf_fn);         \ 
-        CONF_CHUNK_32(part10, bucket, offset + 36, reason, conf_fn);        \ 
-        CONF_CHUNK_32(part11, bucket, offset + 40, reason, conf_fn);        \ 
-        CONF_CHUNK_32(part12, bucket, offset + 44, reason, conf_fn);        \ 
-        CONF_CHUNK_32(part13, bucket, offset + 48, reason, conf_fn);        \ 
-        CONF_CHUNK_32(part14, bucket, offset + 52, reason, conf_fn);        \ 
-        CONF_CHUNK_32(part15, bucket, offset + 56, reason, conf_fn);        \ 
-        CONF_CHUNK_32(part16, bucket, offset + 60, reason, conf_fn);        \ 
-    }                                                                       \ 
-} while(0) 
-#endif 
- 
-#define PREP_SHUF_MASK_NO_REINFORCEMENT(val)                                \ 
-    m512 lo = and512(val, *lo_mask);                                        \ 
-    m512 hi = and512(rshift64_m512(val, 4), *lo_mask) 
- 
-#define PREP_SHUF_MASK                                                      \ 
-    PREP_SHUF_MASK_NO_REINFORCEMENT(load512(ptr));                          \ 
-    *c_16 = *(ptr + 15);                                                    \ 
-    *c_32 = *(ptr + 31);                                                    \ 
-    *c_48 = *(ptr + 47);                                                    \ 
-    m512 r_msk = set512_64(0ULL, r_msk_base[*c_48], 0ULL, r_msk_base[*c_32],\ 
-                           0ULL, r_msk_base[*c_16], 0ULL, r_msk_base[*c_0]);\ 
-    *c_0 = *(ptr + 63) 
- 
-#define SHIFT_OR_M1                                                         \ 
-    or512(pshufb_m512(dup_mask[0], lo), pshufb_m512(dup_mask[1], hi)) 
- 
-#define SHIFT_OR_M2                                                         \ 
-    or512(lshift128_m512(or512(pshufb_m512(dup_mask[2], lo),                \ 
-                               pshufb_m512(dup_mask[3], hi)),               \ 
-                         1), SHIFT_OR_M1) 
- 
-#define SHIFT_OR_M3                                                         \ 
-    or512(lshift128_m512(or512(pshufb_m512(dup_mask[4], lo),                \ 
-                               pshufb_m512(dup_mask[5], hi)),               \ 
-                         2), SHIFT_OR_M2) 
- 
-#define SHIFT_OR_M4                                                         \ 
-    or512(lshift128_m512(or512(pshufb_m512(dup_mask[6], lo),                \ 
-                               pshufb_m512(dup_mask[7], hi)),               \ 
-                         3), SHIFT_OR_M3) 
- 
-static really_inline 
-m512 prep_conf_teddy_no_reinforcement_m1(const m512 *lo_mask, 
-                                         const m512 *dup_mask, 
-                                         const m512 val) { 
-    PREP_SHUF_MASK_NO_REINFORCEMENT(val); 
-    return SHIFT_OR_M1; 
+#define CONFIRM_TEDDY(var, bucket, offset, reason, conf_fn)                 \
+do {                                                                        \
+    if (unlikely(diff512(var, ones512()))) {                                \
+        m128 p128_0 = extract128from512(var, 0);                            \
+        m128 p128_1 = extract128from512(var, 1);                            \
+        m128 p128_2 = extract128from512(var, 2);                            \
+        m128 p128_3 = extract128from512(var, 3);                            \
+        u64a part1 = movq(p128_0);                                          \
+        u64a part2 = movq(rshiftbyte_m128(p128_0, 8));                      \
+        u64a part3 = movq(p128_1);                                          \
+        u64a part4 = movq(rshiftbyte_m128(p128_1, 8));                      \
+        u64a part5 = movq(p128_2);                                          \
+        u64a part6 = movq(rshiftbyte_m128(p128_2, 8));                      \
+        u64a part7 = movq(p128_3);                                          \
+        u64a part8 = movq(rshiftbyte_m128(p128_3, 8));                      \
+        CONF_CHUNK_64(part1, bucket, offset, reason, conf_fn);              \
+        CONF_CHUNK_64(part2, bucket, offset + 8, reason, conf_fn);          \
+        CONF_CHUNK_64(part3, bucket, offset + 16, reason, conf_fn);         \
+        CONF_CHUNK_64(part4, bucket, offset + 24, reason, conf_fn);         \
+        CONF_CHUNK_64(part5, bucket, offset + 32, reason, conf_fn);         \
+        CONF_CHUNK_64(part6, bucket, offset + 40, reason, conf_fn);         \
+        CONF_CHUNK_64(part7, bucket, offset + 48, reason, conf_fn);         \
+        CONF_CHUNK_64(part8, bucket, offset + 56, reason, conf_fn);         \
+    }                                                                       \
+} while(0)
+#else
+#define CONFIRM_TEDDY(var, bucket, offset, reason, conf_fn)                 \
+do {                                                                        \
+    if (unlikely(diff512(var, ones512()))) {                                \
+        m128 p128_0 = extract128from512(var, 0);                            \
+        m128 p128_1 = extract128from512(var, 1);                            \
+        m128 p128_2 = extract128from512(var, 2);                            \
+        m128 p128_3 = extract128from512(var, 3);                            \
+        u32 part1 = movd(p128_0);                                           \
+        u32 part2 = movd(rshiftbyte_m128(p128_0, 4));                       \
+        u32 part3 = movd(rshiftbyte_m128(p128_0, 8));                       \
+        u32 part4 = movd(rshiftbyte_m128(p128_0, 12));                      \
+        u32 part5 = movd(p128_1);                                           \
+        u32 part6 = movd(rshiftbyte_m128(p128_1, 4));                       \
+        u32 part7 = movd(rshiftbyte_m128(p128_1, 8));                       \
+        u32 part8 = movd(rshiftbyte_m128(p128_1, 12));                      \
+        u32 part9 = movd(p128_2);                                           \
+        u32 part10 = movd(rshiftbyte_m128(p128_2, 4));                      \
+        u32 part11 = movd(rshiftbyte_m128(p128_2, 8));                      \
+        u32 part12 = movd(rshiftbyte_m128(p128_2, 12));                     \
+        u32 part13 = movd(p128_3);                                          \
+        u32 part14 = movd(rshiftbyte_m128(p128_3, 4));                      \
+        u32 part15 = movd(rshiftbyte_m128(p128_3, 8));                      \
+        u32 part16 = movd(rshiftbyte_m128(p128_3, 12));                     \
+        CONF_CHUNK_32(part1, bucket, offset, reason, conf_fn);              \
+        CONF_CHUNK_32(part2, bucket, offset + 4, reason, conf_fn);          \
+        CONF_CHUNK_32(part3, bucket, offset + 8, reason, conf_fn);          \
+        CONF_CHUNK_32(part4, bucket, offset + 12, reason, conf_fn);         \
+        CONF_CHUNK_32(part5, bucket, offset + 16, reason, conf_fn);         \
+        CONF_CHUNK_32(part6, bucket, offset + 20, reason, conf_fn);         \
+        CONF_CHUNK_32(part7, bucket, offset + 24, reason, conf_fn);         \
+        CONF_CHUNK_32(part8, bucket, offset + 28, reason, conf_fn);         \
+        CONF_CHUNK_32(part9, bucket, offset + 32, reason, conf_fn);         \
+        CONF_CHUNK_32(part10, bucket, offset + 36, reason, conf_fn);        \
+        CONF_CHUNK_32(part11, bucket, offset + 40, reason, conf_fn);        \
+        CONF_CHUNK_32(part12, bucket, offset + 44, reason, conf_fn);        \
+        CONF_CHUNK_32(part13, bucket, offset + 48, reason, conf_fn);        \
+        CONF_CHUNK_32(part14, bucket, offset + 52, reason, conf_fn);        \
+        CONF_CHUNK_32(part15, bucket, offset + 56, reason, conf_fn);        \
+        CONF_CHUNK_32(part16, bucket, offset + 60, reason, conf_fn);        \
+    }                                                                       \
+} while(0)
+#endif
+
+#define PREP_SHUF_MASK_NO_REINFORCEMENT(val)                                \
+    m512 lo = and512(val, *lo_mask);                                        \
+    m512 hi = and512(rshift64_m512(val, 4), *lo_mask)
+
+#define PREP_SHUF_MASK                                                      \
+    PREP_SHUF_MASK_NO_REINFORCEMENT(load512(ptr));                          \
+    *c_16 = *(ptr + 15);                                                    \
+    *c_32 = *(ptr + 31);                                                    \
+    *c_48 = *(ptr + 47);                                                    \
+    m512 r_msk = set512_64(0ULL, r_msk_base[*c_48], 0ULL, r_msk_base[*c_32],\
+                           0ULL, r_msk_base[*c_16], 0ULL, r_msk_base[*c_0]);\
+    *c_0 = *(ptr + 63)
+
+#define SHIFT_OR_M1                                                         \
+    or512(pshufb_m512(dup_mask[0], lo), pshufb_m512(dup_mask[1], hi))
+
+#define SHIFT_OR_M2                                                         \
+    or512(lshift128_m512(or512(pshufb_m512(dup_mask[2], lo),                \
+                               pshufb_m512(dup_mask[3], hi)),               \
+                         1), SHIFT_OR_M1)
+
+#define SHIFT_OR_M3                                                         \
+    or512(lshift128_m512(or512(pshufb_m512(dup_mask[4], lo),                \
+                               pshufb_m512(dup_mask[5], hi)),               \
+                         2), SHIFT_OR_M2)
+
+#define SHIFT_OR_M4                                                         \
+    or512(lshift128_m512(or512(pshufb_m512(dup_mask[6], lo),                \
+                               pshufb_m512(dup_mask[7], hi)),               \
+                         3), SHIFT_OR_M3)
+
+static really_inline
+m512 prep_conf_teddy_no_reinforcement_m1(const m512 *lo_mask,
+                                         const m512 *dup_mask,
+                                         const m512 val) {
+    PREP_SHUF_MASK_NO_REINFORCEMENT(val);
+    return SHIFT_OR_M1;
+}
+
+static really_inline
+m512 prep_conf_teddy_no_reinforcement_m2(const m512 *lo_mask,
+                                         const m512 *dup_mask,
+                                         const m512 val) {
+    PREP_SHUF_MASK_NO_REINFORCEMENT(val);
+    return SHIFT_OR_M2;
+}
+
+static really_inline
+m512 prep_conf_teddy_no_reinforcement_m3(const m512 *lo_mask,
+                                         const m512 *dup_mask,
+                                         const m512 val) {
+    PREP_SHUF_MASK_NO_REINFORCEMENT(val);
+    return SHIFT_OR_M3;
+}
+
+static really_inline
+m512 prep_conf_teddy_no_reinforcement_m4(const m512 *lo_mask,
+                                         const m512 *dup_mask,
+                                         const m512 val) {
+    PREP_SHUF_MASK_NO_REINFORCEMENT(val);
+    return SHIFT_OR_M4;
 }
 
-static really_inline 
-m512 prep_conf_teddy_no_reinforcement_m2(const m512 *lo_mask, 
-                                         const m512 *dup_mask, 
-                                         const m512 val) { 
-    PREP_SHUF_MASK_NO_REINFORCEMENT(val); 
-    return SHIFT_OR_M2; 
-} 
-
-static really_inline 
-m512 prep_conf_teddy_no_reinforcement_m3(const m512 *lo_mask, 
-                                         const m512 *dup_mask, 
-                                         const m512 val) { 
-    PREP_SHUF_MASK_NO_REINFORCEMENT(val); 
-    return SHIFT_OR_M3; 
-} 
-
-static really_inline 
-m512 prep_conf_teddy_no_reinforcement_m4(const m512 *lo_mask, 
-                                         const m512 *dup_mask, 
-                                         const m512 val) { 
-    PREP_SHUF_MASK_NO_REINFORCEMENT(val); 
-    return SHIFT_OR_M4; 
+static really_inline
+m512 prep_conf_teddy_m1(const m512 *lo_mask, const m512 *dup_mask,
+                        const u8 *ptr, const u64a *r_msk_base,
+                        u32 *c_0, u32 *c_16, u32 *c_32, u32 *c_48) {
+    PREP_SHUF_MASK;
+    return or512(SHIFT_OR_M1, r_msk);
 }
 
-static really_inline 
-m512 prep_conf_teddy_m1(const m512 *lo_mask, const m512 *dup_mask, 
-                        const u8 *ptr, const u64a *r_msk_base, 
-                        u32 *c_0, u32 *c_16, u32 *c_32, u32 *c_48) { 
-    PREP_SHUF_MASK; 
-    return or512(SHIFT_OR_M1, r_msk); 
-} 
-
-static really_inline 
-m512 prep_conf_teddy_m2(const m512 *lo_mask, const m512 *dup_mask, 
-                        const u8 *ptr, const u64a *r_msk_base, 
-                        u32 *c_0, u32 *c_16, u32 *c_32, u32 *c_48) { 
-    PREP_SHUF_MASK; 
-    return or512(SHIFT_OR_M2, r_msk); 
-} 
-
-static really_inline 
-m512 prep_conf_teddy_m3(const m512 *lo_mask, const m512 *dup_mask, 
-                        const u8 *ptr, const u64a *r_msk_base, 
-                        u32 *c_0, u32 *c_16, u32 *c_32, u32 *c_48) { 
-    PREP_SHUF_MASK; 
-    return or512(SHIFT_OR_M3, r_msk); 
-} 
-
-static really_inline 
-m512 prep_conf_teddy_m4(const m512 *lo_mask, const m512 *dup_mask, 
-                        const u8 *ptr, const u64a *r_msk_base, 
-                        u32 *c_0, u32 *c_16, u32 *c_32, u32 *c_48) { 
-    PREP_SHUF_MASK; 
-    return or512(SHIFT_OR_M4, r_msk); 
-} 
-
-#define PREP_CONF_FN_NO_REINFORCEMENT(val, n)                                 \ 
-    prep_conf_teddy_no_reinforcement_m##n(&lo_mask, dup_mask, val) 
- 
-#define PREP_CONF_FN(ptr, n)                                                  \ 
-    prep_conf_teddy_m##n(&lo_mask, dup_mask, ptr, r_msk_base,                 \ 
-                         &c_0, &c_16, &c_32, &c_48) 
- 
-#define PREPARE_MASKS_1                                                       \ 
-    dup_mask[0] = set4x128(maskBase[0]);                                      \ 
-    dup_mask[1] = set4x128(maskBase[1]); 
- 
-#define PREPARE_MASKS_2                                                       \ 
-    PREPARE_MASKS_1                                                           \ 
-    dup_mask[2] = set4x128(maskBase[2]);                                      \ 
-    dup_mask[3] = set4x128(maskBase[3]); 
- 
-#define PREPARE_MASKS_3                                                       \ 
-    PREPARE_MASKS_2                                                           \ 
-    dup_mask[4] = set4x128(maskBase[4]);                                      \ 
-    dup_mask[5] = set4x128(maskBase[5]); 
- 
-#define PREPARE_MASKS_4                                                       \ 
-    PREPARE_MASKS_3                                                           \ 
-    dup_mask[6] = set4x128(maskBase[6]);                                      \ 
-    dup_mask[7] = set4x128(maskBase[7]); 
- 
-#define PREPARE_MASKS(n)                                                      \ 
-    m512 lo_mask = set64x8(0xf);                                              \ 
-    m512 dup_mask[n * 2];                                                     \ 
-    PREPARE_MASKS_##n 
- 
-#define FDR_EXEC_TEDDY(fdr, a, control, n_msk, conf_fn)                       \ 
-do {                                                                          \ 
-    const u8 *buf_end = a->buf + a->len;                                      \ 
-    const u8 *ptr = a->buf + a->start_offset;                                 \ 
-    u32 floodBackoff = FLOOD_BACKOFF_START;                                   \ 
-    const u8 *tryFloodDetect = a->firstFloodDetect;                           \ 
-    u32 last_match = ones_u32;                                                \ 
-    const struct Teddy *teddy = (const struct Teddy *)fdr;                    \ 
-    const size_t iterBytes = 128;                                             \ 
-    DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n",                 \ 
-                 a->buf, a->len, a->start_offset);                            \ 
-                                                                              \ 
-    const m128 *maskBase = getMaskBase(teddy);                                \ 
-    PREPARE_MASKS(n_msk);                                                     \ 
-    const u32 *confBase = getConfBase(teddy);                                 \ 
-                                                                              \ 
-    const u64a *r_msk_base = getReinforcedMaskBase(teddy, n_msk);             \ 
-    u32 c_0 = 0x100;                                                          \ 
-    u32 c_16 = 0x100;                                                         \ 
-    u32 c_32 = 0x100;                                                         \ 
-    u32 c_48 = 0x100;                                                         \ 
-    const u8 *mainStart = ROUNDUP_PTR(ptr, 64);                               \ 
-    DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart);           \ 
-    if (ptr < mainStart) {                                                    \ 
-        ptr = mainStart - 64;                                                 \ 
-        m512 p_mask;                                                          \ 
-        m512 val_0 = vectoredLoad512(&p_mask, ptr, a->start_offset,           \ 
-                                     a->buf, buf_end,                         \ 
-                                     a->buf_history, a->len_history, n_msk);  \ 
-        m512 r_0 = PREP_CONF_FN_NO_REINFORCEMENT(val_0, n_msk);               \ 
-        r_0 = or512(r_0, p_mask);                                             \ 
-        CONFIRM_TEDDY(r_0, 8, 0, VECTORING, conf_fn);                         \ 
-        ptr += 64;                                                            \ 
-    }                                                                         \ 
-                                                                              \ 
-    if (ptr + 64 <= buf_end) {                                                \ 
-        m512 r_0 = PREP_CONF_FN(ptr, n_msk);                                  \ 
-        CONFIRM_TEDDY(r_0, 8, 0, VECTORING, conf_fn);                         \ 
-        ptr += 64;                                                            \ 
-    }                                                                         \ 
-                                                                              \ 
-    for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {                    \ 
-        __builtin_prefetch(ptr + (iterBytes * 4));                            \ 
-        CHECK_FLOOD;                                                          \ 
-        m512 r_0 = PREP_CONF_FN(ptr, n_msk);                                  \ 
-        CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, conf_fn);                      \ 
-        m512 r_1 = PREP_CONF_FN(ptr + 64, n_msk);                             \ 
-        CONFIRM_TEDDY(r_1, 8, 64, NOT_CAUTIOUS, conf_fn);                     \ 
-    }                                                                         \ 
-                                                                              \ 
-    if (ptr + 64 <= buf_end) {                                                \ 
-        m512 r_0 = PREP_CONF_FN(ptr, n_msk);                                  \ 
-        CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, conf_fn);                      \ 
-        ptr += 64;                                                            \ 
-    }                                                                         \ 
-                                                                              \ 
-    assert(ptr + 64 > buf_end);                                               \ 
-    if (ptr < buf_end) {                                                      \ 
-        m512 p_mask;                                                          \ 
-        m512 val_0 = vectoredLoad512(&p_mask, ptr, 0, ptr, buf_end,           \ 
-                                     a->buf_history, a->len_history, n_msk);  \ 
-        m512 r_0 = PREP_CONF_FN_NO_REINFORCEMENT(val_0, n_msk);               \ 
-        r_0 = or512(r_0, p_mask);                                             \ 
-        CONFIRM_TEDDY(r_0, 8, 0, VECTORING, conf_fn);                         \ 
-    }                                                                         \ 
-                                                                              \ 
-    return HWLM_SUCCESS;                                                      \ 
-} while(0) 
- 
-#elif defined(HAVE_AVX2) // not HAVE_AVX512 but HAVE_AVX2 reinforced teddy 
- 
-#ifdef ARCH_64_BIT 
-#define CONFIRM_TEDDY(var, bucket, offset, reason, conf_fn)                 \ 
-do {                                                                        \ 
-    if (unlikely(diff256(var, ones256()))) {                                \ 
-        m128 lo = movdq_lo(var);                                            \ 
-        m128 hi = movdq_hi(var);                                            \ 
-        u64a part1 = movq(lo);                                              \ 
-        u64a part2 = movq(rshiftbyte_m128(lo, 8));                          \ 
-        u64a part3 = movq(hi);                                              \ 
-        u64a part4 = movq(rshiftbyte_m128(hi, 8));                          \ 
-        CONF_CHUNK_64(part1, bucket, offset, reason, conf_fn);              \ 
-        CONF_CHUNK_64(part2, bucket, offset + 8, reason, conf_fn);          \ 
-        CONF_CHUNK_64(part3, bucket, offset + 16, reason, conf_fn);         \ 
-        CONF_CHUNK_64(part4, bucket, offset + 24, reason, conf_fn);         \ 
-    }                                                                       \ 
-} while(0) 
-#else 
-#define CONFIRM_TEDDY(var, bucket, offset, reason, conf_fn)                 \ 
-do {                                                                        \ 
-    if (unlikely(diff256(var, ones256()))) {                                \ 
-        m128 lo = movdq_lo(var);                                            \ 
-        m128 hi = movdq_hi(var);                                            \ 
-        u32 part1 = movd(lo);                                               \ 
-        u32 part2 = movd(rshiftbyte_m128(lo, 4));                           \ 
-        u32 part3 = movd(rshiftbyte_m128(lo, 8));                           \ 
-        u32 part4 = movd(rshiftbyte_m128(lo, 12));                          \ 
-        u32 part5 = movd(hi);                                               \ 
-        u32 part6 = movd(rshiftbyte_m128(hi, 4));                           \ 
-        u32 part7 = movd(rshiftbyte_m128(hi, 8));                           \ 
-        u32 part8 = movd(rshiftbyte_m128(hi, 12));                          \ 
-        CONF_CHUNK_32(part1, bucket, offset, reason, conf_fn);              \ 
-        CONF_CHUNK_32(part2, bucket, offset + 4, reason, conf_fn);          \ 
-        CONF_CHUNK_32(part3, bucket, offset + 8, reason, conf_fn);          \ 
-        CONF_CHUNK_32(part4, bucket, offset + 12, reason, conf_fn);         \ 
-        CONF_CHUNK_32(part5, bucket, offset + 16, reason, conf_fn);         \ 
-        CONF_CHUNK_32(part6, bucket, offset + 20, reason, conf_fn);         \ 
-        CONF_CHUNK_32(part7, bucket, offset + 24, reason, conf_fn);         \ 
-        CONF_CHUNK_32(part8, bucket, offset + 28, reason, conf_fn);         \ 
-    }                                                                       \ 
-} while(0) 
-#endif 
- 
-#define PREP_SHUF_MASK_NO_REINFORCEMENT(val)                                \ 
-    m256 lo = and256(val, *lo_mask);                                        \ 
-    m256 hi = and256(rshift64_m256(val, 4), *lo_mask) 
- 
-#define PREP_SHUF_MASK                                                      \ 
-    PREP_SHUF_MASK_NO_REINFORCEMENT(load256(ptr));                          \ 
-    *c_128 = *(ptr + 15);                                                   \ 
-    m256 r_msk = set64x4(0ULL, r_msk_base[*c_128], 0ULL, r_msk_base[*c_0]); \ 
-    *c_0 = *(ptr + 31) 
- 
-#define SHIFT_OR_M1                                                         \ 
-    or256(pshufb_m256(dup_mask[0], lo), pshufb_m256(dup_mask[1], hi)) 
- 
-#define SHIFT_OR_M2                                                         \ 
-    or256(lshift128_m256(or256(pshufb_m256(dup_mask[2], lo),                \ 
-                               pshufb_m256(dup_mask[3], hi)),               \ 
-                         1), SHIFT_OR_M1) 
- 
-#define SHIFT_OR_M3                                                         \ 
-    or256(lshift128_m256(or256(pshufb_m256(dup_mask[4], lo),                \ 
-                               pshufb_m256(dup_mask[5], hi)),               \ 
-                         2), SHIFT_OR_M2) 
- 
-#define SHIFT_OR_M4                                                         \ 
-    or256(lshift128_m256(or256(pshufb_m256(dup_mask[6], lo),                \ 
-                               pshufb_m256(dup_mask[7], hi)),               \ 
-                         3), SHIFT_OR_M3) 
- 
-static really_inline 
-m256 prep_conf_teddy_no_reinforcement_m1(const m256 *lo_mask, 
-                                         const m256 *dup_mask, 
-                                         const m256 val) { 
-    PREP_SHUF_MASK_NO_REINFORCEMENT(val); 
-    return SHIFT_OR_M1; 
+static really_inline
+m512 prep_conf_teddy_m2(const m512 *lo_mask, const m512 *dup_mask,
+                        const u8 *ptr, const u64a *r_msk_base,
+                        u32 *c_0, u32 *c_16, u32 *c_32, u32 *c_48) {
+    PREP_SHUF_MASK;
+    return or512(SHIFT_OR_M2, r_msk);
 }
 
-static really_inline 
-m256 prep_conf_teddy_no_reinforcement_m2(const m256 *lo_mask, 
-                                         const m256 *dup_mask, 
-                                         const m256 val) { 
-    PREP_SHUF_MASK_NO_REINFORCEMENT(val); 
-    return SHIFT_OR_M2; 
-} 
-
-static really_inline 
-m256 prep_conf_teddy_no_reinforcement_m3(const m256 *lo_mask, 
-                                         const m256 *dup_mask, 
-                                         const m256 val) { 
-    PREP_SHUF_MASK_NO_REINFORCEMENT(val); 
-    return SHIFT_OR_M3; 
-} 
-
-static really_inline 
-m256 prep_conf_teddy_no_reinforcement_m4(const m256 *lo_mask, 
-                                         const m256 *dup_mask, 
-                                         const m256 val) { 
-    PREP_SHUF_MASK_NO_REINFORCEMENT(val); 
-    return SHIFT_OR_M4; 
-} 
-
-static really_inline 
-m256 prep_conf_teddy_m1(const m256 *lo_mask, const m256 *dup_mask, 
-                        const u8 *ptr, const u64a *r_msk_base, 
-                        u32 *c_0, u32 *c_128) { 
-    PREP_SHUF_MASK; 
-    return or256(SHIFT_OR_M1, r_msk); 
-} 
-
-static really_inline 
-m256 prep_conf_teddy_m2(const m256 *lo_mask, const m256 *dup_mask, 
-                        const u8 *ptr, const u64a *r_msk_base, 
-                        u32 *c_0, u32 *c_128) { 
-    PREP_SHUF_MASK; 
-    return or256(SHIFT_OR_M2, r_msk); 
-} 
-
-static really_inline 
-m256 prep_conf_teddy_m3(const m256 *lo_mask, const m256 *dup_mask, 
-                        const u8 *ptr, const u64a *r_msk_base, 
-                        u32 *c_0, u32 *c_128) { 
-    PREP_SHUF_MASK; 
-    return or256(SHIFT_OR_M3, r_msk); 
-} 
-
-static really_inline 
-m256 prep_conf_teddy_m4(const m256 *lo_mask, const m256 *dup_mask, 
-                        const u8 *ptr, const u64a *r_msk_base, 
-                        u32 *c_0, u32 *c_128) { 
-    PREP_SHUF_MASK; 
-    return or256(SHIFT_OR_M4, r_msk); 
-} 
- 
-#define PREP_CONF_FN_NO_REINFORCEMENT(val, n)                                 \ 
-    prep_conf_teddy_no_reinforcement_m##n(&lo_mask, dup_mask, val) 
- 
-#define PREP_CONF_FN(ptr, n)                                                  \ 
-    prep_conf_teddy_m##n(&lo_mask, dup_mask, ptr, r_msk_base, &c_0, &c_128) 
- 
-#define PREPARE_MASKS_1                                                       \ 
-    dup_mask[0] = set2x128(maskBase[0]);                                      \ 
-    dup_mask[1] = set2x128(maskBase[1]); 
- 
-#define PREPARE_MASKS_2                                                       \ 
-    PREPARE_MASKS_1                                                           \ 
-    dup_mask[2] = set2x128(maskBase[2]);                                      \ 
-    dup_mask[3] = set2x128(maskBase[3]); 
- 
-#define PREPARE_MASKS_3                                                       \ 
-    PREPARE_MASKS_2                                                           \ 
-    dup_mask[4] = set2x128(maskBase[4]);                                      \ 
-    dup_mask[5] = set2x128(maskBase[5]); 
- 
-#define PREPARE_MASKS_4                                                       \ 
-    PREPARE_MASKS_3                                                           \ 
-    dup_mask[6] = set2x128(maskBase[6]);                                      \ 
-    dup_mask[7] = set2x128(maskBase[7]); 
- 
-#define PREPARE_MASKS(n)                                                      \ 
-    m256 lo_mask = set32x8(0xf);                                              \ 
-    m256 dup_mask[n * 2];                                                     \ 
-    PREPARE_MASKS_##n 
- 
-#define FDR_EXEC_TEDDY(fdr, a, control, n_msk, conf_fn)                       \ 
-do {                                                                          \ 
-    const u8 *buf_end = a->buf + a->len;                                      \ 
-    const u8 *ptr = a->buf + a->start_offset;                                 \ 
-    u32 floodBackoff = FLOOD_BACKOFF_START;                                   \ 
-    const u8 *tryFloodDetect = a->firstFloodDetect;                           \ 
-    u32 last_match = ones_u32;                                                \ 
-    const struct Teddy *teddy = (const struct Teddy *)fdr;                    \ 
-    const size_t iterBytes = 64;                                              \ 
-    DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n",                 \ 
-                 a->buf, a->len, a->start_offset);                            \ 
-                                                                              \ 
-    const m128 *maskBase = getMaskBase(teddy);                                \ 
-    PREPARE_MASKS(n_msk);                                                     \ 
-    const u32 *confBase = getConfBase(teddy);                                 \ 
-                                                                              \ 
-    const u64a *r_msk_base = getReinforcedMaskBase(teddy, n_msk);             \ 
-    u32 c_0 = 0x100;                                                          \ 
-    u32 c_128 = 0x100;                                                        \ 
-    const u8 *mainStart = ROUNDUP_PTR(ptr, 32);                               \ 
-    DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart);           \ 
-    if (ptr < mainStart) {                                                    \ 
-        ptr = mainStart - 32;                                                 \ 
-        m256 p_mask;                                                          \ 
-        m256 val_0 = vectoredLoad256(&p_mask, ptr, a->start_offset,           \ 
-                                     a->buf, buf_end,                         \ 
-                                     a->buf_history, a->len_history, n_msk);  \ 
-        m256 r_0 = PREP_CONF_FN_NO_REINFORCEMENT(val_0, n_msk);               \ 
-        r_0 = or256(r_0, p_mask);                                             \ 
-        CONFIRM_TEDDY(r_0, 8, 0, VECTORING, conf_fn);                         \ 
-        ptr += 32;                                                            \ 
-    }                                                                         \ 
-                                                                              \ 
-    if (ptr + 32 <= buf_end) {                                                \ 
-        m256 r_0 = PREP_CONF_FN(ptr, n_msk);                                  \ 
-        CONFIRM_TEDDY(r_0, 8, 0, VECTORING, conf_fn);                         \ 
-        ptr += 32;                                                            \ 
-    }                                                                         \ 
-                                                                              \ 
-    for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {                    \ 
-        __builtin_prefetch(ptr + (iterBytes * 4));                            \ 
-        CHECK_FLOOD;                                                          \ 
-        m256 r_0 = PREP_CONF_FN(ptr, n_msk);                                  \ 
-        CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, conf_fn);                      \ 
-        m256 r_1 = PREP_CONF_FN(ptr + 32, n_msk);                             \ 
-        CONFIRM_TEDDY(r_1, 8, 32, NOT_CAUTIOUS, conf_fn);                     \ 
-    }                                                                         \ 
-                                                                              \ 
-    if (ptr + 32 <= buf_end) {                                                \ 
-        m256 r_0 = PREP_CONF_FN(ptr, n_msk);                                  \ 
-        CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, conf_fn);                      \ 
-        ptr += 32;                                                            \ 
-    }                                                                         \ 
-                                                                              \ 
-    assert(ptr + 32 > buf_end);                                               \ 
-    if (ptr < buf_end) {                                                      \ 
-        m256 p_mask;                                                          \ 
-        m256 val_0 = vectoredLoad256(&p_mask, ptr, 0, ptr, buf_end,           \ 
-                                     a->buf_history, a->len_history, n_msk);  \ 
-        m256 r_0 = PREP_CONF_FN_NO_REINFORCEMENT(val_0, n_msk);               \ 
-        r_0 = or256(r_0, p_mask);                                             \ 
-        CONFIRM_TEDDY(r_0, 8, 0, VECTORING, conf_fn);                         \ 
-    }                                                                         \ 
-                                                                              \ 
-    return HWLM_SUCCESS;                                                      \ 
-} while(0) 
- 
-#else // not defined HAVE_AVX2 
- 
-#ifdef ARCH_64_BIT 
-#define CONFIRM_TEDDY(var, bucket, offset, reason, conf_fn)                 \ 
-do {                                                                        \ 
-    if (unlikely(diff128(var, ones128()))) {                                \ 
-        u64a lo = movq(var);                                                \ 
-        u64a hi = movq(rshiftbyte_m128(var, 8));                            \ 
-        CONF_CHUNK_64(lo, bucket, offset, reason, conf_fn);                 \ 
-        CONF_CHUNK_64(hi, bucket, offset + 8, reason, conf_fn);             \ 
-    }                                                                       \ 
-} while(0) 
-#else 
-#define CONFIRM_TEDDY(var, bucket, offset, reason, conf_fn)                 \ 
-do {                                                                        \ 
-    if (unlikely(diff128(var, ones128()))) {                                \ 
-        u32 part1 = movd(var);                                              \ 
-        u32 part2 = movd(rshiftbyte_m128(var, 4));                          \ 
-        u32 part3 = movd(rshiftbyte_m128(var, 8));                          \ 
-        u32 part4 = movd(rshiftbyte_m128(var, 12));                         \ 
-        CONF_CHUNK_32(part1, bucket, offset, reason, conf_fn);              \ 
-        CONF_CHUNK_32(part2, bucket, offset + 4, reason, conf_fn);          \ 
-        CONF_CHUNK_32(part3, bucket, offset + 8, reason, conf_fn);          \ 
-        CONF_CHUNK_32(part4, bucket, offset + 12, reason, conf_fn);         \ 
-    }                                                                       \ 
-} while(0) 
-#endif 
- 
-static really_inline 
-m128 prep_conf_teddy_m1(const m128 *maskBase, m128 val) { 
-    m128 mask = set16x8(0xf); 
-    m128 lo = and128(val, mask); 
-    m128 hi = and128(rshift64_m128(val, 4), mask); 
-    return or128(pshufb_m128(maskBase[0 * 2], lo), 
-                 pshufb_m128(maskBase[0 * 2 + 1], hi)); 
-} 
- 
-static really_inline 
-m128 prep_conf_teddy_m2(const m128 *maskBase, m128 *old_1, m128 val) { 
-    m128 mask = set16x8(0xf); 
-    m128 lo = and128(val, mask); 
-    m128 hi = and128(rshift64_m128(val, 4), mask); 
-    m128 r = prep_conf_teddy_m1(maskBase, val); 
- 
-    m128 res_1 = or128(pshufb_m128(maskBase[1 * 2], lo), 
-                       pshufb_m128(maskBase[1 * 2 + 1], hi)); 
-    m128 res_shifted_1 = palignr(res_1, *old_1, 16 - 1); 
-    *old_1 = res_1; 
-    return or128(r, res_shifted_1); 
-} 
- 
-static really_inline 
-m128 prep_conf_teddy_m3(const m128 *maskBase, m128 *old_1, m128 *old_2, 
-                        m128 val) { 
-    m128 mask = set16x8(0xf); 
-    m128 lo = and128(val, mask); 
-    m128 hi = and128(rshift64_m128(val, 4), mask); 
-    m128 r = prep_conf_teddy_m2(maskBase, old_1, val); 
- 
-    m128 res_2 = or128(pshufb_m128(maskBase[2 * 2], lo), 
-                       pshufb_m128(maskBase[2 * 2 + 1], hi)); 
-    m128 res_shifted_2 = palignr(res_2, *old_2, 16 - 2); 
-    *old_2 = res_2; 
-    return or128(r, res_shifted_2); 
-} 
- 
-static really_inline 
-m128 prep_conf_teddy_m4(const m128 *maskBase, m128 *old_1, m128 *old_2, 
-                        m128 *old_3, m128 val) { 
-    m128 mask = set16x8(0xf); 
-    m128 lo = and128(val, mask); 
-    m128 hi = and128(rshift64_m128(val, 4), mask); 
-    m128 r = prep_conf_teddy_m3(maskBase, old_1, old_2, val); 
- 
-    m128 res_3 = or128(pshufb_m128(maskBase[3 * 2], lo), 
-                       pshufb_m128(maskBase[3 * 2 + 1], hi)); 
-    m128 res_shifted_3 = palignr(res_3, *old_3, 16 - 3); 
-    *old_3 = res_3; 
-    return or128(r, res_shifted_3); 
-} 
- 
-#define FDR_EXEC_TEDDY_RES_OLD_1 
- 
-#define FDR_EXEC_TEDDY_RES_OLD_2                                              \ 
-    m128 res_old_1 = zeroes128(); 
- 
-#define FDR_EXEC_TEDDY_RES_OLD_3                                              \ 
-    m128 res_old_1 = zeroes128();                                             \ 
-    m128 res_old_2 = zeroes128(); 
- 
-#define FDR_EXEC_TEDDY_RES_OLD_4                                              \ 
-    m128 res_old_1 = zeroes128();                                             \ 
-    m128 res_old_2 = zeroes128();                                             \ 
-    m128 res_old_3 = zeroes128(); 
- 
-#define FDR_EXEC_TEDDY_RES_OLD(n) FDR_EXEC_TEDDY_RES_OLD_##n 
- 
-#define PREP_CONF_FN_1(mask_base, val)                                        \ 
-    prep_conf_teddy_m1(mask_base, val) 
- 
-#define PREP_CONF_FN_2(mask_base, val)                                        \ 
-    prep_conf_teddy_m2(mask_base, &res_old_1, val) 
- 
-#define PREP_CONF_FN_3(mask_base, val)                                        \ 
-    prep_conf_teddy_m3(mask_base, &res_old_1, &res_old_2, val) 
- 
-#define PREP_CONF_FN_4(mask_base, val)                                        \ 
-    prep_conf_teddy_m4(mask_base, &res_old_1, &res_old_2, &res_old_3, val) 
- 
-#define PREP_CONF_FN(mask_base, val, n)                                       \ 
-    PREP_CONF_FN_##n(mask_base, val) 
- 
-#define FDR_EXEC_TEDDY(fdr, a, control, n_msk, conf_fn)                       \ 
-do {                                                                          \ 
-    const u8 *buf_end = a->buf + a->len;                                      \ 
-    const u8 *ptr = a->buf + a->start_offset;                                 \ 
-    u32 floodBackoff = FLOOD_BACKOFF_START;                                   \ 
-    const u8 *tryFloodDetect = a->firstFloodDetect;                           \ 
-    u32 last_match = ones_u32;                                                \ 
-    const struct Teddy *teddy = (const struct Teddy *)fdr;                    \ 
-    const size_t iterBytes = 32;                                              \ 
-    DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n",                 \ 
-                 a->buf, a->len, a->start_offset);                            \ 
-                                                                              \ 
-    const m128 *maskBase = getMaskBase(teddy);                                \ 
-    const u32 *confBase = getConfBase(teddy);                                 \ 
-                                                                              \ 
-    FDR_EXEC_TEDDY_RES_OLD(n_msk);                                            \ 
-    const u8 *mainStart = ROUNDUP_PTR(ptr, 16);                               \ 
-    DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart);           \ 
-    if (ptr < mainStart) {                                                    \ 
-        ptr = mainStart - 16;                                                 \ 
-        m128 p_mask;                                                          \ 
-        m128 val_0 = vectoredLoad128(&p_mask, ptr, a->start_offset,           \ 
-                                     a->buf, buf_end,                         \ 
-                                     a->buf_history, a->len_history, n_msk);  \ 
-        m128 r_0 = PREP_CONF_FN(maskBase, val_0, n_msk);                      \ 
-        r_0 = or128(r_0, p_mask);                                             \ 
-        CONFIRM_TEDDY(r_0, 8, 0, VECTORING, conf_fn);                         \ 
-        ptr += 16;                                                            \ 
-    }                                                                         \ 
-                                                                              \ 
-    if (ptr + 16 <= buf_end) {                                                \ 
-        m128 r_0 = PREP_CONF_FN(maskBase, load128(ptr), n_msk);               \ 
-        CONFIRM_TEDDY(r_0, 8, 0, VECTORING, conf_fn);                         \ 
-        ptr += 16;                                                            \ 
-    }                                                                         \ 
-                                                                              \ 
-    for (; ptr + iterBytes <= buf_end; ptr += iterBytes) {                    \ 
-        __builtin_prefetch(ptr + (iterBytes * 4));                            \ 
-        CHECK_FLOOD;                                                          \ 
-        m128 r_0 = PREP_CONF_FN(maskBase, load128(ptr), n_msk);               \ 
-        CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, conf_fn);                      \ 
-        m128 r_1 = PREP_CONF_FN(maskBase, load128(ptr + 16), n_msk);          \ 
-        CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, conf_fn);                     \ 
-    }                                                                         \ 
-                                                                              \ 
-    if (ptr + 16 <= buf_end) {                                                \ 
-        m128 r_0 = PREP_CONF_FN(maskBase, load128(ptr), n_msk);               \ 
-        CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, conf_fn);                      \ 
-        ptr += 16;                                                            \ 
-    }                                                                         \ 
-                                                                              \ 
-    assert(ptr + 16 > buf_end);                                               \ 
-    if (ptr < buf_end) {                                                      \ 
-        m128 p_mask;                                                          \ 
-        m128 val_0 = vectoredLoad128(&p_mask, ptr, 0, ptr, buf_end,           \ 
-                                     a->buf_history, a->len_history, n_msk);  \ 
-        m128 r_0 = PREP_CONF_FN(maskBase, val_0, n_msk);                      \ 
-        r_0 = or128(r_0, p_mask);                                             \ 
-        CONFIRM_TEDDY(r_0, 8, 0, VECTORING, conf_fn);                         \ 
-    }                                                                         \ 
-                                                                              \ 
-    return HWLM_SUCCESS;                                                      \ 
-} while(0) 
- 
-#endif // HAVE_AVX2 HAVE_AVX512 
- 
-hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr, 
-                                  const struct FDR_Runtime_Args *a, 
-                                  hwlm_group_t control) { 
-    FDR_EXEC_TEDDY(fdr, a, control, 1, do_confWithBit_teddy); 
-} 
- 
-hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr, 
-                                      const struct FDR_Runtime_Args *a, 
-                                      hwlm_group_t control) { 
-    FDR_EXEC_TEDDY(fdr, a, control, 1, do_confWithBit_teddy); 
-} 
- 
-hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr, 
-                                  const struct FDR_Runtime_Args *a, 
-                                  hwlm_group_t control) { 
-    FDR_EXEC_TEDDY(fdr, a, control, 2, do_confWithBit_teddy); 
-} 
- 
-hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr, 
-                                      const struct FDR_Runtime_Args *a, 
-                                      hwlm_group_t control) { 
-    FDR_EXEC_TEDDY(fdr, a, control, 2, do_confWithBit_teddy); 
-} 
- 
-hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr, 
-                                  const struct FDR_Runtime_Args *a, 
-                                  hwlm_group_t control) { 
-    FDR_EXEC_TEDDY(fdr, a, control, 3, do_confWithBit_teddy); 
-} 
- 
-hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr, 
-                                      const struct FDR_Runtime_Args *a, 
-                                      hwlm_group_t control) { 
-    FDR_EXEC_TEDDY(fdr, a, control, 3, do_confWithBit_teddy); 
-} 
- 
-hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr, 
-                                  const struct FDR_Runtime_Args *a, 
-                                  hwlm_group_t control) { 
-    FDR_EXEC_TEDDY(fdr, a, control, 4, do_confWithBit_teddy); 
-} 
- 
-hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr, 
-                                      const struct FDR_Runtime_Args *a, 
-                                      hwlm_group_t control) { 
-    FDR_EXEC_TEDDY(fdr, a, control, 4, do_confWithBit_teddy); 
-} 
+static really_inline
+m512 prep_conf_teddy_m3(const m512 *lo_mask, const m512 *dup_mask,
+                        const u8 *ptr, const u64a *r_msk_base,
+                        u32 *c_0, u32 *c_16, u32 *c_32, u32 *c_48) {
+    PREP_SHUF_MASK;
+    return or512(SHIFT_OR_M3, r_msk);
+}
+
+static really_inline
+m512 prep_conf_teddy_m4(const m512 *lo_mask, const m512 *dup_mask,
+                        const u8 *ptr, const u64a *r_msk_base,
+                        u32 *c_0, u32 *c_16, u32 *c_32, u32 *c_48) {
+    PREP_SHUF_MASK;
+    return or512(SHIFT_OR_M4, r_msk);
+}
+
+#define PREP_CONF_FN_NO_REINFORCEMENT(val, n)                                 \
+    prep_conf_teddy_no_reinforcement_m##n(&lo_mask, dup_mask, val)
+
+#define PREP_CONF_FN(ptr, n)                                                  \
+    prep_conf_teddy_m##n(&lo_mask, dup_mask, ptr, r_msk_base,                 \
+                         &c_0, &c_16, &c_32, &c_48)
+
+#define PREPARE_MASKS_1 /* fills dup_mask[0] and [1] */                       \
+    dup_mask[0] = set4x128(maskBase[0]);                                      \
+    dup_mask[1] = set4x128(maskBase[1]);
+
+#define PREPARE_MASKS_2 /* _1 plus dup_mask[2] and [3] */                     \
+    PREPARE_MASKS_1                                                           \
+    dup_mask[2] = set4x128(maskBase[2]);                                      \
+    dup_mask[3] = set4x128(maskBase[3]);
+
+#define PREPARE_MASKS_3 /* _2 plus dup_mask[4] and [5] */                     \
+    PREPARE_MASKS_2                                                           \
+    dup_mask[4] = set4x128(maskBase[4]);                                      \
+    dup_mask[5] = set4x128(maskBase[5]);
+
+#define PREPARE_MASKS_4 /* _3 plus dup_mask[6] and [7] */                     \
+    PREPARE_MASKS_3                                                           \
+    dup_mask[6] = set4x128(maskBase[6]);                                      \
+    dup_mask[7] = set4x128(maskBase[7]);
+
+#define PREPARE_MASKS(n) /* declares locals in the caller's scope */          \
+    m512 lo_mask = set64x8(0xf);                                              \
+    m512 dup_mask[n * 2]; /* two entries per mask index */                    \
+    PREPARE_MASKS_##n
+
+#define FDR_EXEC_TEDDY(fdr, a, control, n_msk, conf_fn) /* 512-bit scan */    \
+do {                                                                          \
+    const u8 *buf_end = a->buf + a->len;                                      \
+    const u8 *ptr = a->buf + a->start_offset;                                 \
+    u32 floodBackoff = FLOOD_BACKOFF_START;                                   \
+    const u8 *tryFloodDetect = a->firstFloodDetect;                           \
+    u32 last_match = ones_u32;                                                \
+    const struct Teddy *teddy = (const struct Teddy *)fdr;                    \
+    const size_t iterBytes = 128; /* main loop: 2 x 64 bytes */               \
+    DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n",                 \
+                 a->buf, a->len, a->start_offset);                            \
+                                                                              \
+    const m128 *maskBase = getMaskBase(teddy);                                \
+    PREPARE_MASKS(n_msk); /* declares lo_mask, dup_mask[] */                  \
+    const u32 *confBase = getConfBase(teddy);                                 \
+                                                                              \
+    const u64a *r_msk_base = getReinforcedMaskBase(teddy, n_msk);             \
+    u32 c_0 = 0x100; /* c_* start at 0x100, above any u8 */                   \
+    u32 c_16 = 0x100;                                                         \
+    u32 c_32 = 0x100;                                                         \
+    u32 c_48 = 0x100;                                                         \
+    const u8 *mainStart = ROUNDUP_PTR(ptr, 64);                               \
+    DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart);           \
+    if (ptr < mainStart) { /* head: bytes before mainStart */                 \
+        ptr = mainStart - 64;                                                 \
+        m512 p_mask;                                                          \
+        m512 val_0 = vectoredLoad512(&p_mask, ptr, a->start_offset,           \
+                                     a->buf, buf_end,                         \
+                                     a->buf_history, a->len_history, n_msk);  \
+        m512 r_0 = PREP_CONF_FN_NO_REINFORCEMENT(val_0, n_msk);               \
+        r_0 = or512(r_0, p_mask); /* OR in p_mask from the load */            \
+        CONFIRM_TEDDY(r_0, 8, 0, VECTORING, conf_fn);                         \
+        ptr += 64;                                                            \
+    }                                                                         \
+                                                                              \
+    if (ptr + 64 <= buf_end) { /* one block, still VECTORING */               \
+        m512 r_0 = PREP_CONF_FN(ptr, n_msk);                                  \
+        CONFIRM_TEDDY(r_0, 8, 0, VECTORING, conf_fn);                         \
+        ptr += 64;                                                            \
+    }                                                                         \
+                                                                              \
+    for (; ptr + iterBytes <= buf_end; ptr += iterBytes) { /* bulk loop */    \
+        __builtin_prefetch(ptr + (iterBytes * 4));                            \
+        CHECK_FLOOD;                                                          \
+        m512 r_0 = PREP_CONF_FN(ptr, n_msk);                                  \
+        CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, conf_fn);                      \
+        m512 r_1 = PREP_CONF_FN(ptr + 64, n_msk);                             \
+        CONFIRM_TEDDY(r_1, 8, 64, NOT_CAUTIOUS, conf_fn);                     \
+    }                                                                         \
+                                                                              \
+    if (ptr + 64 <= buf_end) { /* leftover full block */                      \
+        m512 r_0 = PREP_CONF_FN(ptr, n_msk);                                  \
+        CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, conf_fn);                      \
+        ptr += 64;                                                            \
+    }                                                                         \
+                                                                              \
+    assert(ptr + 64 > buf_end);                                               \
+    if (ptr < buf_end) { /* tail shorter than 64 bytes */                     \
+        m512 p_mask;                                                          \
+        m512 val_0 = vectoredLoad512(&p_mask, ptr, 0, ptr, buf_end,           \
+                                     a->buf_history, a->len_history, n_msk);  \
+        m512 r_0 = PREP_CONF_FN_NO_REINFORCEMENT(val_0, n_msk);               \
+        r_0 = or512(r_0, p_mask);                                             \
+        CONFIRM_TEDDY(r_0, 8, 0, VECTORING, conf_fn);                         \
+    }                                                                         \
+                                                                              \
+    return HWLM_SUCCESS; /* returns from the enclosing function */            \
+} while(0)
+
+#elif defined(HAVE_AVX2) // not HAVE_AVX512 but HAVE_AVX2 reinforced teddy
+
+#ifdef ARCH_64_BIT // confirm in four 64-bit chunks
+#define CONFIRM_TEDDY(var, bucket, offset, reason, conf_fn) /* 4 x u64 */   \
+do {                                                                        \
+    if (unlikely(diff256(var, ones256()))) { /* var is not all ones */      \
+        m128 lo = movdq_lo(var);                                            \
+        m128 hi = movdq_hi(var);                                            \
+        u64a part1 = movq(lo);                                              \
+        u64a part2 = movq(rshiftbyte_m128(lo, 8)); /* bytes 8..15 of lo */  \
+        u64a part3 = movq(hi);                                              \
+        u64a part4 = movq(rshiftbyte_m128(hi, 8)); /* bytes 8..15 of hi */  \
+        CONF_CHUNK_64(part1, bucket, offset, reason, conf_fn);              \
+        CONF_CHUNK_64(part2, bucket, offset + 8, reason, conf_fn);          \
+        CONF_CHUNK_64(part3, bucket, offset + 16, reason, conf_fn);         \
+        CONF_CHUNK_64(part4, bucket, offset + 24, reason, conf_fn);         \
+    }                                                                       \
+} while(0)
+#else // no ARCH_64_BIT: confirm in eight 32-bit chunks
+#define CONFIRM_TEDDY(var, bucket, offset, reason, conf_fn) /* 8 x u32 */   \
+do {                                                                        \
+    if (unlikely(diff256(var, ones256()))) { /* var is not all ones */      \
+        m128 lo = movdq_lo(var);                                            \
+        m128 hi = movdq_hi(var);                                            \
+        u32 part1 = movd(lo);                                               \
+        u32 part2 = movd(rshiftbyte_m128(lo, 4));                           \
+        u32 part3 = movd(rshiftbyte_m128(lo, 8));                           \
+        u32 part4 = movd(rshiftbyte_m128(lo, 12));                          \
+        u32 part5 = movd(hi);                                               \
+        u32 part6 = movd(rshiftbyte_m128(hi, 4));                           \
+        u32 part7 = movd(rshiftbyte_m128(hi, 8));                           \
+        u32 part8 = movd(rshiftbyte_m128(hi, 12));                          \
+        CONF_CHUNK_32(part1, bucket, offset, reason, conf_fn);              \
+        CONF_CHUNK_32(part2, bucket, offset + 4, reason, conf_fn);          \
+        CONF_CHUNK_32(part3, bucket, offset + 8, reason, conf_fn);          \
+        CONF_CHUNK_32(part4, bucket, offset + 12, reason, conf_fn);         \
+        CONF_CHUNK_32(part5, bucket, offset + 16, reason, conf_fn);         \
+        CONF_CHUNK_32(part6, bucket, offset + 20, reason, conf_fn);         \
+        CONF_CHUNK_32(part7, bucket, offset + 24, reason, conf_fn);         \
+        CONF_CHUNK_32(part8, bucket, offset + 28, reason, conf_fn);         \
+    }                                                                       \
+} while(0)
+#endif // ARCH_64_BIT
+
+#define PREP_SHUF_MASK_NO_REINFORCEMENT(val) /* declares lo and hi */       \
+    m256 lo = and256(val, *lo_mask); /* low nibble of each byte */          \
+    m256 hi = and256(rshift64_m256(val, 4), *lo_mask) /* high nibbles */
+
+#define PREP_SHUF_MASK /* also builds r_msk from the c_* bytes */           \
+    PREP_SHUF_MASK_NO_REINFORCEMENT(load256(ptr));                          \
+    *c_128 = *(ptr + 15); /* last byte of the low 128-bit half */           \
+    m256 r_msk = set64x4(0ULL, r_msk_base[*c_128], 0ULL, r_msk_base[*c_0]); \
+    *c_0 = *(ptr + 31) /* last byte loaded; read by the next expansion */
+
+#define SHIFT_OR_M1 /* dup_mask[0..1] lookups OR'd */                       \
+    or256(pshufb_m256(dup_mask[0], lo), pshufb_m256(dup_mask[1], hi))
+
+#define SHIFT_OR_M2 /* [2..3] term, lshift128 by 1, | M1 */                 \
+    or256(lshift128_m256(or256(pshufb_m256(dup_mask[2], lo),                \
+                               pshufb_m256(dup_mask[3], hi)),               \
+                         1), SHIFT_OR_M1)
+
+#define SHIFT_OR_M3 /* [4..5] term, lshift128 by 2, | M2 */                 \
+    or256(lshift128_m256(or256(pshufb_m256(dup_mask[4], lo),                \
+                               pshufb_m256(dup_mask[5], hi)),               \
+                         2), SHIFT_OR_M2)
+
+#define SHIFT_OR_M4 /* [6..7] term, lshift128 by 3, | M3 */                 \
+    or256(lshift128_m256(or256(pshufb_m256(dup_mask[6], lo),                \
+                               pshufb_m256(dup_mask[7], hi)),               \
+                         3), SHIFT_OR_M3)
+
+static really_inline // 256-bit, 1 mask, no reinforcement term
+m256 prep_conf_teddy_no_reinforcement_m1(const m256 *lo_mask,
+                                         const m256 *dup_mask,
+                                         const m256 val) {
+    PREP_SHUF_MASK_NO_REINFORCEMENT(val); // declares lo and hi from val
+    return SHIFT_OR_M1; // expands using lo, hi and dup_mask in this scope
+}
+
+static really_inline // 256-bit, 2 masks, no reinforcement term
+m256 prep_conf_teddy_no_reinforcement_m2(const m256 *lo_mask,
+                                         const m256 *dup_mask,
+                                         const m256 val) {
+    PREP_SHUF_MASK_NO_REINFORCEMENT(val); // declares lo and hi from val
+    return SHIFT_OR_M2; // expands using lo, hi and dup_mask in this scope
+}
+
+static really_inline // 256-bit, 3 masks, no reinforcement term
+m256 prep_conf_teddy_no_reinforcement_m3(const m256 *lo_mask,
+                                         const m256 *dup_mask,
+                                         const m256 val) {
+    PREP_SHUF_MASK_NO_REINFORCEMENT(val); // declares lo and hi from val
+    return SHIFT_OR_M3; // expands using lo, hi and dup_mask in this scope
+}
+
+static really_inline // 256-bit, 4 masks, no reinforcement term
+m256 prep_conf_teddy_no_reinforcement_m4(const m256 *lo_mask,
+                                         const m256 *dup_mask,
+                                         const m256 val) {
+    PREP_SHUF_MASK_NO_REINFORCEMENT(val); // declares lo and hi from val
+    return SHIFT_OR_M4; // expands using lo, hi and dup_mask in this scope
+}
+
+static really_inline // 256-bit, 1 mask, OR'd with the r_msk term
+m256 prep_conf_teddy_m1(const m256 *lo_mask, const m256 *dup_mask,
+                        const u8 *ptr, const u64a *r_msk_base,
+                        u32 *c_0, u32 *c_128) {
+    PREP_SHUF_MASK; // load256(ptr); writes *c_0 and *c_128; declares r_msk
+    return or256(SHIFT_OR_M1, r_msk);
+}
+
+static really_inline // 256-bit, 2 masks, OR'd with the r_msk term
+m256 prep_conf_teddy_m2(const m256 *lo_mask, const m256 *dup_mask,
+                        const u8 *ptr, const u64a *r_msk_base,
+                        u32 *c_0, u32 *c_128) {
+    PREP_SHUF_MASK; // load256(ptr); writes *c_0 and *c_128; declares r_msk
+    return or256(SHIFT_OR_M2, r_msk);
+}
+
+static really_inline // 256-bit, 3 masks, OR'd with the r_msk term
+m256 prep_conf_teddy_m3(const m256 *lo_mask, const m256 *dup_mask,
+                        const u8 *ptr, const u64a *r_msk_base,
+                        u32 *c_0, u32 *c_128) {
+    PREP_SHUF_MASK; // load256(ptr); writes *c_0 and *c_128; declares r_msk
+    return or256(SHIFT_OR_M3, r_msk);
+}
+
+static really_inline // 256-bit, 4 masks, OR'd with the r_msk term
+m256 prep_conf_teddy_m4(const m256 *lo_mask, const m256 *dup_mask,
+                        const u8 *ptr, const u64a *r_msk_base,
+                        u32 *c_0, u32 *c_128) {
+    PREP_SHUF_MASK; // load256(ptr); writes *c_0 and *c_128; declares r_msk
+    return or256(SHIFT_OR_M4, r_msk);
+}
+
+#define PREP_CONF_FN_NO_REINFORCEMENT(val, n) /* pastes n into callee name */ \
+    prep_conf_teddy_no_reinforcement_m##n(&lo_mask, dup_mask, val)
+
+#define PREP_CONF_FN(ptr, n) /* c_0, c_128 are passed by address */           \
+    prep_conf_teddy_m##n(&lo_mask, dup_mask, ptr, r_msk_base, &c_0, &c_128)
+
+#define PREPARE_MASKS_1 /* fills dup_mask[0] and [1] */                       \
+    dup_mask[0] = set2x128(maskBase[0]);                                      \
+    dup_mask[1] = set2x128(maskBase[1]);
+
+#define PREPARE_MASKS_2 /* _1 plus dup_mask[2] and [3] */                     \
+    PREPARE_MASKS_1                                                           \
+    dup_mask[2] = set2x128(maskBase[2]);                                      \
+    dup_mask[3] = set2x128(maskBase[3]);
+
+#define PREPARE_MASKS_3 /* _2 plus dup_mask[4] and [5] */                     \
+    PREPARE_MASKS_2                                                           \
+    dup_mask[4] = set2x128(maskBase[4]);                                      \
+    dup_mask[5] = set2x128(maskBase[5]);
+
+#define PREPARE_MASKS_4 /* _3 plus dup_mask[6] and [7] */                     \
+    PREPARE_MASKS_3                                                           \
+    dup_mask[6] = set2x128(maskBase[6]);                                      \
+    dup_mask[7] = set2x128(maskBase[7]);
+
+#define PREPARE_MASKS(n) /* declares locals in the caller's scope */          \
+    m256 lo_mask = set32x8(0xf);                                              \
+    m256 dup_mask[n * 2]; /* two entries per mask index */                    \
+    PREPARE_MASKS_##n
+
+#define FDR_EXEC_TEDDY(fdr, a, control, n_msk, conf_fn) /* 256-bit scan */    \
+do {                                                                          \
+    const u8 *buf_end = a->buf + a->len;                                      \
+    const u8 *ptr = a->buf + a->start_offset;                                 \
+    u32 floodBackoff = FLOOD_BACKOFF_START;                                   \
+    const u8 *tryFloodDetect = a->firstFloodDetect;                           \
+    u32 last_match = ones_u32;                                                \
+    const struct Teddy *teddy = (const struct Teddy *)fdr;                    \
+    const size_t iterBytes = 64; /* main loop: 2 x 32 bytes */                \
+    DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n",                 \
+                 a->buf, a->len, a->start_offset);                            \
+                                                                              \
+    const m128 *maskBase = getMaskBase(teddy);                                \
+    PREPARE_MASKS(n_msk); /* declares lo_mask, dup_mask[] */                  \
+    const u32 *confBase = getConfBase(teddy);                                 \
+                                                                              \
+    const u64a *r_msk_base = getReinforcedMaskBase(teddy, n_msk);             \
+    u32 c_0 = 0x100; /* c_* start at 0x100, above any u8 */                   \
+    u32 c_128 = 0x100;                                                        \
+    const u8 *mainStart = ROUNDUP_PTR(ptr, 32);                               \
+    DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart);           \
+    if (ptr < mainStart) { /* head: bytes before mainStart */                 \
+        ptr = mainStart - 32;                                                 \
+        m256 p_mask;                                                          \
+        m256 val_0 = vectoredLoad256(&p_mask, ptr, a->start_offset,           \
+                                     a->buf, buf_end,                         \
+                                     a->buf_history, a->len_history, n_msk);  \
+        m256 r_0 = PREP_CONF_FN_NO_REINFORCEMENT(val_0, n_msk);               \
+        r_0 = or256(r_0, p_mask); /* OR in p_mask from the load */            \
+        CONFIRM_TEDDY(r_0, 8, 0, VECTORING, conf_fn);                         \
+        ptr += 32;                                                            \
+    }                                                                         \
+                                                                              \
+    if (ptr + 32 <= buf_end) { /* one block, still VECTORING */               \
+        m256 r_0 = PREP_CONF_FN(ptr, n_msk);                                  \
+        CONFIRM_TEDDY(r_0, 8, 0, VECTORING, conf_fn);                         \
+        ptr += 32;                                                            \
+    }                                                                         \
+                                                                              \
+    for (; ptr + iterBytes <= buf_end; ptr += iterBytes) { /* bulk loop */    \
+        __builtin_prefetch(ptr + (iterBytes * 4));                            \
+        CHECK_FLOOD;                                                          \
+        m256 r_0 = PREP_CONF_FN(ptr, n_msk);                                  \
+        CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, conf_fn);                      \
+        m256 r_1 = PREP_CONF_FN(ptr + 32, n_msk);                             \
+        CONFIRM_TEDDY(r_1, 8, 32, NOT_CAUTIOUS, conf_fn);                     \
+    }                                                                         \
+                                                                              \
+    if (ptr + 32 <= buf_end) { /* leftover full block */                      \
+        m256 r_0 = PREP_CONF_FN(ptr, n_msk);                                  \
+        CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, conf_fn);                      \
+        ptr += 32;                                                            \
+    }                                                                         \
+                                                                              \
+    assert(ptr + 32 > buf_end);                                               \
+    if (ptr < buf_end) { /* tail shorter than 32 bytes */                     \
+        m256 p_mask;                                                          \
+        m256 val_0 = vectoredLoad256(&p_mask, ptr, 0, ptr, buf_end,           \
+                                     a->buf_history, a->len_history, n_msk);  \
+        m256 r_0 = PREP_CONF_FN_NO_REINFORCEMENT(val_0, n_msk);               \
+        r_0 = or256(r_0, p_mask);                                             \
+        CONFIRM_TEDDY(r_0, 8, 0, VECTORING, conf_fn);                         \
+    }                                                                         \
+                                                                              \
+    return HWLM_SUCCESS; /* returns from the enclosing function */            \
+} while(0)
+
+#else // not defined HAVE_AVX2
+
+#ifdef ARCH_64_BIT // confirm in two 64-bit chunks
+#define CONFIRM_TEDDY(var, bucket, offset, reason, conf_fn) /* 2 x u64 */   \
+do {                                                                        \
+    if (unlikely(diff128(var, ones128()))) { /* var is not all ones */      \
+        u64a lo = movq(var);                                                \
+        u64a hi = movq(rshiftbyte_m128(var, 8)); /* bytes 8..15 of var */   \
+        CONF_CHUNK_64(lo, bucket, offset, reason, conf_fn);                 \
+        CONF_CHUNK_64(hi, bucket, offset + 8, reason, conf_fn);             \
+    }                                                                       \
+} while(0)
+#else // no ARCH_64_BIT: confirm in four 32-bit chunks
+#define CONFIRM_TEDDY(var, bucket, offset, reason, conf_fn) /* 4 x u32 */   \
+do {                                                                        \
+    if (unlikely(diff128(var, ones128()))) { /* var is not all ones */      \
+        u32 part1 = movd(var);                                              \
+        u32 part2 = movd(rshiftbyte_m128(var, 4));                          \
+        u32 part3 = movd(rshiftbyte_m128(var, 8));                          \
+        u32 part4 = movd(rshiftbyte_m128(var, 12));                         \
+        CONF_CHUNK_32(part1, bucket, offset, reason, conf_fn);              \
+        CONF_CHUNK_32(part2, bucket, offset + 4, reason, conf_fn);          \
+        CONF_CHUNK_32(part3, bucket, offset + 8, reason, conf_fn);          \
+        CONF_CHUNK_32(part4, bucket, offset + 12, reason, conf_fn);         \
+    }                                                                       \
+} while(0)
+#endif // ARCH_64_BIT
+
+static really_inline // 128-bit, 1 mask: lo and hi nibble lookups OR'd
+m128 prep_conf_teddy_m1(const m128 *maskBase, m128 val) {
+    m128 mask = set16x8(0xf); // 0xf pattern used to isolate nibbles
+    m128 lo = and128(val, mask); // low nibbles of val
+    m128 hi = and128(rshift64_m128(val, 4), mask); // high nibbles of val
+    return or128(pshufb_m128(maskBase[0 * 2], lo), // maskBase[0] with lo
+                 pshufb_m128(maskBase[0 * 2 + 1], hi)); // maskBase[1] with hi
+}
+
+static really_inline // 2 masks: m1 result | second mask pair via palignr
+m128 prep_conf_teddy_m2(const m128 *maskBase, m128 *old_1, m128 val) {
+    m128 mask = set16x8(0xf);
+    m128 lo = and128(val, mask);
+    m128 hi = and128(rshift64_m128(val, 4), mask);
+    m128 r = prep_conf_teddy_m1(maskBase, val); // maskBase[0..1] contribution
+
+    m128 res_1 = or128(pshufb_m128(maskBase[1 * 2], lo),
+                       pshufb_m128(maskBase[1 * 2 + 1], hi));
+    m128 res_shifted_1 = palignr(res_1, *old_1, 16 - 1); // uses prior res_1
+    *old_1 = res_1; // kept for the next call
+    return or128(r, res_shifted_1);
+}
+
+static really_inline // 3 masks: m2 result | third mask pair via palignr
+m128 prep_conf_teddy_m3(const m128 *maskBase, m128 *old_1, m128 *old_2,
+                        m128 val) {
+    m128 mask = set16x8(0xf);
+    m128 lo = and128(val, mask);
+    m128 hi = and128(rshift64_m128(val, 4), mask);
+    m128 r = prep_conf_teddy_m2(maskBase, old_1, val); // also updates *old_1
+
+    m128 res_2 = or128(pshufb_m128(maskBase[2 * 2], lo),
+                       pshufb_m128(maskBase[2 * 2 + 1], hi));
+    m128 res_shifted_2 = palignr(res_2, *old_2, 16 - 2); // uses prior res_2
+    *old_2 = res_2; // kept for the next call
+    return or128(r, res_shifted_2);
+}
+
+static really_inline // 4 masks: m3 result | fourth mask pair via palignr
+m128 prep_conf_teddy_m4(const m128 *maskBase, m128 *old_1, m128 *old_2,
+                        m128 *old_3, m128 val) {
+    m128 mask = set16x8(0xf);
+    m128 lo = and128(val, mask);
+    m128 hi = and128(rshift64_m128(val, 4), mask);
+    m128 r = prep_conf_teddy_m3(maskBase, old_1, old_2, val); // updates old_1/2
+
+    m128 res_3 = or128(pshufb_m128(maskBase[3 * 2], lo),
+                       pshufb_m128(maskBase[3 * 2 + 1], hi));
+    m128 res_shifted_3 = palignr(res_3, *old_3, 16 - 3); // uses prior res_3
+    *old_3 = res_3; // kept for the next call
+    return or128(r, res_shifted_3);
+}
+
+#define FDR_EXEC_TEDDY_RES_OLD_1 /* empty: PREP_CONF_FN_1 takes no state */
+
+#define FDR_EXEC_TEDDY_RES_OLD_2 /* state for prep_conf_teddy_m2 */           \
+    m128 res_old_1 = zeroes128();
+
+#define FDR_EXEC_TEDDY_RES_OLD_3 /* state for prep_conf_teddy_m3 */           \
+    m128 res_old_1 = zeroes128();                                             \
+    m128 res_old_2 = zeroes128();
+
+#define FDR_EXEC_TEDDY_RES_OLD_4 /* state for prep_conf_teddy_m4 */           \
+    m128 res_old_1 = zeroes128();                                             \
+    m128 res_old_2 = zeroes128();                                             \
+    m128 res_old_3 = zeroes128();
+
+#define FDR_EXEC_TEDDY_RES_OLD(n) FDR_EXEC_TEDDY_RES_OLD_##n /* pick by n */
+
+#define PREP_CONF_FN_1(mask_base, val) /* no res_old_* needed */              \
+    prep_conf_teddy_m1(mask_base, val)
+
+#define PREP_CONF_FN_2(mask_base, val) /* uses caller's res_old_1 */          \
+    prep_conf_teddy_m2(mask_base, &res_old_1, val)
+
+#define PREP_CONF_FN_3(mask_base, val) /* uses res_old_1 and res_old_2 */     \
+    prep_conf_teddy_m3(mask_base, &res_old_1, &res_old_2, val)
+
+#define PREP_CONF_FN_4(mask_base, val) /* uses res_old_1 .. res_old_3 */      \
+    prep_conf_teddy_m4(mask_base, &res_old_1, &res_old_2, &res_old_3, val)
+
+#define PREP_CONF_FN(mask_base, val, n) /* dispatch on mask count n */        \
+    PREP_CONF_FN_##n(mask_base, val)
+
+#define FDR_EXEC_TEDDY(fdr, a, control, n_msk, conf_fn) /* 128-bit scan */    \
+do {                                                                          \
+    const u8 *buf_end = a->buf + a->len;                                      \
+    const u8 *ptr = a->buf + a->start_offset;                                 \
+    u32 floodBackoff = FLOOD_BACKOFF_START;                                   \
+    const u8 *tryFloodDetect = a->firstFloodDetect;                           \
+    u32 last_match = ones_u32;                                                \
+    const struct Teddy *teddy = (const struct Teddy *)fdr;                    \
+    const size_t iterBytes = 32; /* main loop: 2 x 16 bytes */                \
+    DEBUG_PRINTF("params: buf %p len %zu start_offset %zu\n",                 \
+                 a->buf, a->len, a->start_offset);                            \
+                                                                              \
+    const m128 *maskBase = getMaskBase(teddy);                                \
+    const u32 *confBase = getConfBase(teddy);                                 \
+                                                                              \
+    FDR_EXEC_TEDDY_RES_OLD(n_msk); /* declares zeroed res_old_* */            \
+    const u8 *mainStart = ROUNDUP_PTR(ptr, 16);                               \
+    DEBUG_PRINTF("derive: ptr: %p mainstart %p\n", ptr, mainStart);           \
+    if (ptr < mainStart) { /* head: bytes before mainStart */                 \
+        ptr = mainStart - 16;                                                 \
+        m128 p_mask;                                                          \
+        m128 val_0 = vectoredLoad128(&p_mask, ptr, a->start_offset,           \
+                                     a->buf, buf_end,                         \
+                                     a->buf_history, a->len_history, n_msk);  \
+        m128 r_0 = PREP_CONF_FN(maskBase, val_0, n_msk);                      \
+        r_0 = or128(r_0, p_mask); /* OR in p_mask from the load */            \
+        CONFIRM_TEDDY(r_0, 8, 0, VECTORING, conf_fn);                         \
+        ptr += 16;                                                            \
+    }                                                                         \
+                                                                              \
+    if (ptr + 16 <= buf_end) { /* one block, still VECTORING */               \
+        m128 r_0 = PREP_CONF_FN(maskBase, load128(ptr), n_msk);               \
+        CONFIRM_TEDDY(r_0, 8, 0, VECTORING, conf_fn);                         \
+        ptr += 16;                                                            \
+    }                                                                         \
+                                                                              \
+    for (; ptr + iterBytes <= buf_end; ptr += iterBytes) { /* bulk loop */    \
+        __builtin_prefetch(ptr + (iterBytes * 4));                            \
+        CHECK_FLOOD;                                                          \
+        m128 r_0 = PREP_CONF_FN(maskBase, load128(ptr), n_msk);               \
+        CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, conf_fn);                      \
+        m128 r_1 = PREP_CONF_FN(maskBase, load128(ptr + 16), n_msk);          \
+        CONFIRM_TEDDY(r_1, 8, 16, NOT_CAUTIOUS, conf_fn);                     \
+    }                                                                         \
+                                                                              \
+    if (ptr + 16 <= buf_end) { /* leftover full block */                      \
+        m128 r_0 = PREP_CONF_FN(maskBase, load128(ptr), n_msk);               \
+        CONFIRM_TEDDY(r_0, 8, 0, NOT_CAUTIOUS, conf_fn);                      \
+        ptr += 16;                                                            \
+    }                                                                         \
+                                                                              \
+    assert(ptr + 16 > buf_end);                                               \
+    if (ptr < buf_end) { /* tail shorter than 16 bytes */                     \
+        m128 p_mask;                                                          \
+        m128 val_0 = vectoredLoad128(&p_mask, ptr, 0, ptr, buf_end,           \
+                                     a->buf_history, a->len_history, n_msk);  \
+        m128 r_0 = PREP_CONF_FN(maskBase, val_0, n_msk);                      \
+        r_0 = or128(r_0, p_mask);                                             \
+        CONFIRM_TEDDY(r_0, 8, 0, VECTORING, conf_fn);                         \
+    }                                                                         \
+                                                                              \
+    return HWLM_SUCCESS; /* returns from the enclosing function */            \
+} while(0)
+
+#endif // HAVE_AVX2 HAVE_AVX512
+
+hwlm_error_t fdr_exec_teddy_msks1(const struct FDR *fdr, // 1-mask entry point
+                                  const struct FDR_Runtime_Args *a,
+                                  hwlm_group_t control) {
+    FDR_EXEC_TEDDY(fdr, a, control, 1, do_confWithBit_teddy); // macro returns
+}
+
+hwlm_error_t fdr_exec_teddy_msks1_pck(const struct FDR *fdr, // body == msks1
+                                      const struct FDR_Runtime_Args *a,
+                                      hwlm_group_t control) {
+    FDR_EXEC_TEDDY(fdr, a, control, 1, do_confWithBit_teddy); // macro returns
+}
+
+hwlm_error_t fdr_exec_teddy_msks2(const struct FDR *fdr, // 2-mask entry point
+                                  const struct FDR_Runtime_Args *a,
+                                  hwlm_group_t control) {
+    FDR_EXEC_TEDDY(fdr, a, control, 2, do_confWithBit_teddy); // macro returns
+}
+
+hwlm_error_t fdr_exec_teddy_msks2_pck(const struct FDR *fdr, // body == msks2
+                                      const struct FDR_Runtime_Args *a,
+                                      hwlm_group_t control) {
+    FDR_EXEC_TEDDY(fdr, a, control, 2, do_confWithBit_teddy); // macro returns
+}
+
+hwlm_error_t fdr_exec_teddy_msks3(const struct FDR *fdr, // 3-mask entry point
+                                  const struct FDR_Runtime_Args *a,
+                                  hwlm_group_t control) {
+    FDR_EXEC_TEDDY(fdr, a, control, 3, do_confWithBit_teddy); // macro returns
+}
+
+hwlm_error_t fdr_exec_teddy_msks3_pck(const struct FDR *fdr, // body == msks3
+                                      const struct FDR_Runtime_Args *a,
+                                      hwlm_group_t control) {
+    FDR_EXEC_TEDDY(fdr, a, control, 3, do_confWithBit_teddy); // macro returns
+}
+
+hwlm_error_t fdr_exec_teddy_msks4(const struct FDR *fdr, // 4-mask entry point
+                                  const struct FDR_Runtime_Args *a,
+                                  hwlm_group_t control) {
+    FDR_EXEC_TEDDY(fdr, a, control, 4, do_confWithBit_teddy); // macro returns
+}
+
+hwlm_error_t fdr_exec_teddy_msks4_pck(const struct FDR *fdr, // body == msks4
+                                      const struct FDR_Runtime_Args *a,
+                                      hwlm_group_t control) {
+    FDR_EXEC_TEDDY(fdr, a, control, 4, do_confWithBit_teddy); // macro returns
+}
author	Ivan Blinkov <ivan@blinkov.ru>	2022-02-10 16:47:11 +0300
committer	Daniil Cherednik <dcherednik@yandex-team.ru>	2022-02-10 16:47:11 +0300
commit	5b283123c882433dafbaf6b338adeea16c1a0ea0 (patch)
tree	339adc63bce23800021202ae4a8328a843dc447a /contrib/libs/hyperscan/src/fdr/teddy.c
parent	1aeb9a455974457866f78722ad98114bafc84e8a (diff)
download	ydb-5b283123c882433dafbaf6b338adeea16c1a0ea0.tar.gz