Restoring authorship annotation for Ivan Blinkov <ivan@blinkov.ru>. Commit 2 of 2.

author: Ivan Blinkov <ivan@blinkov.ru> 2022-02-10 16:47:11 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:47:11 +0300
commit: 5b283123c882433dafbaf6b338adeea16c1a0ea0 (patch)
tree: 339adc63bce23800021202ae4a8328a843dc447a /contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.cpp
parent: 1aeb9a455974457866f78722ad98114bafc84e8a (diff)
download: ydb-5b283123c882433dafbaf6b338adeea16c1a0ea0.tar.gz
1 files changed, 389 insertions, 389 deletions
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.cpp
index 271e14fb9c..f1f829f2c1 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015-2017, Intel Corporation 
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -40,20 +40,20 @@
 
 #include "util/bitutils.h" // for CASE_CLEAR
 #include "util/charreach.h"
-#include "util/compile_context.h" 
+#include "util/compile_context.h"
 #include "util/container.h"
 #include "util/dump_charclass.h"
 #include "util/graph_range.h"
-#include "util/small_vector.h" 
-#include "util/target_info.h" 
+#include "util/small_vector.h"
+#include "util/target_info.h"
 
 #include <algorithm>
 #include <map>
 
-#include <boost/range/adaptor/map.hpp> 
- 
+#include <boost/range/adaptor/map.hpp>
+
 using namespace std;
-using boost::adaptors::map_keys; 
+using boost::adaptors::map_keys;
 
 namespace ue2 {
 
@@ -72,7 +72,7 @@ void findAccelFriendGeneration(const NGHolder &g, const CharReach &cr,
         }
 
         const CharReach &acr = g[v].char_reach;
-        DEBUG_PRINTF("checking %zu\n", g[v].index); 
+        DEBUG_PRINTF("checking %zu\n", g[v].index);
 
         if (acr.count() < WIDE_FRIEND_MIN || !acr.isSubsetOf(cr)) {
             DEBUG_PRINTF("bad reach %zu\n", acr.count());
@@ -89,7 +89,7 @@ void findAccelFriendGeneration(const NGHolder &g, const CharReach &cr,
         next_preds->insert(v);
         insert(next_cands, adjacent_vertices(v, g));
 
-        DEBUG_PRINTF("%zu is a friend indeed\n", g[v].index); 
+        DEBUG_PRINTF("%zu is a friend indeed\n", g[v].index);
         friends->insert(v);
     next_cand:;
     }
@@ -136,321 +136,321 @@ void findAccelFriends(const NGHolder &g, NFAVertex v,
 }
 
 static
-void findPaths(const NGHolder &g, NFAVertex v, 
-               const vector<CharReach> &refined_cr, 
-               vector<vector<CharReach>> *paths, 
-               const flat_set<NFAVertex> &forbidden, u32 depth) { 
-    static const u32 MAGIC_TOO_WIDE_NUMBER = 16; 
-    if (!depth) { 
-        paths->push_back({}); 
-        return; 
-    } 
-    if (v == g.accept || v == g.acceptEod) { 
-        paths->push_back({}); 
-        if (!generates_callbacks(g) || v == g.acceptEod) { 
-            paths->back().push_back(CharReach()); /* red tape options */ 
-        }
-        return; 
-    }
-
-    /* for the escape 'literals' we want to use the minimal cr so we 
-     * can be more selective */ 
-    const CharReach &cr = refined_cr[g[v].index]; 
-
-    if (out_degree(v, g) >= MAGIC_TOO_WIDE_NUMBER 
-        || hasSelfLoop(v, g)) { 
-        /* give up on pushing past this point */ 
-        paths->push_back({cr}); 
-        return; 
-    }
-
-    vector<vector<CharReach>> curr; 
+void findPaths(const NGHolder &g, NFAVertex v,
+               const vector<CharReach> &refined_cr,
+               vector<vector<CharReach>> *paths,
+               const flat_set<NFAVertex> &forbidden, u32 depth) {
+    static const u32 MAGIC_TOO_WIDE_NUMBER = 16;
+    if (!depth) {
+        paths->push_back({});
+        return;
+    }
+    if (v == g.accept || v == g.acceptEod) {
+        paths->push_back({});
+        if (!generates_callbacks(g) || v == g.acceptEod) {
+            paths->back().push_back(CharReach()); /* red tape options */
+        }
+        return;
+    }
+
+    /* for the escape 'literals' we want to use the minimal cr so we
+     * can be more selective */
+    const CharReach &cr = refined_cr[g[v].index];
+
+    if (out_degree(v, g) >= MAGIC_TOO_WIDE_NUMBER
+        || hasSelfLoop(v, g)) {
+        /* give up on pushing past this point */
+        paths->push_back({cr});
+        return;
+    }
+
+    vector<vector<CharReach>> curr;
     for (auto w : adjacent_vertices_range(v, g)) {
-        if (contains(forbidden, w)) { 
-            /* path has looped back to one of the active+boring acceleration 
-             * states.  We can ignore this path if we have sufficient back- 
-             * off. */ 
+        if (contains(forbidden, w)) {
+            /* path has looped back to one of the active+boring acceleration
+             * states.  We can ignore this path if we have sufficient back-
+             * off. */
             paths->push_back({cr});
             continue;
         }
- 
-        u32 new_depth = depth - 1; 
-        do { 
-            curr.clear(); 
-            findPaths(g, w, refined_cr, &curr, forbidden, new_depth); 
-        } while (new_depth-- && curr.size() >= MAGIC_TOO_WIDE_NUMBER); 
- 
-        for (auto &c : curr) { 
-            c.push_back(cr); 
-            paths->push_back(std::move(c)); 
+
+        u32 new_depth = depth - 1;
+        do {
+            curr.clear();
+            findPaths(g, w, refined_cr, &curr, forbidden, new_depth);
+        } while (new_depth-- && curr.size() >= MAGIC_TOO_WIDE_NUMBER);
+
+        for (auto &c : curr) {
+            c.push_back(cr);
+            paths->push_back(std::move(c));
         }
     }
 }
 
-namespace { 
-struct SAccelScheme { 
-    SAccelScheme(CharReach cr_in, u32 offset_in) 
-        : cr(std::move(cr_in)), offset(offset_in) { 
-        assert(offset <= MAX_ACCEL_DEPTH); 
+namespace {
+struct SAccelScheme {
+    SAccelScheme(CharReach cr_in, u32 offset_in)
+        : cr(std::move(cr_in)), offset(offset_in) {
+        assert(offset <= MAX_ACCEL_DEPTH);
     }
 
-    SAccelScheme() {} 
+    SAccelScheme() {}
+
+    bool operator<(const SAccelScheme &b) const {
+        const SAccelScheme &a = *this;
 
-    bool operator<(const SAccelScheme &b) const { 
-        const SAccelScheme &a = *this; 
- 
-        const size_t a_count = cr.count(), b_count = b.cr.count(); 
-        if (a_count != b_count) { 
-            return a_count < b_count; 
+        const size_t a_count = cr.count(), b_count = b.cr.count();
+        if (a_count != b_count) {
+            return a_count < b_count;
         }
 
-        /* TODO: give bonus if one is a 'caseless' character */ 
-        ORDER_CHECK(offset); 
-        ORDER_CHECK(cr); 
+        /* TODO: give bonus if one is a 'caseless' character */
+        ORDER_CHECK(offset);
+        ORDER_CHECK(cr);
         return false;
     }
 
-    CharReach cr = CharReach::dot(); 
-    u32 offset = MAX_ACCEL_DEPTH + 1; 
-}; 
+    CharReach cr = CharReach::dot();
+    u32 offset = MAX_ACCEL_DEPTH + 1;
+};
 }
 
-/** 
- * \brief Limit on the number of (recursive) calls to findBestInternal(). 
- */ 
-static constexpr size_t MAX_FINDBEST_CALLS = 1000000; 
- 
+/**
+ * \brief Limit on the number of (recursive) calls to findBestInternal().
+ */
+static constexpr size_t MAX_FINDBEST_CALLS = 1000000;
+
 static
-void findBestInternal(vector<vector<CharReach>>::const_iterator pb, 
-                      vector<vector<CharReach>>::const_iterator pe, 
-                      size_t *num_calls, const SAccelScheme &curr, 
-                      SAccelScheme *best) { 
-    assert(curr.offset <= MAX_ACCEL_DEPTH); 
-
-    if (++(*num_calls) > MAX_FINDBEST_CALLS) { 
-        DEBUG_PRINTF("hit num_calls limit %zu\n", *num_calls); 
-        return; 
-    }
-
-    DEBUG_PRINTF("paths left %zu\n", pe - pb); 
-    if (pb == pe) { 
-        if (curr < *best) { 
-            *best = curr; 
-            DEBUG_PRINTF("new best: count=%zu, class=%s, offset=%u\n", 
-                         best->cr.count(), describeClass(best->cr).c_str(), 
-                         best->offset); 
-        } 
-        return; 
-    }
-
-    DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin()); 
-
-    small_vector<SAccelScheme, 10> priority_path; 
-    priority_path.reserve(pb->size()); 
-    u32 i = 0; 
-    for (auto p = pb->begin(); p != pb->end(); ++p, i++) { 
-        SAccelScheme as(*p | curr.cr, max(i, curr.offset)); 
-        if (*best < as) { 
-            DEBUG_PRINTF("worse\n"); 
-            continue; 
-        } 
-        priority_path.push_back(move(as)); 
-    }
-
-    sort(priority_path.begin(), priority_path.end()); 
-    for (auto it = priority_path.begin(); it != priority_path.end(); ++it) { 
-        auto jt = next(it); 
-        for (; jt != priority_path.end(); ++jt) { 
-            if (!it->cr.isSubsetOf(jt->cr)) { 
-                break; 
-            } 
-        } 
-        priority_path.erase(next(it), jt); 
-        DEBUG_PRINTF("||%zu\n", it->cr.count()); 
-    }
-    DEBUG_PRINTF("---\n"); 
-
-    for (const SAccelScheme &in : priority_path) { 
-        DEBUG_PRINTF("in: count %zu\n", in.cr.count()); 
-        if (*best < in) { 
-            DEBUG_PRINTF("worse\n"); 
-            continue; 
-        } 
-        findBestInternal(pb + 1, pe, num_calls, in, best); 
-
-        if (curr.cr == best->cr) { 
-            return; /* could only get better by offset */ 
-        } 
+void findBestInternal(vector<vector<CharReach>>::const_iterator pb,
+                      vector<vector<CharReach>>::const_iterator pe,
+                      size_t *num_calls, const SAccelScheme &curr,
+                      SAccelScheme *best) {
+    assert(curr.offset <= MAX_ACCEL_DEPTH);
+
+    if (++(*num_calls) > MAX_FINDBEST_CALLS) {
+        DEBUG_PRINTF("hit num_calls limit %zu\n", *num_calls);
+        return;
+    }
+
+    DEBUG_PRINTF("paths left %zu\n", pe - pb);
+    if (pb == pe) {
+        if (curr < *best) {
+            *best = curr;
+            DEBUG_PRINTF("new best: count=%zu, class=%s, offset=%u\n",
+                         best->cr.count(), describeClass(best->cr).c_str(),
+                         best->offset);
+        }
+        return;
+    }
+
+    DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin());
+
+    small_vector<SAccelScheme, 10> priority_path;
+    priority_path.reserve(pb->size());
+    u32 i = 0;
+    for (auto p = pb->begin(); p != pb->end(); ++p, i++) {
+        SAccelScheme as(*p | curr.cr, max(i, curr.offset));
+        if (*best < as) {
+            DEBUG_PRINTF("worse\n");
+            continue;
+        }
+        priority_path.push_back(move(as));
+    }
+
+    sort(priority_path.begin(), priority_path.end());
+    for (auto it = priority_path.begin(); it != priority_path.end(); ++it) {
+        auto jt = next(it);
+        for (; jt != priority_path.end(); ++jt) {
+            if (!it->cr.isSubsetOf(jt->cr)) {
+                break;
+            }
+        }
+        priority_path.erase(next(it), jt);
+        DEBUG_PRINTF("||%zu\n", it->cr.count());
+    }
+    DEBUG_PRINTF("---\n");
+
+    for (const SAccelScheme &in : priority_path) {
+        DEBUG_PRINTF("in: count %zu\n", in.cr.count());
+        if (*best < in) {
+            DEBUG_PRINTF("worse\n");
+            continue;
+        }
+        findBestInternal(pb + 1, pe, num_calls, in, best);
+
+        if (curr.cr == best->cr) {
+            return; /* could only get better by offset */
+        }
     }
 }
 
 static
-SAccelScheme findBest(const vector<vector<CharReach>> &paths, 
-                      const CharReach &terminating) { 
-    SAccelScheme curr(terminating, 0U); 
-    SAccelScheme best; 
-    size_t num_calls = 0; 
-    findBestInternal(paths.begin(), paths.end(), &num_calls, curr, &best); 
-    DEBUG_PRINTF("findBest completed, num_calls=%zu\n", num_calls); 
-    DEBUG_PRINTF("selected scheme: count=%zu, class=%s, offset=%u\n", 
-                 best.cr.count(), describeClass(best.cr).c_str(), best.offset); 
-    return best; 
-} 
-
-namespace { 
-struct DAccelScheme { 
-    DAccelScheme(CharReach cr_in, u32 offset_in) 
-        : double_cr(std::move(cr_in)), double_offset(offset_in) { 
-        assert(double_offset <= MAX_ACCEL_DEPTH); 
-    } 
-
-    bool operator<(const DAccelScheme &b) const { 
-        const DAccelScheme &a = *this; 
-
-        size_t a_dcount = a.double_cr.count(); 
-        size_t b_dcount = b.double_cr.count(); 
-
-        assert(!a.double_byte.empty() || a_dcount || a.double_offset); 
-        assert(!b.double_byte.empty() || b_dcount || b.double_offset); 
-
-        if (a_dcount != b_dcount) { 
-            return a_dcount < b_dcount; 
-        } 
-
-        if (!a_dcount) { 
-            bool cd_a = buildDvermMask(a.double_byte); 
-            bool cd_b = buildDvermMask(b.double_byte); 
-            if (cd_a != cd_b) { 
-                return cd_a > cd_b; 
+SAccelScheme findBest(const vector<vector<CharReach>> &paths,
+                      const CharReach &terminating) {
+    SAccelScheme curr(terminating, 0U);
+    SAccelScheme best;
+    size_t num_calls = 0;
+    findBestInternal(paths.begin(), paths.end(), &num_calls, curr, &best);
+    DEBUG_PRINTF("findBest completed, num_calls=%zu\n", num_calls);
+    DEBUG_PRINTF("selected scheme: count=%zu, class=%s, offset=%u\n",
+                 best.cr.count(), describeClass(best.cr).c_str(), best.offset);
+    return best;
+}
+
+namespace {
+struct DAccelScheme {
+    DAccelScheme(CharReach cr_in, u32 offset_in)
+        : double_cr(std::move(cr_in)), double_offset(offset_in) {
+        assert(double_offset <= MAX_ACCEL_DEPTH);
+    }
+
+    bool operator<(const DAccelScheme &b) const {
+        const DAccelScheme &a = *this;
+
+        size_t a_dcount = a.double_cr.count();
+        size_t b_dcount = b.double_cr.count();
+
+        assert(!a.double_byte.empty() || a_dcount || a.double_offset);
+        assert(!b.double_byte.empty() || b_dcount || b.double_offset);
+
+        if (a_dcount != b_dcount) {
+            return a_dcount < b_dcount;
+        }
+
+        if (!a_dcount) {
+            bool cd_a = buildDvermMask(a.double_byte);
+            bool cd_b = buildDvermMask(b.double_byte);
+            if (cd_a != cd_b) {
+                return cd_a > cd_b;
             }
         }
 
-        ORDER_CHECK(double_byte.size()); 
-        ORDER_CHECK(double_offset); 
+        ORDER_CHECK(double_byte.size());
+        ORDER_CHECK(double_offset);
 
-        /* TODO: give bonus if one is a 'caseless' character */ 
-        ORDER_CHECK(double_byte); 
-        ORDER_CHECK(double_cr); 
+        /* TODO: give bonus if one is a 'caseless' character */
+        ORDER_CHECK(double_byte);
+        ORDER_CHECK(double_cr);
 
-        return false; 
+        return false;
     }
 
-    flat_set<pair<u8, u8>> double_byte; 
-    CharReach double_cr; 
-    u32 double_offset = 0; 
-}; 
+    flat_set<pair<u8, u8>> double_byte;
+    CharReach double_cr;
+    u32 double_offset = 0;
+};
 }
 
 static
-DAccelScheme make_double_accel(DAccelScheme as, CharReach cr_1, 
-                               const CharReach &cr_2_in, u32 offset_in) { 
-    cr_1 &= ~as.double_cr; 
-    CharReach cr_2 = cr_2_in & ~as.double_cr; 
-    u32 offset = offset_in; 
- 
-    if (cr_1.none()) { 
-        DEBUG_PRINTF("empty first element\n"); 
-        ENSURE_AT_LEAST(&as.double_offset, offset); 
-        return as; 
-    }
- 
-    if (cr_2_in != cr_2 || cr_2.none()) { 
-        offset = offset_in + 1; 
-    }
-
-    size_t two_count = cr_1.count() * cr_2.count(); 
-
-    DEBUG_PRINTF("will generate raw %zu pairs\n", two_count); 
- 
-    if (!two_count) { 
-        DEBUG_PRINTF("empty element\n"); 
-        ENSURE_AT_LEAST(&as.double_offset, offset); 
-        return as; 
-    }
-
-    if (two_count > DOUBLE_SHUFTI_LIMIT) { 
-        if (cr_2.count() < cr_1.count()) { 
-            as.double_cr |= cr_2; 
-            offset = offset_in + 1; 
-        } else { 
-            as.double_cr |= cr_1; 
-        }
-    } else { 
-        for (auto i = cr_1.find_first(); i != CharReach::npos; 
-             i = cr_1.find_next(i)) { 
-            for (auto j = cr_2.find_first(); j != CharReach::npos; 
-                 j = cr_2.find_next(j)) { 
-                as.double_byte.emplace(i, j); 
-            } 
-        }
-    }
-
-    ENSURE_AT_LEAST(&as.double_offset, offset); 
-    DEBUG_PRINTF("construct da %zu pairs, %zu singles, offset %u\n", 
-                 as.double_byte.size(), as.double_cr.count(), as.double_offset); 
-    return as; 
+DAccelScheme make_double_accel(DAccelScheme as, CharReach cr_1,
+                               const CharReach &cr_2_in, u32 offset_in) {
+    cr_1 &= ~as.double_cr;
+    CharReach cr_2 = cr_2_in & ~as.double_cr;
+    u32 offset = offset_in;
+
+    if (cr_1.none()) {
+        DEBUG_PRINTF("empty first element\n");
+        ENSURE_AT_LEAST(&as.double_offset, offset);
+        return as;
+    }
+
+    if (cr_2_in != cr_2 || cr_2.none()) {
+        offset = offset_in + 1;
+    }
+
+    size_t two_count = cr_1.count() * cr_2.count();
+
+    DEBUG_PRINTF("will generate raw %zu pairs\n", two_count);
+
+    if (!two_count) {
+        DEBUG_PRINTF("empty element\n");
+        ENSURE_AT_LEAST(&as.double_offset, offset);
+        return as;
+    }
+
+    if (two_count > DOUBLE_SHUFTI_LIMIT) {
+        if (cr_2.count() < cr_1.count()) {
+            as.double_cr |= cr_2;
+            offset = offset_in + 1;
+        } else {
+            as.double_cr |= cr_1;
+        }
+    } else {
+        for (auto i = cr_1.find_first(); i != CharReach::npos;
+             i = cr_1.find_next(i)) {
+            for (auto j = cr_2.find_first(); j != CharReach::npos;
+                 j = cr_2.find_next(j)) {
+                as.double_byte.emplace(i, j);
+            }
+        }
+    }
+
+    ENSURE_AT_LEAST(&as.double_offset, offset);
+    DEBUG_PRINTF("construct da %zu pairs, %zu singles, offset %u\n",
+                 as.double_byte.size(), as.double_cr.count(), as.double_offset);
+    return as;
 }
 
 static
-void findDoubleBest(vector<vector<CharReach> >::const_iterator pb, 
+void findDoubleBest(vector<vector<CharReach> >::const_iterator pb,
               vector<vector<CharReach> >::const_iterator pe,
-              const DAccelScheme &curr, DAccelScheme *best) { 
-    assert(curr.double_offset <= MAX_ACCEL_DEPTH); 
+              const DAccelScheme &curr, DAccelScheme *best) {
+    assert(curr.double_offset <= MAX_ACCEL_DEPTH);
     DEBUG_PRINTF("paths left %zu\n", pe - pb);
-    DEBUG_PRINTF("current base: %zu pairs, %zu singles, offset %u\n", 
-                 curr.double_byte.size(), curr.double_cr.count(), 
-                 curr.double_offset); 
+    DEBUG_PRINTF("current base: %zu pairs, %zu singles, offset %u\n",
+                 curr.double_byte.size(), curr.double_cr.count(),
+                 curr.double_offset);
     if (pb == pe) {
-        if (curr < *best) { 
-            *best = curr; 
-            DEBUG_PRINTF("new best: %zu pairs, %zu singles, offset %u\n", 
-                         best->double_byte.size(), best->double_cr.count(), 
-                         best->double_offset); 
-        } 
+        if (curr < *best) {
+            *best = curr;
+            DEBUG_PRINTF("new best: %zu pairs, %zu singles, offset %u\n",
+                         best->double_byte.size(), best->double_cr.count(),
+                         best->double_offset);
+        }
         return;
     }
 
     DEBUG_PRINTF("p len %zu\n", pb->end() - pb->begin());
 
-    small_vector<DAccelScheme, 10> priority_path; 
-    priority_path.reserve(pb->size()); 
+    small_vector<DAccelScheme, 10> priority_path;
+    priority_path.reserve(pb->size());
     u32 i = 0;
-    for (auto p = pb->begin(); p != pb->end() && next(p) != pb->end(); 
+    for (auto p = pb->begin(); p != pb->end() && next(p) != pb->end();
          ++p, i++) {
-        DAccelScheme as = make_double_accel(curr, *p, *next(p), i); 
-        if (*best < as) { 
-            DEBUG_PRINTF("worse\n"); 
-            continue; 
-        } 
-        priority_path.push_back(move(as)); 
+        DAccelScheme as = make_double_accel(curr, *p, *next(p), i);
+        if (*best < as) {
+            DEBUG_PRINTF("worse\n");
+            continue;
+        }
+        priority_path.push_back(move(as));
     }
 
     sort(priority_path.begin(), priority_path.end());
-    DEBUG_PRINTF("%zu candidates for this path\n", priority_path.size()); 
-    DEBUG_PRINTF("input best: %zu pairs, %zu singles, offset %u\n", 
-                 best->double_byte.size(), best->double_cr.count(), 
-                 best->double_offset); 
-
-    for (const DAccelScheme &in : priority_path) { 
-        DEBUG_PRINTF("in: %zu pairs, %zu singles, offset %u\n", 
-                     in.double_byte.size(), in.double_cr.count(), 
-                     in.double_offset); 
-        if (*best < in) { 
+    DEBUG_PRINTF("%zu candidates for this path\n", priority_path.size());
+    DEBUG_PRINTF("input best: %zu pairs, %zu singles, offset %u\n",
+                 best->double_byte.size(), best->double_cr.count(),
+                 best->double_offset);
+
+    for (const DAccelScheme &in : priority_path) {
+        DEBUG_PRINTF("in: %zu pairs, %zu singles, offset %u\n",
+                     in.double_byte.size(), in.double_cr.count(),
+                     in.double_offset);
+        if (*best < in) {
             DEBUG_PRINTF("worse\n");
             continue;
         }
-        findDoubleBest(pb + 1, pe, in, best); 
+        findDoubleBest(pb + 1, pe, in, best);
     }
 }
 
 #ifdef DEBUG
 static
-void dumpPaths(const vector<vector<CharReach>> &paths) { 
-    for (const auto &path : paths) { 
+void dumpPaths(const vector<vector<CharReach>> &paths) {
+    for (const auto &path : paths) {
         DEBUG_PRINTF("path: [");
-        for (const auto &cr : path) { 
+        for (const auto &cr : path) {
             printf(" [");
-            describeClass(stdout, cr, 20, CC_OUT_TEXT); 
+            describeClass(stdout, cr, 20, CC_OUT_TEXT);
             printf("]");
         }
         printf(" ]\n");
@@ -459,13 +459,13 @@ void dumpPaths(const vector<vector<CharReach>> &paths) {
 #endif
 
 static
-void blowoutPathsLessStrictSegment(vector<vector<CharReach> > &paths) { 
+void blowoutPathsLessStrictSegment(vector<vector<CharReach> > &paths) {
     /* paths segments which are a superset of an earlier segment should never be
      * picked as an acceleration segment -> to improve processing just replace
      * with dot */
-    for (auto &p : paths) { 
-        for (auto it = p.begin(); it != p.end();  ++it) { 
-            for (auto jt = next(it); jt != p.end(); ++jt) { 
+    for (auto &p : paths) {
+        for (auto it = p.begin(); it != p.end();  ++it) {
+            for (auto jt = next(it); jt != p.end(); ++jt) {
                 if (it->isSubsetOf(*jt)) {
                     *jt = CharReach::dot();
                 }
@@ -475,10 +475,10 @@ void blowoutPathsLessStrictSegment(vector<vector<CharReach> > &paths) {
 }
 
 static
-void unifyPathsLastSegment(vector<vector<CharReach> > &paths) { 
+void unifyPathsLastSegment(vector<vector<CharReach> > &paths) {
     /* try to unify paths which only differ in the last segment */
-    for (vector<vector<CharReach> >::iterator p = paths.begin(); 
-         p != paths.end() && p + 1 != paths.end();) { 
+    for (vector<vector<CharReach> >::iterator p = paths.begin();
+         p != paths.end() && p + 1 != paths.end();) {
         vector<CharReach> &a = *p;
         vector<CharReach> &b = *(p + 1);
 
@@ -496,7 +496,7 @@ void unifyPathsLastSegment(vector<vector<CharReach> > &paths) {
         if (i == a.size() - 1) {
             /* we can unify these paths */
             a[i] |= b[i];
-            paths.erase(p + 1); 
+            paths.erase(p + 1);
         } else {
             ++p;
         }
@@ -504,117 +504,117 @@ void unifyPathsLastSegment(vector<vector<CharReach> > &paths) {
 }
 
 static
-void improvePaths(vector<vector<CharReach> > &paths) { 
+void improvePaths(vector<vector<CharReach> > &paths) {
 #ifdef DEBUG
     DEBUG_PRINTF("orig paths\n");
-    dumpPaths(paths); 
+    dumpPaths(paths);
 #endif
     blowoutPathsLessStrictSegment(paths);
 
-    sort(paths.begin(), paths.end()); 
+    sort(paths.begin(), paths.end());
 
     unifyPathsLastSegment(paths);
 
 #ifdef DEBUG
     DEBUG_PRINTF("opt paths\n");
-    dumpPaths(paths); 
+    dumpPaths(paths);
 #endif
 }
 
-#define MAX_DOUBLE_ACCEL_PATHS 10 
- 
-static 
-DAccelScheme findBestDoubleAccelScheme(vector<vector<CharReach> > paths, 
-                                       const CharReach &terminating) { 
-    DEBUG_PRINTF("looking for double accel, %zu terminating symbols\n", 
-                 terminating.count()); 
-    unifyPathsLastSegment(paths); 
- 
-#ifdef DEBUG 
-    DEBUG_PRINTF("paths:\n"); 
-    dumpPaths(paths); 
-#endif 
- 
-    /* if there are too many paths, shorten the paths to reduce the number of 
-     * distinct paths we have to consider */ 
-    while (paths.size() > MAX_DOUBLE_ACCEL_PATHS) { 
-        for (auto &p : paths) { 
-            if (p.empty()) { 
-                return DAccelScheme(terminating, 0U); 
-            } 
-            p.pop_back(); 
-        } 
-        unifyPathsLastSegment(paths); 
-    } 
- 
-    if (paths.empty()) { 
-        return DAccelScheme(terminating, 0U); 
-    } 
- 
-    DAccelScheme curr(terminating, 0U); 
-    DAccelScheme best(CharReach::dot(), 0U); 
-    findDoubleBest(paths.begin(), paths.end(), curr, &best); 
-    DEBUG_PRINTF("da %zu pairs, %zu singles\n", best.double_byte.size(), 
-                 best.double_cr.count()); 
-    return best; 
-} 
- 
-#define MAX_EXPLORE_PATHS 40 
- 
-AccelScheme findBestAccelScheme(vector<vector<CharReach>> paths, 
-                                const CharReach &terminating, 
-                                bool look_for_double_byte) { 
-    AccelScheme rv; 
-    if (look_for_double_byte) { 
-        DAccelScheme da = findBestDoubleAccelScheme(paths, terminating); 
-        if (da.double_byte.size() <= DOUBLE_SHUFTI_LIMIT) { 
-            rv.double_byte = std::move(da.double_byte); 
-            rv.double_cr = move(da.double_cr); 
-            rv.double_offset = da.double_offset; 
-        } 
-    } 
- 
-    improvePaths(paths); 
- 
-    DEBUG_PRINTF("we have %zu paths\n", paths.size()); 
-    if (paths.size() > MAX_EXPLORE_PATHS) { 
-        return rv; /* too many paths to explore */ 
-    } 
- 
-    /* if we were smart we would do something netflowy on the paths to find the 
-     * best cut. But we aren't, so we will just brute force it. 
-     */ 
-    SAccelScheme best = findBest(paths, terminating); 
- 
-    /* find best is a bit lazy in terms of minimising the offset, see if we can 
-     * make it better. need to find the min max offset that we need.*/ 
-    u32 offset = 0; 
-    for (const auto &path : paths) { 
-        u32 i = 0; 
-        for (const auto &cr : path) { 
-            if (cr.isSubsetOf(best.cr)) { 
-                break; 
-            } 
-            i++; 
-        } 
-        offset = MAX(offset, i); 
-    } 
-    assert(offset <= best.offset); 
-    best.offset = offset; 
- 
-    rv.offset = best.offset; 
-    rv.cr = best.cr; 
-    if (rv.cr.count() < rv.double_cr.count()) { 
-        rv.double_byte.clear(); 
-    } 
- 
-    return rv; 
-} 
- 
+#define MAX_DOUBLE_ACCEL_PATHS 10
+
+static
+DAccelScheme findBestDoubleAccelScheme(vector<vector<CharReach> > paths,
+                                       const CharReach &terminating) {
+    DEBUG_PRINTF("looking for double accel, %zu terminating symbols\n",
+                 terminating.count());
+    unifyPathsLastSegment(paths);
+
+#ifdef DEBUG
+    DEBUG_PRINTF("paths:\n");
+    dumpPaths(paths);
+#endif
+
+    /* if there are too many paths, shorten the paths to reduce the number of
+     * distinct paths we have to consider */
+    while (paths.size() > MAX_DOUBLE_ACCEL_PATHS) {
+        for (auto &p : paths) {
+            if (p.empty()) {
+                return DAccelScheme(terminating, 0U);
+            }
+            p.pop_back();
+        }
+        unifyPathsLastSegment(paths);
+    }
+
+    if (paths.empty()) {
+        return DAccelScheme(terminating, 0U);
+    }
+
+    DAccelScheme curr(terminating, 0U);
+    DAccelScheme best(CharReach::dot(), 0U);
+    findDoubleBest(paths.begin(), paths.end(), curr, &best);
+    DEBUG_PRINTF("da %zu pairs, %zu singles\n", best.double_byte.size(),
+                 best.double_cr.count());
+    return best;
+}
+
+#define MAX_EXPLORE_PATHS 40
+
+AccelScheme findBestAccelScheme(vector<vector<CharReach>> paths,
+                                const CharReach &terminating,
+                                bool look_for_double_byte) {
+    AccelScheme rv;
+    if (look_for_double_byte) {
+        DAccelScheme da = findBestDoubleAccelScheme(paths, terminating);
+        if (da.double_byte.size() <= DOUBLE_SHUFTI_LIMIT) {
+            rv.double_byte = std::move(da.double_byte);
+            rv.double_cr = move(da.double_cr);
+            rv.double_offset = da.double_offset;
+        }
+    }
+
+    improvePaths(paths);
+
+    DEBUG_PRINTF("we have %zu paths\n", paths.size());
+    if (paths.size() > MAX_EXPLORE_PATHS) {
+        return rv; /* too many paths to explore */
+    }
+
+    /* if we were smart we would do something netflowy on the paths to find the
+     * best cut. But we aren't, so we will just brute force it.
+     */
+    SAccelScheme best = findBest(paths, terminating);
+
+    /* find best is a bit lazy in terms of minimising the offset, see if we can
+     * make it better. need to find the min max offset that we need.*/
+    u32 offset = 0;
+    for (const auto &path : paths) {
+        u32 i = 0;
+        for (const auto &cr : path) {
+            if (cr.isSubsetOf(best.cr)) {
+                break;
+            }
+            i++;
+        }
+        offset = MAX(offset, i);
+    }
+    assert(offset <= best.offset);
+    best.offset = offset;
+
+    rv.offset = best.offset;
+    rv.cr = best.cr;
+    if (rv.cr.count() < rv.double_cr.count()) {
+        rv.double_byte.clear();
+    }
+
+    return rv;
+}
+
 AccelScheme nfaFindAccel(const NGHolder &g, const vector<NFAVertex> &verts,
                          const vector<CharReach> &refined_cr,
                          const map<NFAVertex, BoundedRepeatSummary> &br_cyclic,
-                         bool allow_wide, bool look_for_double_byte) { 
+                         bool allow_wide, bool look_for_double_byte) {
     CharReach terminating;
     for (auto v : verts) {
         if (!hasSelfLoop(v, g)) {
@@ -633,15 +633,15 @@ AccelScheme nfaFindAccel(const NGHolder &g, const vector<NFAVertex> &verts,
         return AccelScheme(); /* invalid scheme */
     }
 
-    vector<vector<CharReach>> paths; 
+    vector<vector<CharReach>> paths;
     flat_set<NFAVertex> ignore_vert_set(verts.begin(), verts.end());
 
     /* Note: we can not in general (TODO: ignore when possible) ignore entries
      * into the bounded repeat cyclic states as that is when the magic happens
      */
-    for (auto v : br_cyclic | map_keys) { 
+    for (auto v : br_cyclic | map_keys) {
         /* TODO: can allow if repeatMin <= 1 ? */
-        ignore_vert_set.erase(v); 
+        ignore_vert_set.erase(v);
     }
 
     for (auto v : verts) {
@@ -654,12 +654,12 @@ AccelScheme nfaFindAccel(const NGHolder &g, const vector<NFAVertex> &verts,
     }
 
     /* paths built wrong: reverse them */
-    for (auto &path : paths) { 
-        reverse(path.begin(), path.end()); 
+    for (auto &path : paths) {
+        reverse(path.begin(), path.end());
     }
 
-    return findBestAccelScheme(std::move(paths), terminating, 
-                               look_for_double_byte); 
+    return findBestAccelScheme(std::move(paths), terminating,
+                               look_for_double_byte);
 }
 
 NFAVertex get_sds_or_proxy(const NGHolder &g) {
@@ -668,7 +668,7 @@ NFAVertex get_sds_or_proxy(const NGHolder &g) {
         return g.startDs;
     }
 
-    NFAVertex v = NGHolder::null_vertex(); 
+    NFAVertex v = NGHolder::null_vertex();
     for (auto w : adjacent_vertices_range(g.start, g)) {
         if (w != g.startDs) {
             if (!v) {
@@ -685,7 +685,7 @@ NFAVertex get_sds_or_proxy(const NGHolder &g) {
 
     while (true) {
         if (hasSelfLoop(v, g)) {
-            DEBUG_PRINTF("woot %zu\n", g[v].index); 
+            DEBUG_PRINTF("woot %zu\n", g[v].index);
             return v;
         }
         if (out_degree(v, g) != 1) {
@@ -719,7 +719,7 @@ bool nfaCheckAccel(const NGHolder &g, NFAVertex v,
     CharReach terminating = g[v].char_reach;
     terminating.flip();
 
-    DEBUG_PRINTF("vertex %zu is cyclic and has %zu stop chars%s\n", 
+    DEBUG_PRINTF("vertex %zu is cyclic and has %zu stop chars%s\n",
                  g[v].index, terminating.count(),
                  allow_wide ? " (w)" : "");
 
@@ -789,9 +789,9 @@ depth_done:
     for (unsigned int i = 0; i < depth; i++) {
         if (depthReach[i].none()) {
             DEBUG_PRINTF("red tape acceleration engine depth %u\n", i);
-            *as = AccelScheme(); 
-            as->offset = i; 
-            as->cr = CharReach(); 
+            *as = AccelScheme();
+            as->offset = i;
+            as->cr = CharReach();
             return true;
         }
     }
@@ -806,8 +806,8 @@ depth_done:
                 || (cra.count() == 2 && crb.count() == 2
                     && cra.isBit5Insensitive() && crb.isBit5Insensitive())) {
                 DEBUG_PRINTF("two-byte vermicelli, depth %u\n", i);
-                *as = AccelScheme(); 
-                as->offset = i; 
+                *as = AccelScheme();
+                as->offset = i;
                 return true;
             }
         }
@@ -817,19 +817,19 @@ depth_done:
     // literals)
     if (depth > 1) {
         for (unsigned int i = 0; i < (depth - 1); i++) {
-            if (depthReach[i].count() * depthReach[i+1].count() 
-                <= DOUBLE_SHUFTI_LIMIT) { 
+            if (depthReach[i].count() * depthReach[i+1].count()
+                <= DOUBLE_SHUFTI_LIMIT) {
                 DEBUG_PRINTF("two-byte shufti, depth %u\n", i);
-                *as = AccelScheme(); 
-                as->offset = i; 
+                *as = AccelScheme();
+                as->offset = i;
                 return true;
             }
         }
     }
 
-    // Look for offset accel schemes verm/shufti; 
+    // Look for offset accel schemes verm/shufti;
     vector<NFAVertex> verts(1, v);
-    *as = nfaFindAccel(g, verts, refined_cr, br_cyclic, allow_wide, true); 
+    *as = nfaFindAccel(g, verts, refined_cr, br_cyclic, allow_wide, true);
     DEBUG_PRINTF("as width %zu\n", as->cr.count());
     return as->cr.count() <= ACCEL_MAX_STOP_CHAR || allow_wide;
 }
author	Ivan Blinkov <ivan@blinkov.ru>	2022-02-10 16:47:11 +0300
committer	Daniil Cherednik <dcherednik@yandex-team.ru>	2022-02-10 16:47:11 +0300
commit	5b283123c882433dafbaf6b338adeea16c1a0ea0 (patch)
tree	339adc63bce23800021202ae4a8328a843dc447a /contrib/libs/hyperscan/src/nfagraph/ng_limex_accel.cpp
parent	1aeb9a455974457866f78722ad98114bafc84e8a (diff)
download	ydb-5b283123c882433dafbaf6b338adeea16c1a0ea0.tar.gz