Restoring authorship annotation for <bnagaev@yandex-team.ru>. Commit 1 of 2.

author: bnagaev <bnagaev@yandex-team.ru> 2022-02-10 16:47:04 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:47:04 +0300
commit: d6449ba66291ff0c0d352c82e6eb3efb4c8a7e8d (patch)
tree: d5dca6d44593f5e52556a1cc7b1ab0386e096ebe /contrib/libs/hyperscan/src/nfagraph/ng_asserts.cpp
parent: 1861d4c1402bb2c67a3e6b43b51706081b74508a (diff)
download: ydb-d6449ba66291ff0c0d352c82e6eb3efb4c8a7e8d.tar.gz
1 files changed, 503 insertions, 503 deletions
diff --git a/contrib/libs/hyperscan/src/nfagraph/ng_asserts.cpp b/contrib/libs/hyperscan/src/nfagraph/ng_asserts.cpp
index 8812afadb7..24d4ecace1 100644
--- a/contrib/libs/hyperscan/src/nfagraph/ng_asserts.cpp
+++ b/contrib/libs/hyperscan/src/nfagraph/ng_asserts.cpp
@@ -1,558 +1,558 @@
-/*
+/* 
  * Copyright (c) 2015-2017, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- *  * Redistributions of source code must retain the above copyright notice,
- *    this list of conditions and the following disclaimer.
- *  * Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *  * Neither the name of Intel Corporation nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \file
- * \brief Resolve special assert vertices.
- *
- * The assert resolution algorithm proceeds by iterating over those edges with
- * assertion flags, considering source and target vertices of each edge. If a
- * vertex has a superset of the reachability demanded by the assertion on the
- * edge, it is split into alternatives providing the word and non-word paths
- * through that vertex.
- *
- * A great deal of the complexity in the resolveAsserts pass is devoted to
- * handling these assertions when the UCP flag is specified (meaning \\w and \\W
- * are implemented with Unicode properties, rather than their ASCII
- * interpretation) and the prefiltering flag is also used. Complete,
- * non-prefiltering UCP support is not available yet.
- */
-#include "ng_asserts.h"
-
-#include "ng.h"
-#include "ng_prune.h"
-#include "ng_redundancy.h"
-#include "ng_util.h"
+ * 
+ * Redistribution and use in source and binary forms, with or without 
+ * modification, are permitted provided that the following conditions are met: 
+ * 
+ *  * Redistributions of source code must retain the above copyright notice, 
+ *    this list of conditions and the following disclaimer. 
+ *  * Redistributions in binary form must reproduce the above copyright 
+ *    notice, this list of conditions and the following disclaimer in the 
+ *    documentation and/or other materials provided with the distribution. 
+ *  * Neither the name of Intel Corporation nor the names of its contributors 
+ *    may be used to endorse or promote products derived from this software 
+ *    without specific prior written permission. 
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
+ * POSSIBILITY OF SUCH DAMAGE. 
+ */ 
+ 
+/** \file 
+ * \brief Resolve special assert vertices. 
+ * 
+ * The assert resolution algorithm proceeds by iterating over those edges with 
+ * assertion flags, considering source and target vertices of each edge. If a 
+ * vertex has a superset of the reachability demanded by the assertion on the 
+ * edge, it is split into alternatives providing the word and non-word paths 
+ * through that vertex. 
+ * 
+ * A great deal of the complexity in the resolveAsserts pass is devoted to 
+ * handling these assertions when the UCP flag is specified (meaning \\w and \\W 
+ * are implemented with Unicode properties, rather than their ASCII 
+ * interpretation) and the prefiltering flag is also used. Complete, 
+ * non-prefiltering UCP support is not available yet. 
+ */ 
+#include "ng_asserts.h" 
+ 
+#include "ng.h" 
+#include "ng_prune.h" 
+#include "ng_redundancy.h" 
+#include "ng_util.h" 
 #include "compiler/compiler.h"
-#include "parser/position.h" // for POS flags
-#include "util/bitutils.h" // for findAndClearLSB_32
-#include "util/boundary_reports.h"
-#include "util/container.h"
-#include "util/compile_context.h"
-#include "util/compile_error.h"
-#include "util/graph_range.h"
-#include "util/report_manager.h"
-#include "util/unicode_def.h"
-
-#include <queue>
-
-using namespace std;
-
-namespace ue2 {
-
-/** \brief Hard limit on the maximum number of vertices we'll clone before we
- * throw up our hands and report 'Pattern too large.' */
-static const size_t MAX_CLONED_VERTICES = 2048;
-
-/** \brief The definition of \\w, since we use it everywhere in here. */
-static const CharReach CHARREACH_WORD(CharReach('a', 'z') |
-        CharReach('A', 'Z') | CharReach('0', '9') | CharReach('_'));
-
-/** \brief \\W is the inverse of \\w */
-static const CharReach CHARREACH_NONWORD(~CHARREACH_WORD);
-
-/** \brief Prefiltering definition of \\w for UCP mode.
- *
- * Includes all high bytes as to capture all non-ASCII, however depending on
- * direction only continuers or starters are strictly required - as the input
- * is well-formed, this laxness will not cost us. */
-static const CharReach CHARREACH_WORD_UCP_PRE(CHARREACH_WORD
-                                              | CharReach(128, 255));
-
-/** \brief Prefiltering definition of \\W for UCP Mode.
- *
- * (non-word already includes high bytes) */
-static const CharReach CHARREACH_NONWORD_UCP_PRE(CHARREACH_NONWORD);
-
-/** \brief Find all the edges with assertion flags. */
-static
-vector<NFAEdge> getAsserts(const NGHolder &g) {
-    vector<NFAEdge> out;
-    for (const auto &e : edges_range(g)) {
-        if (g[e].assert_flags) {
-            out.push_back(e);
-        }
-    }
-    return out;
-}
-
-static
-void addToSplit(const NGHolder &g, NFAVertex v, map<u32, NFAVertex> *to_split) {
+#include "parser/position.h" // for POS flags 
+#include "util/bitutils.h" // for findAndClearLSB_32 
+#include "util/boundary_reports.h" 
+#include "util/container.h" 
+#include "util/compile_context.h" 
+#include "util/compile_error.h" 
+#include "util/graph_range.h" 
+#include "util/report_manager.h" 
+#include "util/unicode_def.h" 
+ 
+#include <queue> 
+ 
+using namespace std; 
+ 
+namespace ue2 { 
+ 
+/** \brief Hard limit on the maximum number of vertices we'll clone before we 
+ * throw up our hands and report 'Pattern too large.' */ 
+static const size_t MAX_CLONED_VERTICES = 2048; 
+ 
+/** \brief The definition of \\w, since we use it everywhere in here. */ 
+static const CharReach CHARREACH_WORD(CharReach('a', 'z') | 
+        CharReach('A', 'Z') | CharReach('0', '9') | CharReach('_')); 
+ 
+/** \brief \\W is the inverse of \\w */ 
+static const CharReach CHARREACH_NONWORD(~CHARREACH_WORD); 
+ 
+/** \brief Prefiltering definition of \\w for UCP mode. 
+ * 
+ * Includes all high bytes as to capture all non-ASCII, however depending on 
+ * direction only continuers or starters are strictly required - as the input 
+ * is well-formed, this laxness will not cost us. */ 
+static const CharReach CHARREACH_WORD_UCP_PRE(CHARREACH_WORD 
+                                              | CharReach(128, 255)); 
+ 
+/** \brief Prefiltering definition of \\W for UCP Mode. 
+ * 
+ * (non-word already includes high bytes) */ 
+static const CharReach CHARREACH_NONWORD_UCP_PRE(CHARREACH_NONWORD); 
+ 
+/** \brief Find all the edges with assertion flags. */ 
+static 
+vector<NFAEdge> getAsserts(const NGHolder &g) { 
+    vector<NFAEdge> out; 
+    for (const auto &e : edges_range(g)) { 
+        if (g[e].assert_flags) { 
+            out.push_back(e); 
+        } 
+    } 
+    return out; 
+} 
+ 
+static 
+void addToSplit(const NGHolder &g, NFAVertex v, map<u32, NFAVertex> *to_split) { 
     DEBUG_PRINTF("%zu needs splitting\n", g[v].index);
-    to_split->emplace(g[v].index, v);
-}
-
-/** \brief Find vertices that need to be split due to an assertion edge.
- *
- * A vertex needs to be split if has an edge to/from it with an assert with a
- * restriction on the relevant end. */
-static
-void findSplitters(const NGHolder &g, const vector<NFAEdge> &asserts,
-                   map<u32, NFAVertex> *to_split,
-                   map<u32, NFAVertex> *to_split_ucp) {
-    for (const auto &e : asserts) {
-        NFAVertex u = source(e, g);
-        NFAVertex v = target(e, g);
-        u32 flags = g[e].assert_flags;
-        assert(flags);
-
-        const CharReach &u_cr = g[u].char_reach;
-        const CharReach &v_cr = g[v].char_reach;
-
-        bool ucp_assert = flags & UCP_ASSERT_FLAGS;
-        bool normal_assert = flags & NON_UCP_ASSERT_FLAGS;
-        /* In reality, an expression can only be entirely ucp or not ucp */
-        assert(ucp_assert != normal_assert);
-
-        if (normal_assert) {
-            /* assume any flag results in us have to split if the vertex is not
-             * a subset of word or completely disjoint from it. We could be more
-             * nuanced if flags is a disjunction of multiple assertions. */
-            if (!u_cr.isSubsetOf(CHARREACH_WORD)
-                && !u_cr.isSubsetOf(CHARREACH_NONWORD)
-                && u != g.start) { /* start is always considered a nonword */
-                addToSplit(g, u, to_split);
-            }
-
-            if (!v_cr.isSubsetOf(CHARREACH_WORD)
-                && !v_cr.isSubsetOf(CHARREACH_NONWORD)
-                && v != g.accept /* accept require special handling, done on a
-                                  * per edge basis in resolve asserts
-                                  */
-                && v != g.acceptEod) { /* eod is always considered a nonword */
-                addToSplit(g, v, to_split);
-            }
-        }
-
-        if (ucp_assert) {
-            /* note: the ucp prefilter crs overlap - requires a bit more care */
-            if (u == g.start) { /* start never needs to be split,
-                                 * treat nonword */
-            } else if (flags & POS_FLAG_ASSERT_WORD_TO_ANY_UCP) {
-                if (!u_cr.isSubsetOf(CHARREACH_WORD_UCP_PRE)
-                    && !u_cr.isSubsetOf(~CHARREACH_WORD_UCP_PRE)) {
-                    addToSplit(g, u, to_split_ucp);
-                }
-            } else {
-                assert(flags & POS_FLAG_ASSERT_NONWORD_TO_ANY_UCP);
-                if (!u_cr.isSubsetOf(CHARREACH_NONWORD_UCP_PRE)
-                    && !u_cr.isSubsetOf(~CHARREACH_NONWORD_UCP_PRE)) {
-                    addToSplit(g, u, to_split_ucp);
-                }
-            }
-
-            if (v == g.acceptEod /* eod is always considered a nonword */
-                || v == g.accept) {  /* accept require special handling, done on
-                                      * a per edge basis in resolve asserts */
-            } else if (flags & POS_FLAG_ASSERT_ANY_TO_WORD_UCP) {
-                if (!v_cr.isSubsetOf(CHARREACH_WORD_UCP_PRE)
-                    && !v_cr.isSubsetOf(~CHARREACH_WORD_UCP_PRE)) {
-                    addToSplit(g, v, to_split_ucp);
-                }
-            } else {
-                assert(flags & POS_FLAG_ASSERT_ANY_TO_NONWORD_UCP);
-                if (!v_cr.isSubsetOf(CHARREACH_NONWORD_UCP_PRE)
-                    && !v_cr.isSubsetOf(~CHARREACH_NONWORD_UCP_PRE)) {
-                    addToSplit(g, v, to_split_ucp);
-                }
-            }
-        }
-    }
-}
-
-static
+    to_split->emplace(g[v].index, v); 
+} 
+ 
+/** \brief Find vertices that need to be split due to an assertion edge. 
+ * 
+ * A vertex needs to be split if has an edge to/from it with an assert with a 
+ * restriction on the relevant end. */ 
+static 
+void findSplitters(const NGHolder &g, const vector<NFAEdge> &asserts, 
+                   map<u32, NFAVertex> *to_split, 
+                   map<u32, NFAVertex> *to_split_ucp) { 
+    for (const auto &e : asserts) { 
+        NFAVertex u = source(e, g); 
+        NFAVertex v = target(e, g); 
+        u32 flags = g[e].assert_flags; 
+        assert(flags); 
+ 
+        const CharReach &u_cr = g[u].char_reach; 
+        const CharReach &v_cr = g[v].char_reach; 
+ 
+        bool ucp_assert = flags & UCP_ASSERT_FLAGS; 
+        bool normal_assert = flags & NON_UCP_ASSERT_FLAGS; 
+        /* In reality, an expression can only be entirely ucp or not ucp */ 
+        assert(ucp_assert != normal_assert); 
+ 
+        if (normal_assert) { 
+            /* assume any flag results in us have to split if the vertex is not 
+             * a subset of word or completely disjoint from it. We could be more 
+             * nuanced if flags is a disjunction of multiple assertions. */ 
+            if (!u_cr.isSubsetOf(CHARREACH_WORD) 
+                && !u_cr.isSubsetOf(CHARREACH_NONWORD) 
+                && u != g.start) { /* start is always considered a nonword */ 
+                addToSplit(g, u, to_split); 
+            } 
+ 
+            if (!v_cr.isSubsetOf(CHARREACH_WORD) 
+                && !v_cr.isSubsetOf(CHARREACH_NONWORD) 
+                && v != g.accept /* accept require special handling, done on a 
+                                  * per edge basis in resolve asserts 
+                                  */ 
+                && v != g.acceptEod) { /* eod is always considered a nonword */ 
+                addToSplit(g, v, to_split); 
+            } 
+        } 
+ 
+        if (ucp_assert) { 
+            /* note: the ucp prefilter crs overlap - requires a bit more care */ 
+            if (u == g.start) { /* start never needs to be split, 
+                                 * treat nonword */ 
+            } else if (flags & POS_FLAG_ASSERT_WORD_TO_ANY_UCP) { 
+                if (!u_cr.isSubsetOf(CHARREACH_WORD_UCP_PRE) 
+                    && !u_cr.isSubsetOf(~CHARREACH_WORD_UCP_PRE)) { 
+                    addToSplit(g, u, to_split_ucp); 
+                } 
+            } else { 
+                assert(flags & POS_FLAG_ASSERT_NONWORD_TO_ANY_UCP); 
+                if (!u_cr.isSubsetOf(CHARREACH_NONWORD_UCP_PRE) 
+                    && !u_cr.isSubsetOf(~CHARREACH_NONWORD_UCP_PRE)) { 
+                    addToSplit(g, u, to_split_ucp); 
+                } 
+            } 
+ 
+            if (v == g.acceptEod /* eod is always considered a nonword */ 
+                || v == g.accept) {  /* accept require special handling, done on 
+                                      * a per edge basis in resolve asserts */ 
+            } else if (flags & POS_FLAG_ASSERT_ANY_TO_WORD_UCP) { 
+                if (!v_cr.isSubsetOf(CHARREACH_WORD_UCP_PRE) 
+                    && !v_cr.isSubsetOf(~CHARREACH_WORD_UCP_PRE)) { 
+                    addToSplit(g, v, to_split_ucp); 
+                } 
+            } else { 
+                assert(flags & POS_FLAG_ASSERT_ANY_TO_NONWORD_UCP); 
+                if (!v_cr.isSubsetOf(CHARREACH_NONWORD_UCP_PRE) 
+                    && !v_cr.isSubsetOf(~CHARREACH_NONWORD_UCP_PRE)) { 
+                    addToSplit(g, v, to_split_ucp); 
+                } 
+            } 
+        } 
+    } 
+} 
+ 
+static 
 void setReportId(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
                  NFAVertex v, s32 adj) {
-    // Don't try and set the report ID of a special vertex.
-    assert(!is_special(v, g));
-
-    // If there's a report set already, we're replacing it.
-    g[v].reports.clear();
-
+    // Don't try and set the report ID of a special vertex. 
+    assert(!is_special(v, g)); 
+ 
+    // If there's a report set already, we're replacing it. 
+    g[v].reports.clear(); 
+ 
     Report ir = rm.getBasicInternalReport(expr, adj);
-
-    g[v].reports.insert(rm.getInternalId(ir));
+ 
+    g[v].reports.insert(rm.getInternalId(ir)); 
     DEBUG_PRINTF("set report id for vertex %zu, adj %d\n", g[v].index, adj);
-}
-
-static
+} 
+ 
+static 
 NFAVertex makeClone(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
                     NFAVertex v, const CharReach &cr_mask) {
-    NFAVertex clone = clone_vertex(g, v);
-    g[clone].char_reach &= cr_mask;
-    clone_out_edges(g, v, clone);
-    clone_in_edges(g, v, clone);
-
-    if (v == g.startDs) {
+    NFAVertex clone = clone_vertex(g, v); 
+    g[clone].char_reach &= cr_mask; 
+    clone_out_edges(g, v, clone); 
+    clone_in_edges(g, v, clone); 
+ 
+    if (v == g.startDs) { 
         if (expr.utf8) {
-            g[clone].char_reach &= ~UTF_START_CR;
-        }
-
-        DEBUG_PRINTF("marked as virt\n");
-        g[clone].assert_flags = POS_FLAG_VIRTUAL_START;
-
+            g[clone].char_reach &= ~UTF_START_CR; 
+        } 
+ 
+        DEBUG_PRINTF("marked as virt\n"); 
+        g[clone].assert_flags = POS_FLAG_VIRTUAL_START; 
+ 
         setReportId(rm, g, expr, clone, 0);
-    }
-
-    return clone;
-}
-
-static
+    } 
+ 
+    return clone; 
+} 
+ 
+static 
 void splitVertex(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
                  NFAVertex v, bool ucp) {
-    assert(v != g.start);
-    assert(v != g.accept);
-    assert(v != g.acceptEod);
+    assert(v != g.start); 
+    assert(v != g.accept); 
+    assert(v != g.acceptEod); 
     DEBUG_PRINTF("partitioning vertex %zu ucp:%d\n", g[v].index, (int)ucp);
-
-    CharReach cr_word = ucp ? CHARREACH_WORD_UCP_PRE : CHARREACH_WORD;
-    CharReach cr_nonword = ucp ? CHARREACH_NONWORD_UCP_PRE : CHARREACH_NONWORD;
-
-    auto has_no_assert = [&g](const NFAEdge &e) { return !g[e].assert_flags; };
-
-    // Split v into word/nonword vertices with only asserting out-edges.
+ 
+    CharReach cr_word = ucp ? CHARREACH_WORD_UCP_PRE : CHARREACH_WORD; 
+    CharReach cr_nonword = ucp ? CHARREACH_NONWORD_UCP_PRE : CHARREACH_NONWORD; 
+ 
+    auto has_no_assert = [&g](const NFAEdge &e) { return !g[e].assert_flags; }; 
+ 
+    // Split v into word/nonword vertices with only asserting out-edges. 
     NFAVertex w_out = makeClone(rm, g, expr, v, cr_word);
     NFAVertex nw_out = makeClone(rm, g, expr, v, cr_nonword);
-    remove_out_edge_if(w_out, has_no_assert, g);
-    remove_out_edge_if(nw_out, has_no_assert, g);
-
-    // Split v into word/nonword vertices with only asserting in-edges.
+    remove_out_edge_if(w_out, has_no_assert, g); 
+    remove_out_edge_if(nw_out, has_no_assert, g); 
+ 
+    // Split v into word/nonword vertices with only asserting in-edges. 
     NFAVertex w_in = makeClone(rm, g, expr, v, cr_word);
     NFAVertex nw_in = makeClone(rm, g, expr, v, cr_nonword);
-    remove_in_edge_if(w_in, has_no_assert, g);
-    remove_in_edge_if(nw_in, has_no_assert, g);
-
-    // Prune edges with asserts from original v.
-    auto has_assert = [&g](const NFAEdge &e) { return g[e].assert_flags; };
-    remove_in_edge_if(v, has_assert, g);
-    remove_out_edge_if(v, has_assert, g);
-}
-
-static
+    remove_in_edge_if(w_in, has_no_assert, g); 
+    remove_in_edge_if(nw_in, has_no_assert, g); 
+ 
+    // Prune edges with asserts from original v. 
+    auto has_assert = [&g](const NFAEdge &e) { return g[e].assert_flags; }; 
+    remove_in_edge_if(v, has_assert, g); 
+    remove_out_edge_if(v, has_assert, g); 
+} 
+ 
+static 
 void resolveEdges(ReportManager &rm, NGHolder &g, const ExpressionInfo &expr,
                   set<NFAEdge> *dead) {
-    for (const auto &e : edges_range(g)) {
-        u32 flags = g[e].assert_flags;
-        if (!flags) {
-            continue;
-        }
-
-        NFAVertex u = source(e, g);
-        NFAVertex v = target(e, g);
-
-        assert(u != g.startDs);
-
-        const CharReach &u_cr = g[u].char_reach;
-        const CharReach &v_cr = g[v].char_reach;
-
-        bool impassable = true;
-        bool ucp = flags & UCP_ASSERT_FLAGS;
+    for (const auto &e : edges_range(g)) { 
+        u32 flags = g[e].assert_flags; 
+        if (!flags) { 
+            continue; 
+        } 
+ 
+        NFAVertex u = source(e, g); 
+        NFAVertex v = target(e, g); 
+ 
+        assert(u != g.startDs); 
+ 
+        const CharReach &u_cr = g[u].char_reach; 
+        const CharReach &v_cr = g[v].char_reach; 
+ 
+        bool impassable = true; 
+        bool ucp = flags & UCP_ASSERT_FLAGS; 
         DEBUG_PRINTF("resolving edge %zu->%zu (flags=0x%x, ucp=%d)\n",
                      g[u].index, g[v].index, flags, (int)ucp);
-        while (flags && impassable) {
-            u32 flag = 1U << findAndClearLSB_32(&flags);
-            switch (flag) {
-            case POS_FLAG_ASSERT_NONWORD_TO_NONWORD:
-            case POS_FLAG_ASSERT_NONWORD_TO_WORD:
-                if ((u_cr & CHARREACH_NONWORD).none() && u != g.start) {
-                    continue;
-                }
-                break;
-            case POS_FLAG_ASSERT_WORD_TO_NONWORD:
-            case POS_FLAG_ASSERT_WORD_TO_WORD:
-                if ((u_cr & CHARREACH_WORD).none() || u == g.start) {
-                    continue;
-                }
-                break;
-            case POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP:
-            case POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP:
-                if ((u_cr & ~CHARREACH_NONWORD_UCP_PRE).any() && u != g.start) {
-                    continue;
-                }
-                break;
-            case POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP:
-            case POS_FLAG_ASSERT_WORD_TO_WORD_UCP:
-                if ((u_cr & ~CHARREACH_WORD_UCP_PRE).any() || u == g.start) {
-                    continue;
-                }
-                break;
-            default:
-                assert(0);
-            }
-
-            if (v == g.accept) {
-                /* accept special will need to be treated specially later */
-                impassable = false;
-                continue;
-            }
-
-            switch (flag) {
-            case POS_FLAG_ASSERT_NONWORD_TO_NONWORD:
-            case POS_FLAG_ASSERT_WORD_TO_NONWORD:
-                if ((v_cr & CHARREACH_NONWORD).none() && v != g.acceptEod) {
-                    continue;
-                }
-                break;
-            case POS_FLAG_ASSERT_WORD_TO_WORD:
-            case POS_FLAG_ASSERT_NONWORD_TO_WORD:
-                if ((v_cr & CHARREACH_WORD).none() || v == g.acceptEod) {
-                    continue;
-                }
-                break;
-            case POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP:
-            case POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP:
-                if ((v_cr & ~CHARREACH_NONWORD_UCP_PRE).any()
-                    && v != g.acceptEod) {
-                    continue;
-                }
-                break;
-            case POS_FLAG_ASSERT_WORD_TO_WORD_UCP:
-            case POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP:
-                if ((v_cr & ~CHARREACH_WORD_UCP_PRE).any()
-                    || v == g.acceptEod) {
-                    continue;
-                }
-                break;
-            default:
-                assert(0);
-            }
-            impassable = false;
-        }
-
-        if (impassable) {
-            dead->insert(e);
-        } else if (v == g.accept && !ucp) {
-            bool u_w = (u_cr & CHARREACH_NONWORD).none() && u != g.start;
-            UNUSED bool u_nw = (u_cr & CHARREACH_WORD).none() || u == g.start;
-            assert(u_w != u_nw);
-            bool v_w = false;
-            bool v_nw = false;
-
-            flags = g[e].assert_flags;
-            if (u_w) {
-                v_w = flags & POS_FLAG_ASSERT_WORD_TO_WORD;
-                v_nw = flags & POS_FLAG_ASSERT_WORD_TO_NONWORD;
-            } else {
-                v_w = flags & POS_FLAG_ASSERT_NONWORD_TO_WORD;
-                v_nw = flags & POS_FLAG_ASSERT_NONWORD_TO_NONWORD;
-            }
-            assert(v_w || v_nw);
-            if (v_w && v_nw) {
-                /* edge is effectively unconditional */
-                g[e].assert_flags = 0;
-            } else if (v_w) {
-                /* need to add a word byte */
-                NFAVertex vv = add_vertex(g);
+        while (flags && impassable) { 
+            u32 flag = 1U << findAndClearLSB_32(&flags); 
+            switch (flag) { 
+            case POS_FLAG_ASSERT_NONWORD_TO_NONWORD: 
+            case POS_FLAG_ASSERT_NONWORD_TO_WORD: 
+                if ((u_cr & CHARREACH_NONWORD).none() && u != g.start) { 
+                    continue; 
+                } 
+                break; 
+            case POS_FLAG_ASSERT_WORD_TO_NONWORD: 
+            case POS_FLAG_ASSERT_WORD_TO_WORD: 
+                if ((u_cr & CHARREACH_WORD).none() || u == g.start) { 
+                    continue; 
+                } 
+                break; 
+            case POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP: 
+            case POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP: 
+                if ((u_cr & ~CHARREACH_NONWORD_UCP_PRE).any() && u != g.start) { 
+                    continue; 
+                } 
+                break; 
+            case POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP: 
+            case POS_FLAG_ASSERT_WORD_TO_WORD_UCP: 
+                if ((u_cr & ~CHARREACH_WORD_UCP_PRE).any() || u == g.start) { 
+                    continue; 
+                } 
+                break; 
+            default: 
+                assert(0); 
+            } 
+ 
+            if (v == g.accept) { 
+                /* accept special will need to be treated specially later */ 
+                impassable = false; 
+                continue; 
+            } 
+ 
+            switch (flag) { 
+            case POS_FLAG_ASSERT_NONWORD_TO_NONWORD: 
+            case POS_FLAG_ASSERT_WORD_TO_NONWORD: 
+                if ((v_cr & CHARREACH_NONWORD).none() && v != g.acceptEod) { 
+                    continue; 
+                } 
+                break; 
+            case POS_FLAG_ASSERT_WORD_TO_WORD: 
+            case POS_FLAG_ASSERT_NONWORD_TO_WORD: 
+                if ((v_cr & CHARREACH_WORD).none() || v == g.acceptEod) { 
+                    continue; 
+                } 
+                break; 
+            case POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP: 
+            case POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP: 
+                if ((v_cr & ~CHARREACH_NONWORD_UCP_PRE).any() 
+                    && v != g.acceptEod) { 
+                    continue; 
+                } 
+                break; 
+            case POS_FLAG_ASSERT_WORD_TO_WORD_UCP: 
+            case POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP: 
+                if ((v_cr & ~CHARREACH_WORD_UCP_PRE).any() 
+                    || v == g.acceptEod) { 
+                    continue; 
+                } 
+                break; 
+            default: 
+                assert(0); 
+            } 
+            impassable = false; 
+        } 
+ 
+        if (impassable) { 
+            dead->insert(e); 
+        } else if (v == g.accept && !ucp) { 
+            bool u_w = (u_cr & CHARREACH_NONWORD).none() && u != g.start; 
+            UNUSED bool u_nw = (u_cr & CHARREACH_WORD).none() || u == g.start; 
+            assert(u_w != u_nw); 
+            bool v_w = false; 
+            bool v_nw = false; 
+ 
+            flags = g[e].assert_flags; 
+            if (u_w) { 
+                v_w = flags & POS_FLAG_ASSERT_WORD_TO_WORD; 
+                v_nw = flags & POS_FLAG_ASSERT_WORD_TO_NONWORD; 
+            } else { 
+                v_w = flags & POS_FLAG_ASSERT_NONWORD_TO_WORD; 
+                v_nw = flags & POS_FLAG_ASSERT_NONWORD_TO_NONWORD; 
+            } 
+            assert(v_w || v_nw); 
+            if (v_w && v_nw) { 
+                /* edge is effectively unconditional */ 
+                g[e].assert_flags = 0; 
+            } else if (v_w) { 
+                /* need to add a word byte */ 
+                NFAVertex vv = add_vertex(g); 
                 setReportId(rm, g, expr, vv, -1);
-                g[vv].char_reach = CHARREACH_WORD;
-                add_edge(vv, g.accept, g);
-                g[e].assert_flags = 0;
-                add_edge(u, vv, g[e], g);
-                dead->insert(e);
-            } else {
-                /* need to add a non word byte or see eod */
-                NFAVertex vv = add_vertex(g);
+                g[vv].char_reach = CHARREACH_WORD; 
+                add_edge(vv, g.accept, g); 
+                g[e].assert_flags = 0; 
+                add_edge(u, vv, g[e], g); 
+                dead->insert(e); 
+            } else { 
+                /* need to add a non word byte or see eod */ 
+                NFAVertex vv = add_vertex(g); 
                 setReportId(rm, g, expr, vv, -1);
-                g[vv].char_reach = CHARREACH_NONWORD;
-                add_edge(vv, g.accept, g);
-                g[e].assert_flags = 0;
-                add_edge(u, vv, g[e], g);
+                g[vv].char_reach = CHARREACH_NONWORD; 
+                add_edge(vv, g.accept, g); 
+                g[e].assert_flags = 0; 
+                add_edge(u, vv, g[e], g); 
                 /* there may already be a different edge from start to eod if so
                  * we need to make it unconditional and alive
                  */
                 if (NFAEdge start_eod = edge(u, g.acceptEod, g)) {
-                    g[start_eod].assert_flags = 0;
-                    dead->erase(start_eod);
+                    g[start_eod].assert_flags = 0; 
+                    dead->erase(start_eod); 
                 } else {
                     add_edge(u, g.acceptEod, g[e], g);
-                }
-                dead->insert(e);
-            }
-        } else if (v == g.accept && ucp) {
-            DEBUG_PRINTF("resolving ucp assert to accept\n");
-            assert(u_cr.any());
-            bool u_w = (u_cr & CHARREACH_WORD_UCP_PRE).any()
-                       && u != g.start;
-            bool u_nw = (u_cr & CHARREACH_NONWORD_UCP_PRE).any()
-                       || u == g.start;
-            assert(u_w || u_nw);
-
-            bool v_w = false;
-            bool v_nw = false;
-
-            flags = g[e].assert_flags;
-            if (u_w) {
-                v_w |= flags & POS_FLAG_ASSERT_WORD_TO_WORD_UCP;
-                v_nw |= flags & POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP;
-            }
-            if (u_nw) {
-                v_w |= flags & POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP;
-                v_nw |= flags & POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP;
-            }
-            assert(v_w || v_nw);
-            if (v_w && v_nw) {
-                /* edge is effectively unconditional */
-                g[e].assert_flags = 0;
-            } else if (v_w) {
-                /* need to add a word byte */
-                NFAVertex vv = add_vertex(g);
+                } 
+                dead->insert(e); 
+            } 
+        } else if (v == g.accept && ucp) { 
+            DEBUG_PRINTF("resolving ucp assert to accept\n"); 
+            assert(u_cr.any()); 
+            bool u_w = (u_cr & CHARREACH_WORD_UCP_PRE).any() 
+                       && u != g.start; 
+            bool u_nw = (u_cr & CHARREACH_NONWORD_UCP_PRE).any() 
+                       || u == g.start; 
+            assert(u_w || u_nw); 
+ 
+            bool v_w = false; 
+            bool v_nw = false; 
+ 
+            flags = g[e].assert_flags; 
+            if (u_w) { 
+                v_w |= flags & POS_FLAG_ASSERT_WORD_TO_WORD_UCP; 
+                v_nw |= flags & POS_FLAG_ASSERT_WORD_TO_NONWORD_UCP; 
+            } 
+            if (u_nw) { 
+                v_w |= flags & POS_FLAG_ASSERT_NONWORD_TO_WORD_UCP; 
+                v_nw |= flags & POS_FLAG_ASSERT_NONWORD_TO_NONWORD_UCP; 
+            } 
+            assert(v_w || v_nw); 
+            if (v_w && v_nw) { 
+                /* edge is effectively unconditional */ 
+                g[e].assert_flags = 0; 
+            } else if (v_w) { 
+                /* need to add a word byte */ 
+                NFAVertex vv = add_vertex(g); 
                 setReportId(rm, g, expr, vv, -1);
-                g[vv].char_reach = CHARREACH_WORD_UCP_PRE;
-                add_edge(vv, g.accept, g);
-                g[e].assert_flags = 0;
-                add_edge(u, vv, g[e], g);
-                dead->insert(e);
-            } else {
-                /* need to add a non word byte or see eod */
-                NFAVertex vv = add_vertex(g);
+                g[vv].char_reach = CHARREACH_WORD_UCP_PRE; 
+                add_edge(vv, g.accept, g); 
+                g[e].assert_flags = 0; 
+                add_edge(u, vv, g[e], g); 
+                dead->insert(e); 
+            } else { 
+                /* need to add a non word byte or see eod */ 
+                NFAVertex vv = add_vertex(g); 
                 setReportId(rm, g, expr, vv, -1);
-                g[vv].char_reach = CHARREACH_NONWORD_UCP_PRE;
-                add_edge(vv, g.accept, g);
-                g[e].assert_flags = 0;
-                add_edge(u, vv, g[e], g);
+                g[vv].char_reach = CHARREACH_NONWORD_UCP_PRE; 
+                add_edge(vv, g.accept, g); 
+                g[e].assert_flags = 0; 
+                add_edge(u, vv, g[e], g); 
                 /* there may already be a different edge from start to eod if so
                  * we need to make it unconditional and alive
                  */
                 if (NFAEdge start_eod = edge(u, g.acceptEod, g)) {
-                    g[start_eod].assert_flags = 0;
-                    dead->erase(start_eod);
+                    g[start_eod].assert_flags = 0; 
+                    dead->erase(start_eod); 
                 } else {
                     add_edge(u, g.acceptEod, g[e], g);
-                }
-                dead->insert(e);
-            }
-        } else {
-            /* we can remove the asserts as we have partitioned the vertices
-             * into w/nw around the assert edges
-             */
-            g[e].assert_flags = 0;
-        }
-    }
-}
-
+                } 
+                dead->insert(e); 
+            } 
+        } else { 
+            /* we can remove the asserts as we have partitioned the vertices 
+             * into w/nw around the assert edges 
+             */ 
+            g[e].assert_flags = 0; 
+        } 
+    } 
+} 
+ 
 void resolveAsserts(ReportManager &rm, NGHolder &g,
                     const ExpressionInfo &expr) {
-    vector<NFAEdge> asserts = getAsserts(g);
-    if (asserts.empty()) {
-        return;
-    }
-
-    map<u32, NFAVertex> to_split; /* by index, for determinism */
-    map<u32, NFAVertex> to_split_ucp; /* by index, for determinism */
-    findSplitters(g, asserts, &to_split, &to_split_ucp);
-    if (to_split.size() + to_split_ucp.size() > MAX_CLONED_VERTICES) {
+    vector<NFAEdge> asserts = getAsserts(g); 
+    if (asserts.empty()) { 
+        return; 
+    } 
+ 
+    map<u32, NFAVertex> to_split; /* by index, for determinism */ 
+    map<u32, NFAVertex> to_split_ucp; /* by index, for determinism */ 
+    findSplitters(g, asserts, &to_split, &to_split_ucp); 
+    if (to_split.size() + to_split_ucp.size() > MAX_CLONED_VERTICES) { 
         throw CompileError(expr.index, "Pattern is too large.");
-    }
-
-    for (const auto &m : to_split) {
-        assert(!contains(to_split_ucp, m.first));
+    } 
+ 
+    for (const auto &m : to_split) { 
+        assert(!contains(to_split_ucp, m.first)); 
         splitVertex(rm, g, expr, m.second, false);
-    }
-
-    for (const auto &m : to_split_ucp) {
+    } 
+ 
+    for (const auto &m : to_split_ucp) { 
         splitVertex(rm, g, expr, m.second, true);
-    }
-
-    set<NFAEdge> dead;
+    } 
+ 
+    set<NFAEdge> dead; 
     resolveEdges(rm, g, expr, &dead);
-
-    remove_edges(dead, g);
+ 
+    remove_edges(dead, g); 
     renumber_vertices(g);
-    pruneUseless(g);
-    pruneEmptyVertices(g);
-
+    pruneUseless(g); 
+    pruneEmptyVertices(g); 
+ 
     renumber_vertices(g);
     renumber_edges(g);
-    clearReports(g);
-}
-
+    clearReports(g); 
+} 
+ 
 void ensureCodePointStart(ReportManager &rm, NGHolder &g,
                           const ExpressionInfo &expr) {
-    /* In utf8 mode there is an implicit assertion that we start at codepoint
-     * boundaries. Assert resolution handles the badness coming from asserts.
-     * The only other source of trouble is startDs->accept connections.
-     */
+    /* In utf8 mode there is an implicit assertion that we start at codepoint 
+     * boundaries. Assert resolution handles the badness coming from asserts. 
+     * The only other source of trouble is startDs->accept connections. 
+     */ 
     NFAEdge orig = edge(g.startDs, g.accept, g);
     if (expr.utf8 && orig) {
         DEBUG_PRINTF("rectifying %u\n", expr.report);
         Report ir = rm.getBasicInternalReport(expr);
-        ReportID rep = rm.getInternalId(ir);
-
-        NFAVertex v_a = add_vertex(g);
-        g[v_a].assert_flags = POS_FLAG_VIRTUAL_START;
-        g[v_a].char_reach = UTF_ASCII_CR;
-        add_edge(v_a, g.accept, g[orig], g);
-
-        NFAVertex v_2 = add_vertex(g);
-        g[v_2].assert_flags = POS_FLAG_VIRTUAL_START;
-        g[v_2].char_reach = CharReach(UTF_TWO_BYTE_MIN, UTF_TWO_BYTE_MAX);
-
-        NFAVertex v_3 = add_vertex(g);
-        g[v_3].assert_flags = POS_FLAG_VIRTUAL_START;
-        g[v_3].char_reach = CharReach(UTF_THREE_BYTE_MIN, UTF_THREE_BYTE_MAX);
-
-        NFAVertex v_4 = add_vertex(g);
-        g[v_4].assert_flags = POS_FLAG_VIRTUAL_START;
-        g[v_4].char_reach = CharReach(UTF_FOUR_BYTE_MIN, UTF_FOUR_BYTE_MAX);
-
-        NFAVertex v_c = add_vertex(g);
-        g[v_c].assert_flags = POS_FLAG_VIRTUAL_START;
-        g[v_c].char_reach = UTF_CONT_CR;
-        add_edge(v_c, g.accept, g[orig], g);
-
-        add_edge(v_2, v_c, g);
-
-        NFAVertex v_3c = add_vertex(g);
-        g[v_3c].assert_flags = POS_FLAG_VIRTUAL_START;
-        g[v_3c].char_reach = UTF_CONT_CR;
-        add_edge(v_3c, v_c, g);
-        add_edge(v_3, v_3c, g);
-
-        NFAVertex v_4c = add_vertex(g);
-        g[v_4c].assert_flags = POS_FLAG_VIRTUAL_START;
-        g[v_4c].char_reach = UTF_CONT_CR;
-        add_edge(v_4c, v_3c, g);
-        add_edge(v_4, v_4c, g);
-
-        g[v_a].reports.insert(rep);
-        g[v_c].reports.insert(rep);
-
-        add_edge(g.start, v_a, g);
-        add_edge(g.startDs, v_a, g);
-        add_edge(g.start, v_2, g);
-        add_edge(g.startDs, v_2, g);
-        add_edge(g.start, v_3, g);
-        add_edge(g.startDs, v_3, g);
-        add_edge(g.start, v_4, g);
-        add_edge(g.startDs, v_4, g);
-        remove_edge(orig, g);
+        ReportID rep = rm.getInternalId(ir); 
+ 
+        NFAVertex v_a = add_vertex(g); 
+        g[v_a].assert_flags = POS_FLAG_VIRTUAL_START; 
+        g[v_a].char_reach = UTF_ASCII_CR; 
+        add_edge(v_a, g.accept, g[orig], g); 
+ 
+        NFAVertex v_2 = add_vertex(g); 
+        g[v_2].assert_flags = POS_FLAG_VIRTUAL_START; 
+        g[v_2].char_reach = CharReach(UTF_TWO_BYTE_MIN, UTF_TWO_BYTE_MAX); 
+ 
+        NFAVertex v_3 = add_vertex(g); 
+        g[v_3].assert_flags = POS_FLAG_VIRTUAL_START; 
+        g[v_3].char_reach = CharReach(UTF_THREE_BYTE_MIN, UTF_THREE_BYTE_MAX); 
+ 
+        NFAVertex v_4 = add_vertex(g); 
+        g[v_4].assert_flags = POS_FLAG_VIRTUAL_START; 
+        g[v_4].char_reach = CharReach(UTF_FOUR_BYTE_MIN, UTF_FOUR_BYTE_MAX); 
+ 
+        NFAVertex v_c = add_vertex(g); 
+        g[v_c].assert_flags = POS_FLAG_VIRTUAL_START; 
+        g[v_c].char_reach = UTF_CONT_CR; 
+        add_edge(v_c, g.accept, g[orig], g); 
+ 
+        add_edge(v_2, v_c, g); 
+ 
+        NFAVertex v_3c = add_vertex(g); 
+        g[v_3c].assert_flags = POS_FLAG_VIRTUAL_START; 
+        g[v_3c].char_reach = UTF_CONT_CR; 
+        add_edge(v_3c, v_c, g); 
+        add_edge(v_3, v_3c, g); 
+ 
+        NFAVertex v_4c = add_vertex(g); 
+        g[v_4c].assert_flags = POS_FLAG_VIRTUAL_START; 
+        g[v_4c].char_reach = UTF_CONT_CR; 
+        add_edge(v_4c, v_3c, g); 
+        add_edge(v_4, v_4c, g); 
+ 
+        g[v_a].reports.insert(rep); 
+        g[v_c].reports.insert(rep); 
+ 
+        add_edge(g.start, v_a, g); 
+        add_edge(g.startDs, v_a, g); 
+        add_edge(g.start, v_2, g); 
+        add_edge(g.startDs, v_2, g); 
+        add_edge(g.start, v_3, g); 
+        add_edge(g.startDs, v_3, g); 
+        add_edge(g.start, v_4, g); 
+        add_edge(g.startDs, v_4, g); 
+        remove_edge(orig, g); 
         renumber_edges(g);
         clearReports(g);
-    }
-}
-
-} // namespace ue2
+    } 
+} 
+ 
+} // namespace ue2
author	bnagaev <bnagaev@yandex-team.ru>	2022-02-10 16:47:04 +0300
committer	Daniil Cherednik <dcherednik@yandex-team.ru>	2022-02-10 16:47:04 +0300
commit	d6449ba66291ff0c0d352c82e6eb3efb4c8a7e8d (patch)
tree	d5dca6d44593f5e52556a1cc7b1ab0386e096ebe /contrib/libs/hyperscan/src/nfagraph/ng_asserts.cpp
parent	1861d4c1402bb2c67a3e6b43b51706081b74508a (diff)
download	ydb-d6449ba66291ff0c0d352c82e6eb3efb4c8a7e8d.tar.gz