Restoring authorship annotation for <orivej@yandex-team.ru>. Commit 2 of 2.

author: orivej <orivej@yandex-team.ru> 2022-02-10 16:45:01 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:45:01 +0300
commit: 2d37894b1b037cf24231090eda8589bbb44fb6fc (patch)
tree: be835aa92c6248212e705f25388ebafcf84bc7a1 /contrib/tools/python3/src/Objects/stringlib/fastsearch.h
parent: 718c552901d703c502ccbefdfc3c9028d608b947 (diff)
download: ydb-2d37894b1b037cf24231090eda8589bbb44fb6fc.tar.gz
1 files changed, 281 insertions, 281 deletions
diff --git a/contrib/tools/python3/src/Objects/stringlib/fastsearch.h b/contrib/tools/python3/src/Objects/stringlib/fastsearch.h
index 5ed40b3469..56a4467d35 100644
--- a/contrib/tools/python3/src/Objects/stringlib/fastsearch.h
+++ b/contrib/tools/python3/src/Objects/stringlib/fastsearch.h
@@ -1,283 +1,283 @@
-/* stringlib: fastsearch implementation */ 
- 
-#define STRINGLIB_FASTSEARCH_H 
- 
-/* fast search/count implementation, based on a mix between boyer- 
-   moore and horspool, with a few more bells and whistles on the top. 
-   for some more background, see: http://effbot.org/zone/stringlib.htm */ 
- 
-/* note: fastsearch may access s[n], which isn't a problem when using 
-   Python's ordinary string types, but may cause problems if you're 
-   using this code in other contexts.  also, the count mode returns -1 
-   if there cannot possible be a match in the target string, and 0 if 
-   it has actually checked for matches, but didn't find any.  callers 
-   beware! */ 
- 
-#define FAST_COUNT 0 
-#define FAST_SEARCH 1 
-#define FAST_RSEARCH 2 
- 
-#if LONG_BIT >= 128 
-#define STRINGLIB_BLOOM_WIDTH 128 
-#elif LONG_BIT >= 64 
-#define STRINGLIB_BLOOM_WIDTH 64 
-#elif LONG_BIT >= 32 
-#define STRINGLIB_BLOOM_WIDTH 32 
-#else 
-#error "LONG_BIT is smaller than 32" 
-#endif 
- 
-#define STRINGLIB_BLOOM_ADD(mask, ch) \ 
-    ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1))))) 
-#define STRINGLIB_BLOOM(mask, ch)     \ 
-    ((mask &  (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1))))) 
- 
-#if STRINGLIB_SIZEOF_CHAR == 1 
-#  define MEMCHR_CUT_OFF 15 
-#else 
-#  define MEMCHR_CUT_OFF 40 
-#endif 
- 
-Py_LOCAL_INLINE(Py_ssize_t) 
-STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch) 
-{ 
-    const STRINGLIB_CHAR *p, *e; 
- 
-    p = s; 
-    e = s + n; 
-    if (n > MEMCHR_CUT_OFF) { 
-#if STRINGLIB_SIZEOF_CHAR == 1 
-        p = memchr(s, ch, n); 
-        if (p != NULL) 
-            return (p - s); 
-        return -1; 
-#else 
+/* stringlib: fastsearch implementation */
+
+#define STRINGLIB_FASTSEARCH_H
+
+/* fast search/count implementation, based on a mix between boyer-
+   moore and horspool, with a few more bells and whistles on the top.
+   for some more background, see: http://effbot.org/zone/stringlib.htm */
+
+/* note: fastsearch may access s[n], which isn't a problem when using
+   Python's ordinary string types, but may cause problems if you're
+   using this code in other contexts.  also, the count mode returns -1
+   if there cannot possible be a match in the target string, and 0 if
+   it has actually checked for matches, but didn't find any.  callers
+   beware! */
+
+#define FAST_COUNT 0
+#define FAST_SEARCH 1
+#define FAST_RSEARCH 2
+
+#if LONG_BIT >= 128
+#define STRINGLIB_BLOOM_WIDTH 128
+#elif LONG_BIT >= 64
+#define STRINGLIB_BLOOM_WIDTH 64
+#elif LONG_BIT >= 32
+#define STRINGLIB_BLOOM_WIDTH 32
+#else
+#error "LONG_BIT is smaller than 32"
+#endif
+
+#define STRINGLIB_BLOOM_ADD(mask, ch) \
+    ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
+#define STRINGLIB_BLOOM(mask, ch)     \
+    ((mask &  (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
+
+#if STRINGLIB_SIZEOF_CHAR == 1
+#  define MEMCHR_CUT_OFF 15
+#else
+#  define MEMCHR_CUT_OFF 40
+#endif
+
+Py_LOCAL_INLINE(Py_ssize_t)
+STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
+{
+    const STRINGLIB_CHAR *p, *e;
+
+    p = s;
+    e = s + n;
+    if (n > MEMCHR_CUT_OFF) {
+#if STRINGLIB_SIZEOF_CHAR == 1
+        p = memchr(s, ch, n);
+        if (p != NULL)
+            return (p - s);
+        return -1;
+#else
         /* use memchr if we can choose a needle without too many likely
-           false positives */ 
-        const STRINGLIB_CHAR *s1, *e1; 
-        unsigned char needle = ch & 0xff; 
-        /* If looking for a multiple of 256, we'd have too 
-           many false positives looking for the '\0' byte in UCS2 
-           and UCS4 representations. */ 
-        if (needle != 0) { 
-            do { 
-                void *candidate = memchr(p, needle, 
-                                         (e - p) * sizeof(STRINGLIB_CHAR)); 
-                if (candidate == NULL) 
-                    return -1; 
-                s1 = p; 
-                p = (const STRINGLIB_CHAR *) 
-                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); 
-                if (*p == ch) 
-                    return (p - s); 
-                /* False positive */ 
-                p++; 
-                if (p - s1 > MEMCHR_CUT_OFF) 
-                    continue; 
-                if (e - p <= MEMCHR_CUT_OFF) 
-                    break; 
-                e1 = p + MEMCHR_CUT_OFF; 
-                while (p != e1) { 
-                    if (*p == ch) 
-                        return (p - s); 
-                    p++; 
-                } 
-            } 
-            while (e - p > MEMCHR_CUT_OFF); 
-        } 
-#endif 
-    } 
-    while (p < e) { 
-        if (*p == ch) 
-            return (p - s); 
-        p++; 
-    } 
-    return -1; 
-} 
- 
-Py_LOCAL_INLINE(Py_ssize_t) 
-STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch) 
-{ 
-    const STRINGLIB_CHAR *p; 
-#ifdef HAVE_MEMRCHR 
-    /* memrchr() is a GNU extension, available since glibc 2.1.91. 
-       it doesn't seem as optimized as memchr(), but is still quite 
-       faster than our hand-written loop below */ 
- 
-    if (n > MEMCHR_CUT_OFF) { 
-#if STRINGLIB_SIZEOF_CHAR == 1 
-        p = memrchr(s, ch, n); 
-        if (p != NULL) 
-            return (p - s); 
-        return -1; 
-#else 
+           false positives */
+        const STRINGLIB_CHAR *s1, *e1;
+        unsigned char needle = ch & 0xff;
+        /* If looking for a multiple of 256, we'd have too
+           many false positives looking for the '\0' byte in UCS2
+           and UCS4 representations. */
+        if (needle != 0) {
+            do {
+                void *candidate = memchr(p, needle,
+                                         (e - p) * sizeof(STRINGLIB_CHAR));
+                if (candidate == NULL)
+                    return -1;
+                s1 = p;
+                p = (const STRINGLIB_CHAR *)
+                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
+                if (*p == ch)
+                    return (p - s);
+                /* False positive */
+                p++;
+                if (p - s1 > MEMCHR_CUT_OFF)
+                    continue;
+                if (e - p <= MEMCHR_CUT_OFF)
+                    break;
+                e1 = p + MEMCHR_CUT_OFF;
+                while (p != e1) {
+                    if (*p == ch)
+                        return (p - s);
+                    p++;
+                }
+            }
+            while (e - p > MEMCHR_CUT_OFF);
+        }
+#endif
+    }
+    while (p < e) {
+        if (*p == ch)
+            return (p - s);
+        p++;
+    }
+    return -1;
+}
+
+Py_LOCAL_INLINE(Py_ssize_t)
+STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
+{
+    const STRINGLIB_CHAR *p;
+#ifdef HAVE_MEMRCHR
+    /* memrchr() is a GNU extension, available since glibc 2.1.91.
+       it doesn't seem as optimized as memchr(), but is still quite
+       faster than our hand-written loop below */
+
+    if (n > MEMCHR_CUT_OFF) {
+#if STRINGLIB_SIZEOF_CHAR == 1
+        p = memrchr(s, ch, n);
+        if (p != NULL)
+            return (p - s);
+        return -1;
+#else
         /* use memrchr if we can choose a needle without too many likely
-           false positives */ 
-        const STRINGLIB_CHAR *s1; 
-        Py_ssize_t n1; 
-        unsigned char needle = ch & 0xff; 
-        /* If looking for a multiple of 256, we'd have too 
-           many false positives looking for the '\0' byte in UCS2 
-           and UCS4 representations. */ 
-        if (needle != 0) { 
-            do { 
-                void *candidate = memrchr(s, needle, 
-                                          n * sizeof(STRINGLIB_CHAR)); 
-                if (candidate == NULL) 
-                    return -1; 
-                n1 = n; 
-                p = (const STRINGLIB_CHAR *) 
-                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR)); 
-                n = p - s; 
-                if (*p == ch) 
-                    return n; 
-                /* False positive */ 
-                if (n1 - n > MEMCHR_CUT_OFF) 
-                    continue; 
-                if (n <= MEMCHR_CUT_OFF) 
-                    break; 
-                s1 = p - MEMCHR_CUT_OFF; 
-                while (p > s1) { 
-                    p--; 
-                    if (*p == ch) 
-                        return (p - s); 
-                } 
-                n = p - s; 
-            } 
-            while (n > MEMCHR_CUT_OFF); 
-        } 
-#endif 
-    } 
-#endif  /* HAVE_MEMRCHR */ 
-    p = s + n; 
-    while (p > s) { 
-        p--; 
-        if (*p == ch) 
-            return (p - s); 
-    } 
-    return -1; 
-} 
- 
-#undef MEMCHR_CUT_OFF 
- 
-Py_LOCAL_INLINE(Py_ssize_t) 
-FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n, 
-           const STRINGLIB_CHAR* p, Py_ssize_t m, 
-           Py_ssize_t maxcount, int mode) 
-{ 
-    unsigned long mask; 
-    Py_ssize_t skip, count = 0; 
-    Py_ssize_t i, j, mlast, w; 
- 
-    w = n - m; 
- 
-    if (w < 0 || (mode == FAST_COUNT && maxcount == 0)) 
-        return -1; 
- 
-    /* look for special cases */ 
-    if (m <= 1) { 
-        if (m <= 0) 
-            return -1; 
-        /* use special case for 1-character strings */ 
-        if (mode == FAST_SEARCH) 
-            return STRINGLIB(find_char)(s, n, p[0]); 
-        else if (mode == FAST_RSEARCH) 
-            return STRINGLIB(rfind_char)(s, n, p[0]); 
-        else {  /* FAST_COUNT */ 
-            for (i = 0; i < n; i++) 
-                if (s[i] == p[0]) { 
-                    count++; 
-                    if (count == maxcount) 
-                        return maxcount; 
-                } 
-            return count; 
-        } 
-    } 
- 
-    mlast = m - 1; 
-    skip = mlast - 1; 
-    mask = 0; 
- 
-    if (mode != FAST_RSEARCH) { 
-        const STRINGLIB_CHAR *ss = s + m - 1; 
-        const STRINGLIB_CHAR *pp = p + m - 1; 
- 
-        /* create compressed boyer-moore delta 1 table */ 
- 
-        /* process pattern[:-1] */ 
-        for (i = 0; i < mlast; i++) { 
-            STRINGLIB_BLOOM_ADD(mask, p[i]); 
-            if (p[i] == p[mlast]) 
-                skip = mlast - i - 1; 
-        } 
-        /* process pattern[-1] outside the loop */ 
-        STRINGLIB_BLOOM_ADD(mask, p[mlast]); 
- 
-        for (i = 0; i <= w; i++) { 
-            /* note: using mlast in the skip path slows things down on x86 */ 
-            if (ss[i] == pp[0]) { 
-                /* candidate match */ 
-                for (j = 0; j < mlast; j++) 
-                    if (s[i+j] != p[j]) 
-                        break; 
-                if (j == mlast) { 
-                    /* got a match! */ 
-                    if (mode != FAST_COUNT) 
-                        return i; 
-                    count++; 
-                    if (count == maxcount) 
-                        return maxcount; 
-                    i = i + mlast; 
-                    continue; 
-                } 
-                /* miss: check if next character is part of pattern */ 
-                if (!STRINGLIB_BLOOM(mask, ss[i+1])) 
-                    i = i + m; 
-                else 
-                    i = i + skip; 
-            } else { 
-                /* skip: check if next character is part of pattern */ 
-                if (!STRINGLIB_BLOOM(mask, ss[i+1])) 
-                    i = i + m; 
-            } 
-        } 
-    } else {    /* FAST_RSEARCH */ 
- 
-        /* create compressed boyer-moore delta 1 table */ 
- 
-        /* process pattern[0] outside the loop */ 
-        STRINGLIB_BLOOM_ADD(mask, p[0]); 
-        /* process pattern[:0:-1] */ 
-        for (i = mlast; i > 0; i--) { 
-            STRINGLIB_BLOOM_ADD(mask, p[i]); 
-            if (p[i] == p[0]) 
-                skip = i - 1; 
-        } 
- 
-        for (i = w; i >= 0; i--) { 
-            if (s[i] == p[0]) { 
-                /* candidate match */ 
-                for (j = mlast; j > 0; j--) 
-                    if (s[i+j] != p[j]) 
-                        break; 
-                if (j == 0) 
-                    /* got a match! */ 
-                    return i; 
-                /* miss: check if previous character is part of pattern */ 
-                if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) 
-                    i = i - m; 
-                else 
-                    i = i - skip; 
-            } else { 
-                /* skip: check if previous character is part of pattern */ 
-                if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) 
-                    i = i - m; 
-            } 
-        } 
-    } 
- 
-    if (mode != FAST_COUNT) 
-        return -1; 
-    return count; 
-} 
- 
+           false positives */
+        const STRINGLIB_CHAR *s1;
+        Py_ssize_t n1;
+        unsigned char needle = ch & 0xff;
+        /* If looking for a multiple of 256, we'd have too
+           many false positives looking for the '\0' byte in UCS2
+           and UCS4 representations. */
+        if (needle != 0) {
+            do {
+                void *candidate = memrchr(s, needle,
+                                          n * sizeof(STRINGLIB_CHAR));
+                if (candidate == NULL)
+                    return -1;
+                n1 = n;
+                p = (const STRINGLIB_CHAR *)
+                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
+                n = p - s;
+                if (*p == ch)
+                    return n;
+                /* False positive */
+                if (n1 - n > MEMCHR_CUT_OFF)
+                    continue;
+                if (n <= MEMCHR_CUT_OFF)
+                    break;
+                s1 = p - MEMCHR_CUT_OFF;
+                while (p > s1) {
+                    p--;
+                    if (*p == ch)
+                        return (p - s);
+                }
+                n = p - s;
+            }
+            while (n > MEMCHR_CUT_OFF);
+        }
+#endif
+    }
+#endif  /* HAVE_MEMRCHR */
+    p = s + n;
+    while (p > s) {
+        p--;
+        if (*p == ch)
+            return (p - s);
+    }
+    return -1;
+}
+
+#undef MEMCHR_CUT_OFF
+
+Py_LOCAL_INLINE(Py_ssize_t)
+FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
+           const STRINGLIB_CHAR* p, Py_ssize_t m,
+           Py_ssize_t maxcount, int mode)
+{
+    unsigned long mask;
+    Py_ssize_t skip, count = 0;
+    Py_ssize_t i, j, mlast, w;
+
+    w = n - m;
+
+    if (w < 0 || (mode == FAST_COUNT && maxcount == 0))
+        return -1;
+
+    /* look for special cases */
+    if (m <= 1) {
+        if (m <= 0)
+            return -1;
+        /* use special case for 1-character strings */
+        if (mode == FAST_SEARCH)
+            return STRINGLIB(find_char)(s, n, p[0]);
+        else if (mode == FAST_RSEARCH)
+            return STRINGLIB(rfind_char)(s, n, p[0]);
+        else {  /* FAST_COUNT */
+            for (i = 0; i < n; i++)
+                if (s[i] == p[0]) {
+                    count++;
+                    if (count == maxcount)
+                        return maxcount;
+                }
+            return count;
+        }
+    }
+
+    mlast = m - 1;
+    skip = mlast - 1;
+    mask = 0;
+
+    if (mode != FAST_RSEARCH) {
+        const STRINGLIB_CHAR *ss = s + m - 1;
+        const STRINGLIB_CHAR *pp = p + m - 1;
+
+        /* create compressed boyer-moore delta 1 table */
+
+        /* process pattern[:-1] */
+        for (i = 0; i < mlast; i++) {
+            STRINGLIB_BLOOM_ADD(mask, p[i]);
+            if (p[i] == p[mlast])
+                skip = mlast - i - 1;
+        }
+        /* process pattern[-1] outside the loop */
+        STRINGLIB_BLOOM_ADD(mask, p[mlast]);
+
+        for (i = 0; i <= w; i++) {
+            /* note: using mlast in the skip path slows things down on x86 */
+            if (ss[i] == pp[0]) {
+                /* candidate match */
+                for (j = 0; j < mlast; j++)
+                    if (s[i+j] != p[j])
+                        break;
+                if (j == mlast) {
+                    /* got a match! */
+                    if (mode != FAST_COUNT)
+                        return i;
+                    count++;
+                    if (count == maxcount)
+                        return maxcount;
+                    i = i + mlast;
+                    continue;
+                }
+                /* miss: check if next character is part of pattern */
+                if (!STRINGLIB_BLOOM(mask, ss[i+1]))
+                    i = i + m;
+                else
+                    i = i + skip;
+            } else {
+                /* skip: check if next character is part of pattern */
+                if (!STRINGLIB_BLOOM(mask, ss[i+1]))
+                    i = i + m;
+            }
+        }
+    } else {    /* FAST_RSEARCH */
+
+        /* create compressed boyer-moore delta 1 table */
+
+        /* process pattern[0] outside the loop */
+        STRINGLIB_BLOOM_ADD(mask, p[0]);
+        /* process pattern[:0:-1] */
+        for (i = mlast; i > 0; i--) {
+            STRINGLIB_BLOOM_ADD(mask, p[i]);
+            if (p[i] == p[0])
+                skip = i - 1;
+        }
+
+        for (i = w; i >= 0; i--) {
+            if (s[i] == p[0]) {
+                /* candidate match */
+                for (j = mlast; j > 0; j--)
+                    if (s[i+j] != p[j])
+                        break;
+                if (j == 0)
+                    /* got a match! */
+                    return i;
+                /* miss: check if previous character is part of pattern */
+                if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1]))
+                    i = i - m;
+                else
+                    i = i - skip;
+            } else {
+                /* skip: check if previous character is part of pattern */
+                if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1]))
+                    i = i - m;
+            }
+        }
+    }
+
+    if (mode != FAST_COUNT)
+        return -1;
+    return count;
+}
+
author	orivej <orivej@yandex-team.ru>	2022-02-10 16:45:01 +0300
committer	Daniil Cherednik <dcherednik@yandex-team.ru>	2022-02-10 16:45:01 +0300
commit	2d37894b1b037cf24231090eda8589bbb44fb6fc (patch)
tree	be835aa92c6248212e705f25388ebafcf84bc7a1 /contrib/tools/python3/src/Objects/stringlib/fastsearch.h
parent	718c552901d703c502ccbefdfc3c9028d608b947 (diff)
download	ydb-2d37894b1b037cf24231090eda8589bbb44fb6fc.tar.gz