aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/tools/python3/src/Objects/stringlib/fastsearch.h
diff options
context:
space:
mode:
authororivej <orivej@yandex-team.ru>2022-02-10 16:44:49 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:44:49 +0300
commit718c552901d703c502ccbefdfc3c9028d608b947 (patch)
tree46534a98bbefcd7b1f3faa5b52c138ab27db75b7 /contrib/tools/python3/src/Objects/stringlib/fastsearch.h
parente9656aae26e0358d5378e5b63dcac5c8dbe0e4d0 (diff)
downloadydb-718c552901d703c502ccbefdfc3c9028d608b947.tar.gz
Restoring authorship annotation for <orivej@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/tools/python3/src/Objects/stringlib/fastsearch.h')
-rw-r--r--contrib/tools/python3/src/Objects/stringlib/fastsearch.h562
1 files changed, 281 insertions, 281 deletions
diff --git a/contrib/tools/python3/src/Objects/stringlib/fastsearch.h b/contrib/tools/python3/src/Objects/stringlib/fastsearch.h
index 56a4467d35..5ed40b3469 100644
--- a/contrib/tools/python3/src/Objects/stringlib/fastsearch.h
+++ b/contrib/tools/python3/src/Objects/stringlib/fastsearch.h
@@ -1,283 +1,283 @@
-/* stringlib: fastsearch implementation */
-
-#define STRINGLIB_FASTSEARCH_H
-
-/* fast search/count implementation, based on a mix between boyer-
- moore and horspool, with a few more bells and whistles on the top.
- for some more background, see: http://effbot.org/zone/stringlib.htm */
-
-/* note: fastsearch may access s[n], which isn't a problem when using
- Python's ordinary string types, but may cause problems if you're
- using this code in other contexts. also, the count mode returns -1
- if there cannot possible be a match in the target string, and 0 if
- it has actually checked for matches, but didn't find any. callers
- beware! */
-
-#define FAST_COUNT 0
-#define FAST_SEARCH 1
-#define FAST_RSEARCH 2
-
-#if LONG_BIT >= 128
-#define STRINGLIB_BLOOM_WIDTH 128
-#elif LONG_BIT >= 64
-#define STRINGLIB_BLOOM_WIDTH 64
-#elif LONG_BIT >= 32
-#define STRINGLIB_BLOOM_WIDTH 32
-#else
-#error "LONG_BIT is smaller than 32"
-#endif
-
-#define STRINGLIB_BLOOM_ADD(mask, ch) \
- ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
-#define STRINGLIB_BLOOM(mask, ch) \
- ((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
-
-#if STRINGLIB_SIZEOF_CHAR == 1
-# define MEMCHR_CUT_OFF 15
-#else
-# define MEMCHR_CUT_OFF 40
-#endif
-
-Py_LOCAL_INLINE(Py_ssize_t)
-STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
-{
- const STRINGLIB_CHAR *p, *e;
-
- p = s;
- e = s + n;
- if (n > MEMCHR_CUT_OFF) {
-#if STRINGLIB_SIZEOF_CHAR == 1
- p = memchr(s, ch, n);
- if (p != NULL)
- return (p - s);
- return -1;
-#else
+/* stringlib: fastsearch implementation */
+
+#define STRINGLIB_FASTSEARCH_H
+
+/* fast search/count implementation, based on a mix between boyer-
+ moore and horspool, with a few more bells and whistles on the top.
+ for some more background, see: http://effbot.org/zone/stringlib.htm */
+
+/* note: fastsearch may access s[n], which isn't a problem when using
+ Python's ordinary string types, but may cause problems if you're
+ using this code in other contexts. also, the count mode returns -1
+ if there cannot possible be a match in the target string, and 0 if
+ it has actually checked for matches, but didn't find any. callers
+ beware! */
+
+#define FAST_COUNT 0
+#define FAST_SEARCH 1
+#define FAST_RSEARCH 2
+
+#if LONG_BIT >= 128
+#define STRINGLIB_BLOOM_WIDTH 128
+#elif LONG_BIT >= 64
+#define STRINGLIB_BLOOM_WIDTH 64
+#elif LONG_BIT >= 32
+#define STRINGLIB_BLOOM_WIDTH 32
+#else
+#error "LONG_BIT is smaller than 32"
+#endif
+
+#define STRINGLIB_BLOOM_ADD(mask, ch) \
+ ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
+#define STRINGLIB_BLOOM(mask, ch) \
+ ((mask & (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
+
+#if STRINGLIB_SIZEOF_CHAR == 1
+# define MEMCHR_CUT_OFF 15
+#else
+# define MEMCHR_CUT_OFF 40
+#endif
+
+Py_LOCAL_INLINE(Py_ssize_t)
+STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
+{
+ const STRINGLIB_CHAR *p, *e;
+
+ p = s;
+ e = s + n;
+ if (n > MEMCHR_CUT_OFF) {
+#if STRINGLIB_SIZEOF_CHAR == 1
+ p = memchr(s, ch, n);
+ if (p != NULL)
+ return (p - s);
+ return -1;
+#else
/* use memchr if we can choose a needle without too many likely
- false positives */
- const STRINGLIB_CHAR *s1, *e1;
- unsigned char needle = ch & 0xff;
- /* If looking for a multiple of 256, we'd have too
- many false positives looking for the '\0' byte in UCS2
- and UCS4 representations. */
- if (needle != 0) {
- do {
- void *candidate = memchr(p, needle,
- (e - p) * sizeof(STRINGLIB_CHAR));
- if (candidate == NULL)
- return -1;
- s1 = p;
- p = (const STRINGLIB_CHAR *)
- _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
- if (*p == ch)
- return (p - s);
- /* False positive */
- p++;
- if (p - s1 > MEMCHR_CUT_OFF)
- continue;
- if (e - p <= MEMCHR_CUT_OFF)
- break;
- e1 = p + MEMCHR_CUT_OFF;
- while (p != e1) {
- if (*p == ch)
- return (p - s);
- p++;
- }
- }
- while (e - p > MEMCHR_CUT_OFF);
- }
-#endif
- }
- while (p < e) {
- if (*p == ch)
- return (p - s);
- p++;
- }
- return -1;
-}
-
-Py_LOCAL_INLINE(Py_ssize_t)
-STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
-{
- const STRINGLIB_CHAR *p;
-#ifdef HAVE_MEMRCHR
- /* memrchr() is a GNU extension, available since glibc 2.1.91.
- it doesn't seem as optimized as memchr(), but is still quite
- faster than our hand-written loop below */
-
- if (n > MEMCHR_CUT_OFF) {
-#if STRINGLIB_SIZEOF_CHAR == 1
- p = memrchr(s, ch, n);
- if (p != NULL)
- return (p - s);
- return -1;
-#else
+ false positives */
+ const STRINGLIB_CHAR *s1, *e1;
+ unsigned char needle = ch & 0xff;
+ /* If looking for a multiple of 256, we'd have too
+ many false positives looking for the '\0' byte in UCS2
+ and UCS4 representations. */
+ if (needle != 0) {
+ do {
+ void *candidate = memchr(p, needle,
+ (e - p) * sizeof(STRINGLIB_CHAR));
+ if (candidate == NULL)
+ return -1;
+ s1 = p;
+ p = (const STRINGLIB_CHAR *)
+ _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
+ if (*p == ch)
+ return (p - s);
+ /* False positive */
+ p++;
+ if (p - s1 > MEMCHR_CUT_OFF)
+ continue;
+ if (e - p <= MEMCHR_CUT_OFF)
+ break;
+ e1 = p + MEMCHR_CUT_OFF;
+ while (p != e1) {
+ if (*p == ch)
+ return (p - s);
+ p++;
+ }
+ }
+ while (e - p > MEMCHR_CUT_OFF);
+ }
+#endif
+ }
+ while (p < e) {
+ if (*p == ch)
+ return (p - s);
+ p++;
+ }
+ return -1;
+}
+
+Py_LOCAL_INLINE(Py_ssize_t)
+STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
+{
+ const STRINGLIB_CHAR *p;
+#ifdef HAVE_MEMRCHR
+ /* memrchr() is a GNU extension, available since glibc 2.1.91.
+ it doesn't seem as optimized as memchr(), but is still quite
+ faster than our hand-written loop below */
+
+ if (n > MEMCHR_CUT_OFF) {
+#if STRINGLIB_SIZEOF_CHAR == 1
+ p = memrchr(s, ch, n);
+ if (p != NULL)
+ return (p - s);
+ return -1;
+#else
/* use memrchr if we can choose a needle without too many likely
- false positives */
- const STRINGLIB_CHAR *s1;
- Py_ssize_t n1;
- unsigned char needle = ch & 0xff;
- /* If looking for a multiple of 256, we'd have too
- many false positives looking for the '\0' byte in UCS2
- and UCS4 representations. */
- if (needle != 0) {
- do {
- void *candidate = memrchr(s, needle,
- n * sizeof(STRINGLIB_CHAR));
- if (candidate == NULL)
- return -1;
- n1 = n;
- p = (const STRINGLIB_CHAR *)
- _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
- n = p - s;
- if (*p == ch)
- return n;
- /* False positive */
- if (n1 - n > MEMCHR_CUT_OFF)
- continue;
- if (n <= MEMCHR_CUT_OFF)
- break;
- s1 = p - MEMCHR_CUT_OFF;
- while (p > s1) {
- p--;
- if (*p == ch)
- return (p - s);
- }
- n = p - s;
- }
- while (n > MEMCHR_CUT_OFF);
- }
-#endif
- }
-#endif /* HAVE_MEMRCHR */
- p = s + n;
- while (p > s) {
- p--;
- if (*p == ch)
- return (p - s);
- }
- return -1;
-}
-
-#undef MEMCHR_CUT_OFF
-
-Py_LOCAL_INLINE(Py_ssize_t)
-FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
- const STRINGLIB_CHAR* p, Py_ssize_t m,
- Py_ssize_t maxcount, int mode)
-{
- unsigned long mask;
- Py_ssize_t skip, count = 0;
- Py_ssize_t i, j, mlast, w;
-
- w = n - m;
-
- if (w < 0 || (mode == FAST_COUNT && maxcount == 0))
- return -1;
-
- /* look for special cases */
- if (m <= 1) {
- if (m <= 0)
- return -1;
- /* use special case for 1-character strings */
- if (mode == FAST_SEARCH)
- return STRINGLIB(find_char)(s, n, p[0]);
- else if (mode == FAST_RSEARCH)
- return STRINGLIB(rfind_char)(s, n, p[0]);
- else { /* FAST_COUNT */
- for (i = 0; i < n; i++)
- if (s[i] == p[0]) {
- count++;
- if (count == maxcount)
- return maxcount;
- }
- return count;
- }
- }
-
- mlast = m - 1;
- skip = mlast - 1;
- mask = 0;
-
- if (mode != FAST_RSEARCH) {
- const STRINGLIB_CHAR *ss = s + m - 1;
- const STRINGLIB_CHAR *pp = p + m - 1;
-
- /* create compressed boyer-moore delta 1 table */
-
- /* process pattern[:-1] */
- for (i = 0; i < mlast; i++) {
- STRINGLIB_BLOOM_ADD(mask, p[i]);
- if (p[i] == p[mlast])
- skip = mlast - i - 1;
- }
- /* process pattern[-1] outside the loop */
- STRINGLIB_BLOOM_ADD(mask, p[mlast]);
-
- for (i = 0; i <= w; i++) {
- /* note: using mlast in the skip path slows things down on x86 */
- if (ss[i] == pp[0]) {
- /* candidate match */
- for (j = 0; j < mlast; j++)
- if (s[i+j] != p[j])
- break;
- if (j == mlast) {
- /* got a match! */
- if (mode != FAST_COUNT)
- return i;
- count++;
- if (count == maxcount)
- return maxcount;
- i = i + mlast;
- continue;
- }
- /* miss: check if next character is part of pattern */
- if (!STRINGLIB_BLOOM(mask, ss[i+1]))
- i = i + m;
- else
- i = i + skip;
- } else {
- /* skip: check if next character is part of pattern */
- if (!STRINGLIB_BLOOM(mask, ss[i+1]))
- i = i + m;
- }
- }
- } else { /* FAST_RSEARCH */
-
- /* create compressed boyer-moore delta 1 table */
-
- /* process pattern[0] outside the loop */
- STRINGLIB_BLOOM_ADD(mask, p[0]);
- /* process pattern[:0:-1] */
- for (i = mlast; i > 0; i--) {
- STRINGLIB_BLOOM_ADD(mask, p[i]);
- if (p[i] == p[0])
- skip = i - 1;
- }
-
- for (i = w; i >= 0; i--) {
- if (s[i] == p[0]) {
- /* candidate match */
- for (j = mlast; j > 0; j--)
- if (s[i+j] != p[j])
- break;
- if (j == 0)
- /* got a match! */
- return i;
- /* miss: check if previous character is part of pattern */
- if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1]))
- i = i - m;
- else
- i = i - skip;
- } else {
- /* skip: check if previous character is part of pattern */
- if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1]))
- i = i - m;
- }
- }
- }
-
- if (mode != FAST_COUNT)
- return -1;
- return count;
-}
-
+ false positives */
+ const STRINGLIB_CHAR *s1;
+ Py_ssize_t n1;
+ unsigned char needle = ch & 0xff;
+ /* If looking for a multiple of 256, we'd have too
+ many false positives looking for the '\0' byte in UCS2
+ and UCS4 representations. */
+ if (needle != 0) {
+ do {
+ void *candidate = memrchr(s, needle,
+ n * sizeof(STRINGLIB_CHAR));
+ if (candidate == NULL)
+ return -1;
+ n1 = n;
+ p = (const STRINGLIB_CHAR *)
+ _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
+ n = p - s;
+ if (*p == ch)
+ return n;
+ /* False positive */
+ if (n1 - n > MEMCHR_CUT_OFF)
+ continue;
+ if (n <= MEMCHR_CUT_OFF)
+ break;
+ s1 = p - MEMCHR_CUT_OFF;
+ while (p > s1) {
+ p--;
+ if (*p == ch)
+ return (p - s);
+ }
+ n = p - s;
+ }
+ while (n > MEMCHR_CUT_OFF);
+ }
+#endif
+ }
+#endif /* HAVE_MEMRCHR */
+ p = s + n;
+ while (p > s) {
+ p--;
+ if (*p == ch)
+ return (p - s);
+ }
+ return -1;
+}
+
+#undef MEMCHR_CUT_OFF
+
+Py_LOCAL_INLINE(Py_ssize_t)
+FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
+ const STRINGLIB_CHAR* p, Py_ssize_t m,
+ Py_ssize_t maxcount, int mode)
+{
+ unsigned long mask;
+ Py_ssize_t skip, count = 0;
+ Py_ssize_t i, j, mlast, w;
+
+ w = n - m;
+
+ if (w < 0 || (mode == FAST_COUNT && maxcount == 0))
+ return -1;
+
+ /* look for special cases */
+ if (m <= 1) {
+ if (m <= 0)
+ return -1;
+ /* use special case for 1-character strings */
+ if (mode == FAST_SEARCH)
+ return STRINGLIB(find_char)(s, n, p[0]);
+ else if (mode == FAST_RSEARCH)
+ return STRINGLIB(rfind_char)(s, n, p[0]);
+ else { /* FAST_COUNT */
+ for (i = 0; i < n; i++)
+ if (s[i] == p[0]) {
+ count++;
+ if (count == maxcount)
+ return maxcount;
+ }
+ return count;
+ }
+ }
+
+ mlast = m - 1;
+ skip = mlast - 1;
+ mask = 0;
+
+ if (mode != FAST_RSEARCH) {
+ const STRINGLIB_CHAR *ss = s + m - 1;
+ const STRINGLIB_CHAR *pp = p + m - 1;
+
+ /* create compressed boyer-moore delta 1 table */
+
+ /* process pattern[:-1] */
+ for (i = 0; i < mlast; i++) {
+ STRINGLIB_BLOOM_ADD(mask, p[i]);
+ if (p[i] == p[mlast])
+ skip = mlast - i - 1;
+ }
+ /* process pattern[-1] outside the loop */
+ STRINGLIB_BLOOM_ADD(mask, p[mlast]);
+
+ for (i = 0; i <= w; i++) {
+ /* note: using mlast in the skip path slows things down on x86 */
+ if (ss[i] == pp[0]) {
+ /* candidate match */
+ for (j = 0; j < mlast; j++)
+ if (s[i+j] != p[j])
+ break;
+ if (j == mlast) {
+ /* got a match! */
+ if (mode != FAST_COUNT)
+ return i;
+ count++;
+ if (count == maxcount)
+ return maxcount;
+ i = i + mlast;
+ continue;
+ }
+ /* miss: check if next character is part of pattern */
+ if (!STRINGLIB_BLOOM(mask, ss[i+1]))
+ i = i + m;
+ else
+ i = i + skip;
+ } else {
+ /* skip: check if next character is part of pattern */
+ if (!STRINGLIB_BLOOM(mask, ss[i+1]))
+ i = i + m;
+ }
+ }
+ } else { /* FAST_RSEARCH */
+
+ /* create compressed boyer-moore delta 1 table */
+
+ /* process pattern[0] outside the loop */
+ STRINGLIB_BLOOM_ADD(mask, p[0]);
+ /* process pattern[:0:-1] */
+ for (i = mlast; i > 0; i--) {
+ STRINGLIB_BLOOM_ADD(mask, p[i]);
+ if (p[i] == p[0])
+ skip = i - 1;
+ }
+
+ for (i = w; i >= 0; i--) {
+ if (s[i] == p[0]) {
+ /* candidate match */
+ for (j = mlast; j > 0; j--)
+ if (s[i+j] != p[j])
+ break;
+ if (j == 0)
+ /* got a match! */
+ return i;
+ /* miss: check if previous character is part of pattern */
+ if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1]))
+ i = i - m;
+ else
+ i = i - skip;
+ } else {
+ /* skip: check if previous character is part of pattern */
+ if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1]))
+ i = i - m;
+ }
+ }
+ }
+
+ if (mode != FAST_COUNT)
+ return -1;
+ return count;
+}
+