Intermediate changes

author: robot-piglet <robot-piglet@yandex-team.com> 2023-12-02 01:45:21 +0300
committer: robot-piglet <robot-piglet@yandex-team.com> 2023-12-02 02:42:50 +0300
commit: 9c43d58f75cf086b744cf4fe2ae180e8f37e4a0c (patch)
tree: 9f88a486917d371d099cd712efd91b4c122d209d /contrib/python/pyre2/py3/src
parent: 32fb6dda1feb24f9ab69ece5df0cb9ec238ca5e6 (diff)
download: ydb-9c43d58f75cf086b744cf4fe2ae180e8f37e4a0c.tar.gz
6 files changed, 1744 insertions, 0 deletions
diff --git a/contrib/python/pyre2/py3/src/_re2macros.h b/contrib/python/pyre2/py3/src/_re2macros.h
new file mode 100644
index 0000000000..b9ac82af6b
--- /dev/null
+++ b/contrib/python/pyre2/py3/src/_re2macros.h
@@ -0,0 +1,13 @@
+#ifndef RE2MACROS_H_
+#define RE2MACROS_H_
+
+#include <stdio.h>
+#include "re2/stringpiece.h"
+
+/* Allocate n default-constructed StringPieces; release with delete[]. */
+static inline re2::StringPiece * new_StringPiece_array(int n)
+{
+    return new re2::StringPiece[n];
+}
+
+#endif
diff --git a/contrib/python/pyre2/py3/src/compile.pxi b/contrib/python/pyre2/py3/src/compile.pxi
new file mode 100644
index 0000000000..887a2778cd
--- /dev/null
+++ b/contrib/python/pyre2/py3/src/compile.pxi
@@ -0,0 +1,234 @@
+
+def compile(pattern, int flags=0, int max_mem=8388608):
+    """Compile pattern, cached on (type, pattern, flags) -- not on max_mem."""
+    key = (type(pattern), pattern, flags)
+    if key in _cache:
+        return _cache[key]
+    compiled = _compile(pattern, flags, max_mem)
+    if len(_cache) >= _MAXCACHE:
+        _cache.popitem()
+    _cache[key] = compiled
+    return compiled
+
+
+def _compile(object pattern, int flags=0, int max_mem=8388608):
+    """Compile pattern with RE2, or hand it to the re module as a fallback."""
+    def fallback(pattern, flags, error_msg):
+        """Raise error, warn, or simply return fallback from re module."""
+        if current_notification == FALLBACK_EXCEPTION:
+            raise RegexError(error_msg)
+        elif current_notification == FALLBACK_WARNING:
+            warnings.warn("WARNING: Using re module. Reason: %s" % error_msg)
+        try:
+            result = PythonRePattern(pattern, flags)
+        except re.error as err:
+            raise RegexError(*err.args)
+        return result
+
+    cdef StringPiece * s
+    cdef Options opts
+    cdef int error_code
+    cdef int encoded = 0
+    cdef object original_pattern
+
+    if isinstance(pattern, (Pattern, SREPattern)):
+        if flags:
+            raise ValueError(
+                    'Cannot process flags argument with a compiled pattern')
+        return pattern
+
+    original_pattern = pattern
+    if flags & _L:
+        return fallback(original_pattern, flags, "re.LOCALE not supported")
+    pattern = unicode_to_bytes(pattern, &encoded, -1)
+    newflags = flags
+    if not PY2:
+        if not encoded and flags & _U:  # re.UNICODE
+            pass  # can use UNICODE with bytes pattern, but assumes valid UTF-8
+            # raise ValueError("can't use UNICODE flag with a bytes pattern")
+        elif encoded and not (flags & ASCII):  # re.ASCII (not in Python 2)
+            newflags = flags | _U  # re.UNICODE
+        elif encoded and flags & ASCII:
+            newflags = flags & ~_U  # re.UNICODE
+    try:
+        pattern = _prepare_pattern(pattern, newflags)
+    except BackreferencesException:
+        return fallback(original_pattern, flags, "Backreferences not supported")
+    except CharClassProblemException:
+        return fallback(original_pattern, flags,
+                r"\W and \S not supported inside character classes")
+
+    # Set the options given the flags above.
+    if flags & _I:
+        opts.set_case_sensitive(0)
+
+    opts.set_max_mem(max_mem)
+    opts.set_log_errors(0)
+    if flags & _U or encoded:
+        opts.set_encoding(EncodingUTF8)
+    else:  # re.UNICODE flag not passed, and pattern is bytes,
+        # so allow matching of arbitrary byte sequences.
+        opts.set_encoding(EncodingLatin1)
+
+    s = new StringPiece(<char *><bytes>pattern, len(pattern))
+
+    cdef RE2 *re_pattern
+    with nogil:
+        re_pattern = new RE2(s[0], opts)
+
+    if not re_pattern.ok():
+        # Something went wrong with the compilation.
+        del s
+        error_msg = cpp_to_unicode(re_pattern.error())
+        error_code = re_pattern.error_code()
+        del re_pattern
+        if current_notification == FALLBACK_EXCEPTION:
+            # Raise an exception regardless of the type of error.
+            raise RegexError(error_msg)
+        elif error_code not in (ErrorBadPerlOp, ErrorRepeatSize,
+                # ErrorBadEscape,
+                ErrorPatternTooLarge):
+            # Raise an error because these will not be fixed by using the
+            # ``re`` module.
+            raise RegexError(error_msg)
+        elif current_notification == FALLBACK_WARNING:
+            warnings.warn("WARNING: Using re module. Reason: %s" % error_msg)
+        return PythonRePattern(original_pattern, flags)
+
+    cdef Pattern pypattern = Pattern()
+    cdef map[cpp_string, int] named_groups = re_pattern.NamedCapturingGroups()
+    pypattern.pattern = original_pattern
+    pypattern.re_pattern = re_pattern
+    pypattern.groups = re_pattern.NumberOfCapturingGroups()
+    pypattern.encoded = encoded
+    pypattern.flags = flags
+    pypattern.groupindex = {}
+    for it in named_groups:
+        pypattern.groupindex[cpp_to_unicode(it.first)] = it.second
+
+    if flags & DEBUG:
+        print(repr(pypattern._dump_pattern()))
+    del s
+    return pypattern
+
+
+def _prepare_pattern(bytes pattern, int flags):
+    """Translate pattern to RE2 syntax.
+    Raises RegexError for a truncated pattern, and BackreferencesException
+    or CharClassProblemException for escapes it cannot translate."""
+    cdef bytearray result = bytearray()
+    cdef unsigned char * cstring = pattern
+    cdef unsigned char this, that
+    cdef int size = len(pattern)
+    cdef int n = 0
+
+    if flags & (_S | _M):
+        result.extend(b'(?')
+        if flags & _S:
+            result.extend(b's')
+        if flags & _M:
+            result.extend(b'm')
+        result.extend(b')')
+    while n < size:
+        this = cstring[n]
+        if flags & _X:
+            if this in b' \t\n\r\f\v':
+                n += 1
+                continue
+            elif this == b'#':
+                n += 1
+                while n < size and cstring[n] != b'\n':  # skip the comment
+                    n += 1
+                n += 1
+                continue
+
+        if this != b'[' and this != b'\\':
+            result.append(this)
+            n += 1
+            continue
+        elif this == b'[':
+            result.append(this)
+            while True:
+                n += 1
+                if n >= size:
+                    raise RegexError("unexpected end of regular expression")
+                this = cstring[n]
+                if this == b']':
+                    result.append(this)
+                    break
+                elif this == b'\\':
+                    n += 1
+                    that = cstring[n]
+                    if that == b'b':
+                        result.extend(br'\010')
+                    elif flags & _U:
+                        if that == b'd':
+                            result.extend(br'\p{Nd}')
+                        elif that == b'w':
+                            result.extend(br'_\p{L}\p{Nd}')
+                        elif that == b's':
+                            result.extend(br'\s\p{Z}')
+                        elif that == b'D':
+                            result.extend(br'\P{Nd}')
+                        elif that == b'W':
+                            # Since \w and \s are made out of several character
+                            # groups, I don't see a way to convert their
+                            # complements into a group without rewriting the
+                            # whole expression, which seems too complicated.
+                            raise CharClassProblemException()
+                        elif that == b'S':
+                            raise CharClassProblemException()
+                        else:
+                            result.append(this)
+                            result.append(that)
+                    else:
+                        result.append(this)
+                        result.append(that)
+                else:
+                    result.append(this)
+        elif this == b'\\':
+            n += 1
+            if n >= size:  # a lone trailing backslash escapes nothing
+                raise RegexError("bad escape (end of pattern)")
+            that = cstring[n]
+            if b'8' <= that <= b'9':
+                raise BackreferencesException()
+            elif isoct(that):
+                if (n + 2 < size and isoct(cstring[n + 1])
+                        and isoct(cstring[n + 2])):
+                    # all clear, this is an octal escape
+                    result.extend(cstring[n - 1:n + 3])
+                    n += 2
+                else:
+                    raise BackreferencesException()
+            elif that == b'x':
+                if (n + 2 < size and ishex(cstring[n + 1])
+                        and ishex(cstring[n + 2])):
+                    # hex escape
+                    result.extend(cstring[n - 1:n + 3])
+                    n += 2
+                else:
+                    raise BackreferencesException()
+            elif that == b'Z':
+                result.extend(b'\\z')
+            elif flags & _U:
+                if that == b'd':
+                    result.extend(br'\p{Nd}')
+                elif that == b'w':
+                    result.extend(br'[_\p{L}\p{Nd}]')
+                elif that == b's':
+                    result.extend(br'[\s\p{Z}]')
+                elif that == b'D':
+                    result.extend(br'[^\p{Nd}]')
+                elif that == b'W':
+                    result.extend(br'[^_\p{L}\p{Nd}]')
+                elif that == b'S':
+                    result.extend(br'[^\s\p{Z}]')
+                else:
+                    result.append(this)
+                    result.append(that)
+            else:
+                result.append(this)
+                result.append(that)
+        n += 1
+    return bytes(result)
diff --git a/contrib/python/pyre2/py3/src/includes.pxi b/contrib/python/pyre2/py3/src/includes.pxi
new file mode 100644
index 0000000000..8c35b6d4b2
--- /dev/null
+++ b/contrib/python/pyre2/py3/src/includes.pxi
@@ -0,0 +1,109 @@
+cimport cpython.unicode
+from libcpp.map cimport map
+from libcpp.string cimport string as cpp_string
+from cython.operator cimport postincrement, dereference
+from cpython.buffer cimport Py_buffer, PyBUF_SIMPLE, PyObject_CheckBuffer, \
+        PyObject_GetBuffer, PyBuffer_Release
+from cpython.version cimport PY_MAJOR_VERSION
+
+
+cdef extern from *:
+    cdef void emit_if_narrow_unicode "#if !defined(Py_UNICODE_WIDE) && PY_VERSION_HEX < 0x03030000 //" ()
+    cdef void emit_endif "#endif //" ()
+
+
+cdef extern from "Python.h":
+    int PyObject_CheckReadBuffer(object)
+    int PyObject_AsReadBuffer(object, const void **, Py_ssize_t *)
+
+
+cdef extern from "re2/stringpiece.h" namespace "re2":
+    cdef cppclass StringPiece:
+        StringPiece()
+        StringPiece(const char *)
+        StringPiece(const char *, int)
+        const char * data()
+        int copy(char * buf, size_t n, size_t pos)
+        int length()
+
+
+cdef extern from "re2/re2.h" namespace "re2":
+    cdef enum Anchor:
+        UNANCHORED "RE2::UNANCHORED"
+        ANCHOR_START "RE2::ANCHOR_START"
+        ANCHOR_BOTH "RE2::ANCHOR_BOTH"
+
+    ctypedef Anchor re2_Anchor "RE2::Anchor"
+
+    cdef enum ErrorCode:
+        NoError "RE2::NoError"
+        ErrorInternal "RE2::ErrorInternal"
+        # Parse errors
+        ErrorBadEscape "RE2::ErrorBadEscape"          # bad escape sequence
+        ErrorBadCharClass "RE2::ErrorBadCharClass"       # bad character class
+        ErrorBadCharRange "RE2::ErrorBadCharRange"       # bad character class range
+        ErrorMissingBracket "RE2::ErrorMissingBracket"     # missing closing ]
+        ErrorMissingParen   "RE2::ErrorMissingParen"       # missing closing )
+        ErrorTrailingBackslash "RE2::ErrorTrailingBackslash"  # trailing \ at end of regexp
+        ErrorRepeatArgument "RE2::ErrorRepeatArgument"     # repeat argument missing, e.g. "*"
+        ErrorRepeatSize "RE2::ErrorRepeatSize"         # bad repetition argument
+        ErrorRepeatOp "RE2::ErrorRepeatOp"           # bad repetition operator
+        ErrorBadPerlOp "RE2::ErrorBadPerlOp"          # bad perl operator
+        ErrorBadUTF8 "RE2::ErrorBadUTF8"            # invalid UTF-8 in regexp
+        ErrorBadNamedCapture "RE2::ErrorBadNamedCapture"    # bad named capture group
+        ErrorPatternTooLarge "RE2::ErrorPatternTooLarge"    # pattern too large (compile failed)
+
+    cdef enum Encoding:
+        EncodingUTF8 "RE2::Options::EncodingUTF8"
+        EncodingLatin1 "RE2::Options::EncodingLatin1"
+
+    ctypedef Encoding re2_Encoding "RE2::Options::Encoding"
+
+    cdef cppclass Options "RE2::Options":
+        Options()
+        void set_posix_syntax(int b)
+        void set_longest_match(int b)
+        void set_log_errors(int b)
+        void set_max_mem(int m)
+        void set_literal(int b)
+        void set_never_nl(int b)
+        void set_case_sensitive(int b)
+        void set_perl_classes(int b)
+        void set_word_boundary(int b)
+        void set_one_line(int b)
+        int case_sensitive()
+        void set_encoding(re2_Encoding encoding)
+
+    cdef cppclass RE2:
+        RE2(const StringPiece pattern, Options option) nogil
+        RE2(const StringPiece pattern) nogil
+        int Match(const StringPiece text, int startpos, int endpos,
+                Anchor anchor, StringPiece * match, int nmatch) nogil
+        int Replace(cpp_string *str, const RE2 pattern,
+                const StringPiece rewrite) nogil
+        int GlobalReplace(cpp_string *str, const RE2 pattern,
+                const StringPiece rewrite) nogil
+        int NumberOfCapturingGroups()
+        int ok()
+        const cpp_string pattern()
+        cpp_string error()
+        ErrorCode error_code()
+        const map[cpp_string, int]& NamedCapturingGroups()
+
+    # hack for static methods
+    cdef int Replace "RE2::Replace"(
+            cpp_string *str, const RE2 pattern,
+            const StringPiece rewrite) nogil
+    cdef int GlobalReplace "RE2::GlobalReplace"(
+            cpp_string *str,
+            const RE2 pattern,
+            const StringPiece rewrite) nogil
+
+
+cdef extern from "_re2macros.h":
+    StringPiece * new_StringPiece_array(int) nogil
+
+
+cdef extern from *:
+    # StringPiece * new_StringPiece_array "new re2::StringPiece[n]" (int) nogil
+    void delete_StringPiece_array "delete[]" (StringPiece *) nogil
diff --git a/contrib/python/pyre2/py3/src/match.pxi b/contrib/python/pyre2/py3/src/match.pxi
new file mode 100644
index 0000000000..3eaae74b47
--- /dev/null
+++ b/contrib/python/pyre2/py3/src/match.pxi
@@ -0,0 +1,280 @@
+cdef class Match:
+    cdef readonly Pattern re
+    cdef readonly object string
+    cdef readonly int pos
+    cdef readonly int endpos
+    cdef readonly tuple regs
+
+    cdef StringPiece * matches
+    cdef int encoded
+    cdef int nmatches
+    cdef int _lastindex
+    cdef tuple _groups
+    cdef dict _named_groups
+
+    property lastindex:
+        def __get__(self):
+            return None if self._lastindex < 1 else self._lastindex
+
+    property lastgroup:
+        def __get__(self):
+            if self._lastindex < 1:
+                return None
+            for name, n in self.re.groupindex.items():
+                if n == self._lastindex:
+                    return name
+            return None
+
+    def __init__(self, Pattern pattern_object, int num_groups):
+        self._lastindex = -1
+        self._groups = None
+        self.pos = 0
+        self.endpos = -1
+        self.matches = new_StringPiece_array(num_groups + 1)
+        self.nmatches = num_groups
+        self.re = pattern_object
+
+    cdef _init_groups(self):
+        cdef list groups = []
+        cdef int i
+        cdef const char * last_end = NULL
+        cdef const char * cur_end = NULL
+
+        for i in range(self.nmatches):
+            if self.matches[i].data() == NULL:
+                groups.append(None)
+            else:
+                if i > 0:
+                    cur_end = self.matches[i].data() + self.matches[i].length()
+
+                    if last_end == NULL:
+                        last_end = cur_end
+                        self._lastindex = i
+                    else:
+                        # The rules for last group are a bit complicated:
+                        # if two groups end at the same point, the earlier one
+                        # is considered last, so we don't switch our selection
+                        # unless the end point has moved.
+                        if cur_end > last_end:
+                            last_end = cur_end
+                            self._lastindex = i
+                groups.append(
+                        self.matches[i].data()[:self.matches[i].length()])
+        self._groups = tuple(groups)
+
+    cdef bytes _group(self, object groupnum):
+        """Return the undecoded value (bytes or None) of a group given by
+        number or name; raise IndexError when no such group exists."""
+        if isinstance(groupnum, int):
+            if groupnum < 0 or groupnum >= self.nmatches:
+                raise IndexError("no such group %d; available groups: %r"
+                        % (groupnum, list(range(self.nmatches))))
+            return self._groups[groupnum]
+        groupdict = self._groupdict()
+        if groupnum not in groupdict:
+            raise IndexError("no such group %r; available groups: %r"
+                    % (groupnum, list(groupdict)))
+        return groupdict[groupnum]
+
+    cdef dict _groupdict(self):
+        if self._named_groups is None:
+            self._named_groups = {name: self._groups[n]
+                    for name, n in self.re.groupindex.items()}
+        return self._named_groups
+
+    def groups(self, default=None):
+        if self.encoded:
+            return tuple([default if g is None else g.decode('utf8')
+                    for g in self._groups[1:]])
+        return tuple([default if g is None else g
+                for g in self._groups[1:]])
+
+    def group(self, *args):
+        if len(args) == 0:
+            groupnum = 0
+        elif len(args) == 1:
+            groupnum = args[0]
+        else:  # len(args) > 1:
+            return tuple([self.group(i) for i in args])
+        if self.encoded:
+            result = self._group(groupnum)
+            return None if result is None else result.decode('utf8')
+        return self._group(groupnum)
+
+    def groupdict(self, default=None):
+        """Return a dict of named groups; default fills unmatched ones."""
+        # A new dict every call, so callers cannot mutate the cached mapping.
+        enc = self.encoded
+        return {k: default if v is None else (v.decode('utf8') if enc else v)
+                for k, v in self._groupdict().items()}
+
+    def expand(self, object template):
+        """Expand a template with groups."""
+        cdef bytearray result = bytearray()
+        if isinstance(template, unicode):
+            if not PY2 and not self.encoded:
+                raise ValueError(
+                        'cannot expand unicode template on bytes pattern')
+            templ = template.encode('utf8')
+        else:
+            if not PY2 and self.encoded:
+                raise ValueError(
+                        'cannot expand bytes template on unicode pattern')
+            templ = bytes(template)
+        self._expand(templ, result)
+        return result.decode('utf8') if self.encoded else bytes(result)
+
+    cdef _expand(self, bytes templ, bytearray result):
+        """Expand template by appending to an existing bytearray.
+        Everything remains UTF-8 encoded."""
+        cdef char * cstring
+        cdef int n = 0, prev = 0, size
+
+        # NB: cstring is used to get single characters, to avoid difference in
+        # Python 2/3 behavior of bytes objects.
+        cstring = templ
+        size = len(templ)
+        while True:
+            prev = n
+            n = templ.find(b'\\', prev)
+            if n == -1:
+                result.extend(templ[prev:])
+                break
+            result.extend(templ[prev:n])
+            n += 1
+            if (n + 2 < size and cstring[n] == b'x'
+                    and ishex(cstring[n + 1]) and ishex(cstring[n + 2])):
+                # hex char reference \x1f
+                result.append(int(templ[n + 1:n + 3], base=16) & 255)
+                n += 3
+            elif (n + 2 < size and isoct(cstring[n]) and isoct(cstring[n + 1])
+                    and isoct(cstring[n + 2])):
+                # octal char reference \123
+                result.append(int(templ[n:n + 3], base=8) & 255)
+                n += 3
+            elif cstring[n] == b'0':
+                if n + 1 < size and isoct(cstring[n + 1]):
+                    # 2 character octal: \01
+                    result.append(int(templ[n:n + 2], base=8))
+                    n += 2
+                else:  # nul-terminator literal \0
+                    result.append(b'\0')
+                    n += 1
+            elif b'0' <= cstring[n] <= b'9':  # numeric group reference
+                if n + 1 < size and isdigit(cstring[n + 1]):
+                    # 2 digit group ref \12
+                    groupno = int(templ[n:n + 2])
+                    n += 2
+                else:
+                    # 1 digit group ref \1
+                    groupno = int(templ[n:n + 1])
+                    n += 1
+                if groupno <= self.re.groups:
+                    groupval = self._group(groupno)
+                    if groupval is not None:
+                        result.extend(groupval)
+                else:
+                    raise RegexError('invalid group reference.')
+            elif cstring[n] == b'g':  # named group reference
+                n += 1
+                if n >= size or cstring[n] != b'<':
+                    raise RegexError('missing group name')
+                n += 1
+                start = n
+                while cstring[n] != b'>':
+                    if not isident(cstring[n]):
+                        raise RegexError('bad character in group name')
+                    n += 1
+                    if n >= size:
+                        raise RegexError('unterminated group name')
+                if templ[start:n].isdigit():
+                    name = int(templ[start:n])
+                elif isdigit(cstring[start]):
+                    raise RegexError('bad character in group name')
+                else:
+                    name = templ[start:n]
+                    if self.encoded:
+                        name = name.decode('utf8')
+                groupval = self._group(name)
+                if groupval is not None:
+                    result.extend(groupval)
+                n += 1
+            else:
+                if cstring[n] == b'n':
+                    result.append(b'\n')
+                elif cstring[n] == b'r':
+                    result.append(b'\r')
+                elif cstring[n] == b't':
+                    result.append(b'\t')
+                elif cstring[n] == b'v':
+                    result.append(b'\v')
+                elif cstring[n] == b'f':
+                    result.append(b'\f')
+                elif cstring[n] == b'a':
+                    result.append(b'\a')
+                elif cstring[n] == b'b':
+                    result.append(b'\b')
+                elif cstring[n] == b'\\':
+                    result.append(b'\\')
+                else:  # copy verbatim
+                    result.append(b'\\')
+                    result.append(cstring[n])
+                n += 1
+        return bytes(result)
+
+    def start(self, group=0):
+        return self.span(group)[0]
+
+    def end(self, group=0):
+        return self.span(group)[1]
+
+    def span(self, group=0):
+        """Return the (start, end) pair of a group given by number or name."""
+        if isinstance(group, int):
+            if group < 0 or group >= len(self.regs):
+                raise IndexError("no such group %d; available groups: %r"
+                        % (group, list(range(len(self.regs)))))
+            return self.regs[group]
+        self._groupdict()
+        if group not in self.re.groupindex:
+            raise IndexError("no such group %r; available groups: %r"
+                    % (group, list(self.re.groupindex)))
+        return self.regs[self.re.groupindex[group]]
+
+    cdef _make_spans(self, char * cstring, int size, int * cpos, int * upos):
+        """Fill self.regs with one (start, end) offset pair per group,
+        measured from cstring; groups without a match get (-1, -1)."""
+        cdef int start, end
+        cdef StringPiece * piece
+
+        spans = []
+        for i in range(self.nmatches):
+            piece = &self.matches[i]
+            if piece.data() == NULL:
+                spans.append((-1, -1))
+            else:
+                start = piece.data() - cstring
+                end = start + piece.length()
+                spans.append((start, end))
+
+        if self.encoded == 2:
+            spans = self._convert_spans(spans, cstring, size, cpos, upos)
+
+        self.regs = tuple(spans)
+
+    cdef list _convert_spans(self, spans,
+            char * cstring, int size, int * cpos, int * upos):
+        cdef map[int, int] positions
+        cdef int x, y
+        for x, y in spans:
+            positions[x] = x
+            positions[y] = y
+        unicodeindices(positions, cstring, size, cpos, upos)
+        return [(positions[x], positions[y]) for x, y in spans]
+
+    def __dealloc__(self):
+        delete_StringPiece_array(self.matches)
+
+    def __repr__(self):
+        return '<re2.Match object; span=%r, match=%r>' % (
+                self.span(), self.group())
diff --git a/contrib/python/pyre2/py3/src/pattern.pxi b/contrib/python/pyre2/py3/src/pattern.pxi
new file mode 100644
index 0000000000..b8439d2007
--- /dev/null
+++ b/contrib/python/pyre2/py3/src/pattern.pxi
@@ -0,0 +1,650 @@
+cdef class Pattern:
+    # Compiled pattern object; its methods run matches through the RE2
+    # instance stored in ``re_pattern``.
+    cdef readonly object pattern  # original pattern in Python format
+    cdef readonly int flags
+    cdef readonly int groups  # number of groups
+    cdef readonly dict groupindex  # name => group number
+    cdef object __weakref__  # slot that lets instances be weakly referenced
+
+    cdef bint encoded  # True if this was originally a Unicode pattern
+    cdef RE2 * re_pattern  # underlying RE2 object; deleted in __dealloc__
+
+    def search(self, object string, int pos=0, int endpos=-1):
+        """Look for the first place in ``string`` where the pattern matches.
+
+        Returns a Match instance, or None when no position matches."""
+        found = self._search(string, pos, endpos, UNANCHORED)
+        return found
+
+    def match(self, object string, int pos=0, int endpos=-1):
+        """Try to match the pattern anchored at the start of the string.
+
+        Returns a Match instance, or None when there is no match."""
+        found = self._search(string, pos, endpos, ANCHOR_START)
+        return found
+
+    def fullmatch(self, object string, int pos=0, int endpos=-1):
+        """fullmatch(string[, pos[, endpos]]) --> Match object or None.
+
+        Succeeds only when the pattern matches the entire string."""
+        found = self._search(string, pos, endpos, ANCHOR_BOTH)
+        return found
+
+    cdef _search(self, object string, int pos, int endpos,
+            re2_Anchor anchoring):
+        """Scan through string looking for a match, and return a corresponding
+        Match instance. Return None if no position in the string matches.
+
+        Shared implementation behind ``search``, ``match`` and ``fullmatch``;
+        ``anchoring`` is forwarded unchanged to ``RE2.Match``. An ``endpos``
+        of -1 means that no explicit end position was given.
+
+        Raises TypeError when ``string`` cannot be viewed as a C buffer."""
+        cdef char * cstring
+        cdef Py_ssize_t size
+        cdef Py_buffer buf
+        cdef int retval
+        cdef int encoded = 0
+        cdef StringPiece * sp
+        cdef Match m = Match(self, self.groups + 1)
+        cdef int cpos = 0, upos = pos
+
+        # An explicit end position at or before pos is treated as "no match".
+        if 0 <= endpos <= pos:
+            return None
+
+        bytestr = unicode_to_bytes(string, &encoded, self.encoded)
+        if pystring_to_cstring(bytestr, &cstring, &size, &buf) == -1:
+            raise TypeError('expected string or buffer')
+        try:
+            # NOTE(review): encoded == 2 presumably marks unicode input whose
+            # character and byte offsets differ, so pos/endpos are translated
+            # by utf8indices -- confirm against unicode_to_bytes.
+            if encoded == 2 and (pos or endpos != -1):
+                utf8indices(cstring, size, &pos, &endpos)
+                cpos = pos
+            if pos > size:
+                return None
+            # Clamp the searched region to endpos when one was given.
+            if 0 <= endpos < size:
+                size = endpos
+
+            sp = new StringPiece(cstring, size)
+            # The GIL is released while RE2 runs the match.
+            with nogil:
+                retval = self.re_pattern.Match(
+                        sp[0],
+                        pos,
+                        size,
+                        anchoring,
+                        m.matches,
+                        self.groups + 1)
+            del sp
+            if retval == 0:
+                return None
+
+            m.encoded = encoded
+            m.nmatches = self.groups + 1
+            m.string = string
+            m.pos = pos
+            if endpos == -1:
+                m.endpos = size
+            else:
+                m.endpos = endpos
+            # Spans and groups are built while the buffer is still held.
+            m._make_spans(cstring, size, &cpos, &upos)
+            m._init_groups()
+        finally:
+            release_cstring(&buf)
+        return m
+
+    def contains(self, object string, int pos=0, int endpos=-1):
+        """contains(string[, pos[, endpos]]) --> bool.
+
+        Scan through string looking for a match, and return True or False.
+        No group information is requested from RE2 (zero submatches).
+
+        Raises TypeError when ``string`` cannot be viewed as a C buffer."""
+        cdef char * cstring
+        cdef Py_ssize_t size
+        cdef Py_buffer buf
+        cdef int retval
+        cdef int encoded = 0
+        cdef StringPiece * sp
+
+        # An explicit end position at or before pos is treated as "no match".
+        if 0 <= endpos <= pos:
+            return False
+
+        bytestr = unicode_to_bytes(string, &encoded, self.encoded)
+        if pystring_to_cstring(bytestr, &cstring, &size, &buf) == -1:
+            raise TypeError('expected string or buffer')
+        try:
+            # NOTE(review): presumably translates character offsets to byte
+            # offsets for unicode input -- confirm against utf8indices.
+            if encoded == 2 and (pos or endpos != -1):
+                utf8indices(cstring, size, &pos, &endpos)
+            if pos > size:
+                return False
+            # Clamp the searched region to endpos when one was given.
+            if 0 <= endpos < size:
+                size = endpos
+
+            sp = new StringPiece(cstring, size)
+            # The GIL is released while RE2 runs the match.
+            with nogil:
+                retval = self.re_pattern.Match(
+                        sp[0],
+                        pos,
+                        size,
+                        UNANCHORED,
+                        NULL,
+                        0)
+            del sp
+        finally:
+            release_cstring(&buf)
+        return retval != 0
+
+    def count(self, object string, int pos=0, int endpos=-1):
+        """Return number of non-overlapping matches of pattern in string.
+
+        Only the overall match (one submatch) is requested from RE2.
+
+        Raises TypeError when ``string`` cannot be viewed as a C buffer."""
+        cdef char * cstring
+        cdef Py_ssize_t size
+        cdef Py_buffer buf
+        cdef int retval
+        cdef int encoded = 0
+        cdef int result = 0
+        cdef StringPiece * sp = NULL
+        cdef StringPiece * matches = NULL
+
+        bytestr = unicode_to_bytes(string, &encoded, self.encoded)
+        if pystring_to_cstring(bytestr, &cstring, &size, &buf) == -1:
+            raise TypeError('expected string or buffer')
+        try:
+            # NOTE(review): presumably translates character offsets to byte
+            # offsets for unicode input -- confirm against utf8indices.
+            if encoded == 2 and (pos or endpos != -1):
+                utf8indices(cstring, size, &pos, &endpos)
+            if pos > size:
+                return 0
+            # Clamp the searched region to endpos when one was given.
+            if 0 <= endpos < size:
+                size = endpos
+
+            sp = new StringPiece(cstring, size)
+            matches = new_StringPiece_array(1)
+            try:
+                while True:
+                    # The GIL is released while RE2 runs the match.
+                    with nogil:
+                        retval = self.re_pattern.Match(
+                                sp[0],
+                                pos,
+                                size,
+                                UNANCHORED,
+                                matches,
+                                1)
+                    if retval == 0:
+                        break
+                    result += 1
+                    # A match found when already at the end is the last one.
+                    if pos == size:
+                        break
+                    # offset the pos to move to the next point
+                    # (an empty match still advances by one byte)
+                    pos = matches[0].data() - cstring + (
+                            matches[0].length() or 1)
+            finally:
+                del sp
+                delete_StringPiece_array(matches)
+        finally:
+            release_cstring(&buf)
+        return result
+
+    def findall(self, object string, int pos=0, int endpos=-1):
+        """Return all non-overlapping matches of pattern in string as a list
+        of strings.
+
+        With more than one group, each list item is a tuple holding the text
+        of every group (an empty string for a group without data). With zero
+        or one group, each item is the text of group ``self.groups``, i.e.
+        the whole match or the single group. Items are decoded from UTF-8
+        when the input was encoded from unicode, and are bytes otherwise.
+
+        Raises TypeError when ``string`` cannot be viewed as a C buffer."""
+        cdef char * cstring
+        cdef Py_ssize_t size
+        cdef Py_buffer buf
+        cdef int encoded = 0
+        cdef int retval
+        cdef list resultlist = []
+        cdef StringPiece * sp = NULL
+        cdef StringPiece * matches = NULL
+
+        bytestr = unicode_to_bytes(string, &encoded, self.encoded)
+        if pystring_to_cstring(bytestr, &cstring, &size, &buf) == -1:
+            raise TypeError('expected string or buffer')
+        try:
+            # NOTE(review): presumably translates character offsets to byte
+            # offsets for unicode input -- confirm against utf8indices.
+            if encoded == 2 and (pos or endpos != -1):
+                utf8indices(cstring, size, &pos, &endpos)
+            if pos > size:
+                return []
+            # Clamp the searched region to endpos when one was given.
+            if 0 <= endpos < size:
+                size = endpos
+
+            sp = new StringPiece(cstring, size)
+            matches = new_StringPiece_array(self.groups + 1)
+
+            while True:
+                # The GIL is released while RE2 runs the match.
+                with nogil:
+                    retval = self.re_pattern.Match(
+                            sp[0],
+                            pos,
+                            size,
+                            UNANCHORED,
+                            matches,
+                            self.groups + 1)
+                if retval == 0:
+                    break
+                if self.groups > 1:
+                    # Several groups: one tuple of group texts per match.
+                    if encoded:
+                        resultlist.append(tuple([
+                            '' if matches[i].data() is NULL else
+                            matches[i].data()[:matches[i].length()
+                                ].decode('utf8')
+                            for i in range(1, self.groups + 1)]))
+                    else:
+                        resultlist.append(tuple([
+                            b'' if matches[i].data() is NULL
+                            else matches[i].data()[:matches[i].length()]
+                            for i in range(1, self.groups + 1)]))
+                else:  # 0 or 1 group; return list of strings
+                    if encoded:
+                        resultlist.append(matches[self.groups].data()[
+                            :matches[self.groups].length()].decode('utf8'))
+                    else:
+                        resultlist.append(matches[self.groups].data()[
+                            :matches[self.groups].length()])
+                # A match found when already at the end is the last one.
+                if pos == size:
+                    break
+                # offset the pos to move to the next point
+                # (an empty match still advances by one byte)
+                pos = matches[0].data() - cstring + (matches[0].length() or 1)
+        finally:
+            # sp and matches start out NULL, so cleanup is safe on every path.
+            del sp
+            delete_StringPiece_array(matches)
+            release_cstring(&buf)
+        return resultlist
+
+    def finditer(self, object string, int pos=0, int endpos=-1):
+        """Return an iterator over all non-overlapping matches of the
+        pattern in string, each one a Match object."""
+        generator = self._finditer(string, pos, endpos)
+        matches = iter(generator)
+        # _finditer yields one dummy value right after its setup; consuming
+        # it here makes argument errors surface now rather than on first use.
+        next(matches)
+        return matches
+
+    def _finditer(self, object string, int pos=0, int endpos=-1):
+        """Generator behind ``finditer``.
+
+        The first value yielded is a dummy (None) produced once the input
+        buffer has been set up; every later value is a Match object.
+
+        Raises TypeError when ``string`` cannot be viewed as a C buffer."""
+        cdef char * cstring
+        cdef Py_ssize_t size
+        cdef Py_buffer buf
+        cdef int retval
+        cdef StringPiece * sp = NULL
+        cdef Match m
+        cdef int encoded = 0
+        cdef int cpos = 0, upos = pos
+
+        bytestr = unicode_to_bytes(string, &encoded, self.encoded)
+        if pystring_to_cstring(bytestr, &cstring, &size, &buf) == -1:
+            raise TypeError('expected string or buffer')
+        try:
+            # NOTE(review): presumably translates character offsets to byte
+            # offsets for unicode input -- confirm against utf8indices.
+            if encoded == 2 and (pos or endpos != -1):
+                utf8indices(cstring, size, &pos, &endpos)
+                cpos = pos
+            if pos > size:
+                return
+            # Clamp the searched region to endpos when one was given.
+            if 0 <= endpos < size:
+                size = endpos
+
+            sp = new StringPiece(cstring, size)
+
+            # Dummy value consumed by finditer() before handing out the
+            # iterator.
+            yield
+            while True:
+                # A fresh Match per iteration, since each one is handed out.
+                m = Match(self, self.groups + 1)
+                m.string = string
+                # The GIL is released while RE2 runs the match.
+                with nogil:
+                    retval = self.re_pattern.Match(
+                            sp[0],
+                            pos,
+                            size,
+                            UNANCHORED,
+                            m.matches,
+                            self.groups + 1)
+                if retval == 0:
+                    break
+                m.encoded = encoded
+                m.nmatches = self.groups + 1
+                m.pos = pos
+                if endpos == -1:
+                    m.endpos = size
+                else:
+                    m.endpos = endpos
+                m._make_spans(cstring, size, &cpos, &upos)
+                m._init_groups()
+                yield m
+                # A match found when already at the end is the last one.
+                if pos == size:
+                    break
+                # offset the pos to move to the next point
+                # (an empty match still advances by one byte)
+                pos = m.matches[0].data() - cstring + (
+                        m.matches[0].length() or 1)
+        finally:
+            # sp starts out NULL, so cleanup is safe on every path.
+            del sp
+            release_cstring(&buf)
+
+    def split(self, string, int maxsplit=0):
+        """split(string[, maxsplit = 0]) --> list
+
+        Split a string by the occurrences of the pattern.
+
+        The text of every group is inserted into the result after each
+        split point (None for a group without data). A ``maxsplit`` of zero
+        or less places no limit on the number of splits. Empty matches never
+        split the string.
+
+        Raises TypeError when ``string`` cannot be viewed as a C buffer."""
+        cdef char * cstring
+        cdef Py_ssize_t size
+        cdef int retval
+        cdef int pos = 0
+        cdef int lookahead = 0
+        cdef int num_split = 0
+        cdef StringPiece * sp
+        cdef StringPiece * matches
+        cdef list resultlist = []
+        cdef int encoded = 0
+        cdef Py_buffer buf
+
+        if maxsplit < 0:
+            maxsplit = 0
+
+        bytestr = unicode_to_bytes(string, &encoded, self.encoded)
+        if pystring_to_cstring(bytestr, &cstring, &size, &buf) == -1:
+            raise TypeError('expected string or buffer')
+        matches = new_StringPiece_array(self.groups + 1)
+        sp = new StringPiece(cstring, size)
+        try:
+
+            while True:
+                # Search from pos, skipping ``lookahead`` bytes already
+                # examined; the GIL is released while RE2 runs.
+                with nogil:
+                    retval = self.re_pattern.Match(
+                            sp[0],
+                            pos + lookahead,
+                            size,
+                            UNANCHORED,
+                            matches,
+                            self.groups + 1)
+                if retval == 0:
+                    break
+
+                match_start = matches[0].data() - cstring
+                match_end = match_start + matches[0].length()
+
+                # If an empty match, just look ahead until you find something
+                if match_start == match_end:
+                    if pos + lookahead == size:
+                        break
+                    lookahead += 1
+                    continue
+
+                # Text between the previous split point and this match.
+                if encoded:
+                    resultlist.append(
+                            char_to_unicode(&sp.data()[pos], match_start - pos))
+                else:
+                    resultlist.append(sp.data()[pos:match_start])
+                if self.groups > 0:
+                    for group in range(self.groups):
+                        if matches[group + 1].data() == NULL:
+                            resultlist.append(None)
+                        else:
+                            if encoded:
+                                resultlist.append(char_to_unicode(
+                                        matches[group + 1].data(),
+                                        matches[group + 1].length()))
+                            else:
+                                resultlist.append(matches[group + 1].data()[:
+                                        matches[group + 1].length()])
+
+                # offset the pos to move to the next point
+                pos = match_end
+                lookahead = 0
+
+                num_split += 1
+                if maxsplit and num_split >= maxsplit:
+                    break
+
+            # Remainder of the string after the last split point.
+            if encoded:
+                resultlist.append(
+                        char_to_unicode(&sp.data()[pos], sp.length() - pos))
+            else:
+                resultlist.append(sp.data()[pos:])
+        finally:
+            del sp
+            delete_StringPiece_array(matches)
+            release_cstring(&buf)
+        return resultlist
+
+    def sub(self, repl, string, int count=0):
+        """sub(repl, string[, count = 0]) --> newstring
+
+        Replace the leftmost non-overlapping occurrences of the pattern in
+        string with the replacement repl and return the resulting string."""
+        cdef int replacements = 0  # filled in by _subn; not reported here
+        newstring = self._subn(repl, string, count, &replacements)
+        return newstring
+
+    def subn(self, repl, string, int count=0):
+        """subn(repl, string[, count = 0]) --> (newstring, number of subs)
+
+        Replace the leftmost non-overlapping occurrences of the pattern in
+        string with the replacement repl, and return the tuple
+        (new_string, number_of_subs_made)."""
+        cdef int replacements = 0  # filled in by _subn
+        newstring = self._subn(repl, string, count, &replacements)
+        return (newstring, replacements)
+
+    cdef _subn(self, repl, string, int count, int *num_repl):
+        """Shared implementation of ``sub`` and ``subn``.
+
+        Dispatches to ``_subn_callback`` for a callable ``repl`` and to
+        ``_subn_expand`` when ``count`` exceeds 1 or ``repl`` contains a
+        backslash; otherwise the replacement is done by RE2's
+        ``GlobalReplace`` (count == 0) or ``Replace`` (count == 1).
+        The number of substitutions is written to ``num_repl[0]``.
+
+        Returns the new string: unicode when the input string was encoded
+        from unicode, or when ``repl`` was and at least one substitution
+        happened; bytes otherwise."""
+        cdef bytes repl_b
+        cdef char * cstring
+        cdef object result
+        cdef Py_ssize_t size
+        cdef StringPiece * sp = NULL
+        cdef cpp_string * input_str = NULL
+        cdef int string_encoded = 0
+        cdef int repl_encoded = 0
+
+        if callable(repl):
+            # This is a callback, so use the custom function
+            return self._subn_callback(repl, string, count, num_repl)
+
+        repl_b = unicode_to_bytes(repl, &repl_encoded, self.encoded)
+        if not repl_encoded and not isinstance(repl, bytes):
+            repl_b = bytes(repl)  # coerce buffer to bytes object
+
+        if count > 1 or (b'\\' if PY2 else <char>b'\\') in repl_b:
+            # Limit on number of substitutions or replacement string contains
+            # escape sequences; handle with Match.expand() implementation.
+            # RE2 does support simple numeric group references \1, \2,
+            # but the number of differences with Python behavior is
+            # non-trivial.
+            return self._subn_expand(repl_b, string, count, num_repl)
+        try:
+            cstring = repl_b
+            size = len(repl_b)
+            sp = new StringPiece(cstring, size)
+
+            bytestr = unicode_to_bytes(string, &string_encoded, self.encoded)
+            if not string_encoded and not isinstance(bytestr, bytes):
+                bytestr = bytes(bytestr)  # coerce buffer to bytes object
+            # RE2 edits this C++ string in place.
+            input_str = new cpp_string(<char *>bytestr, len(bytestr))
+            # NB: RE2 treats unmatched groups in repl as empty string;
+            # Python raises an error.
+            # A negative count matches neither branch below, so no
+            # replacement is performed in that case.
+            with nogil:
+                if count == 0:
+                    num_repl[0] = GlobalReplace(
+                            input_str, self.re_pattern[0], sp[0])
+                elif count == 1:
+                    num_repl[0] = Replace(
+                            input_str, self.re_pattern[0], sp[0])
+
+            if string_encoded or (repl_encoded and num_repl[0] > 0):
+                result = cpp_to_unicode(input_str[0])
+            else:
+                result = cpp_to_bytes(input_str[0])
+        finally:
+            # Both pointers start out NULL, so deleting is safe on every path.
+            del input_str, sp
+        return result
+
+    cdef _subn_callback(self, callback, string, int count, int * num_repl):
+        """Substitute matches using the return value of ``callback(match)``.
+
+        A negative ``count`` is treated as 0 (no limit). ``num_repl[0]`` is
+        incremented once per substitution. Returns unicode when the input
+        string was encoded from unicode, bytes otherwise.
+
+        Raises TypeError when ``string`` cannot be viewed as a C buffer."""
+        # This function is probably the hardest to implement correctly.
+        # This is my first attempt, but if anybody has a better solution,
+        # please help out.
+        cdef char * cstring
+        cdef Py_ssize_t size
+        cdef Py_buffer buf
+        cdef int retval
+        cdef int prevendpos = -1
+        cdef int endpos = 0
+        cdef int pos = 0
+        cdef int encoded = 0
+        cdef StringPiece * sp
+        cdef Match m
+        cdef bytearray result = bytearray()
+        cdef int cpos = 0, upos = 0
+
+        if count < 0:
+            count = 0
+
+        bytestr = unicode_to_bytes(string, &encoded, self.encoded)
+        if pystring_to_cstring(bytestr, &cstring, &size, &buf) == -1:
+            raise TypeError('expected string or buffer')
+        sp = new StringPiece(cstring, size)
+        try:
+            while True:
+                # A fresh Match per iteration, since it is given to callback.
+                m = Match(self, self.groups + 1)
+                m.string = string
+                # The GIL is released while RE2 runs the match.
+                with nogil:
+                    retval = self.re_pattern.Match(
+                            sp[0],
+                            pos,
+                            size,
+                            UNANCHORED,
+                            m.matches,
+                            self.groups + 1)
+                if retval == 0:
+                    break
+
+                # Despite its name, endpos is the offset where this match
+                # starts, i.e. the end of the unmatched text copied below.
+                endpos = m.matches[0].data() - cstring
+                if endpos == prevendpos:
+                    # Same start offset as the previous match: step one byte
+                    # forward, stopping once past the end of the input.
+                    endpos += 1
+                    if endpos > size:
+                        break
+                prevendpos = endpos
+                result.extend(sp.data()[pos:endpos])
+                pos = endpos + m.matches[0].length()
+
+                m.encoded = encoded
+                m.nmatches = self.groups + 1
+                m._make_spans(cstring, size, &cpos, &upos)
+                m._init_groups()
+                tmp = callback(m)
+                if tmp:
+                    result.extend(tmp.encode('utf8') if encoded else tmp)
+                else:
+                    # Falsy callback result: nothing is inserted.
+                    result.extend(b'')
+
+                num_repl[0] += 1
+                if count and num_repl[0] >= count:
+                    break
+            # Copy whatever follows the last processed match.
+            result.extend(sp.data()[pos:])
+        finally:
+            del sp
+            release_cstring(&buf)
+        return result.decode('utf8') if encoded else bytes(result)
+
+    cdef _subn_expand(self, bytes repl, string, int count, int * num_repl):
+        """Perform ``count`` substitutions with replacement string and
+        Match.expand.
+
+        A negative ``count`` is treated as 0 (no limit). ``num_repl[0]`` is
+        incremented once per substitution. Returns unicode when the input
+        string was encoded from unicode, bytes otherwise.
+
+        Raises TypeError when ``string`` cannot be viewed as a C buffer."""
+        cdef char * cstring
+        cdef Py_ssize_t size
+        cdef Py_buffer buf
+        cdef int retval
+        cdef int prevendpos = -1
+        cdef int endpos = 0
+        cdef int pos = 0
+        cdef int encoded = 0
+        cdef StringPiece * sp
+        cdef Match m
+        cdef bytearray result = bytearray()
+
+        if count < 0:
+            count = 0
+
+        bytestr = unicode_to_bytes(string, &encoded, self.encoded)
+        if pystring_to_cstring(bytestr, &cstring, &size, &buf) == -1:
+            raise TypeError('expected string or buffer')
+        sp = new StringPiece(cstring, size)
+        try:
+            while True:
+                m = Match(self, self.groups + 1)
+                m.string = string
+                # The GIL is released while RE2 runs the match.
+                with nogil:
+                    retval = self.re_pattern.Match(
+                            sp[0],
+                            pos,
+                            size,
+                            UNANCHORED,
+                            m.matches,
+                            self.groups + 1)
+                if retval == 0:
+                    break
+
+                # Despite its name, endpos is the offset where this match
+                # starts, i.e. the end of the unmatched text copied below.
+                endpos = m.matches[0].data() - cstring
+                if endpos == prevendpos:
+                    # Same start offset as the previous match: step one byte
+                    # forward, stopping once past the end of the input.
+                    endpos += 1
+                    if endpos > size:
+                        break
+                prevendpos = endpos
+                result.extend(sp.data()[pos:endpos])
+                pos = endpos + m.matches[0].length()
+
+                m.encoded = encoded
+                m.nmatches = self.groups + 1
+                m._init_groups()
+                # The expanded replacement is appended to ``result``.
+                m._expand(repl, result)
+
+                num_repl[0] += 1
+                if count and num_repl[0] >= count:
+                    break
+            # Copy whatever follows the last processed match.
+            result.extend(sp.data()[pos:])
+        finally:
+            del sp
+            release_cstring(&buf)
+        return result.decode('utf8') if encoded else bytes(result)
+
+    def scanner(self, arg):
+        """Return a scanner for ``arg`` obtained from the stdlib ``re``
+        module, compiled from this pattern's source."""
+        # NOTE(review): only self.pattern is handed to re.compile here;
+        # self.flags is not forwarded.
+        fallback = re.compile(self.pattern)
+        return fallback.scanner(arg)
+
+    def _dump_pattern(self):
+        """Return the pattern string reported by the RE2 object: decoded
+        from UTF-8 for patterns flagged as encoded, raw bytes otherwise."""
+        cdef cpp_string s = self.re_pattern.pattern()
+        raw = cpp_to_bytes(s)
+        if not self.encoded:
+            return raw
+        return raw.decode('utf8')
+
+    def __repr__(self):
+        # Flags are only shown when some are set.
+        if self.flags != 0:
+            return 're2.compile(%r, %r)' % (self.pattern, self.flags)
+        return 're2.compile(%r)' % self.pattern
+
+    def __reduce__(self):
+        # Pickle support: rebuild by calling compile(pattern, flags).
+        rebuild_args = (self.pattern, self.flags)
+        return (compile, rebuild_args)
+
+    def __dealloc__(self):
+        # Free the C++ RE2 object owned by this pattern.
+        del self.re_pattern
+
+
+class PythonRePattern:
+    """A wrapper for re.Pattern to support the extra methods defined by re2
+    (contains, count)."""
+    def __init__(self, pattern, flags=None):
+        self._pattern = re.compile(pattern, flags)
+        self.pattern = pattern
+        self.flags = flags
+        self.groupindex = self._pattern.groupindex
+        self.groups = self._pattern.groups
+
+    def contains(self, string):
+        return bool(self._pattern.search(string))
+
+    def count(self, string, pos=0, endpos=9223372036854775807):
+        return len(self._pattern.findall(string, pos, endpos))
+
+    def findall(self, string, pos=0, endpos=9223372036854775807):
+        return self._pattern.findall(string, pos, endpos)
+
+    def finditer(self, string, pos=0, endpos=9223372036854775807):
+        return self._pattern.finditer(string, pos, endpos)
+
+    def fullmatch(self, string, pos=0, endpos=9223372036854775807):
+        return self._pattern.fullmatch(string, pos, endpos)
+
+    def match(self, string, pos=0, endpos=9223372036854775807):
+        return self._pattern.match(string, pos, endpos)
+
+    def scanner(self, string, pos=0, endpos=9223372036854775807):
+        return self._pattern.scanner(string, pos, endpos)
+
+    def search(self, string, pos=0, endpos=9223372036854775807):
+        return self._pattern.search(string, pos, endpos)
+
+    def split(self, string, maxsplit=0):
+        return self._pattern.split(string, maxsplit)
+
+    def sub(self, repl, string, count=0):
+        return self._pattern.sub(repl, string, count)
+
+    def subn(self, repl, string, count=0):
+        return self._pattern.subn(repl, string, count)
+
+    def __repr__(self):
+        return repr(self._pattern)
+
+    def __reduce__(self):
+        return (self, (self.pattern, self.flags))
diff --git a/contrib/python/pyre2/py3/src/re2.pyx b/contrib/python/pyre2/py3/src/re2.pyx
new file mode 100644
index 0000000000..c48101426f
--- /dev/null
+++ b/contrib/python/pyre2/py3/src/re2.pyx
@@ -0,0 +1,458 @@
+# cython: infer_types(False)
+r"""Regular expressions using Google's RE2 engine.
+
+Compared to Python's ``re``, the RE2 engine compiles regular expressions to
+deterministic finite automata, which guarantees linear-time behavior.
+
+Intended as a drop-in replacement for ``re``. Unicode is supported by encoding
+to UTF-8, and bytes strings are treated as UTF-8 when the UNICODE flag is given.
+For best performance, work with UTF-8 encoded bytes strings.
+
+Regular expressions that are not compatible with RE2 are processed with
+fallback to ``re``. Examples of features not supported by RE2:
+
+    - lookahead assertions ``(?!...)``
+    - backreferences (``\\n`` in search pattern)
+    - \W and \S not supported inside character classes
+
+On the other hand, unicode character classes are supported (e.g., ``\p{Greek}``).
+Syntax reference: https://github.com/google/re2/wiki/Syntax
+
+What follows is a reference for the regular expression syntax supported by this
+module (i.e., without requiring fallback to `re`).
+
+Regular expressions can contain both special and ordinary characters.
+Most ordinary characters, like "A", "a", or "0", are the simplest
+regular expressions; they simply match themselves.
+
+The special characters are::
+
+    "."      Matches any character except a newline.
+    "^"      Matches the start of the string.
+    "$"      Matches the end of the string or just before the newline at
+             the end of the string.
+    "*"      Matches 0 or more (greedy) repetitions of the preceding RE.
+             Greedy means that it will match as many repetitions as possible.
+    "+"      Matches 1 or more (greedy) repetitions of the preceding RE.
+    "?"      Matches 0 or 1 (greedy) of the preceding RE.
+    *?,+?,?? Non-greedy versions of the previous three special characters.
+    {m,n}    Matches from m to n repetitions of the preceding RE.
+    {m,n}?   Non-greedy version of the above.
+    "\\"     Either escapes special characters or signals a special sequence.
+    []       Indicates a set of characters.
+             A "^" as the first character indicates a complementing set.
+    "|"      A|B, creates an RE that will match either A or B.
+    (...)    Matches the RE inside the parentheses.
+             The contents can be retrieved or matched later in the string.
+    (?:...)  Non-grouping version of regular parentheses.
+    (?imsux) Set the I, M, S, U, or X flag for the RE (see below).
+
+The special sequences consist of "\\" and a character from the list
+below.  If the ordinary character is not on the list, then the
+resulting RE will match the second character::
+
+    \A         Matches only at the start of the string.
+    \Z         Matches only at the end of the string.
+    \b         Matches the empty string, but only at the start or end of a word.
+    \B         Matches the empty string, but not at the start or end of a word.
+    \d         Matches any decimal digit.
+    \D         Matches any non-digit character.
+    \s         Matches any whitespace character.
+    \S         Matches any non-whitespace character.
+    \w         Matches any alphanumeric character.
+    \W         Matches the complement of \w.
+    \\         Matches a literal backslash.
+    \pN        Unicode character class (one-letter name)
+    \p{Greek}  Unicode character class
+    \PN        negated Unicode character class (one-letter name)
+    \P{Greek}  negated Unicode character class
+
+This module exports the following functions::
+
+    count     Count all occurrences of a pattern in a string.
+    match     Match a regular expression pattern to the beginning of a string.
+    fullmatch Match a regular expression pattern to all of a string.
+    search    Search a string for a pattern and return Match object.
+    contains  Same as search, but only return bool.
+    sub       Substitute occurrences of a pattern found in a string.
+    subn      Same as sub, but also return the number of substitutions made.
+    split     Split a string by the occurrences of a pattern.
+    findall   Find all occurrences of a pattern in a string.
+    finditer  Return an iterator yielding a match object for each match.
+    compile   Compile a pattern into a RegexObject.
+    purge     Clear the regular expression cache.
+    escape    Backslash all non-alphanumerics in a string.
+
+Some of the functions in this module take flags as optional parameters::
+
+    A  ASCII       Make \w, \W, \b, \B, \d, \D match the corresponding ASCII
+                   character categories (rather than the whole Unicode
+                   categories, which is the default).
+    I  IGNORECASE  Perform case-insensitive matching.
+    M  MULTILINE   "^" matches the beginning of lines (after a newline)
+                   as well as the string.
+                   "$" matches the end of lines (before a newline) as well
+                   as the end of the string.
+    S  DOTALL      "." matches any character at all, including the newline.
+    X  VERBOSE     Ignore whitespace and comments for nicer looking RE's.
+    U  UNICODE     Enable Unicode character classes and make \w, \W, \b, \B
+                   Unicode-aware (default for unicode patterns).
+
+This module also defines an exception 'RegexError' (also available under the
+alias 'error').
+
+"""
+
+include "includes.pxi"
+
+import re
+import sys
+import warnings
+from re import error as RegexError
+
+error = re.error
+
+# Import re flags to be compatible.
+I, M, S, U, X, L = re.I, re.M, re.S, re.U, re.X, re.L
+IGNORECASE = re.IGNORECASE
+MULTILINE = re.MULTILINE
+DOTALL = re.DOTALL
+UNICODE = re.UNICODE
+VERBOSE = re.VERBOSE
+LOCALE = re.LOCALE
+DEBUG = re.DEBUG
+ASCII = 256  # Python 3
+
+# Notification levels compared against ``current_notification`` when a
+# pattern falls back to the ``re`` module: stay silent, emit a warning, or
+# raise RegexError.
+FALLBACK_QUIETLY = 0
+FALLBACK_WARNING = 1
+FALLBACK_EXCEPTION = 2
+
+# VERSION_HEX packs VERSION one byte per component (0x00, 0x02, 0x17 == 23).
+VERSION = (0, 2, 23)
+VERSION_HEX = 0x000217
+
+# C-level integer copies of the flag constants.
+cdef int _I = I, _M = M, _S = S, _U = U, _X = X, _L = L
+cdef int current_notification = FALLBACK_QUIETLY
+cdef bint PY2 = PY_MAJOR_VERSION == 2
+
+# Type of compiled re object from Python stdlib
+SREPattern = type(re.compile(''))
+
+# Cache of compiled patterns used by compile(); both caches are emptied by
+# purge().
+_cache = {}
+_cache_repl = {}
+
+# Once _cache holds this many entries, compile() evicts one before adding.
+_MAXCACHE = 100
+
+
+include "compile.pxi"
+include "pattern.pxi"
+include "match.pxi"
+
+
+def purge():
+    """Empty both regular expression caches."""
+    for cache in (_cache, _cache_repl):
+        cache.clear()
+
+
+def search(pattern, string, int flags=0):
+    """Scan through string looking for a match to the pattern, returning
+    a ``Match`` object, or ``None`` if no match was found."""
+    compiled = compile(pattern, flags)
+    return compiled.search(string)
+
+
+def match(pattern, string, int flags=0):
+    """Apply the pattern at the start of the string and return
+    a ``Match`` object, or ``None`` if it does not match there."""
+    compiled = compile(pattern, flags)
+    return compiled.match(string)
+
+
+def fullmatch(pattern, string, int flags=0):
+    """Apply the pattern to the entire string and return
+    a ``Match`` object, or ``None`` if the whole string does not match."""
+    compiled = compile(pattern, flags)
+    return compiled.fullmatch(string)
+
+
+def contains(pattern, string, int flags=0):
+    """Scan through string looking for a match to the pattern and report
+    the outcome as True or False."""
+    compiled = compile(pattern, flags)
+    return compiled.contains(string)
+
+
+def finditer(pattern, string, int flags=0):
+    """Return an iterator over all non-overlapping matches in the string.
+
+    Each item produced is a ``Match`` object.
+    Empty matches are included in the result."""
+    compiled = compile(pattern, flags)
+    return compiled.finditer(string)
+
+
+def findall(pattern, string, int flags=0):
+    """List all non-overlapping matches of ``pattern`` in ``string``.
+
+    The pattern is compiled with ``flags`` first. Each match is
+    represented as a string, or as a tuple when there are two or more
+    groups. Empty matches are included in the result."""
+    regex = compile(pattern, flags)
+    return regex.findall(string)
+
+
+def count(pattern, string, int flags=0):
+    """Count the non-overlapping matches of ``pattern`` in ``string``.
+
+    The pattern is compiled with ``flags`` first. Empty matches are
+    included in the count."""
+    regex = compile(pattern, flags)
+    return regex.count(string)
+
+
+def split(pattern, string, int maxsplit=0, int flags=0):
+    """Split ``string`` at the occurrences of ``pattern``.
+
+    The pattern is compiled with ``flags`` first; ``maxsplit`` is passed
+    on to the compiled pattern's ``split`` method. Returns a list
+    containing the resulting substrings."""
+    regex = compile(pattern, flags)
+    return regex.split(string, maxsplit)
+
+
+def sub(pattern, repl, string, int count=0, int flags=0):
+    """Replace the leftmost non-overlapping matches of ``pattern``.
+
+    The pattern is compiled with ``flags`` first; ``count`` is passed on
+    to the compiled pattern's ``sub`` method. ``repl`` is either a string
+    or a callable: backslash escapes in a string are processed, while
+    a callable is passed the ``Match`` object and must return the
+    replacement string to use. Returns the resulting string."""
+    regex = compile(pattern, flags)
+    return regex.sub(repl, string, count)
+
+
+def subn(pattern, repl, string, int count=0, int flags=0):
+    """Like ``sub``, but also report how many substitutions were made.
+
+    The pattern is compiled with ``flags`` first; ``count`` is passed on
+    to the compiled pattern's ``subn`` method. ``repl`` is either a string
+    or a callable: backslash escapes in a string are processed, while
+    a callable is passed the ``Match`` object and must return the
+    replacement string to use.
+
+    Returns a 2-tuple ``(new_string, number)``: the string after replacing
+    the leftmost non-overlapping matches, and the number of substitutions
+    that were made."""
+    regex = compile(pattern, flags)
+    return regex.subn(repl, string, count)
+
+
+def escape(pattern):
+    """Backslash-escape every non-alphanumeric ASCII character in pattern.
+
+    Characters with a code of 0x80 or above are left untouched, and a NUL
+    character becomes the four characters ``\\000``. The result is unicode
+    when ``pattern`` is unicode, otherwise bytes."""
+    cdef bint uni = isinstance(pattern, unicode)
+    cdef list chars
+    if uni or PY2:
+        chars = list(pattern)
+    else:
+        # Iterating bytes on Python 3 yields ints; wrap each one back into
+        # a one-byte string so the str-like methods below apply.
+        chars = [bytes([byte]) for byte in pattern]
+    for idx, ch in enumerate(chars):
+        if ord(ch) >= 0x80 or ch.isalnum():
+            continue
+        if uni:
+            chars[idx] = u'\\000' if ch == u'\000' else u"\\" + ch
+        else:
+            chars[idx] = b'\\000' if ch == b'\000' else b'\\' + ch
+    if uni:
+        return u''.join(chars)
+    return b''.join(chars)
+
+
+class BackreferencesException(Exception):
+    """Raised to signal that a search pattern contains backreferences."""
+
+
+class CharClassProblemException(Exception):
+    """Raised to signal an unsupported character class in a search pattern."""
+
+
+def set_fallback_notification(level):
+    """Select the module-wide fallback notification level.
+
+    ``level`` is converted with ``int()`` and must be one of:
+        FALLBACK_QUIETLY
+        FALLBACK_WARNING
+        FALLBACK_EXCEPTION
+    Any value outside 0..2 raises ValueError and leaves the current
+    level untouched.
+    """
+    global current_notification
+    requested = int(level)
+    if not 0 <= requested <= 2:
+        raise ValueError("This function expects a valid notification level.")
+    current_notification = requested
+
+
+cdef bint ishex(unsigned char c):
+    """Return true when ``c`` is a hexadecimal digit, ``[0-9a-fA-F]``."""
+    if b'0' <= c <= b'9':
+        return True
+    if b'a' <= c <= b'f':
+        return True
+    return b'A' <= c <= b'F'
+
+
+cdef bint isoct(unsigned char c):
+    """Return true when ``c`` is an octal digit, ``[0-7]``."""
+    return c >= b'0' and c <= b'7'
+
+
+cdef bint isdigit(unsigned char c):
+    """Return true when ``c`` is a decimal digit, ``[0-9]``."""
+    return c >= b'0' and c <= b'9'
+
+
+cdef bint isident(unsigned char c):
+    """Return true when ``c`` is an identifier character, ``[a-zA-Z0-9_]``."""
+    if c == b'_':
+        return True
+    if b'0' <= c <= b'9':
+        return True
+    return b'a' <= c <= b'z' or b'A' <= c <= b'Z'
+
+
+cdef inline bytes cpp_to_bytes(cpp_string input):
+    """Convert from a std::string object to a Python bytes object.
+
+    The result holds exactly ``input.length()`` bytes starting at
+    ``input.data()``."""
+    # By taking the slice we go to the right size,
+    # despite spurious or missing null characters.
+    return input.data()[:input.length()]
+
+
+cdef inline unicode cpp_to_unicode(cpp_string input):
+    """Convert a std::string object to a unicode string.
+
+    The ``input.length()`` bytes at ``input.data()`` are decoded as UTF-8
+    with the 'strict' error handler."""
+    return cpython.unicode.PyUnicode_DecodeUTF8(
+            input.data(), input.length(), 'strict')
+
+
+cdef inline unicode char_to_unicode(const char * input, int length):
+    """Convert a C string to a unicode string.
+
+    The first ``length`` bytes at ``input`` are decoded as UTF-8 with the
+    'strict' error handler; the explicit length means no terminating NUL
+    is relied upon here."""
+    return cpython.unicode.PyUnicode_DecodeUTF8(input, length, 'strict')
+
+
+cdef inline unicode_to_bytes(object pystring, int * encoded,
+        int checkotherencoding):
+    """Return ``pystring`` as UTF-8 bytes when it is a unicode string.
+
+    Any other object (bytes, buffer) is returned unchanged.
+    ``encoded[0]`` reports what happened: 0 when nothing was encoded,
+    1 when the encoded form has the same length as the unicode string
+    (so it can be treated as ASCII), and 2 when it contains multibyte
+    characters.
+
+    On Python 3, ``checkotherencoding`` enables a consistency check:
+    a positive value raises TypeError when the input was not unicode,
+    zero raises TypeError when it was, and a negative value disables
+    the check."""
+    if cpython.unicode.PyUnicode_Check(pystring):
+        utf8 = pystring.encode('utf8')
+        encoded[0] = 1 if len(utf8) == len(pystring) else 2
+        pystring = utf8
+    else:
+        encoded[0] = 0
+    if not PY2:
+        if checkotherencoding > 0 and not encoded[0]:
+            raise TypeError(
+                    "can't use a string pattern on a bytes-like object")
+        if checkotherencoding == 0 and encoded[0]:
+            raise TypeError(
+                    "can't use a bytes pattern on a string-like object")
+    return pystring
+
+
+cdef inline int pystring_to_cstring(
+        object pystring, char ** cstring, Py_ssize_t * size,
+        Py_buffer * buf):
+    """Get a pointer from bytes/buffer object ``pystring``.
+
+    On success, return 0, and set ``cstring``, ``size``, and ``buf``.
+    Otherwise return -1 when ``pystring`` does not offer the buffer
+    interface, or the non-zero result of ``PyObject_GetBuffer``; in both
+    cases ``cstring`` is left as NULL and ``size`` as 0."""
+    cdef int result = -1
+    # Start from a well-defined "no data" state for the failure paths.
+    cstring[0] = NULL
+    size[0] = 0
+    if PyObject_CheckBuffer(pystring) == 1:  # new-style Buffer interface
+        result = PyObject_GetBuffer(pystring, buf, PyBUF_SIMPLE)
+        if result == 0:
+            # Expose the buffer's memory and length to the caller.
+            cstring[0] = <char *>buf.buf
+            size[0] = buf.len
+    return result
+
+
+cdef inline void release_cstring(Py_buffer *buf):
+    """Hand ``buf`` back via ``PyBuffer_Release``; a no-op on Python 2."""
+    if PY2:
+        return
+    PyBuffer_Release(buf)
+
+
+cdef utf8indices(char * cstring, int size, int *pos, int *endpos):
+    """Convert unicode indices ``pos`` and ``endpos`` to UTF-8 indices.
+
+    ``cstring`` holds ``size`` bytes of UTF-8 data; both indices are
+    updated in place. ``pos`` is left unchanged when it is never reached
+    during the scan. ``endpos`` is written back as -1 unless it is
+    reached.
+
+    NOTE(review): the previous wording said out-of-range indices are left
+    unchanged, but an unreached ``endpos`` is replaced by -1 -- confirm
+    that callers treat -1 as "no end limit".
+    NOTE(review): when ``pos[0] == endpos[0]`` only the ``pos`` branch
+    runs (the ``endpos`` test is an ``elif``), so ``endpos`` comes back
+    as -1 -- presumably callers rule this case out beforehand; verify."""
+    cdef unsigned char * data = <unsigned char *>cstring
+    cdef int newpos = pos[0], newendpos = -1
+    # cpos is the byte offset, upos the unicode index of the next character.
+    cdef int cpos = 0, upos = 0
+    while cpos < size:
+        # The lead byte decides how many bytes the character occupies.
+        # NOTE(review): assumes well-formed UTF-8 -- confirm with callers.
+        if data[cpos] < 0x80:
+            cpos += 1
+            upos += 1
+        elif data[cpos] < 0xe0:
+            cpos += 2
+            upos += 1
+        elif data[cpos] < 0xf0:
+            cpos += 3
+            upos += 1
+        else:
+            cpos += 4
+            upos += 1
+            # wide unicode chars get 2 unichars when Python <3.3 is compiled
+            # with --enable-unicode=ucs2
+            emit_if_narrow_unicode()
+            upos += 1
+            emit_endif()
+
+        # The indices are compared after advancing, so unicode index 0 is
+        # never matched here; pos == 0 is covered by newpos starting out
+        # as pos[0].
+        if upos == pos[0]:
+            newpos = cpos
+            if endpos[0] == -1:
+                # No end index requested: nothing more to look for.
+                break
+        elif upos == endpos[0]:
+            newendpos = cpos
+            break
+    pos[0] = newpos
+    endpos[0] = newendpos
+
+
+cdef void unicodeindices(map[int, int] &positions,
+        char * cstring, int size, int * cpos, int * upos):
+    """Convert UTF-8 byte indices to unicode indices.
+
+    Each key of ``positions`` is a byte offset into ``cstring`` (``size``
+    bytes of UTF-8); its value is set to the matching unicode index.
+    A key of -1 is mapped to -1. Scanning starts at byte offset
+    ``cpos[0]`` / unicode index ``upos[0]``, and both are advanced in
+    place as the data is walked.
+
+    NOTE(review): ``positions.begin()`` is dereferenced without an
+    emptiness check -- presumably callers always pass at least one entry;
+    verify.
+    NOTE(review): the iterator only moves on when a key equals the
+    current byte offset exactly, so a key that is never hit (e.g. one
+    below the starting ``cpos[0]``) leaves it and all later entries
+    unfilled -- confirm keys are always reachable offsets."""
+    cdef unsigned char * s = <unsigned char *>cstring
+    cdef map[int, int].iterator it = positions.begin()
+
+    # Handle the "no position" marker before any scanning.
+    if dereference(it).first == -1:
+        dereference(it).second = -1
+        postincrement(it)
+        if it == positions.end():
+            return
+    # A key equal to the starting offset needs no scanning either.
+    if dereference(it).first == cpos[0]:
+        dereference(it).second = upos[0]
+        postincrement(it)
+        if it == positions.end():
+            return
+
+    while cpos[0] < size:
+        # The lead byte decides how many bytes the character occupies.
+        if s[cpos[0]] < 0x80:
+            cpos[0] += 1
+            upos[0] += 1
+        elif s[cpos[0]] < 0xe0:
+            cpos[0] += 2
+            upos[0] += 1
+        elif s[cpos[0]] < 0xf0:
+            cpos[0] += 3
+            upos[0] += 1
+        else:
+            cpos[0] += 4
+            upos[0] += 1
+            # wide unicode chars get 2 unichars when Python <3.3 is compiled
+            # with --enable-unicode=ucs2
+            emit_if_narrow_unicode()
+            upos[0] += 1
+            emit_endif()
+
+        # Record the unicode index once the byte offset of the current
+        # entry is reached, then move on to the next entry.
+        if dereference(it).first == cpos[0]:
+            dereference(it).second = upos[0]
+            postincrement(it)
+            if it == positions.end():
+                break
+
+
+# Names exported by a star-import of this module. 'ASCII' and 'contains'
+# are defined above and are listed here with the other public names.
+__all__ = [
+        # exceptions
+        'BackreferencesException', 'CharClassProblemException',
+        'RegexError', 'error',
+        # constants
+        'FALLBACK_EXCEPTION', 'FALLBACK_QUIETLY', 'FALLBACK_WARNING', 'DEBUG',
+        'S', 'DOTALL', 'I', 'IGNORECASE', 'L', 'LOCALE', 'M', 'MULTILINE',
+        'U', 'UNICODE', 'X', 'VERBOSE', 'ASCII', 'VERSION', 'VERSION_HEX',
+        # classes
+        'Match', 'Pattern', 'SREPattern',
+        # functions
+        'compile', 'contains', 'count', 'escape', 'findall', 'finditer',
+        'fullmatch', 'match', 'purge', 'search', 'split', 'sub', 'subn',
+        'set_fallback_notification',
+        ]
author	robot-piglet <robot-piglet@yandex-team.com>	2023-12-02 01:45:21 +0300
committer	robot-piglet <robot-piglet@yandex-team.com>	2023-12-02 02:42:50 +0300
commit	9c43d58f75cf086b744cf4fe2ae180e8f37e4a0c (patch)
tree	9f88a486917d371d099cd712efd91b4c122d209d /contrib/python/pyre2/py3/src
parent	32fb6dda1feb24f9ab69ece5df0cb9ec238ca5e6 (diff)
download	ydb-9c43d58f75cf086b744cf4fe2ae180e8f37e4a0c.tar.gz