diff options
| author | AlexSm <[email protected]> | 2024-03-05 10:40:59 +0100 | 
|---|---|---|
| committer | GitHub <[email protected]> | 2024-03-05 12:40:59 +0300 | 
| commit | 1ac13c847b5358faba44dbb638a828e24369467b (patch) | |
| tree | 07672b4dd3604ad3dee540a02c6494cb7d10dc3d /contrib/tools/python3/Modules/_sre | |
| parent | ffcca3e7f7958ddc6487b91d3df8c01054bd0638 (diff) | |
Library import 16 (#2433)
Co-authored-by: robot-piglet <[email protected]>
Co-authored-by: deshevoy <[email protected]>
Co-authored-by: robot-contrib <[email protected]>
Co-authored-by: thegeorg <[email protected]>
Co-authored-by: robot-ya-builder <[email protected]>
Co-authored-by: svidyuk <[email protected]>
Co-authored-by: shadchin <[email protected]>
Co-authored-by: robot-ratatosk <[email protected]>
Co-authored-by: innokentii <[email protected]>
Co-authored-by: arkady-e1ppa <[email protected]>
Co-authored-by: snermolaev <[email protected]>
Co-authored-by: dimdim11 <[email protected]>
Co-authored-by: kickbutt <[email protected]>
Co-authored-by: abdullinsaid <[email protected]>
Co-authored-by: korsunandrei <[email protected]>
Co-authored-by: petrk <[email protected]>
Co-authored-by: miroslav2 <[email protected]>
Co-authored-by: serjflint <[email protected]>
Co-authored-by: akhropov <[email protected]>
Co-authored-by: prettyboy <[email protected]>
Co-authored-by: ilikepugs <[email protected]>
Co-authored-by: hiddenpath <[email protected]>
Co-authored-by: mikhnenko <[email protected]>
Co-authored-by: spreis <[email protected]>
Co-authored-by: andreyshspb <[email protected]>
Co-authored-by: dimaandreev <[email protected]>
Co-authored-by: rashid <[email protected]>
Co-authored-by: robot-ydb-importer <[email protected]>
Co-authored-by: r-vetrov <[email protected]>
Co-authored-by: ypodlesov <[email protected]>
Co-authored-by: zaverden <[email protected]>
Co-authored-by: vpozdyayev <[email protected]>
Co-authored-by: robot-cozmo <[email protected]>
Co-authored-by: v-korovin <[email protected]>
Co-authored-by: arikon <[email protected]>
Co-authored-by: khoden <[email protected]>
Co-authored-by: psydmm <[email protected]>
Co-authored-by: robot-javacom <[email protected]>
Co-authored-by: dtorilov <[email protected]>
Co-authored-by: sennikovmv <[email protected]>
Co-authored-by: hcpp <[email protected]>
Diffstat (limited to 'contrib/tools/python3/Modules/_sre')
| -rw-r--r-- | contrib/tools/python3/Modules/_sre/clinic/sre.c.h | 1463 | ||||
| -rw-r--r-- | contrib/tools/python3/Modules/_sre/sre.c | 3249 | ||||
| -rw-r--r-- | contrib/tools/python3/Modules/_sre/sre.h | 107 | ||||
| -rw-r--r-- | contrib/tools/python3/Modules/_sre/sre_constants.h | 99 | ||||
| -rw-r--r-- | contrib/tools/python3/Modules/_sre/sre_lib.h | 1818 | ||||
| -rw-r--r-- | contrib/tools/python3/Modules/_sre/sre_targets.h | 58 | 
6 files changed, 6794 insertions, 0 deletions
| diff --git a/contrib/tools/python3/Modules/_sre/clinic/sre.c.h b/contrib/tools/python3/Modules/_sre/clinic/sre.c.h new file mode 100644 index 00000000000..529c634e76d --- /dev/null +++ b/contrib/tools/python3/Modules/_sre/clinic/sre.c.h @@ -0,0 +1,1463 @@ +/*[clinic input] +preserve +[clinic start generated code]*/ + +#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) +#  include "pycore_gc.h"            // PyGC_Head +#  include "pycore_runtime.h"       // _Py_ID() +#endif + + +PyDoc_STRVAR(_sre_getcodesize__doc__, +"getcodesize($module, /)\n" +"--\n" +"\n"); + +#define _SRE_GETCODESIZE_METHODDEF    \ +    {"getcodesize", (PyCFunction)_sre_getcodesize, METH_NOARGS, _sre_getcodesize__doc__}, + +static int +_sre_getcodesize_impl(PyObject *module); + +static PyObject * +_sre_getcodesize(PyObject *module, PyObject *Py_UNUSED(ignored)) +{ +    PyObject *return_value = NULL; +    int _return_value; + +    _return_value = _sre_getcodesize_impl(module); +    if ((_return_value == -1) && PyErr_Occurred()) { +        goto exit; +    } +    return_value = PyLong_FromLong((long)_return_value); + +exit: +    return return_value; +} + +PyDoc_STRVAR(_sre_ascii_iscased__doc__, +"ascii_iscased($module, character, /)\n" +"--\n" +"\n"); + +#define _SRE_ASCII_ISCASED_METHODDEF    \ +    {"ascii_iscased", (PyCFunction)_sre_ascii_iscased, METH_O, _sre_ascii_iscased__doc__}, + +static int +_sre_ascii_iscased_impl(PyObject *module, int character); + +static PyObject * +_sre_ascii_iscased(PyObject *module, PyObject *arg) +{ +    PyObject *return_value = NULL; +    int character; +    int _return_value; + +    character = _PyLong_AsInt(arg); +    if (character == -1 && PyErr_Occurred()) { +        goto exit; +    } +    _return_value = _sre_ascii_iscased_impl(module, character); +    if ((_return_value == -1) && PyErr_Occurred()) { +        goto exit; +    } +    return_value = PyBool_FromLong((long)_return_value); + +exit: +    return return_value; +} + 
+PyDoc_STRVAR(_sre_unicode_iscased__doc__, +"unicode_iscased($module, character, /)\n" +"--\n" +"\n"); + +#define _SRE_UNICODE_ISCASED_METHODDEF    \ +    {"unicode_iscased", (PyCFunction)_sre_unicode_iscased, METH_O, _sre_unicode_iscased__doc__}, + +static int +_sre_unicode_iscased_impl(PyObject *module, int character); + +static PyObject * +_sre_unicode_iscased(PyObject *module, PyObject *arg) +{ +    PyObject *return_value = NULL; +    int character; +    int _return_value; + +    character = _PyLong_AsInt(arg); +    if (character == -1 && PyErr_Occurred()) { +        goto exit; +    } +    _return_value = _sre_unicode_iscased_impl(module, character); +    if ((_return_value == -1) && PyErr_Occurred()) { +        goto exit; +    } +    return_value = PyBool_FromLong((long)_return_value); + +exit: +    return return_value; +} + +PyDoc_STRVAR(_sre_ascii_tolower__doc__, +"ascii_tolower($module, character, /)\n" +"--\n" +"\n"); + +#define _SRE_ASCII_TOLOWER_METHODDEF    \ +    {"ascii_tolower", (PyCFunction)_sre_ascii_tolower, METH_O, _sre_ascii_tolower__doc__}, + +static int +_sre_ascii_tolower_impl(PyObject *module, int character); + +static PyObject * +_sre_ascii_tolower(PyObject *module, PyObject *arg) +{ +    PyObject *return_value = NULL; +    int character; +    int _return_value; + +    character = _PyLong_AsInt(arg); +    if (character == -1 && PyErr_Occurred()) { +        goto exit; +    } +    _return_value = _sre_ascii_tolower_impl(module, character); +    if ((_return_value == -1) && PyErr_Occurred()) { +        goto exit; +    } +    return_value = PyLong_FromLong((long)_return_value); + +exit: +    return return_value; +} + +PyDoc_STRVAR(_sre_unicode_tolower__doc__, +"unicode_tolower($module, character, /)\n" +"--\n" +"\n"); + +#define _SRE_UNICODE_TOLOWER_METHODDEF    \ +    {"unicode_tolower", (PyCFunction)_sre_unicode_tolower, METH_O, _sre_unicode_tolower__doc__}, + +static int +_sre_unicode_tolower_impl(PyObject *module, int character); + +static 
PyObject * +_sre_unicode_tolower(PyObject *module, PyObject *arg) +{ +    PyObject *return_value = NULL; +    int character; +    int _return_value; + +    character = _PyLong_AsInt(arg); +    if (character == -1 && PyErr_Occurred()) { +        goto exit; +    } +    _return_value = _sre_unicode_tolower_impl(module, character); +    if ((_return_value == -1) && PyErr_Occurred()) { +        goto exit; +    } +    return_value = PyLong_FromLong((long)_return_value); + +exit: +    return return_value; +} + +PyDoc_STRVAR(_sre_SRE_Pattern_match__doc__, +"match($self, /, string, pos=0, endpos=sys.maxsize)\n" +"--\n" +"\n" +"Matches zero or more characters at the beginning of the string."); + +#define _SRE_SRE_PATTERN_MATCH_METHODDEF    \ +    {"match", _PyCFunction_CAST(_sre_SRE_Pattern_match), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_match__doc__}, + +static PyObject * +_sre_SRE_Pattern_match_impl(PatternObject *self, PyTypeObject *cls, +                            PyObject *string, Py_ssize_t pos, +                            Py_ssize_t endpos); + +static PyObject * +_sre_SRE_Pattern_match(PatternObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ +    PyObject *return_value = NULL; +    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + +    #define NUM_KEYWORDS 3 +    static struct { +        PyGC_Head _this_is_not_used; +        PyObject_VAR_HEAD +        PyObject *ob_item[NUM_KEYWORDS]; +    } _kwtuple = { +        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) +        .ob_item = { &_Py_ID(string), &_Py_ID(pos), &_Py_ID(endpos), }, +    }; +    #undef NUM_KEYWORDS +    #define KWTUPLE (&_kwtuple.ob_base.ob_base) + +    #else  // !Py_BUILD_CORE +    #  define KWTUPLE NULL +    #endif  // !Py_BUILD_CORE + +    static const char * const _keywords[] = {"string", "pos", "endpos", NULL}; +    static _PyArg_Parser _parser = { +        .keywords = _keywords, +        .fname = "match", 
+        .kwtuple = KWTUPLE, +    }; +    #undef KWTUPLE +    PyObject *argsbuf[3]; +    Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; +    PyObject *string; +    Py_ssize_t pos = 0; +    Py_ssize_t endpos = PY_SSIZE_T_MAX; + +    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 3, 0, argsbuf); +    if (!args) { +        goto exit; +    } +    string = args[0]; +    if (!noptargs) { +        goto skip_optional_pos; +    } +    if (args[1]) { +        { +            Py_ssize_t ival = -1; +            PyObject *iobj = _PyNumber_Index(args[1]); +            if (iobj != NULL) { +                ival = PyLong_AsSsize_t(iobj); +                Py_DECREF(iobj); +            } +            if (ival == -1 && PyErr_Occurred()) { +                goto exit; +            } +            pos = ival; +        } +        if (!--noptargs) { +            goto skip_optional_pos; +        } +    } +    { +        Py_ssize_t ival = -1; +        PyObject *iobj = _PyNumber_Index(args[2]); +        if (iobj != NULL) { +            ival = PyLong_AsSsize_t(iobj); +            Py_DECREF(iobj); +        } +        if (ival == -1 && PyErr_Occurred()) { +            goto exit; +        } +        endpos = ival; +    } +skip_optional_pos: +    return_value = _sre_SRE_Pattern_match_impl(self, cls, string, pos, endpos); + +exit: +    return return_value; +} + +PyDoc_STRVAR(_sre_SRE_Pattern_fullmatch__doc__, +"fullmatch($self, /, string, pos=0, endpos=sys.maxsize)\n" +"--\n" +"\n" +"Matches against all of the string."); + +#define _SRE_SRE_PATTERN_FULLMATCH_METHODDEF    \ +    {"fullmatch", _PyCFunction_CAST(_sre_SRE_Pattern_fullmatch), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_fullmatch__doc__}, + +static PyObject * +_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyTypeObject *cls, +                                PyObject *string, Py_ssize_t pos, +                                Py_ssize_t endpos); + +static PyObject * 
+_sre_SRE_Pattern_fullmatch(PatternObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ +    PyObject *return_value = NULL; +    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + +    #define NUM_KEYWORDS 3 +    static struct { +        PyGC_Head _this_is_not_used; +        PyObject_VAR_HEAD +        PyObject *ob_item[NUM_KEYWORDS]; +    } _kwtuple = { +        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) +        .ob_item = { &_Py_ID(string), &_Py_ID(pos), &_Py_ID(endpos), }, +    }; +    #undef NUM_KEYWORDS +    #define KWTUPLE (&_kwtuple.ob_base.ob_base) + +    #else  // !Py_BUILD_CORE +    #  define KWTUPLE NULL +    #endif  // !Py_BUILD_CORE + +    static const char * const _keywords[] = {"string", "pos", "endpos", NULL}; +    static _PyArg_Parser _parser = { +        .keywords = _keywords, +        .fname = "fullmatch", +        .kwtuple = KWTUPLE, +    }; +    #undef KWTUPLE +    PyObject *argsbuf[3]; +    Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 1; +    PyObject *string; +    Py_ssize_t pos = 0; +    Py_ssize_t endpos = PY_SSIZE_T_MAX; + +    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 3, 0, argsbuf); +    if (!args) { +        goto exit; +    } +    string = args[0]; +    if (!noptargs) { +        goto skip_optional_pos; +    } +    if (args[1]) { +        { +            Py_ssize_t ival = -1; +            PyObject *iobj = _PyNumber_Index(args[1]); +            if (iobj != NULL) { +                ival = PyLong_AsSsize_t(iobj); +                Py_DECREF(iobj); +            } +            if (ival == -1 && PyErr_Occurred()) { +                goto exit; +            } +            pos = ival; +        } +        if (!--noptargs) { +            goto skip_optional_pos; +        } +    } +    { +        Py_ssize_t ival = -1; +        PyObject *iobj = _PyNumber_Index(args[2]); +        if (iobj != NULL) { +            ival = PyLong_AsSsize_t(iobj); +            Py_DECREF(iobj); +        } +        if (ival == -1 && PyErr_Occurred()) { +            goto exit; +        } +        endpos = ival; +    } +skip_optional_pos: +    return_value = _sre_SRE_Pattern_fullmatch_impl(self, cls, string, pos, endpos); + +exit: +    return return_value; +} + +PyDoc_STRVAR(_sre_SRE_Pattern_search__doc__, +"search($self, /, string, pos=0, endpos=sys.maxsize)\n" +"--\n" +"\n" +"Scan through string looking for a match, and return a corresponding match object instance.\n" +"\n" +"Return None if no position in the string matches."); + +#define _SRE_SRE_PATTERN_SEARCH_METHODDEF    \ +    {"search", _PyCFunction_CAST(_sre_SRE_Pattern_search), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_search__doc__}, + +static PyObject * +_sre_SRE_Pattern_search_impl(PatternObject *self, PyTypeObject *cls, +                             PyObject *string, Py_ssize_t pos, +                             Py_ssize_t endpos); + +static PyObject * +_sre_SRE_Pattern_search(PatternObject 
*self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ +    PyObject *return_value = NULL; +    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + +    #define NUM_KEYWORDS 3 +    static struct { +        PyGC_Head _this_is_not_used; +        PyObject_VAR_HEAD +        PyObject *ob_item[NUM_KEYWORDS]; +    } _kwtuple = { +        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) +        .ob_item = { &_Py_ID(string), &_Py_ID(pos), &_Py_ID(endpos), }, +    }; +    #undef NUM_KEYWORDS +    #define KWTUPLE (&_kwtuple.ob_base.ob_base) + +    #else  // !Py_BUILD_CORE +    #  define KWTUPLE NULL +    #endif  // !Py_BUILD_CORE + +    static const char * const _keywords[] = {"string", "pos", "endpos", NULL}; +    static _PyArg_Parser _parser = { +        .keywords = _keywords, +        .fname = "search", +        .kwtuple = KWTUPLE, +    }; +    #undef KWTUPLE +    PyObject *argsbuf[3]; +    Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; +    PyObject *string; +    Py_ssize_t pos = 0; +    Py_ssize_t endpos = PY_SSIZE_T_MAX; + +    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 3, 0, argsbuf); +    if (!args) { +        goto exit; +    } +    string = args[0]; +    if (!noptargs) { +        goto skip_optional_pos; +    } +    if (args[1]) { +        { +            Py_ssize_t ival = -1; +            PyObject *iobj = _PyNumber_Index(args[1]); +            if (iobj != NULL) { +                ival = PyLong_AsSsize_t(iobj); +                Py_DECREF(iobj); +            } +            if (ival == -1 && PyErr_Occurred()) { +                goto exit; +            } +            pos = ival; +        } +        if (!--noptargs) { +            goto skip_optional_pos; +        } +    } +    { +        Py_ssize_t ival = -1; +        PyObject *iobj = _PyNumber_Index(args[2]); +        if (iobj != NULL) { +            ival = PyLong_AsSsize_t(iobj); +            
Py_DECREF(iobj); +        } +        if (ival == -1 && PyErr_Occurred()) { +            goto exit; +        } +        endpos = ival; +    } +skip_optional_pos: +    return_value = _sre_SRE_Pattern_search_impl(self, cls, string, pos, endpos); + +exit: +    return return_value; +} + +PyDoc_STRVAR(_sre_SRE_Pattern_findall__doc__, +"findall($self, /, string, pos=0, endpos=sys.maxsize)\n" +"--\n" +"\n" +"Return a list of all non-overlapping matches of pattern in string."); + +#define _SRE_SRE_PATTERN_FINDALL_METHODDEF    \ +    {"findall", _PyCFunction_CAST(_sre_SRE_Pattern_findall), METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_findall__doc__}, + +static PyObject * +_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string, +                              Py_ssize_t pos, Py_ssize_t endpos); + +static PyObject * +_sre_SRE_Pattern_findall(PatternObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ +    PyObject *return_value = NULL; +    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + +    #define NUM_KEYWORDS 3 +    static struct { +        PyGC_Head _this_is_not_used; +        PyObject_VAR_HEAD +        PyObject *ob_item[NUM_KEYWORDS]; +    } _kwtuple = { +        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) +        .ob_item = { &_Py_ID(string), &_Py_ID(pos), &_Py_ID(endpos), }, +    }; +    #undef NUM_KEYWORDS +    #define KWTUPLE (&_kwtuple.ob_base.ob_base) + +    #else  // !Py_BUILD_CORE +    #  define KWTUPLE NULL +    #endif  // !Py_BUILD_CORE + +    static const char * const _keywords[] = {"string", "pos", "endpos", NULL}; +    static _PyArg_Parser _parser = { +        .keywords = _keywords, +        .fname = "findall", +        .kwtuple = KWTUPLE, +    }; +    #undef KWTUPLE +    PyObject *argsbuf[3]; +    Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 1; +    PyObject *string; +    Py_ssize_t pos = 0; +    Py_ssize_t endpos = PY_SSIZE_T_MAX; + +    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 3, 0, argsbuf); +    if (!args) { +        goto exit; +    } +    string = args[0]; +    if (!noptargs) { +        goto skip_optional_pos; +    } +    if (args[1]) { +        { +            Py_ssize_t ival = -1; +            PyObject *iobj = _PyNumber_Index(args[1]); +            if (iobj != NULL) { +                ival = PyLong_AsSsize_t(iobj); +                Py_DECREF(iobj); +            } +            if (ival == -1 && PyErr_Occurred()) { +                goto exit; +            } +            pos = ival; +        } +        if (!--noptargs) { +            goto skip_optional_pos; +        } +    } +    { +        Py_ssize_t ival = -1; +        PyObject *iobj = _PyNumber_Index(args[2]); +        if (iobj != NULL) { +            ival = PyLong_AsSsize_t(iobj); +            Py_DECREF(iobj); +        } +        if (ival == -1 && PyErr_Occurred()) { +            goto exit; +        } +        endpos = ival; +    } +skip_optional_pos: +    return_value = _sre_SRE_Pattern_findall_impl(self, string, pos, endpos); + +exit: +    return return_value; +} + +PyDoc_STRVAR(_sre_SRE_Pattern_finditer__doc__, +"finditer($self, /, string, pos=0, endpos=sys.maxsize)\n" +"--\n" +"\n" +"Return an iterator over all non-overlapping matches for the RE pattern in string.\n" +"\n" +"For each match, the iterator returns a match object."); + +#define _SRE_SRE_PATTERN_FINDITER_METHODDEF    \ +    {"finditer", _PyCFunction_CAST(_sre_SRE_Pattern_finditer), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_finditer__doc__}, + +static PyObject * +_sre_SRE_Pattern_finditer_impl(PatternObject *self, PyTypeObject *cls, +                               PyObject *string, Py_ssize_t pos, +                               Py_ssize_t endpos); + +static PyObject * 
+_sre_SRE_Pattern_finditer(PatternObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ +    PyObject *return_value = NULL; +    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + +    #define NUM_KEYWORDS 3 +    static struct { +        PyGC_Head _this_is_not_used; +        PyObject_VAR_HEAD +        PyObject *ob_item[NUM_KEYWORDS]; +    } _kwtuple = { +        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) +        .ob_item = { &_Py_ID(string), &_Py_ID(pos), &_Py_ID(endpos), }, +    }; +    #undef NUM_KEYWORDS +    #define KWTUPLE (&_kwtuple.ob_base.ob_base) + +    #else  // !Py_BUILD_CORE +    #  define KWTUPLE NULL +    #endif  // !Py_BUILD_CORE + +    static const char * const _keywords[] = {"string", "pos", "endpos", NULL}; +    static _PyArg_Parser _parser = { +        .keywords = _keywords, +        .fname = "finditer", +        .kwtuple = KWTUPLE, +    }; +    #undef KWTUPLE +    PyObject *argsbuf[3]; +    Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 1; +    PyObject *string; +    Py_ssize_t pos = 0; +    Py_ssize_t endpos = PY_SSIZE_T_MAX; + +    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 3, 0, argsbuf); +    if (!args) { +        goto exit; +    } +    string = args[0]; +    if (!noptargs) { +        goto skip_optional_pos; +    } +    if (args[1]) { +        { +            Py_ssize_t ival = -1; +            PyObject *iobj = _PyNumber_Index(args[1]); +            if (iobj != NULL) { +                ival = PyLong_AsSsize_t(iobj); +                Py_DECREF(iobj); +            } +            if (ival == -1 && PyErr_Occurred()) { +                goto exit; +            } +            pos = ival; +        } +        if (!--noptargs) { +            goto skip_optional_pos; +        } +    } +    { +        Py_ssize_t ival = -1; +        PyObject *iobj = _PyNumber_Index(args[2]); +        if (iobj != NULL) { +            ival = PyLong_AsSsize_t(iobj); +            Py_DECREF(iobj); +        } +        if (ival == -1 && PyErr_Occurred()) { +            goto exit; +        } +        endpos = ival; +    } +skip_optional_pos: +    return_value = _sre_SRE_Pattern_finditer_impl(self, cls, string, pos, endpos); + +exit: +    return return_value; +} + +PyDoc_STRVAR(_sre_SRE_Pattern_scanner__doc__, +"scanner($self, /, string, pos=0, endpos=sys.maxsize)\n" +"--\n" +"\n"); + +#define _SRE_SRE_PATTERN_SCANNER_METHODDEF    \ +    {"scanner", _PyCFunction_CAST(_sre_SRE_Pattern_scanner), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_scanner__doc__}, + +static PyObject * +_sre_SRE_Pattern_scanner_impl(PatternObject *self, PyTypeObject *cls, +                              PyObject *string, Py_ssize_t pos, +                              Py_ssize_t endpos); + +static PyObject * +_sre_SRE_Pattern_scanner(PatternObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ +    PyObject *return_value = NULL; +    #if 
defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + +    #define NUM_KEYWORDS 3 +    static struct { +        PyGC_Head _this_is_not_used; +        PyObject_VAR_HEAD +        PyObject *ob_item[NUM_KEYWORDS]; +    } _kwtuple = { +        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) +        .ob_item = { &_Py_ID(string), &_Py_ID(pos), &_Py_ID(endpos), }, +    }; +    #undef NUM_KEYWORDS +    #define KWTUPLE (&_kwtuple.ob_base.ob_base) + +    #else  // !Py_BUILD_CORE +    #  define KWTUPLE NULL +    #endif  // !Py_BUILD_CORE + +    static const char * const _keywords[] = {"string", "pos", "endpos", NULL}; +    static _PyArg_Parser _parser = { +        .keywords = _keywords, +        .fname = "scanner", +        .kwtuple = KWTUPLE, +    }; +    #undef KWTUPLE +    PyObject *argsbuf[3]; +    Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; +    PyObject *string; +    Py_ssize_t pos = 0; +    Py_ssize_t endpos = PY_SSIZE_T_MAX; + +    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 3, 0, argsbuf); +    if (!args) { +        goto exit; +    } +    string = args[0]; +    if (!noptargs) { +        goto skip_optional_pos; +    } +    if (args[1]) { +        { +            Py_ssize_t ival = -1; +            PyObject *iobj = _PyNumber_Index(args[1]); +            if (iobj != NULL) { +                ival = PyLong_AsSsize_t(iobj); +                Py_DECREF(iobj); +            } +            if (ival == -1 && PyErr_Occurred()) { +                goto exit; +            } +            pos = ival; +        } +        if (!--noptargs) { +            goto skip_optional_pos; +        } +    } +    { +        Py_ssize_t ival = -1; +        PyObject *iobj = _PyNumber_Index(args[2]); +        if (iobj != NULL) { +            ival = PyLong_AsSsize_t(iobj); +            Py_DECREF(iobj); +        } +        if (ival == -1 && PyErr_Occurred()) { +            goto exit; +        } +        endpos = ival; +    } 
+skip_optional_pos: +    return_value = _sre_SRE_Pattern_scanner_impl(self, cls, string, pos, endpos); + +exit: +    return return_value; +} + +PyDoc_STRVAR(_sre_SRE_Pattern_split__doc__, +"split($self, /, string, maxsplit=0)\n" +"--\n" +"\n" +"Split string by the occurrences of pattern."); + +#define _SRE_SRE_PATTERN_SPLIT_METHODDEF    \ +    {"split", _PyCFunction_CAST(_sre_SRE_Pattern_split), METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_split__doc__}, + +static PyObject * +_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string, +                            Py_ssize_t maxsplit); + +static PyObject * +_sre_SRE_Pattern_split(PatternObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ +    PyObject *return_value = NULL; +    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + +    #define NUM_KEYWORDS 2 +    static struct { +        PyGC_Head _this_is_not_used; +        PyObject_VAR_HEAD +        PyObject *ob_item[NUM_KEYWORDS]; +    } _kwtuple = { +        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) +        .ob_item = { &_Py_ID(string), &_Py_ID(maxsplit), }, +    }; +    #undef NUM_KEYWORDS +    #define KWTUPLE (&_kwtuple.ob_base.ob_base) + +    #else  // !Py_BUILD_CORE +    #  define KWTUPLE NULL +    #endif  // !Py_BUILD_CORE + +    static const char * const _keywords[] = {"string", "maxsplit", NULL}; +    static _PyArg_Parser _parser = { +        .keywords = _keywords, +        .fname = "split", +        .kwtuple = KWTUPLE, +    }; +    #undef KWTUPLE +    PyObject *argsbuf[2]; +    Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 1; +    PyObject *string; +    Py_ssize_t maxsplit = 0; + +    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 2, 0, argsbuf); +    if (!args) { +        goto exit; +    } +    string = args[0]; +    if (!noptargs) { +        goto skip_optional_pos; +    } +    { +        Py_ssize_t ival = -1; +        PyObject *iobj = _PyNumber_Index(args[1]); +        if (iobj != NULL) { +            ival = PyLong_AsSsize_t(iobj); +            Py_DECREF(iobj); +        } +        if (ival == -1 && PyErr_Occurred()) { +            goto exit; +        } +        maxsplit = ival; +    } +skip_optional_pos: +    return_value = _sre_SRE_Pattern_split_impl(self, string, maxsplit); + +exit: +    return return_value; +} + +PyDoc_STRVAR(_sre_SRE_Pattern_sub__doc__, +"sub($self, /, repl, string, count=0)\n" +"--\n" +"\n" +"Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl."); + +#define _SRE_SRE_PATTERN_SUB_METHODDEF    \ +    {"sub", _PyCFunction_CAST(_sre_SRE_Pattern_sub), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_sub__doc__}, + +static PyObject * +_sre_SRE_Pattern_sub_impl(PatternObject *self, PyTypeObject *cls, +                          PyObject *repl, PyObject *string, Py_ssize_t count); + +static PyObject * +_sre_SRE_Pattern_sub(PatternObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ +    PyObject *return_value = NULL; +    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + +    #define NUM_KEYWORDS 3 +    static struct { +        PyGC_Head _this_is_not_used; +        PyObject_VAR_HEAD +        PyObject *ob_item[NUM_KEYWORDS]; +    } _kwtuple = { +        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) +        .ob_item = { &_Py_ID(repl), &_Py_ID(string), &_Py_ID(count), }, +    }; +    #undef NUM_KEYWORDS +    #define KWTUPLE (&_kwtuple.ob_base.ob_base) + +    #else 
 // !Py_BUILD_CORE +    #  define KWTUPLE NULL +    #endif  // !Py_BUILD_CORE + +    static const char * const _keywords[] = {"repl", "string", "count", NULL}; +    static _PyArg_Parser _parser = { +        .keywords = _keywords, +        .fname = "sub", +        .kwtuple = KWTUPLE, +    }; +    #undef KWTUPLE +    PyObject *argsbuf[3]; +    Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 2; +    PyObject *repl; +    PyObject *string; +    Py_ssize_t count = 0; + +    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 2, 3, 0, argsbuf); +    if (!args) { +        goto exit; +    } +    repl = args[0]; +    string = args[1]; +    if (!noptargs) { +        goto skip_optional_pos; +    } +    { +        Py_ssize_t ival = -1; +        PyObject *iobj = _PyNumber_Index(args[2]); +        if (iobj != NULL) { +            ival = PyLong_AsSsize_t(iobj); +            Py_DECREF(iobj); +        } +        if (ival == -1 && PyErr_Occurred()) { +            goto exit; +        } +        count = ival; +    } +skip_optional_pos: +    return_value = _sre_SRE_Pattern_sub_impl(self, cls, repl, string, count); + +exit: +    return return_value; +} + +PyDoc_STRVAR(_sre_SRE_Pattern_subn__doc__, +"subn($self, /, repl, string, count=0)\n" +"--\n" +"\n" +"Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl."); + +#define _SRE_SRE_PATTERN_SUBN_METHODDEF    \ +    {"subn", _PyCFunction_CAST(_sre_SRE_Pattern_subn), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_subn__doc__}, + +static PyObject * +_sre_SRE_Pattern_subn_impl(PatternObject *self, PyTypeObject *cls, +                           PyObject *repl, PyObject *string, +                           Py_ssize_t count); + +static PyObject * +_sre_SRE_Pattern_subn(PatternObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ +    PyObject *return_value = 
NULL; +    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + +    #define NUM_KEYWORDS 3 +    static struct { +        PyGC_Head _this_is_not_used; +        PyObject_VAR_HEAD +        PyObject *ob_item[NUM_KEYWORDS]; +    } _kwtuple = { +        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) +        .ob_item = { &_Py_ID(repl), &_Py_ID(string), &_Py_ID(count), }, +    }; +    #undef NUM_KEYWORDS +    #define KWTUPLE (&_kwtuple.ob_base.ob_base) + +    #else  // !Py_BUILD_CORE +    #  define KWTUPLE NULL +    #endif  // !Py_BUILD_CORE + +    static const char * const _keywords[] = {"repl", "string", "count", NULL}; +    static _PyArg_Parser _parser = { +        .keywords = _keywords, +        .fname = "subn", +        .kwtuple = KWTUPLE, +    }; +    #undef KWTUPLE +    PyObject *argsbuf[3]; +    Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 2; +    PyObject *repl; +    PyObject *string; +    Py_ssize_t count = 0; + +    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 2, 3, 0, argsbuf); +    if (!args) { +        goto exit; +    } +    repl = args[0]; +    string = args[1]; +    if (!noptargs) { +        goto skip_optional_pos; +    } +    { +        Py_ssize_t ival = -1; +        PyObject *iobj = _PyNumber_Index(args[2]); +        if (iobj != NULL) { +            ival = PyLong_AsSsize_t(iobj); +            Py_DECREF(iobj); +        } +        if (ival == -1 && PyErr_Occurred()) { +            goto exit; +        } +        count = ival; +    } +skip_optional_pos: +    return_value = _sre_SRE_Pattern_subn_impl(self, cls, repl, string, count); + +exit: +    return return_value; +} + +PyDoc_STRVAR(_sre_SRE_Pattern___copy____doc__, +"__copy__($self, /)\n" +"--\n" +"\n"); + +#define _SRE_SRE_PATTERN___COPY___METHODDEF    \ +    {"__copy__", (PyCFunction)_sre_SRE_Pattern___copy__, METH_NOARGS, _sre_SRE_Pattern___copy____doc__}, + +static PyObject * +_sre_SRE_Pattern___copy___impl(PatternObject 
*self); + +static PyObject * +_sre_SRE_Pattern___copy__(PatternObject *self, PyObject *Py_UNUSED(ignored)) +{ +    return _sre_SRE_Pattern___copy___impl(self); +} + +PyDoc_STRVAR(_sre_SRE_Pattern___deepcopy____doc__, +"__deepcopy__($self, memo, /)\n" +"--\n" +"\n"); + +#define _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF    \ +    {"__deepcopy__", (PyCFunction)_sre_SRE_Pattern___deepcopy__, METH_O, _sre_SRE_Pattern___deepcopy____doc__}, + +PyDoc_STRVAR(_sre_compile__doc__, +"compile($module, /, pattern, flags, code, groups, groupindex,\n" +"        indexgroup)\n" +"--\n" +"\n"); + +#define _SRE_COMPILE_METHODDEF    \ +    {"compile", _PyCFunction_CAST(_sre_compile), METH_FASTCALL|METH_KEYWORDS, _sre_compile__doc__}, + +static PyObject * +_sre_compile_impl(PyObject *module, PyObject *pattern, int flags, +                  PyObject *code, Py_ssize_t groups, PyObject *groupindex, +                  PyObject *indexgroup); + +static PyObject * +_sre_compile(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ +    PyObject *return_value = NULL; +    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + +    #define NUM_KEYWORDS 6 +    static struct { +        PyGC_Head _this_is_not_used; +        PyObject_VAR_HEAD +        PyObject *ob_item[NUM_KEYWORDS]; +    } _kwtuple = { +        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) +        .ob_item = { &_Py_ID(pattern), &_Py_ID(flags), &_Py_ID(code), &_Py_ID(groups), &_Py_ID(groupindex), &_Py_ID(indexgroup), }, +    }; +    #undef NUM_KEYWORDS +    #define KWTUPLE (&_kwtuple.ob_base.ob_base) + +    #else  // !Py_BUILD_CORE +    #  define KWTUPLE NULL +    #endif  // !Py_BUILD_CORE + +    static const char * const _keywords[] = {"pattern", "flags", "code", "groups", "groupindex", "indexgroup", NULL}; +    static _PyArg_Parser _parser = { +        .keywords = _keywords, +        .fname = "compile", +        .kwtuple = KWTUPLE, +    }; +    #undef KWTUPLE +    PyObject 
*argsbuf[6]; +    PyObject *pattern; +    int flags; +    PyObject *code; +    Py_ssize_t groups; +    PyObject *groupindex; +    PyObject *indexgroup; + +    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 6, 6, 0, argsbuf); +    if (!args) { +        goto exit; +    } +    pattern = args[0]; +    flags = _PyLong_AsInt(args[1]); +    if (flags == -1 && PyErr_Occurred()) { +        goto exit; +    } +    if (!PyList_Check(args[2])) { +        _PyArg_BadArgument("compile", "argument 'code'", "list", args[2]); +        goto exit; +    } +    code = args[2]; +    { +        Py_ssize_t ival = -1; +        PyObject *iobj = _PyNumber_Index(args[3]); +        if (iobj != NULL) { +            ival = PyLong_AsSsize_t(iobj); +            Py_DECREF(iobj); +        } +        if (ival == -1 && PyErr_Occurred()) { +            goto exit; +        } +        groups = ival; +    } +    if (!PyDict_Check(args[4])) { +        _PyArg_BadArgument("compile", "argument 'groupindex'", "dict", args[4]); +        goto exit; +    } +    groupindex = args[4]; +    if (!PyTuple_Check(args[5])) { +        _PyArg_BadArgument("compile", "argument 'indexgroup'", "tuple", args[5]); +        goto exit; +    } +    indexgroup = args[5]; +    return_value = _sre_compile_impl(module, pattern, flags, code, groups, groupindex, indexgroup); + +exit: +    return return_value; +} + +PyDoc_STRVAR(_sre_template__doc__, +"template($module, pattern, template, /)\n" +"--\n" +"\n" +"\n" +"\n" +"  template\n" +"    A list containing interleaved literal strings (str or bytes) and group\n" +"    indices (int), as returned by re._parser.parse_template():\n" +"        [literal1, group1, ..., literalN, groupN]"); + +#define _SRE_TEMPLATE_METHODDEF    \ +    {"template", _PyCFunction_CAST(_sre_template), METH_FASTCALL, _sre_template__doc__}, + +static PyObject * +_sre_template_impl(PyObject *module, PyObject *pattern, PyObject *template); + +static PyObject * +_sre_template(PyObject *module, 
PyObject *const *args, Py_ssize_t nargs) +{ +    PyObject *return_value = NULL; +    PyObject *pattern; +    PyObject *template; + +    if (!_PyArg_CheckPositional("template", nargs, 2, 2)) { +        goto exit; +    } +    pattern = args[0]; +    if (!PyList_Check(args[1])) { +        _PyArg_BadArgument("template", "argument 2", "list", args[1]); +        goto exit; +    } +    template = args[1]; +    return_value = _sre_template_impl(module, pattern, template); + +exit: +    return return_value; +} + +PyDoc_STRVAR(_sre_SRE_Match_expand__doc__, +"expand($self, /, template)\n" +"--\n" +"\n" +"Return the string obtained by doing backslash substitution on the string template, as done by the sub() method."); + +#define _SRE_SRE_MATCH_EXPAND_METHODDEF    \ +    {"expand", _PyCFunction_CAST(_sre_SRE_Match_expand), METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Match_expand__doc__}, + +static PyObject * +_sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template); + +static PyObject * +_sre_SRE_Match_expand(MatchObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ +    PyObject *return_value = NULL; +    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + +    #define NUM_KEYWORDS 1 +    static struct { +        PyGC_Head _this_is_not_used; +        PyObject_VAR_HEAD +        PyObject *ob_item[NUM_KEYWORDS]; +    } _kwtuple = { +        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) +        .ob_item = { &_Py_ID(template), }, +    }; +    #undef NUM_KEYWORDS +    #define KWTUPLE (&_kwtuple.ob_base.ob_base) + +    #else  // !Py_BUILD_CORE +    #  define KWTUPLE NULL +    #endif  // !Py_BUILD_CORE + +    static const char * const _keywords[] = {"template", NULL}; +    static _PyArg_Parser _parser = { +        .keywords = _keywords, +        .fname = "expand", +        .kwtuple = KWTUPLE, +    }; +    #undef KWTUPLE +    PyObject *argsbuf[1]; +    PyObject *template; + +    args = _PyArg_UnpackKeywords(args, nargs, NULL, 
kwnames, &_parser, 1, 1, 0, argsbuf); +    if (!args) { +        goto exit; +    } +    template = args[0]; +    return_value = _sre_SRE_Match_expand_impl(self, template); + +exit: +    return return_value; +} + +PyDoc_STRVAR(_sre_SRE_Match_groups__doc__, +"groups($self, /, default=None)\n" +"--\n" +"\n" +"Return a tuple containing all the subgroups of the match, from 1.\n" +"\n" +"  default\n" +"    Is used for groups that did not participate in the match."); + +#define _SRE_SRE_MATCH_GROUPS_METHODDEF    \ +    {"groups", _PyCFunction_CAST(_sre_SRE_Match_groups), METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Match_groups__doc__}, + +static PyObject * +_sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value); + +static PyObject * +_sre_SRE_Match_groups(MatchObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ +    PyObject *return_value = NULL; +    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + +    #define NUM_KEYWORDS 1 +    static struct { +        PyGC_Head _this_is_not_used; +        PyObject_VAR_HEAD +        PyObject *ob_item[NUM_KEYWORDS]; +    } _kwtuple = { +        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) +        .ob_item = { &_Py_ID(default), }, +    }; +    #undef NUM_KEYWORDS +    #define KWTUPLE (&_kwtuple.ob_base.ob_base) + +    #else  // !Py_BUILD_CORE +    #  define KWTUPLE NULL +    #endif  // !Py_BUILD_CORE + +    static const char * const _keywords[] = {"default", NULL}; +    static _PyArg_Parser _parser = { +        .keywords = _keywords, +        .fname = "groups", +        .kwtuple = KWTUPLE, +    }; +    #undef KWTUPLE +    PyObject *argsbuf[1]; +    Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 0; +    PyObject *default_value = Py_None; + +    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 0, 1, 0, argsbuf); +    if (!args) { +        goto exit; +    } +    if (!noptargs) { +        goto skip_optional_pos; +    } +    default_value = args[0]; +skip_optional_pos: +    return_value = _sre_SRE_Match_groups_impl(self, default_value); + +exit: +    return return_value; +} + +PyDoc_STRVAR(_sre_SRE_Match_groupdict__doc__, +"groupdict($self, /, default=None)\n" +"--\n" +"\n" +"Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name.\n" +"\n" +"  default\n" +"    Is used for groups that did not participate in the match."); + +#define _SRE_SRE_MATCH_GROUPDICT_METHODDEF    \ +    {"groupdict", _PyCFunction_CAST(_sre_SRE_Match_groupdict), METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Match_groupdict__doc__}, + +static PyObject * +_sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value); + +static PyObject * +_sre_SRE_Match_groupdict(MatchObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ +    PyObject *return_value = NULL; +    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + +    #define NUM_KEYWORDS 1 +    static struct { +        PyGC_Head _this_is_not_used; +        PyObject_VAR_HEAD +        PyObject *ob_item[NUM_KEYWORDS]; +    } _kwtuple = { +        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) +        .ob_item = { &_Py_ID(default), }, +    }; +    #undef NUM_KEYWORDS +    #define KWTUPLE (&_kwtuple.ob_base.ob_base) + +    #else  // !Py_BUILD_CORE +    #  define KWTUPLE NULL +    #endif  // !Py_BUILD_CORE + +    static const char * const _keywords[] = {"default", NULL}; +    static _PyArg_Parser _parser = { +        .keywords = _keywords, +        .fname = "groupdict", +        .kwtuple = KWTUPLE, +    }; +    #undef KWTUPLE +    PyObject *argsbuf[1]; +    Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 0; +    PyObject *default_value = Py_None; + +    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 0, 1, 0, argsbuf); +    if (!args) { +        goto exit; +    } +    if (!noptargs) { +        goto skip_optional_pos; +    } +    default_value = args[0]; +skip_optional_pos: +    return_value = _sre_SRE_Match_groupdict_impl(self, default_value); + +exit: +    return return_value; +} + +PyDoc_STRVAR(_sre_SRE_Match_start__doc__, +"start($self, group=0, /)\n" +"--\n" +"\n" +"Return index of the start of the substring matched by group."); + +#define _SRE_SRE_MATCH_START_METHODDEF    \ +    {"start", _PyCFunction_CAST(_sre_SRE_Match_start), METH_FASTCALL, _sre_SRE_Match_start__doc__}, + +static Py_ssize_t +_sre_SRE_Match_start_impl(MatchObject *self, PyObject *group); + +static PyObject * +_sre_SRE_Match_start(MatchObject *self, PyObject *const *args, Py_ssize_t nargs) +{ +    PyObject *return_value = NULL; +    PyObject *group = NULL; +    Py_ssize_t _return_value; + +    if (!_PyArg_CheckPositional("start", nargs, 0, 1)) { +        goto exit; +    } +    if (nargs < 1) { +        goto skip_optional; +    } +    group = args[0]; +skip_optional: +    _return_value = _sre_SRE_Match_start_impl(self, group); +    if ((_return_value == -1) && PyErr_Occurred()) { +        goto exit; +    } +    return_value = PyLong_FromSsize_t(_return_value); + +exit: +    return return_value; +} + +PyDoc_STRVAR(_sre_SRE_Match_end__doc__, +"end($self, group=0, /)\n" +"--\n" +"\n" +"Return index of the end of the substring matched by group."); + +#define _SRE_SRE_MATCH_END_METHODDEF    \ +    {"end", _PyCFunction_CAST(_sre_SRE_Match_end), METH_FASTCALL, _sre_SRE_Match_end__doc__}, + +static Py_ssize_t +_sre_SRE_Match_end_impl(MatchObject *self, PyObject *group); + +static PyObject * +_sre_SRE_Match_end(MatchObject *self, PyObject *const *args, Py_ssize_t nargs) +{ +    PyObject *return_value = NULL; +    PyObject *group = NULL; +    
Py_ssize_t _return_value; + +    if (!_PyArg_CheckPositional("end", nargs, 0, 1)) { +        goto exit; +    } +    if (nargs < 1) { +        goto skip_optional; +    } +    group = args[0]; +skip_optional: +    _return_value = _sre_SRE_Match_end_impl(self, group); +    if ((_return_value == -1) && PyErr_Occurred()) { +        goto exit; +    } +    return_value = PyLong_FromSsize_t(_return_value); + +exit: +    return return_value; +} + +PyDoc_STRVAR(_sre_SRE_Match_span__doc__, +"span($self, group=0, /)\n" +"--\n" +"\n" +"For match object m, return the 2-tuple (m.start(group), m.end(group))."); + +#define _SRE_SRE_MATCH_SPAN_METHODDEF    \ +    {"span", _PyCFunction_CAST(_sre_SRE_Match_span), METH_FASTCALL, _sre_SRE_Match_span__doc__}, + +static PyObject * +_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group); + +static PyObject * +_sre_SRE_Match_span(MatchObject *self, PyObject *const *args, Py_ssize_t nargs) +{ +    PyObject *return_value = NULL; +    PyObject *group = NULL; + +    if (!_PyArg_CheckPositional("span", nargs, 0, 1)) { +        goto exit; +    } +    if (nargs < 1) { +        goto skip_optional; +    } +    group = args[0]; +skip_optional: +    return_value = _sre_SRE_Match_span_impl(self, group); + +exit: +    return return_value; +} + +PyDoc_STRVAR(_sre_SRE_Match___copy____doc__, +"__copy__($self, /)\n" +"--\n" +"\n"); + +#define _SRE_SRE_MATCH___COPY___METHODDEF    \ +    {"__copy__", (PyCFunction)_sre_SRE_Match___copy__, METH_NOARGS, _sre_SRE_Match___copy____doc__}, + +static PyObject * +_sre_SRE_Match___copy___impl(MatchObject *self); + +static PyObject * +_sre_SRE_Match___copy__(MatchObject *self, PyObject *Py_UNUSED(ignored)) +{ +    return _sre_SRE_Match___copy___impl(self); +} + +PyDoc_STRVAR(_sre_SRE_Match___deepcopy____doc__, +"__deepcopy__($self, memo, /)\n" +"--\n" +"\n"); + +#define _SRE_SRE_MATCH___DEEPCOPY___METHODDEF    \ +    {"__deepcopy__", (PyCFunction)_sre_SRE_Match___deepcopy__, METH_O, 
_sre_SRE_Match___deepcopy____doc__}, + +PyDoc_STRVAR(_sre_SRE_Scanner_match__doc__, +"match($self, /)\n" +"--\n" +"\n"); + +#define _SRE_SRE_SCANNER_MATCH_METHODDEF    \ +    {"match", _PyCFunction_CAST(_sre_SRE_Scanner_match), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Scanner_match__doc__}, + +static PyObject * +_sre_SRE_Scanner_match_impl(ScannerObject *self, PyTypeObject *cls); + +static PyObject * +_sre_SRE_Scanner_match(ScannerObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ +    if (nargs || (kwnames && PyTuple_GET_SIZE(kwnames))) { +        PyErr_SetString(PyExc_TypeError, "match() takes no arguments"); +        return NULL; +    } +    return _sre_SRE_Scanner_match_impl(self, cls); +} + +PyDoc_STRVAR(_sre_SRE_Scanner_search__doc__, +"search($self, /)\n" +"--\n" +"\n"); + +#define _SRE_SRE_SCANNER_SEARCH_METHODDEF    \ +    {"search", _PyCFunction_CAST(_sre_SRE_Scanner_search), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Scanner_search__doc__}, + +static PyObject * +_sre_SRE_Scanner_search_impl(ScannerObject *self, PyTypeObject *cls); + +static PyObject * +_sre_SRE_Scanner_search(ScannerObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ +    if (nargs || (kwnames && PyTuple_GET_SIZE(kwnames))) { +        PyErr_SetString(PyExc_TypeError, "search() takes no arguments"); +        return NULL; +    } +    return _sre_SRE_Scanner_search_impl(self, cls); +} +/*[clinic end generated code: output=045de53cfe02dee0 input=a9049054013a1b77]*/ diff --git a/contrib/tools/python3/Modules/_sre/sre.c b/contrib/tools/python3/Modules/_sre/sre.c new file mode 100644 index 00000000000..0547390454a --- /dev/null +++ b/contrib/tools/python3/Modules/_sre/sre.c @@ -0,0 +1,3249 @@ +/* + * Secret Labs' Regular Expression Engine + * + * regular expression matching engine + * + * partial history: + * 1999-10-24 fl   created (based on existing template matcher code) + * 2000-03-06 fl 
  first alpha, sort of + * 2000-08-01 fl   fixes for 1.6b1 + * 2000-08-07 fl   use PyOS_CheckStack() if available + * 2000-09-20 fl   added expand method + * 2001-03-20 fl   lots of fixes for 2.1b2 + * 2001-04-15 fl   export copyright as Python attribute, not global + * 2001-04-28 fl   added __copy__ methods (work in progress) + * 2001-05-14 fl   fixes for 1.5.2 compatibility + * 2001-07-01 fl   added BIGCHARSET support (from Martin von Loewis) + * 2001-10-18 fl   fixed group reset issue (from Matthew Mueller) + * 2001-10-20 fl   added split primitive; re-enable unicode for 1.6/2.0/2.1 + * 2001-10-21 fl   added sub/subn primitive + * 2001-10-24 fl   added finditer primitive (for 2.2 only) + * 2001-12-07 fl   fixed memory leak in sub/subn (Guido van Rossum) + * 2002-11-09 fl   fixed empty sub/subn return type + * 2003-04-18 mvl  fully support 4-byte codes + * 2003-10-17 gn   implemented non recursive scheme + * 2013-02-04 mrab added fullmatch primitive + * + * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved. + * + * This version of the SRE library can be redistributed under CNRI's + * Python 1.6 license.  For any other use, please contact Secret Labs + * AB ([email protected]). + * + * Portions of this engine have been developed in cooperation with + * CNRI.  Hewlett-Packard provided funding for 1.6 integration and + * other compatibility work. 
+ */ + +static const char copyright[] = +    " SRE 2.2.2 Copyright (c) 1997-2002 by Secret Labs AB "; + +#define PY_SSIZE_T_CLEAN + +#include "Python.h" +#include "pycore_long.h"          // _PyLong_GetZero() +#include "pycore_moduleobject.h"  // _PyModule_GetState() +#include "structmember.h"         // PyMemberDef + +#include "sre.h" + +#define SRE_CODE_BITS (8 * sizeof(SRE_CODE)) + +#include <ctype.h> + +/* defining this one enables tracing */ +#undef VERBOSE + +/* -------------------------------------------------------------------- */ + +#if defined(_MSC_VER) +#pragma optimize("agtw", on) /* doesn't seem to make much difference... */ +#pragma warning(disable: 4710) /* who cares if functions are not inlined ;-) */ +/* fastest possible local call under MSVC */ +#define LOCAL(type) static __inline type __fastcall +#else +#define LOCAL(type) static inline type +#endif + +/* error codes */ +#define SRE_ERROR_ILLEGAL -1 /* illegal opcode */ +#define SRE_ERROR_STATE -2 /* illegal state */ +#define SRE_ERROR_RECURSION_LIMIT -3 /* runaway recursion */ +#define SRE_ERROR_MEMORY -9 /* out of memory */ +#define SRE_ERROR_INTERRUPTED -10 /* signal handler raised exception */ + +#if defined(VERBOSE) +#define TRACE(v) printf v +#else +#define TRACE(v) +#endif + +/* -------------------------------------------------------------------- */ +/* search engine state */ + +#define SRE_IS_DIGIT(ch)\ +    ((ch) <= '9' && Py_ISDIGIT(ch)) +#define SRE_IS_SPACE(ch)\ +    ((ch) <= ' ' && Py_ISSPACE(ch)) +#define SRE_IS_LINEBREAK(ch)\ +    ((ch) == '\n') +#define SRE_IS_WORD(ch)\ +    ((ch) <= 'z' && (Py_ISALNUM(ch) || (ch) == '_')) + +static unsigned int sre_lower_ascii(unsigned int ch) +{ +    return ((ch) < 128 ? Py_TOLOWER(ch) : ch); +} + +/* locale-specific character predicates */ +/* !(c & ~N) == (c < N+1) for any unsigned c, this avoids + * warnings when c's type supports only numbers < N+1 */ +#define SRE_LOC_IS_ALNUM(ch) (!((ch) & ~255) ? 
isalnum((ch)) : 0) +#define SRE_LOC_IS_WORD(ch) (SRE_LOC_IS_ALNUM((ch)) || (ch) == '_') + +static unsigned int sre_lower_locale(unsigned int ch) +{ +    return ((ch) < 256 ? (unsigned int)tolower((ch)) : ch); +} + +static unsigned int sre_upper_locale(unsigned int ch) +{ +    return ((ch) < 256 ? (unsigned int)toupper((ch)) : ch); +} + +/* unicode-specific character predicates */ + +#define SRE_UNI_IS_DIGIT(ch) Py_UNICODE_ISDECIMAL(ch) +#define SRE_UNI_IS_SPACE(ch) Py_UNICODE_ISSPACE(ch) +#define SRE_UNI_IS_LINEBREAK(ch) Py_UNICODE_ISLINEBREAK(ch) +#define SRE_UNI_IS_ALNUM(ch) Py_UNICODE_ISALNUM(ch) +#define SRE_UNI_IS_WORD(ch) (SRE_UNI_IS_ALNUM(ch) || (ch) == '_') + +static unsigned int sre_lower_unicode(unsigned int ch) +{ +    return (unsigned int) Py_UNICODE_TOLOWER(ch); +} + +static unsigned int sre_upper_unicode(unsigned int ch) +{ +    return (unsigned int) Py_UNICODE_TOUPPER(ch); +} + +LOCAL(int) +sre_category(SRE_CODE category, unsigned int ch) +{ +    switch (category) { + +    case SRE_CATEGORY_DIGIT: +        return SRE_IS_DIGIT(ch); +    case SRE_CATEGORY_NOT_DIGIT: +        return !SRE_IS_DIGIT(ch); +    case SRE_CATEGORY_SPACE: +        return SRE_IS_SPACE(ch); +    case SRE_CATEGORY_NOT_SPACE: +        return !SRE_IS_SPACE(ch); +    case SRE_CATEGORY_WORD: +        return SRE_IS_WORD(ch); +    case SRE_CATEGORY_NOT_WORD: +        return !SRE_IS_WORD(ch); +    case SRE_CATEGORY_LINEBREAK: +        return SRE_IS_LINEBREAK(ch); +    case SRE_CATEGORY_NOT_LINEBREAK: +        return !SRE_IS_LINEBREAK(ch); + +    case SRE_CATEGORY_LOC_WORD: +        return SRE_LOC_IS_WORD(ch); +    case SRE_CATEGORY_LOC_NOT_WORD: +        return !SRE_LOC_IS_WORD(ch); + +    case SRE_CATEGORY_UNI_DIGIT: +        return SRE_UNI_IS_DIGIT(ch); +    case SRE_CATEGORY_UNI_NOT_DIGIT: +        return !SRE_UNI_IS_DIGIT(ch); +    case SRE_CATEGORY_UNI_SPACE: +        return SRE_UNI_IS_SPACE(ch); +    case SRE_CATEGORY_UNI_NOT_SPACE: +        return !SRE_UNI_IS_SPACE(ch); +    case 
SRE_CATEGORY_UNI_WORD: +        return SRE_UNI_IS_WORD(ch); +    case SRE_CATEGORY_UNI_NOT_WORD: +        return !SRE_UNI_IS_WORD(ch); +    case SRE_CATEGORY_UNI_LINEBREAK: +        return SRE_UNI_IS_LINEBREAK(ch); +    case SRE_CATEGORY_UNI_NOT_LINEBREAK: +        return !SRE_UNI_IS_LINEBREAK(ch); +    } +    return 0; +} + +LOCAL(int) +char_loc_ignore(SRE_CODE pattern, SRE_CODE ch) +{ +    return ch == pattern +        || (SRE_CODE) sre_lower_locale(ch) == pattern +        || (SRE_CODE) sre_upper_locale(ch) == pattern; +} + + +/* helpers */ + +static void +data_stack_dealloc(SRE_STATE* state) +{ +    if (state->data_stack) { +        PyMem_Free(state->data_stack); +        state->data_stack = NULL; +    } +    state->data_stack_size = state->data_stack_base = 0; +} + +static int +data_stack_grow(SRE_STATE* state, Py_ssize_t size) +{ +    Py_ssize_t minsize, cursize; +    minsize = state->data_stack_base+size; +    cursize = state->data_stack_size; +    if (cursize < minsize) { +        void* stack; +        cursize = minsize+minsize/4+1024; +        TRACE(("allocate/grow stack %zd\n", cursize)); +        stack = PyMem_Realloc(state->data_stack, cursize); +        if (!stack) { +            data_stack_dealloc(state); +            return SRE_ERROR_MEMORY; +        } +        state->data_stack = (char *)stack; +        state->data_stack_size = cursize; +    } +    return 0; +} + +/* generate 8-bit version */ + +#define SRE_CHAR Py_UCS1 +#define SIZEOF_SRE_CHAR 1 +#define SRE(F) sre_ucs1_##F +#include "sre_lib.h" + +/* generate 16-bit unicode version */ + +#define SRE_CHAR Py_UCS2 +#define SIZEOF_SRE_CHAR 2 +#define SRE(F) sre_ucs2_##F +#include "sre_lib.h" + +/* generate 32-bit unicode version */ + +#define SRE_CHAR Py_UCS4 +#define SIZEOF_SRE_CHAR 4 +#define SRE(F) sre_ucs4_##F +#include "sre_lib.h" + +/* -------------------------------------------------------------------- */ +/* factories and destructors */ + +/* module state */ +typedef struct { +    PyTypeObject 
*Pattern_Type; +    PyTypeObject *Match_Type; +    PyTypeObject *Scanner_Type; +    PyTypeObject *Template_Type; +    PyObject *compile_template;  // reference to re._compile_template +} _sremodulestate; + +static _sremodulestate * +get_sre_module_state(PyObject *m) +{ +    _sremodulestate *state = (_sremodulestate *)_PyModule_GetState(m); +    assert(state); +    return state; +} + +static struct PyModuleDef sremodule; +#define get_sre_module_state_by_class(cls) \ +    (get_sre_module_state(PyType_GetModule(cls))) + +/* see sre.h for object declarations */ +static PyObject*pattern_new_match(_sremodulestate *, PatternObject*, SRE_STATE*, Py_ssize_t); +static PyObject *pattern_scanner(_sremodulestate *, PatternObject *, PyObject *, Py_ssize_t, Py_ssize_t); + +/*[clinic input] +module _sre +class _sre.SRE_Pattern "PatternObject *" "get_sre_module_state_by_class(tp)->Pattern_Type" +class _sre.SRE_Match "MatchObject *" "get_sre_module_state_by_class(tp)->Match_Type" +class _sre.SRE_Scanner "ScannerObject *" "get_sre_module_state_by_class(tp)->Scanner_Type" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=fe2966e32b66a231]*/ + +/*[clinic input] +_sre.getcodesize -> int +[clinic start generated code]*/ + +static int +_sre_getcodesize_impl(PyObject *module) +/*[clinic end generated code: output=e0db7ce34a6dd7b1 input=bd6f6ecf4916bb2b]*/ +{ +    return sizeof(SRE_CODE); +} + +/*[clinic input] +_sre.ascii_iscased -> bool + +    character: int +    / + +[clinic start generated code]*/ + +static int +_sre_ascii_iscased_impl(PyObject *module, int character) +/*[clinic end generated code: output=4f454b630fbd19a2 input=9f0bd952812c7ed3]*/ +{ +    unsigned int ch = (unsigned int)character; +    return ch < 128 && Py_ISALPHA(ch); +} + +/*[clinic input] +_sre.unicode_iscased -> bool + +    character: int +    / + +[clinic start generated code]*/ + +static int +_sre_unicode_iscased_impl(PyObject *module, int character) +/*[clinic end 
generated code: output=9c5ddee0dc2bc258 input=51e42c3b8dddb78e]*/ +{ +    unsigned int ch = (unsigned int)character; +    return ch != sre_lower_unicode(ch) || ch != sre_upper_unicode(ch); +} + +/*[clinic input] +_sre.ascii_tolower -> int + +    character: int +    / + +[clinic start generated code]*/ + +static int +_sre_ascii_tolower_impl(PyObject *module, int character) +/*[clinic end generated code: output=228294ed6ff2a612 input=272c609b5b61f136]*/ +{ +    return sre_lower_ascii(character); +} + +/*[clinic input] +_sre.unicode_tolower -> int + +    character: int +    / + +[clinic start generated code]*/ + +static int +_sre_unicode_tolower_impl(PyObject *module, int character) +/*[clinic end generated code: output=6422272d7d7fee65 input=91d708c5f3c2045a]*/ +{ +    return sre_lower_unicode(character); +} + +LOCAL(void) +state_reset(SRE_STATE* state) +{ +    /* state->mark will be set to 0 in SRE_OP_MARK dynamically. */ +    /*memset(state->mark, 0, sizeof(*state->mark) * SRE_MARK_SIZE);*/ + +    state->lastmark = -1; +    state->lastindex = -1; + +    state->repeat = NULL; + +    data_stack_dealloc(state); +} + +static const void* +getstring(PyObject* string, Py_ssize_t* p_length, +          int* p_isbytes, int* p_charsize, +          Py_buffer *view) +{ +    /* given a python object, return a data pointer, a length (in +       characters), and a character size.  return NULL if the object +       is not a string (or not compatible) */ + +    /* Unicode objects do not support the buffer API. So, get the data +       directly instead. 
*/ +    if (PyUnicode_Check(string)) { +        if (PyUnicode_READY(string) == -1) +            return NULL; +        *p_length = PyUnicode_GET_LENGTH(string); +        *p_charsize = PyUnicode_KIND(string); +        *p_isbytes = 0; +        return PyUnicode_DATA(string); +    } + +    /* get pointer to byte string buffer */ +    if (PyObject_GetBuffer(string, view, PyBUF_SIMPLE) != 0) { +        PyErr_Format(PyExc_TypeError, "expected string or bytes-like " +                     "object, got '%.200s'", Py_TYPE(string)->tp_name); +        return NULL; +    } + +    *p_length = view->len; +    *p_charsize = 1; +    *p_isbytes = 1; + +    if (view->buf == NULL) { +        PyErr_SetString(PyExc_ValueError, "Buffer is NULL"); +        PyBuffer_Release(view); +        view->buf = NULL; +        return NULL; +    } +    return view->buf; +} + +LOCAL(PyObject*) +state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string, +           Py_ssize_t start, Py_ssize_t end) +{ +    /* prepare state object */ + +    Py_ssize_t length; +    int isbytes, charsize; +    const void* ptr; + +    memset(state, 0, sizeof(SRE_STATE)); + +    state->mark = PyMem_New(const void *, pattern->groups * 2); +    if (!state->mark) { +        PyErr_NoMemory(); +        goto err; +    } +    state->lastmark = -1; +    state->lastindex = -1; + +    state->buffer.buf = NULL; +    ptr = getstring(string, &length, &isbytes, &charsize, &state->buffer); +    if (!ptr) +        goto err; + +    if (isbytes && pattern->isbytes == 0) { +        PyErr_SetString(PyExc_TypeError, +                        "cannot use a string pattern on a bytes-like object"); +        goto err; +    } +    if (!isbytes && pattern->isbytes > 0) { +        PyErr_SetString(PyExc_TypeError, +                        "cannot use a bytes pattern on a string-like object"); +        goto err; +    } + +    /* adjust boundaries */ +    if (start < 0) +        start = 0; +    else if (start > length) +        start = length; + + 
   if (end < 0) +        end = 0; +    else if (end > length) +        end = length; + +    state->isbytes = isbytes; +    state->charsize = charsize; +    state->match_all = 0; +    state->must_advance = 0; + +    state->beginning = ptr; + +    state->start = (void*) ((char*) ptr + start * state->charsize); +    state->end = (void*) ((char*) ptr + end * state->charsize); + +    state->string = Py_NewRef(string); +    state->pos = start; +    state->endpos = end; + +    return string; +  err: +    /* We add an explicit cast here because MSVC has a bug when +       compiling C code where it believes that `const void**` cannot be +       safely casted to `void*`, see bpo-39943 for details. */ +    PyMem_Free((void*) state->mark); +    state->mark = NULL; +    if (state->buffer.buf) +        PyBuffer_Release(&state->buffer); +    return NULL; +} + +LOCAL(void) +state_fini(SRE_STATE* state) +{ +    if (state->buffer.buf) +        PyBuffer_Release(&state->buffer); +    Py_XDECREF(state->string); +    data_stack_dealloc(state); +    /* See above PyMem_Del for why we explicitly cast here. 
*/ +    PyMem_Free((void*) state->mark); +    state->mark = NULL; +} + +/* calculate offset from start of string */ +#define STATE_OFFSET(state, member)\ +    (((char*)(member) - (char*)(state)->beginning) / (state)->charsize) + +LOCAL(PyObject*) +getslice(int isbytes, const void *ptr, +         PyObject* string, Py_ssize_t start, Py_ssize_t end) +{ +    if (isbytes) { +        if (PyBytes_CheckExact(string) && +            start == 0 && end == PyBytes_GET_SIZE(string)) { +            return Py_NewRef(string); +        } +        return PyBytes_FromStringAndSize( +                (const char *)ptr + start, end - start); +    } +    else { +        return PyUnicode_Substring(string, start, end); +    } +} + +LOCAL(PyObject*) +state_getslice(SRE_STATE* state, Py_ssize_t index, PyObject* string, int empty) +{ +    Py_ssize_t i, j; + +    index = (index - 1) * 2; + +    if (string == Py_None || index >= state->lastmark || !state->mark[index] || !state->mark[index+1]) { +        if (empty) +            /* want empty string */ +            i = j = 0; +        else { +            Py_RETURN_NONE; +        } +    } else { +        i = STATE_OFFSET(state, state->mark[index]); +        j = STATE_OFFSET(state, state->mark[index+1]); + +        /* check wrong span */ +        if (i > j) { +            PyErr_SetString(PyExc_SystemError, +                            "The span of capturing group is wrong," +                            " please report a bug for the re module."); +            return NULL; +        } +    } + +    return getslice(state->isbytes, state->beginning, string, i, j); +} + +static void +pattern_error(Py_ssize_t status) +{ +    switch (status) { +    case SRE_ERROR_RECURSION_LIMIT: +        /* This error code seems to be unused. 
*/ +        PyErr_SetString( +            PyExc_RecursionError, +            "maximum recursion limit exceeded" +            ); +        break; +    case SRE_ERROR_MEMORY: +        PyErr_NoMemory(); +        break; +    case SRE_ERROR_INTERRUPTED: +    /* An exception has already been raised, so let it fly */ +        break; +    default: +        /* other error codes indicate compiler/engine bugs */ +        PyErr_SetString( +            PyExc_RuntimeError, +            "internal error in regular expression engine" +            ); +    } +} + +static int +pattern_traverse(PatternObject *self, visitproc visit, void *arg) +{ +    Py_VISIT(Py_TYPE(self)); +    Py_VISIT(self->groupindex); +    Py_VISIT(self->indexgroup); +    Py_VISIT(self->pattern); +    return 0; +} + +static int +pattern_clear(PatternObject *self) +{ +    Py_CLEAR(self->groupindex); +    Py_CLEAR(self->indexgroup); +    Py_CLEAR(self->pattern); +    return 0; +} + +static void +pattern_dealloc(PatternObject* self) +{ +    PyTypeObject *tp = Py_TYPE(self); + +    PyObject_GC_UnTrack(self); +    if (self->weakreflist != NULL) { +        PyObject_ClearWeakRefs((PyObject *) self); +    } +    (void)pattern_clear(self); +    tp->tp_free(self); +    Py_DECREF(tp); +} + +LOCAL(Py_ssize_t) +sre_match(SRE_STATE* state, SRE_CODE* pattern) +{ +    if (state->charsize == 1) +        return sre_ucs1_match(state, pattern, 1); +    if (state->charsize == 2) +        return sre_ucs2_match(state, pattern, 1); +    assert(state->charsize == 4); +    return sre_ucs4_match(state, pattern, 1); +} + +LOCAL(Py_ssize_t) +sre_search(SRE_STATE* state, SRE_CODE* pattern) +{ +    if (state->charsize == 1) +        return sre_ucs1_search(state, pattern); +    if (state->charsize == 2) +        return sre_ucs2_search(state, pattern); +    assert(state->charsize == 4); +    return sre_ucs4_search(state, pattern); +} + +/*[clinic input] +_sre.SRE_Pattern.match + +    cls: defining_class +    / +    string: object +    pos: 
Py_ssize_t = 0 +    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize + +Matches zero or more characters at the beginning of the string. +[clinic start generated code]*/ + +static PyObject * +_sre_SRE_Pattern_match_impl(PatternObject *self, PyTypeObject *cls, +                            PyObject *string, Py_ssize_t pos, +                            Py_ssize_t endpos) +/*[clinic end generated code: output=ec6208ea58a0cca0 input=4bdb9c3e564d13ac]*/ +{ +    _sremodulestate *module_state = get_sre_module_state_by_class(cls); +    SRE_STATE state; +    Py_ssize_t status; +    PyObject *match; + +    if (!state_init(&state, (PatternObject *)self, string, pos, endpos)) +        return NULL; + +    state.ptr = state.start; + +    TRACE(("|%p|%p|MATCH\n", PatternObject_GetCode(self), state.ptr)); + +    status = sre_match(&state, PatternObject_GetCode(self)); + +    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr)); +    if (PyErr_Occurred()) { +        state_fini(&state); +        return NULL; +    } + +    match = pattern_new_match(module_state, self, &state, status); +    state_fini(&state); +    return match; +} + +/*[clinic input] +_sre.SRE_Pattern.fullmatch + +    cls: defining_class +    / +    string: object +    pos: Py_ssize_t = 0 +    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize + +Matches against all of the string. 
+[clinic start generated code]*/ + +static PyObject * +_sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyTypeObject *cls, +                                PyObject *string, Py_ssize_t pos, +                                Py_ssize_t endpos) +/*[clinic end generated code: output=625b75b027ef94da input=50981172ab0fcfdd]*/ +{ +    _sremodulestate *module_state = get_sre_module_state_by_class(cls); +    SRE_STATE state; +    Py_ssize_t status; +    PyObject *match; + +    if (!state_init(&state, self, string, pos, endpos)) +        return NULL; + +    state.ptr = state.start; + +    TRACE(("|%p|%p|FULLMATCH\n", PatternObject_GetCode(self), state.ptr)); + +    state.match_all = 1; +    status = sre_match(&state, PatternObject_GetCode(self)); + +    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr)); +    if (PyErr_Occurred()) { +        state_fini(&state); +        return NULL; +    } + +    match = pattern_new_match(module_state, self, &state, status); +    state_fini(&state); +    return match; +} + +/*[clinic input] +_sre.SRE_Pattern.search + +    cls: defining_class +    / +    string: object +    pos: Py_ssize_t = 0 +    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize + +Scan through string looking for a match, and return a corresponding match object instance. + +Return None if no position in the string matches. 
+[clinic start generated code]*/ + +static PyObject * +_sre_SRE_Pattern_search_impl(PatternObject *self, PyTypeObject *cls, +                             PyObject *string, Py_ssize_t pos, +                             Py_ssize_t endpos) +/*[clinic end generated code: output=bd7f2d9d583e1463 input=afa9afb66a74a4b3]*/ +{ +    _sremodulestate *module_state = get_sre_module_state_by_class(cls); +    SRE_STATE state; +    Py_ssize_t status; +    PyObject *match; + +    if (!state_init(&state, self, string, pos, endpos)) +        return NULL; + +    TRACE(("|%p|%p|SEARCH\n", PatternObject_GetCode(self), state.ptr)); + +    status = sre_search(&state, PatternObject_GetCode(self)); + +    TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr)); + +    if (PyErr_Occurred()) { +        state_fini(&state); +        return NULL; +    } + +    match = pattern_new_match(module_state, self, &state, status); +    state_fini(&state); +    return match; +} + +/*[clinic input] +_sre.SRE_Pattern.findall + +    string: object +    pos: Py_ssize_t = 0 +    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize + +Return a list of all non-overlapping matches of pattern in string. 
[clinic start generated code]*/

static PyObject *
_sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string,
                              Py_ssize_t pos, Py_ssize_t endpos)
/*[clinic end generated code: output=f4966baceea60aca input=5b6a4ee799741563]*/
{
    /* Search repeatedly and collect one list item per hit.  The item is the
       matched slice when the pattern has no groups, the slice of group 1 when
       it has exactly one, and otherwise a tuple with one slice per group.
       Returns a new list, or NULL on failure. */
    SRE_STATE state;
    PyObject* list;
    Py_ssize_t status;
    Py_ssize_t i, b, e;

    if (!state_init(&state, self, string, pos, endpos))
        return NULL;

    list = PyList_New(0);
    if (!list) {
        state_fini(&state);
        return NULL;
    }

    while (state.start <= state.end) {

        PyObject* item;

        state_reset(&state);

        state.ptr = state.start;

        status = sre_search(&state, PatternObject_GetCode(self));
        if (PyErr_Occurred())
            goto error;

        /* 0 means no further match; negative values are engine error codes */
        if (status <= 0) {
            if (status == 0)
                break;
            pattern_error(status);
            goto error;
        }

        /* don't bother to build a match object */
        switch (self->groups) {
        case 0:
            /* whole match: from where it started to where the engine stopped */
            b = STATE_OFFSET(&state, state.start);
            e = STATE_OFFSET(&state, state.ptr);
            item = getslice(state.isbytes, state.beginning,
                            string, b, e);
            if (!item)
                goto error;
            break;
        case 1:
            /* last argument 1: an unset group yields an empty slice, not None */
            item = state_getslice(&state, 1, string, 1);
            if (!item)
                goto error;
            break;
        default:
            item = PyTuple_New(self->groups);
            if (!item)
                goto error;
            for (i = 0; i < self->groups; i++) {
                PyObject* o = state_getslice(&state, i+1, string, 1);
                if (!o) {
                    Py_DECREF(item);
                    goto error;
                }
                /* the tuple takes over the reference to o */
                PyTuple_SET_ITEM(item, i, o);
            }
            break;
        }

        status = PyList_Append(list, item);
        Py_DECREF(item);
        if (status < 0)
            goto error;

        /* record whether this match was empty (ptr did not move), then resume
           from where it ended; presumably the engine uses must_advance to
           avoid returning the same empty match again -- engine not visible
           here */
        state.must_advance = (state.ptr == state.start);
        state.start = state.ptr;
    }

    state_fini(&state);
    return list;

error:
    Py_DECREF(list);
    state_fini(&state);
    return NULL;

}

/*[clinic input]
_sre.SRE_Pattern.finditer

    cls: defining_class
    /
    string: object
    pos: Py_ssize_t = 0
    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize

Return an iterator over all non-overlapping matches for the RE pattern in string.

For each match, the iterator returns a match object.
[clinic start generated code]*/

static PyObject *
_sre_SRE_Pattern_finditer_impl(PatternObject *self, PyTypeObject *cls,
                               PyObject *string, Py_ssize_t pos,
                               Py_ssize_t endpos)
/*[clinic end generated code: output=1791dbf3618ade56 input=812e332a4848cbaf]*/
{
    /* Create a scanner for the string and wrap its bound "search" method in
       a callable-iterator whose sentinel is None.  Returns NULL on failure. */
    _sremodulestate *module_state = get_sre_module_state_by_class(cls);
    PyObject* scanner;
    PyObject* search;
    PyObject* iterator;

    scanner = pattern_scanner(module_state, self, string, pos, endpos);
    if (!scanner)
        return NULL;

    /* only the attribute looked up here is used from now on, so the local
       reference to the scanner can be dropped */
    search = PyObject_GetAttrString(scanner, "search");
    Py_DECREF(scanner);
    if (!search)
        return NULL;

    iterator = PyCallIter_New(search, Py_None);
    Py_DECREF(search);

    return iterator;
}

/*[clinic input]
_sre.SRE_Pattern.scanner

    cls: defining_class
    /
    string: object
    pos: Py_ssize_t = 0
    endpos: Py_ssize_t(c_default="PY_SSIZE_T_MAX") = sys.maxsize

[clinic start generated code]*/

static PyObject *
_sre_SRE_Pattern_scanner_impl(PatternObject *self, PyTypeObject *cls,
                              PyObject *string, Py_ssize_t pos,
                              Py_ssize_t endpos)
/*[clinic end generated code: output=f70cd506112f1bd9 input=2e487e5151bcee4c]*/
{
    /* Thin wrapper: resolve the module state and delegate to
       pattern_scanner(). */
    _sremodulestate *module_state = get_sre_module_state_by_class(cls);

    return pattern_scanner(module_state, self, string, pos, endpos);
}

/*[clinic input]
_sre.SRE_Pattern.split

    string: object
    maxsplit: Py_ssize_t = 0

Split string by the occurrences of pattern.
[clinic start generated code]*/

static PyObject *
_sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string,
                            Py_ssize_t maxsplit)
/*[clinic end generated code: output=7ac66f381c45e0be input=1eeeb10dafc9947a]*/
{
    /* Build a list made of the text between successive matches, with the
       pattern's groups inserted after each piece.  maxsplit == 0 means no
       limit on the number of splits.  Returns a new list, or NULL on
       failure. */
    SRE_STATE state;
    PyObject* list;
    PyObject* item;
    Py_ssize_t status;
    Py_ssize_t n;           /* number of splits performed so far */
    Py_ssize_t i;
    const void* last;       /* where the previous match ended */

    assert(self->codesize != 0);

    if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX))
        return NULL;

    list = PyList_New(0);
    if (!list) {
        state_fini(&state);
        return NULL;
    }

    n = 0;
    last = state.start;

    while (!maxsplit || n < maxsplit) {

        state_reset(&state);

        state.ptr = state.start;

        status = sre_search(&state, PatternObject_GetCode(self));
        if (PyErr_Occurred())
            goto error;

        /* 0 means no further match; negative values are engine error codes */
        if (status <= 0) {
            if (status == 0)
                break;
            pattern_error(status);
            goto error;
        }

        /* get segment before this match */
        item = getslice(state.isbytes, state.beginning,
            string, STATE_OFFSET(&state, last),
            STATE_OFFSET(&state, state.start)
            );
        if (!item)
            goto error;
        status = PyList_Append(list, item);
        Py_DECREF(item);
        if (status < 0)
            goto error;

        /* add groups (if any) */
        for (i = 0; i < self->groups; i++) {
            /* last argument 0: an unset group is reported as None */
            item = state_getslice(&state, i+1, string, 0);
            if (!item)
                goto error;
            status = PyList_Append(list, item);
            Py_DECREF(item);
            if (status < 0)
                goto error;
        }

        n = n + 1;
        /* record whether this match was empty, then resume after it */
        state.must_advance = (state.ptr == state.start);
        last = state.start = state.ptr;

    }

    /* get segment following last match (even if empty) */
    item = getslice(state.isbytes, state.beginning,
        string, STATE_OFFSET(&state, last), state.endpos
        );
    if (!item)
        goto error;
    status = PyList_Append(list, item);
    Py_DECREF(item);
    if (status < 0)
        goto error;

    state_fini(&state);
    return list;

error:
    Py_DECREF(list);
    state_fini(&state);
    return NULL;

}

/* Compile a replacement template by calling re._compile_template(pattern,
 * template).  The callable is looked up on first use and stored in the module
 * state.  Returns a new reference to an object whose type is exactly
 * module_state->Template_Type, or NULL with an exception set.
 */
static PyObject *
compile_template(_sremodulestate *module_state,
                 PatternObject *pattern, PyObject *template)
{
    /* delegate to Python code */
    PyObject *func = module_state->compile_template;
    if (func == NULL) {
        func = _PyImport_GetModuleAttrString("re", "_compile_template");
        if (func == NULL) {
            return NULL;
        }
        /* the module state now owns the reference; func is used as a
           borrowed pointer from here on */
        Py_XSETREF(module_state->compile_template, func);
    }

    PyObject *args[] = {(PyObject *)pattern, template};
    PyObject *result = PyObject_Vectorcall(func, args, 2, NULL);

    if (result == NULL && PyErr_ExceptionMatches(PyExc_TypeError)) {
        /* If the replacement string is unhashable (e.g. bytearray),
         * convert it to the basic type (str or bytes) and repeat. */
        if (PyUnicode_Check(template) && !PyUnicode_CheckExact(template)) {
            PyErr_Clear();
            template = _PyUnicode_Copy(template);
        }
        else if (PyObject_CheckBuffer(template) && !PyBytes_CheckExact(template)) {
            PyErr_Clear();
            template = PyBytes_FromObject(template);
        }
        else {
            /* already an exact str/bytes, or not convertible: keep the
               TypeError that is pending */
            return NULL;
        }
        if (template == NULL) {
            return NULL;
        }
        args[1] = template;
        result = PyObject_Vectorcall(func, args, 2, NULL);
        /* release the temporary converted copy */
        Py_DECREF(template);
    }

    /* the Python helper must hand back an exact Template object */
    if (result != NULL && Py_TYPE(result) != module_state->Template_Type) {
        PyErr_Format(PyExc_RuntimeError,
                    "the result of compiling a replacement string is %.200s",
                    Py_TYPE(result)->tp_name);
        Py_DECREF(result);
        return NULL;
    }
    return result;
}

static PyObject *expand_template(TemplateObject *, MatchObject *); /* Forward */

/* Shared implementation of Pattern.sub() and Pattern.subn().
 *
 * ptemplate is the replacement: either a callable or a template object that
 * getstring() can read.  string is the subject.  count limits the number of
 * replacements; 0 means no limit.  subn selects the result shape: nonzero
 * returns the tuple (new_string, number_of_replacements), zero returns just
 * new_string.  Returns a new reference, or NULL on failure.
 */
static PyObject*
pattern_subx(_sremodulestate* module_state,
             PatternObject* self,
             PyObject* ptemplate,
             PyObject* string,
             Py_ssize_t count,
             Py_ssize_t subn)
{
    SRE_STATE state;
    PyObject* list;         /* output pieces, joined at the end */
    PyObject* joiner;
    PyObject* item;
    PyObject* filter;       /* callable, literal replacement, or Template */
    PyObject* match;
    const void* ptr;
    Py_ssize_t status;
    Py_ssize_t n;           /* template length first, then replacement count */
    Py_ssize_t i, b, e;     /* i: end of previous match; b, e: current match */
    int isbytes, charsize;
    enum {LITERAL, TEMPLATE, CALLABLE} filter_type;
    Py_buffer view;

    if (PyCallable_Check(ptemplate)) {
        /* sub/subn takes either a function or a template */
        filter = Py_NewRef(ptemplate);
        filter_type = CALLABLE;
    } else {
        /* if not callable, check if it's a literal string */
        int literal;
        view.buf = NULL;
        ptr = getstring(ptemplate, &n, &isbytes, &charsize, &view);
        if (ptr) {
            /* a template without any backslash is used verbatim */
            if (charsize == 1)
                literal = memchr(ptr, '\\', n) == NULL;
            else
                literal = PyUnicode_FindChar(ptemplate, '\\', 0, n, 1) == -1;
        } else {
            /* getstring() could not read it: drop that error and leave the
               decision to the template compiler below */
            PyErr_Clear();
            literal = 0;
        }
        if (view.buf)
            PyBuffer_Release(&view);
        if (literal) {
            filter = Py_NewRef(ptemplate);
            filter_type = LITERAL;
        } else {
            /* not a literal; hand it over to the template compiler */
            filter = compile_template(module_state, self, ptemplate);
            if (!filter)
                return NULL;

            assert(Py_TYPE(filter) == module_state->Template_Type);
            /* a compiled template with no group items reduces to its
               leading literal */
            if (Py_SIZE(filter) == 0) {
                Py_SETREF(filter,
                          Py_NewRef(((TemplateObject *)filter)->literal));
                filter_type = LITERAL;
            }
            else {
                filter_type = TEMPLATE;
            }
        }
    }

    if (!state_init(&state, self, string, 0, PY_SSIZE_T_MAX)) {
        Py_DECREF(filter);
        return NULL;
    }

    list = PyList_New(0);
    if (!list) {
        Py_DECREF(filter);
        state_fini(&state);
        return NULL;
    }

    /* n was only scratch space for the template length above */
    n = i = 0;

    while (!count || n < count) {

        state_reset(&state);

        state.ptr = state.start;

        status = sre_search(&state, PatternObject_GetCode(self));
        if (PyErr_Occurred())
            goto error;

        /* 0 means no further match; negative values are engine error codes */
        if (status <= 0) {
            if (status == 0)
                break;
            pattern_error(status);
            goto error;
        }

        b = STATE_OFFSET(&state, state.start);
        e = STATE_OFFSET(&state, state.ptr);

        if (i < b) {
            /* get segment before this match */
            item = getslice(state.isbytes, state.beginning,
                string, i, b);
            if (!item)
                goto error;
            status = PyList_Append(list, item);
            Py_DECREF(item);
            if (status < 0)
                goto error;

        }

        if (filter_type != LITERAL) {
            /* pass match object through filter */
            match = pattern_new_match(module_state, self, &state, 1);
            if (!match)
                goto error;
            if (filter_type == TEMPLATE) {
                item = expand_template((TemplateObject *)filter,
                                       (MatchObject *)match);
            }
            else {
                assert(filter_type == CALLABLE);
                item = PyObject_CallOneArg(filter, match);
            }
            Py_DECREF(match);
            if (!item)
                goto error;
        } else {
            /* filter is literal string */
            item = Py_NewRef(filter);
        }

        /* add to list */
        /* A None replacement contributes nothing to the output.
           NOTE(review): in that case the reference held in item is never
           released here; that is harmless only if None is immortal on this
           build -- confirm. */
        if (item != Py_None) {
            status = PyList_Append(list, item);
            Py_DECREF(item);
            if (status < 0)
                goto error;
        }

        i = e;
        n = n + 1;
        /* record whether this match was empty, then resume after it */
        state.must_advance = (state.ptr == state.start);
        state.start = state.ptr;
    }

    /* get segment following last match */
    if (i < state.endpos) {
        item = getslice(state.isbytes, state.beginning,
                        string, i, state.endpos);
        if (!item)
            goto error;
        status = PyList_Append(list, item);
        Py_DECREF(item);
        if (status < 0)
            goto error;
    }

    state_fini(&state);

    Py_DECREF(filter);

    /* convert list to single string (also removes list) */
    /* NOTE(review): state.isbytes and state.beginning are read after
       state_fini(); the requested slice is empty (0, 0) -- confirm these
       fields stay readable after state_fini(). */
    joiner = getslice(state.isbytes, state.beginning, string, 0, 0);
    if (!joiner) {
        Py_DECREF(list);
        return NULL;
    }
    if (PyList_GET_SIZE(list) == 0) {
        /* nothing collected: the empty joiner itself is the result */
        Py_DECREF(list);
        item = joiner;
    }
    else {
        if (state.isbytes)
            item = _PyBytes_Join(joiner, list);
        else
            item = PyUnicode_Join(joiner, list);
        Py_DECREF(joiner);
        Py_DECREF(list);
        if (!item)
            return NULL;
    }

    /* "N" hands the reference to item over to the new tuple */
    if (subn)
        return Py_BuildValue("Nn", item, n);

    return item;

error:
    Py_DECREF(list);
    state_fini(&state);
    Py_DECREF(filter);
    return NULL;

}

/*[clinic input]
_sre.SRE_Pattern.sub

    cls: defining_class
    /
    repl: object
    string: object
    count: Py_ssize_t = 0

Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
[clinic start generated code]*/

static PyObject *
_sre_SRE_Pattern_sub_impl(PatternObject *self, PyTypeObject *cls,
                          PyObject *repl, PyObject *string, Py_ssize_t count)
/*[clinic end generated code: output=4be141ab04bca60d input=d8d1d4ac2311a07c]*/
{
    /* Delegates to pattern_subx(); the final 0 asks for the plain result
       rather than the (result, count) tuple. */
    _sremodulestate *module_state = get_sre_module_state_by_class(cls);

    return pattern_subx(module_state, self, repl, string, count, 0);
}

/*[clinic input]
_sre.SRE_Pattern.subn

    cls: defining_class
    /
    repl: object
    string: object
    count: Py_ssize_t = 0

Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
+[clinic start generated code]*/ + +static PyObject * +_sre_SRE_Pattern_subn_impl(PatternObject *self, PyTypeObject *cls, +                           PyObject *repl, PyObject *string, +                           Py_ssize_t count) +/*[clinic end generated code: output=da02fd85258b1e1f input=8b78a65b8302e58d]*/ +{ +    _sremodulestate *module_state = get_sre_module_state_by_class(cls); + +    return pattern_subx(module_state, self, repl, string, count, 1); +} + +/*[clinic input] +_sre.SRE_Pattern.__copy__ + +[clinic start generated code]*/ + +static PyObject * +_sre_SRE_Pattern___copy___impl(PatternObject *self) +/*[clinic end generated code: output=85dedc2db1bd8694 input=a730a59d863bc9f5]*/ +{ +    return Py_NewRef(self); +} + +/*[clinic input] +_sre.SRE_Pattern.__deepcopy__ + +    memo: object +    / + +[clinic start generated code]*/ + +static PyObject * +_sre_SRE_Pattern___deepcopy__(PatternObject *self, PyObject *memo) +/*[clinic end generated code: output=2ad25679c1f1204a input=a465b1602f997bed]*/ +{ +    return Py_NewRef(self); +} + +static PyObject * +pattern_repr(PatternObject *obj) +{ +    static const struct { +        const char *name; +        int value; +    } flag_names[] = { +        {"re.TEMPLATE", SRE_FLAG_TEMPLATE}, +        {"re.IGNORECASE", SRE_FLAG_IGNORECASE}, +        {"re.LOCALE", SRE_FLAG_LOCALE}, +        {"re.MULTILINE", SRE_FLAG_MULTILINE}, +        {"re.DOTALL", SRE_FLAG_DOTALL}, +        {"re.UNICODE", SRE_FLAG_UNICODE}, +        {"re.VERBOSE", SRE_FLAG_VERBOSE}, +        {"re.DEBUG", SRE_FLAG_DEBUG}, +        {"re.ASCII", SRE_FLAG_ASCII}, +    }; +    PyObject *result = NULL; +    PyObject *flag_items; +    size_t i; +    int flags = obj->flags; + +    /* Omit re.UNICODE for valid string patterns. 
*/ +    if (obj->isbytes == 0 && +        (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) == +         SRE_FLAG_UNICODE) +        flags &= ~SRE_FLAG_UNICODE; + +    flag_items = PyList_New(0); +    if (!flag_items) +        return NULL; + +    for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) { +        if (flags & flag_names[i].value) { +            PyObject *item = PyUnicode_FromString(flag_names[i].name); +            if (!item) +                goto done; + +            if (PyList_Append(flag_items, item) < 0) { +                Py_DECREF(item); +                goto done; +            } +            Py_DECREF(item); +            flags &= ~flag_names[i].value; +        } +    } +    if (flags) { +        PyObject *item = PyUnicode_FromFormat("0x%x", flags); +        if (!item) +            goto done; + +        if (PyList_Append(flag_items, item) < 0) { +            Py_DECREF(item); +            goto done; +        } +        Py_DECREF(item); +    } + +    if (PyList_Size(flag_items) > 0) { +        PyObject *flags_result; +        PyObject *sep = PyUnicode_FromString("|"); +        if (!sep) +            goto done; +        flags_result = PyUnicode_Join(sep, flag_items); +        Py_DECREF(sep); +        if (!flags_result) +            goto done; +        result = PyUnicode_FromFormat("re.compile(%.200R, %S)", +                                      obj->pattern, flags_result); +        Py_DECREF(flags_result); +    } +    else { +        result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern); +    } + +done: +    Py_DECREF(flag_items); +    return result; +} + +PyDoc_STRVAR(pattern_doc, "Compiled regular expression object."); + +/* PatternObject's 'groupindex' method. 
*/ +static PyObject * +pattern_groupindex(PatternObject *self, void *Py_UNUSED(ignored)) +{ +    if (self->groupindex == NULL) +        return PyDict_New(); +    return PyDictProxy_New(self->groupindex); +} + +static int _validate(PatternObject *self); /* Forward */ + +/*[clinic input] +_sre.compile + +    pattern: object +    flags: int +    code: object(subclass_of='&PyList_Type') +    groups: Py_ssize_t +    groupindex: object(subclass_of='&PyDict_Type') +    indexgroup: object(subclass_of='&PyTuple_Type') + +[clinic start generated code]*/ + +static PyObject * +_sre_compile_impl(PyObject *module, PyObject *pattern, int flags, +                  PyObject *code, Py_ssize_t groups, PyObject *groupindex, +                  PyObject *indexgroup) +/*[clinic end generated code: output=ef9c2b3693776404 input=0a68476dbbe5db30]*/ +{ +    /* "compile" pattern descriptor to pattern object */ + +    _sremodulestate *module_state = get_sre_module_state(module); +    PatternObject* self; +    Py_ssize_t i, n; + +    n = PyList_GET_SIZE(code); +    /* coverity[ampersand_in_size] */ +    self = PyObject_GC_NewVar(PatternObject, module_state->Pattern_Type, n); +    if (!self) +        return NULL; +    self->weakreflist = NULL; +    self->pattern = NULL; +    self->groupindex = NULL; +    self->indexgroup = NULL; + +    self->codesize = n; + +    for (i = 0; i < n; i++) { +        PyObject *o = PyList_GET_ITEM(code, i); +        unsigned long value = PyLong_AsUnsignedLong(o); +        if (value == (unsigned long)-1 && PyErr_Occurred()) { +            break; +        } +        self->code[i] = (SRE_CODE) value; +        if ((unsigned long) self->code[i] != value) { +            PyErr_SetString(PyExc_OverflowError, +                            "regular expression code size limit exceeded"); +            break; +        } +    } +    PyObject_GC_Track(self); + +    if (PyErr_Occurred()) { +        Py_DECREF(self); +        return NULL; +    } + +    if (pattern == Py_None) { +      
  self->isbytes = -1; +    } +    else { +        Py_ssize_t p_length; +        int charsize; +        Py_buffer view; +        view.buf = NULL; +        if (!getstring(pattern, &p_length, &self->isbytes, +                       &charsize, &view)) { +            Py_DECREF(self); +            return NULL; +        } +        if (view.buf) +            PyBuffer_Release(&view); +    } + +    self->pattern = Py_NewRef(pattern); + +    self->flags = flags; + +    self->groups = groups; + +    if (PyDict_GET_SIZE(groupindex) > 0) { +        self->groupindex = Py_NewRef(groupindex); +        if (PyTuple_GET_SIZE(indexgroup) > 0) { +            self->indexgroup = Py_NewRef(indexgroup); +        } +    } + +    if (!_validate(self)) { +        Py_DECREF(self); +        return NULL; +    } + +    return (PyObject*) self; +} + +/*[clinic input] +_sre.template + +    pattern: object +    template: object(subclass_of="&PyList_Type") +        A list containing interleaved literal strings (str or bytes) and group +        indices (int), as returned by re._parser.parse_template(): +            [literal1, group1, ..., literalN, groupN] +    / + +[clinic start generated code]*/ + +static PyObject * +_sre_template_impl(PyObject *module, PyObject *pattern, PyObject *template) +/*[clinic end generated code: output=d51290e596ebca86 input=af55380b27f02942]*/ +{ +    /* template is a list containing interleaved literal strings (str or bytes) +     * and group indices (int), as returned by _parser.parse_template: +     * [literal1, group1, literal2, ..., literalN]. 
+     */ +    _sremodulestate *module_state = get_sre_module_state(module); +    TemplateObject *self = NULL; +    Py_ssize_t n = PyList_GET_SIZE(template); +    if ((n & 1) == 0 || n < 1) { +        goto bad_template; +    } +    n /= 2; +    self = PyObject_GC_NewVar(TemplateObject, module_state->Template_Type, n); +    if (!self) +        return NULL; +    self->chunks = 1 + 2*n; +    self->literal = Py_NewRef(PyList_GET_ITEM(template, 0)); +    for (Py_ssize_t i = 0; i < n; i++) { +        Py_ssize_t index = PyLong_AsSsize_t(PyList_GET_ITEM(template, 2*i+1)); +        if (index == -1 && PyErr_Occurred()) { +            Py_SET_SIZE(self, i); +            Py_DECREF(self); +            return NULL; +        } +        if (index < 0) { +            Py_SET_SIZE(self, i); +            goto bad_template; +        } +        self->items[i].index = index; + +        PyObject *literal = PyList_GET_ITEM(template, 2*i+2); +        // Skip empty literals. +        if ((PyUnicode_Check(literal) && !PyUnicode_GET_LENGTH(literal)) || +            (PyBytes_Check(literal) && !PyBytes_GET_SIZE(literal))) +        { +            literal = NULL; +            self->chunks--; +        } +        self->items[i].literal = Py_XNewRef(literal); +    } +    return (PyObject*) self; + +bad_template: +    PyErr_SetString(PyExc_TypeError, "invalid template"); +    Py_XDECREF(self); +    return NULL; +} + +/* -------------------------------------------------------------------- */ +/* Code validation */ + +/* To learn more about this code, have a look at the _compile() function in +   Lib/sre_compile.py.  The validation functions below checks the code array +   for conformance with the code patterns generated there. + +   The nice thing about the generated code is that it is position-independent: +   all jumps are relative jumps forward.  Also, jumps don't cross each other: +   the target of a later jump is always earlier than the target of an earlier +   jump.  
IOW, this is okay: + +   J---------J-------T--------T +    \         \_____/        / +     \______________________/ + +   but this is not: + +   J---------J-------T--------T +    \_________\_____/        / +               \____________/ + +   It also helps that SRE_CODE is always an unsigned type. +*/ + +/* Defining this one enables tracing of the validator */ +#undef VVERBOSE + +/* Trace macro for the validator */ +#if defined(VVERBOSE) +#define VTRACE(v) printf v +#else +#define VTRACE(v) do {} while(0)  /* do nothing */ +#endif + +/* Report failure */ +#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return -1; } while (0) + +/* Extract opcode, argument, or skip count from code array */ +#define GET_OP                                          \ +    do {                                                \ +        VTRACE(("%p: ", code));                         \ +        if (code >= end) FAIL;                          \ +        op = *code++;                                   \ +        VTRACE(("%lu (op)\n", (unsigned long)op));      \ +    } while (0) +#define GET_ARG                                         \ +    do {                                                \ +        VTRACE(("%p= ", code));                         \ +        if (code >= end) FAIL;                          \ +        arg = *code++;                                  \ +        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \ +    } while (0) +#define GET_SKIP_ADJ(adj)                               \ +    do {                                                \ +        VTRACE(("%p= ", code));                         \ +        if (code >= end) FAIL;                          \ +        skip = *code;                                   \ +        VTRACE(("%lu (skip to %p)\n",                   \ +               (unsigned long)skip, code+skip));        \ +        if (skip-adj > (uintptr_t)(end - code))         \ +            FAIL;                                       \ +        code++;        
                                 \ +    } while (0) +#define GET_SKIP GET_SKIP_ADJ(0) + +static int +_validate_charset(SRE_CODE *code, SRE_CODE *end) +{ +    /* Some variables are manipulated by the macros above */ +    SRE_CODE op; +    SRE_CODE arg; +    SRE_CODE offset; +    int i; + +    while (code < end) { +        GET_OP; +        switch (op) { + +        case SRE_OP_NEGATE: +            break; + +        case SRE_OP_LITERAL: +            GET_ARG; +            break; + +        case SRE_OP_RANGE: +        case SRE_OP_RANGE_UNI_IGNORE: +            GET_ARG; +            GET_ARG; +            break; + +        case SRE_OP_CHARSET: +            offset = 256/SRE_CODE_BITS; /* 256-bit bitmap */ +            if (offset > (uintptr_t)(end - code)) +                FAIL; +            code += offset; +            break; + +        case SRE_OP_BIGCHARSET: +            GET_ARG; /* Number of blocks */ +            offset = 256/sizeof(SRE_CODE); /* 256-byte table */ +            if (offset > (uintptr_t)(end - code)) +                FAIL; +            /* Make sure that each byte points to a valid block */ +            for (i = 0; i < 256; i++) { +                if (((unsigned char *)code)[i] >= arg) +                    FAIL; +            } +            code += offset; +            offset = arg * (256/SRE_CODE_BITS); /* 256-bit bitmap times arg */ +            if (offset > (uintptr_t)(end - code)) +                FAIL; +            code += offset; +            break; + +        case SRE_OP_CATEGORY: +            GET_ARG; +            switch (arg) { +            case SRE_CATEGORY_DIGIT: +            case SRE_CATEGORY_NOT_DIGIT: +            case SRE_CATEGORY_SPACE: +            case SRE_CATEGORY_NOT_SPACE: +            case SRE_CATEGORY_WORD: +            case SRE_CATEGORY_NOT_WORD: +            case SRE_CATEGORY_LINEBREAK: +            case SRE_CATEGORY_NOT_LINEBREAK: +            case SRE_CATEGORY_LOC_WORD: +            case SRE_CATEGORY_LOC_NOT_WORD: +            case 
SRE_CATEGORY_UNI_DIGIT: +            case SRE_CATEGORY_UNI_NOT_DIGIT: +            case SRE_CATEGORY_UNI_SPACE: +            case SRE_CATEGORY_UNI_NOT_SPACE: +            case SRE_CATEGORY_UNI_WORD: +            case SRE_CATEGORY_UNI_NOT_WORD: +            case SRE_CATEGORY_UNI_LINEBREAK: +            case SRE_CATEGORY_UNI_NOT_LINEBREAK: +                break; +            default: +                FAIL; +            } +            break; + +        default: +            FAIL; + +        } +    } + +    return 0; +} + +/* Returns 0 on success, -1 on failure, and 1 if the last op is JUMP. */ +static int +_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) +{ +    /* Some variables are manipulated by the macros above */ +    SRE_CODE op; +    SRE_CODE arg; +    SRE_CODE skip; + +    VTRACE(("code=%p, end=%p\n", code, end)); + +    if (code > end) +        FAIL; + +    while (code < end) { +        GET_OP; +        switch (op) { + +        case SRE_OP_MARK: +            /* We don't check whether marks are properly nested; the +               sre_match() code is robust even if they don't, and the worst +               you can get is nonsensical match results. 
*/ +            GET_ARG; +            if (arg > 2 * (size_t)groups + 1) { +                VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups)); +                FAIL; +            } +            break; + +        case SRE_OP_LITERAL: +        case SRE_OP_NOT_LITERAL: +        case SRE_OP_LITERAL_IGNORE: +        case SRE_OP_NOT_LITERAL_IGNORE: +        case SRE_OP_LITERAL_UNI_IGNORE: +        case SRE_OP_NOT_LITERAL_UNI_IGNORE: +        case SRE_OP_LITERAL_LOC_IGNORE: +        case SRE_OP_NOT_LITERAL_LOC_IGNORE: +            GET_ARG; +            /* The arg is just a character, nothing to check */ +            break; + +        case SRE_OP_SUCCESS: +        case SRE_OP_FAILURE: +            /* Nothing to check; these normally end the matching process */ +            break; + +        case SRE_OP_AT: +            GET_ARG; +            switch (arg) { +            case SRE_AT_BEGINNING: +            case SRE_AT_BEGINNING_STRING: +            case SRE_AT_BEGINNING_LINE: +            case SRE_AT_END: +            case SRE_AT_END_LINE: +            case SRE_AT_END_STRING: +            case SRE_AT_BOUNDARY: +            case SRE_AT_NON_BOUNDARY: +            case SRE_AT_LOC_BOUNDARY: +            case SRE_AT_LOC_NON_BOUNDARY: +            case SRE_AT_UNI_BOUNDARY: +            case SRE_AT_UNI_NON_BOUNDARY: +                break; +            default: +                FAIL; +            } +            break; + +        case SRE_OP_ANY: +        case SRE_OP_ANY_ALL: +            /* These have no operands */ +            break; + +        case SRE_OP_IN: +        case SRE_OP_IN_IGNORE: +        case SRE_OP_IN_UNI_IGNORE: +        case SRE_OP_IN_LOC_IGNORE: +            GET_SKIP; +            /* Stop 1 before the end; we check the FAILURE below */ +            if (_validate_charset(code, code+skip-2)) +                FAIL; +            if (code[skip-2] != SRE_OP_FAILURE) +                FAIL; +            code += skip-1; +            break; + +        case SRE_OP_INFO: 
+            { +                /* A minimal info field is +                   <INFO> <1=skip> <2=flags> <3=min> <4=max>; +                   If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags, +                   more follows. */ +                SRE_CODE flags, i; +                SRE_CODE *newcode; +                GET_SKIP; +                newcode = code+skip-1; +                GET_ARG; flags = arg; +                GET_ARG; +                GET_ARG; +                /* Check that only valid flags are present */ +                if ((flags & ~(SRE_INFO_PREFIX | +                               SRE_INFO_LITERAL | +                               SRE_INFO_CHARSET)) != 0) +                    FAIL; +                /* PREFIX and CHARSET are mutually exclusive */ +                if ((flags & SRE_INFO_PREFIX) && +                    (flags & SRE_INFO_CHARSET)) +                    FAIL; +                /* LITERAL implies PREFIX */ +                if ((flags & SRE_INFO_LITERAL) && +                    !(flags & SRE_INFO_PREFIX)) +                    FAIL; +                /* Validate the prefix */ +                if (flags & SRE_INFO_PREFIX) { +                    SRE_CODE prefix_len; +                    GET_ARG; prefix_len = arg; +                    GET_ARG; +                    /* Here comes the prefix string */ +                    if (prefix_len > (uintptr_t)(newcode - code)) +                        FAIL; +                    code += prefix_len; +                    /* And here comes the overlap table */ +                    if (prefix_len > (uintptr_t)(newcode - code)) +                        FAIL; +                    /* Each overlap value should be < prefix_len */ +                    for (i = 0; i < prefix_len; i++) { +                        if (code[i] >= prefix_len) +                            FAIL; +                    } +                    code += prefix_len; +                } +                /* Validate the charset */ +                if 
(flags & SRE_INFO_CHARSET) { +                    if (_validate_charset(code, newcode-1)) +                        FAIL; +                    if (newcode[-1] != SRE_OP_FAILURE) +                        FAIL; +                    code = newcode; +                } +                else if (code != newcode) { +                  VTRACE(("code=%p, newcode=%p\n", code, newcode)); +                    FAIL; +                } +            } +            break; + +        case SRE_OP_BRANCH: +            { +                SRE_CODE *target = NULL; +                for (;;) { +                    GET_SKIP; +                    if (skip == 0) +                        break; +                    /* Stop 2 before the end; we check the JUMP below */ +                    if (_validate_inner(code, code+skip-3, groups)) +                        FAIL; +                    code += skip-3; +                    /* Check that it ends with a JUMP, and that each JUMP +                       has the same target */ +                    GET_OP; +                    if (op != SRE_OP_JUMP) +                        FAIL; +                    GET_SKIP; +                    if (target == NULL) +                        target = code+skip-1; +                    else if (code+skip-1 != target) +                        FAIL; +                } +                if (code != target) +                    FAIL; +            } +            break; + +        case SRE_OP_REPEAT_ONE: +        case SRE_OP_MIN_REPEAT_ONE: +        case SRE_OP_POSSESSIVE_REPEAT_ONE: +            { +                SRE_CODE min, max; +                GET_SKIP; +                GET_ARG; min = arg; +                GET_ARG; max = arg; +                if (min > max) +                    FAIL; +                if (max > SRE_MAXREPEAT) +                    FAIL; +                if (_validate_inner(code, code+skip-4, groups)) +                    FAIL; +                code += skip-4; +                GET_OP; +                if 
(op != SRE_OP_SUCCESS) +                    FAIL; +            } +            break; + +        case SRE_OP_REPEAT: +        case SRE_OP_POSSESSIVE_REPEAT: +            { +                SRE_CODE op1 = op, min, max; +                GET_SKIP; +                GET_ARG; min = arg; +                GET_ARG; max = arg; +                if (min > max) +                    FAIL; +                if (max > SRE_MAXREPEAT) +                    FAIL; +                if (_validate_inner(code, code+skip-3, groups)) +                    FAIL; +                code += skip-3; +                GET_OP; +                if (op1 == SRE_OP_POSSESSIVE_REPEAT) { +                    if (op != SRE_OP_SUCCESS) +                        FAIL; +                } +                else { +                    if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL) +                        FAIL; +                } +            } +            break; + +        case SRE_OP_ATOMIC_GROUP: +            { +                GET_SKIP; +                if (_validate_inner(code, code+skip-2, groups)) +                    FAIL; +                code += skip-2; +                GET_OP; +                if (op != SRE_OP_SUCCESS) +                    FAIL; +            } +            break; + +        case SRE_OP_GROUPREF: +        case SRE_OP_GROUPREF_IGNORE: +        case SRE_OP_GROUPREF_UNI_IGNORE: +        case SRE_OP_GROUPREF_LOC_IGNORE: +            GET_ARG; +            if (arg >= (size_t)groups) +                FAIL; +            break; + +        case SRE_OP_GROUPREF_EXISTS: +            /* The regex syntax for this is: '(?(group)then|else)', where +               'group' is either an integer group number or a group name, +               'then' and 'else' are sub-regexes, and 'else' is optional. */ +            GET_ARG; +            if (arg >= (size_t)groups) +                FAIL; +            GET_SKIP_ADJ(1); +            code--; /* The skip is relative to the first arg! 
*/ +            /* There are two possibilities here: if there is both a 'then' +               part and an 'else' part, the generated code looks like: + +               GROUPREF_EXISTS +               <group> +               <skipyes> +               ...then part... +               JUMP +               <skipno> +               (<skipyes> jumps here) +               ...else part... +               (<skipno> jumps here) + +               If there is only a 'then' part, it looks like: + +               GROUPREF_EXISTS +               <group> +               <skip> +               ...then part... +               (<skip> jumps here) + +               There is no direct way to decide which it is, and we don't want +               to allow arbitrary jumps anywhere in the code; so we just look +               for a JUMP opcode preceding our skip target. +            */ +            VTRACE(("then part:\n")); +            int rc = _validate_inner(code+1, code+skip-1, groups); +            if (rc == 1) { +                VTRACE(("else part:\n")); +                code += skip-2; /* Position after JUMP, at <skipno> */ +                GET_SKIP; +                rc = _validate_inner(code, code+skip-1, groups); +            } +            if (rc) +                FAIL; +            code += skip-1; +            break; + +        case SRE_OP_ASSERT: +        case SRE_OP_ASSERT_NOT: +            GET_SKIP; +            GET_ARG; /* 0 for lookahead, width for lookbehind */ +            code--; /* Back up over arg to simplify math below */ +            /* Stop 1 before the end; we check the SUCCESS below */ +            if (_validate_inner(code+1, code+skip-2, groups)) +                FAIL; +            code += skip-2; +            GET_OP; +            if (op != SRE_OP_SUCCESS) +                FAIL; +            break; + +        case SRE_OP_JUMP: +            if (code + 1 != end) +                FAIL; +            VTRACE(("JUMP: %d\n", __LINE__)); +            return 1; + +        
default: +            FAIL; + +        } +    } + +    VTRACE(("okay\n")); +    return 0; +} + +static int +_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups) +{ +    if (groups < 0 || (size_t)groups > SRE_MAXGROUPS || +        code >= end || end[-1] != SRE_OP_SUCCESS) +        FAIL; +    return _validate_inner(code, end-1, groups); +} + +static int +_validate(PatternObject *self) +{ +    if (_validate_outer(self->code, self->code+self->codesize, self->groups)) +    { +        PyErr_SetString(PyExc_RuntimeError, "invalid SRE code"); +        return 0; +    } +    else +        VTRACE(("Success!\n")); +    return 1; +} + +/* -------------------------------------------------------------------- */ +/* match methods */ + +static int +match_traverse(MatchObject *self, visitproc visit, void *arg) +{ +    Py_VISIT(Py_TYPE(self)); +    Py_VISIT(self->string); +    Py_VISIT(self->regs); +    Py_VISIT(self->pattern); +    return 0; +} + +static int +match_clear(MatchObject *self) +{ +    Py_CLEAR(self->string); +    Py_CLEAR(self->regs); +    Py_CLEAR(self->pattern); +    return 0; +} + +static void +match_dealloc(MatchObject* self) +{ +    PyTypeObject *tp = Py_TYPE(self); + +    PyObject_GC_UnTrack(self); +    (void)match_clear(self); +    tp->tp_free(self); +    Py_DECREF(tp); +} + +static PyObject* +match_getslice_by_index(MatchObject* self, Py_ssize_t index, PyObject* def) +{ +    Py_ssize_t length; +    int isbytes, charsize; +    Py_buffer view; +    PyObject *result; +    const void* ptr; +    Py_ssize_t i, j; + +    assert(0 <= index && index < self->groups); +    index *= 2; + +    if (self->string == Py_None || self->mark[index] < 0) { +        /* return default value if the string or group is undefined */ +        return Py_NewRef(def); +    } + +    ptr = getstring(self->string, &length, &isbytes, &charsize, &view); +    if (ptr == NULL) +        return NULL; + +    i = self->mark[index]; +    j = self->mark[index+1]; +    i = Py_MIN(i, length); + 
   j = Py_MIN(j, length); +    result = getslice(isbytes, ptr, self->string, i, j); +    if (isbytes && view.buf != NULL) +        PyBuffer_Release(&view); +    return result; +} + +static Py_ssize_t +match_getindex(MatchObject* self, PyObject* index) +{ +    Py_ssize_t i; + +    if (index == NULL) +        /* Default value */ +        return 0; + +    if (PyIndex_Check(index)) { +        i = PyNumber_AsSsize_t(index, NULL); +    } +    else { +        i = -1; + +        if (self->pattern->groupindex) { +            index = PyDict_GetItemWithError(self->pattern->groupindex, index); +            if (index && PyLong_Check(index)) { +                i = PyLong_AsSsize_t(index); +            } +        } +    } +    if (i < 0 || i >= self->groups) { +        /* raise IndexError if we were given a bad group number */ +        if (!PyErr_Occurred()) { +            PyErr_SetString(PyExc_IndexError, "no such group"); +        } +        return -1; +    } + +    return i; +} + +static PyObject* +match_getslice(MatchObject* self, PyObject* index, PyObject* def) +{ +    Py_ssize_t i = match_getindex(self, index); + +    if (i < 0) { +        return NULL; +    } + +    return match_getslice_by_index(self, i, def); +} + +/*[clinic input] +_sre.SRE_Match.expand + +    template: object + +Return the string obtained by doing backslash substitution on the string template, as done by the sub() method. 
+[clinic start generated code]*/ + +static PyObject * +_sre_SRE_Match_expand_impl(MatchObject *self, PyObject *template) +/*[clinic end generated code: output=931b58ccc323c3a1 input=4bfdb22c2f8b146a]*/ +{ +    _sremodulestate *module_state = get_sre_module_state_by_class(Py_TYPE(self)); +    PyObject *filter = compile_template(module_state, self->pattern, template); +    if (filter == NULL) { +        return NULL; +    } +    PyObject *result = expand_template((TemplateObject *)filter, self); +    Py_DECREF(filter); +    return result; +} + +static PyObject* +match_group(MatchObject* self, PyObject* args) +{ +    PyObject* result; +    Py_ssize_t i, size; + +    size = PyTuple_GET_SIZE(args); + +    switch (size) { +    case 0: +        result = match_getslice(self, _PyLong_GetZero(), Py_None); +        break; +    case 1: +        result = match_getslice(self, PyTuple_GET_ITEM(args, 0), Py_None); +        break; +    default: +        /* fetch multiple items */ +        result = PyTuple_New(size); +        if (!result) +            return NULL; +        for (i = 0; i < size; i++) { +            PyObject* item = match_getslice( +                self, PyTuple_GET_ITEM(args, i), Py_None +                ); +            if (!item) { +                Py_DECREF(result); +                return NULL; +            } +            PyTuple_SET_ITEM(result, i, item); +        } +        break; +    } +    return result; +} + +static PyObject* +match_getitem(MatchObject* self, PyObject* name) +{ +    return match_getslice(self, name, Py_None); +} + +/*[clinic input] +_sre.SRE_Match.groups + +    default: object = None +        Is used for groups that did not participate in the match. + +Return a tuple containing all the subgroups of the match, from 1. 
+[clinic start generated code]*/ + +static PyObject * +_sre_SRE_Match_groups_impl(MatchObject *self, PyObject *default_value) +/*[clinic end generated code: output=daf8e2641537238a input=bb069ef55dabca91]*/ +{ +    PyObject* result; +    Py_ssize_t index; + +    result = PyTuple_New(self->groups-1); +    if (!result) +        return NULL; + +    for (index = 1; index < self->groups; index++) { +        PyObject* item; +        item = match_getslice_by_index(self, index, default_value); +        if (!item) { +            Py_DECREF(result); +            return NULL; +        } +        PyTuple_SET_ITEM(result, index-1, item); +    } + +    return result; +} + +/*[clinic input] +_sre.SRE_Match.groupdict + +    default: object = None +        Is used for groups that did not participate in the match. + +Return a dictionary containing all the named subgroups of the match, keyed by the subgroup name. +[clinic start generated code]*/ + +static PyObject * +_sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value) +/*[clinic end generated code: output=29917c9073e41757 input=0ded7960b23780aa]*/ +{ +    PyObject *result; +    PyObject *key; +    PyObject *value; +    Py_ssize_t pos = 0; +    Py_hash_t hash; + +    result = PyDict_New(); +    if (!result || !self->pattern->groupindex) +        return result; + +    while (_PyDict_Next(self->pattern->groupindex, &pos, &key, &value, &hash)) { +        int status; +        Py_INCREF(key); +        value = match_getslice(self, key, default_value); +        if (!value) { +            Py_DECREF(key); +            goto failed; +        } +        status = _PyDict_SetItem_KnownHash(result, key, value, hash); +        Py_DECREF(value); +        Py_DECREF(key); +        if (status < 0) +            goto failed; +    } + +    return result; + +failed: +    Py_DECREF(result); +    return NULL; +} + +/*[clinic input] +_sre.SRE_Match.start -> Py_ssize_t + +    group: object(c_default="NULL") = 0 +    / + +Return index of the 
start of the substring matched by group. +[clinic start generated code]*/ + +static Py_ssize_t +_sre_SRE_Match_start_impl(MatchObject *self, PyObject *group) +/*[clinic end generated code: output=3f6e7f9df2fb5201 input=ced8e4ed4b33ee6c]*/ +{ +    Py_ssize_t index = match_getindex(self, group); + +    if (index < 0) { +        return -1; +    } + +    /* mark is -1 if group is undefined */ +    return self->mark[index*2]; +} + +/*[clinic input] +_sre.SRE_Match.end -> Py_ssize_t + +    group: object(c_default="NULL") = 0 +    / + +Return index of the end of the substring matched by group. +[clinic start generated code]*/ + +static Py_ssize_t +_sre_SRE_Match_end_impl(MatchObject *self, PyObject *group) +/*[clinic end generated code: output=f4240b09911f7692 input=1b799560c7f3d7e6]*/ +{ +    Py_ssize_t index = match_getindex(self, group); + +    if (index < 0) { +        return -1; +    } + +    /* mark is -1 if group is undefined */ +    return self->mark[index*2+1]; +} + +LOCAL(PyObject*) +_pair(Py_ssize_t i1, Py_ssize_t i2) +{ +    PyObject* pair; +    PyObject* item; + +    pair = PyTuple_New(2); +    if (!pair) +        return NULL; + +    item = PyLong_FromSsize_t(i1); +    if (!item) +        goto error; +    PyTuple_SET_ITEM(pair, 0, item); + +    item = PyLong_FromSsize_t(i2); +    if (!item) +        goto error; +    PyTuple_SET_ITEM(pair, 1, item); + +    return pair; + +  error: +    Py_DECREF(pair); +    return NULL; +} + +/*[clinic input] +_sre.SRE_Match.span + +    group: object(c_default="NULL") = 0 +    / + +For match object m, return the 2-tuple (m.start(group), m.end(group)). 
+[clinic start generated code]*/ + +static PyObject * +_sre_SRE_Match_span_impl(MatchObject *self, PyObject *group) +/*[clinic end generated code: output=f02ae40594d14fe6 input=8fa6014e982d71d4]*/ +{ +    Py_ssize_t index = match_getindex(self, group); + +    if (index < 0) { +        return NULL; +    } + +    /* marks are -1 if group is undefined */ +    return _pair(self->mark[index*2], self->mark[index*2+1]); +} + +static PyObject* +match_regs(MatchObject* self) +{ +    PyObject* regs; +    PyObject* item; +    Py_ssize_t index; + +    regs = PyTuple_New(self->groups); +    if (!regs) +        return NULL; + +    for (index = 0; index < self->groups; index++) { +        item = _pair(self->mark[index*2], self->mark[index*2+1]); +        if (!item) { +            Py_DECREF(regs); +            return NULL; +        } +        PyTuple_SET_ITEM(regs, index, item); +    } + +    self->regs = Py_NewRef(regs); + +    return regs; +} + +/*[clinic input] +_sre.SRE_Match.__copy__ + +[clinic start generated code]*/ + +static PyObject * +_sre_SRE_Match___copy___impl(MatchObject *self) +/*[clinic end generated code: output=a779c5fc8b5b4eb4 input=3bb4d30b6baddb5b]*/ +{ +    return Py_NewRef(self); +} + +/*[clinic input] +_sre.SRE_Match.__deepcopy__ + +    memo: object +    / + +[clinic start generated code]*/ + +static PyObject * +_sre_SRE_Match___deepcopy__(MatchObject *self, PyObject *memo) +/*[clinic end generated code: output=ba7cb46d655e4ee2 input=779d12a31c2c325e]*/ +{ +    return Py_NewRef(self); +} + +PyDoc_STRVAR(match_doc, +"The result of re.match() and re.search().\n\ +Match objects always have a boolean value of True."); + +PyDoc_STRVAR(match_group_doc, +"group([group1, ...]) -> str or tuple.\n\ +    Return subgroup(s) of the match by indices or names.\n\ +    For 0 returns the entire match."); + +static PyObject * +match_lastindex_get(MatchObject *self, void *Py_UNUSED(ignored)) +{ +    if (self->lastindex >= 0) +        return 
PyLong_FromSsize_t(self->lastindex); +    Py_RETURN_NONE; +} + +static PyObject * +match_lastgroup_get(MatchObject *self, void *Py_UNUSED(ignored)) +{ +    if (self->pattern->indexgroup && +        self->lastindex >= 0 && +        self->lastindex < PyTuple_GET_SIZE(self->pattern->indexgroup)) +    { +        PyObject *result = PyTuple_GET_ITEM(self->pattern->indexgroup, +                                            self->lastindex); +        return Py_NewRef(result); +    } +    Py_RETURN_NONE; +} + +static PyObject * +match_regs_get(MatchObject *self, void *Py_UNUSED(ignored)) +{ +    if (self->regs) { +        return Py_NewRef(self->regs); +    } else +        return match_regs(self); +} + +static PyObject * +match_repr(MatchObject *self) +{ +    PyObject *result; +    PyObject *group0 = match_getslice_by_index(self, 0, Py_None); +    if (group0 == NULL) +        return NULL; +    result = PyUnicode_FromFormat( +            "<%s object; span=(%zd, %zd), match=%.50R>", +            Py_TYPE(self)->tp_name, +            self->mark[0], self->mark[1], group0); +    Py_DECREF(group0); +    return result; +} + + +static PyObject* +pattern_new_match(_sremodulestate* module_state, +                  PatternObject* pattern, +                  SRE_STATE* state, +                  Py_ssize_t status) +{ +    /* create match object (from state object) */ + +    MatchObject* match; +    Py_ssize_t i, j; +    char* base; +    int n; + +    if (status > 0) { + +        /* create match object (with room for extra group marks) */ +        /* coverity[ampersand_in_size] */ +        match = PyObject_GC_NewVar(MatchObject, +                                   module_state->Match_Type, +                                   2*(pattern->groups+1)); +        if (!match) +            return NULL; + +        match->pattern = (PatternObject*)Py_NewRef(pattern); + +        match->string = Py_NewRef(state->string); + +        match->regs = NULL; +        match->groups = pattern->groups+1; + +      
  /* fill in group slices */ + +        base = (char*) state->beginning; +        n = state->charsize; + +        match->mark[0] = ((char*) state->start - base) / n; +        match->mark[1] = ((char*) state->ptr - base) / n; + +        for (i = j = 0; i < pattern->groups; i++, j+=2) +            if (j+1 <= state->lastmark && state->mark[j] && state->mark[j+1]) { +                match->mark[j+2] = ((char*) state->mark[j] - base) / n; +                match->mark[j+3] = ((char*) state->mark[j+1] - base) / n; + +                /* check wrong span */ +                if (match->mark[j+2] > match->mark[j+3]) { +                    PyErr_SetString(PyExc_SystemError, +                                    "The span of capturing group is wrong," +                                    " please report a bug for the re module."); +                    Py_DECREF(match); +                    return NULL; +                } +            } else +                match->mark[j+2] = match->mark[j+3] = -1; /* undefined */ + +        match->pos = state->pos; +        match->endpos = state->endpos; + +        match->lastindex = state->lastindex; + +        PyObject_GC_Track(match); +        return (PyObject*) match; + +    } else if (status == 0) { + +        /* no match */ +        Py_RETURN_NONE; + +    } + +    /* internal error */ +    pattern_error(status); +    return NULL; +} + + +/* -------------------------------------------------------------------- */ +/* scanner methods (experimental) */ + +static int +scanner_traverse(ScannerObject *self, visitproc visit, void *arg) +{ +    Py_VISIT(Py_TYPE(self)); +    Py_VISIT(self->pattern); +    return 0; +} + +static int +scanner_clear(ScannerObject *self) +{ +    Py_CLEAR(self->pattern); +    return 0; +} + +static void +scanner_dealloc(ScannerObject* self) +{ +    PyTypeObject *tp = Py_TYPE(self); + +    PyObject_GC_UnTrack(self); +    state_fini(&self->state); +    (void)scanner_clear(self); +    tp->tp_free(self); +    Py_DECREF(tp); 
+} + +static int +scanner_begin(ScannerObject* self) +{ +    if (self->executing) { +        PyErr_SetString(PyExc_ValueError, +                        "regular expression scanner already executing"); +        return 0; +    } +    self->executing = 1; +    return 1; +} + +static void +scanner_end(ScannerObject* self) +{ +    assert(self->executing); +    self->executing = 0; +} + +/*[clinic input] +_sre.SRE_Scanner.match + +    cls: defining_class +    / + +[clinic start generated code]*/ + +static PyObject * +_sre_SRE_Scanner_match_impl(ScannerObject *self, PyTypeObject *cls) +/*[clinic end generated code: output=6e22c149dc0f0325 input=b5146e1f30278cb7]*/ +{ +    _sremodulestate *module_state = get_sre_module_state_by_class(cls); +    SRE_STATE* state = &self->state; +    PyObject* match; +    Py_ssize_t status; + +    if (!scanner_begin(self)) { +        return NULL; +    } +    if (state->start == NULL) { +        scanner_end(self); +        Py_RETURN_NONE; +    } + +    state_reset(state); + +    state->ptr = state->start; + +    status = sre_match(state, PatternObject_GetCode(self->pattern)); +    if (PyErr_Occurred()) { +        scanner_end(self); +        return NULL; +    } + +    match = pattern_new_match(module_state, (PatternObject*) self->pattern, +                              state, status); + +    if (status == 0) +        state->start = NULL; +    else { +        state->must_advance = (state->ptr == state->start); +        state->start = state->ptr; +    } + +    scanner_end(self); +    return match; +} + + +/*[clinic input] +_sre.SRE_Scanner.search + +    cls: defining_class +    / + +[clinic start generated code]*/ + +static PyObject * +_sre_SRE_Scanner_search_impl(ScannerObject *self, PyTypeObject *cls) +/*[clinic end generated code: output=23e8fc78013f9161 input=056c2d37171d0bf2]*/ +{ +    _sremodulestate *module_state = get_sre_module_state_by_class(cls); +    SRE_STATE* state = &self->state; +    PyObject* match; +    Py_ssize_t status; + +   
 if (!scanner_begin(self)) { +        return NULL; +    } +    if (state->start == NULL) { +        scanner_end(self); +        Py_RETURN_NONE; +    } + +    state_reset(state); + +    state->ptr = state->start; + +    status = sre_search(state, PatternObject_GetCode(self->pattern)); +    if (PyErr_Occurred()) { +        scanner_end(self); +        return NULL; +    } + +    match = pattern_new_match(module_state, (PatternObject*) self->pattern, +                              state, status); + +    if (status == 0) +        state->start = NULL; +    else { +        state->must_advance = (state->ptr == state->start); +        state->start = state->ptr; +    } + +    scanner_end(self); +    return match; +} + +static PyObject * +pattern_scanner(_sremodulestate *module_state, +                PatternObject *self, +                PyObject *string, +                Py_ssize_t pos, +                Py_ssize_t endpos) +{ +    ScannerObject* scanner; + +    /* create scanner object */ +    scanner = PyObject_GC_New(ScannerObject, module_state->Scanner_Type); +    if (!scanner) +        return NULL; +    scanner->pattern = NULL; +    scanner->executing = 0; + +    /* create search state object */ +    if (!state_init(&scanner->state, self, string, pos, endpos)) { +        Py_DECREF(scanner); +        return NULL; +    } + +    scanner->pattern = Py_NewRef(self); + +    PyObject_GC_Track(scanner); +    return (PyObject*) scanner; +} + +/* -------------------------------------------------------------------- */ +/* template methods */ + +static int +template_traverse(TemplateObject *self, visitproc visit, void *arg) +{ +    Py_VISIT(Py_TYPE(self)); +    Py_VISIT(self->literal); +    for (Py_ssize_t i = 0, n = Py_SIZE(self); i < n; i++) { +        Py_VISIT(self->items[i].literal); +    } +    return 0; +} + +static int +template_clear(TemplateObject *self) +{ +    Py_CLEAR(self->literal); +    for (Py_ssize_t i = 0, n = Py_SIZE(self); i < n; i++) { +        
Py_CLEAR(self->items[i].literal); +    } +    return 0; +} + +static void +template_dealloc(TemplateObject *self) +{ +    PyTypeObject *tp = Py_TYPE(self); + +    PyObject_GC_UnTrack(self); +    (void)template_clear(self); +    tp->tp_free(self); +    Py_DECREF(tp); +} + +static PyObject * +expand_template(TemplateObject *self, MatchObject *match) +{ +    if (Py_SIZE(self) == 0) { +        return Py_NewRef(self->literal); +    } + +    PyObject *result = NULL; +    Py_ssize_t count = 0;  // the number of non-empty chunks +    /* For small number of strings use a buffer allocated on the stack, +     * otherwise use a list object. */ +    PyObject *buffer[10]; +    PyObject **out = buffer; +    PyObject *list = NULL; +    if (self->chunks > (int)Py_ARRAY_LENGTH(buffer) || +        !PyUnicode_Check(self->literal)) +    { +        list = PyList_New(self->chunks); +        if (!list) { +            return NULL; +        } +        out = &PyList_GET_ITEM(list, 0); +    } + +    out[count++] = Py_NewRef(self->literal); +    for (Py_ssize_t i = 0; i < Py_SIZE(self); i++) { +        Py_ssize_t index = self->items[i].index; +        if (index >= match->groups) { +            PyErr_SetString(PyExc_IndexError, "no such group"); +            goto cleanup; +        } +        PyObject *item = match_getslice_by_index(match, index, Py_None); +        if (item == NULL) { +            goto cleanup; +        } +        if (item != Py_None) { +            out[count++] = Py_NewRef(item); +        } +        Py_DECREF(item); + +        PyObject *literal = self->items[i].literal; +        if (literal != NULL) { +            out[count++] = Py_NewRef(literal); +        } +    } + +    if (PyUnicode_Check(self->literal)) { +        result = _PyUnicode_JoinArray(&_Py_STR(empty), out, count); +    } +    else { +        Py_SET_SIZE(list, count); +        result = _PyBytes_Join((PyObject *)&_Py_SINGLETON(bytes_empty), list); +    } + +cleanup: +    if (list) { +        Py_DECREF(list); +    } 
+    else { +        for (Py_ssize_t i = 0; i < count; i++) { +            Py_DECREF(out[i]); +        } +    } +    return result; +} + + +static Py_hash_t +pattern_hash(PatternObject *self) +{ +    Py_hash_t hash, hash2; + +    hash = PyObject_Hash(self->pattern); +    if (hash == -1) { +        return -1; +    } + +    hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize); +    hash ^= hash2; + +    hash ^= self->flags; +    hash ^= self->isbytes; +    hash ^= self->codesize; + +    if (hash == -1) { +        hash = -2; +    } +    return hash; +} + +static PyObject* +pattern_richcompare(PyObject *lefto, PyObject *righto, int op) +{ +    PyTypeObject *tp = Py_TYPE(lefto); +    _sremodulestate *module_state = get_sre_module_state_by_class(tp); +    PatternObject *left, *right; +    int cmp; + +    if (op != Py_EQ && op != Py_NE) { +        Py_RETURN_NOTIMPLEMENTED; +    } + +    if (!Py_IS_TYPE(righto, module_state->Pattern_Type)) +    { +        Py_RETURN_NOTIMPLEMENTED; +    } + +    if (lefto == righto) { +        /* a pattern is equal to itself */ +        return PyBool_FromLong(op == Py_EQ); +    } + +    left = (PatternObject *)lefto; +    right = (PatternObject *)righto; + +    cmp = (left->flags == right->flags +           && left->isbytes == right->isbytes +           && left->codesize == right->codesize); +    if (cmp) { +        /* Compare the code and the pattern because the same pattern can +           produce different codes depending on the locale used to compile the +           pattern when the re.LOCALE flag is used. Don't compare groups, +           indexgroup nor groupindex: they are derivated from the pattern. 
*/ +        cmp = (memcmp(left->code, right->code, +                      sizeof(left->code[0]) * left->codesize) == 0); +    } +    if (cmp) { +        cmp = PyObject_RichCompareBool(left->pattern, right->pattern, +                                       Py_EQ); +        if (cmp < 0) { +            return NULL; +        } +    } +    if (op == Py_NE) { +        cmp = !cmp; +    } +    return PyBool_FromLong(cmp); +} + +#include "clinic/sre.c.h" + +static PyMethodDef pattern_methods[] = { +    _SRE_SRE_PATTERN_MATCH_METHODDEF +    _SRE_SRE_PATTERN_FULLMATCH_METHODDEF +    _SRE_SRE_PATTERN_SEARCH_METHODDEF +    _SRE_SRE_PATTERN_SUB_METHODDEF +    _SRE_SRE_PATTERN_SUBN_METHODDEF +    _SRE_SRE_PATTERN_FINDALL_METHODDEF +    _SRE_SRE_PATTERN_SPLIT_METHODDEF +    _SRE_SRE_PATTERN_FINDITER_METHODDEF +    _SRE_SRE_PATTERN_SCANNER_METHODDEF +    _SRE_SRE_PATTERN___COPY___METHODDEF +    _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF +    {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS, +     PyDoc_STR("See PEP 585")}, +    {NULL, NULL} +}; + +static PyGetSetDef pattern_getset[] = { +    {"groupindex", (getter)pattern_groupindex, (setter)NULL, +      "A dictionary mapping group names to group numbers."}, +    {NULL}  /* Sentinel */ +}; + +#define PAT_OFF(x) offsetof(PatternObject, x) +static PyMemberDef pattern_members[] = { +    {"pattern",    T_OBJECT,    PAT_OFF(pattern),       READONLY, +     "The pattern string from which the RE object was compiled."}, +    {"flags",      T_INT,       PAT_OFF(flags),         READONLY, +     "The regex matching flags."}, +    {"groups",     T_PYSSIZET,  PAT_OFF(groups),        READONLY, +     "The number of capturing groups in the pattern."}, +    {"__weaklistoffset__", T_PYSSIZET, offsetof(PatternObject, weakreflist), READONLY}, +    {NULL}  /* Sentinel */ +}; + +static PyType_Slot pattern_slots[] = { +    {Py_tp_dealloc, (destructor)pattern_dealloc}, +    {Py_tp_repr, (reprfunc)pattern_repr}, +    {Py_tp_hash, (hashfunc)pattern_hash}, 
+    {Py_tp_doc, (void *)pattern_doc}, +    {Py_tp_richcompare, pattern_richcompare}, +    {Py_tp_methods, pattern_methods}, +    {Py_tp_members, pattern_members}, +    {Py_tp_getset, pattern_getset}, +    {Py_tp_traverse, pattern_traverse}, +    {Py_tp_clear, pattern_clear}, +    {0, NULL}, +}; + +static PyType_Spec pattern_spec = { +    .name = "re.Pattern", +    .basicsize = sizeof(PatternObject), +    .itemsize = sizeof(SRE_CODE), +    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE | +              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC), +    .slots = pattern_slots, +}; + +static PyMethodDef match_methods[] = { +    {"group", (PyCFunction) match_group, METH_VARARGS, match_group_doc}, +    _SRE_SRE_MATCH_START_METHODDEF +    _SRE_SRE_MATCH_END_METHODDEF +    _SRE_SRE_MATCH_SPAN_METHODDEF +    _SRE_SRE_MATCH_GROUPS_METHODDEF +    _SRE_SRE_MATCH_GROUPDICT_METHODDEF +    _SRE_SRE_MATCH_EXPAND_METHODDEF +    _SRE_SRE_MATCH___COPY___METHODDEF +    _SRE_SRE_MATCH___DEEPCOPY___METHODDEF +    {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS, +     PyDoc_STR("See PEP 585")}, +    {NULL, NULL} +}; + +static PyGetSetDef match_getset[] = { +    {"lastindex", (getter)match_lastindex_get, (setter)NULL, +     "The integer index of the last matched capturing group."}, +    {"lastgroup", (getter)match_lastgroup_get, (setter)NULL, +     "The name of the last matched capturing group."}, +    {"regs",      (getter)match_regs_get,      (setter)NULL}, +    {NULL} +}; + +#define MATCH_OFF(x) offsetof(MatchObject, x) +static PyMemberDef match_members[] = { +    {"string",  T_OBJECT,   MATCH_OFF(string),  READONLY, +     "The string passed to match() or search()."}, +    {"re",      T_OBJECT,   MATCH_OFF(pattern), READONLY, +     "The regular expression object."}, +    {"pos",     T_PYSSIZET, MATCH_OFF(pos),     READONLY, +     "The index into the string at which the RE engine started looking for a match."}, +    {"endpos",  T_PYSSIZET, 
MATCH_OFF(endpos),  READONLY, +     "The index into the string beyond which the RE engine will not go."}, +    {NULL} +}; + +/* FIXME: implement setattr("string", None) as a special case (to +   detach the associated string, if any */ +static PyType_Slot match_slots[] = { +    {Py_tp_dealloc, match_dealloc}, +    {Py_tp_repr, match_repr}, +    {Py_tp_doc, (void *)match_doc}, +    {Py_tp_methods, match_methods}, +    {Py_tp_members, match_members}, +    {Py_tp_getset, match_getset}, +    {Py_tp_traverse, match_traverse}, +    {Py_tp_clear, match_clear}, + +    /* As mapping. +     * +     * Match objects do not support length or assignment, but do support +     * __getitem__. +     */ +    {Py_mp_subscript, match_getitem}, + +    {0, NULL}, +}; + +static PyType_Spec match_spec = { +    .name = "re.Match", +    .basicsize = sizeof(MatchObject), +    .itemsize = sizeof(Py_ssize_t), +    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE | +              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC), +    .slots = match_slots, +}; + +static PyMethodDef scanner_methods[] = { +    _SRE_SRE_SCANNER_MATCH_METHODDEF +    _SRE_SRE_SCANNER_SEARCH_METHODDEF +    {NULL, NULL} +}; + +#define SCAN_OFF(x) offsetof(ScannerObject, x) +static PyMemberDef scanner_members[] = { +    {"pattern", T_OBJECT, SCAN_OFF(pattern), READONLY}, +    {NULL}  /* Sentinel */ +}; + +static PyType_Slot scanner_slots[] = { +    {Py_tp_dealloc, scanner_dealloc}, +    {Py_tp_methods, scanner_methods}, +    {Py_tp_members, scanner_members}, +    {Py_tp_traverse, scanner_traverse}, +    {Py_tp_clear, scanner_clear}, +    {0, NULL}, +}; + +static PyType_Spec scanner_spec = { +    .name = "_sre.SRE_Scanner", +    .basicsize = sizeof(ScannerObject), +    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE | +              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC), +    .slots = scanner_slots, +}; + +static PyType_Slot template_slots[] = { +    {Py_tp_dealloc, 
template_dealloc}, +    {Py_tp_traverse, template_traverse}, +    {Py_tp_clear, template_clear}, +    {0, NULL}, +}; + +static PyType_Spec template_spec = { +    .name = "_sre.SRE_Template", +    .basicsize = sizeof(TemplateObject), +    .itemsize = sizeof(((TemplateObject *)0)->items[0]), +    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE | +              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_HAVE_GC), +    .slots = template_slots, +}; + +static PyMethodDef _functions[] = { +    _SRE_COMPILE_METHODDEF +    _SRE_TEMPLATE_METHODDEF +    _SRE_GETCODESIZE_METHODDEF +    _SRE_ASCII_ISCASED_METHODDEF +    _SRE_UNICODE_ISCASED_METHODDEF +    _SRE_ASCII_TOLOWER_METHODDEF +    _SRE_UNICODE_TOLOWER_METHODDEF +    {NULL, NULL} +}; + +static int +sre_traverse(PyObject *module, visitproc visit, void *arg) +{ +    _sremodulestate *state = get_sre_module_state(module); + +    Py_VISIT(state->Pattern_Type); +    Py_VISIT(state->Match_Type); +    Py_VISIT(state->Scanner_Type); +    Py_VISIT(state->Template_Type); +    Py_VISIT(state->compile_template); + +    return 0; +} + +static int +sre_clear(PyObject *module) +{ +    _sremodulestate *state = get_sre_module_state(module); + +    Py_CLEAR(state->Pattern_Type); +    Py_CLEAR(state->Match_Type); +    Py_CLEAR(state->Scanner_Type); +    Py_CLEAR(state->Template_Type); +    Py_CLEAR(state->compile_template); + +    return 0; +} + +static void +sre_free(void *module) +{ +    sre_clear((PyObject *)module); +} + +#define CREATE_TYPE(m, type, spec)                                  \ +do {                                                                \ +    type = (PyTypeObject *)PyType_FromModuleAndSpec(m, spec, NULL); \ +    if (type == NULL) {                                             \ +        goto error;                                                 \ +    }                                                               \ +} while (0) + +#define ADD_ULONG_CONSTANT(module, name, value)           \ +    do {   
                                               \ +        PyObject *o = PyLong_FromUnsignedLong(value);     \ +        if (!o)                                           \ +            goto error;                                   \ +        int res = PyModule_AddObjectRef(module, name, o); \ +        Py_DECREF(o);                                     \ +        if (res < 0) {                                    \ +            goto error;                                   \ +        }                                                 \ +} while (0) + +static int +sre_exec(PyObject *m) +{ +    _sremodulestate *state; + +    /* Create heap types */ +    state = get_sre_module_state(m); +    CREATE_TYPE(m, state->Pattern_Type, &pattern_spec); +    CREATE_TYPE(m, state->Match_Type, &match_spec); +    CREATE_TYPE(m, state->Scanner_Type, &scanner_spec); +    CREATE_TYPE(m, state->Template_Type, &template_spec); + +    if (PyModule_AddIntConstant(m, "MAGIC", SRE_MAGIC) < 0) { +        goto error; +    } + +    if (PyModule_AddIntConstant(m, "CODESIZE", sizeof(SRE_CODE)) < 0) { +        goto error; +    } + +    ADD_ULONG_CONSTANT(m, "MAXREPEAT", SRE_MAXREPEAT); +    ADD_ULONG_CONSTANT(m, "MAXGROUPS", SRE_MAXGROUPS); + +    if (PyModule_AddStringConstant(m, "copyright", copyright) < 0) { +        goto error; +    } + +    return 0; + +error: +    return -1; +} + +static PyModuleDef_Slot sre_slots[] = { +    {Py_mod_exec, sre_exec}, +    {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, +    {0, NULL}, +}; + +static struct PyModuleDef sremodule = { +    .m_base = PyModuleDef_HEAD_INIT, +    .m_name = "_sre", +    .m_size = sizeof(_sremodulestate), +    .m_methods = _functions, +    .m_slots = sre_slots, +    .m_traverse = sre_traverse, +    .m_free = sre_free, +    .m_clear = sre_clear, +}; + +PyMODINIT_FUNC +PyInit__sre(void) +{ +    return PyModuleDef_Init(&sremodule); +} + +/* vim:ts=4:sw=4:et +*/ diff --git a/contrib/tools/python3/Modules/_sre/sre.h 
b/contrib/tools/python3/Modules/_sre/sre.h new file mode 100644 index 00000000000..a0f235606e2 --- /dev/null +++ b/contrib/tools/python3/Modules/_sre/sre.h @@ -0,0 +1,107 @@ +/* + * Secret Labs' Regular Expression Engine + * + * regular expression matching engine + * + * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved. + * + * See the sre.c file for information on usage and redistribution. + */ + +#ifndef SRE_INCLUDED +#define SRE_INCLUDED + +#include "sre_constants.h" + +/* size of a code word (must be unsigned short or larger, and +   large enough to hold a UCS4 character) */ +#define SRE_CODE Py_UCS4 +#if SIZEOF_SIZE_T > 4 +# define SRE_MAXREPEAT (~(SRE_CODE)0) +# define SRE_MAXGROUPS ((SRE_CODE)INT32_MAX / 2) +#else +# define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX) +# define SRE_MAXGROUPS ((SRE_CODE)PY_SSIZE_T_MAX / SIZEOF_VOID_P / 2) +#endif + +typedef struct { +    PyObject_VAR_HEAD +    Py_ssize_t groups; /* must be first! */ +    PyObject* groupindex; /* dict */ +    PyObject* indexgroup; /* tuple */ +    /* compatibility */ +    PyObject* pattern; /* pattern source (or None) */ +    int flags; /* flags used when compiling pattern source */ +    PyObject *weakreflist; /* List of weak references */ +    int isbytes; /* pattern type (1 - bytes, 0 - string, -1 - None) */ +    /* pattern code */ +    Py_ssize_t codesize; +    SRE_CODE code[1]; +} PatternObject; + +#define PatternObject_GetCode(o) (((PatternObject*)(o))->code) + +typedef struct { +    PyObject_VAR_HEAD +    PyObject* string; /* link to the target string (must be first) */ +    PyObject* regs; /* cached list of matching spans */ +    PatternObject* pattern; /* link to the regex (pattern) object */ +    Py_ssize_t pos, endpos; /* current target slice */ +    Py_ssize_t lastindex; /* last index marker seen by the engine (-1 if none) */ +    Py_ssize_t groups; /* number of groups (start/end marks) */ +    Py_ssize_t mark[1]; +} MatchObject; + +typedef struct { +    PyObject_VAR_HEAD 
+    Py_ssize_t chunks;  /* the number of group references and non-NULL literals +                         * self->chunks <= 2*Py_SIZE(self) + 1 */ +    PyObject *literal; +    struct { +        Py_ssize_t index; +        PyObject *literal;  /* NULL if empty */ +    } items[0]; +} TemplateObject; + +typedef struct SRE_REPEAT_T { +    Py_ssize_t count; +    const SRE_CODE* pattern; /* points to REPEAT operator arguments */ +    const void* last_ptr; /* helper to check for infinite loops */ +    struct SRE_REPEAT_T *prev; /* points to previous repeat context */ +} SRE_REPEAT; + +typedef struct { +    /* string pointers */ +    const void* ptr; /* current position (also end of current slice) */ +    const void* beginning; /* start of original string */ +    const void* start; /* start of current slice */ +    const void* end; /* end of original string */ +    /* attributes for the match object */ +    PyObject* string; +    Py_buffer buffer; +    Py_ssize_t pos, endpos; +    int isbytes; +    int charsize; /* character size */ +    int match_all; +    int must_advance; +    /* marks */ +    int lastmark; +    int lastindex; +    const void** mark; +    /* dynamically allocated stuff */ +    char* data_stack; +    size_t data_stack_size; +    size_t data_stack_base; +    /* current repeat context */ +    SRE_REPEAT *repeat; +    unsigned int sigcount; +} SRE_STATE; + +typedef struct { +    PyObject_HEAD +    PyObject* pattern; +    SRE_STATE state; +    int executing; +} ScannerObject; + +#endif diff --git a/contrib/tools/python3/Modules/_sre/sre_constants.h b/contrib/tools/python3/Modules/_sre/sre_constants.h new file mode 100644 index 00000000000..b5692292f65 --- /dev/null +++ b/contrib/tools/python3/Modules/_sre/sre_constants.h @@ -0,0 +1,99 @@ +/* + * Secret Labs' Regular Expression Engine + * + * regular expression matching engine + * + * Auto-generated by Tools/build/generate_sre_constants.py from + * Lib/re/_constants.py. 
+ * + * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved. + * + * See the sre.c file for information on usage and redistribution. + */ + +#define SRE_MAGIC 20221023 +#define SRE_OP_FAILURE 0 +#define SRE_OP_SUCCESS 1 +#define SRE_OP_ANY 2 +#define SRE_OP_ANY_ALL 3 +#define SRE_OP_ASSERT 4 +#define SRE_OP_ASSERT_NOT 5 +#define SRE_OP_AT 6 +#define SRE_OP_BRANCH 7 +#define SRE_OP_CATEGORY 8 +#define SRE_OP_CHARSET 9 +#define SRE_OP_BIGCHARSET 10 +#define SRE_OP_GROUPREF 11 +#define SRE_OP_GROUPREF_EXISTS 12 +#define SRE_OP_IN 13 +#define SRE_OP_INFO 14 +#define SRE_OP_JUMP 15 +#define SRE_OP_LITERAL 16 +#define SRE_OP_MARK 17 +#define SRE_OP_MAX_UNTIL 18 +#define SRE_OP_MIN_UNTIL 19 +#define SRE_OP_NOT_LITERAL 20 +#define SRE_OP_NEGATE 21 +#define SRE_OP_RANGE 22 +#define SRE_OP_REPEAT 23 +#define SRE_OP_REPEAT_ONE 24 +#define SRE_OP_SUBPATTERN 25 +#define SRE_OP_MIN_REPEAT_ONE 26 +#define SRE_OP_ATOMIC_GROUP 27 +#define SRE_OP_POSSESSIVE_REPEAT 28 +#define SRE_OP_POSSESSIVE_REPEAT_ONE 29 +#define SRE_OP_GROUPREF_IGNORE 30 +#define SRE_OP_IN_IGNORE 31 +#define SRE_OP_LITERAL_IGNORE 32 +#define SRE_OP_NOT_LITERAL_IGNORE 33 +#define SRE_OP_GROUPREF_LOC_IGNORE 34 +#define SRE_OP_IN_LOC_IGNORE 35 +#define SRE_OP_LITERAL_LOC_IGNORE 36 +#define SRE_OP_NOT_LITERAL_LOC_IGNORE 37 +#define SRE_OP_GROUPREF_UNI_IGNORE 38 +#define SRE_OP_IN_UNI_IGNORE 39 +#define SRE_OP_LITERAL_UNI_IGNORE 40 +#define SRE_OP_NOT_LITERAL_UNI_IGNORE 41 +#define SRE_OP_RANGE_UNI_IGNORE 42 +#define SRE_AT_BEGINNING 0 +#define SRE_AT_BEGINNING_LINE 1 +#define SRE_AT_BEGINNING_STRING 2 +#define SRE_AT_BOUNDARY 3 +#define SRE_AT_NON_BOUNDARY 4 +#define SRE_AT_END 5 +#define SRE_AT_END_LINE 6 +#define SRE_AT_END_STRING 7 +#define SRE_AT_LOC_BOUNDARY 8 +#define SRE_AT_LOC_NON_BOUNDARY 9 +#define SRE_AT_UNI_BOUNDARY 10 +#define SRE_AT_UNI_NON_BOUNDARY 11 +#define SRE_CATEGORY_DIGIT 0 +#define SRE_CATEGORY_NOT_DIGIT 1 +#define SRE_CATEGORY_SPACE 2 +#define SRE_CATEGORY_NOT_SPACE 3 +#define 
SRE_CATEGORY_WORD 4 +#define SRE_CATEGORY_NOT_WORD 5 +#define SRE_CATEGORY_LINEBREAK 6 +#define SRE_CATEGORY_NOT_LINEBREAK 7 +#define SRE_CATEGORY_LOC_WORD 8 +#define SRE_CATEGORY_LOC_NOT_WORD 9 +#define SRE_CATEGORY_UNI_DIGIT 10 +#define SRE_CATEGORY_UNI_NOT_DIGIT 11 +#define SRE_CATEGORY_UNI_SPACE 12 +#define SRE_CATEGORY_UNI_NOT_SPACE 13 +#define SRE_CATEGORY_UNI_WORD 14 +#define SRE_CATEGORY_UNI_NOT_WORD 15 +#define SRE_CATEGORY_UNI_LINEBREAK 16 +#define SRE_CATEGORY_UNI_NOT_LINEBREAK 17 +#define SRE_FLAG_TEMPLATE 1 +#define SRE_FLAG_IGNORECASE 2 +#define SRE_FLAG_LOCALE 4 +#define SRE_FLAG_MULTILINE 8 +#define SRE_FLAG_DOTALL 16 +#define SRE_FLAG_UNICODE 32 +#define SRE_FLAG_VERBOSE 64 +#define SRE_FLAG_DEBUG 128 +#define SRE_FLAG_ASCII 256 +#define SRE_INFO_PREFIX 1 +#define SRE_INFO_LITERAL 2 +#define SRE_INFO_CHARSET 4 diff --git a/contrib/tools/python3/Modules/_sre/sre_lib.h b/contrib/tools/python3/Modules/_sre/sre_lib.h new file mode 100644 index 00000000000..95c1ada908d --- /dev/null +++ b/contrib/tools/python3/Modules/_sre/sre_lib.h @@ -0,0 +1,1818 @@ +/* + * Secret Labs' Regular Expression Engine + * + * regular expression matching engine + * + * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved. + * + * See the sre.c file for information on usage and redistribution. 
+ */ + +/* String matching engine */ + +/* This file is included three times, with different character settings */ + +LOCAL(int) +SRE(at)(SRE_STATE* state, const SRE_CHAR* ptr, SRE_CODE at) +{ +    /* check if pointer is at given position */ + +    Py_ssize_t thisp, thatp; + +    switch (at) { + +    case SRE_AT_BEGINNING: +    case SRE_AT_BEGINNING_STRING: +        return ((void*) ptr == state->beginning); + +    case SRE_AT_BEGINNING_LINE: +        return ((void*) ptr == state->beginning || +                SRE_IS_LINEBREAK((int) ptr[-1])); + +    case SRE_AT_END: +        return (((SRE_CHAR *)state->end - ptr == 1 && +                 SRE_IS_LINEBREAK((int) ptr[0])) || +                ((void*) ptr == state->end)); + +    case SRE_AT_END_LINE: +        return ((void*) ptr == state->end || +                SRE_IS_LINEBREAK((int) ptr[0])); + +    case SRE_AT_END_STRING: +        return ((void*) ptr == state->end); + +    case SRE_AT_BOUNDARY: +        if (state->beginning == state->end) +            return 0; +        thatp = ((void*) ptr > state->beginning) ? +            SRE_IS_WORD((int) ptr[-1]) : 0; +        thisp = ((void*) ptr < state->end) ? +            SRE_IS_WORD((int) ptr[0]) : 0; +        return thisp != thatp; + +    case SRE_AT_NON_BOUNDARY: +        if (state->beginning == state->end) +            return 0; +        thatp = ((void*) ptr > state->beginning) ? +            SRE_IS_WORD((int) ptr[-1]) : 0; +        thisp = ((void*) ptr < state->end) ? +            SRE_IS_WORD((int) ptr[0]) : 0; +        return thisp == thatp; + +    case SRE_AT_LOC_BOUNDARY: +        if (state->beginning == state->end) +            return 0; +        thatp = ((void*) ptr > state->beginning) ? +            SRE_LOC_IS_WORD((int) ptr[-1]) : 0; +        thisp = ((void*) ptr < state->end) ? 
+            SRE_LOC_IS_WORD((int) ptr[0]) : 0; +        return thisp != thatp; + +    case SRE_AT_LOC_NON_BOUNDARY: +        if (state->beginning == state->end) +            return 0; +        thatp = ((void*) ptr > state->beginning) ? +            SRE_LOC_IS_WORD((int) ptr[-1]) : 0; +        thisp = ((void*) ptr < state->end) ? +            SRE_LOC_IS_WORD((int) ptr[0]) : 0; +        return thisp == thatp; + +    case SRE_AT_UNI_BOUNDARY: +        if (state->beginning == state->end) +            return 0; +        thatp = ((void*) ptr > state->beginning) ? +            SRE_UNI_IS_WORD((int) ptr[-1]) : 0; +        thisp = ((void*) ptr < state->end) ? +            SRE_UNI_IS_WORD((int) ptr[0]) : 0; +        return thisp != thatp; + +    case SRE_AT_UNI_NON_BOUNDARY: +        if (state->beginning == state->end) +            return 0; +        thatp = ((void*) ptr > state->beginning) ? +            SRE_UNI_IS_WORD((int) ptr[-1]) : 0; +        thisp = ((void*) ptr < state->end) ? +            SRE_UNI_IS_WORD((int) ptr[0]) : 0; +        return thisp == thatp; + +    } + +    return 0; +} + +LOCAL(int) +SRE(charset)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch) +{ +    /* check if character is a member of the given set */ + +    int ok = 1; + +    for (;;) { +        switch (*set++) { + +        case SRE_OP_FAILURE: +            return !ok; + +        case SRE_OP_LITERAL: +            /* <LITERAL> <code> */ +            if (ch == set[0]) +                return ok; +            set++; +            break; + +        case SRE_OP_CATEGORY: +            /* <CATEGORY> <code> */ +            if (sre_category(set[0], (int) ch)) +                return ok; +            set++; +            break; + +        case SRE_OP_CHARSET: +            /* <CHARSET> <bitmap> */ +            if (ch < 256 && +                (set[ch/SRE_CODE_BITS] & (1u << (ch & (SRE_CODE_BITS-1))))) +                return ok; +            set += 256/SRE_CODE_BITS; +            break; + +        case 
SRE_OP_RANGE: +            /* <RANGE> <lower> <upper> */ +            if (set[0] <= ch && ch <= set[1]) +                return ok; +            set += 2; +            break; + +        case SRE_OP_RANGE_UNI_IGNORE: +            /* <RANGE_UNI_IGNORE> <lower> <upper> */ +        { +            SRE_CODE uch; +            /* ch is already lower cased */ +            if (set[0] <= ch && ch <= set[1]) +                return ok; +            uch = sre_upper_unicode(ch); +            if (set[0] <= uch && uch <= set[1]) +                return ok; +            set += 2; +            break; +        } + +        case SRE_OP_NEGATE: +            ok = !ok; +            break; + +        case SRE_OP_BIGCHARSET: +            /* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */ +        { +            Py_ssize_t count, block; +            count = *(set++); + +            if (ch < 0x10000u) +                block = ((unsigned char*)set)[ch >> 8]; +            else +                block = -1; +            set += 256/sizeof(SRE_CODE); +            if (block >=0 && +                (set[(block * 256 + (ch & 255))/SRE_CODE_BITS] & +                    (1u << (ch & (SRE_CODE_BITS-1))))) +                return ok; +            set += count * (256/SRE_CODE_BITS); +            break; +        } + +        default: +            /* internal error -- there's not much we can do about it +               here, so let's just pretend it didn't match... 
*/ +            return 0; +        } +    } +} + +LOCAL(int) +SRE(charset_loc_ignore)(SRE_STATE* state, const SRE_CODE* set, SRE_CODE ch) +{ +    SRE_CODE lo, up; +    lo = sre_lower_locale(ch); +    if (SRE(charset)(state, set, lo)) +       return 1; + +    up = sre_upper_locale(ch); +    return up != lo && SRE(charset)(state, set, up); +} + +LOCAL(Py_ssize_t) SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel); + +LOCAL(Py_ssize_t) +SRE(count)(SRE_STATE* state, const SRE_CODE* pattern, Py_ssize_t maxcount) +{ +    SRE_CODE chr; +    SRE_CHAR c; +    const SRE_CHAR* ptr = (const SRE_CHAR *)state->ptr; +    const SRE_CHAR* end = (const SRE_CHAR *)state->end; +    Py_ssize_t i; + +    /* adjust end */ +    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT) +        end = ptr + maxcount; + +    switch (pattern[0]) { + +    case SRE_OP_IN: +        /* repeated set */ +        TRACE(("|%p|%p|COUNT IN\n", pattern, ptr)); +        while (ptr < end && SRE(charset)(state, pattern + 2, *ptr)) +            ptr++; +        break; + +    case SRE_OP_ANY: +        /* repeated dot wildcard. */ +        TRACE(("|%p|%p|COUNT ANY\n", pattern, ptr)); +        while (ptr < end && !SRE_IS_LINEBREAK(*ptr)) +            ptr++; +        break; + +    case SRE_OP_ANY_ALL: +        /* repeated dot wildcard.  
skip to the end of the target +           string, and backtrack from there */ +        TRACE(("|%p|%p|COUNT ANY_ALL\n", pattern, ptr)); +        ptr = end; +        break; + +    case SRE_OP_LITERAL: +        /* repeated literal */ +        chr = pattern[1]; +        TRACE(("|%p|%p|COUNT LITERAL %d\n", pattern, ptr, chr)); +        c = (SRE_CHAR) chr; +#if SIZEOF_SRE_CHAR < 4 +        if ((SRE_CODE) c != chr) +            ; /* literal can't match: doesn't fit in char width */ +        else +#endif +        while (ptr < end && *ptr == c) +            ptr++; +        break; + +    case SRE_OP_LITERAL_IGNORE: +        /* repeated literal */ +        chr = pattern[1]; +        TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr)); +        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr) +            ptr++; +        break; + +    case SRE_OP_LITERAL_UNI_IGNORE: +        /* repeated literal */ +        chr = pattern[1]; +        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr)); +        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr) +            ptr++; +        break; + +    case SRE_OP_LITERAL_LOC_IGNORE: +        /* repeated literal */ +        chr = pattern[1]; +        TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr)); +        while (ptr < end && char_loc_ignore(chr, *ptr)) +            ptr++; +        break; + +    case SRE_OP_NOT_LITERAL: +        /* repeated non-literal */ +        chr = pattern[1]; +        TRACE(("|%p|%p|COUNT NOT_LITERAL %d\n", pattern, ptr, chr)); +        c = (SRE_CHAR) chr; +#if SIZEOF_SRE_CHAR < 4 +        if ((SRE_CODE) c != chr) +            ptr = end; /* literal can't match: doesn't fit in char width */ +        else +#endif +        while (ptr < end && *ptr != c) +            ptr++; +        break; + +    case SRE_OP_NOT_LITERAL_IGNORE: +        /* repeated non-literal */ +        chr = pattern[1]; +        TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, 
ptr, chr)); +        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr) +            ptr++; +        break; + +    case SRE_OP_NOT_LITERAL_UNI_IGNORE: +        /* repeated non-literal */ +        chr = pattern[1]; +        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr)); +        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr) +            ptr++; +        break; + +    case SRE_OP_NOT_LITERAL_LOC_IGNORE: +        /* repeated non-literal */ +        chr = pattern[1]; +        TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr)); +        while (ptr < end && !char_loc_ignore(chr, *ptr)) +            ptr++; +        break; + +    default: +        /* repeated single character pattern */ +        TRACE(("|%p|%p|COUNT SUBPATTERN\n", pattern, ptr)); +        while ((SRE_CHAR*) state->ptr < end) { +            i = SRE(match)(state, pattern, 0); +            if (i < 0) +                return i; +            if (!i) +                break; +        } +        TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr, +               (SRE_CHAR*) state->ptr - ptr)); +        return (SRE_CHAR*) state->ptr - ptr; +    } + +    TRACE(("|%p|%p|COUNT %zd\n", pattern, ptr, +           ptr - (SRE_CHAR*) state->ptr)); +    return ptr - (SRE_CHAR*) state->ptr; +} + +/* The macros below should be used to protect recursive SRE(match)() + * calls that *failed* and do *not* return immediately (IOW, those + * that will backtrack). Explaining: + * + * - Recursive SRE(match)() returned true: that's usually a success + *   (besides atypical cases like ASSERT_NOT), therefore there's no + *   reason to restore lastmark; + * + * - Recursive SRE(match)() returned false but the current SRE(match)() + *   is returning to the caller: If the current SRE(match)() is the + *   top function of the recursion, returning false will be a matching + *   failure, and it doesn't matter where lastmark is pointing to. 
+ *   If it's *not* the top function, it will be a recursive SRE(match)() + *   failure by itself, and the calling SRE(match)() will have to deal + *   with the failure by the same rules explained here (it will restore + *   lastmark by itself if necessary); + * + * - Recursive SRE(match)() returned false, and will continue the + *   outside 'for' loop: must be protected when breaking, since the next + *   OP could potentially depend on lastmark; + * + * - Recursive SRE(match)() returned false, and will be called again + *   inside a local for/while loop: must be protected between each + *   loop iteration, since the recursive SRE(match)() could do anything, + *   and could potentially depend on lastmark. + * + * For more information, check the discussion at SF patch #712900. + */ +#define LASTMARK_SAVE()     \ +    do { \ +        ctx->lastmark = state->lastmark; \ +        ctx->lastindex = state->lastindex; \ +    } while (0) +#define LASTMARK_RESTORE()  \ +    do { \ +        state->lastmark = ctx->lastmark; \ +        state->lastindex = ctx->lastindex; \ +    } while (0) + +#define RETURN_ERROR(i) do { return i; } while(0) +#define RETURN_FAILURE do { ret = 0; goto exit; } while(0) +#define RETURN_SUCCESS do { ret = 1; goto exit; } while(0) + +#define RETURN_ON_ERROR(i) \ +    do { if (i < 0) RETURN_ERROR(i); } while (0) +#define RETURN_ON_SUCCESS(i) \ +    do { RETURN_ON_ERROR(i); if (i > 0) RETURN_SUCCESS; } while (0) +#define RETURN_ON_FAILURE(i) \ +    do { RETURN_ON_ERROR(i); if (i == 0) RETURN_FAILURE; } while (0) + +#define DATA_STACK_ALLOC(state, type, ptr) \ +do { \ +    alloc_pos = state->data_stack_base; \ +    TRACE(("allocating %s in %zd (%zd)\n", \ +           Py_STRINGIFY(type), alloc_pos, sizeof(type))); \ +    if (sizeof(type) > state->data_stack_size - alloc_pos) { \ +        int j = data_stack_grow(state, sizeof(type)); \ +        if (j < 0) return j; \ +        if (ctx_pos != -1) \ +            DATA_STACK_LOOKUP_AT(state, SRE(match_context), 
ctx, ctx_pos); \ +    } \ +    ptr = (type*)(state->data_stack+alloc_pos); \ +    state->data_stack_base += sizeof(type); \ +} while (0) + +#define DATA_STACK_LOOKUP_AT(state, type, ptr, pos) \ +do { \ +    TRACE(("looking up %s at %zd\n", Py_STRINGIFY(type), pos)); \ +    ptr = (type*)(state->data_stack+pos); \ +} while (0) + +#define DATA_STACK_PUSH(state, data, size) \ +do { \ +    TRACE(("copy data in %p to %zd (%zd)\n", \ +           data, state->data_stack_base, size)); \ +    if (size > state->data_stack_size - state->data_stack_base) { \ +        int j = data_stack_grow(state, size); \ +        if (j < 0) return j; \ +        if (ctx_pos != -1) \ +            DATA_STACK_LOOKUP_AT(state, SRE(match_context), ctx, ctx_pos); \ +    } \ +    memcpy(state->data_stack+state->data_stack_base, data, size); \ +    state->data_stack_base += size; \ +} while (0) + +/* We add an explicit cast to memcpy here because MSVC has a bug when +   compiling C code where it believes that `const void**` cannot be +   safely casted to `void*`, see bpo-39943 for details. 
*/ +#define DATA_STACK_POP(state, data, size, discard) \ +do { \ +    TRACE(("copy data to %p from %zd (%zd)\n", \ +           data, state->data_stack_base-size, size)); \ +    memcpy((void*) data, state->data_stack+state->data_stack_base-size, size); \ +    if (discard) \ +        state->data_stack_base -= size; \ +} while (0) + +#define DATA_STACK_POP_DISCARD(state, size) \ +do { \ +    TRACE(("discard data from %zd (%zd)\n", \ +           state->data_stack_base-size, size)); \ +    state->data_stack_base -= size; \ +} while(0) + +#define DATA_PUSH(x) \ +    DATA_STACK_PUSH(state, (x), sizeof(*(x))) +#define DATA_POP(x) \ +    DATA_STACK_POP(state, (x), sizeof(*(x)), 1) +#define DATA_POP_DISCARD(x) \ +    DATA_STACK_POP_DISCARD(state, sizeof(*(x))) +#define DATA_ALLOC(t,p) \ +    DATA_STACK_ALLOC(state, t, p) +#define DATA_LOOKUP_AT(t,p,pos) \ +    DATA_STACK_LOOKUP_AT(state,t,p,pos) + +#define MARK_PUSH(lastmark) \ +    do if (lastmark >= 0) { \ +        size_t _marks_size = (lastmark+1) * sizeof(void*); \ +        DATA_STACK_PUSH(state, state->mark, _marks_size); \ +    } while (0) +#define MARK_POP(lastmark) \ +    do if (lastmark >= 0) { \ +        size_t _marks_size = (lastmark+1) * sizeof(void*); \ +        DATA_STACK_POP(state, state->mark, _marks_size, 1); \ +    } while (0) +#define MARK_POP_KEEP(lastmark) \ +    do if (lastmark >= 0) { \ +        size_t _marks_size = (lastmark+1) * sizeof(void*); \ +        DATA_STACK_POP(state, state->mark, _marks_size, 0); \ +    } while (0) +#define MARK_POP_DISCARD(lastmark) \ +    do if (lastmark >= 0) { \ +        size_t _marks_size = (lastmark+1) * sizeof(void*); \ +        DATA_STACK_POP_DISCARD(state, _marks_size); \ +    } while (0) + +#define JUMP_NONE            0 +#define JUMP_MAX_UNTIL_1     1 +#define JUMP_MAX_UNTIL_2     2 +#define JUMP_MAX_UNTIL_3     3 +#define JUMP_MIN_UNTIL_1     4 +#define JUMP_MIN_UNTIL_2     5 +#define JUMP_MIN_UNTIL_3     6 +#define JUMP_REPEAT          7 +#define 
JUMP_REPEAT_ONE_1    8 +#define JUMP_REPEAT_ONE_2    9 +#define JUMP_MIN_REPEAT_ONE  10 +#define JUMP_BRANCH          11 +#define JUMP_ASSERT          12 +#define JUMP_ASSERT_NOT      13 +#define JUMP_POSS_REPEAT_1   14 +#define JUMP_POSS_REPEAT_2   15 +#define JUMP_ATOMIC_GROUP    16 + +/* DO_JUMPX: start matching nextpattern in a fresh match context. The current pattern and ptr are saved into ctx, a new context is allocated with DATA_ALLOC and filled in (pattern, toplevel, jump, and last_ctx_pos pointing back at the current ctx_pos), it becomes the current ctx, and control goes to the entrance label. The statements after jumplabel reload pattern and ptr from ctx; how control reaches jumplabel is not visible in this macro - presumably the return path of the function dispatches on the saved jump value; confirm there. ctx_pos is taken from alloc_pos, which DATA_ALLOC is presumably expected to have set. The expansion is a plain statement sequence (no do/while wrapper), so it must not be used as the unbraced body of an if or a loop. */#define DO_JUMPX(jumpvalue, jumplabel, nextpattern, toplevel_) \ +    ctx->pattern = pattern; \ +    ctx->ptr = ptr; \ +    DATA_ALLOC(SRE(match_context), nextctx); \ +    nextctx->pattern = nextpattern; \ +    nextctx->toplevel = toplevel_; \ +    nextctx->jump = jumpvalue; \ +    nextctx->last_ctx_pos = ctx_pos; \ +    pattern = nextpattern; \ +    ctx_pos = alloc_pos; \ +    ctx = nextctx; \ +    goto entrance; \ +    jumplabel: \ +    pattern = ctx->pattern; \ +    ptr = ctx->ptr; + +/* DO_JUMP: as DO_JUMPX, with the new context inheriting ctx->toplevel. */#define DO_JUMP(jumpvalue, jumplabel, nextpattern) \ +    DO_JUMPX(jumpvalue, jumplabel, nextpattern, ctx->toplevel) + +/* DO_JUMP0: as DO_JUMPX, with toplevel forced to 0 in the new context. */#define DO_JUMP0(jumpvalue, jumplabel, nextpattern) \ +    DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0) + +/* Match context: the record that SRE(match) allocates on the data stack for its initial state and again on every DO_JUMPX. count and the u union (chr or rep) are scratch values used by the repeat opcodes; pattern and ptr hold the position saved by DO_JUMPX; toplevel and jump are set when the context is created; last_ctx_pos is the ctx_pos of the context that created this one (-1 for the first). lastmark and lastindex are presumably filled by LASTMARK_SAVE, which is defined outside this view. */typedef struct { +    Py_ssize_t count; +    union { +        SRE_CODE chr; +        SRE_REPEAT* rep; +    } u; +    int lastmark; +    int lastindex; +    const SRE_CODE* pattern; +    const SRE_CHAR* ptr; +    int toplevel; +    int jump; +    Py_ssize_t last_ctx_pos; +} SRE(match_context); + +/* MAYBE_CHECK_SIGNALS: bump sigcount and, whenever its low 12 bits become zero (once every 4096 bumps), call PyErr_CheckSignals(); a true result leaves through RETURN_ERROR(SRE_ERROR_INTERRUPTED). Needs a variable named sigcount at the expansion site. */#define MAYBE_CHECK_SIGNALS                                        \ +    do {                                                           \ +        if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \ +            RETURN_ERROR(SRE_ERROR_INTERRUPTED);                   \ +        }                                                          \ +    } while (0) + +/* Settle USE_COMPUTED_GOTOS: it defaults to 1 when HAVE_COMPUTED_GOTOS is defined and nothing was chosen, an explicit non-zero request without HAVE_COMPUTED_GOTOS is a compile error, and in every other case it is forced to 0. */#ifdef HAVE_COMPUTED_GOTOS +    #ifndef USE_COMPUTED_GOTOS +    #define USE_COMPUTED_GOTOS 1 +    #endif +#elif defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS +    #error "Computed gotos are not supported on this compiler." 
+#else +    #undef USE_COMPUTED_GOTOS +    #define USE_COMPUTED_GOTOS 0 +#endif + +/* Opcode dispatch. With computed gotos, TARGET(OP) pastes the label name TARGET_OP and DISPATCH runs MAYBE_CHECK_SIGNALS, then jumps through sre_targets[] using the opcode at *pattern while advancing pattern. Without them, TARGET(OP) is a case label and DISPATCH jumps to the dispatch label. */#if USE_COMPUTED_GOTOS +    #define TARGET(OP) TARGET_ ## OP +    #define DISPATCH                       \ +        do {                               \ +            MAYBE_CHECK_SIGNALS;           \ +            goto *sre_targets[*pattern++]; \ +        } while (0) +#else +    #define TARGET(OP) case OP +    #define DISPATCH goto dispatch +#endif + +/* check if string matches the given pattern.  returns <0 for +   error, 0 for failure, and 1 for success */ +LOCAL(Py_ssize_t) +SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel) +{ +    const SRE_CHAR* end = (const SRE_CHAR *)state->end; +    Py_ssize_t alloc_pos, ctx_pos = -1; +    Py_ssize_t ret = 0; +    int jump; +    unsigned int sigcount = state->sigcount; + +    SRE(match_context)* ctx; +    SRE(match_context)* nextctx; + +    TRACE(("|%p|%p|ENTER\n", pattern, state->ptr)); + +    DATA_ALLOC(SRE(match_context), ctx); +    ctx->last_ctx_pos = -1; +    ctx->jump = JUMP_NONE; +    ctx->toplevel = toplevel; +    ctx_pos = alloc_pos; + +#if USE_COMPUTED_GOTOS +#include "sre_targets.h" +#endif + +entrance: + +    ;  // Fashion statement. +    const SRE_CHAR *ptr = (SRE_CHAR *)state->ptr; + +    if (pattern[0] == SRE_OP_INFO) { +        /* optimization info block */ +        /* <INFO> <1=skip> <2=flags> <3=min> ... 
*/ +        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) { +            TRACE(("reject (got %tu chars, need %zu)\n", +                   end - ptr, (size_t) pattern[3])); +            RETURN_FAILURE; +        } +        pattern += pattern[1] + 1; +    } + +#if USE_COMPUTED_GOTOS +    DISPATCH; +#else +dispatch: +    MAYBE_CHECK_SIGNALS; +    switch (*pattern++) +#endif +    { + +        TARGET(SRE_OP_MARK): +            /* set mark */ +            /* <MARK> <gid> */ +            TRACE(("|%p|%p|MARK %d\n", pattern, +                   ptr, pattern[0])); +            { +                int i = pattern[0]; +                if (i & 1) +                    state->lastindex = i/2 + 1; +                if (i > state->lastmark) { +                    /* state->lastmark is the highest valid index in the +                       state->mark array.  If it is increased by more than 1, +                       the intervening marks must be set to NULL to signal +                       that these marks have not been encountered. 
*/ +                    int j = state->lastmark + 1; +                    while (j < i) +                        state->mark[j++] = NULL; +                    state->lastmark = i; +                } +                state->mark[i] = ptr; +            } +            pattern++; +            DISPATCH; + +        TARGET(SRE_OP_LITERAL): +            /* match literal string */ +            /* <LITERAL> <code> */ +            TRACE(("|%p|%p|LITERAL %d\n", pattern, +                   ptr, *pattern)); +            if (ptr >= end || (SRE_CODE) ptr[0] != pattern[0]) +                RETURN_FAILURE; +            pattern++; +            ptr++; +            DISPATCH; + +        TARGET(SRE_OP_NOT_LITERAL): +            /* match anything that is not literal character */ +            /* <NOT_LITERAL> <code> */ +            TRACE(("|%p|%p|NOT_LITERAL %d\n", pattern, +                   ptr, *pattern)); +            if (ptr >= end || (SRE_CODE) ptr[0] == pattern[0]) +                RETURN_FAILURE; +            pattern++; +            ptr++; +            DISPATCH; + +        TARGET(SRE_OP_SUCCESS): +            /* end of pattern */ +            TRACE(("|%p|%p|SUCCESS\n", pattern, ptr)); +            if (ctx->toplevel && +                ((state->match_all && ptr != state->end) || +                 (state->must_advance && ptr == state->start))) +            { +                RETURN_FAILURE; +            } +            state->ptr = ptr; +            RETURN_SUCCESS; + +        TARGET(SRE_OP_AT): +            /* match at given position */ +            /* <AT> <code> */ +            TRACE(("|%p|%p|AT %d\n", pattern, ptr, *pattern)); +            if (!SRE(at)(state, ptr, *pattern)) +                RETURN_FAILURE; +            pattern++; +            DISPATCH; + +        TARGET(SRE_OP_CATEGORY): +            /* match at given category */ +            /* <CATEGORY> <code> */ +            TRACE(("|%p|%p|CATEGORY %d\n", pattern, +                   ptr, *pattern)); +            if (ptr >= 
end || !sre_category(pattern[0], ptr[0])) +                RETURN_FAILURE; +            pattern++; +            ptr++; +            DISPATCH; + +        TARGET(SRE_OP_ANY): +            /* match anything (except a newline) */ +            /* <ANY> */ +            TRACE(("|%p|%p|ANY\n", pattern, ptr)); +            if (ptr >= end || SRE_IS_LINEBREAK(ptr[0])) +                RETURN_FAILURE; +            ptr++; +            DISPATCH; + +        TARGET(SRE_OP_ANY_ALL): +            /* match anything */ +            /* <ANY_ALL> */ +            TRACE(("|%p|%p|ANY_ALL\n", pattern, ptr)); +            if (ptr >= end) +                RETURN_FAILURE; +            ptr++; +            DISPATCH; + +        TARGET(SRE_OP_IN): +            /* match set member (or non_member) */ +            /* <IN> <skip> <set> */ +            TRACE(("|%p|%p|IN\n", pattern, ptr)); +            if (ptr >= end || +                !SRE(charset)(state, pattern + 1, *ptr)) +                RETURN_FAILURE; +            pattern += pattern[0]; +            ptr++; +            DISPATCH; + +        TARGET(SRE_OP_LITERAL_IGNORE): +            TRACE(("|%p|%p|LITERAL_IGNORE %d\n", +                   pattern, ptr, pattern[0])); +            if (ptr >= end || +                sre_lower_ascii(*ptr) != *pattern) +                RETURN_FAILURE; +            pattern++; +            ptr++; +            DISPATCH; + +        TARGET(SRE_OP_LITERAL_UNI_IGNORE): +            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n", +                   pattern, ptr, pattern[0])); +            if (ptr >= end || +                sre_lower_unicode(*ptr) != *pattern) +                RETURN_FAILURE; +            pattern++; +            ptr++; +            DISPATCH; + +        TARGET(SRE_OP_LITERAL_LOC_IGNORE): +            TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n", +                   pattern, ptr, pattern[0])); +            if (ptr >= end +                || !char_loc_ignore(*pattern, *ptr)) +                RETURN_FAILURE; +            
pattern++; +            ptr++; +            DISPATCH; + +        TARGET(SRE_OP_NOT_LITERAL_IGNORE): +            TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n", +                   pattern, ptr, *pattern)); +            if (ptr >= end || +                sre_lower_ascii(*ptr) == *pattern) +                RETURN_FAILURE; +            pattern++; +            ptr++; +            DISPATCH; + +        TARGET(SRE_OP_NOT_LITERAL_UNI_IGNORE): +            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n", +                   pattern, ptr, *pattern)); +            if (ptr >= end || +                sre_lower_unicode(*ptr) == *pattern) +                RETURN_FAILURE; +            pattern++; +            ptr++; +            DISPATCH; + +        TARGET(SRE_OP_NOT_LITERAL_LOC_IGNORE): +            TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n", +                   pattern, ptr, *pattern)); +            if (ptr >= end +                || char_loc_ignore(*pattern, *ptr)) +                RETURN_FAILURE; +            pattern++; +            ptr++; +            DISPATCH; + +        TARGET(SRE_OP_IN_IGNORE): +            TRACE(("|%p|%p|IN_IGNORE\n", pattern, ptr)); +            if (ptr >= end +                || !SRE(charset)(state, pattern+1, +                                 (SRE_CODE)sre_lower_ascii(*ptr))) +                RETURN_FAILURE; +            pattern += pattern[0]; +            ptr++; +            DISPATCH; + +        TARGET(SRE_OP_IN_UNI_IGNORE): +            TRACE(("|%p|%p|IN_UNI_IGNORE\n", pattern, ptr)); +            if (ptr >= end +                || !SRE(charset)(state, pattern+1, +                                 (SRE_CODE)sre_lower_unicode(*ptr))) +                RETURN_FAILURE; +            pattern += pattern[0]; +            ptr++; +            DISPATCH; + +        TARGET(SRE_OP_IN_LOC_IGNORE): +            TRACE(("|%p|%p|IN_LOC_IGNORE\n", pattern, ptr)); +            if (ptr >= end +                || !SRE(charset_loc_ignore)(state, pattern+1, *ptr)) +                
RETURN_FAILURE; +            pattern += pattern[0]; +            ptr++; +            DISPATCH; + +        TARGET(SRE_OP_JUMP): +        TARGET(SRE_OP_INFO): +            /* jump forward */ +            /* <JUMP> <offset> */ +            TRACE(("|%p|%p|JUMP %d\n", pattern, +                   ptr, pattern[0])); +            pattern += pattern[0]; +            DISPATCH; + +        TARGET(SRE_OP_BRANCH): +            /* alternation */ +            /* <BRANCH> <0=skip> code <JUMP> ... <NULL> */ +            TRACE(("|%p|%p|BRANCH\n", pattern, ptr)); +            LASTMARK_SAVE(); +            if (state->repeat) +                MARK_PUSH(ctx->lastmark); +            for (; pattern[0]; pattern += pattern[0]) { +                if (pattern[1] == SRE_OP_LITERAL && +                    (ptr >= end || +                     (SRE_CODE) *ptr != pattern[2])) +                    continue; +                if (pattern[1] == SRE_OP_IN && +                    (ptr >= end || +                     !SRE(charset)(state, pattern + 3, +                                   (SRE_CODE) *ptr))) +                    continue; +                state->ptr = ptr; +                DO_JUMP(JUMP_BRANCH, jump_branch, pattern+1); +                if (ret) { +                    if (state->repeat) +                        MARK_POP_DISCARD(ctx->lastmark); +                    RETURN_ON_ERROR(ret); +                    RETURN_SUCCESS; +                } +                if (state->repeat) +                    MARK_POP_KEEP(ctx->lastmark); +                LASTMARK_RESTORE(); +            } +            if (state->repeat) +                MARK_POP_DISCARD(ctx->lastmark); +            RETURN_FAILURE; + +        TARGET(SRE_OP_REPEAT_ONE): +            /* match repeated sequence (maximizing regexp) */ + +            /* this operator only works if the repeated item is +               exactly one character wide, and we're not already +               collecting backtracking points.  
for other cases, +               use the MAX_REPEAT operator */ + +            /* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */ + +            TRACE(("|%p|%p|REPEAT_ONE %d %d\n", pattern, ptr, +                   pattern[1], pattern[2])); + +            if ((Py_ssize_t) pattern[1] > end - ptr) +                RETURN_FAILURE; /* cannot match */ + +            state->ptr = ptr; + +            ret = SRE(count)(state, pattern+3, pattern[2]); +            RETURN_ON_ERROR(ret); +            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); +            ctx->count = ret; +            ptr += ctx->count; + +            /* when we arrive here, count contains the number of +               matches, and ptr points to the tail of the target +               string.  check if the rest of the pattern matches, +               and backtrack if not. */ + +            if (ctx->count < (Py_ssize_t) pattern[1]) +                RETURN_FAILURE; + +            if (pattern[pattern[0]] == SRE_OP_SUCCESS && +                ptr == state->end && +                !(ctx->toplevel && state->must_advance && ptr == state->start)) +            { +                /* tail is empty.  we're finished */ +                state->ptr = ptr; +                RETURN_SUCCESS; +            } + +            LASTMARK_SAVE(); +            if (state->repeat) +                MARK_PUSH(ctx->lastmark); + +            if (pattern[pattern[0]] == SRE_OP_LITERAL) { +                /* tail starts with a literal. 
skip positions where +                   the rest of the pattern cannot possibly match */ +                ctx->u.chr = pattern[pattern[0]+1]; +                for (;;) { +                    while (ctx->count >= (Py_ssize_t) pattern[1] && +                           (ptr >= end || *ptr != ctx->u.chr)) { +                        ptr--; +                        ctx->count--; +                    } +                    if (ctx->count < (Py_ssize_t) pattern[1]) +                        break; +                    state->ptr = ptr; +                    DO_JUMP(JUMP_REPEAT_ONE_1, jump_repeat_one_1, +                            pattern+pattern[0]); +                    if (ret) { +                        if (state->repeat) +                            MARK_POP_DISCARD(ctx->lastmark); +                        RETURN_ON_ERROR(ret); +                        RETURN_SUCCESS; +                    } +                    if (state->repeat) +                        MARK_POP_KEEP(ctx->lastmark); +                    LASTMARK_RESTORE(); + +                    ptr--; +                    ctx->count--; +                } +                if (state->repeat) +                    MARK_POP_DISCARD(ctx->lastmark); +            } else { +                /* general case */ +                while (ctx->count >= (Py_ssize_t) pattern[1]) { +                    state->ptr = ptr; +                    DO_JUMP(JUMP_REPEAT_ONE_2, jump_repeat_one_2, +                            pattern+pattern[0]); +                    if (ret) { +                        if (state->repeat) +                            MARK_POP_DISCARD(ctx->lastmark); +                        RETURN_ON_ERROR(ret); +                        RETURN_SUCCESS; +                    } +                    if (state->repeat) +                        MARK_POP_KEEP(ctx->lastmark); +                    LASTMARK_RESTORE(); + +                    ptr--; +                    ctx->count--; +                } +                if (state->repeat) +     
               MARK_POP_DISCARD(ctx->lastmark); +            } +            RETURN_FAILURE; + +        TARGET(SRE_OP_MIN_REPEAT_ONE): +            /* match repeated sequence (minimizing regexp) */ + +            /* this operator only works if the repeated item is +               exactly one character wide, and we're not already +               collecting backtracking points.  for other cases, +               use the MIN_REPEAT operator */ + +            /* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */ + +            TRACE(("|%p|%p|MIN_REPEAT_ONE %d %d\n", pattern, ptr, +                   pattern[1], pattern[2])); + +            if ((Py_ssize_t) pattern[1] > end - ptr) +                RETURN_FAILURE; /* cannot match */ + +            state->ptr = ptr; + +            if (pattern[1] == 0) +                ctx->count = 0; +            else { +                /* count using pattern min as the maximum */ +                ret = SRE(count)(state, pattern+3, pattern[1]); +                RETURN_ON_ERROR(ret); +                DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); +                if (ret < (Py_ssize_t) pattern[1]) +                    /* didn't match minimum number of times */ +                    RETURN_FAILURE; +                /* advance past minimum matches of repeat */ +                ctx->count = ret; +                ptr += ctx->count; +            } + +            if (pattern[pattern[0]] == SRE_OP_SUCCESS && +                !(ctx->toplevel && +                  ((state->match_all && ptr != state->end) || +                   (state->must_advance && ptr == state->start)))) +            { +                /* tail is empty.  
we're finished */ +                state->ptr = ptr; +                RETURN_SUCCESS; + +            } else { +                /* general case */ +                LASTMARK_SAVE(); +                if (state->repeat) +                    MARK_PUSH(ctx->lastmark); + +                while ((Py_ssize_t)pattern[2] == SRE_MAXREPEAT +                       || ctx->count <= (Py_ssize_t)pattern[2]) { +                    state->ptr = ptr; +                    DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one, +                            pattern+pattern[0]); +                    if (ret) { +                        if (state->repeat) +                            MARK_POP_DISCARD(ctx->lastmark); +                        RETURN_ON_ERROR(ret); +                        RETURN_SUCCESS; +                    } +                    if (state->repeat) +                        MARK_POP_KEEP(ctx->lastmark); +                    LASTMARK_RESTORE(); + +                    state->ptr = ptr; +                    ret = SRE(count)(state, pattern+3, 1); +                    RETURN_ON_ERROR(ret); +                    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); +                    if (ret == 0) +                        break; +                    assert(ret == 1); +                    ptr++; +                    ctx->count++; +                } +                if (state->repeat) +                    MARK_POP_DISCARD(ctx->lastmark); +            } +            RETURN_FAILURE; + +        TARGET(SRE_OP_POSSESSIVE_REPEAT_ONE): +            /* match repeated sequence (maximizing regexp) without +               backtracking */ + +            /* this operator only works if the repeated item is +               exactly one character wide, and we're not already +               collecting backtracking points.  
for other cases, +               use the MAX_REPEAT operator */ + +            /* <POSSESSIVE_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> +               tail */ + +            TRACE(("|%p|%p|POSSESSIVE_REPEAT_ONE %d %d\n", pattern, +                   ptr, pattern[1], pattern[2])); + +            if (ptr + pattern[1] > end) { +                RETURN_FAILURE; /* cannot match */ +            } + +            state->ptr = ptr; + +            ret = SRE(count)(state, pattern + 3, pattern[2]); +            RETURN_ON_ERROR(ret); +            DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); +            ctx->count = ret; +            ptr += ctx->count; + +            /* when we arrive here, count contains the number of +               matches, and ptr points to the tail of the target +               string.  check if the rest of the pattern matches, +               and fail if not. */ + +            /* Test for not enough repetitions in match */ +            if (ctx->count < (Py_ssize_t) pattern[1]) { +                RETURN_FAILURE; +            } + +            /* Update the pattern to point to the next op code */ +            pattern += pattern[0]; + +            /* Let the tail be evaluated separately and consider this +               match successful. */ +            if (*pattern == SRE_OP_SUCCESS && +                ptr == state->end && +                !(ctx->toplevel && state->must_advance && ptr == state->start)) +            { +                /* tail is empty.  we're finished */ +                state->ptr = ptr; +                RETURN_SUCCESS; +            } + +            /* Attempt to match the rest of the string */ +            DISPATCH; + +        TARGET(SRE_OP_REPEAT): +            /* create repeat context.  
all the hard work is done +               by the UNTIL operator (MAX_UNTIL, MIN_UNTIL) */ +            /* <REPEAT> <skip> <1=min> <2=max> +               <3=repeat_index> item <UNTIL> tail */ +            TRACE(("|%p|%p|REPEAT %d %d\n", pattern, ptr, +                   pattern[1], pattern[2])); + +            /* install new repeat context */ +            /* TODO(https://github.com/python/cpython/issues/67877): Fix this +             * potential memory leak. */ +            ctx->u.rep = (SRE_REPEAT*) PyObject_Malloc(sizeof(*ctx->u.rep)); +            if (!ctx->u.rep) { +                PyErr_NoMemory(); +                RETURN_FAILURE; +            } +            ctx->u.rep->count = -1; +            ctx->u.rep->pattern = pattern; +            ctx->u.rep->prev = state->repeat; +            ctx->u.rep->last_ptr = NULL; +            state->repeat = ctx->u.rep; + +            state->ptr = ptr; +            DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]); +            state->repeat = ctx->u.rep->prev; +            PyObject_Free(ctx->u.rep); + +            if (ret) { +                RETURN_ON_ERROR(ret); +                RETURN_SUCCESS; +            } +            RETURN_FAILURE; + +        TARGET(SRE_OP_MAX_UNTIL): +            /* maximizing repeat */ +            /* <REPEAT> <skip> <1=min> <2=max> item <MAX_UNTIL> tail */ + +            /* FIXME: we probably need to deal with zero-width +               matches in here... 
*/ + +            ctx->u.rep = state->repeat; +            if (!ctx->u.rep) +                RETURN_ERROR(SRE_ERROR_STATE); + +            state->ptr = ptr; + +            ctx->count = ctx->u.rep->count+1; + +            TRACE(("|%p|%p|MAX_UNTIL %zd\n", pattern, +                   ptr, ctx->count)); + +            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) { +                /* not enough matches */ +                ctx->u.rep->count = ctx->count; +                DO_JUMP(JUMP_MAX_UNTIL_1, jump_max_until_1, +                        ctx->u.rep->pattern+3); +                if (ret) { +                    RETURN_ON_ERROR(ret); +                    RETURN_SUCCESS; +                } +                ctx->u.rep->count = ctx->count-1; +                state->ptr = ptr; +                RETURN_FAILURE; +            } + +            if ((ctx->count < (Py_ssize_t) ctx->u.rep->pattern[2] || +                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) && +                state->ptr != ctx->u.rep->last_ptr) { +                /* we may have enough matches, but if we can +                   match another item, do so */ +                ctx->u.rep->count = ctx->count; +                LASTMARK_SAVE(); +                MARK_PUSH(ctx->lastmark); +                /* zero-width match protection */ +                DATA_PUSH(&ctx->u.rep->last_ptr); +                ctx->u.rep->last_ptr = state->ptr; +                DO_JUMP(JUMP_MAX_UNTIL_2, jump_max_until_2, +                        ctx->u.rep->pattern+3); +                DATA_POP(&ctx->u.rep->last_ptr); +                if (ret) { +                    MARK_POP_DISCARD(ctx->lastmark); +                    RETURN_ON_ERROR(ret); +                    RETURN_SUCCESS; +                } +                MARK_POP(ctx->lastmark); +                LASTMARK_RESTORE(); +                ctx->u.rep->count = ctx->count-1; +                state->ptr = ptr; +            } + +            /* cannot match more repeated items here.  
make sure the +               tail matches */ +            state->repeat = ctx->u.rep->prev; +            DO_JUMP(JUMP_MAX_UNTIL_3, jump_max_until_3, pattern); +            state->repeat = ctx->u.rep; // restore repeat before return + +            RETURN_ON_SUCCESS(ret); +            state->ptr = ptr; +            RETURN_FAILURE; + +        TARGET(SRE_OP_MIN_UNTIL): +            /* minimizing repeat */ +            /* <REPEAT> <skip> <1=min> <2=max> item <MIN_UNTIL> tail */ + +            ctx->u.rep = state->repeat; +            if (!ctx->u.rep) +                RETURN_ERROR(SRE_ERROR_STATE); + +            state->ptr = ptr; + +            ctx->count = ctx->u.rep->count+1; + +            TRACE(("|%p|%p|MIN_UNTIL %zd %p\n", pattern, +                   ptr, ctx->count, ctx->u.rep->pattern)); + +            if (ctx->count < (Py_ssize_t) ctx->u.rep->pattern[1]) { +                /* not enough matches */ +                ctx->u.rep->count = ctx->count; +                DO_JUMP(JUMP_MIN_UNTIL_1, jump_min_until_1, +                        ctx->u.rep->pattern+3); +                if (ret) { +                    RETURN_ON_ERROR(ret); +                    RETURN_SUCCESS; +                } +                ctx->u.rep->count = ctx->count-1; +                state->ptr = ptr; +                RETURN_FAILURE; +            } + +            /* see if the tail matches */ +            state->repeat = ctx->u.rep->prev; + +            LASTMARK_SAVE(); +            if (state->repeat) +                MARK_PUSH(ctx->lastmark); + +            DO_JUMP(JUMP_MIN_UNTIL_2, jump_min_until_2, pattern); +            SRE_REPEAT *repeat_of_tail = state->repeat; +            state->repeat = ctx->u.rep; // restore repeat before return + +            if (ret) { +                if (repeat_of_tail) +                    MARK_POP_DISCARD(ctx->lastmark); +                RETURN_ON_ERROR(ret); +                RETURN_SUCCESS; +            } +            if (repeat_of_tail) +                
MARK_POP(ctx->lastmark); +            LASTMARK_RESTORE(); + +            state->ptr = ptr; + +            if ((ctx->count >= (Py_ssize_t) ctx->u.rep->pattern[2] +                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT) || +                state->ptr == ctx->u.rep->last_ptr) +                RETURN_FAILURE; + +            ctx->u.rep->count = ctx->count; +            /* zero-width match protection */ +            DATA_PUSH(&ctx->u.rep->last_ptr); +            ctx->u.rep->last_ptr = state->ptr; +            DO_JUMP(JUMP_MIN_UNTIL_3,jump_min_until_3, +                    ctx->u.rep->pattern+3); +            DATA_POP(&ctx->u.rep->last_ptr); +            if (ret) { +                RETURN_ON_ERROR(ret); +                RETURN_SUCCESS; +            } +            ctx->u.rep->count = ctx->count-1; +            state->ptr = ptr; +            RETURN_FAILURE; + +        TARGET(SRE_OP_POSSESSIVE_REPEAT): +            /* create possessive repeat contexts. */ +            /* <POSSESSIVE_REPEAT> <skip> <1=min> <2=max> pattern +               <SUCCESS> tail */ +            TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern, +                   ptr, pattern[1], pattern[2])); + +            /* Set the global Input pointer to this context's Input +               pointer */ +            state->ptr = ptr; + +            /* Initialize Count to 0 */ +            ctx->count = 0; + +            /* Check for minimum required matches. 
*/ +            while (ctx->count < (Py_ssize_t)pattern[1]) { +                /* not enough matches */ +                DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1, +                         &pattern[3]); +                if (ret) { +                    RETURN_ON_ERROR(ret); +                    ctx->count++; +                } +                else { +                    state->ptr = ptr; +                    RETURN_FAILURE; +                } +            } + +            /* Clear the context's Input stream pointer so that it +               doesn't match the global state so that the while loop can +               be entered. */ +            ptr = NULL; + +            /* Keep trying to parse the <pattern> sub-pattern until the +               end is reached, creating a new context each time. */ +            while ((ctx->count < (Py_ssize_t)pattern[2] || +                    (Py_ssize_t)pattern[2] == SRE_MAXREPEAT) && +                   state->ptr != ptr) { +                /* Save the Capture Group Marker state into the current +                   Context and back up the current highest number +                   Capture Group marker. */ +                LASTMARK_SAVE(); +                MARK_PUSH(ctx->lastmark); + +                /* zero-width match protection */ +                /* Set the context's Input Stream pointer to be the +                   current Input Stream pointer from the global +                   state.  When the loop reaches the next iteration, +                   the context will then store the last known good +                   position with the global state holding the Input +                   Input Stream position that has been updated with +                   the most recent match.  
Thus, if state's Input +                   stream remains the same as the one stored in the +                   current Context, we know we have successfully +                   matched an empty string and that all subsequent +                   matches will also be the empty string until the +                   maximum number of matches are counted, and because +                   of this, we could immediately stop at that point and +                   consider this match successful. */ +                ptr = state->ptr; + +                /* We have not reached the maximin matches, so try to +                   match once more. */ +                DO_JUMP0(JUMP_POSS_REPEAT_2, jump_poss_repeat_2, +                         &pattern[3]); + +                /* Check to see if the last attempted match +                   succeeded. */ +                if (ret) { +                    /* Drop the saved highest number Capture Group +                       marker saved above and use the newly updated +                       value. */ +                    MARK_POP_DISCARD(ctx->lastmark); +                    RETURN_ON_ERROR(ret); + +                    /* Success, increment the count. */ +                    ctx->count++; +                } +                /* Last attempted match failed. */ +                else { +                    /* Restore the previously saved highest number +                       Capture Group marker since the last iteration +                       did not match, then restore that to the global +                       state. */ +                    MARK_POP(ctx->lastmark); +                    LASTMARK_RESTORE(); + +                    /* Restore the global Input Stream pointer +                       since it can change after jumps. */ +                    state->ptr = ptr; + +                    /* We have sufficient matches, so exit loop. 
*/ +                    break; +                } +            } + +            /* Evaluate Tail */ +            /* Jump to end of pattern indicated by skip, and then skip +               the SUCCESS op code that follows it. */ +            pattern += pattern[0] + 1; +            ptr = state->ptr; +            DISPATCH; + +        TARGET(SRE_OP_ATOMIC_GROUP): +            /* Atomic Group Sub Pattern */ +            /* <ATOMIC_GROUP> <skip> pattern <SUCCESS> tail */ +            TRACE(("|%p|%p|ATOMIC_GROUP\n", pattern, ptr)); + +            /* Set the global Input pointer to this context's Input +               pointer */ +            state->ptr = ptr; + +            /* Evaluate the Atomic Group in a new context, terminating +               when the end of the group, represented by a SUCCESS op +               code, is reached. */ +            /* Group Pattern begins at an offset of 1 code. */ +            DO_JUMP0(JUMP_ATOMIC_GROUP, jump_atomic_group, +                     &pattern[1]); + +            /* Test Exit Condition */ +            RETURN_ON_ERROR(ret); + +            if (ret == 0) { +                /* Atomic Group failed to Match. */ +                state->ptr = ptr; +                RETURN_FAILURE; +            } + +            /* Evaluate Tail */ +            /* Jump to end of pattern indicated by skip, and then skip +               the SUCCESS op code that follows it. 
*/ +            pattern += pattern[0]; +            ptr = state->ptr; +            DISPATCH; + +        TARGET(SRE_OP_GROUPREF): +            /* match backreference */ +            TRACE(("|%p|%p|GROUPREF %d\n", pattern, +                   ptr, pattern[0])); +            { +                int groupref = pattern[0] * 2; +                if (groupref >= state->lastmark) { +                    RETURN_FAILURE; +                } else { +                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; +                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; +                    if (!p || !e || e < p) +                        RETURN_FAILURE; +                    while (p < e) { +                        if (ptr >= end || *ptr != *p) +                            RETURN_FAILURE; +                        p++; +                        ptr++; +                    } +                } +            } +            pattern++; +            DISPATCH; + +        TARGET(SRE_OP_GROUPREF_IGNORE): +            /* match backreference */ +            TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern, +                   ptr, pattern[0])); +            { +                int groupref = pattern[0] * 2; +                if (groupref >= state->lastmark) { +                    RETURN_FAILURE; +                } else { +                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; +                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; +                    if (!p || !e || e < p) +                        RETURN_FAILURE; +                    while (p < e) { +                        if (ptr >= end || +                            sre_lower_ascii(*ptr) != sre_lower_ascii(*p)) +                            RETURN_FAILURE; +                        p++; +                        ptr++; +                    } +                } +            } +            pattern++; +            DISPATCH; + +        TARGET(SRE_OP_GROUPREF_UNI_IGNORE): +            /* match 
backreference */ +            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern, +                   ptr, pattern[0])); +            { +                int groupref = pattern[0] * 2; +                if (groupref >= state->lastmark) { +                    RETURN_FAILURE; +                } else { +                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; +                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; +                    if (!p || !e || e < p) +                        RETURN_FAILURE; +                    while (p < e) { +                        if (ptr >= end || +                            sre_lower_unicode(*ptr) != sre_lower_unicode(*p)) +                            RETURN_FAILURE; +                        p++; +                        ptr++; +                    } +                } +            } +            pattern++; +            DISPATCH; + +        TARGET(SRE_OP_GROUPREF_LOC_IGNORE): +            /* match backreference */ +            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern, +                   ptr, pattern[0])); +            { +                int groupref = pattern[0] * 2; +                if (groupref >= state->lastmark) { +                    RETURN_FAILURE; +                } else { +                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; +                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; +                    if (!p || !e || e < p) +                        RETURN_FAILURE; +                    while (p < e) { +                        if (ptr >= end || +                            sre_lower_locale(*ptr) != sre_lower_locale(*p)) +                            RETURN_FAILURE; +                        p++; +                        ptr++; +                    } +                } +            } +            pattern++; +            DISPATCH; + +        TARGET(SRE_OP_GROUPREF_EXISTS): +            TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern, +                   ptr, pattern[0])); 
+            /* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */ +            { +                int groupref = pattern[0] * 2; +                if (groupref >= state->lastmark) { +                    pattern += pattern[1]; +                    DISPATCH; +                } else { +                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref]; +                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1]; +                    if (!p || !e || e < p) { +                        pattern += pattern[1]; +                        DISPATCH; +                    } +                } +            } +            pattern += 2; +            DISPATCH; + +        TARGET(SRE_OP_ASSERT): +            /* assert subpattern */ +            /* <ASSERT> <skip> <back> <pattern> */ +            TRACE(("|%p|%p|ASSERT %d\n", pattern, +                   ptr, pattern[1])); +            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) < pattern[1]) +                RETURN_FAILURE; +            state->ptr = ptr - pattern[1]; +            DO_JUMP0(JUMP_ASSERT, jump_assert, pattern+2); +            RETURN_ON_FAILURE(ret); +            pattern += pattern[0]; +            DISPATCH; + +        TARGET(SRE_OP_ASSERT_NOT): +            /* assert not subpattern */ +            /* <ASSERT_NOT> <skip> <back> <pattern> */ +            TRACE(("|%p|%p|ASSERT_NOT %d\n", pattern, +                   ptr, pattern[1])); +            if ((uintptr_t)(ptr - (SRE_CHAR *)state->beginning) >= pattern[1]) { +                state->ptr = ptr - pattern[1]; +                LASTMARK_SAVE(); +                if (state->repeat) +                    MARK_PUSH(ctx->lastmark); + +                DO_JUMP0(JUMP_ASSERT_NOT, jump_assert_not, pattern+2); +                if (ret) { +                    if (state->repeat) +                        MARK_POP_DISCARD(ctx->lastmark); +                    RETURN_ON_ERROR(ret); +                    RETURN_FAILURE; +                } +                if 
(state->repeat) +                    MARK_POP(ctx->lastmark); +                LASTMARK_RESTORE(); +            } +            pattern += pattern[0]; +            DISPATCH; + +        TARGET(SRE_OP_FAILURE): +            /* immediate failure */ +            TRACE(("|%p|%p|FAILURE\n", pattern, ptr)); +            RETURN_FAILURE; + +#if !USE_COMPUTED_GOTOS +        default: +#endif +        // Also any unused opcodes: +        TARGET(SRE_OP_RANGE_UNI_IGNORE): +        TARGET(SRE_OP_SUBPATTERN): +        TARGET(SRE_OP_RANGE): +        TARGET(SRE_OP_NEGATE): +        TARGET(SRE_OP_BIGCHARSET): +        TARGET(SRE_OP_CHARSET): +            TRACE(("|%p|%p|UNKNOWN %d\n", pattern, ptr, +                   pattern[-1])); +            RETURN_ERROR(SRE_ERROR_ILLEGAL); + +    } + +exit: +    ctx_pos = ctx->last_ctx_pos; +    jump = ctx->jump; +    DATA_POP_DISCARD(ctx); +    if (ctx_pos == -1) { +        state->sigcount = sigcount; +        return ret; +    } +    DATA_LOOKUP_AT(SRE(match_context), ctx, ctx_pos); + +    switch (jump) { +        case JUMP_MAX_UNTIL_2: +            TRACE(("|%p|%p|JUMP_MAX_UNTIL_2\n", pattern, ptr)); +            goto jump_max_until_2; +        case JUMP_MAX_UNTIL_3: +            TRACE(("|%p|%p|JUMP_MAX_UNTIL_3\n", pattern, ptr)); +            goto jump_max_until_3; +        case JUMP_MIN_UNTIL_2: +            TRACE(("|%p|%p|JUMP_MIN_UNTIL_2\n", pattern, ptr)); +            goto jump_min_until_2; +        case JUMP_MIN_UNTIL_3: +            TRACE(("|%p|%p|JUMP_MIN_UNTIL_3\n", pattern, ptr)); +            goto jump_min_until_3; +        case JUMP_BRANCH: +            TRACE(("|%p|%p|JUMP_BRANCH\n", pattern, ptr)); +            goto jump_branch; +        case JUMP_MAX_UNTIL_1: +            TRACE(("|%p|%p|JUMP_MAX_UNTIL_1\n", pattern, ptr)); +            goto jump_max_until_1; +        case JUMP_MIN_UNTIL_1: +            TRACE(("|%p|%p|JUMP_MIN_UNTIL_1\n", pattern, ptr)); +            goto jump_min_until_1; +        case JUMP_POSS_REPEAT_1: +         
   TRACE(("|%p|%p|JUMP_POSS_REPEAT_1\n", pattern, ptr)); +            goto jump_poss_repeat_1; +        case JUMP_POSS_REPEAT_2: +            TRACE(("|%p|%p|JUMP_POSS_REPEAT_2\n", pattern, ptr)); +            goto jump_poss_repeat_2; +        case JUMP_REPEAT: +            TRACE(("|%p|%p|JUMP_REPEAT\n", pattern, ptr)); +            goto jump_repeat; +        case JUMP_REPEAT_ONE_1: +            TRACE(("|%p|%p|JUMP_REPEAT_ONE_1\n", pattern, ptr)); +            goto jump_repeat_one_1; +        case JUMP_REPEAT_ONE_2: +            TRACE(("|%p|%p|JUMP_REPEAT_ONE_2\n", pattern, ptr)); +            goto jump_repeat_one_2; +        case JUMP_MIN_REPEAT_ONE: +            TRACE(("|%p|%p|JUMP_MIN_REPEAT_ONE\n", pattern, ptr)); +            goto jump_min_repeat_one; +        case JUMP_ATOMIC_GROUP: +            TRACE(("|%p|%p|JUMP_ATOMIC_GROUP\n", pattern, ptr)); +            goto jump_atomic_group; +        case JUMP_ASSERT: +            TRACE(("|%p|%p|JUMP_ASSERT\n", pattern, ptr)); +            goto jump_assert; +        case JUMP_ASSERT_NOT: +            TRACE(("|%p|%p|JUMP_ASSERT_NOT\n", pattern, ptr)); +            goto jump_assert_not; +        case JUMP_NONE: +            TRACE(("|%p|%p|RETURN %zd\n", pattern, +                   ptr, ret)); +            break; +    } + +    return ret; /* should never get here */ +} + +/* need to reset capturing groups between two SRE(match) callings in loops */ +#define RESET_CAPTURE_GROUP() \ +    do { state->lastmark = state->lastindex = -1; } while (0) +/* SRE(search): scan forward from state->start for the first position at which the pattern matches.  Returns 0 when no position matches, 1 as soon as the literal prefix is found if SRE_INFO_LITERAL is set, and otherwise the first non-zero status returned by SRE(match).  NOTE(review): negative statuses presumably signal errors -- confirm against SRE(match).  state->start and state->ptr are overwritten for every candidate position; a leading INFO block (minimum width, literal prefix plus overlap table, or charset) is used to skip positions. */ +LOCAL(Py_ssize_t) +SRE(search)(SRE_STATE* state, SRE_CODE* pattern) +{ +    SRE_CHAR* ptr = (SRE_CHAR *)state->start; +    SRE_CHAR* end = (SRE_CHAR *)state->end; +    Py_ssize_t status = 0; +    Py_ssize_t prefix_len = 0; +    Py_ssize_t prefix_skip = 0; +    SRE_CODE* prefix = NULL; +    SRE_CODE* charset = NULL; +    SRE_CODE* overlap = NULL; +    int flags = 0; + +    if (ptr > end) /* start lies beyond end: nothing to search */ +        return 0; + +    if (pattern[0] == SRE_OP_INFO) { +        /* optimization info 
block */ +        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */ + +        flags = pattern[2]; + +        if (pattern[3] && (uintptr_t)(end - ptr) < pattern[3]) { /* fewer characters remain than the minimum width in pattern[3] */ +            TRACE(("reject (got %tu chars, need %zu)\n", +                   end - ptr, (size_t) pattern[3])); +            return 0; +        } +        if (pattern[3] > 1) { +            /* adjust end point (but make sure we leave at least one +               character in there, so literal search will work) */ +            end -= pattern[3] - 1; +            if (end <= ptr) +                end = ptr; +        } + +        if (flags & SRE_INFO_PREFIX) { +            /* pattern starts with a known prefix */ +            /* <length> <skip> <prefix data> <overlap data> */ +            prefix_len = pattern[5]; +            prefix_skip = pattern[6]; +            prefix = pattern + 7; +            overlap = prefix + prefix_len - 1; /* biased by -1: the scan below only reads overlap[1..prefix_len] */ +        } else if (flags & SRE_INFO_CHARSET) +            /* pattern starts with a character from a known set */ +            /* <charset> */ +            charset = pattern + 5; + +        pattern += 1 + pattern[1]; /* step past the INFO block using its skip field */ +    } + +    TRACE(("prefix = %p %zd %zd\n", +           prefix, prefix_len, prefix_skip)); +    TRACE(("charset = %p\n", charset)); + +    if (prefix_len == 1) { +        /* pattern starts with a literal character */ +        SRE_CHAR c = (SRE_CHAR) prefix[0]; +#if SIZEOF_SRE_CHAR < 4 +        if ((SRE_CODE) c != prefix[0]) +            return 0; /* literal can't match: doesn't fit in char width */ +#endif +        end = (SRE_CHAR *)state->end; /* drop the min-width adjustment of end; only the general case at the bottom keeps it */ +        state->must_advance = 0; +        while (ptr < end) { +            while (*ptr != c) { +                if (++ptr >= end) +                    return 0; +            } +            TRACE(("|%p|%p|SEARCH LITERAL\n", pattern, ptr)); +            state->start = ptr; +            state->ptr = ptr + prefix_skip; /* resume matching prefix_skip characters past the candidate start */ +            if (flags & SRE_INFO_LITERAL) +                return 1; /* we got all of it */ 
+            status = SRE(match)(state, pattern + 2*prefix_skip, 0); /* NOTE(review): 2*prefix_skip presumably steps over prefix_skip two-code LITERAL ops already verified by the scan -- confirm */ +            if (status != 0) +                return status; +            ++ptr; +            RESET_CAPTURE_GROUP(); +        } +        return 0; +    } + +    if (prefix_len > 1) { +        /* pattern starts with a known prefix.  use the overlap +           table to skip forward as fast as we possibly can */ +        Py_ssize_t i = 0; + +        end = (SRE_CHAR *)state->end; +        if (prefix_len > end - ptr) +            return 0; +#if SIZEOF_SRE_CHAR < 4 +        for (i = 0; i < prefix_len; i++) +            if ((SRE_CODE)(SRE_CHAR) prefix[i] != prefix[i]) +                return 0; /* literal can't match: doesn't fit in char width */ +#endif +        while (ptr < end) { +            SRE_CHAR c = (SRE_CHAR) prefix[0]; +            while (*ptr++ != c) { /* post-increment: ptr ends up one past the matching first character */ +                if (ptr >= end) +                    return 0; +            } +            if (ptr >= end) +                return 0; + +            i = 1; +            state->must_advance = 0; +            do { +                if (*ptr == (SRE_CHAR) prefix[i]) { +                    if (++i != prefix_len) { +                        if (++ptr >= end) +                            return 0; +                        continue; /* jumps to the i != 0 test (true here), bypassing the overlap lookup */ +                    } +                    /* found a potential match */ +                    TRACE(("|%p|%p|SEARCH SCAN\n", pattern, ptr)); +                    state->start = ptr - (prefix_len - 1); /* ptr is on the last prefix character; back up to the first */ +                    state->ptr = ptr - (prefix_len - prefix_skip - 1); +                    if (flags & SRE_INFO_LITERAL) +                        return 1; /* we got all of it */ +                    status = SRE(match)(state, pattern + 2*prefix_skip, 0); +                    if (status != 0) +                        return status; +                    /* close but no cigar -- try again */ +                    if (++ptr >= end) +                        return 0; +                    RESET_CAPTURE_GROUP(); +                
} +                i = overlap[i]; /* NOTE(review): looks like a KMP-style fallback to the longest prefix still matched -- confirm */ +            } while (i != 0); +        } +        return 0; +    } + +    if (charset) { +        /* pattern starts with a character from a known set */ +        end = (SRE_CHAR *)state->end; +        state->must_advance = 0; +        for (;;) { +            while (ptr < end && !SRE(charset)(state, charset, *ptr)) +                ptr++; +            if (ptr >= end) +                return 0; +            TRACE(("|%p|%p|SEARCH CHARSET\n", pattern, ptr)); +            state->start = ptr; +            state->ptr = ptr; +            status = SRE(match)(state, pattern, 0); +            if (status != 0) +                break; +            ptr++; +            RESET_CAPTURE_GROUP(); +        } +    } else { +        /* general case: no prefix or charset hint was set, so a match is attempted at successive positions; end is still the min-width-adjusted end here, and positions beyond it are not tried */ +        assert(ptr <= end); +        TRACE(("|%p|%p|SEARCH\n", pattern, ptr)); +        state->start = state->ptr = ptr; +        status = SRE(match)(state, pattern, 1); /* only this first attempt passes 1 as the third argument; NOTE(review): presumably a top-level flag -- confirm against SRE(match) */ +        state->must_advance = 0; +        if (status == 0 && pattern[0] == SRE_OP_AT && +            (pattern[1] == SRE_AT_BEGINNING || +             pattern[1] == SRE_AT_BEGINNING_STRING)) +        { /* a pattern anchored at the beginning failed at the first position: park at end and report no match without retrying */ +            state->start = state->ptr = ptr = end; +            return 0; +        } +        while (status == 0 && ptr < end) { +            ptr++; +            RESET_CAPTURE_GROUP(); +            TRACE(("|%p|%p|SEARCH\n", pattern, ptr)); +            state->start = state->ptr = ptr; +            status = SRE(match)(state, pattern, 0); +        } +    } + +    return status; +} + +#undef SRE_CHAR +#undef SIZEOF_SRE_CHAR +#undef SRE + +/* vim:ts=4:sw=4:et +*/ diff --git a/contrib/tools/python3/Modules/_sre/sre_targets.h b/contrib/tools/python3/Modules/_sre/sre_targets.h new file mode 100644 index 00000000000..62761a0000d --- /dev/null +++ b/contrib/tools/python3/Modules/_sre/sre_targets.h @@ -0,0 +1,58 @@ +/* + * Secret Labs' Regular Expression Engine + * + * regular expression matching engine + * + * Auto-generated by 
Tools/build/generate_sre_constants.py from + * Lib/re/_constants.py. + * + * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved. + * + * See the sre.c file for information on usage and redistribution. + */ +/* sre_targets: table of 43 label addresses written with the `&&label` (labels-as-values) syntax, one entry per TARGET_SRE_OP_* label.  NOTE(review): slot order presumably has to equal the numeric opcode values generated from Lib/re/_constants.py -- confirm before reordering or inserting entries. */ +static void *sre_targets[43] = { +    &&TARGET_SRE_OP_FAILURE, +    &&TARGET_SRE_OP_SUCCESS, +    &&TARGET_SRE_OP_ANY, +    &&TARGET_SRE_OP_ANY_ALL, +    &&TARGET_SRE_OP_ASSERT, +    &&TARGET_SRE_OP_ASSERT_NOT, +    &&TARGET_SRE_OP_AT, +    &&TARGET_SRE_OP_BRANCH, +    &&TARGET_SRE_OP_CATEGORY, +    &&TARGET_SRE_OP_CHARSET, +    &&TARGET_SRE_OP_BIGCHARSET, +    &&TARGET_SRE_OP_GROUPREF, +    &&TARGET_SRE_OP_GROUPREF_EXISTS, +    &&TARGET_SRE_OP_IN, +    &&TARGET_SRE_OP_INFO, +    &&TARGET_SRE_OP_JUMP, +    &&TARGET_SRE_OP_LITERAL, +    &&TARGET_SRE_OP_MARK, +    &&TARGET_SRE_OP_MAX_UNTIL, +    &&TARGET_SRE_OP_MIN_UNTIL, +    &&TARGET_SRE_OP_NOT_LITERAL, +    &&TARGET_SRE_OP_NEGATE, +    &&TARGET_SRE_OP_RANGE, +    &&TARGET_SRE_OP_REPEAT, +    &&TARGET_SRE_OP_REPEAT_ONE, +    &&TARGET_SRE_OP_SUBPATTERN, +    &&TARGET_SRE_OP_MIN_REPEAT_ONE, +    &&TARGET_SRE_OP_ATOMIC_GROUP, +    &&TARGET_SRE_OP_POSSESSIVE_REPEAT, +    &&TARGET_SRE_OP_POSSESSIVE_REPEAT_ONE, +    &&TARGET_SRE_OP_GROUPREF_IGNORE, +    &&TARGET_SRE_OP_IN_IGNORE, +    &&TARGET_SRE_OP_LITERAL_IGNORE, +    &&TARGET_SRE_OP_NOT_LITERAL_IGNORE, +    &&TARGET_SRE_OP_GROUPREF_LOC_IGNORE, +    &&TARGET_SRE_OP_IN_LOC_IGNORE, +    &&TARGET_SRE_OP_LITERAL_LOC_IGNORE, +    &&TARGET_SRE_OP_NOT_LITERAL_LOC_IGNORE, +    &&TARGET_SRE_OP_GROUPREF_UNI_IGNORE, +    &&TARGET_SRE_OP_IN_UNI_IGNORE, +    &&TARGET_SRE_OP_LITERAL_UNI_IGNORE, +    &&TARGET_SRE_OP_NOT_LITERAL_UNI_IGNORE, +    &&TARGET_SRE_OP_RANGE_UNI_IGNORE, +}; | 
