| field | value | date |
|---|---|---|
| author | nkozlovskiy <nmk@ydb.tech> | 2023-09-29 12:24:06 +0300 |
| committer | nkozlovskiy <nmk@ydb.tech> | 2023-09-29 12:41:34 +0300 |
| commit | e0e3e1717e3d33762ce61950504f9637a6e669ed (patch) | |
| tree | bca3ff6939b10ed60c3d5c12439963a1146b9711 /contrib/tools/python3/src/Python/Python-tokenize.c | |
| parent | 38f2c5852db84c7b4d83adfcb009eb61541d1ccd (diff) | |
| download | ydb-e0e3e1717e3d33762ce61950504f9637a6e669ed.tar.gz | |
add ydb deps
Diffstat (limited to 'contrib/tools/python3/src/Python/Python-tokenize.c')
| mode | file | insertions |
|---|---|---|
| -rw-r--r-- | contrib/tools/python3/src/Python/Python-tokenize.c | 194 |

1 file changed, 194 insertions, 0 deletions
```
diff --git a/contrib/tools/python3/src/Python/Python-tokenize.c b/contrib/tools/python3/src/Python/Python-tokenize.c
new file mode 100644
index 0000000000..6acfc2a7cf
--- /dev/null
+++ b/contrib/tools/python3/src/Python/Python-tokenize.c
@@ -0,0 +1,194 @@
```

```c
#include "Python.h"
#include "../Parser/tokenizer.h"

static struct PyModuleDef _tokenizemodule;

typedef struct {
    PyTypeObject *TokenizerIter;
} tokenize_state;

static tokenize_state *
get_tokenize_state(PyObject *module) {
    return (tokenize_state *)PyModule_GetState(module);
}

#define _tokenize_get_state_by_type(type) \
    get_tokenize_state(PyType_GetModuleByDef(type, &_tokenizemodule))

#include "clinic/Python-tokenize.c.h"

/*[clinic input]
module _tokenizer
class _tokenizer.tokenizeriter "tokenizeriterobject *" "_tokenize_get_state_by_type(type)->TokenizerIter"
[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=96d98ee2fef7a8bc]*/

typedef struct
{
    PyObject_HEAD struct tok_state *tok;
} tokenizeriterobject;

/*[clinic input]
@classmethod
_tokenizer.tokenizeriter.__new__ as tokenizeriter_new

    source: str
[clinic start generated code]*/

static PyObject *
tokenizeriter_new_impl(PyTypeObject *type, const char *source)
/*[clinic end generated code: output=7fd9f46cf9263cbb input=4384b368407375c6]*/
{
    tokenizeriterobject *self = (tokenizeriterobject *)type->tp_alloc(type, 0);
    if (self == NULL) {
        return NULL;
    }
    PyObject *filename = PyUnicode_FromString("<string>");
    if (filename == NULL) {
        return NULL;
    }
    self->tok = _PyTokenizer_FromUTF8(source, 1);
    if (self->tok == NULL) {
        Py_DECREF(filename);
        return NULL;
    }
    self->tok->filename = filename;
    return (PyObject *)self;
}

static PyObject *
tokenizeriter_next(tokenizeriterobject *it)
{
    const char *start;
    const char *end;
    int type = _PyTokenizer_Get(it->tok, &start, &end);
    if (type == ERRORTOKEN && PyErr_Occurred()) {
        return NULL;
    }
    if (type == ERRORTOKEN || type == ENDMARKER) {
        PyErr_SetString(PyExc_StopIteration, "EOF");
        return NULL;
    }
    PyObject *str = NULL;
    if (start == NULL || end == NULL) {
        str = PyUnicode_FromString("");
    }
    else {
        str = PyUnicode_FromStringAndSize(start, end - start);
    }
    if (str == NULL) {
        return NULL;
    }

    Py_ssize_t size = it->tok->inp - it->tok->buf;
    PyObject *line = PyUnicode_DecodeUTF8(it->tok->buf, size, "replace");
    if (line == NULL) {
        Py_DECREF(str);
        return NULL;
    }
    const char *line_start = type == STRING ? it->tok->multi_line_start : it->tok->line_start;
    int lineno = type == STRING ?
        it->tok->first_lineno : it->tok->lineno;
    int end_lineno = it->tok->lineno;
    int col_offset = -1;
    int end_col_offset = -1;
    if (start != NULL && start >= line_start) {
        col_offset = (int)(start - line_start);
    }
    if (end != NULL && end >= it->tok->line_start) {
        end_col_offset = (int)(end - it->tok->line_start);
    }

    return Py_BuildValue("(NiiiiiN)", str, type, lineno, end_lineno, col_offset, end_col_offset, line);
}

static void
tokenizeriter_dealloc(tokenizeriterobject *it)
{
    PyTypeObject *tp = Py_TYPE(it);
    _PyTokenizer_Free(it->tok);
    tp->tp_free(it);
    Py_DECREF(tp);
}

static PyType_Slot tokenizeriter_slots[] = {
    {Py_tp_new, tokenizeriter_new},
    {Py_tp_dealloc, tokenizeriter_dealloc},
    {Py_tp_getattro, PyObject_GenericGetAttr},
    {Py_tp_iter, PyObject_SelfIter},
    {Py_tp_iternext, tokenizeriter_next},
    {0, NULL},
};

static PyType_Spec tokenizeriter_spec = {
    .name = "_tokenize.TokenizerIter",
    .basicsize = sizeof(tokenizeriterobject),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE),
    .slots = tokenizeriter_slots,
};

static int
tokenizemodule_exec(PyObject *m)
{
    tokenize_state *state = get_tokenize_state(m);
    if (state == NULL) {
        return -1;
    }

    state->TokenizerIter = (PyTypeObject *)PyType_FromModuleAndSpec(m, &tokenizeriter_spec, NULL);
    if (state->TokenizerIter == NULL) {
        return -1;
    }
    if (PyModule_AddType(m, state->TokenizerIter) < 0) {
        return -1;
    }

    return 0;
}

static PyMethodDef tokenize_methods[] = {
    {NULL, NULL, 0, NULL} /* Sentinel */
};

static PyModuleDef_Slot tokenizemodule_slots[] = {
    {Py_mod_exec, tokenizemodule_exec},
    {0, NULL}
};

static int
tokenizemodule_traverse(PyObject *m, visitproc visit, void *arg)
{
    tokenize_state *state = get_tokenize_state(m);
    Py_VISIT(state->TokenizerIter);
    return 0;
}

static int
tokenizemodule_clear(PyObject *m)
{
    tokenize_state *state = get_tokenize_state(m);
    Py_CLEAR(state->TokenizerIter);
    return 0;
}

static void
tokenizemodule_free(void *m)
{
    tokenizemodule_clear((PyObject *)m);
}

static struct PyModuleDef _tokenizemodule = {
    PyModuleDef_HEAD_INIT,
    .m_name = "_tokenize",
    .m_size = sizeof(tokenize_state),
    .m_slots = tokenizemodule_slots,
    .m_methods = tokenize_methods,
    .m_traverse = tokenizemodule_traverse,
    .m_clear = tokenizemodule_clear,
    .m_free = tokenizemodule_free,
};

PyMODINIT_FUNC
PyInit__tokenize(void)
{
    return PyModuleDef_Init(&_tokenizemodule);
}
```
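The file above is the private `_tokenize` C extension that backs CPython's pure-Python `tokenize` module. A minimal usage sketch, assuming a build where this private module is importable (the version vendored here matches CPython 3.11; the interface is internal and changed in later releases):

```python
# Sketch only: _tokenize is a private CPython module, not a supported API.
# Each yielded 7-tuple matches Py_BuildValue("(NiiiiiN)") in
# tokenizeriter_next() above.
import _tokenize

for tok in _tokenize.TokenizerIter("x = 1\n"):
    # (string, type, lineno, end_lineno, col_offset, end_col_offset, line)
    tok_str, tok_type, lineno, end_lineno, col, end_col, line = tok
    print(tok_type, repr(tok_str), (lineno, col), (end_lineno, end_col))
```

Note the design: the module uses multi-phase initialization (`PyModuleDef_Slot` with `Py_mod_exec`) and stores `TokenizerIter` in per-module state rather than a C static, with matching `m_traverse`/`m_clear`/`m_free` hooks, so the heap type is correctly garbage-collected and the module works under multiple interpreters.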