author     shadchin <shadchin@yandex-team.com>      2024-02-12 07:53:52 +0300
committer  Daniil Cherednik <dcherednik@ydb.tech>   2024-02-14 14:26:16 +0000
commit     31f2a419764a8ba77c2a970cfc80056c6cd06756 (patch)
tree       c1995d239eba8571cefc640f6648e1d5dd4ce9e2 /contrib/tools/python3/src/Parser/tokenizer.h
parent     fe2ef02b38d9c85d80060963b265a1df9f38c3bb (diff)
download   ydb-31f2a419764a8ba77c2a970cfc80056c6cd06756.tar.gz
Update Python from 3.11.8 to 3.12.2
Diffstat (limited to 'contrib/tools/python3/src/Parser/tokenizer.h')
-rw-r--r--  contrib/tools/python3/src/Parser/tokenizer.h | 66
1 file changed, 59 insertions(+), 7 deletions(-)
diff --git a/contrib/tools/python3/src/Parser/tokenizer.h b/contrib/tools/python3/src/Parser/tokenizer.h
index 0593d773e5..1e1daa3648 100644
--- a/contrib/tools/python3/src/Parser/tokenizer.h
+++ b/contrib/tools/python3/src/Parser/tokenizer.h
@@ -8,10 +8,11 @@ extern "C" {
/* Tokenizer interface */
-#include "token.h" /* For token types */
+#include "pycore_token.h" /* For token types */
-#define MAXINDENT 100 /* Max indentation level */
-#define MAXLEVEL 200 /* Max parentheses level */
+#define MAXINDENT 100 /* Max indentation level */
+#define MAXLEVEL 200 /* Max parentheses level */
+#define MAXFSTRINGLEVEL 150 /* Max f-string nesting level */
enum decoding_state {
STATE_INIT,
@@ -27,11 +28,47 @@ enum interactive_underflow_t {
IUNDERFLOW_STOP,
};
+struct token {
+ int level;
+ int lineno, col_offset, end_lineno, end_col_offset;
+ const char *start, *end;
+ PyObject *metadata;
+};
+
+enum tokenizer_mode_kind_t {
+ TOK_REGULAR_MODE,
+ TOK_FSTRING_MODE,
+};
+
+#define MAX_EXPR_NESTING 3
+
+typedef struct _tokenizer_mode {
+ enum tokenizer_mode_kind_t kind;
+
+ int curly_bracket_depth;
+ int curly_bracket_expr_start_depth;
+
+ char f_string_quote;
+ int f_string_quote_size;
+ int f_string_raw;
+ const char* f_string_start;
+ const char* f_string_multi_line_start;
+ int f_string_line_start;
+
+ Py_ssize_t f_string_start_offset;
+ Py_ssize_t f_string_multi_line_start_offset;
+
+ Py_ssize_t last_expr_size;
+ Py_ssize_t last_expr_end;
+ char* last_expr_buffer;
+ int f_string_debug;
+} tokenizer_mode;
+
/* Tokenizer state */
struct tok_state {
/* Input state; buf <= cur <= inp <= end */
/* NB an entire line is held in the buffer */
- char *buf; /* Input buffer, or NULL; malloc'ed if fp != NULL */
+ char *buf; /* Input buffer, or NULL; malloc'ed if fp != NULL or readline != NULL */
char *cur; /* Next character in buffer */
char *inp; /* End of data in buffer */
int fp_interactive; /* If the file descriptor is interactive */
@@ -51,6 +88,8 @@ struct tok_state {
int lineno; /* Current line number */
int first_lineno; /* First line of a single line or multi line string
expression (cf. issue 16806) */
+ int starting_col_offset; /* The column offset at the beginning of a token */
+ int col_offset; /* Current col offset */
int level; /* () [] {} Parentheses nesting level */
/* Used to allow free continuations inside them */
char parenstack[MAXLEVEL];
@@ -70,6 +109,7 @@ struct tok_state {
expression (cf. issue 16806) */
PyObject *decoding_readline; /* open(...).readline */
PyObject *decoding_buffer;
+ PyObject *readline; /* readline() function */
const char* enc; /* Encoding for the current str. */
char* str; /* Source string being tokenized (if tokenizing from a string)*/
char* input; /* Tokenizer's newline translated copy of the string. */
@@ -85,14 +125,26 @@ struct tok_state {
/* How to proceed when asked for a new token in interactive mode */
enum interactive_underflow_t interactive_underflow;
int report_warnings;
+ // TODO: Factor this into its own thing
+ tokenizer_mode tok_mode_stack[MAXFSTRINGLEVEL];
+ int tok_mode_stack_index;
+ int tok_extra_tokens;
+ int comment_newline;
+ int implicit_newline;
+#ifdef Py_DEBUG
+ int debug;
+#endif
};
-extern struct tok_state *_PyTokenizer_FromString(const char *, int);
-extern struct tok_state *_PyTokenizer_FromUTF8(const char *, int);
+extern struct tok_state *_PyTokenizer_FromString(const char *, int, int);
+extern struct tok_state *_PyTokenizer_FromUTF8(const char *, int, int);
+extern struct tok_state *_PyTokenizer_FromReadline(PyObject*, const char*, int, int);
extern struct tok_state *_PyTokenizer_FromFile(FILE *, const char*,
const char *, const char *);
extern void _PyTokenizer_Free(struct tok_state *);
-extern int _PyTokenizer_Get(struct tok_state *, const char **, const char **);
+extern void _PyToken_Free(struct token *);
+extern void _PyToken_Init(struct token *);
+extern int _PyTokenizer_Get(struct tok_state *, struct token *);
#define tok_dump _Py_tok_dump
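
For context, the diff above changes the internal tokenizer calling convention: in 3.11, _PyTokenizer_Get returned the lexeme through a pair of const char ** out-parameters, while in 3.12 it fills a struct token that also carries positions and metadata, and _PyTokenizer_FromString/_PyTokenizer_FromUTF8 gain a third int argument. Below is a minimal sketch of the new convention. This is internal, Py_BUILD_CORE-only API; the parameter names (exec_input, preserve_crlf) and the ENDMARKER/ERRORTOKEN constants are taken from CPython 3.12's sources, not from this diff, so treat them as assumptions.

/*
 * Sketch only: iterates over the tokens of a source string using the
 * 3.12 internal tokenizer API declared in this header.
 */
#include <stdio.h>
#include "pycore_token.h"   /* token type constants (ENDMARKER, ...) */
#include "tokenizer.h"      /* Parser/tokenizer.h, internal header */

static int
dump_tokens(const char *source)
{
    /* 3.12 adds a third int argument here; preserve_crlf is the
       assumed name of the new parameter. */
    struct tok_state *tok =
        _PyTokenizer_FromString(source, /* exec_input */ 1,
                                /* preserve_crlf */ 0);
    if (tok == NULL) {
        return -1;
    }

    int type;
    do {
        struct token t;
        _PyToken_Init(&t);              /* new in 3.12: token is an out-parameter */
        type = _PyTokenizer_Get(tok, &t);
        if (t.start != NULL && t.end != NULL) {
            printf("type=%d line=%d col=%d text=%.*s\n",
                   type, t.lineno, t.col_offset,
                   (int)(t.end - t.start), t.start);
        }
        _PyToken_Free(&t);              /* releases t.metadata, if set */
    } while (type != ENDMARKER && type != ERRORTOKEN);

    _PyTokenizer_Free(tok);
    return 0;
}

The per-iteration _PyToken_Init/_PyToken_Free pairing mirrors how CPython's own callers drive this API: _PyTokenizer_Get may attach a PyObject to t.metadata (e.g., for f-string parts), so each token must be freed before the struct is reused.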