author    | alexv-smirnov <alex@ydb.tech> | 2023-06-13 11:05:01 +0300
committer | alexv-smirnov <alex@ydb.tech> | 2023-06-13 11:05:01 +0300
commit    | bf0f13dd39ee3e65092ba3572bb5b1fcd125dcd0 (patch)
tree      | 1d1df72c0541a59a81439842f46d95396d3e7189 /contrib/tools/cython/Cython/Parser
parent    | 8bfdfa9a9bd19bddbc58d888e180fbd1218681be (diff)
download  | ydb-bf0f13dd39ee3e65092ba3572bb5b1fcd125dcd0.tar.gz
add ymake export to ydb
Diffstat (limited to 'contrib/tools/cython/Cython/Parser')
-rw-r--r-- | contrib/tools/cython/Cython/Parser/ConcreteSyntaxTree.pyx | 88
-rw-r--r-- | contrib/tools/cython/Cython/Parser/Grammar                | 214
-rw-r--r-- | contrib/tools/cython/Cython/Parser/__init__.py            | 0
3 files changed, 302 insertions, 0 deletions
diff --git a/contrib/tools/cython/Cython/Parser/ConcreteSyntaxTree.pyx b/contrib/tools/cython/Cython/Parser/ConcreteSyntaxTree.pyx
new file mode 100644
index 0000000000..f9888c561c
--- /dev/null
+++ b/contrib/tools/cython/Cython/Parser/ConcreteSyntaxTree.pyx
@@ -0,0 +1,88 @@
+cdef extern from "graminit.c":
+    ctypedef struct grammar:
+        pass
+    cdef grammar _PyParser_Grammar
+    cdef int Py_file_input
+
+cdef extern from "node.h":
+    ctypedef struct node
+    void PyNode_Free(node* n)
+    int NCH(node* n)
+    node* CHILD(node* n, int ix)
+    node* RCHILD(node* n, int ix)
+    short TYPE(node* n)
+    char* STR(node* n)
+
+cdef extern from "parsetok.h":
+    ctypedef struct perrdetail:
+        pass
+    cdef void PyParser_SetError(perrdetail *err) except *
+    cdef node * PyParser_ParseStringFlagsFilenameEx(
+        const char * s,
+        const char * filename,
+        grammar * g,
+        int start,
+        perrdetail * err_ret,
+        int * flags)
+
+import distutils.sysconfig
+import os
+import re
+
+def extract_names(path):
+    # All parse tree types are #defined in these files as ints.
+    type_names = {}
+    for line in open(path):
+        if line.startswith('#define'):
+            try:
+                _, name, value = line.strip().split()
+                type_names[int(value)] = name
+            except:
+                pass
+    return type_names
+
+cdef dict type_names = {}
+
+cdef print_tree(node* n, indent=""):
+    if not type_names:
+        type_names.update(extract_names(
+            os.path.join(distutils.sysconfig.get_python_inc(), 'token.h')))
+        type_names.update(extract_names(
+            os.path.join(os.path.dirname(__file__), 'graminit.h')))
+
+    print indent, type_names.get(TYPE(n), 'unknown'), <object>STR(n) if NCH(n) == 0 else NCH(n)
+    indent += "  "
+    for i in range(NCH(n)):
+        print_tree(CHILD(n, i), indent)
+
+def handle_includes(source, path):
+    # TODO: Use include directory.
+    def include_here(include_line):
+        included = os.path.join(os.path.dirname(path), include_line.group(1)[1:-1])
+        if not os.path.exists(included):
+            return include_line.group(0) + ' # no such path: ' + included
+        return handle_includes(open(included).read(), path)
+    # TODO: Proper string tokenizing.
+    return re.sub(r'^include\s+([^\n]+[\'"])\s*(#.*)?$', include_here, source, flags=re.M)
+
+def p_module(path):
+    cdef perrdetail err
+    cdef int flags
+    cdef node* n
+    source = open(path).read()
+    if '\ninclude ' in source:
+        # TODO: Tokanizer needs to understand includes.
+        source = handle_includes(source, path)
+        path = "preparse(%s)" % path
+    n = PyParser_ParseStringFlagsFilenameEx(
+        source,
+        path,
+        &_PyParser_Grammar,
+        Py_file_input,
+        &err,
+        &flags)
+    if n:
+#        print_tree(n)
+        PyNode_Free(n)
+    else:
+        PyParser_SetError(&err)
diff --git a/contrib/tools/cython/Cython/Parser/Grammar b/contrib/tools/cython/Cython/Parser/Grammar
new file mode 100644
index 0000000000..214e36d5a3
--- /dev/null
+++ b/contrib/tools/cython/Cython/Parser/Grammar
@@ -0,0 +1,214 @@
+# Grammar for Cython, based on the Grammar for Python 3
+
+# Note: This grammar is not yet used by the Cython parser and is subject to change.
+
+# Start symbols for the grammar:
+#       single_input is a single interactive statement;
+#       file_input is a module or sequence of commands read from an input file;
+#       eval_input is the input for the eval() functions.
+# NB: compound_stmt in single_input is followed by extra NEWLINE!
+single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
+file_input: (NEWLINE | stmt)* ENDMARKER
+eval_input: testlist NEWLINE* ENDMARKER
+
+decorator: '@' dotted_PY_NAME [ '(' [arglist] ')' ] NEWLINE
+decorators: decorator+
+decorated: decorators (classdef | funcdef | async_funcdef | cdef_stmt)
+async_funcdef: 'async' funcdef
+funcdef: 'def' PY_NAME parameters ['->' test] ':' suite
+parameters: '(' [typedargslist] ')'
+typedargslist: (tfpdef ['=' (test | '*')] (',' tfpdef ['=' (test | '*')])* [','
+       ['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]]
+     | '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef) [',' ellipsis]
+tfpdef: maybe_typed_name [('not' | 'or') 'None'] [':' test]
+varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [','
+       ['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]]
+     | '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef)
+vfpdef: maybe_typed_name ['not' 'None']
+
+stmt: simple_stmt | compound_stmt | cdef_stmt | ctypedef_stmt | DEF_stmt | IF_stmt
+simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
+small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
+             import_stmt | global_stmt | nonlocal_stmt | assert_stmt | print_stmt)
+expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) |
+                     ('=' (yield_expr|testlist_star_expr))*)
+testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
+augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
+            '<<=' | '>>=' | '**=' | '//=')
+print_stmt: 'print' ( [ test (',' test)* [','] ] |
+                      '>>' test [ (',' test)+ [','] ] )
+# For normal assignments, additional restrictions enforced by the interpreter
+del_stmt: 'del' exprlist
+pass_stmt: 'pass'
+flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
+break_stmt: 'break'
+continue_stmt: 'continue'
+return_stmt: 'return' [testlist]
+yield_stmt: yield_expr
+raise_stmt: 'raise' [test ['from' test]]
+# raise_stmt: 'raise' [test [',' test [',' test]]]
+import_stmt: import_PY_NAME | import_from
+import_PY_NAME: ('import' | 'cimport') dotted_as_PY_NAMEs
+# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
+import_from: ('from' (('.' | '...')* dotted_PY_NAME | ('.' | '...')+)
+              ('import' | 'cimport') ('*' | '(' import_as_PY_NAMEs ')' | import_as_PY_NAMEs))
+import_as_PY_NAME: PY_NAME ['as' PY_NAME]
+dotted_as_PY_NAME: dotted_PY_NAME ['as' PY_NAME]
+import_as_PY_NAMEs: import_as_PY_NAME (',' import_as_PY_NAME)* [',']
+dotted_as_PY_NAMEs: dotted_as_PY_NAME (',' dotted_as_PY_NAME)*
+dotted_PY_NAME: PY_NAME ('.' PY_NAME)*
+global_stmt: 'global' PY_NAME (',' PY_NAME)*
+nonlocal_stmt: 'nonlocal' PY_NAME (',' PY_NAME)*
+exec_stmt: 'exec' expr ['in' test [',' test]]
+assert_stmt: 'assert' test [',' test]
+
+compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated
+if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
+while_stmt: 'while' test ':' suite ['else' ':' suite]
+for_stmt: 'for' exprlist ('in' testlist | for_from_clause)':' suite ['else' ':' suite]
+for_from_clause: 'from' expr comp_op PY_NAME comp_op expr ['by' expr]
+try_stmt: ('try' ':' suite
+           ((except_clause ':' suite)+
+            ['else' ':' suite]
+            ['finally' ':' suite] |
+            'finally' ':' suite))
+with_stmt: 'with' with_item (',' with_item)* ':' suite
+with_item: test ['as' expr]
+# NB compile.c makes sure that the default except clause is last
+except_clause: 'except' [test [('as' | ',') test]]
+suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
+
+test: or_test ['if' or_test 'else' test] | lambdef
+test_nocond: or_test | lambdef_nocond
+lambdef: 'lambda' [varargslist] ':' test
+lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
+or_test: and_test ('or' and_test)*
+and_test: not_test ('and' not_test)*
+not_test: 'not' not_test | comparison
+comparison: expr (comp_op expr)*
+# <> isn't actually a valid comparison operator in Python. It's here for the
+# sake of a __future__ import described in PEP 401
+comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
+star_expr: '*' expr
+expr: xor_expr ('|' xor_expr)*
+xor_expr: and_expr ('^' and_expr)*
+and_expr: shift_expr ('&' shift_expr)*
+shift_expr: arith_expr (('<<'|'>>') arith_expr)*
+arith_expr: term (('+'|'-') term)*
+term: factor (('*'|'/'|'%'|'//') factor)*
+factor: ('+'|'-'|'~') factor | power | address | size_of | cast
+power: atom_expr ['**' factor]
+atom_expr: ['await'] atom trailer*
+atom: ('(' [yield_expr|testlist_comp] ')' |
+       '[' [testlist_comp] ']' |
+       '{' [dictorsetmaker] '}' |
+       new_expr |
+       PY_NAME | NUMBER | STRING+ | ellipsis | 'None' | 'True' | 'False')
+testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
+trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' (PY_NAME | 'sizeof')
+subscriptlist: subscript (',' subscript)* [',']
+subscript: test | [test] ':' [test] [sliceop]
+sliceop: ':' [test]
+exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
+testlist: test (',' test)* [',']
+dictorsetmaker: ( ((test ':' test | '**' expr)
+                   (comp_for | (',' (test ':' test | '**' expr))* [','])) |
+                  ((test | star_expr)
+                   (comp_for | (',' (test | star_expr))* [','])) )
+
+classdef: 'class' PY_NAME ['(' [arglist] ')'] ':' suite
+
+arglist: argument (',' argument)* [',']
+
+# The reason that keywords are test nodes instead of NAME is that using NAME
+# results in an ambiguity. ast.c makes sure it's a NAME.
+# "test '=' test" is really "keyword '=' test", but we have no such token.
+# These need to be in a single rule to avoid grammar that is ambiguous
+# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr,
+# we explicitly match '*' here, too, to give it proper precedence.
+# Illegal combinations and orderings are blocked in ast.c:
+# multiple (test comp_for) arguments are blocked; keyword unpackings
+# that precede iterable unpackings are blocked; etc.
+argument: ( test [comp_for] |
+            test '=' test |
+            '**' expr |
+            star_expr )
+
+comp_iter: comp_for | comp_if
+comp_for: 'for' exprlist ('in' or_test | for_from_clause) [comp_iter]
+comp_if: 'if' test_nocond [comp_iter]
+
+# not used in grammar, but may appear in "node" passed from Parser to Compiler
+encoding_decl: NAME
+
+yield_expr: 'yield' [yield_arg]
+yield_arg: 'from' test | testlist
+
+
+# Cython extensions
+
+# Accommodate to Py2 tokenizer.
+ellipsis: '...' | '.' '.' '.'
+
+signedness: 'unsigned' | 'signed'
+longness: 'char' | 'short' | 'long' | 'long' 'long'
+# TODO: [unsigned] double doesn't make sens, but we need long double
+int_type: signedness [longness] | longness | [signedness] [longness] ('int' | 'double') | 'complex'
+
+type: ['const'] (NAME ('.' PY_NAME)* | int_type | '(' type ')') ['complex'] [type_qualifiers]
+maybe_typed_name: ['const'] (NAME [('.' PY_NAME)* ['complex'] [type_qualifiers] NAME] | (int_type | '(' type ')') ['complex'] [type_qualifiers] NAME)
+teplate_params: '[' NAME (',' NAME)* ']'
+type_qualifiers: type_qualifier+
+type_qualifier: '*' | '**' | '&' | type_index ('.' NAME [type_index])*
+# TODO: old buffer syntax
+type_index: '[' [(NUMBER | type (',' type)* | (memory_view_index (',' memory_view_index)*))] ']'
+memory_view_index: ':' [':'] [NUMBER]
+
+address: '&' factor
+cast: '<' type ['?'] '>' factor
+size_of: 'sizeof' '(' (type) ')'
+type_id: 'typeid' '(' (type) ')'
+new_expr: 'new' type '(' [arglist] ')'
+
+# TODO: Restrict cdef_stmt to "top-level" statements.
+cdef_stmt: ('cdef' | 'cpdef') (cvar_def | cdef_type_decl | extern_block)
+cdef_type_decl: ctype_decl | fused | cclass
+ctype_decl: struct | enum | cppclass
+# TODO: Does the cdef/ctypedef distinction even make sense for fused?
+ctypedef_stmt: 'ctypedef' (cvar_decl | struct | enum | fused)
+
+# Note: these two are similar but can't be used in an or clause
+# as it would cause ambiguity in the LL(1) parser.
+# Requires a type
+cvar_decl: [visibility] type cname (NEWLINE | cfunc)
+# Allows an assignment
+cvar_def: [visibility] maybe_typed_name (['=' test] (',' PY_NAME ['=' test])* NEWLINE | cfunc)
+
+visibility: 'public' | 'api' | 'readonly'
+# TODO: Standardize gil_spec first or last.
+cfunc: [teplate_params] parameters [gil_spec] [exception_value] [gil_spec] (':' suite | NEWLINE)
+exception_value: 'except' (['?'] expr | '*' | '+' [PY_NAME])
+gil_spec: 'with' ('gil' | 'nogil') | 'nogil'
+
+cname: NAME [STRING]
+cclass: classdef
+fused: 'fused' PY_NAME ':' NEWLINE INDENT ( type NEWLINE)+ DEDENT
+enum: 'enum' [cname] (NEWLINE | ':' enum_suite)
+enum_suite: NEWLINE INDENT (cname ['=' NUMBER] NEWLINE | pass_stmt NEWLINE)+ DEDENT
+struct: ('struct' | 'union') cname (NEWLINE | (':' struct_suite))
+struct_suite: NEWLINE INDENT (cvar_decl | pass_stmt NEWLINE)+ DEDENT
+cppclass: 'cppclass' cname [teplate_params] [cppclass_bases] (NEWLINE | ':' cppclass_suite)
+cppclass_bases: '(' dotted_PY_NAME (',' dotted_PY_NAME [teplate_params])*')'
+cppclass_suite: NEWLINE INDENT (cvar_decl | ctype_decl | pass_stmt NEWLINE)+ DEDENT
+# TODO: C++ constructors, operators
+
+extern_block: 'extern' (cvar_decl | 'from' ('*' | STRING) ['namespace' STRING] [gil_spec] ':' (pass_stmt | extern_suite))
+extern_suite: NEWLINE INDENT (['cdef' | 'cpdef'] (cvar_decl | cdef_type_decl) | ctypedef_stmt)+ DEDENT
+
+cy_type_kwd: 'struct' | 'union' | 'fused' | 'cppclass' | 'int' | 'double' | 'complex'
+cy_kwd: cy_type_kwd | signedness | longness | visibility | 'gil' | 'nogil' | 'namespace' | 'const' | 'by' | 'extern'
+PY_NAME: NAME | cy_kwd
+
+# TODO: Do we really want these? Don't play well with include...
+DEF_stmt: 'DEF' NAME '=' testlist
+IF_stmt: 'IF' test ':' suite ('ELIF' test ':' suite)* ['ELSE' ':' suite]
diff --git a/contrib/tools/cython/Cython/Parser/__init__.py b/contrib/tools/cython/Cython/Parser/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/contrib/tools/cython/Cython/Parser/__init__.py
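Side note on the new ConcreteSyntaxTree.pyx: its extract_names() helper builds the node-type table by scraping "#define NAME VALUE" lines from CPython's generated headers (token.h, graminit.h). A minimal pure-Python sketch of that scraping idea is shown below; the function name and the sample text are hypothetical, not part of the commit.

def extract_names_from_text(text):
    # Same idea as extract_names() in ConcreteSyntaxTree.pyx: turn
    # "#define NAME 12" lines into a {12: 'NAME'} mapping, skipping
    # lines that are not a simple three-field #define.
    type_names = {}
    for line in text.splitlines():
        if line.startswith('#define'):
            parts = line.strip().split()
            if len(parts) == 3 and parts[2].isdigit():
                type_names[int(parts[2])] = parts[1]
    return type_names

# Hypothetical excerpt in the style of CPython's token.h.
sample = "#define ENDMARKER 0\n#define NAME 1\n#define NUMBER 2\n"
print(extract_names_from_text(sample))  # -> {0: 'ENDMARKER', 1: 'NAME', 2: 'NUMBER'}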