aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/tools/cython/Cython/Parser
diff options
context:
space:
mode:
authoralexv-smirnov <alex@ydb.tech>2023-06-13 11:05:01 +0300
committeralexv-smirnov <alex@ydb.tech>2023-06-13 11:05:01 +0300
commitbf0f13dd39ee3e65092ba3572bb5b1fcd125dcd0 (patch)
tree1d1df72c0541a59a81439842f46d95396d3e7189 /contrib/tools/cython/Cython/Parser
parent8bfdfa9a9bd19bddbc58d888e180fbd1218681be (diff)
downloadydb-bf0f13dd39ee3e65092ba3572bb5b1fcd125dcd0.tar.gz
add ymake export to ydb
Diffstat (limited to 'contrib/tools/cython/Cython/Parser')
-rw-r--r--contrib/tools/cython/Cython/Parser/ConcreteSyntaxTree.pyx88
-rw-r--r--contrib/tools/cython/Cython/Parser/Grammar214
-rw-r--r--contrib/tools/cython/Cython/Parser/__init__.py0
3 files changed, 302 insertions, 0 deletions
diff --git a/contrib/tools/cython/Cython/Parser/ConcreteSyntaxTree.pyx b/contrib/tools/cython/Cython/Parser/ConcreteSyntaxTree.pyx
new file mode 100644
index 0000000000..f9888c561c
--- /dev/null
+++ b/contrib/tools/cython/Cython/Parser/ConcreteSyntaxTree.pyx
@@ -0,0 +1,88 @@
+cdef extern from "graminit.c":  # Cython emits an #include of this C file; it supplies the grammar tables declared below.
+ ctypedef struct grammar:  # Declared without fields: this module only ever takes its address.
+ pass
+ cdef grammar _PyParser_Grammar  # Grammar instance whose address p_module() passes to the parser.
+ cdef int Py_file_input  # Start symbol p_module() passes to the parser.
+
+cdef extern from "node.h":  # Parse-tree node type plus the accessors used by print_tree() and p_module().
+ ctypedef struct node
+ void PyNode_Free(node* n)  # Called by p_module() once it is done with a successfully parsed tree.
+ int NCH(node* n)  # print_tree() uses this as the number of children of n.
+ node* CHILD(node* n, int ix)  # Child at index ix; print_tree() iterates ix over range(NCH(n)).
+ node* RCHILD(node* n, int ix)  # Declared but not called anywhere in the code visible here.
+ short TYPE(node* n)  # Numeric node type; print_tree() maps it to a name via type_names.
+ char* STR(node* n)  # Printed by print_tree() for nodes that have no children.
+
+cdef extern from "parsetok.h":  # Entry points of the C parser that p_module() drives.
+ ctypedef struct perrdetail:  # Error record; declared without fields and only passed by pointer here.
+ pass
+ cdef void PyParser_SetError(perrdetail *err) except *  # `except *` makes Cython check for a raised exception after every call.
+ cdef node * PyParser_ParseStringFlagsFilenameEx(  # p_module() treats a NULL return value as a parse failure.
+ const char * s,
+ const char * filename,
+ grammar * g,
+ int start,
+ perrdetail * err_ret,
+ int * flags)
+
+import distutils.sysconfig
+import os
+import re
+
+def extract_names(path):
+    """Collect the integer ``#define`` constants of a C header.
+
+    All parse tree types are #defined in these files as ints, so every line
+    of the exact form ``#define NAME <int>`` contributes one entry.
+
+    :param path: path of the header file to scan.
+    :return: dict mapping each integer value to the macro name defined for it.
+        Lines that do not have exactly three whitespace-separated fields, or
+        whose value is not a decimal integer, are skipped.
+    """
+    type_names = {}
+    # Close the header deterministically instead of leaving it to the GC.
+    with open(path) as header:
+        for line in header:
+            if not line.startswith('#define'):
+                continue
+            fields = line.split()
+            if len(fields) != 3:
+                # Function-like or multi-token macro: not a plain constant.
+                continue
+            _, name, value = fields
+            try:
+                type_names[int(value)] = name
+            except ValueError:
+                # The value is not an integer literal (e.g. an expression).
+                pass
+    return type_names
+
+# Cache of node type number -> symbolic name, filled on first use by print_tree().
+cdef dict type_names = {}
+
+cdef print_tree(node* n, indent=""):
+    # Debugging aid: print the subtree rooted at n, one node per line,
+    # children indented one extra space per level.
+    if not type_names:
+        # First call: load the names from token.h and from the graminit.h
+        # that sits next to this module.
+        token_header = os.path.join(distutils.sysconfig.get_python_inc(), 'token.h')
+        type_names.update(extract_names(token_header))
+        grammar_header = os.path.join(os.path.dirname(__file__), 'graminit.h')
+        type_names.update(extract_names(grammar_header))
+
+    # Childless nodes show their string, inner nodes their child count.
+    if NCH(n) == 0:
+        detail = <object>STR(n)
+    else:
+        detail = NCH(n)
+    print indent, type_names.get(TYPE(n), 'unknown'), detail
+
+    child_indent = indent + " "
+    for child_ix in range(NCH(n)):
+        print_tree(CHILD(n, child_ix), child_indent)
+
+def handle_includes(source, path):
+    """Textually expand the ``include "file"`` lines found in *source*.
+
+    :param source: text of the file being preprocessed.
+    :param path: path of the file *source* was read from; include file names
+        are resolved relative to its directory.
+    :return: *source* with every include line whose file exists replaced by
+        that file's (recursively expanded) contents. An include line whose
+        file is missing is kept and gets a ``# no such path: ...`` suffix.
+    """
+    # TODO: Use include directory.
+    base_dir = os.path.dirname(path)
+
+    def include_here(include_line):
+        # group(1) is the quoted file name; [1:-1] strips the quotes.
+        included = os.path.join(base_dir, include_line.group(1)[1:-1])
+        if not os.path.exists(included):
+            return include_line.group(0) + ' # no such path: ' + included
+        # Close the included file deterministically instead of leaking it.
+        with open(included) as included_file:
+            included_source = included_file.read()
+        # Nested includes belong to the included file, so resolve them
+        # relative to that file rather than to the top-level one.
+        return handle_includes(included_source, included)
+
+    # TODO: Proper string tokenizing.
+    return re.sub(r'^include\s+([^\n]+[\'"])\s*(#.*)?$', include_here, source, flags=re.M)
+
+def p_module(path):
+    """Parse the file at *path* with the generated grammar.
+
+    The source is read, its ``include`` lines are expanded through
+    handle_includes() when any are present, and the result is handed to the
+    C parser with ``Py_file_input`` as the start symbol. The parse tree is
+    freed again immediately, so this only checks that the file parses.
+
+    :param path: path of the source file to parse.
+    :return: None.
+    :raises: whatever PyParser_SetError() raises for the recorded error
+        details when the parser returns NULL.
+    """
+    cdef perrdetail err
+    # Passed by pointer to the parser, so it must not start out
+    # uninitialised (the parser is expected to read it as well as write it).
+    cdef int flags = 0
+    cdef node* n
+    # Close the source file deterministically instead of leaking the handle.
+    with open(path) as source_file:
+        source = source_file.read()
+    # Cheap pre-check; startswith() covers an include on the very first
+    # line, which has no preceding newline to match.
+    if source.startswith('include ') or '\ninclude ' in source:
+        # TODO: Tokenizer needs to understand includes.
+        source = handle_includes(source, path)
+        # Mark the reported file name: the parsed text is no longer the file as written.
+        path = "preparse(%s)" % path
+    n = PyParser_ParseStringFlagsFilenameEx(
+        source,
+        path,
+        &_PyParser_Grammar,
+        Py_file_input,
+        &err,
+        &flags)
+    if n:
+        # print_tree(n)  # enable to dump the parse tree while debugging
+        PyNode_Free(n)
+    else:
+        PyParser_SetError(&err)
diff --git a/contrib/tools/cython/Cython/Parser/Grammar b/contrib/tools/cython/Cython/Parser/Grammar
new file mode 100644
index 0000000000..214e36d5a3
--- /dev/null
+++ b/contrib/tools/cython/Cython/Parser/Grammar
@@ -0,0 +1,214 @@
+# Grammar for Cython, based on the Grammar for Python 3
+
+# Note: This grammar is not yet used by the Cython parser and is subject to change.
+
+# Start symbols for the grammar:
+# single_input is a single interactive statement;
+# file_input is a module or sequence of commands read from an input file;
+# eval_input is the input for the eval() functions.
+# NB: compound_stmt in single_input is followed by extra NEWLINE!
+single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
+file_input: (NEWLINE | stmt)* ENDMARKER
+eval_input: testlist NEWLINE* ENDMARKER
+
+decorator: '@' dotted_PY_NAME [ '(' [arglist] ')' ] NEWLINE
+decorators: decorator+
+decorated: decorators (classdef | funcdef | async_funcdef | cdef_stmt)
+async_funcdef: 'async' funcdef
+funcdef: 'def' PY_NAME parameters ['->' test] ':' suite
+parameters: '(' [typedargslist] ')'
+typedargslist: (tfpdef ['=' (test | '*')] (',' tfpdef ['=' (test | '*')])* [','
+ ['*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef]]
+ | '*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef] | '**' tfpdef) [',' ellipsis]
+tfpdef: maybe_typed_name [('not' | 'or') 'None'] [':' test]
+varargslist: (vfpdef ['=' test] (',' vfpdef ['=' test])* [','
+ ['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]]
+ | '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef)
+vfpdef: maybe_typed_name ['not' 'None']
+
+stmt: simple_stmt | compound_stmt | cdef_stmt | ctypedef_stmt | DEF_stmt | IF_stmt
+simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
+small_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
+ import_stmt | global_stmt | nonlocal_stmt | assert_stmt | print_stmt)
+expr_stmt: testlist_star_expr (augassign (yield_expr|testlist) |
+ ('=' (yield_expr|testlist_star_expr))*)
+testlist_star_expr: (test|star_expr) (',' (test|star_expr))* [',']
+augassign: ('+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' |
+ '<<=' | '>>=' | '**=' | '//=')
+print_stmt: 'print' ( [ test (',' test)* [','] ] |
+ '>>' test [ (',' test)+ [','] ] )
+# For normal assignments, additional restrictions enforced by the interpreter
+del_stmt: 'del' exprlist
+pass_stmt: 'pass'
+flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
+break_stmt: 'break'
+continue_stmt: 'continue'
+return_stmt: 'return' [testlist]
+yield_stmt: yield_expr
+raise_stmt: 'raise' [test ['from' test]]
+# raise_stmt: 'raise' [test [',' test [',' test]]]
+import_stmt: import_PY_NAME | import_from
+import_PY_NAME: ('import' | 'cimport') dotted_as_PY_NAMEs
+# note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
+import_from: ('from' (('.' | '...')* dotted_PY_NAME | ('.' | '...')+)
+ ('import' | 'cimport') ('*' | '(' import_as_PY_NAMEs ')' | import_as_PY_NAMEs))
+import_as_PY_NAME: PY_NAME ['as' PY_NAME]
+dotted_as_PY_NAME: dotted_PY_NAME ['as' PY_NAME]
+import_as_PY_NAMEs: import_as_PY_NAME (',' import_as_PY_NAME)* [',']
+dotted_as_PY_NAMEs: dotted_as_PY_NAME (',' dotted_as_PY_NAME)*
+dotted_PY_NAME: PY_NAME ('.' PY_NAME)*
+global_stmt: 'global' PY_NAME (',' PY_NAME)*
+nonlocal_stmt: 'nonlocal' PY_NAME (',' PY_NAME)*
+exec_stmt: 'exec' expr ['in' test [',' test]]
+assert_stmt: 'assert' test [',' test]
+
+compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated
+if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
+while_stmt: 'while' test ':' suite ['else' ':' suite]
+for_stmt: 'for' exprlist ('in' testlist | for_from_clause)':' suite ['else' ':' suite]
+for_from_clause: 'from' expr comp_op PY_NAME comp_op expr ['by' expr]
+try_stmt: ('try' ':' suite
+ ((except_clause ':' suite)+
+ ['else' ':' suite]
+ ['finally' ':' suite] |
+ 'finally' ':' suite))
+with_stmt: 'with' with_item (',' with_item)* ':' suite
+with_item: test ['as' expr]
+# NB compile.c makes sure that the default except clause is last
+except_clause: 'except' [test [('as' | ',') test]]
+suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
+
+test: or_test ['if' or_test 'else' test] | lambdef
+test_nocond: or_test | lambdef_nocond
+lambdef: 'lambda' [varargslist] ':' test
+lambdef_nocond: 'lambda' [varargslist] ':' test_nocond
+or_test: and_test ('or' and_test)*
+and_test: not_test ('and' not_test)*
+not_test: 'not' not_test | comparison
+comparison: expr (comp_op expr)*
+# <> isn't actually a valid comparison operator in Python. It's here for the
+# sake of a __future__ import described in PEP 401
+comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
+star_expr: '*' expr
+expr: xor_expr ('|' xor_expr)*
+xor_expr: and_expr ('^' and_expr)*
+and_expr: shift_expr ('&' shift_expr)*
+shift_expr: arith_expr (('<<'|'>>') arith_expr)*
+arith_expr: term (('+'|'-') term)*
+term: factor (('*'|'/'|'%'|'//') factor)*
+factor: ('+'|'-'|'~') factor | power | address | size_of | cast
+power: atom_expr ['**' factor]
+atom_expr: ['await'] atom trailer*
+atom: ('(' [yield_expr|testlist_comp] ')' |
+ '[' [testlist_comp] ']' |
+ '{' [dictorsetmaker] '}' |
+ new_expr |
+ PY_NAME | NUMBER | STRING+ | ellipsis | 'None' | 'True' | 'False')
+testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )
+trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' (PY_NAME | 'sizeof')
+subscriptlist: subscript (',' subscript)* [',']
+subscript: test | [test] ':' [test] [sliceop]
+sliceop: ':' [test]
+exprlist: (expr|star_expr) (',' (expr|star_expr))* [',']
+testlist: test (',' test)* [',']
+dictorsetmaker: ( ((test ':' test | '**' expr)
+ (comp_for | (',' (test ':' test | '**' expr))* [','])) |
+ ((test | star_expr)
+ (comp_for | (',' (test | star_expr))* [','])) )
+
+classdef: 'class' PY_NAME ['(' [arglist] ')'] ':' suite
+
+arglist: argument (',' argument)* [',']
+
+# The reason that keywords are test nodes instead of NAME is that using NAME
+# results in an ambiguity. ast.c makes sure it's a NAME.
+# "test '=' test" is really "keyword '=' test", but we have no such token.
+# These need to be in a single rule to avoid grammar that is ambiguous
+# to our LL(1) parser. Even though 'test' includes '*expr' in star_expr,
+# we explicitly match '*' here, too, to give it proper precedence.
+# Illegal combinations and orderings are blocked in ast.c:
+# multiple (test comp_for) arguments are blocked; keyword unpackings
+# that precede iterable unpackings are blocked; etc.
+argument: ( test [comp_for] |
+ test '=' test |
+ '**' expr |
+ star_expr )
+
+comp_iter: comp_for | comp_if
+comp_for: 'for' exprlist ('in' or_test | for_from_clause) [comp_iter]
+comp_if: 'if' test_nocond [comp_iter]
+
+# not used in grammar, but may appear in "node" passed from Parser to Compiler
+encoding_decl: NAME
+
+yield_expr: 'yield' [yield_arg]
+yield_arg: 'from' test | testlist
+
+
+# Cython extensions
+
+# Accommodate to Py2 tokenizer.
+ellipsis: '...' | '.' '.' '.'
+
+signedness: 'unsigned' | 'signed'
+longness: 'char' | 'short' | 'long' | 'long' 'long'
+# TODO: [unsigned] double doesn't make sens, but we need long double
+int_type: signedness [longness] | longness | [signedness] [longness] ('int' | 'double') | 'complex'
+
+type: ['const'] (NAME ('.' PY_NAME)* | int_type | '(' type ')') ['complex'] [type_qualifiers]
+maybe_typed_name: ['const'] (NAME [('.' PY_NAME)* ['complex'] [type_qualifiers] NAME] | (int_type | '(' type ')') ['complex'] [type_qualifiers] NAME)
+teplate_params: '[' NAME (',' NAME)* ']'
+type_qualifiers: type_qualifier+
+type_qualifier: '*' | '**' | '&' | type_index ('.' NAME [type_index])*
+# TODO: old buffer syntax
+type_index: '[' [(NUMBER | type (',' type)* | (memory_view_index (',' memory_view_index)*))] ']'
+memory_view_index: ':' [':'] [NUMBER]
+
+address: '&' factor
+cast: '<' type ['?'] '>' factor
+size_of: 'sizeof' '(' (type) ')'
+type_id: 'typeid' '(' (type) ')'
+new_expr: 'new' type '(' [arglist] ')'
+
+# TODO: Restrict cdef_stmt to "top-level" statements.
+cdef_stmt: ('cdef' | 'cpdef') (cvar_def | cdef_type_decl | extern_block)
+cdef_type_decl: ctype_decl | fused | cclass
+ctype_decl: struct | enum | cppclass
+# TODO: Does the cdef/ctypedef distinction even make sense for fused?
+ctypedef_stmt: 'ctypedef' (cvar_decl | struct | enum | fused)
+
+# Note: these two are similar but can't be used in an or clause
+# as it would cause ambiguity in the LL(1) parser.
+# Requires a type
+cvar_decl: [visibility] type cname (NEWLINE | cfunc)
+# Allows an assignment
+cvar_def: [visibility] maybe_typed_name (['=' test] (',' PY_NAME ['=' test])* NEWLINE | cfunc)
+
+visibility: 'public' | 'api' | 'readonly'
+# TODO: Standardize gil_spec first or last.
+cfunc: [teplate_params] parameters [gil_spec] [exception_value] [gil_spec] (':' suite | NEWLINE)
+exception_value: 'except' (['?'] expr | '*' | '+' [PY_NAME])
+gil_spec: 'with' ('gil' | 'nogil') | 'nogil'
+
+cname: NAME [STRING]
+cclass: classdef
+fused: 'fused' PY_NAME ':' NEWLINE INDENT ( type NEWLINE)+ DEDENT
+enum: 'enum' [cname] (NEWLINE | ':' enum_suite)
+enum_suite: NEWLINE INDENT (cname ['=' NUMBER] NEWLINE | pass_stmt NEWLINE)+ DEDENT
+struct: ('struct' | 'union') cname (NEWLINE | (':' struct_suite))
+struct_suite: NEWLINE INDENT (cvar_decl | pass_stmt NEWLINE)+ DEDENT
+cppclass: 'cppclass' cname [teplate_params] [cppclass_bases] (NEWLINE | ':' cppclass_suite)
+cppclass_bases: '(' dotted_PY_NAME (',' dotted_PY_NAME [teplate_params])*')'
+cppclass_suite: NEWLINE INDENT (cvar_decl | ctype_decl | pass_stmt NEWLINE)+ DEDENT
+# TODO: C++ constructors, operators
+
+extern_block: 'extern' (cvar_decl | 'from' ('*' | STRING) ['namespace' STRING] [gil_spec] ':' (pass_stmt | extern_suite))
+extern_suite: NEWLINE INDENT (['cdef' | 'cpdef'] (cvar_decl | cdef_type_decl) | ctypedef_stmt)+ DEDENT
+
+cy_type_kwd: 'struct' | 'union' | 'fused' | 'cppclass' | 'int' | 'double' | 'complex'
+cy_kwd: cy_type_kwd | signedness | longness | visibility | 'gil' | 'nogil' | 'namespace' | 'const' | 'by' | 'extern'
+PY_NAME: NAME | cy_kwd
+
+# TODO: Do we really want these? Don't play well with include...
+DEF_stmt: 'DEF' NAME '=' testlist
+IF_stmt: 'IF' test ':' suite ('ELIF' test ':' suite)* ['ELSE' ':' suite]
diff --git a/contrib/tools/cython/Cython/Parser/__init__.py b/contrib/tools/cython/Cython/Parser/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/contrib/tools/cython/Cython/Parser/__init__.py