diff options
| author | pefavel <[email protected]> | 2026-03-16 17:44:57 +0300 |
|---|---|---|
| committer | pefavel <[email protected]> | 2026-03-17 11:40:58 +0300 |
| commit | 6eecc739c342dbfca9be6328231233dd8e77d9f4 (patch) | |
| tree | 491834a1c01185c100a79d420a7492c7e53ba32a /contrib/tools/python/src/Parser | |
| parent | 58b88dfd7db837890ffc2edbe80e5235298cec10 (diff) | |
revert piglet config change
commit_hash:d068d68a89226c414a3d5a1f8ad102579bdd233b
Diffstat (limited to 'contrib/tools/python/src/Parser')
| -rw-r--r-- | contrib/tools/python/src/Parser/Python.asdl | 115 | ||||
| -rw-r--r-- | contrib/tools/python/src/Parser/asdl.py | 413 | ||||
| -rwxr-xr-x | contrib/tools/python/src/Parser/asdl_c.py | 1250 | ||||
| -rw-r--r-- | contrib/tools/python/src/Parser/intrcheck.c | 178 | ||||
| -rw-r--r-- | contrib/tools/python/src/Parser/pgenmain.c | 174 | ||||
| -rw-r--r-- | contrib/tools/python/src/Parser/printgrammar.c | 117 | ||||
| -rw-r--r-- | contrib/tools/python/src/Parser/spark.py | 839 | ||||
| -rw-r--r-- | contrib/tools/python/src/Parser/tokenizer_pgen.c | 2 |
8 files changed, 0 insertions, 3088 deletions
diff --git a/contrib/tools/python/src/Parser/Python.asdl b/contrib/tools/python/src/Parser/Python.asdl deleted file mode 100644 index 9a9b933143e..00000000000 --- a/contrib/tools/python/src/Parser/Python.asdl +++ /dev/null @@ -1,115 +0,0 @@ --- ASDL's five builtin types are identifier, int, string, object, bool - -module Python version "$Revision$" -{ - mod = Module(stmt* body) - | Interactive(stmt* body) - | Expression(expr body) - - -- not really an actual node but useful in Jython's typesystem. - | Suite(stmt* body) - - stmt = FunctionDef(identifier name, arguments args, - stmt* body, expr* decorator_list) - | ClassDef(identifier name, expr* bases, stmt* body, expr* decorator_list) - | Return(expr? value) - - | Delete(expr* targets) - | Assign(expr* targets, expr value) - | AugAssign(expr target, operator op, expr value) - - -- not sure if bool is allowed, can always use int - | Print(expr? dest, expr* values, bool nl) - - -- use 'orelse' because else is a keyword in target languages - | For(expr target, expr iter, stmt* body, stmt* orelse) - | While(expr test, stmt* body, stmt* orelse) - | If(expr test, stmt* body, stmt* orelse) - | With(expr context_expr, expr? optional_vars, stmt* body) - - -- 'type' is a bad name - | Raise(expr? type, expr? inst, expr? tback) - | TryExcept(stmt* body, excepthandler* handlers, stmt* orelse) - | TryFinally(stmt* body, stmt* finalbody) - | Assert(expr test, expr? msg) - - | Import(alias* names) - | ImportFrom(identifier? module, alias* names, int? level) - - -- Doesn't capture requirement that locals must be - -- defined if globals is - -- still supports use as a function! - | Exec(expr body, expr? globals, expr? locals) - - | Global(identifier* names) - | Expr(expr value) - | Pass | Break | Continue - - -- XXX Jython will be different - -- col_offset is the byte offset in the utf8 string the parser uses - attributes (int lineno, int col_offset) - - -- BoolOp() can use left & right? - expr = BoolOp(boolop op, expr* values) - | BinOp(expr left, operator op, expr right) - | UnaryOp(unaryop op, expr operand) - | Lambda(arguments args, expr body) - | IfExp(expr test, expr body, expr orelse) - | Dict(expr* keys, expr* values) - | Set(expr* elts) - | ListComp(expr elt, comprehension* generators) - | SetComp(expr elt, comprehension* generators) - | DictComp(expr key, expr value, comprehension* generators) - | GeneratorExp(expr elt, comprehension* generators) - -- the grammar constrains where yield expressions can occur - | Yield(expr? value) - -- need sequences for compare to distinguish between - -- x < 4 < 3 and (x < 4) < 3 - | Compare(expr left, cmpop* ops, expr* comparators) - | Call(expr func, expr* args, keyword* keywords, - expr? starargs, expr? kwargs) - | Repr(expr value) - | Num(object n) -- a number as a PyObject. - | Str(string s) -- need to specify raw, unicode, etc? - -- other literals? bools? - - -- the following expression can appear in assignment context - | Attribute(expr value, identifier attr, expr_context ctx) - | Subscript(expr value, slice slice, expr_context ctx) - | Name(identifier id, expr_context ctx) - | List(expr* elts, expr_context ctx) - | Tuple(expr* elts, expr_context ctx) - - -- col_offset is the byte offset in the utf8 string the parser uses - attributes (int lineno, int col_offset) - - expr_context = Load | Store | Del | AugLoad | AugStore | Param - - slice = Ellipsis | Slice(expr? lower, expr? upper, expr? step) - | ExtSlice(slice* dims) - | Index(expr value) - - boolop = And | Or - - operator = Add | Sub | Mult | Div | Mod | Pow | LShift - | RShift | BitOr | BitXor | BitAnd | FloorDiv - - unaryop = Invert | Not | UAdd | USub - - cmpop = Eq | NotEq | Lt | LtE | Gt | GtE | Is | IsNot | In | NotIn - - comprehension = (expr target, expr iter, expr* ifs) - - -- not sure what to call the first argument for raise and except - excepthandler = ExceptHandler(expr? type, expr? name, stmt* body) - attributes (int lineno, int col_offset) - - arguments = (expr* args, identifier? vararg, - identifier? kwarg, expr* defaults) - - -- keyword arguments supplied to call - keyword = (identifier arg, expr value) - - -- import name with optional 'as' alias. - alias = (identifier name, identifier? asname) -} diff --git a/contrib/tools/python/src/Parser/asdl.py b/contrib/tools/python/src/Parser/asdl.py deleted file mode 100644 index 1ddc3f8fb43..00000000000 --- a/contrib/tools/python/src/Parser/asdl.py +++ /dev/null @@ -1,413 +0,0 @@ -"""An implementation of the Zephyr Abstract Syntax Definition Language. - -See http://asdl.sourceforge.net/ and -http://www.cs.princeton.edu/research/techreps/TR-554-97 - -Only supports top level module decl, not view. I'm guessing that view -is intended to support the browser and I'm not interested in the -browser. - -Changes for Python: Add support for module versions -""" - -import os -import traceback - -import spark - -class Token(object): - # spark seems to dispatch in the parser based on a token's - # type attribute - def __init__(self, type, lineno): - self.type = type - self.lineno = lineno - - def __str__(self): - return self.type - - def __repr__(self): - return str(self) - -class Id(Token): - def __init__(self, value, lineno): - self.type = 'Id' - self.value = value - self.lineno = lineno - - def __str__(self): - return self.value - -class String(Token): - def __init__(self, value, lineno): - self.type = 'String' - self.value = value - self.lineno = lineno - -class ASDLSyntaxError(Exception): - - def __init__(self, lineno, token=None, msg=None): - self.lineno = lineno - self.token = token - self.msg = msg - - def __str__(self): - if self.msg is None: - return "Error at '%s', line %d" % (self.token, self.lineno) - else: - return "%s, line %d" % (self.msg, self.lineno) - -class ASDLScanner(spark.GenericScanner, object): - - def tokenize(self, input): - self.rv = [] - self.lineno = 1 - super(ASDLScanner, self).tokenize(input) - return self.rv - - def t_id(self, s): - r"[\w\.]+" - # XXX doesn't distinguish upper vs. lower, which is - # significant for ASDL. - self.rv.append(Id(s, self.lineno)) - - def t_string(self, s): - r'"[^"]*"' - self.rv.append(String(s, self.lineno)) - - def t_xxx(self, s): # not sure what this production means - r"<=" - self.rv.append(Token(s, self.lineno)) - - def t_punctuation(self, s): - r"[\{\}\*\=\|\(\)\,\?\:]" - self.rv.append(Token(s, self.lineno)) - - def t_comment(self, s): - r"\-\-[^\n]*" - pass - - def t_newline(self, s): - r"\n" - self.lineno += 1 - - def t_whitespace(self, s): - r"[ \t]+" - pass - - def t_default(self, s): - r" . +" - raise ValueError, "unmatched input: %s" % `s` - -class ASDLParser(spark.GenericParser, object): - def __init__(self): - super(ASDLParser, self).__init__("module") - - def typestring(self, tok): - return tok.type - - def error(self, tok): - raise ASDLSyntaxError(tok.lineno, tok) - - def p_module_0(self, (module, name, version, _0, _1)): - " module ::= Id Id version { } " - if module.value != "module": - raise ASDLSyntaxError(module.lineno, - msg="expected 'module', found %s" % module) - return Module(name, None, version) - - def p_module(self, (module, name, version, _0, definitions, _1)): - " module ::= Id Id version { definitions } " - if module.value != "module": - raise ASDLSyntaxError(module.lineno, - msg="expected 'module', found %s" % module) - return Module(name, definitions, version) - - def p_version(self, (version, V)): - "version ::= Id String" - if version.value != "version": - raise ASDLSyntaxError(version.lineno, - msg="expected 'version', found %s" % version) - return V - - def p_definition_0(self, (definition,)): - " definitions ::= definition " - return definition - - def p_definition_1(self, (definitions, definition)): - " definitions ::= definition definitions " - return definitions + definition - - def p_definition(self, (id, _, type)): - " definition ::= Id = type " - return [Type(id, type)] - - def p_type_0(self, (product,)): - " type ::= product " - return product - - def p_type_1(self, (sum,)): - " type ::= sum " - return Sum(sum) - - def p_type_2(self, (sum, id, _0, attributes, _1)): - " type ::= sum Id ( fields ) " - if id.value != "attributes": - raise ASDLSyntaxError(id.lineno, - msg="expected attributes, found %s" % id) - if attributes: - attributes.reverse() - return Sum(sum, attributes) - - def p_product(self, (_0, fields, _1)): - " product ::= ( fields ) " - # XXX can't I just construct things in the right order? - fields.reverse() - return Product(fields) - - def p_sum_0(self, (constructor,)): - " sum ::= constructor " - return [constructor] - - def p_sum_1(self, (constructor, _, sum)): - " sum ::= constructor | sum " - return [constructor] + sum - - def p_sum_2(self, (constructor, _, sum)): - " sum ::= constructor | sum " - return [constructor] + sum - - def p_constructor_0(self, (id,)): - " constructor ::= Id " - return Constructor(id) - - def p_constructor_1(self, (id, _0, fields, _1)): - " constructor ::= Id ( fields ) " - # XXX can't I just construct things in the right order? - fields.reverse() - return Constructor(id, fields) - - def p_fields_0(self, (field,)): - " fields ::= field " - return [field] - - def p_fields_1(self, (field, _, fields)): - " fields ::= field , fields " - return fields + [field] - - def p_field_0(self, (type,)): - " field ::= Id " - return Field(type) - - def p_field_1(self, (type, name)): - " field ::= Id Id " - return Field(type, name) - - def p_field_2(self, (type, _, name)): - " field ::= Id * Id " - return Field(type, name, seq=True) - - def p_field_3(self, (type, _, name)): - " field ::= Id ? Id " - return Field(type, name, opt=True) - - def p_field_4(self, (type, _)): - " field ::= Id * " - return Field(type, seq=True) - - def p_field_5(self, (type, _)): - " field ::= Id ? " - return Field(type, opt=True) - -builtin_types = ("identifier", "string", "int", "bool", "object") - -# below is a collection of classes to capture the AST of an AST :-) -# not sure if any of the methods are useful yet, but I'm adding them -# piecemeal as they seem helpful - -class AST(object): - pass # a marker class - -class Module(AST): - def __init__(self, name, dfns, version): - self.name = name - self.dfns = dfns - self.version = version - self.types = {} # maps type name to value (from dfns) - for type in dfns: - self.types[type.name.value] = type.value - - def __repr__(self): - return "Module(%s, %s)" % (self.name, self.dfns) - -class Type(AST): - def __init__(self, name, value): - self.name = name - self.value = value - - def __repr__(self): - return "Type(%s, %s)" % (self.name, self.value) - -class Constructor(AST): - def __init__(self, name, fields=None): - self.name = name - self.fields = fields or [] - - def __repr__(self): - return "Constructor(%s, %s)" % (self.name, self.fields) - -class Field(AST): - def __init__(self, type, name=None, seq=False, opt=False): - self.type = type - self.name = name - self.seq = seq - self.opt = opt - - def __repr__(self): - if self.seq: - extra = ", seq=True" - elif self.opt: - extra = ", opt=True" - else: - extra = "" - if self.name is None: - return "Field(%s%s)" % (self.type, extra) - else: - return "Field(%s, %s%s)" % (self.type, self.name, extra) - -class Sum(AST): - def __init__(self, types, attributes=None): - self.types = types - self.attributes = attributes or [] - - def __repr__(self): - if self.attributes is None: - return "Sum(%s)" % self.types - else: - return "Sum(%s, %s)" % (self.types, self.attributes) - -class Product(AST): - def __init__(self, fields): - self.fields = fields - - def __repr__(self): - return "Product(%s)" % self.fields - -class VisitorBase(object): - - def __init__(self, skip=False): - self.cache = {} - self.skip = skip - - def visit(self, object, *args): - meth = self._dispatch(object) - if meth is None: - return - try: - meth(object, *args) - except Exception, err: - print "Error visiting", repr(object) - print err - traceback.print_exc() - # XXX hack - if hasattr(self, 'file'): - self.file.flush() - os._exit(1) - - def _dispatch(self, object): - assert isinstance(object, AST), repr(object) - klass = object.__class__ - meth = self.cache.get(klass) - if meth is None: - methname = "visit" + klass.__name__ - if self.skip: - meth = getattr(self, methname, None) - else: - meth = getattr(self, methname) - self.cache[klass] = meth - return meth - -class Check(VisitorBase): - - def __init__(self): - super(Check, self).__init__(skip=True) - self.cons = {} - self.errors = 0 - self.types = {} - - def visitModule(self, mod): - for dfn in mod.dfns: - self.visit(dfn) - - def visitType(self, type): - self.visit(type.value, str(type.name)) - - def visitSum(self, sum, name): - for t in sum.types: - self.visit(t, name) - - def visitConstructor(self, cons, name): - key = str(cons.name) - conflict = self.cons.get(key) - if conflict is None: - self.cons[key] = name - else: - print "Redefinition of constructor %s" % key - print "Defined in %s and %s" % (conflict, name) - self.errors += 1 - for f in cons.fields: - self.visit(f, key) - - def visitField(self, field, name): - key = str(field.type) - l = self.types.setdefault(key, []) - l.append(name) - - def visitProduct(self, prod, name): - for f in prod.fields: - self.visit(f, name) - -def check(mod): - v = Check() - v.visit(mod) - - for t in v.types: - if t not in mod.types and not t in builtin_types: - v.errors += 1 - uses = ", ".join(v.types[t]) - print "Undefined type %s, used in %s" % (t, uses) - - return not v.errors - -def parse(file): - scanner = ASDLScanner() - parser = ASDLParser() - - buf = open(file).read() - tokens = scanner.tokenize(buf) - try: - return parser.parse(tokens) - except ASDLSyntaxError, err: - print err - lines = buf.split("\n") - print lines[err.lineno - 1] # lines starts at 0, files at 1 - -if __name__ == "__main__": - import glob - import sys - - if len(sys.argv) > 1: - files = sys.argv[1:] - else: - testdir = "tests" - files = glob.glob(testdir + "/*.asdl") - - for file in files: - print file - mod = parse(file) - print "module", mod.name - print len(mod.dfns), "definitions" - if not check(mod): - print "Check failed" - else: - for dfn in mod.dfns: - print dfn.type diff --git a/contrib/tools/python/src/Parser/asdl_c.py b/contrib/tools/python/src/Parser/asdl_c.py deleted file mode 100755 index ac61c78afc4..00000000000 --- a/contrib/tools/python/src/Parser/asdl_c.py +++ /dev/null @@ -1,1250 +0,0 @@ -#! /usr/bin/env python -"""Generate C code from an ASDL description.""" - -# TO DO -# handle fields that have a type but no name - -import os, sys - -import asdl - -TABSIZE = 8 -MAX_COL = 80 - -def get_c_type(name): - """Return a string for the C name of the type. - - This function special cases the default types provided by asdl: - identifier, string, int, bool. - """ - # XXX ack! need to figure out where Id is useful and where string - if isinstance(name, asdl.Id): - name = name.value - if name in asdl.builtin_types: - return name - else: - return "%s_ty" % name - -def reflow_lines(s, depth): - """Reflow the line s indented depth tabs. - - Return a sequence of lines where no line extends beyond MAX_COL - when properly indented. The first line is properly indented based - exclusively on depth * TABSIZE. All following lines -- these are - the reflowed lines generated by this function -- start at the same - column as the first character beyond the opening { in the first - line. - """ - size = MAX_COL - depth * TABSIZE - if len(s) < size: - return [s] - - lines = [] - cur = s - padding = "" - while len(cur) > size: - i = cur.rfind(' ', 0, size) - # XXX this should be fixed for real - if i == -1 and 'GeneratorExp' in cur: - i = size + 3 - assert i != -1, "Impossible line %d to reflow: %r" % (size, s) - lines.append(padding + cur[:i]) - if len(lines) == 1: - # find new size based on brace - j = cur.find('{', 0, i) - if j >= 0: - j += 2 # account for the brace and the space after it - size -= j - padding = " " * j - else: - j = cur.find('(', 0, i) - if j >= 0: - j += 1 # account for the paren (no space after it) - size -= j - padding = " " * j - cur = cur[i+1:] - else: - lines.append(padding + cur) - return lines - -def is_simple(sum): - """Return True if a sum is a simple. - - A sum is simple if its types have no fields, e.g. - unaryop = Invert | Not | UAdd | USub - """ - for t in sum.types: - if t.fields: - return False - return True - - -class EmitVisitor(asdl.VisitorBase): - """Visit that emits lines""" - - def __init__(self, file): - self.file = file - super(EmitVisitor, self).__init__() - - def emit(self, s, depth, reflow=True): - # XXX reflow long lines? - if reflow: - lines = reflow_lines(s, depth) - else: - lines = [s] - for line in lines: - line = (" " * TABSIZE * depth) + line + "\n" - self.file.write(line) - - -class TypeDefVisitor(EmitVisitor): - def visitModule(self, mod): - for dfn in mod.dfns: - self.visit(dfn) - - def visitType(self, type, depth=0): - self.visit(type.value, type.name, depth) - - def visitSum(self, sum, name, depth): - if is_simple(sum): - self.simple_sum(sum, name, depth) - else: - self.sum_with_constructors(sum, name, depth) - - def simple_sum(self, sum, name, depth): - enum = [] - for i in range(len(sum.types)): - type = sum.types[i] - enum.append("%s=%d" % (type.name, i + 1)) - enums = ", ".join(enum) - ctype = get_c_type(name) - s = "typedef enum _%s { %s } %s;" % (name, enums, ctype) - self.emit(s, depth) - self.emit("", depth) - - def sum_with_constructors(self, sum, name, depth): - ctype = get_c_type(name) - s = "typedef struct _%(name)s *%(ctype)s;" % locals() - self.emit(s, depth) - self.emit("", depth) - - def visitProduct(self, product, name, depth): - ctype = get_c_type(name) - s = "typedef struct _%(name)s *%(ctype)s;" % locals() - self.emit(s, depth) - self.emit("", depth) - - -class StructVisitor(EmitVisitor): - """Visitor to generate typedefs for AST.""" - - def visitModule(self, mod): - for dfn in mod.dfns: - self.visit(dfn) - - def visitType(self, type, depth=0): - self.visit(type.value, type.name, depth) - - def visitSum(self, sum, name, depth): - if not is_simple(sum): - self.sum_with_constructors(sum, name, depth) - - def sum_with_constructors(self, sum, name, depth): - def emit(s, depth=depth): - self.emit(s % sys._getframe(1).f_locals, depth) - enum = [] - for i in range(len(sum.types)): - type = sum.types[i] - enum.append("%s_kind=%d" % (type.name, i + 1)) - - emit("enum _%(name)s_kind {" + ", ".join(enum) + "};") - - emit("struct _%(name)s {") - emit("enum _%(name)s_kind kind;", depth + 1) - emit("union {", depth + 1) - for t in sum.types: - self.visit(t, depth + 2) - emit("} v;", depth + 1) - for field in sum.attributes: - # rudimentary attribute handling - type = str(field.type) - assert type in asdl.builtin_types, type - emit("%s %s;" % (type, field.name), depth + 1); - emit("};") - emit("") - - def visitConstructor(self, cons, depth): - if cons.fields: - self.emit("struct {", depth) - for f in cons.fields: - self.visit(f, depth + 1) - self.emit("} %s;" % cons.name, depth) - self.emit("", depth) - else: - # XXX not sure what I want here, nothing is probably fine - pass - - def visitField(self, field, depth): - # XXX need to lookup field.type, because it might be something - # like a builtin... - ctype = get_c_type(field.type) - name = field.name - if field.seq: - if field.type.value in ('cmpop',): - self.emit("asdl_int_seq *%(name)s;" % locals(), depth) - else: - self.emit("asdl_seq *%(name)s;" % locals(), depth) - else: - self.emit("%(ctype)s %(name)s;" % locals(), depth) - - def visitProduct(self, product, name, depth): - self.emit("struct _%(name)s {" % locals(), depth) - for f in product.fields: - self.visit(f, depth + 1) - self.emit("};", depth) - self.emit("", depth) - - -class PrototypeVisitor(EmitVisitor): - """Generate function prototypes for the .h file""" - - def visitModule(self, mod): - for dfn in mod.dfns: - self.visit(dfn) - - def visitType(self, type): - self.visit(type.value, type.name) - - def visitSum(self, sum, name): - if is_simple(sum): - pass # XXX - else: - for t in sum.types: - self.visit(t, name, sum.attributes) - - def get_args(self, fields): - """Return list of C argument into, one for each field. - - Argument info is 3-tuple of a C type, variable name, and flag - that is true if type can be NULL. - """ - args = [] - unnamed = {} - for f in fields: - if f.name is None: - name = f.type - c = unnamed[name] = unnamed.get(name, 0) + 1 - if c > 1: - name = "name%d" % (c - 1) - else: - name = f.name - # XXX should extend get_c_type() to handle this - if f.seq: - if f.type.value in ('cmpop',): - ctype = "asdl_int_seq *" - else: - ctype = "asdl_seq *" - else: - ctype = get_c_type(f.type) - args.append((ctype, name, f.opt or f.seq)) - return args - - def visitConstructor(self, cons, type, attrs): - args = self.get_args(cons.fields) - attrs = self.get_args(attrs) - ctype = get_c_type(type) - self.emit_function(cons.name, ctype, args, attrs) - - def emit_function(self, name, ctype, args, attrs, union=True): - args = args + attrs - if args: - argstr = ", ".join(["%s %s" % (atype, aname) - for atype, aname, opt in args]) - argstr += ", PyArena *arena" - else: - argstr = "PyArena *arena" - margs = "a0" - for i in range(1, len(args)+1): - margs += ", a%d" % i - self.emit("#define %s(%s) _Py_%s(%s)" % (name, margs, name, margs), 0, - reflow=False) - self.emit("%s _Py_%s(%s);" % (ctype, name, argstr), False) - - def visitProduct(self, prod, name): - self.emit_function(name, get_c_type(name), - self.get_args(prod.fields), [], union=False) - - -class FunctionVisitor(PrototypeVisitor): - """Visitor to generate constructor functions for AST.""" - - def emit_function(self, name, ctype, args, attrs, union=True): - def emit(s, depth=0, reflow=True): - self.emit(s, depth, reflow) - argstr = ", ".join(["%s %s" % (atype, aname) - for atype, aname, opt in args + attrs]) - if argstr: - argstr += ", PyArena *arena" - else: - argstr = "PyArena *arena" - self.emit("%s" % ctype, 0) - emit("%s(%s)" % (name, argstr)) - emit("{") - emit("%s p;" % ctype, 1) - for argtype, argname, opt in args: - # XXX hack alert: false is allowed for a bool - if not opt and not (argtype == "bool" or argtype == "int"): - emit("if (!%s) {" % argname, 1) - emit("PyErr_SetString(PyExc_ValueError,", 2) - msg = "field %s is required for %s" % (argname, name) - emit(' "%s");' % msg, - 2, reflow=False) - emit('return NULL;', 2) - emit('}', 1) - - emit("p = (%s)PyArena_Malloc(arena, sizeof(*p));" % ctype, 1); - emit("if (!p)", 1) - emit("return NULL;", 2) - if union: - self.emit_body_union(name, args, attrs) - else: - self.emit_body_struct(name, args, attrs) - emit("return p;", 1) - emit("}") - emit("") - - def emit_body_union(self, name, args, attrs): - def emit(s, depth=0, reflow=True): - self.emit(s, depth, reflow) - emit("p->kind = %s_kind;" % name, 1) - for argtype, argname, opt in args: - emit("p->v.%s.%s = %s;" % (name, argname, argname), 1) - for argtype, argname, opt in attrs: - emit("p->%s = %s;" % (argname, argname), 1) - - def emit_body_struct(self, name, args, attrs): - def emit(s, depth=0, reflow=True): - self.emit(s, depth, reflow) - for argtype, argname, opt in args: - emit("p->%s = %s;" % (argname, argname), 1) - assert not attrs - - -class PickleVisitor(EmitVisitor): - - def visitModule(self, mod): - for dfn in mod.dfns: - self.visit(dfn) - - def visitType(self, type): - self.visit(type.value, type.name) - - def visitSum(self, sum, name): - pass - - def visitProduct(self, sum, name): - pass - - def visitConstructor(self, cons, name): - pass - - def visitField(self, sum): - pass - - -class Obj2ModPrototypeVisitor(PickleVisitor): - def visitProduct(self, prod, name): - code = "static int obj2ast_%s(PyObject* obj, %s* out, PyArena* arena);" - self.emit(code % (name, get_c_type(name)), 0) - - visitSum = visitProduct - - -class Obj2ModVisitor(PickleVisitor): - def funcHeader(self, name): - ctype = get_c_type(name) - self.emit("int", 0) - self.emit("obj2ast_%s(PyObject* obj, %s* out, PyArena* arena)" % (name, ctype), 0) - self.emit("{", 0) - self.emit("PyObject* tmp = NULL;", 1) - self.emit("int isinstance;", 1) - self.emit("", 0) - - def sumTrailer(self, name): - self.emit("", 0) - self.emit("tmp = PyObject_Repr(obj);", 1) - # there's really nothing more we can do if this fails ... - self.emit("if (tmp == NULL) goto failed;", 1) - error = "expected some sort of %s, but got %%.400s" % name - format = "PyErr_Format(PyExc_TypeError, \"%s\", PyString_AS_STRING(tmp));" - self.emit(format % error, 1, reflow=False) - self.emit("failed:", 0) - self.emit("Py_XDECREF(tmp);", 1) - self.emit("return 1;", 1) - self.emit("}", 0) - self.emit("", 0) - - def simpleSum(self, sum, name): - self.funcHeader(name) - for t in sum.types: - line = ("isinstance = PyObject_IsInstance(obj, " - "(PyObject *)%s_type);") - self.emit(line % (t.name,), 1) - self.emit("if (isinstance == -1) {", 1) - self.emit("return 1;", 2) - self.emit("}", 1) - self.emit("if (isinstance) {", 1) - self.emit("*out = %s;" % t.name, 2) - self.emit("return 0;", 2) - self.emit("}", 1) - self.sumTrailer(name) - - def buildArgs(self, fields): - return ", ".join(fields + ["arena"]) - - def complexSum(self, sum, name): - self.funcHeader(name) - for a in sum.attributes: - self.visitAttributeDeclaration(a, name, sum=sum) - self.emit("", 0) - # XXX: should we only do this for 'expr'? - self.emit("if (obj == Py_None) {", 1) - self.emit("*out = NULL;", 2) - self.emit("return 0;", 2) - self.emit("}", 1) - for a in sum.attributes: - self.visitField(a, name, sum=sum, depth=1) - for t in sum.types: - line = "isinstance = PyObject_IsInstance(obj, (PyObject*)%s_type);" - self.emit(line % (t.name,), 1) - self.emit("if (isinstance == -1) {", 1) - self.emit("return 1;", 2) - self.emit("}", 1) - self.emit("if (isinstance) {", 1) - for f in t.fields: - self.visitFieldDeclaration(f, t.name, sum=sum, depth=2) - self.emit("", 0) - for f in t.fields: - self.visitField(f, t.name, sum=sum, depth=2) - args = [f.name.value for f in t.fields] + [a.name.value for a in sum.attributes] - self.emit("*out = %s(%s);" % (t.name, self.buildArgs(args)), 2) - self.emit("if (*out == NULL) goto failed;", 2) - self.emit("return 0;", 2) - self.emit("}", 1) - self.sumTrailer(name) - - def visitAttributeDeclaration(self, a, name, sum=sum): - ctype = get_c_type(a.type) - self.emit("%s %s;" % (ctype, a.name), 1) - - def visitSum(self, sum, name): - if is_simple(sum): - self.simpleSum(sum, name) - else: - self.complexSum(sum, name) - - def visitProduct(self, prod, name): - ctype = get_c_type(name) - self.emit("int", 0) - self.emit("obj2ast_%s(PyObject* obj, %s* out, PyArena* arena)" % (name, ctype), 0) - self.emit("{", 0) - self.emit("PyObject* tmp = NULL;", 1) - for f in prod.fields: - self.visitFieldDeclaration(f, name, prod=prod, depth=1) - self.emit("", 0) - for f in prod.fields: - self.visitField(f, name, prod=prod, depth=1) - args = [f.name.value for f in prod.fields] - self.emit("*out = %s(%s);" % (name, self.buildArgs(args)), 1) - self.emit("return 0;", 1) - self.emit("failed:", 0) - self.emit("Py_XDECREF(tmp);", 1) - self.emit("return 1;", 1) - self.emit("}", 0) - self.emit("", 0) - - def visitFieldDeclaration(self, field, name, sum=None, prod=None, depth=0): - ctype = get_c_type(field.type) - if field.seq: - if self.isSimpleType(field): - self.emit("asdl_int_seq* %s;" % field.name, depth) - else: - self.emit("asdl_seq* %s;" % field.name, depth) - else: - ctype = get_c_type(field.type) - self.emit("%s %s;" % (ctype, field.name), depth) - - def isSimpleSum(self, field): - # XXX can the members of this list be determined automatically? - return field.type.value in ('expr_context', 'boolop', 'operator', - 'unaryop', 'cmpop') - - def isNumeric(self, field): - return get_c_type(field.type) in ("int", "bool") - - def isSimpleType(self, field): - return self.isSimpleSum(field) or self.isNumeric(field) - - def visitField(self, field, name, sum=None, prod=None, depth=0): - ctype = get_c_type(field.type) - self.emit("if (PyObject_HasAttrString(obj, \"%s\")) {" % field.name, depth) - self.emit("int res;", depth+1) - if field.seq: - self.emit("Py_ssize_t len;", depth+1) - self.emit("Py_ssize_t i;", depth+1) - self.emit("tmp = PyObject_GetAttrString(obj, \"%s\");" % field.name, depth+1) - self.emit("if (tmp == NULL) goto failed;", depth+1) - if field.seq: - self.emit("if (!PyList_Check(tmp)) {", depth+1) - self.emit("PyErr_Format(PyExc_TypeError, \"%s field \\\"%s\\\" must " - "be a list, not a %%.200s\", tmp->ob_type->tp_name);" % - (name, field.name), - depth+2, reflow=False) - self.emit("goto failed;", depth+2) - self.emit("}", depth+1) - self.emit("len = PyList_GET_SIZE(tmp);", depth+1) - if self.isSimpleType(field): - self.emit("%s = asdl_int_seq_new(len, arena);" % field.name, depth+1) - else: - self.emit("%s = asdl_seq_new(len, arena);" % field.name, depth+1) - self.emit("if (%s == NULL) goto failed;" % field.name, depth+1) - self.emit("for (i = 0; i < len; i++) {", depth+1) - self.emit("%s val;" % ctype, depth+2) - self.emit("res = obj2ast_%s(PyList_GET_ITEM(tmp, i), &val, arena);" % - field.type, depth+2, reflow=False) - self.emit("if (res != 0) goto failed;", depth+2) - self.emit("if (len != PyList_GET_SIZE(tmp)) {", depth+2) - self.emit("PyErr_SetString(PyExc_RuntimeError, \"%s field \\\"%s\\\" " - "changed size during iteration\");" % - (name, field.name), - depth+3, reflow=False) - self.emit("goto failed;", depth+3) - self.emit("}", depth+2) - self.emit("asdl_seq_SET(%s, i, val);" % field.name, depth+2) - self.emit("}", depth+1) - else: - self.emit("res = obj2ast_%s(tmp, &%s, arena);" % - (field.type, field.name), depth+1) - self.emit("if (res != 0) goto failed;", depth+1) - - self.emit("Py_XDECREF(tmp);", depth+1) - self.emit("tmp = NULL;", depth+1) - self.emit("} else {", depth) - if not field.opt: - message = "required field \\\"%s\\\" missing from %s" % (field.name, name) - format = "PyErr_SetString(PyExc_TypeError, \"%s\");" - self.emit(format % message, depth+1, reflow=False) - self.emit("return 1;", depth+1) - else: - if self.isNumeric(field): - self.emit("%s = 0;" % field.name, depth+1) - elif not self.isSimpleType(field): - self.emit("%s = NULL;" % field.name, depth+1) - else: - raise TypeError("could not determine the default value for %s" % field.name) - self.emit("}", depth) - - -class MarshalPrototypeVisitor(PickleVisitor): - - def prototype(self, sum, name): - ctype = get_c_type(name) - self.emit("static int marshal_write_%s(PyObject **, int *, %s);" - % (name, ctype), 0) - - visitProduct = visitSum = prototype - - -class PyTypesDeclareVisitor(PickleVisitor): - - def visitProduct(self, prod, name): - self.emit("static PyTypeObject *%s_type;" % name, 0) - self.emit("static PyObject* ast2obj_%s(void*);" % name, 0) - if prod.fields: - self.emit("static char *%s_fields[]={" % name,0) - for f in prod.fields: - self.emit('"%s",' % f.name, 1) - self.emit("};", 0) - - def visitSum(self, sum, name): - self.emit("static PyTypeObject *%s_type;" % name, 0) - if sum.attributes: - self.emit("static char *%s_attributes[] = {" % name, 0) - for a in sum.attributes: - self.emit('"%s",' % a.name, 1) - self.emit("};", 0) - ptype = "void*" - if is_simple(sum): - ptype = get_c_type(name) - tnames = [] - for t in sum.types: - tnames.append(str(t.name)+"_singleton") - tnames = ", *".join(tnames) - self.emit("static PyObject *%s;" % tnames, 0) - self.emit("static PyObject* ast2obj_%s(%s);" % (name, ptype), 0) - for t in sum.types: - self.visitConstructor(t, name) - - def visitConstructor(self, cons, name): - self.emit("static PyTypeObject *%s_type;" % cons.name, 0) - if cons.fields: - self.emit("static char *%s_fields[]={" % cons.name, 0) - for t in cons.fields: - self.emit('"%s",' % t.name, 1) - self.emit("};",0) - -class PyTypesVisitor(PickleVisitor): - - def visitModule(self, mod): - self.emit(""" -static int -ast_type_init(PyObject *self, PyObject *args, PyObject *kw) -{ - Py_ssize_t i, numfields = 0; - int res = -1; - PyObject *key, *value, *fields; - fields = PyObject_GetAttrString((PyObject*)Py_TYPE(self), "_fields"); - if (!fields) - PyErr_Clear(); - if (fields) { - numfields = PySequence_Size(fields); - if (numfields == -1) - goto cleanup; - } - res = 0; /* if no error occurs, this stays 0 to the end */ - if (PyTuple_GET_SIZE(args) > 0) { - if (numfields != PyTuple_GET_SIZE(args)) { - PyErr_Format(PyExc_TypeError, "%.400s constructor takes %s" - "%zd positional argument%s", - Py_TYPE(self)->tp_name, - numfields == 0 ? "" : "either 0 or ", - numfields, numfields == 1 ? "" : "s"); - res = -1; - goto cleanup; - } - for (i = 0; i < PyTuple_GET_SIZE(args); i++) { - /* cannot be reached when fields is NULL */ - PyObject *name = PySequence_GetItem(fields, i); - if (!name) { - res = -1; - goto cleanup; - } - res = PyObject_SetAttr(self, name, PyTuple_GET_ITEM(args, i)); - Py_DECREF(name); - if (res < 0) - goto cleanup; - } - } - if (kw) { - i = 0; /* needed by PyDict_Next */ - while (PyDict_Next(kw, &i, &key, &value)) { - res = PyObject_SetAttr(self, key, value); - if (res < 0) - goto cleanup; - } - } - cleanup: - Py_XDECREF(fields); - return res; -} - -/* Pickling support */ -static PyObject * -ast_type_reduce(PyObject *self, PyObject *unused) -{ - PyObject *res; - PyObject *dict = PyObject_GetAttrString(self, "__dict__"); - if (dict == NULL) { - if (PyErr_ExceptionMatches(PyExc_AttributeError)) - PyErr_Clear(); - else - return NULL; - } - if (dict) { - res = Py_BuildValue("O()O", Py_TYPE(self), dict); - Py_DECREF(dict); - return res; - } - return Py_BuildValue("O()", Py_TYPE(self)); -} - -static PyMethodDef ast_type_methods[] = { - {"__reduce__", ast_type_reduce, METH_NOARGS, NULL}, - {NULL} -}; - -static PyTypeObject AST_type = { - PyVarObject_HEAD_INIT(&PyType_Type, 0) - "_ast.AST", - sizeof(PyObject), - 0, - 0, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_compare */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ - 0, /* tp_str */ - PyObject_GenericGetAttr, /* tp_getattro */ - PyObject_GenericSetAttr, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ - 0, /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - ast_type_methods, /* tp_methods */ - 0, /* tp_members */ - 0, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - (initproc)ast_type_init, /* tp_init */ - PyType_GenericAlloc, /* tp_alloc */ - PyType_GenericNew, /* tp_new */ - PyObject_Del, /* tp_free */ -}; - - -static PyTypeObject* make_type(char *type, PyTypeObject* base, char**fields, int num_fields) -{ - PyObject *fnames, *result; - int i; - fnames = PyTuple_New(num_fields); - if (!fnames) return NULL; - for (i = 0; i < num_fields; i++) { - PyObject *field = PyString_FromString(fields[i]); - if (!field) { - Py_DECREF(fnames); - return NULL; - } - PyTuple_SET_ITEM(fnames, i, field); - } - result = PyObject_CallFunction((PyObject*)&PyType_Type, "s(O){sOss}", - type, base, "_fields", fnames, "__module__", "_ast"); - Py_DECREF(fnames); - return (PyTypeObject*)result; -} - -static int add_attributes(PyTypeObject* type, char**attrs, int num_fields) -{ - int i, result; - PyObject *s, *l = PyTuple_New(num_fields); - if (!l) - return 0; - for (i = 0; i < num_fields; i++) { - s = PyString_FromString(attrs[i]); - if (!s) { - Py_DECREF(l); - return 0; - } - PyTuple_SET_ITEM(l, i, s); - } - result = PyObject_SetAttrString((PyObject*)type, "_attributes", l) >= 0; - Py_DECREF(l); - return result; -} - -/* Conversion AST -> Python */ - -static PyObject* ast2obj_list(asdl_seq *seq, PyObject* (*func)(void*)) -{ - int i, n = asdl_seq_LEN(seq); - PyObject *result = PyList_New(n); - PyObject *value; - if (!result) - return NULL; - for (i = 0; i < n; i++) { - value = func(asdl_seq_GET(seq, i)); - if (!value) { - Py_DECREF(result); - return NULL; - } - PyList_SET_ITEM(result, i, value); - } - return result; -} - -static PyObject* ast2obj_object(void *o) -{ - if (!o) - o = Py_None; - Py_INCREF((PyObject*)o); - return (PyObject*)o; -} -#define ast2obj_identifier ast2obj_object -#define ast2obj_string ast2obj_object -static PyObject* ast2obj_bool(bool b) -{ - return PyBool_FromLong(b); -} - -static PyObject* ast2obj_int(long b) -{ - return PyInt_FromLong(b); -} - -/* Conversion Python -> AST */ - -static int obj2ast_object(PyObject* obj, PyObject** out, PyArena* arena) -{ - if (obj == Py_None) - obj = NULL; - if (obj) - PyArena_AddPyObject(arena, obj); - Py_XINCREF(obj); - *out = obj; - return 0; -} - -static int obj2ast_identifier(PyObject* obj, PyObject** out, PyArena* arena) -{ - if (!PyString_CheckExact(obj) && obj != Py_None) { - PyErr_Format(PyExc_TypeError, - "AST identifier must be of type str"); - return 1; - } - return obj2ast_object(obj, out, arena); -} - -static int obj2ast_string(PyObject* obj, PyObject** out, PyArena* arena) -{ - if (!PyString_CheckExact(obj) && !PyUnicode_CheckExact(obj)) { - PyErr_SetString(PyExc_TypeError, - "AST string must be of type str or unicode"); - return 1; - } - return obj2ast_object(obj, out, arena); -} - -static int obj2ast_int(PyObject* obj, int* out, PyArena* arena) -{ - int i; - if (!_PyAnyInt_Check(obj)) { - PyObject *s = PyObject_Repr(obj); - if (s == NULL) return 1; - PyErr_Format(PyExc_ValueError, "invalid integer value: %.400s", - PyString_AS_STRING(s)); - Py_DECREF(s); - return 1; - } - - i = (int)PyLong_AsLong(obj); - if (i == -1 && PyErr_Occurred()) - return 1; - *out = i; - return 0; -} - -static int obj2ast_bool(PyObject* obj, bool* out, PyArena* arena) -{ - if (!PyBool_Check(obj)) { - PyObject *s = PyObject_Repr(obj); - if (s == NULL) return 1; - PyErr_Format(PyExc_ValueError, "invalid boolean value: %.400s", - PyString_AS_STRING(s)); - Py_DECREF(s); - return 1; - } - - *out = (obj == Py_True); - return 0; -} - -static int add_ast_fields(void) -{ - PyObject *empty_tuple, *d; - if (PyType_Ready(&AST_type) < 0) - return -1; - d = AST_type.tp_dict; - empty_tuple = PyTuple_New(0); - if (!empty_tuple || - PyDict_SetItemString(d, "_fields", empty_tuple) < 0 || - PyDict_SetItemString(d, "_attributes", empty_tuple) < 0) { - Py_XDECREF(empty_tuple); - return -1; - } - Py_DECREF(empty_tuple); - return 0; -} - -""", 0, reflow=False) - - self.emit("static int init_types(void)",0) - self.emit("{", 0) - self.emit("static int initialized;", 1) - self.emit("if (initialized) return 1;", 1) - self.emit("if (add_ast_fields() < 0) return 0;", 1) - for dfn in mod.dfns: - self.visit(dfn) - self.emit("initialized = 1;", 1) - self.emit("return 1;", 1); - self.emit("}", 0) - - def visitProduct(self, prod, name): - if prod.fields: - fields = name.value+"_fields" - else: - fields = "NULL" - self.emit('%s_type = make_type("%s", &AST_type, %s, %d);' % - (name, name, fields, len(prod.fields)), 1) - self.emit("if (!%s_type) return 0;" % name, 1) - - def visitSum(self, sum, name): - self.emit('%s_type = make_type("%s", &AST_type, NULL, 0);' % - (name, name), 1) - self.emit("if (!%s_type) return 0;" % name, 1) - if sum.attributes: - self.emit("if (!add_attributes(%s_type, %s_attributes, %d)) return 0;" % - (name, name, len(sum.attributes)), 1) - else: - self.emit("if (!add_attributes(%s_type, NULL, 0)) return 0;" % name, 1) - simple = is_simple(sum) - for t in sum.types: - self.visitConstructor(t, name, simple) - - def visitConstructor(self, cons, name, simple): - if cons.fields: - fields = cons.name.value+"_fields" - else: - fields = "NULL" - self.emit('%s_type = make_type("%s", %s_type, %s, %d);' % - (cons.name, cons.name, name, fields, len(cons.fields)), 1) - self.emit("if (!%s_type) return 0;" % cons.name, 1) - if simple: - self.emit("%s_singleton = PyType_GenericNew(%s_type, NULL, NULL);" % - (cons.name, cons.name), 1) - self.emit("if (!%s_singleton) return 0;" % cons.name, 1) - - -class ASTModuleVisitor(PickleVisitor): - - def visitModule(self, mod): - self.emit("PyMODINIT_FUNC", 0) - self.emit("init_ast(void)", 0) - self.emit("{", 0) - self.emit("PyObject *m, *d;", 1) - self.emit("if (!init_types()) return;", 1) - self.emit('m = Py_InitModule3("_ast", NULL, NULL);', 1) - self.emit("if (!m) return;", 1) - self.emit("d = PyModule_GetDict(m);", 1) - self.emit('if (PyDict_SetItemString(d, "AST", (PyObject*)&AST_type) < 0) return;', 1) - self.emit('if (PyModule_AddIntConstant(m, "PyCF_ONLY_AST", PyCF_ONLY_AST) < 0)', 1) - self.emit("return;", 2) - # Value of version: "$Revision$" - self.emit('if (PyModule_AddStringConstant(m, "__version__", "%s") < 0)' - % mod.version, 1) - self.emit("return;", 2) - for dfn in mod.dfns: - self.visit(dfn) - self.emit("}", 0) - - def visitProduct(self, prod, name): - self.addObj(name) - - def visitSum(self, sum, name): - self.addObj(name) - for t in sum.types: - self.visitConstructor(t, name) - - def visitConstructor(self, cons, name): - self.addObj(cons.name) - - def addObj(self, name): - self.emit('if (PyDict_SetItemString(d, "%s", (PyObject*)%s_type) < 0) return;' % (name, name), 1) - - -_SPECIALIZED_SEQUENCES = ('stmt', 'expr') - -def find_sequence(fields, doing_specialization): - """Return True if any field uses a sequence.""" - for f in fields: - if f.seq: - if not doing_specialization: - return True - if str(f.type) not in _SPECIALIZED_SEQUENCES: - return True - return False - -def has_sequence(types, doing_specialization): - for t in types: - if find_sequence(t.fields, doing_specialization): - return True - return False - - -class StaticVisitor(PickleVisitor): - CODE = '''Very simple, always emit this static code. Override CODE''' - - def visit(self, object): - self.emit(self.CODE, 0, reflow=False) - - -class ObjVisitor(PickleVisitor): - - def func_begin(self, name): - ctype = get_c_type(name) - self.emit("PyObject*", 0) - self.emit("ast2obj_%s(void* _o)" % (name), 0) - self.emit("{", 0) - self.emit("%s o = (%s)_o;" % (ctype, ctype), 1) - self.emit("PyObject *result = NULL, *value = NULL;", 1) - self.emit('if (!o) {', 1) - self.emit("Py_INCREF(Py_None);", 2) - self.emit('return Py_None;', 2) - self.emit("}", 1) - self.emit('', 0) - - def func_end(self): - self.emit("return result;", 1) - self.emit("failed:", 0) - self.emit("Py_XDECREF(value);", 1) - self.emit("Py_XDECREF(result);", 1) - self.emit("return NULL;", 1) - self.emit("}", 0) - self.emit("", 0) - - def visitSum(self, sum, name): - if is_simple(sum): - self.simpleSum(sum, name) - return - self.func_begin(name) - self.emit("switch (o->kind) {", 1) - for i in range(len(sum.types)): - t = sum.types[i] - self.visitConstructor(t, i + 1, name) - self.emit("}", 1) - for a in sum.attributes: - self.emit("value = ast2obj_%s(o->%s);" % (a.type, a.name), 1) - self.emit("if (!value) goto failed;", 1) - self.emit('if (PyObject_SetAttrString(result, "%s", value) < 0)' % a.name, 1) - self.emit('goto failed;', 2) - self.emit('Py_DECREF(value);', 1) - self.func_end() - - def simpleSum(self, sum, name): - self.emit("PyObject* ast2obj_%s(%s_ty o)" % (name, name), 0) - self.emit("{", 0) - self.emit("switch(o) {", 1) - for t in sum.types: - self.emit("case %s:" % t.name, 2) - self.emit("Py_INCREF(%s_singleton);" % t.name, 3) - self.emit("return %s_singleton;" % t.name, 3) - self.emit("default:", 2) - self.emit('/* should never happen, but just in case ... */', 3) - code = "PyErr_Format(PyExc_SystemError, \"unknown %s found\");" % name - self.emit(code, 3, reflow=False) - self.emit("return NULL;", 3) - self.emit("}", 1) - self.emit("}", 0) - - def visitProduct(self, prod, name): - self.func_begin(name) - self.emit("result = PyType_GenericNew(%s_type, NULL, NULL);" % name, 1); - self.emit("if (!result) return NULL;", 1) - for field in prod.fields: - self.visitField(field, name, 1, True) - self.func_end() - - def visitConstructor(self, cons, enum, name): - self.emit("case %s_kind:" % cons.name, 1) - self.emit("result = PyType_GenericNew(%s_type, NULL, NULL);" % cons.name, 2); - self.emit("if (!result) goto failed;", 2) - for f in cons.fields: - self.visitField(f, cons.name, 2, False) - self.emit("break;", 2) - - def visitField(self, field, name, depth, product): - def emit(s, d): - self.emit(s, depth + d) - if product: - value = "o->%s" % field.name - else: - value = "o->v.%s.%s" % (name, field.name) - self.set(field, value, depth) - emit("if (!value) goto failed;", 0) - emit('if (PyObject_SetAttrString(result, "%s", value) == -1)' % field.name, 0) - emit("goto failed;", 1) - emit("Py_DECREF(value);", 0) - - def emitSeq(self, field, value, depth, emit): - emit("seq = %s;" % value, 0) - emit("n = asdl_seq_LEN(seq);", 0) - emit("value = PyList_New(n);", 0) - emit("if (!value) goto failed;", 0) - emit("for (i = 0; i < n; i++) {", 0) - self.set("value", field, "asdl_seq_GET(seq, i)", depth + 1) - emit("if (!value1) goto failed;", 1) - emit("PyList_SET_ITEM(value, i, value1);", 1) - emit("value1 = NULL;", 1) - emit("}", 0) - - def set(self, field, value, depth): - if field.seq: - # XXX should really check for is_simple, but that requires a symbol table - if field.type.value == "cmpop": - # While the sequence elements are stored as void*, - # ast2obj_cmpop expects an enum - self.emit("{", depth) - self.emit("int i, n = asdl_seq_LEN(%s);" % value, depth+1) - self.emit("value = PyList_New(n);", depth+1) - self.emit("if (!value) goto failed;", depth+1) - self.emit("for(i = 0; i < n; i++)", depth+1) - # This cannot fail, so no need for error handling - self.emit("PyList_SET_ITEM(value, i, ast2obj_cmpop((cmpop_ty)asdl_seq_GET(%s, i)));" % value, - depth+2, reflow=False) - self.emit("}", depth) - else: - self.emit("value = ast2obj_list(%s, ast2obj_%s);" % (value, field.type), depth) - else: - ctype = get_c_type(field.type) - self.emit("value = ast2obj_%s(%s);" % (field.type, value), depth, reflow=False) - - -class PartingShots(StaticVisitor): - - CODE = """ -PyObject* PyAST_mod2obj(mod_ty t) -{ - init_types(); - return ast2obj_mod(t); -} - -/* mode is 0 for "exec", 1 for "eval" and 2 for "single" input */ -mod_ty PyAST_obj2mod(PyObject* ast, PyArena* arena, int mode) -{ - mod_ty res; - PyObject *req_type[3]; - char *req_name[3]; - int isinstance; - - req_type[0] = (PyObject*)Module_type; - req_type[1] = (PyObject*)Expression_type; - req_type[2] = (PyObject*)Interactive_type; - - req_name[0] = "Module"; - req_name[1] = "Expression"; - req_name[2] = "Interactive"; - - assert(0 <= mode && mode <= 2); - - init_types(); - - isinstance = PyObject_IsInstance(ast, req_type[mode]); - if (isinstance == -1) - return NULL; - if (!isinstance) { - PyErr_Format(PyExc_TypeError, "expected %s node, got %.400s", - req_name[mode], Py_TYPE(ast)->tp_name); - return NULL; - } - if (obj2ast_mod(ast, &res, arena) != 0) - return NULL; - else - return res; -} - -int PyAST_Check(PyObject* obj) -{ - init_types(); - return PyObject_IsInstance(obj, (PyObject*)&AST_type); -} -""" - -class ChainOfVisitors: - def __init__(self, *visitors): - self.visitors = visitors - - def visit(self, object): - for v in self.visitors: - v.visit(object) - v.emit("", 0) - -common_msg = "/* File automatically generated by %s. */\n\n" - -c_file_msg = """ -/* - __version__ %s. - - This module must be committed separately after each AST grammar change; - The __version__ number is set to the revision number of the commit - containing the grammar change. -*/ - -""" - -def main(srcfile): - argv0 = sys.argv[0] - components = argv0.split(os.sep) - argv0 = os.sep.join(components[-2:]) - auto_gen_msg = common_msg % argv0 - mod = asdl.parse(srcfile) - mod.version = "82160" - if not asdl.check(mod): - sys.exit(1) - if INC_DIR: - p = "%s/%s-ast.h" % (INC_DIR, mod.name) - f = open(p, "wb") - f.write(auto_gen_msg) - f.write('#include "asdl.h"\n\n') - c = ChainOfVisitors(TypeDefVisitor(f), - StructVisitor(f), - PrototypeVisitor(f), - ) - c.visit(mod) - f.write("PyObject* PyAST_mod2obj(mod_ty t);\n") - f.write("mod_ty PyAST_obj2mod(PyObject* ast, PyArena* arena, int mode);\n") - f.write("int PyAST_Check(PyObject* obj);\n") - f.close() - - if SRC_DIR: - p = os.path.join(SRC_DIR, str(mod.name) + "-ast.c") - f = open(p, "wb") - f.write(auto_gen_msg) - f.write(c_file_msg % mod.version) - f.write('#include "Python.h"\n') - f.write('#include "%s-ast.h"\n' % mod.name) - f.write('\n') - f.write("static PyTypeObject AST_type;\n") - v = ChainOfVisitors( - PyTypesDeclareVisitor(f), - PyTypesVisitor(f), - Obj2ModPrototypeVisitor(f), - FunctionVisitor(f), - ObjVisitor(f), - Obj2ModVisitor(f), - ASTModuleVisitor(f), - PartingShots(f), - ) - v.visit(mod) - f.close() - -if __name__ == "__main__": - import sys - import getopt - - INC_DIR = '' - SRC_DIR = '' - opts, args = getopt.getopt(sys.argv[1:], "h:c:") - if len(opts) != 1: - print "Must specify exactly one output file" - sys.exit(1) - for o, v in opts: - if o == '-h': - INC_DIR = v - if o == '-c': - SRC_DIR = v - if len(args) != 1: - print "Must specify single input file" - sys.exit(1) - main(args[0]) diff --git a/contrib/tools/python/src/Parser/intrcheck.c b/contrib/tools/python/src/Parser/intrcheck.c deleted file mode 100644 index 5844a9a85e1..00000000000 --- a/contrib/tools/python/src/Parser/intrcheck.c +++ /dev/null @@ -1,178 +0,0 @@ - -/* Check for interrupts */ - -#include "Python.h" -#include "pythread.h" - -#ifdef QUICKWIN - -#include <io.h> - -void -PyOS_InitInterrupts(void) -{ -} - -void -PyOS_FiniInterrupts(void) -{ -} - -int -PyOS_InterruptOccurred(void) -{ - _wyield(); -} - -#define OK - -#endif /* QUICKWIN */ - -#if defined(_M_IX86) && !defined(__QNX__) -#include <io.h> -#endif - -#if defined(MSDOS) && !defined(QUICKWIN) - -#ifdef __GNUC__ - -/* This is for DJGPP's GO32 extender. I don't know how to trap - * control-C (There's no API for ctrl-C, and I don't want to mess with - * the interrupt vectors.) However, this DOES catch control-break. - * --Amrit - */ - -#include <go32.h> - -void -PyOS_InitInterrupts(void) -{ - _go32_want_ctrl_break(1 /* TRUE */); -} - -void -PyOS_FiniInterrupts(void) -{ -} - -int -PyOS_InterruptOccurred(void) -{ - return _go32_was_ctrl_break_hit(); -} - -#else /* !__GNUC__ */ - -/* This might work for MS-DOS (untested though): */ - -void -PyOS_InitInterrupts(void) -{ -} - -void -PyOS_FiniInterrupts(void) -{ -} - -int -PyOS_InterruptOccurred(void) -{ - int interrupted = 0; - while (kbhit()) { - if (getch() == '\003') - interrupted = 1; - } - return interrupted; -} - -#endif /* __GNUC__ */ - -#define OK - -#endif /* MSDOS && !QUICKWIN */ - - -#ifndef OK - -/* Default version -- for real operating systems and for Standard C */ - -#include <stdio.h> -#include <string.h> -#include <signal.h> - -static int interrupted; - -void -PyErr_SetInterrupt(void) -{ - interrupted = 1; -} - -extern int PyErr_CheckSignals(void); - -static int -checksignals_witharg(void * arg) -{ - return PyErr_CheckSignals(); -} - -static void -intcatcher(int sig) -{ - extern void Py_Exit(int); - static char message[] = -"python: to interrupt a truly hanging Python program, interrupt once more.\n"; - switch (interrupted++) { - case 0: - break; - case 1: -#ifdef RISCOS - fprintf(stderr, message); -#else - write(2, message, strlen(message)); -#endif - break; - case 2: - interrupted = 0; - Py_Exit(1); - break; - } - PyOS_setsig(SIGINT, intcatcher); - Py_AddPendingCall(checksignals_witharg, NULL); -} - -static void (*old_siginthandler)(int) = SIG_DFL; - -void -PyOS_InitInterrupts(void) -{ - if ((old_siginthandler = PyOS_setsig(SIGINT, SIG_IGN)) != SIG_IGN) - PyOS_setsig(SIGINT, intcatcher); -} - -void -PyOS_FiniInterrupts(void) -{ - PyOS_setsig(SIGINT, old_siginthandler); -} - -int -PyOS_InterruptOccurred(void) -{ - if (!interrupted) - return 0; - interrupted = 0; - return 1; -} - -#endif /* !OK */ - -void -PyOS_AfterFork(void) -{ -#ifdef WITH_THREAD - PyThread_ReInitTLS(); - PyEval_ReInitThreads(); -#endif -} diff --git a/contrib/tools/python/src/Parser/pgenmain.c b/contrib/tools/python/src/Parser/pgenmain.c deleted file mode 100644 index 0b47295c1b4..00000000000 --- a/contrib/tools/python/src/Parser/pgenmain.c +++ /dev/null @@ -1,174 +0,0 @@ - -/* Parser generator main program */ - -/* This expects a filename containing the grammar as argv[1] (UNIX) - or asks the console for such a file name (THINK C). - It writes its output on two files in the current directory: - - "graminit.c" gets the grammar as a bunch of initialized data - - "graminit.h" gets the grammar's non-terminals as #defines. - Error messages and status info during the generation process are - written to stdout, or sometimes to stderr. */ - -/* XXX TO DO: - - check for duplicate definitions of names (instead of fatal err) -*/ - -#include "Python.h" -#include "pgenheaders.h" -#include "grammar.h" -#include "node.h" -#include "parsetok.h" -#include "pgen.h" - -int Py_DebugFlag; -int Py_VerboseFlag; -int Py_IgnoreEnvironmentFlag; - -/* Forward */ -grammar *getgrammar(char *filename); - -void -Py_Exit(int sts) -{ - exit(sts); -} - -int -main(int argc, char **argv) -{ - grammar *g; - FILE *fp; - char *filename, *graminit_h, *graminit_c; - - if (argc != 4) { - fprintf(stderr, - "usage: %s grammar graminit.h graminit.c\n", argv[0]); - Py_Exit(2); - } - filename = argv[1]; - graminit_h = argv[2]; - graminit_c = argv[3]; - g = getgrammar(filename); - fp = fopen(graminit_c, "w"); - if (fp == NULL) { - perror(graminit_c); - Py_Exit(1); - } - if (Py_DebugFlag) - printf("Writing %s ...\n", graminit_c); - printgrammar(g, fp); - fclose(fp); - fp = fopen(graminit_h, "w"); - if (fp == NULL) { - perror(graminit_h); - Py_Exit(1); - } - if (Py_DebugFlag) - printf("Writing %s ...\n", graminit_h); - printnonterminals(g, fp); - fclose(fp); - freegrammar(g); - Py_Exit(0); - return 0; /* Make gcc -Wall happy */ -} - -grammar * -getgrammar(char *filename) -{ - FILE *fp; - node *n; - grammar *g0, *g; - perrdetail err; - - fp = fopen(filename, "r"); - if (fp == NULL) { - perror(filename); - Py_Exit(1); - } - g0 = meta_grammar(); - n = PyParser_ParseFile(fp, filename, g0, g0->g_start, - (char *)NULL, (char *)NULL, &err); - fclose(fp); - if (n == NULL) { - fprintf(stderr, "Parsing error %d, line %d.\n", - err.error, err.lineno); - if (err.text != NULL) { - size_t i; - fprintf(stderr, "%s", err.text); - i = strlen(err.text); - if (i == 0 || err.text[i-1] != '\n') - fprintf(stderr, "\n"); - for (i = 0; i < err.offset; i++) { - if (err.text[i] == '\t') - putc('\t', stderr); - else - putc(' ', stderr); - } - fprintf(stderr, "^\n"); - PyObject_FREE(err.text); - } - Py_Exit(1); - } - g = pgen(n); - if (g == NULL) { - printf("Bad grammar.\n"); - Py_Exit(1); - } - return g; -} - -/* Can't happen in pgen */ -PyObject* -PyErr_Occurred() -{ - return 0; -} - -void -Py_FatalError(const char *msg) -{ - fprintf(stderr, "pgen: FATAL ERROR: %s\n", msg); - Py_Exit(1); -} - -/* No-nonsense my_readline() for tokenizer.c */ - -char * -PyOS_Readline(FILE *sys_stdin, FILE *sys_stdout, char *prompt) -{ - size_t n = 1000; - char *p = (char *)PyMem_MALLOC(n); - char *q; - if (p == NULL) - return NULL; - fprintf(stderr, "%s", prompt); - q = fgets(p, n, sys_stdin); - if (q == NULL) { - *p = '\0'; - return p; - } - n = strlen(p); - if (n > 0 && p[n-1] != '\n') - p[n-1] = '\n'; - return (char *)PyMem_REALLOC(p, n+1); -} - -/* No-nonsense fgets */ -char * -Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj) -{ - return fgets(buf, n, stream); -} - - -#include <stdarg.h> - -void -PySys_WriteStderr(const char *format, ...) -{ - va_list va; - - va_start(va, format); - vfprintf(stderr, format, va); - va_end(va); -} diff --git a/contrib/tools/python/src/Parser/printgrammar.c b/contrib/tools/python/src/Parser/printgrammar.c deleted file mode 100644 index 01f552f2d70..00000000000 --- a/contrib/tools/python/src/Parser/printgrammar.c +++ /dev/null @@ -1,117 +0,0 @@ - -/* Print a bunch of C initializers that represent a grammar */ - -#include "pgenheaders.h" -#include "grammar.h" - -/* Forward */ -static void printarcs(int, dfa *, FILE *); -static void printstates(grammar *, FILE *); -static void printdfas(grammar *, FILE *); -static void printlabels(grammar *, FILE *); - -void -printgrammar(grammar *g, FILE *fp) -{ - fprintf(fp, "/* Generated by Parser/pgen */\n\n"); - fprintf(fp, "#include \"pgenheaders.h\"\n"); - fprintf(fp, "#include \"grammar.h\"\n"); - fprintf(fp, "PyAPI_DATA(grammar) _PyParser_Grammar;\n"); - printdfas(g, fp); - printlabels(g, fp); - fprintf(fp, "grammar _PyParser_Grammar = {\n"); - fprintf(fp, " %d,\n", g->g_ndfas); - fprintf(fp, " dfas,\n"); - fprintf(fp, " {%d, labels},\n", g->g_ll.ll_nlabels); - fprintf(fp, " %d\n", g->g_start); - fprintf(fp, "};\n"); -} - -void -printnonterminals(grammar *g, FILE *fp) -{ - dfa *d; - int i; - - fprintf(fp, "/* Generated by Parser/pgen */\n\n"); - - d = g->g_dfa; - for (i = g->g_ndfas; --i >= 0; d++) - fprintf(fp, "#define %s %d\n", d->d_name, d->d_type); -} - -static void -printarcs(int i, dfa *d, FILE *fp) -{ - arc *a; - state *s; - int j, k; - - s = d->d_state; - for (j = 0; j < d->d_nstates; j++, s++) { - fprintf(fp, "static arc arcs_%d_%d[%d] = {\n", - i, j, s->s_narcs); - a = s->s_arc; - for (k = 0; k < s->s_narcs; k++, a++) - fprintf(fp, " {%d, %d},\n", a->a_lbl, a->a_arrow); - fprintf(fp, "};\n"); - } -} - -static void -printstates(grammar *g, FILE *fp) -{ - state *s; - dfa *d; - int i, j; - - d = g->g_dfa; - for (i = 0; i < g->g_ndfas; i++, d++) { - printarcs(i, d, fp); - fprintf(fp, "static state states_%d[%d] = {\n", - i, d->d_nstates); - s = d->d_state; - for (j = 0; j < d->d_nstates; j++, s++) - fprintf(fp, " {%d, arcs_%d_%d},\n", - s->s_narcs, i, j); - fprintf(fp, "};\n"); - } -} - -static void -printdfas(grammar *g, FILE *fp) -{ - dfa *d; - int i, j; - - printstates(g, fp); - fprintf(fp, "static dfa dfas[%d] = {\n", g->g_ndfas); - d = g->g_dfa; - for (i = 0; i < g->g_ndfas; i++, d++) { - fprintf(fp, " {%d, \"%s\", %d, %d, states_%d,\n", - d->d_type, d->d_name, d->d_initial, d->d_nstates, i); - fprintf(fp, " \""); - for (j = 0; j < NBYTES(g->g_ll.ll_nlabels); j++) - fprintf(fp, "\\%03o", d->d_first[j] & 0xff); - fprintf(fp, "\"},\n"); - } - fprintf(fp, "};\n"); -} - -static void -printlabels(grammar *g, FILE *fp) -{ - label *l; - int i; - - fprintf(fp, "static label labels[%d] = {\n", g->g_ll.ll_nlabels); - l = g->g_ll.ll_label; - for (i = g->g_ll.ll_nlabels; --i >= 0; l++) { - if (l->lb_str == NULL) - fprintf(fp, " {%d, 0},\n", l->lb_type); - else - fprintf(fp, " {%d, \"%s\"},\n", - l->lb_type, l->lb_str); - } - fprintf(fp, "};\n"); -} diff --git a/contrib/tools/python/src/Parser/spark.py b/contrib/tools/python/src/Parser/spark.py deleted file mode 100644 index b064d62ec68..00000000000 --- a/contrib/tools/python/src/Parser/spark.py +++ /dev/null @@ -1,839 +0,0 @@ -# Copyright (c) 1998-2002 John Aycock -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files (the -# "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -__version__ = 'SPARK-0.7 (pre-alpha-5)' - -import re -import string - -def _namelist(instance): - namelist, namedict, classlist = [], {}, [instance.__class__] - for c in classlist: - for b in c.__bases__: - classlist.append(b) - for name in c.__dict__.keys(): - if not namedict.has_key(name): - namelist.append(name) - namedict[name] = 1 - return namelist - -class GenericScanner: - def __init__(self, flags=0): - pattern = self.reflect() - self.re = re.compile(pattern, re.VERBOSE|flags) - - self.index2func = {} - for name, number in self.re.groupindex.items(): - self.index2func[number-1] = getattr(self, 't_' + name) - - def makeRE(self, name): - doc = getattr(self, name).__doc__ - rv = '(?P<%s>%s)' % (name[2:], doc) - return rv - - def reflect(self): - rv = [] - for name in _namelist(self): - if name[:2] == 't_' and name != 't_default': - rv.append(self.makeRE(name)) - - rv.append(self.makeRE('t_default')) - return string.join(rv, '|') - - def error(self, s, pos): - print "Lexical error at position %s" % pos - raise SystemExit - - def tokenize(self, s): - pos = 0 - n = len(s) - while pos < n: - m = self.re.match(s, pos) - if m is None: - self.error(s, pos) - - groups = m.groups() - for i in range(len(groups)): - if groups[i] and self.index2func.has_key(i): - self.index2func[i](groups[i]) - pos = m.end() - - def t_default(self, s): - r'( . | \n )+' - print "Specification error: unmatched input" - raise SystemExit - -# -# Extracted from GenericParser and made global so that [un]picking works. -# -class _State: - def __init__(self, stateno, items): - self.T, self.complete, self.items = [], [], items - self.stateno = stateno - -class GenericParser: - # - # An Earley parser, as per J. Earley, "An Efficient Context-Free - # Parsing Algorithm", CACM 13(2), pp. 94-102. Also J. C. Earley, - # "An Efficient Context-Free Parsing Algorithm", Ph.D. thesis, - # Carnegie-Mellon University, August 1968. New formulation of - # the parser according to J. Aycock, "Practical Earley Parsing - # and the SPARK Toolkit", Ph.D. thesis, University of Victoria, - # 2001, and J. Aycock and R. N. Horspool, "Practical Earley - # Parsing", unpublished paper, 2001. - # - - def __init__(self, start): - self.rules = {} - self.rule2func = {} - self.rule2name = {} - self.collectRules() - self.augment(start) - self.ruleschanged = 1 - - _NULLABLE = '\e_' - _START = 'START' - _BOF = '|-' - - # - # When pickling, take the time to generate the full state machine; - # some information is then extraneous, too. Unfortunately we - # can't save the rule2func map. - # - def __getstate__(self): - if self.ruleschanged: - # - # XXX - duplicated from parse() - # - self.computeNull() - self.newrules = {} - self.new2old = {} - self.makeNewRules() - self.ruleschanged = 0 - self.edges, self.cores = {}, {} - self.states = { 0: self.makeState0() } - self.makeState(0, self._BOF) - # - # XXX - should find a better way to do this.. - # - changes = 1 - while changes: - changes = 0 - for k, v in self.edges.items(): - if v is None: - state, sym = k - if self.states.has_key(state): - self.goto(state, sym) - changes = 1 - rv = self.__dict__.copy() - for s in self.states.values(): - del s.items - del rv['rule2func'] - del rv['nullable'] - del rv['cores'] - return rv - - def __setstate__(self, D): - self.rules = {} - self.rule2func = {} - self.rule2name = {} - self.collectRules() - start = D['rules'][self._START][0][1][1] # Blech. - self.augment(start) - D['rule2func'] = self.rule2func - D['makeSet'] = self.makeSet_fast - self.__dict__ = D - - # - # A hook for GenericASTBuilder and GenericASTMatcher. Mess - # thee not with this; nor shall thee toucheth the _preprocess - # argument to addRule. - # - def preprocess(self, rule, func): return rule, func - - def addRule(self, doc, func, _preprocess=1): - fn = func - rules = string.split(doc) - - index = [] - for i in range(len(rules)): - if rules[i] == '::=': - index.append(i-1) - index.append(len(rules)) - - for i in range(len(index)-1): - lhs = rules[index[i]] - rhs = rules[index[i]+2:index[i+1]] - rule = (lhs, tuple(rhs)) - - if _preprocess: - rule, fn = self.preprocess(rule, func) - - if self.rules.has_key(lhs): - self.rules[lhs].append(rule) - else: - self.rules[lhs] = [ rule ] - self.rule2func[rule] = fn - self.rule2name[rule] = func.__name__[2:] - self.ruleschanged = 1 - - def collectRules(self): - for name in _namelist(self): - if name[:2] == 'p_': - func = getattr(self, name) - doc = func.__doc__ - self.addRule(doc, func) - - def augment(self, start): - rule = '%s ::= %s %s' % (self._START, self._BOF, start) - self.addRule(rule, lambda args: args[1], 0) - - def computeNull(self): - self.nullable = {} - tbd = [] - - for rulelist in self.rules.values(): - lhs = rulelist[0][0] - self.nullable[lhs] = 0 - for rule in rulelist: - rhs = rule[1] - if len(rhs) == 0: - self.nullable[lhs] = 1 - continue - # - # We only need to consider rules which - # consist entirely of nonterminal symbols. - # This should be a savings on typical - # grammars. - # - for sym in rhs: - if not self.rules.has_key(sym): - break - else: - tbd.append(rule) - changes = 1 - while changes: - changes = 0 - for lhs, rhs in tbd: - if self.nullable[lhs]: - continue - for sym in rhs: - if not self.nullable[sym]: - break - else: - self.nullable[lhs] = 1 - changes = 1 - - def makeState0(self): - s0 = _State(0, []) - for rule in self.newrules[self._START]: - s0.items.append((rule, 0)) - return s0 - - def finalState(self, tokens): - # - # Yuck. - # - if len(self.newrules[self._START]) == 2 and len(tokens) == 0: - return 1 - start = self.rules[self._START][0][1][1] - return self.goto(1, start) - - def makeNewRules(self): - worklist = [] - for rulelist in self.rules.values(): - for rule in rulelist: - worklist.append((rule, 0, 1, rule)) - - for rule, i, candidate, oldrule in worklist: - lhs, rhs = rule - n = len(rhs) - while i < n: - sym = rhs[i] - if not self.rules.has_key(sym) or \ - not self.nullable[sym]: - candidate = 0 - i = i + 1 - continue - - newrhs = list(rhs) - newrhs[i] = self._NULLABLE+sym - newrule = (lhs, tuple(newrhs)) - worklist.append((newrule, i+1, - candidate, oldrule)) - candidate = 0 - i = i + 1 - else: - if candidate: - lhs = self._NULLABLE+lhs - rule = (lhs, rhs) - if self.newrules.has_key(lhs): - self.newrules[lhs].append(rule) - else: - self.newrules[lhs] = [ rule ] - self.new2old[rule] = oldrule - - def typestring(self, token): - return None - - def error(self, token): - print "Syntax error at or near `%s' token" % token - raise SystemExit - - def parse(self, tokens): - sets = [ [(1,0), (2,0)] ] - self.links = {} - - if self.ruleschanged: - self.computeNull() - self.newrules = {} - self.new2old = {} - self.makeNewRules() - self.ruleschanged = 0 - self.edges, self.cores = {}, {} - self.states = { 0: self.makeState0() } - self.makeState(0, self._BOF) - - for i in xrange(len(tokens)): - sets.append([]) - - if sets[i] == []: - break - self.makeSet(tokens[i], sets, i) - else: - sets.append([]) - self.makeSet(None, sets, len(tokens)) - - #_dump(tokens, sets, self.states) - - finalitem = (self.finalState(tokens), 0) - if finalitem not in sets[-2]: - if len(tokens) > 0: - self.error(tokens[i-1]) - else: - self.error(None) - - return self.buildTree(self._START, finalitem, - tokens, len(sets)-2) - - def isnullable(self, sym): - # - # For symbols in G_e only. If we weren't supporting 1.5, - # could just use sym.startswith(). - # - return self._NULLABLE == sym[0:len(self._NULLABLE)] - - def skip(self, (lhs, rhs), pos=0): - n = len(rhs) - while pos < n: - if not self.isnullable(rhs[pos]): - break - pos = pos + 1 - return pos - - def makeState(self, state, sym): - assert sym is not None - # - # Compute \epsilon-kernel state's core and see if - # it exists already. - # - kitems = [] - for rule, pos in self.states[state].items: - lhs, rhs = rule - if rhs[pos:pos+1] == (sym,): - kitems.append((rule, self.skip(rule, pos+1))) - core = kitems - - core.sort() - tcore = tuple(core) - if self.cores.has_key(tcore): - return self.cores[tcore] - # - # Nope, doesn't exist. Compute it and the associated - # \epsilon-nonkernel state together; we'll need it right away. - # - k = self.cores[tcore] = len(self.states) - K, NK = _State(k, kitems), _State(k+1, []) - self.states[k] = K - predicted = {} - - edges = self.edges - rules = self.newrules - for X in K, NK: - worklist = X.items - for item in worklist: - rule, pos = item - lhs, rhs = rule - if pos == len(rhs): - X.complete.append(rule) - continue - - nextSym = rhs[pos] - key = (X.stateno, nextSym) - if not rules.has_key(nextSym): - if not edges.has_key(key): - edges[key] = None - X.T.append(nextSym) - else: - edges[key] = None - if not predicted.has_key(nextSym): - predicted[nextSym] = 1 - for prule in rules[nextSym]: - ppos = self.skip(prule) - new = (prule, ppos) - NK.items.append(new) - # - # Problem: we know K needs generating, but we - # don't yet know about NK. Can't commit anything - # regarding NK to self.edges until we're sure. Should - # we delay committing on both K and NK to avoid this - # hacky code? This creates other problems.. - # - if X is K: - edges = {} - - if NK.items == []: - return k - - # - # Check for \epsilon-nonkernel's core. Unfortunately we - # need to know the entire set of predicted nonterminals - # to do this without accidentally duplicating states. - # - core = predicted.keys() - core.sort() - tcore = tuple(core) - if self.cores.has_key(tcore): - self.edges[(k, None)] = self.cores[tcore] - return k - - nk = self.cores[tcore] = self.edges[(k, None)] = NK.stateno - self.edges.update(edges) - self.states[nk] = NK - return k - - def goto(self, state, sym): - key = (state, sym) - if not self.edges.has_key(key): - # - # No transitions from state on sym. - # - return None - - rv = self.edges[key] - if rv is None: - # - # Target state isn't generated yet. Remedy this. - # - rv = self.makeState(state, sym) - self.edges[key] = rv - return rv - - def gotoT(self, state, t): - return [self.goto(state, t)] - - def gotoST(self, state, st): - rv = [] - for t in self.states[state].T: - if st == t: - rv.append(self.goto(state, t)) - return rv - - def add(self, set, item, i=None, predecessor=None, causal=None): - if predecessor is None: - if item not in set: - set.append(item) - else: - key = (item, i) - if item not in set: - self.links[key] = [] - set.append(item) - self.links[key].append((predecessor, causal)) - - def makeSet(self, token, sets, i): - cur, next = sets[i], sets[i+1] - - ttype = token is not None and self.typestring(token) or None - if ttype is not None: - fn, arg = self.gotoT, ttype - else: - fn, arg = self.gotoST, token - - for item in cur: - ptr = (item, i) - state, parent = item - add = fn(state, arg) - for k in add: - if k is not None: - self.add(next, (k, parent), i+1, ptr) - nk = self.goto(k, None) - if nk is not None: - self.add(next, (nk, i+1)) - - if parent == i: - continue - - for rule in self.states[state].complete: - lhs, rhs = rule - for pitem in sets[parent]: - pstate, pparent = pitem - k = self.goto(pstate, lhs) - if k is not None: - why = (item, i, rule) - pptr = (pitem, parent) - self.add(cur, (k, pparent), - i, pptr, why) - nk = self.goto(k, None) - if nk is not None: - self.add(cur, (nk, i)) - - def makeSet_fast(self, token, sets, i): - # - # Call *only* when the entire state machine has been built! - # It relies on self.edges being filled in completely, and - # then duplicates and inlines code to boost speed at the - # cost of extreme ugliness. - # - cur, next = sets[i], sets[i+1] - ttype = token is not None and self.typestring(token) or None - - for item in cur: - ptr = (item, i) - state, parent = item - if ttype is not None: - k = self.edges.get((state, ttype), None) - if k is not None: - #self.add(next, (k, parent), i+1, ptr) - #INLINED --v - new = (k, parent) - key = (new, i+1) - if new not in next: - self.links[key] = [] - next.append(new) - self.links[key].append((ptr, None)) - #INLINED --^ - #nk = self.goto(k, None) - nk = self.edges.get((k, None), None) - if nk is not None: - #self.add(next, (nk, i+1)) - #INLINED --v - new = (nk, i+1) - if new not in next: - next.append(new) - #INLINED --^ - else: - add = self.gotoST(state, token) - for k in add: - if k is not None: - self.add(next, (k, parent), i+1, ptr) - #nk = self.goto(k, None) - nk = self.edges.get((k, None), None) - if nk is not None: - self.add(next, (nk, i+1)) - - if parent == i: - continue - - for rule in self.states[state].complete: - lhs, rhs = rule - for pitem in sets[parent]: - pstate, pparent = pitem - #k = self.goto(pstate, lhs) - k = self.edges.get((pstate, lhs), None) - if k is not None: - why = (item, i, rule) - pptr = (pitem, parent) - #self.add(cur, (k, pparent), - # i, pptr, why) - #INLINED --v - new = (k, pparent) - key = (new, i) - if new not in cur: - self.links[key] = [] - cur.append(new) - self.links[key].append((pptr, why)) - #INLINED --^ - #nk = self.goto(k, None) - nk = self.edges.get((k, None), None) - if nk is not None: - #self.add(cur, (nk, i)) - #INLINED --v - new = (nk, i) - if new not in cur: - cur.append(new) - #INLINED --^ - - def predecessor(self, key, causal): - for p, c in self.links[key]: - if c == causal: - return p - assert 0 - - def causal(self, key): - links = self.links[key] - if len(links) == 1: - return links[0][1] - choices = [] - rule2cause = {} - for p, c in links: - rule = c[2] - choices.append(rule) - rule2cause[rule] = c - return rule2cause[self.ambiguity(choices)] - - def deriveEpsilon(self, nt): - if len(self.newrules[nt]) > 1: - rule = self.ambiguity(self.newrules[nt]) - else: - rule = self.newrules[nt][0] - #print rule - - rhs = rule[1] - attr = [None] * len(rhs) - - for i in range(len(rhs)-1, -1, -1): - attr[i] = self.deriveEpsilon(rhs[i]) - return self.rule2func[self.new2old[rule]](attr) - - def buildTree(self, nt, item, tokens, k): - state, parent = item - - choices = [] - for rule in self.states[state].complete: - if rule[0] == nt: - choices.append(rule) - rule = choices[0] - if len(choices) > 1: - rule = self.ambiguity(choices) - #print rule - - rhs = rule[1] - attr = [None] * len(rhs) - - for i in range(len(rhs)-1, -1, -1): - sym = rhs[i] - if not self.newrules.has_key(sym): - if sym != self._BOF: - attr[i] = tokens[k-1] - key = (item, k) - item, k = self.predecessor(key, None) - #elif self.isnullable(sym): - elif self._NULLABLE == sym[0:len(self._NULLABLE)]: - attr[i] = self.deriveEpsilon(sym) - else: - key = (item, k) - why = self.causal(key) - attr[i] = self.buildTree(sym, why[0], - tokens, why[1]) - item, k = self.predecessor(key, why) - return self.rule2func[self.new2old[rule]](attr) - - def ambiguity(self, rules): - # - # XXX - problem here and in collectRules() if the same rule - # appears in >1 method. Also undefined results if rules - # causing the ambiguity appear in the same method. - # - sortlist = [] - name2index = {} - for i in range(len(rules)): - lhs, rhs = rule = rules[i] - name = self.rule2name[self.new2old[rule]] - sortlist.append((len(rhs), name)) - name2index[name] = i - sortlist.sort() - list = map(lambda (a,b): b, sortlist) - return rules[name2index[self.resolve(list)]] - - def resolve(self, list): - # - # Resolve ambiguity in favor of the shortest RHS. - # Since we walk the tree from the top down, this - # should effectively resolve in favor of a "shift". - # - return list[0] - -# -# GenericASTBuilder automagically constructs a concrete/abstract syntax tree -# for a given input. The extra argument is a class (not an instance!) -# which supports the "__setslice__" and "__len__" methods. -# -# XXX - silently overrides any user code in methods. -# - -class GenericASTBuilder(GenericParser): - def __init__(self, AST, start): - GenericParser.__init__(self, start) - self.AST = AST - - def preprocess(self, rule, func): - rebind = lambda lhs, self=self: \ - lambda args, lhs=lhs, self=self: \ - self.buildASTNode(args, lhs) - lhs, rhs = rule - return rule, rebind(lhs) - - def buildASTNode(self, args, lhs): - children = [] - for arg in args: - if isinstance(arg, self.AST): - children.append(arg) - else: - children.append(self.terminal(arg)) - return self.nonterminal(lhs, children) - - def terminal(self, token): return token - - def nonterminal(self, type, args): - rv = self.AST(type) - rv[:len(args)] = args - return rv - -# -# GenericASTTraversal is a Visitor pattern according to Design Patterns. For -# each node it attempts to invoke the method n_<node type>, falling -# back onto the default() method if the n_* can't be found. The preorder -# traversal also looks for an exit hook named n_<node type>_exit (no default -# routine is called if it's not found). To prematurely halt traversal -# of a subtree, call the prune() method -- this only makes sense for a -# preorder traversal. Node type is determined via the typestring() method. -# - -class GenericASTTraversalPruningException: - pass - -class GenericASTTraversal: - def __init__(self, ast): - self.ast = ast - - def typestring(self, node): - return node.type - - def prune(self): - raise GenericASTTraversalPruningException - - def preorder(self, node=None): - if node is None: - node = self.ast - - try: - name = 'n_' + self.typestring(node) - if hasattr(self, name): - func = getattr(self, name) - func(node) - else: - self.default(node) - except GenericASTTraversalPruningException: - return - - for kid in node: - self.preorder(kid) - - name = name + '_exit' - if hasattr(self, name): - func = getattr(self, name) - func(node) - - def postorder(self, node=None): - if node is None: - node = self.ast - - for kid in node: - self.postorder(kid) - - name = 'n_' + self.typestring(node) - if hasattr(self, name): - func = getattr(self, name) - func(node) - else: - self.default(node) - - - def default(self, node): - pass - -# -# GenericASTMatcher. AST nodes must have "__getitem__" and "__cmp__" -# implemented. -# -# XXX - makes assumptions about how GenericParser walks the parse tree. -# - -class GenericASTMatcher(GenericParser): - def __init__(self, start, ast): - GenericParser.__init__(self, start) - self.ast = ast - - def preprocess(self, rule, func): - rebind = lambda func, self=self: \ - lambda args, func=func, self=self: \ - self.foundMatch(args, func) - lhs, rhs = rule - rhslist = list(rhs) - rhslist.reverse() - - return (lhs, tuple(rhslist)), rebind(func) - - def foundMatch(self, args, func): - func(args[-1]) - return args[-1] - - def match_r(self, node): - self.input.insert(0, node) - children = 0 - - for child in node: - if children == 0: - self.input.insert(0, '(') - children = children + 1 - self.match_r(child) - - if children > 0: - self.input.insert(0, ')') - - def match(self, ast=None): - if ast is None: - ast = self.ast - self.input = [] - - self.match_r(ast) - self.parse(self.input) - - def resolve(self, list): - # - # Resolve ambiguity in favor of the longest RHS. - # - return list[-1] - -def _dump(tokens, sets, states): - for i in range(len(sets)): - print 'set', i - for item in sets[i]: - print '\t', item - for (lhs, rhs), pos in states[item[0]].items: - print '\t\t', lhs, '::=', - print string.join(rhs[:pos]), - print '.', - print string.join(rhs[pos:]) - if i < len(tokens): - print - print 'token', str(tokens[i]) - print diff --git a/contrib/tools/python/src/Parser/tokenizer_pgen.c b/contrib/tools/python/src/Parser/tokenizer_pgen.c deleted file mode 100644 index 9cb8492d6a6..00000000000 --- a/contrib/tools/python/src/Parser/tokenizer_pgen.c +++ /dev/null @@ -1,2 +0,0 @@ -#define PGEN -#include "tokenizer.c" |
