summaryrefslogtreecommitdiffstats
path: root/contrib/tools/python/src/Parser
diff options
context:
space:
mode:
authorpefavel <[email protected]>2026-03-16 17:44:57 +0300
committerpefavel <[email protected]>2026-03-17 11:40:58 +0300
commit6eecc739c342dbfca9be6328231233dd8e77d9f4 (patch)
tree491834a1c01185c100a79d420a7492c7e53ba32a /contrib/tools/python/src/Parser
parent58b88dfd7db837890ffc2edbe80e5235298cec10 (diff)
revert piglet config change
commit_hash:d068d68a89226c414a3d5a1f8ad102579bdd233b
Diffstat (limited to 'contrib/tools/python/src/Parser')
-rw-r--r--contrib/tools/python/src/Parser/Python.asdl115
-rw-r--r--contrib/tools/python/src/Parser/asdl.py413
-rwxr-xr-xcontrib/tools/python/src/Parser/asdl_c.py1250
-rw-r--r--contrib/tools/python/src/Parser/intrcheck.c178
-rw-r--r--contrib/tools/python/src/Parser/pgenmain.c174
-rw-r--r--contrib/tools/python/src/Parser/printgrammar.c117
-rw-r--r--contrib/tools/python/src/Parser/spark.py839
-rw-r--r--contrib/tools/python/src/Parser/tokenizer_pgen.c2
8 files changed, 0 insertions, 3088 deletions
diff --git a/contrib/tools/python/src/Parser/Python.asdl b/contrib/tools/python/src/Parser/Python.asdl
deleted file mode 100644
index 9a9b933143e..00000000000
--- a/contrib/tools/python/src/Parser/Python.asdl
+++ /dev/null
@@ -1,115 +0,0 @@
--- ASDL's five builtin types are identifier, int, string, object, bool
-
-module Python version "$Revision$"
-{
- mod = Module(stmt* body)
- | Interactive(stmt* body)
- | Expression(expr body)
-
- -- not really an actual node but useful in Jython's typesystem.
- | Suite(stmt* body)
-
- stmt = FunctionDef(identifier name, arguments args,
- stmt* body, expr* decorator_list)
- | ClassDef(identifier name, expr* bases, stmt* body, expr* decorator_list)
- | Return(expr? value)
-
- | Delete(expr* targets)
- | Assign(expr* targets, expr value)
- | AugAssign(expr target, operator op, expr value)
-
- -- not sure if bool is allowed, can always use int
- | Print(expr? dest, expr* values, bool nl)
-
- -- use 'orelse' because else is a keyword in target languages
- | For(expr target, expr iter, stmt* body, stmt* orelse)
- | While(expr test, stmt* body, stmt* orelse)
- | If(expr test, stmt* body, stmt* orelse)
- | With(expr context_expr, expr? optional_vars, stmt* body)
-
- -- 'type' is a bad name
- | Raise(expr? type, expr? inst, expr? tback)
- | TryExcept(stmt* body, excepthandler* handlers, stmt* orelse)
- | TryFinally(stmt* body, stmt* finalbody)
- | Assert(expr test, expr? msg)
-
- | Import(alias* names)
- | ImportFrom(identifier? module, alias* names, int? level)
-
- -- Doesn't capture requirement that locals must be
- -- defined if globals is
- -- still supports use as a function!
- | Exec(expr body, expr? globals, expr? locals)
-
- | Global(identifier* names)
- | Expr(expr value)
- | Pass | Break | Continue
-
- -- XXX Jython will be different
- -- col_offset is the byte offset in the utf8 string the parser uses
- attributes (int lineno, int col_offset)
-
- -- BoolOp() can use left & right?
- expr = BoolOp(boolop op, expr* values)
- | BinOp(expr left, operator op, expr right)
- | UnaryOp(unaryop op, expr operand)
- | Lambda(arguments args, expr body)
- | IfExp(expr test, expr body, expr orelse)
- | Dict(expr* keys, expr* values)
- | Set(expr* elts)
- | ListComp(expr elt, comprehension* generators)
- | SetComp(expr elt, comprehension* generators)
- | DictComp(expr key, expr value, comprehension* generators)
- | GeneratorExp(expr elt, comprehension* generators)
- -- the grammar constrains where yield expressions can occur
- | Yield(expr? value)
- -- need sequences for compare to distinguish between
- -- x < 4 < 3 and (x < 4) < 3
- | Compare(expr left, cmpop* ops, expr* comparators)
- | Call(expr func, expr* args, keyword* keywords,
- expr? starargs, expr? kwargs)
- | Repr(expr value)
- | Num(object n) -- a number as a PyObject.
- | Str(string s) -- need to specify raw, unicode, etc?
- -- other literals? bools?
-
- -- the following expression can appear in assignment context
- | Attribute(expr value, identifier attr, expr_context ctx)
- | Subscript(expr value, slice slice, expr_context ctx)
- | Name(identifier id, expr_context ctx)
- | List(expr* elts, expr_context ctx)
- | Tuple(expr* elts, expr_context ctx)
-
- -- col_offset is the byte offset in the utf8 string the parser uses
- attributes (int lineno, int col_offset)
-
- expr_context = Load | Store | Del | AugLoad | AugStore | Param
-
- slice = Ellipsis | Slice(expr? lower, expr? upper, expr? step)
- | ExtSlice(slice* dims)
- | Index(expr value)
-
- boolop = And | Or
-
- operator = Add | Sub | Mult | Div | Mod | Pow | LShift
- | RShift | BitOr | BitXor | BitAnd | FloorDiv
-
- unaryop = Invert | Not | UAdd | USub
-
- cmpop = Eq | NotEq | Lt | LtE | Gt | GtE | Is | IsNot | In | NotIn
-
- comprehension = (expr target, expr iter, expr* ifs)
-
- -- not sure what to call the first argument for raise and except
- excepthandler = ExceptHandler(expr? type, expr? name, stmt* body)
- attributes (int lineno, int col_offset)
-
- arguments = (expr* args, identifier? vararg,
- identifier? kwarg, expr* defaults)
-
- -- keyword arguments supplied to call
- keyword = (identifier arg, expr value)
-
- -- import name with optional 'as' alias.
- alias = (identifier name, identifier? asname)
-}
diff --git a/contrib/tools/python/src/Parser/asdl.py b/contrib/tools/python/src/Parser/asdl.py
deleted file mode 100644
index 1ddc3f8fb43..00000000000
--- a/contrib/tools/python/src/Parser/asdl.py
+++ /dev/null
@@ -1,413 +0,0 @@
-"""An implementation of the Zephyr Abstract Syntax Definition Language.
-
-See http://asdl.sourceforge.net/ and
-http://www.cs.princeton.edu/research/techreps/TR-554-97
-
-Only supports top level module decl, not view. I'm guessing that view
-is intended to support the browser and I'm not interested in the
-browser.
-
-Changes for Python: Add support for module versions
-"""
-
-import os
-import traceback
-
-import spark
-
-class Token(object):
- # spark seems to dispatch in the parser based on a token's
- # type attribute
- def __init__(self, type, lineno):
- self.type = type
- self.lineno = lineno
-
- def __str__(self):
- return self.type
-
- def __repr__(self):
- return str(self)
-
-class Id(Token):
- def __init__(self, value, lineno):
- self.type = 'Id'
- self.value = value
- self.lineno = lineno
-
- def __str__(self):
- return self.value
-
-class String(Token):
- def __init__(self, value, lineno):
- self.type = 'String'
- self.value = value
- self.lineno = lineno
-
-class ASDLSyntaxError(Exception):
-
- def __init__(self, lineno, token=None, msg=None):
- self.lineno = lineno
- self.token = token
- self.msg = msg
-
- def __str__(self):
- if self.msg is None:
- return "Error at '%s', line %d" % (self.token, self.lineno)
- else:
- return "%s, line %d" % (self.msg, self.lineno)
-
-class ASDLScanner(spark.GenericScanner, object):
-
- def tokenize(self, input):
- self.rv = []
- self.lineno = 1
- super(ASDLScanner, self).tokenize(input)
- return self.rv
-
- def t_id(self, s):
- r"[\w\.]+"
- # XXX doesn't distinguish upper vs. lower, which is
- # significant for ASDL.
- self.rv.append(Id(s, self.lineno))
-
- def t_string(self, s):
- r'"[^"]*"'
- self.rv.append(String(s, self.lineno))
-
- def t_xxx(self, s): # not sure what this production means
- r"<="
- self.rv.append(Token(s, self.lineno))
-
- def t_punctuation(self, s):
- r"[\{\}\*\=\|\(\)\,\?\:]"
- self.rv.append(Token(s, self.lineno))
-
- def t_comment(self, s):
- r"\-\-[^\n]*"
- pass
-
- def t_newline(self, s):
- r"\n"
- self.lineno += 1
-
- def t_whitespace(self, s):
- r"[ \t]+"
- pass
-
- def t_default(self, s):
- r" . +"
- raise ValueError, "unmatched input: %s" % `s`
-
-class ASDLParser(spark.GenericParser, object):
- def __init__(self):
- super(ASDLParser, self).__init__("module")
-
- def typestring(self, tok):
- return tok.type
-
- def error(self, tok):
- raise ASDLSyntaxError(tok.lineno, tok)
-
- def p_module_0(self, (module, name, version, _0, _1)):
- " module ::= Id Id version { } "
- if module.value != "module":
- raise ASDLSyntaxError(module.lineno,
- msg="expected 'module', found %s" % module)
- return Module(name, None, version)
-
- def p_module(self, (module, name, version, _0, definitions, _1)):
- " module ::= Id Id version { definitions } "
- if module.value != "module":
- raise ASDLSyntaxError(module.lineno,
- msg="expected 'module', found %s" % module)
- return Module(name, definitions, version)
-
- def p_version(self, (version, V)):
- "version ::= Id String"
- if version.value != "version":
- raise ASDLSyntaxError(version.lineno,
- msg="expected 'version', found %s" % version)
- return V
-
- def p_definition_0(self, (definition,)):
- " definitions ::= definition "
- return definition
-
- def p_definition_1(self, (definitions, definition)):
- " definitions ::= definition definitions "
- return definitions + definition
-
- def p_definition(self, (id, _, type)):
- " definition ::= Id = type "
- return [Type(id, type)]
-
- def p_type_0(self, (product,)):
- " type ::= product "
- return product
-
- def p_type_1(self, (sum,)):
- " type ::= sum "
- return Sum(sum)
-
- def p_type_2(self, (sum, id, _0, attributes, _1)):
- " type ::= sum Id ( fields ) "
- if id.value != "attributes":
- raise ASDLSyntaxError(id.lineno,
- msg="expected attributes, found %s" % id)
- if attributes:
- attributes.reverse()
- return Sum(sum, attributes)
-
- def p_product(self, (_0, fields, _1)):
- " product ::= ( fields ) "
- # XXX can't I just construct things in the right order?
- fields.reverse()
- return Product(fields)
-
- def p_sum_0(self, (constructor,)):
- " sum ::= constructor "
- return [constructor]
-
- def p_sum_1(self, (constructor, _, sum)):
- " sum ::= constructor | sum "
- return [constructor] + sum
-
- def p_sum_2(self, (constructor, _, sum)):
- " sum ::= constructor | sum "
- return [constructor] + sum
-
- def p_constructor_0(self, (id,)):
- " constructor ::= Id "
- return Constructor(id)
-
- def p_constructor_1(self, (id, _0, fields, _1)):
- " constructor ::= Id ( fields ) "
- # XXX can't I just construct things in the right order?
- fields.reverse()
- return Constructor(id, fields)
-
- def p_fields_0(self, (field,)):
- " fields ::= field "
- return [field]
-
- def p_fields_1(self, (field, _, fields)):
- " fields ::= field , fields "
- return fields + [field]
-
- def p_field_0(self, (type,)):
- " field ::= Id "
- return Field(type)
-
- def p_field_1(self, (type, name)):
- " field ::= Id Id "
- return Field(type, name)
-
- def p_field_2(self, (type, _, name)):
- " field ::= Id * Id "
- return Field(type, name, seq=True)
-
- def p_field_3(self, (type, _, name)):
- " field ::= Id ? Id "
- return Field(type, name, opt=True)
-
- def p_field_4(self, (type, _)):
- " field ::= Id * "
- return Field(type, seq=True)
-
- def p_field_5(self, (type, _)):
- " field ::= Id ? "
- return Field(type, opt=True)
-
-builtin_types = ("identifier", "string", "int", "bool", "object")
-
-# below is a collection of classes to capture the AST of an AST :-)
-# not sure if any of the methods are useful yet, but I'm adding them
-# piecemeal as they seem helpful
-
-class AST(object):
- pass # a marker class
-
-class Module(AST):
- def __init__(self, name, dfns, version):
- self.name = name
- self.dfns = dfns
- self.version = version
- self.types = {} # maps type name to value (from dfns)
- for type in dfns:
- self.types[type.name.value] = type.value
-
- def __repr__(self):
- return "Module(%s, %s)" % (self.name, self.dfns)
-
-class Type(AST):
- def __init__(self, name, value):
- self.name = name
- self.value = value
-
- def __repr__(self):
- return "Type(%s, %s)" % (self.name, self.value)
-
-class Constructor(AST):
- def __init__(self, name, fields=None):
- self.name = name
- self.fields = fields or []
-
- def __repr__(self):
- return "Constructor(%s, %s)" % (self.name, self.fields)
-
-class Field(AST):
- def __init__(self, type, name=None, seq=False, opt=False):
- self.type = type
- self.name = name
- self.seq = seq
- self.opt = opt
-
- def __repr__(self):
- if self.seq:
- extra = ", seq=True"
- elif self.opt:
- extra = ", opt=True"
- else:
- extra = ""
- if self.name is None:
- return "Field(%s%s)" % (self.type, extra)
- else:
- return "Field(%s, %s%s)" % (self.type, self.name, extra)
-
-class Sum(AST):
- def __init__(self, types, attributes=None):
- self.types = types
- self.attributes = attributes or []
-
- def __repr__(self):
- if self.attributes is None:
- return "Sum(%s)" % self.types
- else:
- return "Sum(%s, %s)" % (self.types, self.attributes)
-
-class Product(AST):
- def __init__(self, fields):
- self.fields = fields
-
- def __repr__(self):
- return "Product(%s)" % self.fields
-
-class VisitorBase(object):
-
- def __init__(self, skip=False):
- self.cache = {}
- self.skip = skip
-
- def visit(self, object, *args):
- meth = self._dispatch(object)
- if meth is None:
- return
- try:
- meth(object, *args)
- except Exception, err:
- print "Error visiting", repr(object)
- print err
- traceback.print_exc()
- # XXX hack
- if hasattr(self, 'file'):
- self.file.flush()
- os._exit(1)
-
- def _dispatch(self, object):
- assert isinstance(object, AST), repr(object)
- klass = object.__class__
- meth = self.cache.get(klass)
- if meth is None:
- methname = "visit" + klass.__name__
- if self.skip:
- meth = getattr(self, methname, None)
- else:
- meth = getattr(self, methname)
- self.cache[klass] = meth
- return meth
-
-class Check(VisitorBase):
-
- def __init__(self):
- super(Check, self).__init__(skip=True)
- self.cons = {}
- self.errors = 0
- self.types = {}
-
- def visitModule(self, mod):
- for dfn in mod.dfns:
- self.visit(dfn)
-
- def visitType(self, type):
- self.visit(type.value, str(type.name))
-
- def visitSum(self, sum, name):
- for t in sum.types:
- self.visit(t, name)
-
- def visitConstructor(self, cons, name):
- key = str(cons.name)
- conflict = self.cons.get(key)
- if conflict is None:
- self.cons[key] = name
- else:
- print "Redefinition of constructor %s" % key
- print "Defined in %s and %s" % (conflict, name)
- self.errors += 1
- for f in cons.fields:
- self.visit(f, key)
-
- def visitField(self, field, name):
- key = str(field.type)
- l = self.types.setdefault(key, [])
- l.append(name)
-
- def visitProduct(self, prod, name):
- for f in prod.fields:
- self.visit(f, name)
-
-def check(mod):
- v = Check()
- v.visit(mod)
-
- for t in v.types:
- if t not in mod.types and not t in builtin_types:
- v.errors += 1
- uses = ", ".join(v.types[t])
- print "Undefined type %s, used in %s" % (t, uses)
-
- return not v.errors
-
-def parse(file):
- scanner = ASDLScanner()
- parser = ASDLParser()
-
- buf = open(file).read()
- tokens = scanner.tokenize(buf)
- try:
- return parser.parse(tokens)
- except ASDLSyntaxError, err:
- print err
- lines = buf.split("\n")
- print lines[err.lineno - 1] # lines starts at 0, files at 1
-
-if __name__ == "__main__":
- import glob
- import sys
-
- if len(sys.argv) > 1:
- files = sys.argv[1:]
- else:
- testdir = "tests"
- files = glob.glob(testdir + "/*.asdl")
-
- for file in files:
- print file
- mod = parse(file)
- print "module", mod.name
- print len(mod.dfns), "definitions"
- if not check(mod):
- print "Check failed"
- else:
- for dfn in mod.dfns:
- print dfn.type
diff --git a/contrib/tools/python/src/Parser/asdl_c.py b/contrib/tools/python/src/Parser/asdl_c.py
deleted file mode 100755
index ac61c78afc4..00000000000
--- a/contrib/tools/python/src/Parser/asdl_c.py
+++ /dev/null
@@ -1,1250 +0,0 @@
-#! /usr/bin/env python
-"""Generate C code from an ASDL description."""
-
-# TO DO
-# handle fields that have a type but no name
-
-import os, sys
-
-import asdl
-
-TABSIZE = 8
-MAX_COL = 80
-
-def get_c_type(name):
- """Return a string for the C name of the type.
-
- This function special cases the default types provided by asdl:
- identifier, string, int, bool.
- """
- # XXX ack! need to figure out where Id is useful and where string
- if isinstance(name, asdl.Id):
- name = name.value
- if name in asdl.builtin_types:
- return name
- else:
- return "%s_ty" % name
-
-def reflow_lines(s, depth):
- """Reflow the line s indented depth tabs.
-
- Return a sequence of lines where no line extends beyond MAX_COL
- when properly indented. The first line is properly indented based
- exclusively on depth * TABSIZE. All following lines -- these are
- the reflowed lines generated by this function -- start at the same
- column as the first character beyond the opening { in the first
- line.
- """
- size = MAX_COL - depth * TABSIZE
- if len(s) < size:
- return [s]
-
- lines = []
- cur = s
- padding = ""
- while len(cur) > size:
- i = cur.rfind(' ', 0, size)
- # XXX this should be fixed for real
- if i == -1 and 'GeneratorExp' in cur:
- i = size + 3
- assert i != -1, "Impossible line %d to reflow: %r" % (size, s)
- lines.append(padding + cur[:i])
- if len(lines) == 1:
- # find new size based on brace
- j = cur.find('{', 0, i)
- if j >= 0:
- j += 2 # account for the brace and the space after it
- size -= j
- padding = " " * j
- else:
- j = cur.find('(', 0, i)
- if j >= 0:
- j += 1 # account for the paren (no space after it)
- size -= j
- padding = " " * j
- cur = cur[i+1:]
- else:
- lines.append(padding + cur)
- return lines
-
-def is_simple(sum):
- """Return True if a sum is a simple.
-
- A sum is simple if its types have no fields, e.g.
- unaryop = Invert | Not | UAdd | USub
- """
- for t in sum.types:
- if t.fields:
- return False
- return True
-
-
-class EmitVisitor(asdl.VisitorBase):
- """Visit that emits lines"""
-
- def __init__(self, file):
- self.file = file
- super(EmitVisitor, self).__init__()
-
- def emit(self, s, depth, reflow=True):
- # XXX reflow long lines?
- if reflow:
- lines = reflow_lines(s, depth)
- else:
- lines = [s]
- for line in lines:
- line = (" " * TABSIZE * depth) + line + "\n"
- self.file.write(line)
-
-
-class TypeDefVisitor(EmitVisitor):
- def visitModule(self, mod):
- for dfn in mod.dfns:
- self.visit(dfn)
-
- def visitType(self, type, depth=0):
- self.visit(type.value, type.name, depth)
-
- def visitSum(self, sum, name, depth):
- if is_simple(sum):
- self.simple_sum(sum, name, depth)
- else:
- self.sum_with_constructors(sum, name, depth)
-
- def simple_sum(self, sum, name, depth):
- enum = []
- for i in range(len(sum.types)):
- type = sum.types[i]
- enum.append("%s=%d" % (type.name, i + 1))
- enums = ", ".join(enum)
- ctype = get_c_type(name)
- s = "typedef enum _%s { %s } %s;" % (name, enums, ctype)
- self.emit(s, depth)
- self.emit("", depth)
-
- def sum_with_constructors(self, sum, name, depth):
- ctype = get_c_type(name)
- s = "typedef struct _%(name)s *%(ctype)s;" % locals()
- self.emit(s, depth)
- self.emit("", depth)
-
- def visitProduct(self, product, name, depth):
- ctype = get_c_type(name)
- s = "typedef struct _%(name)s *%(ctype)s;" % locals()
- self.emit(s, depth)
- self.emit("", depth)
-
-
-class StructVisitor(EmitVisitor):
- """Visitor to generate typedefs for AST."""
-
- def visitModule(self, mod):
- for dfn in mod.dfns:
- self.visit(dfn)
-
- def visitType(self, type, depth=0):
- self.visit(type.value, type.name, depth)
-
- def visitSum(self, sum, name, depth):
- if not is_simple(sum):
- self.sum_with_constructors(sum, name, depth)
-
- def sum_with_constructors(self, sum, name, depth):
- def emit(s, depth=depth):
- self.emit(s % sys._getframe(1).f_locals, depth)
- enum = []
- for i in range(len(sum.types)):
- type = sum.types[i]
- enum.append("%s_kind=%d" % (type.name, i + 1))
-
- emit("enum _%(name)s_kind {" + ", ".join(enum) + "};")
-
- emit("struct _%(name)s {")
- emit("enum _%(name)s_kind kind;", depth + 1)
- emit("union {", depth + 1)
- for t in sum.types:
- self.visit(t, depth + 2)
- emit("} v;", depth + 1)
- for field in sum.attributes:
- # rudimentary attribute handling
- type = str(field.type)
- assert type in asdl.builtin_types, type
- emit("%s %s;" % (type, field.name), depth + 1);
- emit("};")
- emit("")
-
- def visitConstructor(self, cons, depth):
- if cons.fields:
- self.emit("struct {", depth)
- for f in cons.fields:
- self.visit(f, depth + 1)
- self.emit("} %s;" % cons.name, depth)
- self.emit("", depth)
- else:
- # XXX not sure what I want here, nothing is probably fine
- pass
-
- def visitField(self, field, depth):
- # XXX need to lookup field.type, because it might be something
- # like a builtin...
- ctype = get_c_type(field.type)
- name = field.name
- if field.seq:
- if field.type.value in ('cmpop',):
- self.emit("asdl_int_seq *%(name)s;" % locals(), depth)
- else:
- self.emit("asdl_seq *%(name)s;" % locals(), depth)
- else:
- self.emit("%(ctype)s %(name)s;" % locals(), depth)
-
- def visitProduct(self, product, name, depth):
- self.emit("struct _%(name)s {" % locals(), depth)
- for f in product.fields:
- self.visit(f, depth + 1)
- self.emit("};", depth)
- self.emit("", depth)
-
-
-class PrototypeVisitor(EmitVisitor):
- """Generate function prototypes for the .h file"""
-
- def visitModule(self, mod):
- for dfn in mod.dfns:
- self.visit(dfn)
-
- def visitType(self, type):
- self.visit(type.value, type.name)
-
- def visitSum(self, sum, name):
- if is_simple(sum):
- pass # XXX
- else:
- for t in sum.types:
- self.visit(t, name, sum.attributes)
-
- def get_args(self, fields):
- """Return list of C argument into, one for each field.
-
- Argument info is 3-tuple of a C type, variable name, and flag
- that is true if type can be NULL.
- """
- args = []
- unnamed = {}
- for f in fields:
- if f.name is None:
- name = f.type
- c = unnamed[name] = unnamed.get(name, 0) + 1
- if c > 1:
- name = "name%d" % (c - 1)
- else:
- name = f.name
- # XXX should extend get_c_type() to handle this
- if f.seq:
- if f.type.value in ('cmpop',):
- ctype = "asdl_int_seq *"
- else:
- ctype = "asdl_seq *"
- else:
- ctype = get_c_type(f.type)
- args.append((ctype, name, f.opt or f.seq))
- return args
-
- def visitConstructor(self, cons, type, attrs):
- args = self.get_args(cons.fields)
- attrs = self.get_args(attrs)
- ctype = get_c_type(type)
- self.emit_function(cons.name, ctype, args, attrs)
-
- def emit_function(self, name, ctype, args, attrs, union=True):
- args = args + attrs
- if args:
- argstr = ", ".join(["%s %s" % (atype, aname)
- for atype, aname, opt in args])
- argstr += ", PyArena *arena"
- else:
- argstr = "PyArena *arena"
- margs = "a0"
- for i in range(1, len(args)+1):
- margs += ", a%d" % i
- self.emit("#define %s(%s) _Py_%s(%s)" % (name, margs, name, margs), 0,
- reflow=False)
- self.emit("%s _Py_%s(%s);" % (ctype, name, argstr), False)
-
- def visitProduct(self, prod, name):
- self.emit_function(name, get_c_type(name),
- self.get_args(prod.fields), [], union=False)
-
-
-class FunctionVisitor(PrototypeVisitor):
- """Visitor to generate constructor functions for AST."""
-
- def emit_function(self, name, ctype, args, attrs, union=True):
- def emit(s, depth=0, reflow=True):
- self.emit(s, depth, reflow)
- argstr = ", ".join(["%s %s" % (atype, aname)
- for atype, aname, opt in args + attrs])
- if argstr:
- argstr += ", PyArena *arena"
- else:
- argstr = "PyArena *arena"
- self.emit("%s" % ctype, 0)
- emit("%s(%s)" % (name, argstr))
- emit("{")
- emit("%s p;" % ctype, 1)
- for argtype, argname, opt in args:
- # XXX hack alert: false is allowed for a bool
- if not opt and not (argtype == "bool" or argtype == "int"):
- emit("if (!%s) {" % argname, 1)
- emit("PyErr_SetString(PyExc_ValueError,", 2)
- msg = "field %s is required for %s" % (argname, name)
- emit(' "%s");' % msg,
- 2, reflow=False)
- emit('return NULL;', 2)
- emit('}', 1)
-
- emit("p = (%s)PyArena_Malloc(arena, sizeof(*p));" % ctype, 1);
- emit("if (!p)", 1)
- emit("return NULL;", 2)
- if union:
- self.emit_body_union(name, args, attrs)
- else:
- self.emit_body_struct(name, args, attrs)
- emit("return p;", 1)
- emit("}")
- emit("")
-
- def emit_body_union(self, name, args, attrs):
- def emit(s, depth=0, reflow=True):
- self.emit(s, depth, reflow)
- emit("p->kind = %s_kind;" % name, 1)
- for argtype, argname, opt in args:
- emit("p->v.%s.%s = %s;" % (name, argname, argname), 1)
- for argtype, argname, opt in attrs:
- emit("p->%s = %s;" % (argname, argname), 1)
-
- def emit_body_struct(self, name, args, attrs):
- def emit(s, depth=0, reflow=True):
- self.emit(s, depth, reflow)
- for argtype, argname, opt in args:
- emit("p->%s = %s;" % (argname, argname), 1)
- assert not attrs
-
-
-class PickleVisitor(EmitVisitor):
-
- def visitModule(self, mod):
- for dfn in mod.dfns:
- self.visit(dfn)
-
- def visitType(self, type):
- self.visit(type.value, type.name)
-
- def visitSum(self, sum, name):
- pass
-
- def visitProduct(self, sum, name):
- pass
-
- def visitConstructor(self, cons, name):
- pass
-
- def visitField(self, sum):
- pass
-
-
-class Obj2ModPrototypeVisitor(PickleVisitor):
- def visitProduct(self, prod, name):
- code = "static int obj2ast_%s(PyObject* obj, %s* out, PyArena* arena);"
- self.emit(code % (name, get_c_type(name)), 0)
-
- visitSum = visitProduct
-
-
-class Obj2ModVisitor(PickleVisitor):
- def funcHeader(self, name):
- ctype = get_c_type(name)
- self.emit("int", 0)
- self.emit("obj2ast_%s(PyObject* obj, %s* out, PyArena* arena)" % (name, ctype), 0)
- self.emit("{", 0)
- self.emit("PyObject* tmp = NULL;", 1)
- self.emit("int isinstance;", 1)
- self.emit("", 0)
-
- def sumTrailer(self, name):
- self.emit("", 0)
- self.emit("tmp = PyObject_Repr(obj);", 1)
- # there's really nothing more we can do if this fails ...
- self.emit("if (tmp == NULL) goto failed;", 1)
- error = "expected some sort of %s, but got %%.400s" % name
- format = "PyErr_Format(PyExc_TypeError, \"%s\", PyString_AS_STRING(tmp));"
- self.emit(format % error, 1, reflow=False)
- self.emit("failed:", 0)
- self.emit("Py_XDECREF(tmp);", 1)
- self.emit("return 1;", 1)
- self.emit("}", 0)
- self.emit("", 0)
-
- def simpleSum(self, sum, name):
- self.funcHeader(name)
- for t in sum.types:
- line = ("isinstance = PyObject_IsInstance(obj, "
- "(PyObject *)%s_type);")
- self.emit(line % (t.name,), 1)
- self.emit("if (isinstance == -1) {", 1)
- self.emit("return 1;", 2)
- self.emit("}", 1)
- self.emit("if (isinstance) {", 1)
- self.emit("*out = %s;" % t.name, 2)
- self.emit("return 0;", 2)
- self.emit("}", 1)
- self.sumTrailer(name)
-
- def buildArgs(self, fields):
- return ", ".join(fields + ["arena"])
-
- def complexSum(self, sum, name):
- self.funcHeader(name)
- for a in sum.attributes:
- self.visitAttributeDeclaration(a, name, sum=sum)
- self.emit("", 0)
- # XXX: should we only do this for 'expr'?
- self.emit("if (obj == Py_None) {", 1)
- self.emit("*out = NULL;", 2)
- self.emit("return 0;", 2)
- self.emit("}", 1)
- for a in sum.attributes:
- self.visitField(a, name, sum=sum, depth=1)
- for t in sum.types:
- line = "isinstance = PyObject_IsInstance(obj, (PyObject*)%s_type);"
- self.emit(line % (t.name,), 1)
- self.emit("if (isinstance == -1) {", 1)
- self.emit("return 1;", 2)
- self.emit("}", 1)
- self.emit("if (isinstance) {", 1)
- for f in t.fields:
- self.visitFieldDeclaration(f, t.name, sum=sum, depth=2)
- self.emit("", 0)
- for f in t.fields:
- self.visitField(f, t.name, sum=sum, depth=2)
- args = [f.name.value for f in t.fields] + [a.name.value for a in sum.attributes]
- self.emit("*out = %s(%s);" % (t.name, self.buildArgs(args)), 2)
- self.emit("if (*out == NULL) goto failed;", 2)
- self.emit("return 0;", 2)
- self.emit("}", 1)
- self.sumTrailer(name)
-
- def visitAttributeDeclaration(self, a, name, sum=sum):
- ctype = get_c_type(a.type)
- self.emit("%s %s;" % (ctype, a.name), 1)
-
- def visitSum(self, sum, name):
- if is_simple(sum):
- self.simpleSum(sum, name)
- else:
- self.complexSum(sum, name)
-
- def visitProduct(self, prod, name):
- ctype = get_c_type(name)
- self.emit("int", 0)
- self.emit("obj2ast_%s(PyObject* obj, %s* out, PyArena* arena)" % (name, ctype), 0)
- self.emit("{", 0)
- self.emit("PyObject* tmp = NULL;", 1)
- for f in prod.fields:
- self.visitFieldDeclaration(f, name, prod=prod, depth=1)
- self.emit("", 0)
- for f in prod.fields:
- self.visitField(f, name, prod=prod, depth=1)
- args = [f.name.value for f in prod.fields]
- self.emit("*out = %s(%s);" % (name, self.buildArgs(args)), 1)
- self.emit("return 0;", 1)
- self.emit("failed:", 0)
- self.emit("Py_XDECREF(tmp);", 1)
- self.emit("return 1;", 1)
- self.emit("}", 0)
- self.emit("", 0)
-
- def visitFieldDeclaration(self, field, name, sum=None, prod=None, depth=0):
- ctype = get_c_type(field.type)
- if field.seq:
- if self.isSimpleType(field):
- self.emit("asdl_int_seq* %s;" % field.name, depth)
- else:
- self.emit("asdl_seq* %s;" % field.name, depth)
- else:
- ctype = get_c_type(field.type)
- self.emit("%s %s;" % (ctype, field.name), depth)
-
- def isSimpleSum(self, field):
- # XXX can the members of this list be determined automatically?
- return field.type.value in ('expr_context', 'boolop', 'operator',
- 'unaryop', 'cmpop')
-
- def isNumeric(self, field):
- return get_c_type(field.type) in ("int", "bool")
-
- def isSimpleType(self, field):
- return self.isSimpleSum(field) or self.isNumeric(field)
-
- def visitField(self, field, name, sum=None, prod=None, depth=0):
- ctype = get_c_type(field.type)
- self.emit("if (PyObject_HasAttrString(obj, \"%s\")) {" % field.name, depth)
- self.emit("int res;", depth+1)
- if field.seq:
- self.emit("Py_ssize_t len;", depth+1)
- self.emit("Py_ssize_t i;", depth+1)
- self.emit("tmp = PyObject_GetAttrString(obj, \"%s\");" % field.name, depth+1)
- self.emit("if (tmp == NULL) goto failed;", depth+1)
- if field.seq:
- self.emit("if (!PyList_Check(tmp)) {", depth+1)
- self.emit("PyErr_Format(PyExc_TypeError, \"%s field \\\"%s\\\" must "
- "be a list, not a %%.200s\", tmp->ob_type->tp_name);" %
- (name, field.name),
- depth+2, reflow=False)
- self.emit("goto failed;", depth+2)
- self.emit("}", depth+1)
- self.emit("len = PyList_GET_SIZE(tmp);", depth+1)
- if self.isSimpleType(field):
- self.emit("%s = asdl_int_seq_new(len, arena);" % field.name, depth+1)
- else:
- self.emit("%s = asdl_seq_new(len, arena);" % field.name, depth+1)
- self.emit("if (%s == NULL) goto failed;" % field.name, depth+1)
- self.emit("for (i = 0; i < len; i++) {", depth+1)
- self.emit("%s val;" % ctype, depth+2)
- self.emit("res = obj2ast_%s(PyList_GET_ITEM(tmp, i), &val, arena);" %
- field.type, depth+2, reflow=False)
- self.emit("if (res != 0) goto failed;", depth+2)
- self.emit("if (len != PyList_GET_SIZE(tmp)) {", depth+2)
- self.emit("PyErr_SetString(PyExc_RuntimeError, \"%s field \\\"%s\\\" "
- "changed size during iteration\");" %
- (name, field.name),
- depth+3, reflow=False)
- self.emit("goto failed;", depth+3)
- self.emit("}", depth+2)
- self.emit("asdl_seq_SET(%s, i, val);" % field.name, depth+2)
- self.emit("}", depth+1)
- else:
- self.emit("res = obj2ast_%s(tmp, &%s, arena);" %
- (field.type, field.name), depth+1)
- self.emit("if (res != 0) goto failed;", depth+1)
-
- self.emit("Py_XDECREF(tmp);", depth+1)
- self.emit("tmp = NULL;", depth+1)
- self.emit("} else {", depth)
- if not field.opt:
- message = "required field \\\"%s\\\" missing from %s" % (field.name, name)
- format = "PyErr_SetString(PyExc_TypeError, \"%s\");"
- self.emit(format % message, depth+1, reflow=False)
- self.emit("return 1;", depth+1)
- else:
- if self.isNumeric(field):
- self.emit("%s = 0;" % field.name, depth+1)
- elif not self.isSimpleType(field):
- self.emit("%s = NULL;" % field.name, depth+1)
- else:
- raise TypeError("could not determine the default value for %s" % field.name)
- self.emit("}", depth)
-
-
-class MarshalPrototypeVisitor(PickleVisitor):
-
- def prototype(self, sum, name):
- ctype = get_c_type(name)
- self.emit("static int marshal_write_%s(PyObject **, int *, %s);"
- % (name, ctype), 0)
-
- visitProduct = visitSum = prototype
-
-
-class PyTypesDeclareVisitor(PickleVisitor):
-
- def visitProduct(self, prod, name):
- self.emit("static PyTypeObject *%s_type;" % name, 0)
- self.emit("static PyObject* ast2obj_%s(void*);" % name, 0)
- if prod.fields:
- self.emit("static char *%s_fields[]={" % name,0)
- for f in prod.fields:
- self.emit('"%s",' % f.name, 1)
- self.emit("};", 0)
-
- def visitSum(self, sum, name):
- self.emit("static PyTypeObject *%s_type;" % name, 0)
- if sum.attributes:
- self.emit("static char *%s_attributes[] = {" % name, 0)
- for a in sum.attributes:
- self.emit('"%s",' % a.name, 1)
- self.emit("};", 0)
- ptype = "void*"
- if is_simple(sum):
- ptype = get_c_type(name)
- tnames = []
- for t in sum.types:
- tnames.append(str(t.name)+"_singleton")
- tnames = ", *".join(tnames)
- self.emit("static PyObject *%s;" % tnames, 0)
- self.emit("static PyObject* ast2obj_%s(%s);" % (name, ptype), 0)
- for t in sum.types:
- self.visitConstructor(t, name)
-
- def visitConstructor(self, cons, name):
- self.emit("static PyTypeObject *%s_type;" % cons.name, 0)
- if cons.fields:
- self.emit("static char *%s_fields[]={" % cons.name, 0)
- for t in cons.fields:
- self.emit('"%s",' % t.name, 1)
- self.emit("};",0)
-
-class PyTypesVisitor(PickleVisitor):
-
- def visitModule(self, mod):
- self.emit("""
-static int
-ast_type_init(PyObject *self, PyObject *args, PyObject *kw)
-{
- Py_ssize_t i, numfields = 0;
- int res = -1;
- PyObject *key, *value, *fields;
- fields = PyObject_GetAttrString((PyObject*)Py_TYPE(self), "_fields");
- if (!fields)
- PyErr_Clear();
- if (fields) {
- numfields = PySequence_Size(fields);
- if (numfields == -1)
- goto cleanup;
- }
- res = 0; /* if no error occurs, this stays 0 to the end */
- if (PyTuple_GET_SIZE(args) > 0) {
- if (numfields != PyTuple_GET_SIZE(args)) {
- PyErr_Format(PyExc_TypeError, "%.400s constructor takes %s"
- "%zd positional argument%s",
- Py_TYPE(self)->tp_name,
- numfields == 0 ? "" : "either 0 or ",
- numfields, numfields == 1 ? "" : "s");
- res = -1;
- goto cleanup;
- }
- for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
- /* cannot be reached when fields is NULL */
- PyObject *name = PySequence_GetItem(fields, i);
- if (!name) {
- res = -1;
- goto cleanup;
- }
- res = PyObject_SetAttr(self, name, PyTuple_GET_ITEM(args, i));
- Py_DECREF(name);
- if (res < 0)
- goto cleanup;
- }
- }
- if (kw) {
- i = 0; /* needed by PyDict_Next */
- while (PyDict_Next(kw, &i, &key, &value)) {
- res = PyObject_SetAttr(self, key, value);
- if (res < 0)
- goto cleanup;
- }
- }
- cleanup:
- Py_XDECREF(fields);
- return res;
-}
-
-/* Pickling support */
-static PyObject *
-ast_type_reduce(PyObject *self, PyObject *unused)
-{
- PyObject *res;
- PyObject *dict = PyObject_GetAttrString(self, "__dict__");
- if (dict == NULL) {
- if (PyErr_ExceptionMatches(PyExc_AttributeError))
- PyErr_Clear();
- else
- return NULL;
- }
- if (dict) {
- res = Py_BuildValue("O()O", Py_TYPE(self), dict);
- Py_DECREF(dict);
- return res;
- }
- return Py_BuildValue("O()", Py_TYPE(self));
-}
-
-static PyMethodDef ast_type_methods[] = {
- {"__reduce__", ast_type_reduce, METH_NOARGS, NULL},
- {NULL}
-};
-
-static PyTypeObject AST_type = {
- PyVarObject_HEAD_INIT(&PyType_Type, 0)
- "_ast.AST",
- sizeof(PyObject),
- 0,
- 0, /* tp_dealloc */
- 0, /* tp_print */
- 0, /* tp_getattr */
- 0, /* tp_setattr */
- 0, /* tp_compare */
- 0, /* tp_repr */
- 0, /* tp_as_number */
- 0, /* tp_as_sequence */
- 0, /* tp_as_mapping */
- 0, /* tp_hash */
- 0, /* tp_call */
- 0, /* tp_str */
- PyObject_GenericGetAttr, /* tp_getattro */
- PyObject_GenericSetAttr, /* tp_setattro */
- 0, /* tp_as_buffer */
- Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
- 0, /* tp_doc */
- 0, /* tp_traverse */
- 0, /* tp_clear */
- 0, /* tp_richcompare */
- 0, /* tp_weaklistoffset */
- 0, /* tp_iter */
- 0, /* tp_iternext */
- ast_type_methods, /* tp_methods */
- 0, /* tp_members */
- 0, /* tp_getset */
- 0, /* tp_base */
- 0, /* tp_dict */
- 0, /* tp_descr_get */
- 0, /* tp_descr_set */
- 0, /* tp_dictoffset */
- (initproc)ast_type_init, /* tp_init */
- PyType_GenericAlloc, /* tp_alloc */
- PyType_GenericNew, /* tp_new */
- PyObject_Del, /* tp_free */
-};
-
-
-static PyTypeObject* make_type(char *type, PyTypeObject* base, char**fields, int num_fields)
-{
- PyObject *fnames, *result;
- int i;
- fnames = PyTuple_New(num_fields);
- if (!fnames) return NULL;
- for (i = 0; i < num_fields; i++) {
- PyObject *field = PyString_FromString(fields[i]);
- if (!field) {
- Py_DECREF(fnames);
- return NULL;
- }
- PyTuple_SET_ITEM(fnames, i, field);
- }
- result = PyObject_CallFunction((PyObject*)&PyType_Type, "s(O){sOss}",
- type, base, "_fields", fnames, "__module__", "_ast");
- Py_DECREF(fnames);
- return (PyTypeObject*)result;
-}
-
-static int add_attributes(PyTypeObject* type, char**attrs, int num_fields)
-{
- int i, result;
- PyObject *s, *l = PyTuple_New(num_fields);
- if (!l)
- return 0;
- for (i = 0; i < num_fields; i++) {
- s = PyString_FromString(attrs[i]);
- if (!s) {
- Py_DECREF(l);
- return 0;
- }
- PyTuple_SET_ITEM(l, i, s);
- }
- result = PyObject_SetAttrString((PyObject*)type, "_attributes", l) >= 0;
- Py_DECREF(l);
- return result;
-}
-
-/* Conversion AST -> Python */
-
-static PyObject* ast2obj_list(asdl_seq *seq, PyObject* (*func)(void*))
-{
- int i, n = asdl_seq_LEN(seq);
- PyObject *result = PyList_New(n);
- PyObject *value;
- if (!result)
- return NULL;
- for (i = 0; i < n; i++) {
- value = func(asdl_seq_GET(seq, i));
- if (!value) {
- Py_DECREF(result);
- return NULL;
- }
- PyList_SET_ITEM(result, i, value);
- }
- return result;
-}
-
-static PyObject* ast2obj_object(void *o)
-{
- if (!o)
- o = Py_None;
- Py_INCREF((PyObject*)o);
- return (PyObject*)o;
-}
-#define ast2obj_identifier ast2obj_object
-#define ast2obj_string ast2obj_object
-static PyObject* ast2obj_bool(bool b)
-{
- return PyBool_FromLong(b);
-}
-
-static PyObject* ast2obj_int(long b)
-{
- return PyInt_FromLong(b);
-}
-
-/* Conversion Python -> AST */
-
-static int obj2ast_object(PyObject* obj, PyObject** out, PyArena* arena)
-{
- if (obj == Py_None)
- obj = NULL;
- if (obj)
- PyArena_AddPyObject(arena, obj);
- Py_XINCREF(obj);
- *out = obj;
- return 0;
-}
-
-static int obj2ast_identifier(PyObject* obj, PyObject** out, PyArena* arena)
-{
- if (!PyString_CheckExact(obj) && obj != Py_None) {
- PyErr_Format(PyExc_TypeError,
- "AST identifier must be of type str");
- return 1;
- }
- return obj2ast_object(obj, out, arena);
-}
-
-static int obj2ast_string(PyObject* obj, PyObject** out, PyArena* arena)
-{
- if (!PyString_CheckExact(obj) && !PyUnicode_CheckExact(obj)) {
- PyErr_SetString(PyExc_TypeError,
- "AST string must be of type str or unicode");
- return 1;
- }
- return obj2ast_object(obj, out, arena);
-}
-
-static int obj2ast_int(PyObject* obj, int* out, PyArena* arena)
-{
- int i;
- if (!_PyAnyInt_Check(obj)) {
- PyObject *s = PyObject_Repr(obj);
- if (s == NULL) return 1;
- PyErr_Format(PyExc_ValueError, "invalid integer value: %.400s",
- PyString_AS_STRING(s));
- Py_DECREF(s);
- return 1;
- }
-
- i = (int)PyLong_AsLong(obj);
- if (i == -1 && PyErr_Occurred())
- return 1;
- *out = i;
- return 0;
-}
-
-static int obj2ast_bool(PyObject* obj, bool* out, PyArena* arena)
-{
- if (!PyBool_Check(obj)) {
- PyObject *s = PyObject_Repr(obj);
- if (s == NULL) return 1;
- PyErr_Format(PyExc_ValueError, "invalid boolean value: %.400s",
- PyString_AS_STRING(s));
- Py_DECREF(s);
- return 1;
- }
-
- *out = (obj == Py_True);
- return 0;
-}
-
-static int add_ast_fields(void)
-{
- PyObject *empty_tuple, *d;
- if (PyType_Ready(&AST_type) < 0)
- return -1;
- d = AST_type.tp_dict;
- empty_tuple = PyTuple_New(0);
- if (!empty_tuple ||
- PyDict_SetItemString(d, "_fields", empty_tuple) < 0 ||
- PyDict_SetItemString(d, "_attributes", empty_tuple) < 0) {
- Py_XDECREF(empty_tuple);
- return -1;
- }
- Py_DECREF(empty_tuple);
- return 0;
-}
-
-""", 0, reflow=False)
-
- self.emit("static int init_types(void)",0)
- self.emit("{", 0)
- self.emit("static int initialized;", 1)
- self.emit("if (initialized) return 1;", 1)
- self.emit("if (add_ast_fields() < 0) return 0;", 1)
- for dfn in mod.dfns:
- self.visit(dfn)
- self.emit("initialized = 1;", 1)
- self.emit("return 1;", 1);
- self.emit("}", 0)
-
- def visitProduct(self, prod, name):
- if prod.fields:
- fields = name.value+"_fields"
- else:
- fields = "NULL"
- self.emit('%s_type = make_type("%s", &AST_type, %s, %d);' %
- (name, name, fields, len(prod.fields)), 1)
- self.emit("if (!%s_type) return 0;" % name, 1)
-
- def visitSum(self, sum, name):
- self.emit('%s_type = make_type("%s", &AST_type, NULL, 0);' %
- (name, name), 1)
- self.emit("if (!%s_type) return 0;" % name, 1)
- if sum.attributes:
- self.emit("if (!add_attributes(%s_type, %s_attributes, %d)) return 0;" %
- (name, name, len(sum.attributes)), 1)
- else:
- self.emit("if (!add_attributes(%s_type, NULL, 0)) return 0;" % name, 1)
- simple = is_simple(sum)
- for t in sum.types:
- self.visitConstructor(t, name, simple)
-
- def visitConstructor(self, cons, name, simple):
- if cons.fields:
- fields = cons.name.value+"_fields"
- else:
- fields = "NULL"
- self.emit('%s_type = make_type("%s", %s_type, %s, %d);' %
- (cons.name, cons.name, name, fields, len(cons.fields)), 1)
- self.emit("if (!%s_type) return 0;" % cons.name, 1)
- if simple:
- self.emit("%s_singleton = PyType_GenericNew(%s_type, NULL, NULL);" %
- (cons.name, cons.name), 1)
- self.emit("if (!%s_singleton) return 0;" % cons.name, 1)
-
-
-class ASTModuleVisitor(PickleVisitor):
-
- def visitModule(self, mod):
- self.emit("PyMODINIT_FUNC", 0)
- self.emit("init_ast(void)", 0)
- self.emit("{", 0)
- self.emit("PyObject *m, *d;", 1)
- self.emit("if (!init_types()) return;", 1)
- self.emit('m = Py_InitModule3("_ast", NULL, NULL);', 1)
- self.emit("if (!m) return;", 1)
- self.emit("d = PyModule_GetDict(m);", 1)
- self.emit('if (PyDict_SetItemString(d, "AST", (PyObject*)&AST_type) < 0) return;', 1)
- self.emit('if (PyModule_AddIntConstant(m, "PyCF_ONLY_AST", PyCF_ONLY_AST) < 0)', 1)
- self.emit("return;", 2)
- # Value of version: "$Revision$"
- self.emit('if (PyModule_AddStringConstant(m, "__version__", "%s") < 0)'
- % mod.version, 1)
- self.emit("return;", 2)
- for dfn in mod.dfns:
- self.visit(dfn)
- self.emit("}", 0)
-
- def visitProduct(self, prod, name):
- self.addObj(name)
-
- def visitSum(self, sum, name):
- self.addObj(name)
- for t in sum.types:
- self.visitConstructor(t, name)
-
- def visitConstructor(self, cons, name):
- self.addObj(cons.name)
-
- def addObj(self, name):
- self.emit('if (PyDict_SetItemString(d, "%s", (PyObject*)%s_type) < 0) return;' % (name, name), 1)
-
-
-_SPECIALIZED_SEQUENCES = ('stmt', 'expr')
-
-def find_sequence(fields, doing_specialization):
- """Return True if any field uses a sequence."""
- for f in fields:
- if f.seq:
- if not doing_specialization:
- return True
- if str(f.type) not in _SPECIALIZED_SEQUENCES:
- return True
- return False
-
-def has_sequence(types, doing_specialization):
- for t in types:
- if find_sequence(t.fields, doing_specialization):
- return True
- return False
-
-
-class StaticVisitor(PickleVisitor):
- CODE = '''Very simple, always emit this static code. Override CODE'''
-
- def visit(self, object):
- self.emit(self.CODE, 0, reflow=False)
-
-
-class ObjVisitor(PickleVisitor):
-
- def func_begin(self, name):
- ctype = get_c_type(name)
- self.emit("PyObject*", 0)
- self.emit("ast2obj_%s(void* _o)" % (name), 0)
- self.emit("{", 0)
- self.emit("%s o = (%s)_o;" % (ctype, ctype), 1)
- self.emit("PyObject *result = NULL, *value = NULL;", 1)
- self.emit('if (!o) {', 1)
- self.emit("Py_INCREF(Py_None);", 2)
- self.emit('return Py_None;', 2)
- self.emit("}", 1)
- self.emit('', 0)
-
- def func_end(self):
- self.emit("return result;", 1)
- self.emit("failed:", 0)
- self.emit("Py_XDECREF(value);", 1)
- self.emit("Py_XDECREF(result);", 1)
- self.emit("return NULL;", 1)
- self.emit("}", 0)
- self.emit("", 0)
-
- def visitSum(self, sum, name):
- if is_simple(sum):
- self.simpleSum(sum, name)
- return
- self.func_begin(name)
- self.emit("switch (o->kind) {", 1)
- for i in range(len(sum.types)):
- t = sum.types[i]
- self.visitConstructor(t, i + 1, name)
- self.emit("}", 1)
- for a in sum.attributes:
- self.emit("value = ast2obj_%s(o->%s);" % (a.type, a.name), 1)
- self.emit("if (!value) goto failed;", 1)
- self.emit('if (PyObject_SetAttrString(result, "%s", value) < 0)' % a.name, 1)
- self.emit('goto failed;', 2)
- self.emit('Py_DECREF(value);', 1)
- self.func_end()
-
- def simpleSum(self, sum, name):
- self.emit("PyObject* ast2obj_%s(%s_ty o)" % (name, name), 0)
- self.emit("{", 0)
- self.emit("switch(o) {", 1)
- for t in sum.types:
- self.emit("case %s:" % t.name, 2)
- self.emit("Py_INCREF(%s_singleton);" % t.name, 3)
- self.emit("return %s_singleton;" % t.name, 3)
- self.emit("default:", 2)
- self.emit('/* should never happen, but just in case ... */', 3)
- code = "PyErr_Format(PyExc_SystemError, \"unknown %s found\");" % name
- self.emit(code, 3, reflow=False)
- self.emit("return NULL;", 3)
- self.emit("}", 1)
- self.emit("}", 0)
-
- def visitProduct(self, prod, name):
- self.func_begin(name)
- self.emit("result = PyType_GenericNew(%s_type, NULL, NULL);" % name, 1);
- self.emit("if (!result) return NULL;", 1)
- for field in prod.fields:
- self.visitField(field, name, 1, True)
- self.func_end()
-
- def visitConstructor(self, cons, enum, name):
- self.emit("case %s_kind:" % cons.name, 1)
- self.emit("result = PyType_GenericNew(%s_type, NULL, NULL);" % cons.name, 2);
- self.emit("if (!result) goto failed;", 2)
- for f in cons.fields:
- self.visitField(f, cons.name, 2, False)
- self.emit("break;", 2)
-
- def visitField(self, field, name, depth, product):
- def emit(s, d):
- self.emit(s, depth + d)
- if product:
- value = "o->%s" % field.name
- else:
- value = "o->v.%s.%s" % (name, field.name)
- self.set(field, value, depth)
- emit("if (!value) goto failed;", 0)
- emit('if (PyObject_SetAttrString(result, "%s", value) == -1)' % field.name, 0)
- emit("goto failed;", 1)
- emit("Py_DECREF(value);", 0)
-
- def emitSeq(self, field, value, depth, emit):
- emit("seq = %s;" % value, 0)
- emit("n = asdl_seq_LEN(seq);", 0)
- emit("value = PyList_New(n);", 0)
- emit("if (!value) goto failed;", 0)
- emit("for (i = 0; i < n; i++) {", 0)
- self.set("value", field, "asdl_seq_GET(seq, i)", depth + 1)
- emit("if (!value1) goto failed;", 1)
- emit("PyList_SET_ITEM(value, i, value1);", 1)
- emit("value1 = NULL;", 1)
- emit("}", 0)
-
- def set(self, field, value, depth):
- if field.seq:
- # XXX should really check for is_simple, but that requires a symbol table
- if field.type.value == "cmpop":
- # While the sequence elements are stored as void*,
- # ast2obj_cmpop expects an enum
- self.emit("{", depth)
- self.emit("int i, n = asdl_seq_LEN(%s);" % value, depth+1)
- self.emit("value = PyList_New(n);", depth+1)
- self.emit("if (!value) goto failed;", depth+1)
- self.emit("for(i = 0; i < n; i++)", depth+1)
- # This cannot fail, so no need for error handling
- self.emit("PyList_SET_ITEM(value, i, ast2obj_cmpop((cmpop_ty)asdl_seq_GET(%s, i)));" % value,
- depth+2, reflow=False)
- self.emit("}", depth)
- else:
- self.emit("value = ast2obj_list(%s, ast2obj_%s);" % (value, field.type), depth)
- else:
- ctype = get_c_type(field.type)
- self.emit("value = ast2obj_%s(%s);" % (field.type, value), depth, reflow=False)
-
-
-class PartingShots(StaticVisitor):
-
- CODE = """
-PyObject* PyAST_mod2obj(mod_ty t)
-{
- init_types();
- return ast2obj_mod(t);
-}
-
-/* mode is 0 for "exec", 1 for "eval" and 2 for "single" input */
-mod_ty PyAST_obj2mod(PyObject* ast, PyArena* arena, int mode)
-{
- mod_ty res;
- PyObject *req_type[3];
- char *req_name[3];
- int isinstance;
-
- req_type[0] = (PyObject*)Module_type;
- req_type[1] = (PyObject*)Expression_type;
- req_type[2] = (PyObject*)Interactive_type;
-
- req_name[0] = "Module";
- req_name[1] = "Expression";
- req_name[2] = "Interactive";
-
- assert(0 <= mode && mode <= 2);
-
- init_types();
-
- isinstance = PyObject_IsInstance(ast, req_type[mode]);
- if (isinstance == -1)
- return NULL;
- if (!isinstance) {
- PyErr_Format(PyExc_TypeError, "expected %s node, got %.400s",
- req_name[mode], Py_TYPE(ast)->tp_name);
- return NULL;
- }
- if (obj2ast_mod(ast, &res, arena) != 0)
- return NULL;
- else
- return res;
-}
-
-int PyAST_Check(PyObject* obj)
-{
- init_types();
- return PyObject_IsInstance(obj, (PyObject*)&AST_type);
-}
-"""
-
-class ChainOfVisitors:
- def __init__(self, *visitors):
- self.visitors = visitors
-
- def visit(self, object):
- for v in self.visitors:
- v.visit(object)
- v.emit("", 0)
-
-common_msg = "/* File automatically generated by %s. */\n\n"
-
-c_file_msg = """
-/*
- __version__ %s.
-
- This module must be committed separately after each AST grammar change;
- The __version__ number is set to the revision number of the commit
- containing the grammar change.
-*/
-
-"""
-
-def main(srcfile):
- argv0 = sys.argv[0]
- components = argv0.split(os.sep)
- argv0 = os.sep.join(components[-2:])
- auto_gen_msg = common_msg % argv0
- mod = asdl.parse(srcfile)
- mod.version = "82160"
- if not asdl.check(mod):
- sys.exit(1)
- if INC_DIR:
- p = "%s/%s-ast.h" % (INC_DIR, mod.name)
- f = open(p, "wb")
- f.write(auto_gen_msg)
- f.write('#include "asdl.h"\n\n')
- c = ChainOfVisitors(TypeDefVisitor(f),
- StructVisitor(f),
- PrototypeVisitor(f),
- )
- c.visit(mod)
- f.write("PyObject* PyAST_mod2obj(mod_ty t);\n")
- f.write("mod_ty PyAST_obj2mod(PyObject* ast, PyArena* arena, int mode);\n")
- f.write("int PyAST_Check(PyObject* obj);\n")
- f.close()
-
- if SRC_DIR:
- p = os.path.join(SRC_DIR, str(mod.name) + "-ast.c")
- f = open(p, "wb")
- f.write(auto_gen_msg)
- f.write(c_file_msg % mod.version)
- f.write('#include "Python.h"\n')
- f.write('#include "%s-ast.h"\n' % mod.name)
- f.write('\n')
- f.write("static PyTypeObject AST_type;\n")
- v = ChainOfVisitors(
- PyTypesDeclareVisitor(f),
- PyTypesVisitor(f),
- Obj2ModPrototypeVisitor(f),
- FunctionVisitor(f),
- ObjVisitor(f),
- Obj2ModVisitor(f),
- ASTModuleVisitor(f),
- PartingShots(f),
- )
- v.visit(mod)
- f.close()
-
-if __name__ == "__main__":
- import sys
- import getopt
-
- INC_DIR = ''
- SRC_DIR = ''
- opts, args = getopt.getopt(sys.argv[1:], "h:c:")
- if len(opts) != 1:
- print "Must specify exactly one output file"
- sys.exit(1)
- for o, v in opts:
- if o == '-h':
- INC_DIR = v
- if o == '-c':
- SRC_DIR = v
- if len(args) != 1:
- print "Must specify single input file"
- sys.exit(1)
- main(args[0])
diff --git a/contrib/tools/python/src/Parser/intrcheck.c b/contrib/tools/python/src/Parser/intrcheck.c
deleted file mode 100644
index 5844a9a85e1..00000000000
--- a/contrib/tools/python/src/Parser/intrcheck.c
+++ /dev/null
@@ -1,178 +0,0 @@
-
-/* Check for interrupts */
-
-#include "Python.h"
-#include "pythread.h"
-
-#ifdef QUICKWIN
-
-#include <io.h>
-
-void
-PyOS_InitInterrupts(void)
-{
-}
-
-void
-PyOS_FiniInterrupts(void)
-{
-}
-
-int
-PyOS_InterruptOccurred(void)
-{
- _wyield();
-}
-
-#define OK
-
-#endif /* QUICKWIN */
-
-#if defined(_M_IX86) && !defined(__QNX__)
-#include <io.h>
-#endif
-
-#if defined(MSDOS) && !defined(QUICKWIN)
-
-#ifdef __GNUC__
-
-/* This is for DJGPP's GO32 extender. I don't know how to trap
- * control-C (There's no API for ctrl-C, and I don't want to mess with
- * the interrupt vectors.) However, this DOES catch control-break.
- * --Amrit
- */
-
-#include <go32.h>
-
-void
-PyOS_InitInterrupts(void)
-{
- _go32_want_ctrl_break(1 /* TRUE */);
-}
-
-void
-PyOS_FiniInterrupts(void)
-{
-}
-
-int
-PyOS_InterruptOccurred(void)
-{
- return _go32_was_ctrl_break_hit();
-}
-
-#else /* !__GNUC__ */
-
-/* This might work for MS-DOS (untested though): */
-
-void
-PyOS_InitInterrupts(void)
-{
-}
-
-void
-PyOS_FiniInterrupts(void)
-{
-}
-
-int
-PyOS_InterruptOccurred(void)
-{
- int interrupted = 0;
- while (kbhit()) {
- if (getch() == '\003')
- interrupted = 1;
- }
- return interrupted;
-}
-
-#endif /* __GNUC__ */
-
-#define OK
-
-#endif /* MSDOS && !QUICKWIN */
-
-
-#ifndef OK
-
-/* Default version -- for real operating systems and for Standard C */
-
-#include <stdio.h>
-#include <string.h>
-#include <signal.h>
-
-static int interrupted;
-
-void
-PyErr_SetInterrupt(void)
-{
- interrupted = 1;
-}
-
-extern int PyErr_CheckSignals(void);
-
-static int
-checksignals_witharg(void * arg)
-{
- return PyErr_CheckSignals();
-}
-
-static void
-intcatcher(int sig)
-{
- extern void Py_Exit(int);
- static char message[] =
-"python: to interrupt a truly hanging Python program, interrupt once more.\n";
- switch (interrupted++) {
- case 0:
- break;
- case 1:
-#ifdef RISCOS
- fprintf(stderr, message);
-#else
- write(2, message, strlen(message));
-#endif
- break;
- case 2:
- interrupted = 0;
- Py_Exit(1);
- break;
- }
- PyOS_setsig(SIGINT, intcatcher);
- Py_AddPendingCall(checksignals_witharg, NULL);
-}
-
-static void (*old_siginthandler)(int) = SIG_DFL;
-
-void
-PyOS_InitInterrupts(void)
-{
- if ((old_siginthandler = PyOS_setsig(SIGINT, SIG_IGN)) != SIG_IGN)
- PyOS_setsig(SIGINT, intcatcher);
-}
-
-void
-PyOS_FiniInterrupts(void)
-{
- PyOS_setsig(SIGINT, old_siginthandler);
-}
-
-int
-PyOS_InterruptOccurred(void)
-{
- if (!interrupted)
- return 0;
- interrupted = 0;
- return 1;
-}
-
-#endif /* !OK */
-
-void
-PyOS_AfterFork(void)
-{
-#ifdef WITH_THREAD
- PyThread_ReInitTLS();
- PyEval_ReInitThreads();
-#endif
-}
diff --git a/contrib/tools/python/src/Parser/pgenmain.c b/contrib/tools/python/src/Parser/pgenmain.c
deleted file mode 100644
index 0b47295c1b4..00000000000
--- a/contrib/tools/python/src/Parser/pgenmain.c
+++ /dev/null
@@ -1,174 +0,0 @@
-
-/* Parser generator main program */
-
-/* This expects a filename containing the grammar as argv[1] (UNIX)
- or asks the console for such a file name (THINK C).
- It writes its output on two files in the current directory:
- - "graminit.c" gets the grammar as a bunch of initialized data
- - "graminit.h" gets the grammar's non-terminals as #defines.
- Error messages and status info during the generation process are
- written to stdout, or sometimes to stderr. */
-
-/* XXX TO DO:
- - check for duplicate definitions of names (instead of fatal err)
-*/
-
-#include "Python.h"
-#include "pgenheaders.h"
-#include "grammar.h"
-#include "node.h"
-#include "parsetok.h"
-#include "pgen.h"
-
-int Py_DebugFlag;
-int Py_VerboseFlag;
-int Py_IgnoreEnvironmentFlag;
-
-/* Forward */
-grammar *getgrammar(char *filename);
-
-void
-Py_Exit(int sts)
-{
- exit(sts);
-}
-
-int
-main(int argc, char **argv)
-{
- grammar *g;
- FILE *fp;
- char *filename, *graminit_h, *graminit_c;
-
- if (argc != 4) {
- fprintf(stderr,
- "usage: %s grammar graminit.h graminit.c\n", argv[0]);
- Py_Exit(2);
- }
- filename = argv[1];
- graminit_h = argv[2];
- graminit_c = argv[3];
- g = getgrammar(filename);
- fp = fopen(graminit_c, "w");
- if (fp == NULL) {
- perror(graminit_c);
- Py_Exit(1);
- }
- if (Py_DebugFlag)
- printf("Writing %s ...\n", graminit_c);
- printgrammar(g, fp);
- fclose(fp);
- fp = fopen(graminit_h, "w");
- if (fp == NULL) {
- perror(graminit_h);
- Py_Exit(1);
- }
- if (Py_DebugFlag)
- printf("Writing %s ...\n", graminit_h);
- printnonterminals(g, fp);
- fclose(fp);
- freegrammar(g);
- Py_Exit(0);
- return 0; /* Make gcc -Wall happy */
-}
-
-grammar *
-getgrammar(char *filename)
-{
- FILE *fp;
- node *n;
- grammar *g0, *g;
- perrdetail err;
-
- fp = fopen(filename, "r");
- if (fp == NULL) {
- perror(filename);
- Py_Exit(1);
- }
- g0 = meta_grammar();
- n = PyParser_ParseFile(fp, filename, g0, g0->g_start,
- (char *)NULL, (char *)NULL, &err);
- fclose(fp);
- if (n == NULL) {
- fprintf(stderr, "Parsing error %d, line %d.\n",
- err.error, err.lineno);
- if (err.text != NULL) {
- size_t i;
- fprintf(stderr, "%s", err.text);
- i = strlen(err.text);
- if (i == 0 || err.text[i-1] != '\n')
- fprintf(stderr, "\n");
- for (i = 0; i < err.offset; i++) {
- if (err.text[i] == '\t')
- putc('\t', stderr);
- else
- putc(' ', stderr);
- }
- fprintf(stderr, "^\n");
- PyObject_FREE(err.text);
- }
- Py_Exit(1);
- }
- g = pgen(n);
- if (g == NULL) {
- printf("Bad grammar.\n");
- Py_Exit(1);
- }
- return g;
-}
-
-/* Can't happen in pgen */
-PyObject*
-PyErr_Occurred()
-{
- return 0;
-}
-
-void
-Py_FatalError(const char *msg)
-{
- fprintf(stderr, "pgen: FATAL ERROR: %s\n", msg);
- Py_Exit(1);
-}
-
-/* No-nonsense my_readline() for tokenizer.c */
-
-char *
-PyOS_Readline(FILE *sys_stdin, FILE *sys_stdout, char *prompt)
-{
- size_t n = 1000;
- char *p = (char *)PyMem_MALLOC(n);
- char *q;
- if (p == NULL)
- return NULL;
- fprintf(stderr, "%s", prompt);
- q = fgets(p, n, sys_stdin);
- if (q == NULL) {
- *p = '\0';
- return p;
- }
- n = strlen(p);
- if (n > 0 && p[n-1] != '\n')
- p[n-1] = '\n';
- return (char *)PyMem_REALLOC(p, n+1);
-}
-
-/* No-nonsense fgets */
-char *
-Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
-{
- return fgets(buf, n, stream);
-}
-
-
-#include <stdarg.h>
-
-void
-PySys_WriteStderr(const char *format, ...)
-{
- va_list va;
-
- va_start(va, format);
- vfprintf(stderr, format, va);
- va_end(va);
-}
diff --git a/contrib/tools/python/src/Parser/printgrammar.c b/contrib/tools/python/src/Parser/printgrammar.c
deleted file mode 100644
index 01f552f2d70..00000000000
--- a/contrib/tools/python/src/Parser/printgrammar.c
+++ /dev/null
@@ -1,117 +0,0 @@
-
-/* Print a bunch of C initializers that represent a grammar */
-
-#include "pgenheaders.h"
-#include "grammar.h"
-
-/* Forward */
-static void printarcs(int, dfa *, FILE *);
-static void printstates(grammar *, FILE *);
-static void printdfas(grammar *, FILE *);
-static void printlabels(grammar *, FILE *);
-
-void
-printgrammar(grammar *g, FILE *fp)
-{
- fprintf(fp, "/* Generated by Parser/pgen */\n\n");
- fprintf(fp, "#include \"pgenheaders.h\"\n");
- fprintf(fp, "#include \"grammar.h\"\n");
- fprintf(fp, "PyAPI_DATA(grammar) _PyParser_Grammar;\n");
- printdfas(g, fp);
- printlabels(g, fp);
- fprintf(fp, "grammar _PyParser_Grammar = {\n");
- fprintf(fp, " %d,\n", g->g_ndfas);
- fprintf(fp, " dfas,\n");
- fprintf(fp, " {%d, labels},\n", g->g_ll.ll_nlabels);
- fprintf(fp, " %d\n", g->g_start);
- fprintf(fp, "};\n");
-}
-
-void
-printnonterminals(grammar *g, FILE *fp)
-{
- dfa *d;
- int i;
-
- fprintf(fp, "/* Generated by Parser/pgen */\n\n");
-
- d = g->g_dfa;
- for (i = g->g_ndfas; --i >= 0; d++)
- fprintf(fp, "#define %s %d\n", d->d_name, d->d_type);
-}
-
-static void
-printarcs(int i, dfa *d, FILE *fp)
-{
- arc *a;
- state *s;
- int j, k;
-
- s = d->d_state;
- for (j = 0; j < d->d_nstates; j++, s++) {
- fprintf(fp, "static arc arcs_%d_%d[%d] = {\n",
- i, j, s->s_narcs);
- a = s->s_arc;
- for (k = 0; k < s->s_narcs; k++, a++)
- fprintf(fp, " {%d, %d},\n", a->a_lbl, a->a_arrow);
- fprintf(fp, "};\n");
- }
-}
-
-static void
-printstates(grammar *g, FILE *fp)
-{
- state *s;
- dfa *d;
- int i, j;
-
- d = g->g_dfa;
- for (i = 0; i < g->g_ndfas; i++, d++) {
- printarcs(i, d, fp);
- fprintf(fp, "static state states_%d[%d] = {\n",
- i, d->d_nstates);
- s = d->d_state;
- for (j = 0; j < d->d_nstates; j++, s++)
- fprintf(fp, " {%d, arcs_%d_%d},\n",
- s->s_narcs, i, j);
- fprintf(fp, "};\n");
- }
-}
-
-static void
-printdfas(grammar *g, FILE *fp)
-{
- dfa *d;
- int i, j;
-
- printstates(g, fp);
- fprintf(fp, "static dfa dfas[%d] = {\n", g->g_ndfas);
- d = g->g_dfa;
- for (i = 0; i < g->g_ndfas; i++, d++) {
- fprintf(fp, " {%d, \"%s\", %d, %d, states_%d,\n",
- d->d_type, d->d_name, d->d_initial, d->d_nstates, i);
- fprintf(fp, " \"");
- for (j = 0; j < NBYTES(g->g_ll.ll_nlabels); j++)
- fprintf(fp, "\\%03o", d->d_first[j] & 0xff);
- fprintf(fp, "\"},\n");
- }
- fprintf(fp, "};\n");
-}
-
-static void
-printlabels(grammar *g, FILE *fp)
-{
- label *l;
- int i;
-
- fprintf(fp, "static label labels[%d] = {\n", g->g_ll.ll_nlabels);
- l = g->g_ll.ll_label;
- for (i = g->g_ll.ll_nlabels; --i >= 0; l++) {
- if (l->lb_str == NULL)
- fprintf(fp, " {%d, 0},\n", l->lb_type);
- else
- fprintf(fp, " {%d, \"%s\"},\n",
- l->lb_type, l->lb_str);
- }
- fprintf(fp, "};\n");
-}
diff --git a/contrib/tools/python/src/Parser/spark.py b/contrib/tools/python/src/Parser/spark.py
deleted file mode 100644
index b064d62ec68..00000000000
--- a/contrib/tools/python/src/Parser/spark.py
+++ /dev/null
@@ -1,839 +0,0 @@
-# Copyright (c) 1998-2002 John Aycock
-#
-# Permission is hereby granted, free of charge, to any person obtaining
-# a copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sublicense, and/or sell copies of the Software, and to
-# permit persons to whom the Software is furnished to do so, subject to
-# the following conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
-# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-__version__ = 'SPARK-0.7 (pre-alpha-5)'
-
-import re
-import string
-
-def _namelist(instance):
- namelist, namedict, classlist = [], {}, [instance.__class__]
- for c in classlist:
- for b in c.__bases__:
- classlist.append(b)
- for name in c.__dict__.keys():
- if not namedict.has_key(name):
- namelist.append(name)
- namedict[name] = 1
- return namelist
-
-class GenericScanner:
- def __init__(self, flags=0):
- pattern = self.reflect()
- self.re = re.compile(pattern, re.VERBOSE|flags)
-
- self.index2func = {}
- for name, number in self.re.groupindex.items():
- self.index2func[number-1] = getattr(self, 't_' + name)
-
- def makeRE(self, name):
- doc = getattr(self, name).__doc__
- rv = '(?P<%s>%s)' % (name[2:], doc)
- return rv
-
- def reflect(self):
- rv = []
- for name in _namelist(self):
- if name[:2] == 't_' and name != 't_default':
- rv.append(self.makeRE(name))
-
- rv.append(self.makeRE('t_default'))
- return string.join(rv, '|')
-
- def error(self, s, pos):
- print "Lexical error at position %s" % pos
- raise SystemExit
-
- def tokenize(self, s):
- pos = 0
- n = len(s)
- while pos < n:
- m = self.re.match(s, pos)
- if m is None:
- self.error(s, pos)
-
- groups = m.groups()
- for i in range(len(groups)):
- if groups[i] and self.index2func.has_key(i):
- self.index2func[i](groups[i])
- pos = m.end()
-
- def t_default(self, s):
- r'( . | \n )+'
- print "Specification error: unmatched input"
- raise SystemExit
-
-#
-# Extracted from GenericParser and made global so that [un]picking works.
-#
-class _State:
- def __init__(self, stateno, items):
- self.T, self.complete, self.items = [], [], items
- self.stateno = stateno
-
-class GenericParser:
- #
- # An Earley parser, as per J. Earley, "An Efficient Context-Free
- # Parsing Algorithm", CACM 13(2), pp. 94-102. Also J. C. Earley,
- # "An Efficient Context-Free Parsing Algorithm", Ph.D. thesis,
- # Carnegie-Mellon University, August 1968. New formulation of
- # the parser according to J. Aycock, "Practical Earley Parsing
- # and the SPARK Toolkit", Ph.D. thesis, University of Victoria,
- # 2001, and J. Aycock and R. N. Horspool, "Practical Earley
- # Parsing", unpublished paper, 2001.
- #
-
- def __init__(self, start):
- self.rules = {}
- self.rule2func = {}
- self.rule2name = {}
- self.collectRules()
- self.augment(start)
- self.ruleschanged = 1
-
- _NULLABLE = '\e_'
- _START = 'START'
- _BOF = '|-'
-
- #
- # When pickling, take the time to generate the full state machine;
- # some information is then extraneous, too. Unfortunately we
- # can't save the rule2func map.
- #
- def __getstate__(self):
- if self.ruleschanged:
- #
- # XXX - duplicated from parse()
- #
- self.computeNull()
- self.newrules = {}
- self.new2old = {}
- self.makeNewRules()
- self.ruleschanged = 0
- self.edges, self.cores = {}, {}
- self.states = { 0: self.makeState0() }
- self.makeState(0, self._BOF)
- #
- # XXX - should find a better way to do this..
- #
- changes = 1
- while changes:
- changes = 0
- for k, v in self.edges.items():
- if v is None:
- state, sym = k
- if self.states.has_key(state):
- self.goto(state, sym)
- changes = 1
- rv = self.__dict__.copy()
- for s in self.states.values():
- del s.items
- del rv['rule2func']
- del rv['nullable']
- del rv['cores']
- return rv
-
- def __setstate__(self, D):
- self.rules = {}
- self.rule2func = {}
- self.rule2name = {}
- self.collectRules()
- start = D['rules'][self._START][0][1][1] # Blech.
- self.augment(start)
- D['rule2func'] = self.rule2func
- D['makeSet'] = self.makeSet_fast
- self.__dict__ = D
-
- #
- # A hook for GenericASTBuilder and GenericASTMatcher. Mess
- # thee not with this; nor shall thee toucheth the _preprocess
- # argument to addRule.
- #
- def preprocess(self, rule, func): return rule, func
-
- def addRule(self, doc, func, _preprocess=1):
- fn = func
- rules = string.split(doc)
-
- index = []
- for i in range(len(rules)):
- if rules[i] == '::=':
- index.append(i-1)
- index.append(len(rules))
-
- for i in range(len(index)-1):
- lhs = rules[index[i]]
- rhs = rules[index[i]+2:index[i+1]]
- rule = (lhs, tuple(rhs))
-
- if _preprocess:
- rule, fn = self.preprocess(rule, func)
-
- if self.rules.has_key(lhs):
- self.rules[lhs].append(rule)
- else:
- self.rules[lhs] = [ rule ]
- self.rule2func[rule] = fn
- self.rule2name[rule] = func.__name__[2:]
- self.ruleschanged = 1
-
- def collectRules(self):
- for name in _namelist(self):
- if name[:2] == 'p_':
- func = getattr(self, name)
- doc = func.__doc__
- self.addRule(doc, func)
-
- def augment(self, start):
- rule = '%s ::= %s %s' % (self._START, self._BOF, start)
- self.addRule(rule, lambda args: args[1], 0)
-
- def computeNull(self):
- self.nullable = {}
- tbd = []
-
- for rulelist in self.rules.values():
- lhs = rulelist[0][0]
- self.nullable[lhs] = 0
- for rule in rulelist:
- rhs = rule[1]
- if len(rhs) == 0:
- self.nullable[lhs] = 1
- continue
- #
- # We only need to consider rules which
- # consist entirely of nonterminal symbols.
- # This should be a savings on typical
- # grammars.
- #
- for sym in rhs:
- if not self.rules.has_key(sym):
- break
- else:
- tbd.append(rule)
- changes = 1
- while changes:
- changes = 0
- for lhs, rhs in tbd:
- if self.nullable[lhs]:
- continue
- for sym in rhs:
- if not self.nullable[sym]:
- break
- else:
- self.nullable[lhs] = 1
- changes = 1
-
- def makeState0(self):
- s0 = _State(0, [])
- for rule in self.newrules[self._START]:
- s0.items.append((rule, 0))
- return s0
-
- def finalState(self, tokens):
- #
- # Yuck.
- #
- if len(self.newrules[self._START]) == 2 and len(tokens) == 0:
- return 1
- start = self.rules[self._START][0][1][1]
- return self.goto(1, start)
-
- def makeNewRules(self):
- worklist = []
- for rulelist in self.rules.values():
- for rule in rulelist:
- worklist.append((rule, 0, 1, rule))
-
- for rule, i, candidate, oldrule in worklist:
- lhs, rhs = rule
- n = len(rhs)
- while i < n:
- sym = rhs[i]
- if not self.rules.has_key(sym) or \
- not self.nullable[sym]:
- candidate = 0
- i = i + 1
- continue
-
- newrhs = list(rhs)
- newrhs[i] = self._NULLABLE+sym
- newrule = (lhs, tuple(newrhs))
- worklist.append((newrule, i+1,
- candidate, oldrule))
- candidate = 0
- i = i + 1
- else:
- if candidate:
- lhs = self._NULLABLE+lhs
- rule = (lhs, rhs)
- if self.newrules.has_key(lhs):
- self.newrules[lhs].append(rule)
- else:
- self.newrules[lhs] = [ rule ]
- self.new2old[rule] = oldrule
-
- def typestring(self, token):
- return None
-
- def error(self, token):
- print "Syntax error at or near `%s' token" % token
- raise SystemExit
-
- def parse(self, tokens):
- sets = [ [(1,0), (2,0)] ]
- self.links = {}
-
- if self.ruleschanged:
- self.computeNull()
- self.newrules = {}
- self.new2old = {}
- self.makeNewRules()
- self.ruleschanged = 0
- self.edges, self.cores = {}, {}
- self.states = { 0: self.makeState0() }
- self.makeState(0, self._BOF)
-
- for i in xrange(len(tokens)):
- sets.append([])
-
- if sets[i] == []:
- break
- self.makeSet(tokens[i], sets, i)
- else:
- sets.append([])
- self.makeSet(None, sets, len(tokens))
-
- #_dump(tokens, sets, self.states)
-
- finalitem = (self.finalState(tokens), 0)
- if finalitem not in sets[-2]:
- if len(tokens) > 0:
- self.error(tokens[i-1])
- else:
- self.error(None)
-
- return self.buildTree(self._START, finalitem,
- tokens, len(sets)-2)
-
- def isnullable(self, sym):
- #
- # For symbols in G_e only. If we weren't supporting 1.5,
- # could just use sym.startswith().
- #
- return self._NULLABLE == sym[0:len(self._NULLABLE)]
-
- def skip(self, (lhs, rhs), pos=0):
- n = len(rhs)
- while pos < n:
- if not self.isnullable(rhs[pos]):
- break
- pos = pos + 1
- return pos
-
- def makeState(self, state, sym):
- assert sym is not None
- #
- # Compute \epsilon-kernel state's core and see if
- # it exists already.
- #
- kitems = []
- for rule, pos in self.states[state].items:
- lhs, rhs = rule
- if rhs[pos:pos+1] == (sym,):
- kitems.append((rule, self.skip(rule, pos+1)))
- core = kitems
-
- core.sort()
- tcore = tuple(core)
- if self.cores.has_key(tcore):
- return self.cores[tcore]
- #
- # Nope, doesn't exist. Compute it and the associated
- # \epsilon-nonkernel state together; we'll need it right away.
- #
- k = self.cores[tcore] = len(self.states)
- K, NK = _State(k, kitems), _State(k+1, [])
- self.states[k] = K
- predicted = {}
-
- edges = self.edges
- rules = self.newrules
- for X in K, NK:
- worklist = X.items
- for item in worklist:
- rule, pos = item
- lhs, rhs = rule
- if pos == len(rhs):
- X.complete.append(rule)
- continue
-
- nextSym = rhs[pos]
- key = (X.stateno, nextSym)
- if not rules.has_key(nextSym):
- if not edges.has_key(key):
- edges[key] = None
- X.T.append(nextSym)
- else:
- edges[key] = None
- if not predicted.has_key(nextSym):
- predicted[nextSym] = 1
- for prule in rules[nextSym]:
- ppos = self.skip(prule)
- new = (prule, ppos)
- NK.items.append(new)
- #
- # Problem: we know K needs generating, but we
- # don't yet know about NK. Can't commit anything
- # regarding NK to self.edges until we're sure. Should
- # we delay committing on both K and NK to avoid this
- # hacky code? This creates other problems..
- #
- if X is K:
- edges = {}
-
- if NK.items == []:
- return k
-
- #
- # Check for \epsilon-nonkernel's core. Unfortunately we
- # need to know the entire set of predicted nonterminals
- # to do this without accidentally duplicating states.
- #
- core = predicted.keys()
- core.sort()
- tcore = tuple(core)
- if self.cores.has_key(tcore):
- self.edges[(k, None)] = self.cores[tcore]
- return k
-
- nk = self.cores[tcore] = self.edges[(k, None)] = NK.stateno
- self.edges.update(edges)
- self.states[nk] = NK
- return k
-
- def goto(self, state, sym):
- key = (state, sym)
- if not self.edges.has_key(key):
- #
- # No transitions from state on sym.
- #
- return None
-
- rv = self.edges[key]
- if rv is None:
- #
- # Target state isn't generated yet. Remedy this.
- #
- rv = self.makeState(state, sym)
- self.edges[key] = rv
- return rv
-
- def gotoT(self, state, t):
- return [self.goto(state, t)]
-
- def gotoST(self, state, st):
- rv = []
- for t in self.states[state].T:
- if st == t:
- rv.append(self.goto(state, t))
- return rv
-
- def add(self, set, item, i=None, predecessor=None, causal=None):
- if predecessor is None:
- if item not in set:
- set.append(item)
- else:
- key = (item, i)
- if item not in set:
- self.links[key] = []
- set.append(item)
- self.links[key].append((predecessor, causal))
-
- def makeSet(self, token, sets, i):
- cur, next = sets[i], sets[i+1]
-
- ttype = token is not None and self.typestring(token) or None
- if ttype is not None:
- fn, arg = self.gotoT, ttype
- else:
- fn, arg = self.gotoST, token
-
- for item in cur:
- ptr = (item, i)
- state, parent = item
- add = fn(state, arg)
- for k in add:
- if k is not None:
- self.add(next, (k, parent), i+1, ptr)
- nk = self.goto(k, None)
- if nk is not None:
- self.add(next, (nk, i+1))
-
- if parent == i:
- continue
-
- for rule in self.states[state].complete:
- lhs, rhs = rule
- for pitem in sets[parent]:
- pstate, pparent = pitem
- k = self.goto(pstate, lhs)
- if k is not None:
- why = (item, i, rule)
- pptr = (pitem, parent)
- self.add(cur, (k, pparent),
- i, pptr, why)
- nk = self.goto(k, None)
- if nk is not None:
- self.add(cur, (nk, i))
-
- def makeSet_fast(self, token, sets, i):
- #
- # Call *only* when the entire state machine has been built!
- # It relies on self.edges being filled in completely, and
- # then duplicates and inlines code to boost speed at the
- # cost of extreme ugliness.
- #
- cur, next = sets[i], sets[i+1]
- ttype = token is not None and self.typestring(token) or None
-
- for item in cur:
- ptr = (item, i)
- state, parent = item
- if ttype is not None:
- k = self.edges.get((state, ttype), None)
- if k is not None:
- #self.add(next, (k, parent), i+1, ptr)
- #INLINED --v
- new = (k, parent)
- key = (new, i+1)
- if new not in next:
- self.links[key] = []
- next.append(new)
- self.links[key].append((ptr, None))
- #INLINED --^
- #nk = self.goto(k, None)
- nk = self.edges.get((k, None), None)
- if nk is not None:
- #self.add(next, (nk, i+1))
- #INLINED --v
- new = (nk, i+1)
- if new not in next:
- next.append(new)
- #INLINED --^
- else:
- add = self.gotoST(state, token)
- for k in add:
- if k is not None:
- self.add(next, (k, parent), i+1, ptr)
- #nk = self.goto(k, None)
- nk = self.edges.get((k, None), None)
- if nk is not None:
- self.add(next, (nk, i+1))
-
- if parent == i:
- continue
-
- for rule in self.states[state].complete:
- lhs, rhs = rule
- for pitem in sets[parent]:
- pstate, pparent = pitem
- #k = self.goto(pstate, lhs)
- k = self.edges.get((pstate, lhs), None)
- if k is not None:
- why = (item, i, rule)
- pptr = (pitem, parent)
- #self.add(cur, (k, pparent),
- # i, pptr, why)
- #INLINED --v
- new = (k, pparent)
- key = (new, i)
- if new not in cur:
- self.links[key] = []
- cur.append(new)
- self.links[key].append((pptr, why))
- #INLINED --^
- #nk = self.goto(k, None)
- nk = self.edges.get((k, None), None)
- if nk is not None:
- #self.add(cur, (nk, i))
- #INLINED --v
- new = (nk, i)
- if new not in cur:
- cur.append(new)
- #INLINED --^
-
- def predecessor(self, key, causal):
- for p, c in self.links[key]:
- if c == causal:
- return p
- assert 0
-
- def causal(self, key):
- links = self.links[key]
- if len(links) == 1:
- return links[0][1]
- choices = []
- rule2cause = {}
- for p, c in links:
- rule = c[2]
- choices.append(rule)
- rule2cause[rule] = c
- return rule2cause[self.ambiguity(choices)]
-
- def deriveEpsilon(self, nt):
- if len(self.newrules[nt]) > 1:
- rule = self.ambiguity(self.newrules[nt])
- else:
- rule = self.newrules[nt][0]
- #print rule
-
- rhs = rule[1]
- attr = [None] * len(rhs)
-
- for i in range(len(rhs)-1, -1, -1):
- attr[i] = self.deriveEpsilon(rhs[i])
- return self.rule2func[self.new2old[rule]](attr)
-
- def buildTree(self, nt, item, tokens, k):
- state, parent = item
-
- choices = []
- for rule in self.states[state].complete:
- if rule[0] == nt:
- choices.append(rule)
- rule = choices[0]
- if len(choices) > 1:
- rule = self.ambiguity(choices)
- #print rule
-
- rhs = rule[1]
- attr = [None] * len(rhs)
-
- for i in range(len(rhs)-1, -1, -1):
- sym = rhs[i]
- if not self.newrules.has_key(sym):
- if sym != self._BOF:
- attr[i] = tokens[k-1]
- key = (item, k)
- item, k = self.predecessor(key, None)
- #elif self.isnullable(sym):
- elif self._NULLABLE == sym[0:len(self._NULLABLE)]:
- attr[i] = self.deriveEpsilon(sym)
- else:
- key = (item, k)
- why = self.causal(key)
- attr[i] = self.buildTree(sym, why[0],
- tokens, why[1])
- item, k = self.predecessor(key, why)
- return self.rule2func[self.new2old[rule]](attr)
-
- def ambiguity(self, rules):
- #
- # XXX - problem here and in collectRules() if the same rule
- # appears in >1 method. Also undefined results if rules
- # causing the ambiguity appear in the same method.
- #
- sortlist = []
- name2index = {}
- for i in range(len(rules)):
- lhs, rhs = rule = rules[i]
- name = self.rule2name[self.new2old[rule]]
- sortlist.append((len(rhs), name))
- name2index[name] = i
- sortlist.sort()
- list = map(lambda (a,b): b, sortlist)
- return rules[name2index[self.resolve(list)]]
-
- def resolve(self, list):
- #
- # Resolve ambiguity in favor of the shortest RHS.
- # Since we walk the tree from the top down, this
- # should effectively resolve in favor of a "shift".
- #
- return list[0]
-
-#
-# GenericASTBuilder automagically constructs a concrete/abstract syntax tree
-# for a given input. The extra argument is a class (not an instance!)
-# which supports the "__setslice__" and "__len__" methods.
-#
-# XXX - silently overrides any user code in methods.
-#
-
-class GenericASTBuilder(GenericParser):
- def __init__(self, AST, start):
- GenericParser.__init__(self, start)
- self.AST = AST
-
- def preprocess(self, rule, func):
- rebind = lambda lhs, self=self: \
- lambda args, lhs=lhs, self=self: \
- self.buildASTNode(args, lhs)
- lhs, rhs = rule
- return rule, rebind(lhs)
-
- def buildASTNode(self, args, lhs):
- children = []
- for arg in args:
- if isinstance(arg, self.AST):
- children.append(arg)
- else:
- children.append(self.terminal(arg))
- return self.nonterminal(lhs, children)
-
- def terminal(self, token): return token
-
- def nonterminal(self, type, args):
- rv = self.AST(type)
- rv[:len(args)] = args
- return rv
-
-#
-# GenericASTTraversal is a Visitor pattern according to Design Patterns. For
-# each node it attempts to invoke the method n_<node type>, falling
-# back onto the default() method if the n_* can't be found. The preorder
-# traversal also looks for an exit hook named n_<node type>_exit (no default
-# routine is called if it's not found). To prematurely halt traversal
-# of a subtree, call the prune() method -- this only makes sense for a
-# preorder traversal. Node type is determined via the typestring() method.
-#
-
-class GenericASTTraversalPruningException:
- pass
-
-class GenericASTTraversal:
- def __init__(self, ast):
- self.ast = ast
-
- def typestring(self, node):
- return node.type
-
- def prune(self):
- raise GenericASTTraversalPruningException
-
- def preorder(self, node=None):
- if node is None:
- node = self.ast
-
- try:
- name = 'n_' + self.typestring(node)
- if hasattr(self, name):
- func = getattr(self, name)
- func(node)
- else:
- self.default(node)
- except GenericASTTraversalPruningException:
- return
-
- for kid in node:
- self.preorder(kid)
-
- name = name + '_exit'
- if hasattr(self, name):
- func = getattr(self, name)
- func(node)
-
- def postorder(self, node=None):
- if node is None:
- node = self.ast
-
- for kid in node:
- self.postorder(kid)
-
- name = 'n_' + self.typestring(node)
- if hasattr(self, name):
- func = getattr(self, name)
- func(node)
- else:
- self.default(node)
-
-
- def default(self, node):
- pass
-
-#
-# GenericASTMatcher. AST nodes must have "__getitem__" and "__cmp__"
-# implemented.
-#
-# XXX - makes assumptions about how GenericParser walks the parse tree.
-#
-
-class GenericASTMatcher(GenericParser):
- def __init__(self, start, ast):
- GenericParser.__init__(self, start)
- self.ast = ast
-
- def preprocess(self, rule, func):
- rebind = lambda func, self=self: \
- lambda args, func=func, self=self: \
- self.foundMatch(args, func)
- lhs, rhs = rule
- rhslist = list(rhs)
- rhslist.reverse()
-
- return (lhs, tuple(rhslist)), rebind(func)
-
- def foundMatch(self, args, func):
- func(args[-1])
- return args[-1]
-
- def match_r(self, node):
- self.input.insert(0, node)
- children = 0
-
- for child in node:
- if children == 0:
- self.input.insert(0, '(')
- children = children + 1
- self.match_r(child)
-
- if children > 0:
- self.input.insert(0, ')')
-
- def match(self, ast=None):
- if ast is None:
- ast = self.ast
- self.input = []
-
- self.match_r(ast)
- self.parse(self.input)
-
- def resolve(self, list):
- #
- # Resolve ambiguity in favor of the longest RHS.
- #
- return list[-1]
-
-def _dump(tokens, sets, states):
- for i in range(len(sets)):
- print 'set', i
- for item in sets[i]:
- print '\t', item
- for (lhs, rhs), pos in states[item[0]].items:
- print '\t\t', lhs, '::=',
- print string.join(rhs[:pos]),
- print '.',
- print string.join(rhs[pos:])
- if i < len(tokens):
- print
- print 'token', str(tokens[i])
- print
diff --git a/contrib/tools/python/src/Parser/tokenizer_pgen.c b/contrib/tools/python/src/Parser/tokenizer_pgen.c
deleted file mode 100644
index 9cb8492d6a6..00000000000
--- a/contrib/tools/python/src/Parser/tokenizer_pgen.c
+++ /dev/null
@@ -1,2 +0,0 @@
-#define PGEN
-#include "tokenizer.c"