1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
|
# Module 'parser'
#
# Parse S-expressions output by the Panel Editor
# (which is written in Scheme so it can't help writing S-expressions).
#
# See notes at end of file.
from warnings import warnpy3k
warnpy3k("the panelparser module has been removed in Python 3.0", stacklevel=2)
del warnpy3k
whitespace = ' \t\n'
operators = '()\''
separators = operators + whitespace + ';' + '"'
# Tokenize a string.
# Return a list of tokens (strings).
#
def tokenize_string(s):
tokens = []
while s:
c = s[:1]
if c in whitespace:
s = s[1:]
elif c == ';':
s = ''
elif c == '"':
n = len(s)
i = 1
while i < n:
c = s[i]
i = i+1
if c == '"': break
if c == '\\': i = i+1
tokens.append(s[:i])
s = s[i:]
elif c in operators:
tokens.append(c)
s = s[1:]
else:
n = len(s)
i = 1
while i < n:
if s[i] in separators: break
i = i+1
tokens.append(s[:i])
s = s[i:]
return tokens
# Tokenize a whole file (given as file object, not as file name).
# Return a list of tokens (strings).
#
def tokenize_file(fp):
tokens = []
while 1:
line = fp.readline()
if not line: break
tokens = tokens + tokenize_string(line)
return tokens
# Exception raised by parse_exr.
#
syntax_error = 'syntax error'
# Parse an S-expression.
# Input is a list of tokens as returned by tokenize_*().
# Return a pair (expr, tokens)
# where expr is a list representing the s-expression,
# and tokens contains the remaining tokens.
# May raise syntax_error.
#
def parse_expr(tokens):
if (not tokens) or tokens[0] != '(':
raise syntax_error, 'expected "("'
tokens = tokens[1:]
expr = []
while 1:
if not tokens:
raise syntax_error, 'missing ")"'
if tokens[0] == ')':
return expr, tokens[1:]
elif tokens[0] == '(':
subexpr, tokens = parse_expr(tokens)
expr.append(subexpr)
else:
expr.append(tokens[0])
tokens = tokens[1:]
# Parse a file (given as file object, not as file name).
# Return a list of parsed S-expressions found at the top level.
#
def parse_file(fp):
tokens = tokenize_file(fp)
exprlist = []
while tokens:
expr, tokens = parse_expr(tokens)
exprlist.append(expr)
return exprlist
# EXAMPLE:
#
# The input
# '(hip (hop hur-ray))'
#
# passed to tokenize_string() returns the token list
# ['(', 'hip', '(', 'hop', 'hur-ray', ')', ')']
#
# When this is passed to parse_expr() it returns the expression
# ['hip', ['hop', 'hur-ray']]
# plus an empty token list (because there are no tokens left.
#
# When a file containing the example is passed to parse_file() it returns
# a list whose only element is the output of parse_expr() above:
# [['hip', ['hop', 'hur-ray']]]
# TOKENIZING:
#
# Comments start with semicolon (;) and continue till the end of the line.
#
# Tokens are separated by whitespace, except the following characters
# always form a separate token (outside strings):
# ( ) '
# Strings are enclosed in double quotes (") and backslash (\) is used
# as escape character in strings.
|