aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/python/Pygments/py3/pygments/lexers/lisp.py
diff options
context:
space:
mode:
author robot-piglet <robot-piglet@yandex-team.com> 2024-05-20 07:58:40 +0300
committer robot-piglet <robot-piglet@yandex-team.com> 2024-05-20 08:05:00 +0300
commit bcd5bcc390793791d293d386b2ebefbe683fb4e1 (patch)
tree c93e3b8c847237e7e7626f4a07f1b657bb34f04d /contrib/python/Pygments/py3/pygments/lexers/lisp.py
parent 1a9f1508fe9c8c5927ffebf33197a6108e70501d (diff)
download ydb-bcd5bcc390793791d293d386b2ebefbe683fb4e1.tar.gz
Intermediate changes
Diffstat (limited to 'contrib/python/Pygments/py3/pygments/lexers/lisp.py')
-rw-r--r-- contrib/python/Pygments/py3/pygments/lexers/lisp.py 428
1 files changed, 363 insertions, 65 deletions
diff --git a/contrib/python/Pygments/py3/pygments/lexers/lisp.py b/contrib/python/Pygments/py3/pygments/lexers/lisp.py
index 966b6063ab..e6cc5875fa 100644
--- a/contrib/python/Pygments/py3/pygments/lexers/lisp.py
+++ b/contrib/python/Pygments/py3/pygments/lexers/lisp.py
@@ -4,7 +4,7 @@
Lexers for Lispy languages.
- :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
+ :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
@@ -20,7 +20,7 @@ from pygments.lexers._scheme_builtins import scheme_keywords, scheme_builtins
__all__ = ['SchemeLexer', 'CommonLispLexer', 'HyLexer', 'RacketLexer',
'NewLispLexer', 'EmacsLispLexer', 'ShenLexer', 'CPSALexer',
- 'XtlangLexer', 'FennelLexer']
+ 'XtlangLexer', 'FennelLexer', 'JanetLexer']
class SchemeLexer(RegexLexer):
@@ -31,14 +31,13 @@ class SchemeLexer(RegexLexer):
at http://paste.lisp.org/ to cover as much syntax as possible.
It supports the full Scheme syntax as defined in R5RS.
-
- .. versionadded:: 0.6
"""
name = 'Scheme'
url = 'http://www.scheme-reports.org/'
aliases = ['scheme', 'scm']
filenames = ['*.scm', '*.ss']
mimetypes = ['text/x-scheme', 'application/x-scheme']
+ version_added = '0.6'
flags = re.DOTALL | re.MULTILINE
@@ -296,14 +295,13 @@ class SchemeLexer(RegexLexer):
class CommonLispLexer(RegexLexer):
"""
A Common Lisp lexer.
-
- .. versionadded:: 0.9
"""
name = 'Common Lisp'
url = 'https://lisp-lang.org/'
aliases = ['common-lisp', 'cl', 'lisp']
filenames = ['*.cl', '*.lisp']
mimetypes = ['text/x-common-lisp']
+ version_added = '0.9'
flags = re.IGNORECASE | re.MULTILINE
@@ -316,7 +314,7 @@ class CommonLispLexer(RegexLexer):
# symbol token, reverse-engineered from hyperspec
# Take a deep breath...
- symbol = r'(\|[^|]+\||(?:%s)(?:%s)*)' % (nonmacro, constituent)
+ symbol = rf'(\|[^|]+\||(?:{nonmacro})(?:{constituent})*)'
def __init__(self, **options):
from pygments.lexers._cl_builtins import BUILTIN_FUNCTIONS, \
@@ -485,14 +483,13 @@ class CommonLispLexer(RegexLexer):
class HyLexer(RegexLexer):
"""
Lexer for Hy source code.
-
- .. versionadded:: 2.0
"""
name = 'Hy'
url = 'http://hylang.org/'
- aliases = ['hylang']
+ aliases = ['hylang', 'hy']
filenames = ['*.hy']
mimetypes = ['text/x-hy', 'application/x-hy']
+ version_added = '2.0'
special_forms = (
'cond', 'for', '->', '->>', 'car',
@@ -522,7 +519,7 @@ class HyLexer(RegexLexer):
# valid names for identifiers
# well, names can only not consist fully of numbers
# but this should be good enough for now
- valid_name = r'(?!#)[\w!$%*+<=>?/.#:-]+'
+ valid_name = r"[^ \t\n\r\f\v()[\]{};\"'`~]+"
def _multi_escape(entries):
return words(entries, suffix=' ')
@@ -534,8 +531,7 @@ class HyLexer(RegexLexer):
(r';.*$', Comment.Single),
# whitespaces - usually not relevant
- (r',+', Text),
- (r'\s+', Whitespace),
+ (r'[ \t\n\r\f\v]+', Whitespace),
# numbers
(r'-?\d+\.\d+', Number.Float),
@@ -601,8 +597,6 @@ class RacketLexer(RegexLexer):
"""
Lexer for Racket source code (formerly
known as PLT Scheme).
-
- .. versionadded:: 1.6
"""
name = 'Racket'
@@ -610,6 +604,7 @@ class RacketLexer(RegexLexer):
aliases = ['racket', 'rkt']
filenames = ['*.rkt', '*.rktd', '*.rktl']
mimetypes = ['text/x-racket', 'application/x-racket']
+ version_added = '1.6'
# Generated by example.rkt
_keywords = (
@@ -1391,19 +1386,17 @@ class RacketLexer(RegexLexer):
_opening_parenthesis = r'[([{]'
_closing_parenthesis = r'[)\]}]'
_delimiters = r'()[\]{}",\'`;\s'
- _symbol = r'(?:\|[^|]*\||\\[\w\W]|[^|\\%s]+)+' % _delimiters
+ _symbol = rf'(?:\|[^|]*\||\\[\w\W]|[^|\\{_delimiters}]+)+'
_exact_decimal_prefix = r'(?:#e)?(?:#d)?(?:#e)?'
_exponent = r'(?:[defls][-+]?\d+)'
_inexact_simple_no_hashes = r'(?:\d+(?:/\d+|\.\d*)?|\.\d+)'
- _inexact_simple = (r'(?:%s|(?:\d+#+(?:\.#*|/\d+#*)?|\.\d+#+|'
- r'\d+(?:\.\d*#+|/\d+#+)))' % _inexact_simple_no_hashes)
- _inexact_normal_no_hashes = r'(?:%s%s?)' % (_inexact_simple_no_hashes,
- _exponent)
- _inexact_normal = r'(?:%s%s?)' % (_inexact_simple, _exponent)
+ _inexact_simple = (rf'(?:{_inexact_simple_no_hashes}|(?:\d+#+(?:\.#*|/\d+#*)?|\.\d+#+|'
+ r'\d+(?:\.\d*#+|/\d+#+)))')
+ _inexact_normal_no_hashes = rf'(?:{_inexact_simple_no_hashes}{_exponent}?)'
+ _inexact_normal = rf'(?:{_inexact_simple}{_exponent}?)'
_inexact_special = r'(?:(?:inf|nan)\.[0f])'
- _inexact_real = r'(?:[-+]?%s|[-+]%s)' % (_inexact_normal,
- _inexact_special)
- _inexact_unsigned = r'(?:%s|%s)' % (_inexact_normal, _inexact_special)
+ _inexact_real = rf'(?:[-+]?{_inexact_normal}|[-+]{_inexact_special})'
+ _inexact_unsigned = rf'(?:{_inexact_normal}|{_inexact_special})'
tokens = {
'root': [
@@ -1423,36 +1416,29 @@ class RacketLexer(RegexLexer):
# onto Pygments token types; some judgment calls here.
# #d or no prefix
- (r'(?i)%s[-+]?\d+(?=[%s])' % (_exact_decimal_prefix, _delimiters),
+ (rf'(?i){_exact_decimal_prefix}[-+]?\d+(?=[{_delimiters}])',
Number.Integer, '#pop'),
- (r'(?i)%s[-+]?(\d+(\.\d*)?|\.\d+)([deflst][-+]?\d+)?(?=[%s])' %
- (_exact_decimal_prefix, _delimiters), Number.Float, '#pop'),
- (r'(?i)%s[-+]?(%s([-+]%s?i)?|[-+]%s?i)(?=[%s])' %
- (_exact_decimal_prefix, _inexact_normal_no_hashes,
- _inexact_normal_no_hashes, _inexact_normal_no_hashes,
- _delimiters), Number, '#pop'),
+ (rf'(?i){_exact_decimal_prefix}[-+]?(\d+(\.\d*)?|\.\d+)([deflst][-+]?\d+)?(?=[{_delimiters}])', Number.Float, '#pop'),
+ (rf'(?i){_exact_decimal_prefix}[-+]?({_inexact_normal_no_hashes}([-+]{_inexact_normal_no_hashes}?i)?|[-+]{_inexact_normal_no_hashes}?i)(?=[{_delimiters}])', Number, '#pop'),
# Inexact without explicit #i
- (r'(?i)(#d)?(%s([-+]%s?i)?|[-+]%s?i|%s@%s)(?=[%s])' %
- (_inexact_real, _inexact_unsigned, _inexact_unsigned,
- _inexact_real, _inexact_real, _delimiters), Number.Float,
+ (rf'(?i)(#d)?({_inexact_real}([-+]{_inexact_unsigned}?i)?|[-+]{_inexact_unsigned}?i|{_inexact_real}@{_inexact_real})(?=[{_delimiters}])', Number.Float,
'#pop'),
# The remaining extflonums
- (r'(?i)(([-+]?%st[-+]?\d+)|[-+](inf|nan)\.t)(?=[%s])' %
- (_inexact_simple, _delimiters), Number.Float, '#pop'),
+ (rf'(?i)(([-+]?{_inexact_simple}t[-+]?\d+)|[-+](inf|nan)\.t)(?=[{_delimiters}])', Number.Float, '#pop'),
# #b
- (r'(?iu)(#[ei])?#b%s' % _symbol, Number.Bin, '#pop'),
+ (rf'(?iu)(#[ei])?#b{_symbol}', Number.Bin, '#pop'),
# #o
- (r'(?iu)(#[ei])?#o%s' % _symbol, Number.Oct, '#pop'),
+ (rf'(?iu)(#[ei])?#o{_symbol}', Number.Oct, '#pop'),
# #x
- (r'(?iu)(#[ei])?#x%s' % _symbol, Number.Hex, '#pop'),
+ (rf'(?iu)(#[ei])?#x{_symbol}', Number.Hex, '#pop'),
# #i is always inexact, i.e. float
- (r'(?iu)(#d)?#i%s' % _symbol, Number.Float, '#pop'),
+ (rf'(?iu)(#d)?#i{_symbol}', Number.Float, '#pop'),
# Strings and characters
(r'#?"', String.Double, ('#pop', 'string')),
@@ -1465,7 +1451,7 @@ class RacketLexer(RegexLexer):
(r'#(true|false|[tTfF])', Name.Constant, '#pop'),
# Keyword argument names (e.g. #:keyword)
- (r'#:%s' % _symbol, Keyword.Declaration, '#pop'),
+ (rf'#:{_symbol}', Keyword.Declaration, '#pop'),
# Reader extensions
(r'(#lang |#!)(\S+)',
@@ -1473,8 +1459,8 @@ class RacketLexer(RegexLexer):
(r'#reader', Keyword.Namespace, 'quoted-datum'),
# Other syntax
- (r"(?i)\.(?=[%s])|#c[is]|#['`]|#,@?" % _delimiters, Operator),
- (r"'|#[s&]|#hash(eqv?)?|#\d*(?=%s)" % _opening_parenthesis,
+ (rf"(?i)\.(?=[{_delimiters}])|#c[is]|#['`]|#,@?", Operator),
+ (rf"'|#[s&]|#hash(eqv?)?|#\d*(?={_opening_parenthesis})",
Operator, ('#pop', 'quoted-datum'))
],
'datum*': [
@@ -1488,15 +1474,15 @@ class RacketLexer(RegexLexer):
],
'unquoted-datum': [
include('datum'),
- (r'quote(?=[%s])' % _delimiters, Keyword,
+ (rf'quote(?=[{_delimiters}])', Keyword,
('#pop', 'quoted-datum')),
(r'`', Operator, ('#pop', 'quasiquoted-datum')),
- (r'quasiquote(?=[%s])' % _delimiters, Keyword,
+ (rf'quasiquote(?=[{_delimiters}])', Keyword,
('#pop', 'quasiquoted-datum')),
(_opening_parenthesis, Punctuation, ('#pop', 'unquoted-list')),
- (words(_keywords, suffix='(?=[%s])' % _delimiters),
+ (words(_keywords, suffix=f'(?=[{_delimiters}])'),
Keyword, '#pop'),
- (words(_builtins, suffix='(?=[%s])' % _delimiters),
+ (words(_builtins, suffix=f'(?=[{_delimiters}])'),
Name.Builtin, '#pop'),
(_symbol, Name, '#pop'),
include('datum*')
@@ -1508,7 +1494,7 @@ class RacketLexer(RegexLexer):
'quasiquoted-datum': [
include('datum'),
(r',@?', Operator, ('#pop', 'unquoted-datum')),
- (r'unquote(-splicing)?(?=[%s])' % _delimiters, Keyword,
+ (rf'unquote(-splicing)?(?=[{_delimiters}])', Keyword,
('#pop', 'unquoted-datum')),
(_opening_parenthesis, Punctuation, ('#pop', 'quasiquoted-list')),
include('datum*')
@@ -1543,8 +1529,6 @@ class RacketLexer(RegexLexer):
class NewLispLexer(RegexLexer):
"""
For newLISP source code (version 10.3.0).
-
- .. versionadded:: 1.5
"""
name = 'NewLisp'
@@ -1552,6 +1536,7 @@ class NewLispLexer(RegexLexer):
aliases = ['newlisp']
filenames = ['*.lsp', '*.nl', '*.kif']
mimetypes = ['text/x-newlisp', 'application/x-newlisp']
+ version_added = '1.5'
flags = re.IGNORECASE | re.MULTILINE
@@ -1676,13 +1661,13 @@ class EmacsLispLexer(RegexLexer):
"""
An ELisp lexer, parsing a stream and outputting the tokens
needed to highlight elisp code.
-
- .. versionadded:: 2.1
"""
name = 'EmacsLisp'
aliases = ['emacs-lisp', 'elisp', 'emacs']
filenames = ['*.el']
mimetypes = ['text/x-elisp', 'application/x-elisp']
+ url = 'https://www.gnu.org/software/emacs'
+ version_added = '2.1'
flags = re.MULTILINE
@@ -1695,7 +1680,7 @@ class EmacsLispLexer(RegexLexer):
# symbol token, reverse-engineered from hyperspec
# Take a deep breath...
- symbol = r'((?:%s)(?:%s)*)' % (nonmacro, constituent)
+ symbol = rf'((?:{nonmacro})(?:{constituent})*)'
macros = {
'atomic-change-group', 'case', 'block', 'cl-block', 'cl-callf', 'cl-callf2',
@@ -2308,7 +2293,7 @@ class EmacsLispLexer(RegexLexer):
],
'string': [
(r'[^"\\`]+', String),
- (r'`%s\'' % symbol, String.Symbol),
+ (rf'`{symbol}\'', String.Symbol),
(r'`', String),
(r'\\.', String),
(r'\\\n', String),
@@ -2320,14 +2305,13 @@ class EmacsLispLexer(RegexLexer):
class ShenLexer(RegexLexer):
"""
Lexer for Shen source code.
-
- .. versionadded:: 2.1
"""
name = 'Shen'
url = 'http://shenlanguage.org/'
aliases = ['shen']
filenames = ['*.shen']
mimetypes = ['text/x-shen', 'application/x-shen']
+ version_added = '2.1'
DECLARATIONS = (
'datatype', 'define', 'defmacro', 'defprolog', 'defcc',
@@ -2375,9 +2359,9 @@ class ShenLexer(RegexLexer):
MAPPINGS.update((s, Keyword) for s in SPECIAL_FORMS)
valid_symbol_chars = r'[\w!$%*+,<=>?/.\'@&#:-]'
- valid_name = '%s+' % valid_symbol_chars
- symbol_name = r'[a-z!$%%*+,<=>?/.\'@&#_-]%s*' % valid_symbol_chars
- variable = r'[A-Z]%s*' % valid_symbol_chars
+ valid_name = f'{valid_symbol_chars}+'
+ symbol_name = rf'[a-z!$%*+,<=>?/.\'@&#_-]{valid_symbol_chars}*'
+ variable = rf'[A-Z]{valid_symbol_chars}*'
tokens = {
'string': [
@@ -2485,13 +2469,13 @@ class ShenLexer(RegexLexer):
class CPSALexer(RegexLexer):
"""
A CPSA lexer based on the CPSA language as of version 2.2.12
-
- .. versionadded:: 2.1
"""
name = 'CPSA'
aliases = ['cpsa']
filenames = ['*.cpsa']
mimetypes = []
+ url = 'https://web.cs.wpi.edu/~guttman/cs564/cpsauser.html'
+ version_added = '2.1'
# list of known keywords and builtins taken from vim 6.4 scheme.vim
# syntax file.
@@ -2566,14 +2550,13 @@ class XtlangLexer(RegexLexer):
This is a mixture of Scheme and xtlang, really. Keyword lists are
taken from the Extempore Emacs mode
(https://github.com/extemporelang/extempore-emacs-mode)
-
- .. versionadded:: 2.2
"""
name = 'xtlang'
url = 'http://extempore.moso.com.au'
aliases = ['extempore']
filenames = ['*.xtm']
mimetypes = []
+ version_added = '2.2'
common_keywords = (
'lambda', 'define', 'if', 'else', 'cond', 'and',
@@ -2769,13 +2752,12 @@ class FennelLexer(RegexLexer):
Fennel compiles to Lua, so all the Lua builtins are recognized as well
as the special forms that are particular to the Fennel compiler.
-
- .. versionadded:: 2.3
"""
name = 'Fennel'
url = 'https://fennel-lang.org'
aliases = ['fennel', 'fnl']
filenames = ['*.fnl']
+ version_added = '2.3'
# this list is current as of Fennel version 0.10.0.
special_forms = (
@@ -2846,3 +2828,319 @@ class FennelLexer(RegexLexer):
(r'#', Punctuation),
]
}
+
+
+class JanetLexer(RegexLexer):
+    """A lexer for the Janet programming language.
+
+    Special forms, builtin macros and builtin functions are only
+    highlighted as such when they directly follow an opening parenthesis;
+    anywhere else they are lexed as ordinary symbols.  Constants and
+    builtin variables are recognized in any position.
+    """
+    name = 'Janet'
+    url = 'https://janet-lang.org/'
+    aliases = ['janet']
+    filenames = ['*.janet', '*.jdn']
+    mimetypes = ['text/x-janet', 'application/x-janet']
+    version_added = '2.18'
+
+    # XXX: gets too slow
+    #flags = re.MULTILINE | re.VERBOSE
+
+    special_forms = (
+        'break', 'def', 'do', 'fn', 'if', 'quote', 'quasiquote', 'splice',
+        'set', 'unquote', 'upscope', 'var', 'while'
+    )
+
+    builtin_macros = (
+        '%=', '*=', '++', '+=', '--', '-=', '->', '->>', '-?>',
+        '-?>>', '/=', 'and', 'as->', 'as-macro', 'as?->',
+        'assert', 'case', 'catseq', 'chr', 'comment', 'compif',
+        'comptime', 'compwhen', 'cond', 'coro', 'def-',
+        'default', 'defdyn', 'defer', 'defmacro', 'defmacro-',
+        'defn', 'defn-', 'delay', 'doc', 'each', 'eachk',
+        'eachp', 'edefer', 'ev/do-thread', 'ev/gather',
+        'ev/spawn', 'ev/spawn-thread', 'ev/with-deadline',
+        'ffi/defbind', 'fiber-fn', 'for', 'forever', 'forv',
+        'generate', 'if-let', 'if-not', 'if-with', 'import',
+        'juxt', 'label', 'let', 'loop', 'match', 'or', 'prompt',
+        'protect', 'repeat', 'seq', 'short-fn', 'tabseq',
+        'toggle', 'tracev', 'try', 'unless', 'use', 'var-',
+        'varfn', 'when', 'when-let', 'when-with', 'with',
+        'with-dyns', 'with-syms', 'with-vars',
+        # obsolete builtin macros
+        'eachy'
+    )
+
+    builtin_functions = (
+        '%', '*', '+', '-', '/', '<', '<=', '=', '>', '>=',
+        'abstract?', 'accumulate', 'accumulate2', 'all',
+        'all-bindings', 'all-dynamics', 'any?', 'apply',
+        'array', 'array/clear', 'array/concat', 'array/ensure',
+        'array/fill', 'array/insert', 'array/new',
+        'array/new-filled', 'array/peek', 'array/pop',
+        'array/push', 'array/remove', 'array/slice',
+        'array/trim', 'array/weak', 'array?', 'asm',
+        'bad-compile', 'bad-parse', 'band', 'blshift', 'bnot',
+        'boolean?', 'bor', 'brshift', 'brushift', 'buffer',
+        'buffer/bit', 'buffer/bit-clear', 'buffer/bit-set',
+        'buffer/bit-toggle', 'buffer/blit', 'buffer/clear',
+        'buffer/fill', 'buffer/format', 'buffer/from-bytes',
+        'buffer/new', 'buffer/new-filled', 'buffer/popn',
+        'buffer/push', 'buffer/push-at', 'buffer/push-byte',
+        'buffer/push-string', 'buffer/push-word',
+        'buffer/slice', 'buffer/trim', 'buffer?', 'bxor',
+        'bytes?', 'cancel', 'cfunction?', 'cli-main', 'cmp',
+        'comp', 'compare', 'compare<', 'compare<=', 'compare=',
+        'compare>', 'compare>=', 'compile', 'complement',
+        'count', 'curenv', 'debug', 'debug/arg-stack',
+        'debug/break', 'debug/fbreak', 'debug/lineage',
+        'debug/stack', 'debug/stacktrace', 'debug/step',
+        'debug/unbreak', 'debug/unfbreak', 'debugger',
+        'debugger-on-status', 'dec', 'deep-not=', 'deep=',
+        'defglobal', 'describe', 'dictionary?', 'disasm',
+        'distinct', 'div', 'doc*', 'doc-format', 'doc-of',
+        'dofile', 'drop', 'drop-until', 'drop-while', 'dyn',
+        'eflush', 'empty?', 'env-lookup', 'eprin', 'eprinf',
+        'eprint', 'eprintf', 'error', 'errorf',
+        'ev/acquire-lock', 'ev/acquire-rlock',
+        'ev/acquire-wlock', 'ev/all-tasks', 'ev/call',
+        'ev/cancel', 'ev/capacity', 'ev/chan', 'ev/chan-close',
+        'ev/chunk', 'ev/close', 'ev/count', 'ev/deadline',
+        'ev/full', 'ev/give', 'ev/give-supervisor', 'ev/go',
+        'ev/lock', 'ev/read', 'ev/release-lock',
+        'ev/release-rlock', 'ev/release-wlock', 'ev/rselect',
+        'ev/rwlock', 'ev/select', 'ev/sleep', 'ev/take',
+        'ev/thread', 'ev/thread-chan', 'ev/write', 'eval',
+        'eval-string', 'even?', 'every?', 'extreme', 'false?',
+        'ffi/align', 'ffi/call', 'ffi/calling-conventions',
+        'ffi/close', 'ffi/context', 'ffi/free', 'ffi/jitfn',
+        'ffi/lookup', 'ffi/malloc', 'ffi/native',
+        'ffi/pointer-buffer', 'ffi/pointer-cfunction',
+        'ffi/read', 'ffi/signature', 'ffi/size', 'ffi/struct',
+        'ffi/trampoline', 'ffi/write', 'fiber/can-resume?',
+        'fiber/current', 'fiber/getenv', 'fiber/last-value',
+        'fiber/maxstack', 'fiber/new', 'fiber/root',
+        'fiber/setenv', 'fiber/setmaxstack', 'fiber/status',
+        'fiber?', 'file/close', 'file/flush', 'file/lines',
+        'file/open', 'file/read', 'file/seek', 'file/tell',
+        'file/temp', 'file/write', 'filter', 'find',
+        'find-index', 'first', 'flatten', 'flatten-into',
+        'flush', 'flycheck', 'freeze', 'frequencies',
+        'from-pairs', 'function?', 'gccollect', 'gcinterval',
+        'gcsetinterval', 'gensym', 'get', 'get-in', 'getline',
+        'getproto', 'group-by', 'has-key?', 'has-value?',
+        'hash', 'idempotent?', 'identity', 'import*', 'in',
+        'inc', 'index-of', 'indexed?', 'int/s64',
+        'int/to-bytes', 'int/to-number', 'int/u64', 'int?',
+        'interleave', 'interpose', 'invert', 'juxt*', 'keep',
+        'keep-syntax', 'keep-syntax!', 'keys', 'keyword',
+        'keyword/slice', 'keyword?', 'kvs', 'last', 'length',
+        'lengthable?', 'load-image', 'macex', 'macex1',
+        'maclintf', 'make-env', 'make-image', 'map', 'mapcat',
+        'marshal', 'math/abs', 'math/acos', 'math/acosh',
+        'math/asin', 'math/asinh', 'math/atan', 'math/atan2',
+        'math/atanh', 'math/cbrt', 'math/ceil', 'math/cos',
+        'math/cosh', 'math/erf', 'math/erfc', 'math/exp',
+        'math/exp2', 'math/expm1', 'math/floor', 'math/gamma',
+        'math/gcd', 'math/hypot', 'math/lcm', 'math/log',
+        'math/log-gamma', 'math/log10', 'math/log1p',
+        'math/log2', 'math/next', 'math/pow', 'math/random',
+        'math/rng', 'math/rng-buffer', 'math/rng-int',
+        'math/rng-uniform', 'math/round', 'math/seedrandom',
+        'math/sin', 'math/sinh', 'math/sqrt', 'math/tan',
+        'math/tanh', 'math/trunc', 'max', 'max-of', 'mean',
+        'memcmp', 'merge', 'merge-into', 'merge-module', 'min',
+        'min-of', 'mod', 'module/add-paths',
+        'module/expand-path', 'module/find', 'module/value',
+        'nan?', 'nat?', 'native', 'neg?', 'net/accept',
+        'net/accept-loop', 'net/address', 'net/address-unpack',
+        'net/chunk', 'net/close', 'net/connect', 'net/flush',
+        'net/listen', 'net/localname', 'net/peername',
+        'net/read', 'net/recv-from', 'net/send-to',
+        'net/server', 'net/setsockopt', 'net/shutdown',
+        'net/write', 'next', 'nil?', 'not', 'not=', 'number?',
+        'odd?', 'one?', 'os/arch', 'os/cd', 'os/chmod',
+        'os/clock', 'os/compiler', 'os/cpu-count',
+        'os/cryptorand', 'os/cwd', 'os/date', 'os/dir',
+        'os/environ', 'os/execute', 'os/exit', 'os/getenv',
+        'os/isatty', 'os/link', 'os/lstat', 'os/mkdir',
+        'os/mktime', 'os/open', 'os/perm-int', 'os/perm-string',
+        'os/pipe', 'os/posix-exec', 'os/posix-fork',
+        'os/proc-close', 'os/proc-kill', 'os/proc-wait',
+        'os/readlink', 'os/realpath', 'os/rename', 'os/rm',
+        'os/rmdir', 'os/setenv', 'os/shell', 'os/sigaction',
+        'os/sleep', 'os/spawn', 'os/stat', 'os/strftime',
+        'os/symlink', 'os/time', 'os/touch', 'os/umask',
+        'os/which', 'pairs', 'parse', 'parse-all',
+        'parser/byte', 'parser/clone', 'parser/consume',
+        'parser/eof', 'parser/error', 'parser/flush',
+        'parser/has-more', 'parser/insert', 'parser/new',
+        'parser/produce', 'parser/state', 'parser/status',
+        'parser/where', 'partial', 'partition', 'partition-by',
+        'peg/compile', 'peg/find', 'peg/find-all', 'peg/match',
+        'peg/replace', 'peg/replace-all', 'pos?', 'postwalk',
+        'pp', 'prewalk', 'prin', 'prinf', 'print', 'printf',
+        'product', 'propagate', 'put', 'put-in', 'quit',
+        'range', 'reduce', 'reduce2', 'repl', 'require',
+        'resume', 'return', 'reverse', 'reverse!',
+        'run-context', 'sandbox', 'scan-number', 'setdyn',
+        'signal', 'slice', 'slurp', 'some', 'sort', 'sort-by',
+        'sorted', 'sorted-by', 'spit', 'string',
+        'string/ascii-lower', 'string/ascii-upper',
+        'string/bytes', 'string/check-set', 'string/find',
+        'string/find-all', 'string/format', 'string/from-bytes',
+        'string/has-prefix?', 'string/has-suffix?',
+        'string/join', 'string/repeat', 'string/replace',
+        'string/replace-all', 'string/reverse', 'string/slice',
+        'string/split', 'string/trim', 'string/triml',
+        'string/trimr', 'string?', 'struct', 'struct/getproto',
+        'struct/proto-flatten', 'struct/to-table',
+        'struct/with-proto', 'struct?', 'sum', 'symbol',
+        'symbol/slice', 'symbol?', 'table', 'table/clear',
+        'table/clone', 'table/getproto', 'table/new',
+        'table/proto-flatten', 'table/rawget', 'table/setproto',
+        'table/to-struct', 'table/weak', 'table/weak-keys',
+        'table/weak-values', 'table?', 'take', 'take-until',
+        'take-while', 'thaw', 'trace', 'true?', 'truthy?',
+        'tuple', 'tuple/brackets', 'tuple/setmap',
+        'tuple/slice', 'tuple/sourcemap', 'tuple/type',
+        'tuple?', 'type', 'unmarshal', 'untrace', 'update',
+        'update-in', 'values', 'varglobal', 'walk',
+        'warn-compile', 'xprin', 'xprinf', 'xprint', 'xprintf',
+        'yield', 'zero?', 'zipcoll',
+        # obsolete builtin functions
+        'tarray/buffer', 'tarray/copy-bytes', 'tarray/length',
+        'tarray/new', 'tarray/properties', 'tarray/slice',
+        'tarray/swap-bytes', 'thread/close', 'thread/current',
+        'thread/exit', 'thread/new', 'thread/receive',
+        'thread/send'
+    )
+
+    builtin_variables = (
+        'debugger-env', 'default-peg-grammar', 'janet/build',
+        'janet/config-bits', 'janet/version', 'load-image-dict',
+        'make-image-dict', 'math/-inf', 'math/e', 'math/inf',
+        'math/int-max', 'math/int-min', 'math/int32-max',
+        'math/int32-min', 'math/nan', 'math/pi', 'module/cache',
+        'module/loaders', 'module/loading', 'module/paths',
+        'root-env', 'stderr', 'stdin', 'stdout'
+    )
+
+    constants = (
+        'false', 'nil', 'true'
+    )
+
+    # XXX: this form not usable to pass to `suffix=`
+    #_token_end = r'''
+    #  (?=            # followed by one of:
+    #    \s           #   whitespace
+    #    | \#         #   comment
+    #    | [)\]}]     #   end delimiters
+    #    | $          #   end of file
+    #  )
+    #'''
+
+    # ...so, express it like this
+    #
+    # All three closing collection delimiters are listed, so that a name
+    # directly followed by `}` (e.g. the `true` in `{:a true}`) is still
+    # recognized by the `words(...)` rules below.
+    _token_end = r'(?=\s|#|[)\]}]|$)'
+
+    # characters allowed at the start of a symbol, and anywhere after it
+    _first_char = r'[a-zA-Z!$%&*+\-./<=>?@^_]'
+    _rest_char = rf'([0-9:]|{_first_char})'
+
+    valid_name = rf'{_first_char}({_rest_char})*'
+
+    _radix_unit = r'[0-9a-zA-Z][0-9a-zA-Z_]*'
+
+    # exponent marker, optional sign, one or more alphanumeric
+    _radix_exp = r'&[+-]?[0-9a-zA-Z]+'
+
+    # 2af3__bee_
+    _hex_unit = r'[0-9a-fA-F][0-9a-fA-F_]*'
+
+    # 12_000__
+    _dec_unit = r'[0-9][0-9_]*'
+
+    # E-23
+    # lower or uppercase e, optional sign, one or more digits
+    _dec_exp = r'[eE][+-]?[0-9]+'
+
+    tokens = {
+        'root': [
+            (r'#.*$', Comment.Single),
+
+            (r'\s+', Whitespace),
+
+            # radix number
+            #
+            # the variant with a mandatory `.` after the integer part is
+            # tried first; the second one covers the remaining spellings
+            (rf'''(?x)
+                  [+-]? [0-9]{{1,2}} r {_radix_unit} \. ({_radix_unit})?
+                  ({_radix_exp})?
+               ''',
+             Number),
+
+            (rf'''(?x)
+                  [+-]? [0-9]{{1,2}} r (\.)? {_radix_unit}
+                  ({_radix_exp})?
+               ''',
+             Number),
+
+            # hex number
+            (rf'(?x) [+-]? 0x {_hex_unit} \. ({_hex_unit})?',
+             Number.Hex),
+
+            (rf'(?x) [+-]? 0x (\.)? {_hex_unit}',
+             Number.Hex),
+
+            # decimal number
+            (rf'(?x) [+-]? {_dec_unit} \. ({_dec_unit})? ({_dec_exp})?',
+             Number.Float),
+
+            (rf'(?x) [+-]? (\.)? {_dec_unit} ({_dec_exp})?',
+             Number.Float),
+
+            # strings and buffers
+            (r'@?"', String, 'string'),
+
+            # long-strings and long-buffers
+            #
+            #   non-empty content enclosed by a pair of n-backticks
+            #   with optional leading @
+            (r'@?(`+)(.|\n)+?\1', String),
+
+            # things that hang out on front
+            #
+            #   ' ~ , ; |
+            (r"['~,;|]", Operator),
+
+            # collection delimiters
+            #
+            #   @( ( )
+            #   @[ [ ]
+            #   @{ { }
+            (r'@?[(\[{]|[)\]}]', Punctuation),
+
+            # constants
+            #
+            # `Keyword.Constant` is the standard Pygments token type; a
+            # misspelled attribute would silently create a new token type
+            # that no style knows about.
+            (words(constants, suffix=_token_end), Keyword.Constant),
+
+            # keywords
+            (rf'(:({_rest_char})+|:)', Name.Constant),
+
+            # symbols
+            (words(builtin_variables, suffix=_token_end),
+             Name.Variable.Global),
+
+            # the lookbehind restricts the following three rules to names
+            # that come directly after an opening parenthesis
+            (words(special_forms, prefix=r'(?<=\()', suffix=_token_end),
+             Keyword.Reserved),
+
+            (words(builtin_macros, prefix=r'(?<=\()', suffix=_token_end),
+             Name.Builtin),
+
+            (words(builtin_functions, prefix=r'(?<=\()', suffix=_token_end),
+             Name.Function),
+
+            # other symbols
+            (valid_name, Name.Variable),
+        ],
+        'string': [
+            # \uXXXX and \UXXXXXX escapes
+            (r'\\(u[0-9a-fA-F]{4}|U[0-9a-fA-F]{6})', String.Escape),
+            # \xHH escapes
+            (r'\\x[0-9a-fA-F]{2}', String.Escape),
+            # any other backslash-escaped character
+            (r'\\.', String.Escape),
+            (r'"', String, '#pop'),
+            (r'[^\\"]+', String),
+        ]
+    }