diff options
author | robot-piglet <robot-piglet@yandex-team.com> | 2024-05-20 07:58:40 +0300 |
---|---|---|
committer | robot-piglet <robot-piglet@yandex-team.com> | 2024-05-20 08:05:00 +0300 |
commit | bcd5bcc390793791d293d386b2ebefbe683fb4e1 (patch) | |
tree | c93e3b8c847237e7e7626f4a07f1b657bb34f04d /contrib/python/Pygments/py3/pygments/lexers/lisp.py | |
parent | 1a9f1508fe9c8c5927ffebf33197a6108e70501d (diff) | |
download | ydb-bcd5bcc390793791d293d386b2ebefbe683fb4e1.tar.gz |
Intermediate changes
Diffstat (limited to 'contrib/python/Pygments/py3/pygments/lexers/lisp.py')
-rw-r--r-- | contrib/python/Pygments/py3/pygments/lexers/lisp.py | 428 |
1 files changed, 363 insertions, 65 deletions
diff --git a/contrib/python/Pygments/py3/pygments/lexers/lisp.py b/contrib/python/Pygments/py3/pygments/lexers/lisp.py index 966b6063ab..e6cc5875fa 100644 --- a/contrib/python/Pygments/py3/pygments/lexers/lisp.py +++ b/contrib/python/Pygments/py3/pygments/lexers/lisp.py @@ -4,7 +4,7 @@ Lexers for Lispy languages. - :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS. + :copyright: Copyright 2006-2024 by the Pygments team, see AUTHORS. :license: BSD, see LICENSE for details. """ @@ -20,7 +20,7 @@ from pygments.lexers._scheme_builtins import scheme_keywords, scheme_builtins __all__ = ['SchemeLexer', 'CommonLispLexer', 'HyLexer', 'RacketLexer', 'NewLispLexer', 'EmacsLispLexer', 'ShenLexer', 'CPSALexer', - 'XtlangLexer', 'FennelLexer'] + 'XtlangLexer', 'FennelLexer', 'JanetLexer'] class SchemeLexer(RegexLexer): @@ -31,14 +31,13 @@ class SchemeLexer(RegexLexer): at http://paste.lisp.org/ to cover as much syntax as possible. It supports the full Scheme syntax as defined in R5RS. - - .. versionadded:: 0.6 """ name = 'Scheme' url = 'http://www.scheme-reports.org/' aliases = ['scheme', 'scm'] filenames = ['*.scm', '*.ss'] mimetypes = ['text/x-scheme', 'application/x-scheme'] + version_added = '0.6' flags = re.DOTALL | re.MULTILINE @@ -296,14 +295,13 @@ class SchemeLexer(RegexLexer): class CommonLispLexer(RegexLexer): """ A Common Lisp lexer. - - .. versionadded:: 0.9 """ name = 'Common Lisp' url = 'https://lisp-lang.org/' aliases = ['common-lisp', 'cl', 'lisp'] filenames = ['*.cl', '*.lisp'] mimetypes = ['text/x-common-lisp'] + version_added = '0.9' flags = re.IGNORECASE | re.MULTILINE @@ -316,7 +314,7 @@ class CommonLispLexer(RegexLexer): # symbol token, reverse-engineered from hyperspec # Take a deep breath... - symbol = r'(\|[^|]+\||(?:%s)(?:%s)*)' % (nonmacro, constituent) + symbol = rf'(\|[^|]+\||(?:{nonmacro})(?:{constituent})*)' def __init__(self, **options): from pygments.lexers._cl_builtins import BUILTIN_FUNCTIONS, \ @@ -485,14 +483,13 @@ class CommonLispLexer(RegexLexer): class HyLexer(RegexLexer): """ Lexer for Hy source code. - - .. versionadded:: 2.0 """ name = 'Hy' url = 'http://hylang.org/' - aliases = ['hylang'] + aliases = ['hylang', 'hy'] filenames = ['*.hy'] mimetypes = ['text/x-hy', 'application/x-hy'] + version_added = '2.0' special_forms = ( 'cond', 'for', '->', '->>', 'car', @@ -522,7 +519,7 @@ class HyLexer(RegexLexer): # valid names for identifiers # well, names can only not consist fully of numbers # but this should be good enough for now - valid_name = r'(?!#)[\w!$%*+<=>?/.#:-]+' + valid_name = r"[^ \t\n\r\f\v()[\]{};\"'`~]+" def _multi_escape(entries): return words(entries, suffix=' ') @@ -534,8 +531,7 @@ class HyLexer(RegexLexer): (r';.*$', Comment.Single), # whitespaces - usually not relevant - (r',+', Text), - (r'\s+', Whitespace), + (r'[ \t\n\r\f\v]+', Whitespace), # numbers (r'-?\d+\.\d+', Number.Float), @@ -601,8 +597,6 @@ class RacketLexer(RegexLexer): """ Lexer for Racket source code (formerly known as PLT Scheme). - - .. versionadded:: 1.6 """ name = 'Racket' @@ -610,6 +604,7 @@ class RacketLexer(RegexLexer): aliases = ['racket', 'rkt'] filenames = ['*.rkt', '*.rktd', '*.rktl'] mimetypes = ['text/x-racket', 'application/x-racket'] + version_added = '1.6' # Generated by example.rkt _keywords = ( @@ -1391,19 +1386,17 @@ class RacketLexer(RegexLexer): _opening_parenthesis = r'[([{]' _closing_parenthesis = r'[)\]}]' _delimiters = r'()[\]{}",\'`;\s' - _symbol = r'(?:\|[^|]*\||\\[\w\W]|[^|\\%s]+)+' % _delimiters + _symbol = rf'(?:\|[^|]*\||\\[\w\W]|[^|\\{_delimiters}]+)+' _exact_decimal_prefix = r'(?:#e)?(?:#d)?(?:#e)?' _exponent = r'(?:[defls][-+]?\d+)' _inexact_simple_no_hashes = r'(?:\d+(?:/\d+|\.\d*)?|\.\d+)' - _inexact_simple = (r'(?:%s|(?:\d+#+(?:\.#*|/\d+#*)?|\.\d+#+|' - r'\d+(?:\.\d*#+|/\d+#+)))' % _inexact_simple_no_hashes) - _inexact_normal_no_hashes = r'(?:%s%s?)' % (_inexact_simple_no_hashes, - _exponent) - _inexact_normal = r'(?:%s%s?)' % (_inexact_simple, _exponent) + _inexact_simple = (rf'(?:{_inexact_simple_no_hashes}|(?:\d+#+(?:\.#*|/\d+#*)?|\.\d+#+|' + r'\d+(?:\.\d*#+|/\d+#+)))') + _inexact_normal_no_hashes = rf'(?:{_inexact_simple_no_hashes}{_exponent}?)' + _inexact_normal = rf'(?:{_inexact_simple}{_exponent}?)' _inexact_special = r'(?:(?:inf|nan)\.[0f])' - _inexact_real = r'(?:[-+]?%s|[-+]%s)' % (_inexact_normal, - _inexact_special) - _inexact_unsigned = r'(?:%s|%s)' % (_inexact_normal, _inexact_special) + _inexact_real = rf'(?:[-+]?{_inexact_normal}|[-+]{_inexact_special})' + _inexact_unsigned = rf'(?:{_inexact_normal}|{_inexact_special})' tokens = { 'root': [ @@ -1423,36 +1416,29 @@ class RacketLexer(RegexLexer): # onto Pygments token types; some judgment calls here. # #d or no prefix - (r'(?i)%s[-+]?\d+(?=[%s])' % (_exact_decimal_prefix, _delimiters), + (rf'(?i){_exact_decimal_prefix}[-+]?\d+(?=[{_delimiters}])', Number.Integer, '#pop'), - (r'(?i)%s[-+]?(\d+(\.\d*)?|\.\d+)([deflst][-+]?\d+)?(?=[%s])' % - (_exact_decimal_prefix, _delimiters), Number.Float, '#pop'), - (r'(?i)%s[-+]?(%s([-+]%s?i)?|[-+]%s?i)(?=[%s])' % - (_exact_decimal_prefix, _inexact_normal_no_hashes, - _inexact_normal_no_hashes, _inexact_normal_no_hashes, - _delimiters), Number, '#pop'), + (rf'(?i){_exact_decimal_prefix}[-+]?(\d+(\.\d*)?|\.\d+)([deflst][-+]?\d+)?(?=[{_delimiters}])', Number.Float, '#pop'), + (rf'(?i){_exact_decimal_prefix}[-+]?({_inexact_normal_no_hashes}([-+]{_inexact_normal_no_hashes}?i)?|[-+]{_inexact_normal_no_hashes}?i)(?=[{_delimiters}])', Number, '#pop'), # Inexact without explicit #i - (r'(?i)(#d)?(%s([-+]%s?i)?|[-+]%s?i|%s@%s)(?=[%s])' % - (_inexact_real, _inexact_unsigned, _inexact_unsigned, - _inexact_real, _inexact_real, _delimiters), Number.Float, + (rf'(?i)(#d)?({_inexact_real}([-+]{_inexact_unsigned}?i)?|[-+]{_inexact_unsigned}?i|{_inexact_real}@{_inexact_real})(?=[{_delimiters}])', Number.Float, '#pop'), # The remaining extflonums - (r'(?i)(([-+]?%st[-+]?\d+)|[-+](inf|nan)\.t)(?=[%s])' % - (_inexact_simple, _delimiters), Number.Float, '#pop'), + (rf'(?i)(([-+]?{_inexact_simple}t[-+]?\d+)|[-+](inf|nan)\.t)(?=[{_delimiters}])', Number.Float, '#pop'), # #b - (r'(?iu)(#[ei])?#b%s' % _symbol, Number.Bin, '#pop'), + (rf'(?iu)(#[ei])?#b{_symbol}', Number.Bin, '#pop'), # #o - (r'(?iu)(#[ei])?#o%s' % _symbol, Number.Oct, '#pop'), + (rf'(?iu)(#[ei])?#o{_symbol}', Number.Oct, '#pop'), # #x - (r'(?iu)(#[ei])?#x%s' % _symbol, Number.Hex, '#pop'), + (rf'(?iu)(#[ei])?#x{_symbol}', Number.Hex, '#pop'), # #i is always inexact, i.e. float - (r'(?iu)(#d)?#i%s' % _symbol, Number.Float, '#pop'), + (rf'(?iu)(#d)?#i{_symbol}', Number.Float, '#pop'), # Strings and characters (r'#?"', String.Double, ('#pop', 'string')), @@ -1465,7 +1451,7 @@ class RacketLexer(RegexLexer): (r'#(true|false|[tTfF])', Name.Constant, '#pop'), # Keyword argument names (e.g. #:keyword) - (r'#:%s' % _symbol, Keyword.Declaration, '#pop'), + (rf'#:{_symbol}', Keyword.Declaration, '#pop'), # Reader extensions (r'(#lang |#!)(\S+)', @@ -1473,8 +1459,8 @@ class RacketLexer(RegexLexer): (r'#reader', Keyword.Namespace, 'quoted-datum'), # Other syntax - (r"(?i)\.(?=[%s])|#c[is]|#['`]|#,@?" % _delimiters, Operator), - (r"'|#[s&]|#hash(eqv?)?|#\d*(?=%s)" % _opening_parenthesis, + (rf"(?i)\.(?=[{_delimiters}])|#c[is]|#['`]|#,@?", Operator), + (rf"'|#[s&]|#hash(eqv?)?|#\d*(?={_opening_parenthesis})", Operator, ('#pop', 'quoted-datum')) ], 'datum*': [ @@ -1488,15 +1474,15 @@ class RacketLexer(RegexLexer): ], 'unquoted-datum': [ include('datum'), - (r'quote(?=[%s])' % _delimiters, Keyword, + (rf'quote(?=[{_delimiters}])', Keyword, ('#pop', 'quoted-datum')), (r'`', Operator, ('#pop', 'quasiquoted-datum')), - (r'quasiquote(?=[%s])' % _delimiters, Keyword, + (rf'quasiquote(?=[{_delimiters}])', Keyword, ('#pop', 'quasiquoted-datum')), (_opening_parenthesis, Punctuation, ('#pop', 'unquoted-list')), - (words(_keywords, suffix='(?=[%s])' % _delimiters), + (words(_keywords, suffix=f'(?=[{_delimiters}])'), Keyword, '#pop'), - (words(_builtins, suffix='(?=[%s])' % _delimiters), + (words(_builtins, suffix=f'(?=[{_delimiters}])'), Name.Builtin, '#pop'), (_symbol, Name, '#pop'), include('datum*') @@ -1508,7 +1494,7 @@ class RacketLexer(RegexLexer): 'quasiquoted-datum': [ include('datum'), (r',@?', Operator, ('#pop', 'unquoted-datum')), - (r'unquote(-splicing)?(?=[%s])' % _delimiters, Keyword, + (rf'unquote(-splicing)?(?=[{_delimiters}])', Keyword, ('#pop', 'unquoted-datum')), (_opening_parenthesis, Punctuation, ('#pop', 'quasiquoted-list')), include('datum*') @@ -1543,8 +1529,6 @@ class RacketLexer(RegexLexer): class NewLispLexer(RegexLexer): """ For newLISP source code (version 10.3.0). - - .. versionadded:: 1.5 """ name = 'NewLisp' @@ -1552,6 +1536,7 @@ class NewLispLexer(RegexLexer): aliases = ['newlisp'] filenames = ['*.lsp', '*.nl', '*.kif'] mimetypes = ['text/x-newlisp', 'application/x-newlisp'] + version_added = '1.5' flags = re.IGNORECASE | re.MULTILINE @@ -1676,13 +1661,13 @@ class EmacsLispLexer(RegexLexer): """ An ELisp lexer, parsing a stream and outputting the tokens needed to highlight elisp code. - - .. versionadded:: 2.1 """ name = 'EmacsLisp' aliases = ['emacs-lisp', 'elisp', 'emacs'] filenames = ['*.el'] mimetypes = ['text/x-elisp', 'application/x-elisp'] + url = 'https://www.gnu.org/software/emacs' + version_added = '2.1' flags = re.MULTILINE @@ -1695,7 +1680,7 @@ class EmacsLispLexer(RegexLexer): # symbol token, reverse-engineered from hyperspec # Take a deep breath... - symbol = r'((?:%s)(?:%s)*)' % (nonmacro, constituent) + symbol = rf'((?:{nonmacro})(?:{constituent})*)' macros = { 'atomic-change-group', 'case', 'block', 'cl-block', 'cl-callf', 'cl-callf2', @@ -2308,7 +2293,7 @@ class EmacsLispLexer(RegexLexer): ], 'string': [ (r'[^"\\`]+', String), - (r'`%s\'' % symbol, String.Symbol), + (rf'`{symbol}\'', String.Symbol), (r'`', String), (r'\\.', String), (r'\\\n', String), @@ -2320,14 +2305,13 @@ class EmacsLispLexer(RegexLexer): class ShenLexer(RegexLexer): """ Lexer for Shen source code. - - .. versionadded:: 2.1 """ name = 'Shen' url = 'http://shenlanguage.org/' aliases = ['shen'] filenames = ['*.shen'] mimetypes = ['text/x-shen', 'application/x-shen'] + version_added = '2.1' DECLARATIONS = ( 'datatype', 'define', 'defmacro', 'defprolog', 'defcc', @@ -2375,9 +2359,9 @@ class ShenLexer(RegexLexer): MAPPINGS.update((s, Keyword) for s in SPECIAL_FORMS) valid_symbol_chars = r'[\w!$%*+,<=>?/.\'@&#:-]' - valid_name = '%s+' % valid_symbol_chars - symbol_name = r'[a-z!$%%*+,<=>?/.\'@&#_-]%s*' % valid_symbol_chars - variable = r'[A-Z]%s*' % valid_symbol_chars + valid_name = f'{valid_symbol_chars}+' + symbol_name = rf'[a-z!$%*+,<=>?/.\'@&#_-]{valid_symbol_chars}*' + variable = rf'[A-Z]{valid_symbol_chars}*' tokens = { 'string': [ @@ -2485,13 +2469,13 @@ class ShenLexer(RegexLexer): class CPSALexer(RegexLexer): """ A CPSA lexer based on the CPSA language as of version 2.2.12 - - .. versionadded:: 2.1 """ name = 'CPSA' aliases = ['cpsa'] filenames = ['*.cpsa'] mimetypes = [] + url = 'https://web.cs.wpi.edu/~guttman/cs564/cpsauser.html' + version_added = '2.1' # list of known keywords and builtins taken form vim 6.4 scheme.vim # syntax file. @@ -2566,14 +2550,13 @@ class XtlangLexer(RegexLexer): This is a mixture of Scheme and xtlang, really. Keyword lists are taken from the Extempore Emacs mode (https://github.com/extemporelang/extempore-emacs-mode) - - .. versionadded:: 2.2 """ name = 'xtlang' url = 'http://extempore.moso.com.au' aliases = ['extempore'] filenames = ['*.xtm'] mimetypes = [] + version_added = '2.2' common_keywords = ( 'lambda', 'define', 'if', 'else', 'cond', 'and', @@ -2769,13 +2752,12 @@ class FennelLexer(RegexLexer): Fennel compiles to Lua, so all the Lua builtins are recognized as well as the special forms that are particular to the Fennel compiler. - - .. versionadded:: 2.3 """ name = 'Fennel' url = 'https://fennel-lang.org' aliases = ['fennel', 'fnl'] filenames = ['*.fnl'] + version_added = '2.3' # this list is current as of Fennel version 0.10.0. special_forms = ( @@ -2846,3 +2828,319 @@ class FennelLexer(RegexLexer): (r'#', Punctuation), ] } + + +class JanetLexer(RegexLexer): + """A lexer for the Janet programming language. + """ + name = 'Janet' + url = 'https://janet-lang.org/' + aliases = ['janet'] + filenames = ['*.janet', '*.jdn'] + mimetypes = ['text/x-janet', 'application/x-janet'] + version_added = '2.18' + + # XXX: gets too slow + #flags = re.MULTILINE | re.VERBOSE + + special_forms = ( + 'break', 'def', 'do', 'fn', 'if', 'quote', 'quasiquote', 'splice', + 'set', 'unquote', 'upscope', 'var', 'while' + ) + + builtin_macros = ( + '%=', '*=', '++', '+=', '--', '-=', '->', '->>', '-?>', + '-?>>', '/=', 'and', 'as->', 'as-macro', 'as?->', + 'assert', 'case', 'catseq', 'chr', 'comment', 'compif', + 'comptime', 'compwhen', 'cond', 'coro', 'def-', + 'default', 'defdyn', 'defer', 'defmacro', 'defmacro-', + 'defn', 'defn-', 'delay', 'doc', 'each', 'eachk', + 'eachp', 'edefer', 'ev/do-thread', 'ev/gather', + 'ev/spawn', 'ev/spawn-thread', 'ev/with-deadline', + 'ffi/defbind', 'fiber-fn', 'for', 'forever', 'forv', + 'generate', 'if-let', 'if-not', 'if-with', 'import', + 'juxt', 'label', 'let', 'loop', 'match', 'or', 'prompt', + 'protect', 'repeat', 'seq', 'short-fn', 'tabseq', + 'toggle', 'tracev', 'try', 'unless', 'use', 'var-', + 'varfn', 'when', 'when-let', 'when-with', 'with', + 'with-dyns', 'with-syms', 'with-vars', + # obsolete builtin macros + 'eachy' + ) + + builtin_functions = ( + '%', '*', '+', '-', '/', '<', '<=', '=', '>', '>=', + 'abstract?', 'accumulate', 'accumulate2', 'all', + 'all-bindings', 'all-dynamics', 'any?', 'apply', + 'array', 'array/clear', 'array/concat', 'array/ensure', + 'array/fill', 'array/insert', 'array/new', + 'array/new-filled', 'array/peek', 'array/pop', + 'array/push', 'array/remove', 'array/slice', + 'array/trim', 'array/weak', 'array?', 'asm', + 'bad-compile', 'bad-parse', 'band', 'blshift', 'bnot', + 'boolean?', 'bor', 'brshift', 'brushift', 'buffer', + 'buffer/bit', 'buffer/bit-clear', 'buffer/bit-set', + 'buffer/bit-toggle', 'buffer/blit', 'buffer/clear', + 'buffer/fill', 'buffer/format', 'buffer/from-bytes', + 'buffer/new', 'buffer/new-filled', 'buffer/popn', + 'buffer/push', 'buffer/push-at', 'buffer/push-byte', + 'buffer/push-string', 'buffer/push-word', + 'buffer/slice', 'buffer/trim', 'buffer?', 'bxor', + 'bytes?', 'cancel', 'cfunction?', 'cli-main', 'cmp', + 'comp', 'compare', 'compare<', 'compare<=', 'compare=', + 'compare>', 'compare>=', 'compile', 'complement', + 'count', 'curenv', 'debug', 'debug/arg-stack', + 'debug/break', 'debug/fbreak', 'debug/lineage', + 'debug/stack', 'debug/stacktrace', 'debug/step', + 'debug/unbreak', 'debug/unfbreak', 'debugger', + 'debugger-on-status', 'dec', 'deep-not=', 'deep=', + 'defglobal', 'describe', 'dictionary?', 'disasm', + 'distinct', 'div', 'doc*', 'doc-format', 'doc-of', + 'dofile', 'drop', 'drop-until', 'drop-while', 'dyn', + 'eflush', 'empty?', 'env-lookup', 'eprin', 'eprinf', + 'eprint', 'eprintf', 'error', 'errorf', + 'ev/acquire-lock', 'ev/acquire-rlock', + 'ev/acquire-wlock', 'ev/all-tasks', 'ev/call', + 'ev/cancel', 'ev/capacity', 'ev/chan', 'ev/chan-close', + 'ev/chunk', 'ev/close', 'ev/count', 'ev/deadline', + 'ev/full', 'ev/give', 'ev/give-supervisor', 'ev/go', + 'ev/lock', 'ev/read', 'ev/release-lock', + 'ev/release-rlock', 'ev/release-wlock', 'ev/rselect', + 'ev/rwlock', 'ev/select', 'ev/sleep', 'ev/take', + 'ev/thread', 'ev/thread-chan', 'ev/write', 'eval', + 'eval-string', 'even?', 'every?', 'extreme', 'false?', + 'ffi/align', 'ffi/call', 'ffi/calling-conventions', + 'ffi/close', 'ffi/context', 'ffi/free', 'ffi/jitfn', + 'ffi/lookup', 'ffi/malloc', 'ffi/native', + 'ffi/pointer-buffer', 'ffi/pointer-cfunction', + 'ffi/read', 'ffi/signature', 'ffi/size', 'ffi/struct', + 'ffi/trampoline', 'ffi/write', 'fiber/can-resume?', + 'fiber/current', 'fiber/getenv', 'fiber/last-value', + 'fiber/maxstack', 'fiber/new', 'fiber/root', + 'fiber/setenv', 'fiber/setmaxstack', 'fiber/status', + 'fiber?', 'file/close', 'file/flush', 'file/lines', + 'file/open', 'file/read', 'file/seek', 'file/tell', + 'file/temp', 'file/write', 'filter', 'find', + 'find-index', 'first', 'flatten', 'flatten-into', + 'flush', 'flycheck', 'freeze', 'frequencies', + 'from-pairs', 'function?', 'gccollect', 'gcinterval', + 'gcsetinterval', 'gensym', 'get', 'get-in', 'getline', + 'getproto', 'group-by', 'has-key?', 'has-value?', + 'hash', 'idempotent?', 'identity', 'import*', 'in', + 'inc', 'index-of', 'indexed?', 'int/s64', + 'int/to-bytes', 'int/to-number', 'int/u64', 'int?', + 'interleave', 'interpose', 'invert', 'juxt*', 'keep', + 'keep-syntax', 'keep-syntax!', 'keys', 'keyword', + 'keyword/slice', 'keyword?', 'kvs', 'last', 'length', + 'lengthable?', 'load-image', 'macex', 'macex1', + 'maclintf', 'make-env', 'make-image', 'map', 'mapcat', + 'marshal', 'math/abs', 'math/acos', 'math/acosh', + 'math/asin', 'math/asinh', 'math/atan', 'math/atan2', + 'math/atanh', 'math/cbrt', 'math/ceil', 'math/cos', + 'math/cosh', 'math/erf', 'math/erfc', 'math/exp', + 'math/exp2', 'math/expm1', 'math/floor', 'math/gamma', + 'math/gcd', 'math/hypot', 'math/lcm', 'math/log', + 'math/log-gamma', 'math/log10', 'math/log1p', + 'math/log2', 'math/next', 'math/pow', 'math/random', + 'math/rng', 'math/rng-buffer', 'math/rng-int', + 'math/rng-uniform', 'math/round', 'math/seedrandom', + 'math/sin', 'math/sinh', 'math/sqrt', 'math/tan', + 'math/tanh', 'math/trunc', 'max', 'max-of', 'mean', + 'memcmp', 'merge', 'merge-into', 'merge-module', 'min', + 'min-of', 'mod', 'module/add-paths', + 'module/expand-path', 'module/find', 'module/value', + 'nan?', 'nat?', 'native', 'neg?', 'net/accept', + 'net/accept-loop', 'net/address', 'net/address-unpack', + 'net/chunk', 'net/close', 'net/connect', 'net/flush', + 'net/listen', 'net/localname', 'net/peername', + 'net/read', 'net/recv-from', 'net/send-to', + 'net/server', 'net/setsockopt', 'net/shutdown', + 'net/write', 'next', 'nil?', 'not', 'not=', 'number?', + 'odd?', 'one?', 'os/arch', 'os/cd', 'os/chmod', + 'os/clock', 'os/compiler', 'os/cpu-count', + 'os/cryptorand', 'os/cwd', 'os/date', 'os/dir', + 'os/environ', 'os/execute', 'os/exit', 'os/getenv', + 'os/isatty', 'os/link', 'os/lstat', 'os/mkdir', + 'os/mktime', 'os/open', 'os/perm-int', 'os/perm-string', + 'os/pipe', 'os/posix-exec', 'os/posix-fork', + 'os/proc-close', 'os/proc-kill', 'os/proc-wait', + 'os/readlink', 'os/realpath', 'os/rename', 'os/rm', + 'os/rmdir', 'os/setenv', 'os/shell', 'os/sigaction', + 'os/sleep', 'os/spawn', 'os/stat', 'os/strftime', + 'os/symlink', 'os/time', 'os/touch', 'os/umask', + 'os/which', 'pairs', 'parse', 'parse-all', + 'parser/byte', 'parser/clone', 'parser/consume', + 'parser/eof', 'parser/error', 'parser/flush', + 'parser/has-more', 'parser/insert', 'parser/new', + 'parser/produce', 'parser/state', 'parser/status', + 'parser/where', 'partial', 'partition', 'partition-by', + 'peg/compile', 'peg/find', 'peg/find-all', 'peg/match', + 'peg/replace', 'peg/replace-all', 'pos?', 'postwalk', + 'pp', 'prewalk', 'prin', 'prinf', 'print', 'printf', + 'product', 'propagate', 'put', 'put-in', 'quit', + 'range', 'reduce', 'reduce2', 'repl', 'require', + 'resume', 'return', 'reverse', 'reverse!', + 'run-context', 'sandbox', 'scan-number', 'setdyn', + 'signal', 'slice', 'slurp', 'some', 'sort', 'sort-by', + 'sorted', 'sorted-by', 'spit', 'string', + 'string/ascii-lower', 'string/ascii-upper', + 'string/bytes', 'string/check-set', 'string/find', + 'string/find-all', 'string/format', 'string/from-bytes', + 'string/has-prefix?', 'string/has-suffix?', + 'string/join', 'string/repeat', 'string/replace', + 'string/replace-all', 'string/reverse', 'string/slice', + 'string/split', 'string/trim', 'string/triml', + 'string/trimr', 'string?', 'struct', 'struct/getproto', + 'struct/proto-flatten', 'struct/to-table', + 'struct/with-proto', 'struct?', 'sum', 'symbol', + 'symbol/slice', 'symbol?', 'table', 'table/clear', + 'table/clone', 'table/getproto', 'table/new', + 'table/proto-flatten', 'table/rawget', 'table/setproto', + 'table/to-struct', 'table/weak', 'table/weak-keys', + 'table/weak-values', 'table?', 'take', 'take-until', + 'take-while', 'thaw', 'trace', 'true?', 'truthy?', + 'tuple', 'tuple/brackets', 'tuple/setmap', + 'tuple/slice', 'tuple/sourcemap', 'tuple/type', + 'tuple?', 'type', 'unmarshal', 'untrace', 'update', + 'update-in', 'values', 'varglobal', 'walk', + 'warn-compile', 'xprin', 'xprinf', 'xprint', 'xprintf', + 'yield', 'zero?', 'zipcoll', + # obsolete builtin functions + 'tarray/buffer', 'tarray/copy-bytes', 'tarray/length', + 'tarray/new', 'tarray/properties', 'tarray/slice', + 'tarray/swap-bytes', 'thread/close', 'thread/current', + 'thread/exit', 'thread/new', 'thread/receive', + 'thread/send' + ) + + builtin_variables = ( + 'debugger-env', 'default-peg-grammar', 'janet/build', + 'janet/config-bits', 'janet/version', 'load-image-dict', + 'make-image-dict', 'math/-inf', 'math/e', 'math/inf', + 'math/int-max', 'math/int-min', 'math/int32-max', + 'math/int32-min', 'math/nan', 'math/pi', 'module/cache', + 'module/loaders', 'module/loading', 'module/paths', + 'root-env', 'stderr', 'stdin', 'stdout' + ) + + constants = ( + 'false', 'nil', 'true' + ) + + # XXX: this form not usable to pass to `suffix=` + #_token_end = r''' + # (?= # followed by one of: + # \s # whitespace + # | \# # comment + # | [)\]] # end delimiters + # | $ # end of file + # ) + #''' + + # ...so, express it like this + _token_end = r'(?=\s|#|[)\]]|$)' + + _first_char = r'[a-zA-Z!$%&*+\-./<=>?@^_]' + _rest_char = rf'([0-9:]|{_first_char})' + + valid_name = rf'{_first_char}({_rest_char})*' + + _radix_unit = r'[0-9a-zA-Z][0-9a-zA-Z_]*' + + # exponent marker, optional sign, one or more alphanumeric + _radix_exp = r'&[+-]?[0-9a-zA-Z]+' + + # 2af3__bee_ + _hex_unit = r'[0-9a-fA-F][0-9a-fA-F_]*' + + # 12_000__ + _dec_unit = r'[0-9][0-9_]*' + + # E-23 + # lower or uppercase e, optional sign, one or more digits + _dec_exp = r'[eE][+-]?[0-9]+' + + tokens = { + 'root': [ + (r'#.*$', Comment.Single), + + (r'\s+', Whitespace), + + # radix number + (rf'''(?x) + [+-]? [0-9]{{1,2}} r {_radix_unit} \. ({_radix_unit})? + ({_radix_exp})? + ''', + Number), + + (rf'''(?x) + [+-]? [0-9]{{1,2}} r (\.)? {_radix_unit} + ({_radix_exp})? + ''', + Number), + + # hex number + (rf'(?x) [+-]? 0x {_hex_unit} \. ({_hex_unit})?', + Number.Hex), + + (rf'(?x) [+-]? 0x (\.)? {_hex_unit}', + Number.Hex), + + # decimal number + (rf'(?x) [+-]? {_dec_unit} \. ({_dec_unit})? ({_dec_exp})?', + Number.Float), + + (rf'(?x) [+-]? (\.)? {_dec_unit} ({_dec_exp})?', + Number.Float), + + # strings and buffers + (r'@?"', String, 'string'), + + # long-strings and long-buffers + # + # non-empty content enclosed by a pair of n-backticks + # with optional leading @ + (r'@?(`+)(.|\n)+?\1', String), + + # things that hang out on front + # + # ' ~ , ; | + (r"['~,;|]", Operator), + + # collection delimiters + # + # @( ( ) + # @[ [ ] + # @{ { } + (r'@?[(\[{]|[)\]}]', Punctuation), + + # constants + (words(constants, suffix=_token_end), Keyword.Constants), + + # keywords + (rf'(:({_rest_char})+|:)', Name.Constant), + + # symbols + (words(builtin_variables, suffix=_token_end), + Name.Variable.Global), + + (words(special_forms, prefix=r'(?<=\()', suffix=_token_end), + Keyword.Reserved), + + (words(builtin_macros, prefix=r'(?<=\()', suffix=_token_end), + Name.Builtin), + + (words(builtin_functions, prefix=r'(?<=\()', suffix=_token_end), + Name.Function), + + # other symbols + (valid_name, Name.Variable), + ], + 'string': [ + (r'\\(u[0-9a-fA-F]{4}|U[0-9a-fA-F]{6})', String.Escape), + (r'\\x[0-9a-fA-F]{2}', String.Escape), + (r'\\.', String.Escape), + (r'"', String, '#pop'), + (r'[^\\"]+', String), + ] + } |