author    | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300
commit    | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
tree      | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /contrib/python/Pygments/py3/pygments/lexers/r.py
download  | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'contrib/python/Pygments/py3/pygments/lexers/r.py')
-rw-r--r-- | contrib/python/Pygments/py3/pygments/lexers/r.py | 190
1 file changed, 190 insertions, 0 deletions
diff --git a/contrib/python/Pygments/py3/pygments/lexers/r.py b/contrib/python/Pygments/py3/pygments/lexers/r.py
new file mode 100644
index 0000000000..44168a7ad5
--- /dev/null
+++ b/contrib/python/Pygments/py3/pygments/lexers/r.py
@@ -0,0 +1,190 @@
+"""
+    pygments.lexers.r
+    ~~~~~~~~~~~~~~~~~
+
+    Lexers for the R/S languages.
+
+    :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+"""
+
+import re
+
+from pygments.lexer import Lexer, RegexLexer, include, do_insertions
+from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
+    Number, Punctuation, Generic
+
+__all__ = ['RConsoleLexer', 'SLexer', 'RdLexer']
+
+
+line_re = re.compile('.*?\n')
+
+
+class RConsoleLexer(Lexer):
+    """
+    For R console transcripts or R CMD BATCH output files.
+    """
+
+    name = 'RConsole'
+    aliases = ['rconsole', 'rout']
+    filenames = ['*.Rout']
+
+    def get_tokens_unprocessed(self, text):
+        slexer = SLexer(**self.options)
+
+        current_code_block = ''
+        insertions = []
+
+        for match in line_re.finditer(text):
+            line = match.group()
+            if line.startswith('>') or line.startswith('+'):
+                # Colorize the prompt as such,
+                # then put rest of line into current_code_block
+                insertions.append((len(current_code_block),
+                                   [(0, Generic.Prompt, line[:2])]))
+                current_code_block += line[2:]
+            else:
+                # We have reached a non-prompt line!
+                # If we have stored prompt lines, need to process them first.
+                if current_code_block:
+                    # Weave together the prompts and highlight code.
+                    yield from do_insertions(
+                        insertions, slexer.get_tokens_unprocessed(current_code_block))
+                    # Reset vars for next code block.
+                    current_code_block = ''
+                    insertions = []
+                # Now process the actual line itself, this is output from R.
+                yield match.start(), Generic.Output, line
+
+        # If we happen to end on a code block with nothing after it, need to
+        # process the last code block. This is neither elegant nor DRY so
+        # should be changed.
+        if current_code_block:
+            yield from do_insertions(
+                insertions, slexer.get_tokens_unprocessed(current_code_block))
+
+
+class SLexer(RegexLexer):
+    """
+    For S, S-plus, and R source code.
+
+    .. versionadded:: 0.10
+    """
+
+    name = 'S'
+    aliases = ['splus', 's', 'r']
+    filenames = ['*.S', '*.R', '.Rhistory', '.Rprofile', '.Renviron']
+    mimetypes = ['text/S-plus', 'text/S', 'text/x-r-source', 'text/x-r',
+                 'text/x-R', 'text/x-r-history', 'text/x-r-profile']
+
+    valid_name = r'`[^`\\]*(?:\\.[^`\\]*)*`|(?:[a-zA-Z]|\.[A-Za-z_.])[\w.]*|\.'
+    tokens = {
+        'comments': [
+            (r'#.*$', Comment.Single),
+        ],
+        'valid_name': [
+            (valid_name, Name),
+        ],
+        'punctuation': [
+            (r'\[{1,2}|\]{1,2}|\(|\)|;|,', Punctuation),
+        ],
+        'keywords': [
+            (r'(if|else|for|while|repeat|in|next|break|return|switch|function)'
+             r'(?![\w.])',
+             Keyword.Reserved),
+        ],
+        'operators': [
+            (r'<<?-|->>?|-|==|<=|>=|<|>|&&?|!=|\|\|?|\?', Operator),
+            (r'\*|\+|\^|/|!|%[^%]*%|=|~|\$|@|:{1,3}', Operator),
+        ],
+        'builtin_symbols': [
+            (r'(NULL|NA(_(integer|real|complex|character)_)?|'
+             r'letters|LETTERS|Inf|TRUE|FALSE|NaN|pi|\.\.(\.|[0-9]+))'
+             r'(?![\w.])',
+             Keyword.Constant),
+            (r'(T|F)\b', Name.Builtin.Pseudo),
+        ],
+        'numbers': [
+            # hex number
+            (r'0[xX][a-fA-F0-9]+([pP][0-9]+)?[Li]?', Number.Hex),
+            # decimal number
+            (r'[+-]?([0-9]+(\.[0-9]+)?|\.[0-9]+|\.)([eE][+-]?[0-9]+)?[Li]?',
+             Number),
+        ],
+        'statements': [
+            include('comments'),
+            # whitespaces
+            (r'\s+', Text),
+            (r'\'', String, 'string_squote'),
+            (r'\"', String, 'string_dquote'),
+            include('builtin_symbols'),
+            include('valid_name'),
+            include('numbers'),
+            include('keywords'),
+            include('punctuation'),
+            include('operators'),
+        ],
+        'root': [
+            # calls:
+            (r'(%s)\s*(?=\()' % valid_name, Name.Function),
+            include('statements'),
+            # blocks:
+            (r'\{|\}', Punctuation),
+            # (r'\{', Punctuation, 'block'),
+            (r'.', Text),
+        ],
+        # 'block': [
+        #    include('statements'),
+        #    ('\{', Punctuation, '#push'),
+        #    ('\}', Punctuation, '#pop')
+        # ],
+        'string_squote': [
+            (r'([^\'\\]|\\.)*\'', String, '#pop'),
+        ],
+        'string_dquote': [
+            (r'([^"\\]|\\.)*"', String, '#pop'),
+        ],
+    }
+
+    def analyse_text(text):
+        if re.search(r'[a-z0-9_\])\s]<-(?!-)', text):
+            return 0.11
+
+
+class RdLexer(RegexLexer):
+    """
+    Pygments Lexer for R documentation (Rd) files
+
+    This is a very minimal implementation, highlighting little more
+    than the macros. A description of Rd syntax is found in `Writing R
+    Extensions <http://cran.r-project.org/doc/manuals/R-exts.html>`_
+    and `Parsing Rd files <http://developer.r-project.org/parseRd.pdf>`_.
+
+    .. versionadded:: 1.6
+    """
+    name = 'Rd'
+    aliases = ['rd']
+    filenames = ['*.Rd']
+    mimetypes = ['text/x-r-doc']
+
+    # To account for verbatim / LaTeX-like / and R-like areas
+    # would require parsing.
+    tokens = {
+        'root': [
+            # catch escaped brackets and percent sign
+            (r'\\[\\{}%]', String.Escape),
+            # comments
+            (r'%.*$', Comment),
+            # special macros with no arguments
+            (r'\\(?:cr|l?dots|R|tab)\b', Keyword.Constant),
+            # macros
+            (r'\\[a-zA-Z]+\b', Keyword),
+            # special preprocessor macros
+            (r'^\s*#(?:ifn?def|endif).*\b', Comment.Preproc),
+            # non-escaped brackets
+            (r'[{}]', Name.Builtin),
+            # everything else
+            (r'[^\\%\n{}]+', Text),
+            (r'.', Text),
+        ]
+    }
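
For reviewers who want to exercise the vendored lexers by hand, here is a minimal usage sketch. It is not part of the commit: it assumes the vendored contrib/python/Pygments/py3 tree is importable as the regular pygments package, and the R source snippet and console transcript are made-up sample inputs.

    # Hypothetical usage sketch (not part of the commit). Assumes the vendored
    # Pygments copy is importable as the normal `pygments` package; the R
    # source and console transcript below are made-up sample inputs.
    from pygments import highlight
    from pygments.formatters import TerminalFormatter
    from pygments.lexers.r import RConsoleLexer, SLexer

    # SLexer tokenizes plain R/S source: names, numbers, strings, keywords,
    # and operators such as <- and %in%.
    r_source = 'x <- c(1, 2, 3)\nif (mean(x) > 1) print("big")  # comment\n'
    print(highlight(r_source, SLexer(), TerminalFormatter()))

    # RConsoleLexer marks "> " / "+ " prompts as Generic.Prompt, treats other
    # lines as Generic.Output, and feeds the prompted code back through SLexer
    # via do_insertions.
    transcript = '> y <- 1:3\n> mean(y)\n[1] 2\n'
    for pos, token, value in RConsoleLexer().get_tokens_unprocessed(transcript):
        print(pos, token, repr(value))

Since these are standard Pygments lexer classes, downstream tooling can also resolve them by their registered aliases (for example pygments.lexers.get_lexer_by_name('r')) rather than importing pygments.lexers.r directly, provided the lexer mapping from the same vendored copy is used.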