contrib/python/Pygments/py3/pygments/lexers/dalvik.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124

"""
    pygments.lexers.dalvik
    ~~~~~~~~~~~~~~~~~~~~~~

    Pygments lexers for Dalvik VM-related languages.

    :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re

from pygments.lexer import RegexLexer, include, bygroups
from pygments.token import Keyword, Text, Comment, Name, String, Number, \
    Punctuation, Whitespace

__all__ = ['SmaliLexer']


class SmaliLexer(RegexLexer):
    """
    For `Smali <http://code.google.com/p/smali/>`_ (Android/Dalvik) assembly
    code.

    .. versionadded:: 1.6
    """
    name = 'Smali'
    aliases = ['smali']
    filenames = ['*.smali']
    mimetypes = ['text/smali']

    tokens = {
        'root': [
            include('comment'),
            include('label'),
            include('field'),
            include('method'),
            include('class'),
            include('directive'),
            include('access-modifier'),
            include('instruction'),
            include('literal'),
            include('punctuation'),
            include('type'),
            include('whitespace')
        ],
        'directive': [
            (r'^([ \t]*)(\.(?:class|super|implements|field|subannotation|annotation|'
             r'enum|method|registers|locals|array-data|packed-switch|'
             r'sparse-switch|catchall|catch|line|parameter|local|prologue|'
             r'epilogue|source))', bygroups(Whitespace, Keyword)),
            (r'^([ \t]*)(\.end)( )(field|subannotation|annotation|method|array-data|'
             'packed-switch|sparse-switch|parameter|local)', bygroups(Whitespace, Keyword, Whitespace, Keyword)),
            (r'^([ \t]*)(\.restart)( )(local)', bygroups(Whitespace, Keyword, Whitespace, Keyword)),
        ],
        'access-modifier': [
            (r'(public|private|protected|static|final|synchronized|bridge|'
             r'varargs|native|abstract|strictfp|synthetic|constructor|'
             r'declared-synchronized|interface|enum|annotation|volatile|'
             r'transient)', Keyword),
        ],
        'whitespace': [
            (r'\n', Whitespace),
            (r'\s+', Whitespace),
        ],
        'instruction': [
            (r'\b[vp]\d+\b', Name.Builtin),  # registers
            (r'(\b[a-z][A-Za-z0-9/-]+)(\s+)', bygroups(Text, Whitespace)),  # instructions
        ],
        'literal': [
            (r'".*"', String),
            (r'0x[0-9A-Fa-f]+t?', Number.Hex),
            (r'[0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
            (r'[0-9]+L?', Number.Integer),
        ],
        'field': [
            (r'(\$?\b)([\w$]*)(:)',
             bygroups(Punctuation, Name.Variable, Punctuation)),
        ],
        'method': [
            (r'<(?:cl)?init>', Name.Function),  # constructor
            (r'(\$?\b)([\w$]*)(\()',
             bygroups(Punctuation, Name.Function, Punctuation)),
        ],
        'label': [
            (r':\w+', Name.Label),
        ],
        'class': [
            # class names in the form Lcom/namespace/ClassName;
            # I only want to color the ClassName part, so the namespace part is
            # treated as 'Text'
            (r'(L)((?:[\w$]+/)*)([\w$]+)(;)',
                bygroups(Keyword.Type, Text, Name.Class, Text)),
        ],
        'punctuation': [
            (r'->', Punctuation),
            (r'[{},():=.-]', Punctuation),
        ],
        'type': [
            (r'[ZBSCIJFDV\[]+', Keyword.Type),
        ],
        'comment': [
            (r'#.*?\n', Comment),
        ],
    }

    def analyse_text(text):
        score = 0
        if re.search(r'^\s*\.class\s', text, re.MULTILINE):
            score += 0.5
            if re.search(r'\b((check-cast|instance-of|throw-verification-error'
                         r')\b|(-to|add|[ais]get|[ais]put|and|cmpl|const|div|'
                         r'if|invoke|move|mul|neg|not|or|rem|return|rsub|shl|'
                         r'shr|sub|ushr)[-/])|{|}', text, re.MULTILINE):
                score += 0.3
        if re.search(r'(\.(catchall|epilogue|restart local|prologue)|'
                     r'\b(array-data|class-change-error|declared-synchronized|'
                     r'(field|inline|vtable)@0x[0-9a-fA-F]|generic-error|'
                     r'illegal-class-access|illegal-field-access|'
                     r'illegal-method-access|instantiation-error|no-error|'
                     r'no-such-class|no-such-field|no-such-method|'
                     r'packed-switch|sparse-switch))\b', text, re.MULTILINE):
            score += 0.6
        return score