1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
|
"""
pygments.lexers.dalvik
~~~~~~~~~~~~~~~~~~~~~~
Pygments lexers for Dalvik VM-related languages.
:copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
import re
from pygments.lexer import RegexLexer, include, bygroups
from pygments.token import Keyword, Text, Comment, Name, String, Number, \
Punctuation, Whitespace
__all__ = ['SmaliLexer']
class SmaliLexer(RegexLexer):
"""
For `Smali <http://code.google.com/p/smali/>`_ (Android/Dalvik) assembly
code.
.. versionadded:: 1.6
"""
name = 'Smali'
aliases = ['smali']
filenames = ['*.smali']
mimetypes = ['text/smali']
tokens = {
'root': [
include('comment'),
include('label'),
include('field'),
include('method'),
include('class'),
include('directive'),
include('access-modifier'),
include('instruction'),
include('literal'),
include('punctuation'),
include('type'),
include('whitespace')
],
'directive': [
(r'^([ \t]*)(\.(?:class|super|implements|field|subannotation|annotation|'
r'enum|method|registers|locals|array-data|packed-switch|'
r'sparse-switch|catchall|catch|line|parameter|local|prologue|'
r'epilogue|source))', bygroups(Whitespace, Keyword)),
(r'^([ \t]*)(\.end)( )(field|subannotation|annotation|method|array-data|'
'packed-switch|sparse-switch|parameter|local)', bygroups(Whitespace, Keyword, Whitespace, Keyword)),
(r'^([ \t]*)(\.restart)( )(local)', bygroups(Whitespace, Keyword, Whitespace, Keyword)),
],
'access-modifier': [
(r'(public|private|protected|static|final|synchronized|bridge|'
r'varargs|native|abstract|strictfp|synthetic|constructor|'
r'declared-synchronized|interface|enum|annotation|volatile|'
r'transient)', Keyword),
],
'whitespace': [
(r'\n', Whitespace),
(r'\s+', Whitespace),
],
'instruction': [
(r'\b[vp]\d+\b', Name.Builtin), # registers
(r'(\b[a-z][A-Za-z0-9/-]+)(\s+)', bygroups(Text, Whitespace)), # instructions
],
'literal': [
(r'".*"', String),
(r'0x[0-9A-Fa-f]+t?', Number.Hex),
(r'[0-9]*\.[0-9]+([eE][0-9]+)?[fd]?', Number.Float),
(r'[0-9]+L?', Number.Integer),
],
'field': [
(r'(\$?\b)([\w$]*)(:)',
bygroups(Punctuation, Name.Variable, Punctuation)),
],
'method': [
(r'<(?:cl)?init>', Name.Function), # constructor
(r'(\$?\b)([\w$]*)(\()',
bygroups(Punctuation, Name.Function, Punctuation)),
],
'label': [
(r':\w+', Name.Label),
],
'class': [
# class names in the form Lcom/namespace/ClassName;
# I only want to color the ClassName part, so the namespace part is
# treated as 'Text'
(r'(L)((?:[\w$]+/)*)([\w$]+)(;)',
bygroups(Keyword.Type, Text, Name.Class, Text)),
],
'punctuation': [
(r'->', Punctuation),
(r'[{},():=.-]', Punctuation),
],
'type': [
(r'[ZBSCIJFDV\[]+', Keyword.Type),
],
'comment': [
(r'#.*?\n', Comment),
],
}
def analyse_text(text):
score = 0
if re.search(r'^\s*\.class\s', text, re.MULTILINE):
score += 0.5
if re.search(r'\b((check-cast|instance-of|throw-verification-error'
r')\b|(-to|add|[ais]get|[ais]put|and|cmpl|const|div|'
r'if|invoke|move|mul|neg|not|or|rem|return|rsub|shl|'
r'shr|sub|ushr)[-/])|{|}', text, re.MULTILINE):
score += 0.3
if re.search(r'(\.(catchall|epilogue|restart local|prologue)|'
r'\b(array-data|class-change-error|declared-synchronized|'
r'(field|inline|vtable)@0x[0-9a-fA-F]|generic-error|'
r'illegal-class-access|illegal-field-access|'
r'illegal-method-access|instantiation-error|no-error|'
r'no-such-class|no-such-field|no-such-method|'
r'packed-switch|sparse-switch))\b', text, re.MULTILINE):
score += 0.6
return score
|