1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
|
"""
pygments.lexers.hexdump
~~~~~~~~~~~~~~~~~~~~~~~
Lexers for hexadecimal dumps.
:copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
from pygments.lexer import RegexLexer, bygroups, include
from pygments.token import Name, Number, String, Punctuation, Whitespace
__all__ = ['HexdumpLexer']
class HexdumpLexer(RegexLexer):
"""
For typical hex dump output formats by the UNIX and GNU/Linux tools ``hexdump``,
``hd``, ``hexcat``, ``od`` and ``xxd``, and the DOS tool ``DEBUG``. For example:
.. sourcecode:: hexdump
00000000 7f 45 4c 46 02 01 01 00 00 00 00 00 00 00 00 00 |.ELF............|
00000010 02 00 3e 00 01 00 00 00 c5 48 40 00 00 00 00 00 |..>......H@.....|
The specific supported formats are the outputs of:
* ``hexdump FILE``
* ``hexdump -C FILE`` -- the `canonical` format used in the example.
* ``hd FILE`` -- same as ``hexdump -C FILE``.
* ``hexcat FILE``
* ``od -t x1z FILE``
* ``xxd FILE``
* ``DEBUG.EXE FILE.COM`` and entering ``d`` to the prompt.
.. versionadded:: 2.1
"""
name = 'Hexdump'
aliases = ['hexdump']
hd = r'[0-9A-Ha-h]'
tokens = {
'root': [
(r'\n', Whitespace),
include('offset'),
(r'('+hd+r'{2})(\-)('+hd+r'{2})',
bygroups(Number.Hex, Punctuation, Number.Hex)),
(hd+r'{2}', Number.Hex),
(r'(\s{2,3})(\>)(.{16})(\<)$',
bygroups(Whitespace, Punctuation, String, Punctuation), 'bracket-strings'),
(r'(\s{2,3})(\|)(.{16})(\|)$',
bygroups(Whitespace, Punctuation, String, Punctuation), 'piped-strings'),
(r'(\s{2,3})(\>)(.{1,15})(\<)$',
bygroups(Whitespace, Punctuation, String, Punctuation)),
(r'(\s{2,3})(\|)(.{1,15})(\|)$',
bygroups(Whitespace, Punctuation, String, Punctuation)),
(r'(\s{2,3})(.{1,15})$', bygroups(Whitespace, String)),
(r'(\s{2,3})(.{16}|.{20})$', bygroups(Whitespace, String), 'nonpiped-strings'),
(r'\s', Whitespace),
(r'^\*', Punctuation),
],
'offset': [
(r'^('+hd+'+)(:)', bygroups(Name.Label, Punctuation), 'offset-mode'),
(r'^'+hd+'+', Name.Label),
],
'offset-mode': [
(r'\s', Whitespace, '#pop'),
(hd+'+', Name.Label),
(r':', Punctuation)
],
'piped-strings': [
(r'\n', Whitespace),
include('offset'),
(hd+r'{2}', Number.Hex),
(r'(\s{2,3})(\|)(.{1,16})(\|)$',
bygroups(Whitespace, Punctuation, String, Punctuation)),
(r'\s', Whitespace),
(r'^\*', Punctuation),
],
'bracket-strings': [
(r'\n', Whitespace),
include('offset'),
(hd+r'{2}', Number.Hex),
(r'(\s{2,3})(\>)(.{1,16})(\<)$',
bygroups(Whitespace, Punctuation, String, Punctuation)),
(r'\s', Whitespace),
(r'^\*', Punctuation),
],
'nonpiped-strings': [
(r'\n', Whitespace),
include('offset'),
(r'('+hd+r'{2})(\-)('+hd+r'{2})',
bygroups(Number.Hex, Punctuation, Number.Hex)),
(hd+r'{2}', Number.Hex),
(r'(\s{19,})(.{1,20}?)$', bygroups(Whitespace, String)),
(r'(\s{2,3})(.{1,20})$', bygroups(Whitespace, String)),
(r'\s', Whitespace),
(r'^\*', Punctuation),
],
}
|