1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
|
"""
pygments.formatter
~~~~~~~~~~~~~~~~~~
Base formatter class.
:copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
import codecs
from pygments.util import get_bool_opt
from pygments.styles import get_style_by_name
# Public API of this module: only the base class is exported by
# ``from pygments.formatter import *``.
__all__ = ['Formatter']
def _lookup_style(style):
if isinstance(style, str):
return get_style_by_name(style)
return style
class Formatter:
    """
    Converts a token stream to text.

    Formatters should have attributes to help selecting them. These
    are similar to the corresponding :class:`~pygments.lexer.Lexer`
    attributes.

    .. autoattribute:: name
       :no-value:

    .. autoattribute:: aliases
       :no-value:

    .. autoattribute:: filenames
       :no-value:

    You can pass options as keyword arguments to the constructor.
    All formatters accept these basic options:

    ``style``
        The style to use, can be a string or a Style subclass
        (default: "default"). Not used by e.g. the
        TerminalFormatter.
    ``full``
        Tells the formatter to output a "full" document, i.e.
        a complete self-contained document. This doesn't have
        any effect for some formatters (default: false).
    ``title``
        If ``full`` is true, the title that should be used to
        caption the document (default: '').
    ``encoding``
        If given, must be an encoding name. This will be used to
        convert the Unicode token strings to byte strings in the
        output. If it is "" or None, Unicode strings will be written
        to the output file, which most file-like objects do not
        support (default: None).
    ``outencoding``
        Overrides ``encoding`` if given.

    """

    #: Full name for the formatter, in human-readable form.
    name = None

    #: A list of short, unique identifiers that can be used to lookup
    #: the formatter from a list, e.g. using :func:`.get_formatter_by_name()`.
    aliases = []

    #: A list of fnmatch patterns that match filenames for which this
    #: formatter can produce output. The patterns in this list should be unique
    #: among all formatters.
    filenames = []

    #: If True, this formatter outputs Unicode strings when no encoding
    #: option is given.
    unicodeoutput = True

    def __init__(self, **options):
        """
        As with lexers, this constructor takes arbitrary optional arguments,
        and if you override it, you should first process your own options, then
        call the base class implementation.

        The options ``style``, ``full``, ``title``, ``encoding`` and
        ``outencoding`` are read here and stored on the instance as
        ``self.style``, ``self.full``, ``self.title`` and ``self.encoding``.
        The complete option dictionary is kept as ``self.options``.
        """
        self.style = _lookup_style(options.get('style', 'default'))
        self.full = get_bool_opt(options, 'full', False)
        self.title = options.get('title', '')
        # An empty string means "no encoding", exactly like None.
        self.encoding = options.get('encoding', None) or None
        if self.encoding in ('guess', 'chardet'):
            # can happen for e.g. pygmentize -O encoding=guess
            self.encoding = 'utf-8'
        # ``outencoding`` takes precedence over ``encoding`` when it is set
        # to a non-empty value.
        self.encoding = options.get('outencoding') or self.encoding
        self.options = options

    def get_style_defs(self, arg=''):
        """
        This method must return statements or declarations suitable to define
        the current style for subsequent highlighted text (e.g. CSS classes
        in the `HTMLFormatter`).

        The optional argument `arg` can be used to modify the generation and
        is formatter dependent (it is standardized because it can be given on
        the command line).

        This method is called by the ``-S`` :doc:`command-line option <cmdline>`,
        the `arg` is then given by the ``-a`` option.

        The base implementation returns an empty string.
        """
        return ''

    def format(self, tokensource, outfile):
        """
        This method must format the tokens from the `tokensource` iterable and
        write the formatted version to the file object `outfile`.

        Formatter options can control how exactly the tokens are converted.

        If ``self.encoding`` is set, `outfile` is first wrapped in a stream
        writer for that encoding, so the text written by the formatter is
        encoded before it reaches `outfile`. The actual work is delegated to
        ``self.format_unencoded(tokensource, outfile)``, whose result is
        returned. That method is not defined in this base class, so
        subclasses are expected to provide it (or to override `format`).
        """
        if self.encoding:
            # wrap the outfile in a StreamWriter
            outfile = codecs.getwriter(self.encoding)(outfile)
        return self.format_unencoded(tokensource, outfile)
|