1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
|
//===- lib/DebugInfo/Symbolize/Markup.cpp ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file defines the log symbolizer markup data model and parser.
///
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/Symbolize/Markup.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
namespace llvm {
namespace symbolize {
// Regular expression source used to initialize the parser's SGRSyntax matcher,
// which picks SGR control codes out of text that lies outside markup elements.
// The pattern is unanchored, so it can match anywhere in the searched text.
// Matches the following:
// "\033[0m"
// "\033[1m"
// "\033[30m" -- "\033[37m"
// i.e. an ESC byte, '[', one of the parameters 0, 1 or 30-37, and a final 'm'.
static const char SGRSyntaxStr[] = "\033\\[([0-1]|3[0-7])m";
// Constructs a parser. MultilineTags is the set of tags that
// parseMultiLineBegin() accepts as openers of an element spanning several
// lines; the set is taken by value and moved into the member of the same name.
// The SGR matcher is initialized from SGRSyntaxStr.
MarkupParser::MarkupParser(StringSet<> MultilineTags)
: MultilineTags(std::move(MultilineTags)), SGRSyntax(SGRSyntaxStr) {}
// Returns the leading part of Str that stops just before Pos. Pos is expected
// to be an iterator into Str; the prefix length is its offset from the start.
static StringRef takeTo(StringRef Str, StringRef::iterator Pos) {
  const size_t PrefixLen = Pos - Str.begin();
  return Str.take_front(PrefixLen);
}
// Shrinks Str in place so that it starts at Pos, discarding everything in
// front of it. Pos is expected to be an iterator into Str.
static void advanceTo(StringRef &Str, StringRef::iterator Pos) {
  const size_t Consumed = Pos - Str.begin();
  Str = Str.drop_front(Consumed);
}
// Makes Line the text that subsequent nextNode() calls consume. Nodes still
// buffered from the previous line are dropped and the storage of the last
// finished multi-line element is cleared. A multi-line element that is still
// in progress is deliberately left untouched so that it can continue here.
void MarkupParser::parseLine(StringRef Line) {
  // The parameter shadows the member, hence the explicit this->.
  this->Line = Line;

  // Forget everything derived from the previous line.
  FinishedMultiline.clear();
  Buffer.clear();
  NextIdx = 0;
}
std::optional<MarkupNode> MarkupParser::nextNode() {
// Pull something out of the buffer if possible.
if (!Buffer.empty()) {
if (NextIdx < Buffer.size())
return std::move(Buffer[NextIdx++]);
NextIdx = 0;
Buffer.clear();
}
// The buffer is empty, so parse the next bit of the line.
if (Line.empty())
return std::nullopt;
if (!InProgressMultiline.empty()) {
if (std::optional<StringRef> MultilineEnd = parseMultiLineEnd(Line)) {
llvm::append_range(InProgressMultiline, *MultilineEnd);
assert(FinishedMultiline.empty() &&
"At most one multi-line element can be finished at a time.");
FinishedMultiline.swap(InProgressMultiline);
// Parse the multi-line element as if it were contiguous.
advanceTo(Line, MultilineEnd->end());
return *parseElement(FinishedMultiline);
}
// The whole line is part of the multi-line element.
llvm::append_range(InProgressMultiline, Line);
Line = Line.drop_front(Line.size());
return std::nullopt;
}
// Find the first valid markup element, if any.
if (std::optional<MarkupNode> Element = parseElement(Line)) {
parseTextOutsideMarkup(takeTo(Line, Element->Text.begin()));
Buffer.push_back(std::move(*Element));
advanceTo(Line, Element->Text.end());
return nextNode();
}
// Since there were no valid elements remaining, see if the line opens a
// multi-line element.
if (std::optional<StringRef> MultilineBegin = parseMultiLineBegin(Line)) {
// Emit any text before the element.
parseTextOutsideMarkup(takeTo(Line, MultilineBegin->begin()));
// Begin recording the multi-line element.
llvm::append_range(InProgressMultiline, *MultilineBegin);
Line = Line.drop_front(Line.size());
return nextNode();
}
// The line doesn't contain any more markup elements, so emit it as text.
parseTextOutsideMarkup(Line);
Line = Line.drop_front(Line.size());
return nextNode();
}
// Discards any buffered nodes and the unparsed remainder of the current line.
// If a multi-line element is still in progress, the text accumulated for it is
// moved into FinishedMultiline and re-parsed as text outside markup, so that it
// becomes available through nextNode().
void MarkupParser::flush() {
  Buffer.clear();
  NextIdx = 0;
  Line = {};
  if (InProgressMultiline.empty())
    return;
  // FinishedMultiline may still hold an element completed on the last parsed
  // line (a line can close one multi-line element and open another). Empty it
  // first; otherwise the swap would leave that stale element in
  // InProgressMultiline and a later line would be treated as its continuation.
  FinishedMultiline.clear();
  FinishedMultiline.swap(InProgressMultiline);
  parseTextOutsideMarkup(FinishedMultiline);
}
// Locates the first valid markup element, "{{{tag}}}" or "{{{tag:fields}}}",
// in Line and returns it; returns std::nullopt when no valid element exists.
//
// A "{{{" ... "}}}" span whose tag is empty is not a valid element: it is
// skipped and the search resumes right after it. Fields are the ':'-separated
// pieces following the tag; a lone trailing ':' produces one empty field.
std::optional<MarkupNode> MarkupParser::parseElement(StringRef Line) {
  for (;;) {
    // Delimit the candidate by its opening and closing markers.
    const size_t Open = Line.find("{{{");
    if (Open == StringRef::npos)
      return std::nullopt;
    const size_t Close = Line.find("}}}", Open + 3);
    if (Close == StringRef::npos)
      return std::nullopt;
    const size_t PastClose = Close + 3;

    MarkupNode Node;
    Node.Text = Line.slice(Open, PastClose);
    // Continue after this candidate should it turn out to be invalid.
    Line = Line.substr(PastClose);

    // Strip the markers and separate the tag from the field text.
    StringRef Body = Node.Text.drop_front(3).drop_back(3);
    auto TagAndFields = Body.split(':');
    Node.Tag = TagAndFields.first;
    if (Node.Tag.empty())
      continue;

    StringRef FieldText = TagAndFields.second;
    if (FieldText.empty()) {
      // "tag:" carries a single empty field, whereas "tag" carries none.
      if (Body.back() == ':')
        Node.Fields.push_back(FieldText);
    } else {
      FieldText.split(Node.Fields, ":");
    }
    return Node;
  }
}
// Builds a node that carries only Text; every other member keeps its
// default-constructed value.
static MarkupNode textNode(StringRef Text) {
  MarkupNode Result;
  Result.Text = Text;
  return Result;
}
// Appends text nodes for a region known to contain no markup elements. The
// region can still hold SGR control codes, so it is cut into alternating runs:
// each control code becomes its own node, as does every non-empty stretch of
// text before, between and after the codes. Empty input adds nothing.
void MarkupParser::parseTextOutsideMarkup(StringRef Text) {
  if (Text.empty())
    return;

  SmallVector<StringRef> Groups;
  while (SGRSyntax.match(Text, &Groups)) {
    // The first entry is the full control code that was matched.
    const StringRef Code = Groups.front();
    // Text in front of the control code, if any, goes out first.
    if (Code.begin() != Text.begin())
      Buffer.push_back(textNode(takeTo(Text, Code.begin())));
    Buffer.push_back(textNode(Code));
    advanceTo(Text, Code.end());
  }

  // Trailing text after the last control code.
  if (!Text.empty())
    Buffer.push_back(textNode(Text));
}
// For a line already known to hold no valid markup element, checks whether it
// ends with the opening of a multi-line element. On success the returned
// StringRef covers the line from the opening "{{{" to its end; otherwise
// std::nullopt is returned.
std::optional<StringRef> MarkupParser::parseMultiLineBegin(StringRef Line) {
  // Only the last begin marker on the line can open a multi-line element.
  const size_t Open = Line.rfind("{{{");
  if (Open == StringRef::npos)
    return std::nullopt;
  const size_t TagStart = Open + 3;

  // An end marker after it rules out a multi-line element.
  if (Line.find("}}}", TagStart) != StringRef::npos)
    return std::nullopt;

  // The tag runs up to the first ':' and must be registered as multi-line.
  const size_t Colon = Line.find(':', TagStart);
  if (Colon == StringRef::npos)
    return std::nullopt;
  if (MultilineTags.contains(Line.slice(TagStart, Colon)))
    return Line.substr(Open);
  return std::nullopt;
}
// Checks whether Line contains the end of the multi-line element currently in
// progress. On success returns the start of the line up to and including the
// first "}}}"; returns std::nullopt when the line has no end marker.
std::optional<StringRef> MarkupParser::parseMultiLineEnd(StringRef Line) {
  const size_t Close = Line.find("}}}");
  if (Close != StringRef::npos)
    return Line.take_front(Close + 3);
  return std::nullopt;
}
} // end namespace symbolize
} // end namespace llvm
|