1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
|
//===------ ManualOptimizer.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Handle pragma/metadata-directed transformations.
//
//===----------------------------------------------------------------------===//
#include "polly/ManualOptimizer.h"
#include "polly/DependenceInfo.h"
#include "polly/Options.h"
#include "polly/ScheduleTreeTransform.h"
#include "polly/Support/ScopHelper.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#define DEBUG_TYPE "polly-opt-manual"
using namespace polly;
using namespace llvm;
namespace {
static cl::opt<bool> IgnoreDepcheck(
"polly-pragma-ignore-depcheck",
cl::desc("Skip the dependency check for pragma-based transformations"),
cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
/// Same as llvm::hasUnrollTransformation(), but takes a LoopID as argument
/// instead of a Loop.
static TransformationMode hasUnrollTransformation(MDNode *LoopID) {
if (getBooleanLoopAttribute(LoopID, "llvm.loop.unroll.disable"))
return TM_SuppressedByUser;
Optional<int> Count =
getOptionalIntLoopAttribute(LoopID, "llvm.loop.unroll.count");
if (Count.hasValue())
return Count.getValue() == 1 ? TM_SuppressedByUser : TM_ForcedByUser;
if (getBooleanLoopAttribute(LoopID, "llvm.loop.unroll.enable"))
return TM_ForcedByUser;
if (getBooleanLoopAttribute(LoopID, "llvm.loop.unroll.full"))
return TM_ForcedByUser;
if (hasDisableAllTransformsHint(LoopID))
return TM_Disable;
return TM_Unspecified;
}
// Return the first DebugLoc in the list.
static DebugLoc findFirstDebugLoc(MDNode *MD) {
if (MD) {
for (const MDOperand &X : drop_begin(MD->operands(), 1)) {
Metadata *A = X.get();
if (!isa<DILocation>(A))
continue;
return cast<DILocation>(A);
}
}
return {};
}
static DebugLoc findTransformationDebugLoc(MDNode *LoopMD, StringRef Name) {
// First find dedicated transformation location
// (such as the location of #pragma clang loop)
MDNode *MD = findOptionMDForLoopID(LoopMD, Name);
if (DebugLoc K = findFirstDebugLoc(MD))
return K;
// Otherwise, fall back to the location of the loop itself
return findFirstDebugLoc(LoopMD);
}
/// Apply full or partial unrolling.
static isl::schedule applyLoopUnroll(MDNode *LoopMD,
isl::schedule_node BandToUnroll) {
TransformationMode UnrollMode = ::hasUnrollTransformation(LoopMD);
if (UnrollMode & TM_Disable)
return {};
assert(!BandToUnroll.is_null());
// TODO: Isl's codegen also supports unrolling by isl_ast_build via
// isl_schedule_node_band_set_ast_build_options({ unroll[x] }) which would be
// more efficient because the content duplication is delayed. However, the
// unrolled loop could be input of another loop transformation which expects
// the explicit schedule nodes. That is, we would need this explicit expansion
// anyway and using the ISL codegen option is a compile-time optimization.
int64_t Factor = getOptionalIntLoopAttribute(LoopMD, "llvm.loop.unroll.count")
.getValueOr(0);
bool Full = getBooleanLoopAttribute(LoopMD, "llvm.loop.unroll.full");
assert((!Full || !(Factor > 0)) &&
"Cannot unroll fully and partially at the same time");
if (Full)
return applyFullUnroll(BandToUnroll);
if (Factor > 0)
return applyPartialUnroll(BandToUnroll, Factor);
// For heuristic unrolling, fall back to LLVM's LoopUnroll pass.
return {};
}
static isl::schedule applyLoopFission(MDNode *LoopMD,
isl::schedule_node BandToFission) {
// TODO: Make it possible to selectively fission substatements.
// TODO: Apply followup loop properties.
// TODO: Instead of fission every statement, find the maximum set that does
// not cause a dependency violation.
return applyMaxFission(BandToFission);
}
// Return the properties from a LoopID. Scalar properties are ignored.
static auto getLoopMDProps(MDNode *LoopMD) {
return map_range(
make_filter_range(
drop_begin(LoopMD->operands(), 1),
[](const MDOperand &MDOp) { return isa<MDNode>(MDOp.get()); }),
[](const MDOperand &MDOp) { return cast<MDNode>(MDOp.get()); });
}
/// Recursively visit all nodes in a schedule, loop for loop-transformations
/// metadata and apply the first encountered.
class SearchTransformVisitor
: public RecursiveScheduleTreeVisitor<SearchTransformVisitor> {
private:
using BaseTy = RecursiveScheduleTreeVisitor<SearchTransformVisitor>;
BaseTy &getBase() { return *this; }
const BaseTy &getBase() const { return *this; }
polly::Scop *S;
const Dependences *D;
OptimizationRemarkEmitter *ORE;
// Set after a transformation is applied. Recursive search must be aborted
// once this happens to ensure that any new followup transformation is
// transformed in innermost-first order.
isl::schedule Result;
/// Check wether a schedule after a transformation is legal. Return the old
/// schedule without the transformation.
isl::schedule
checkDependencyViolation(llvm::MDNode *LoopMD, llvm::Value *CodeRegion,
const isl::schedule_node &OrigBand,
StringRef DebugLocAttr, StringRef TransPrefix,
StringRef RemarkName, StringRef TransformationName) {
if (D->isValidSchedule(*S, Result))
return Result;
LLVMContext &Ctx = LoopMD->getContext();
LLVM_DEBUG(dbgs() << "Dependency violation detected\n");
DebugLoc TransformLoc = findTransformationDebugLoc(LoopMD, DebugLocAttr);
if (IgnoreDepcheck) {
LLVM_DEBUG(dbgs() << "Still accepting transformation due to "
"-polly-pragma-ignore-depcheck\n");
if (ORE) {
ORE->emit(
OptimizationRemark(DEBUG_TYPE, RemarkName, TransformLoc, CodeRegion)
<< (Twine("Could not verify dependencies for ") +
TransformationName +
"; still applying because of -polly-pragma-ignore-depcheck")
.str());
}
return Result;
}
LLVM_DEBUG(dbgs() << "Rolling back transformation\n");
if (ORE) {
ORE->emit(DiagnosticInfoOptimizationFailure(DEBUG_TYPE, RemarkName,
TransformLoc, CodeRegion)
<< (Twine("not applying ") + TransformationName +
": cannot ensure semantic equivalence due to possible "
"dependency violations")
.str());
}
// If illegal, revert and remove the transformation to not risk re-trying
// indefintely.
MDNode *NewLoopMD =
makePostTransformationMetadata(Ctx, LoopMD, {TransPrefix}, {});
BandAttr *Attr = getBandAttr(OrigBand);
Attr->Metadata = NewLoopMD;
// Roll back old schedule.
return OrigBand.get_schedule();
}
public:
SearchTransformVisitor(polly::Scop *S, const Dependences *D,
OptimizationRemarkEmitter *ORE)
: S(S), D(D), ORE(ORE) {}
static isl::schedule applyOneTransformation(polly::Scop *S,
const Dependences *D,
OptimizationRemarkEmitter *ORE,
const isl::schedule &Sched) {
SearchTransformVisitor Transformer(S, D, ORE);
Transformer.visit(Sched);
return Transformer.Result;
}
void visitBand(isl::schedule_node_band Band) {
// Transform inner loops first (depth-first search).
getBase().visitBand(Band);
if (!Result.is_null())
return;
// Since it is (currently) not possible to have a BandAttr marker that is
// specific to each loop in a band, we only support single-loop bands.
if (isl_schedule_node_band_n_member(Band.get()) != 1)
return;
BandAttr *Attr = getBandAttr(Band);
if (!Attr) {
// Band has no attribute.
return;
}
// CodeRegion used but ORE to determine code hotness.
// TODO: Works only for original loop; for transformed loops, should track
// where the loop's body code comes from.
Loop *Loop = Attr->OriginalLoop;
Value *CodeRegion = nullptr;
if (Loop)
CodeRegion = Loop->getHeader();
MDNode *LoopMD = Attr->Metadata;
if (!LoopMD)
return;
// Iterate over loop properties to find the first transformation.
// FIXME: If there are more than one transformation in the LoopMD (making
// the order of transformations ambiguous), all others are silently ignored.
for (MDNode *MD : getLoopMDProps(LoopMD)) {
auto *NameMD = dyn_cast<MDString>(MD->getOperand(0).get());
if (!NameMD)
continue;
StringRef AttrName = NameMD->getString();
// Honor transformation order; transform the first transformation in the
// list first.
if (AttrName == "llvm.loop.unroll.enable" ||
AttrName == "llvm.loop.unroll.count" ||
AttrName == "llvm.loop.unroll.full") {
Result = applyLoopUnroll(LoopMD, Band);
if (!Result.is_null())
return;
} else if (AttrName == "llvm.loop.distribute.enable") {
Result = applyLoopFission(LoopMD, Band);
if (!Result.is_null())
Result = checkDependencyViolation(
LoopMD, CodeRegion, Band, "llvm.loop.distribute.loc",
"llvm.loop.distribute.", "FailedRequestedFission",
"loop fission/distribution");
if (!Result.is_null())
return;
}
// not a loop transformation; look for next property
}
}
void visitNode(isl::schedule_node Other) {
if (!Result.is_null())
return;
getBase().visitNode(Other);
}
};
} // namespace
isl::schedule
polly::applyManualTransformations(Scop *S, isl::schedule Sched,
const Dependences &D,
OptimizationRemarkEmitter *ORE) {
// Search the loop nest for transformations until fixpoint.
while (true) {
isl::schedule Result =
SearchTransformVisitor::applyOneTransformation(S, &D, ORE, Sched);
if (Result.is_null()) {
// No (more) transformation has been found.
break;
}
// Use transformed schedule and look for more transformations.
Sched = Result;
}
return Sched;
}
|