contrib/libs/llvm12/tools/polly/lib/Transform/ForwardOpTree.cpp


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115

//===- ForwardOpTree.h ------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Move instructions between statements.
//
//===----------------------------------------------------------------------===//

#include "polly/ForwardOpTree.h"
#include "polly/Options.h"
#include "polly/ScopBuilder.h"
#include "polly/ScopInfo.h"
#include "polly/ScopPass.h"
#include "polly/Support/GICHelper.h"
#include "polly/Support/ISLOStream.h"
#include "polly/Support/ISLTools.h"
#include "polly/Support/VirtualInstruction.h"
#include "polly/ZoneAlgo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "isl/ctx.h"
#include "isl/isl-noexceptions.h"
#include <cassert>
#include <memory>

#define DEBUG_TYPE "polly-optree"

using namespace llvm;
using namespace polly;

static cl::opt<bool>
    AnalyzeKnown("polly-optree-analyze-known",
                 cl::desc("Analyze array contents for load forwarding"),
                 cl::cat(PollyCategory), cl::init(true), cl::Hidden);

static cl::opt<bool>
    NormalizePHIs("polly-optree-normalize-phi",
                  cl::desc("Replace PHIs by their incoming values"),
                  cl::cat(PollyCategory), cl::init(false), cl::Hidden);

static cl::opt<unsigned>
    MaxOps("polly-optree-max-ops",
           cl::desc("Maximum number of ISL operations to invest for known "
                    "analysis; 0=no limit"),
           cl::init(1000000), cl::cat(PollyCategory), cl::Hidden);

STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs");
STATISTIC(KnownOutOfQuota,
          "Analyses aborted because max_operations was reached");

STATISTIC(TotalInstructionsCopied, "Number of copied instructions");
STATISTIC(TotalKnownLoadsForwarded,
          "Number of forwarded loads because their value was known");
STATISTIC(TotalReloads, "Number of reloaded values");
STATISTIC(TotalReadOnlyCopied, "Number of copied read-only accesses");
STATISTIC(TotalForwardedTrees, "Number of forwarded operand trees");
STATISTIC(TotalModifiedStmts,
          "Number of statements with at least one forwarded tree");

STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree");

STATISTIC(NumValueWrites, "Number of scalar value writes after OpTree");
STATISTIC(NumValueWritesInLoops,
          "Number of scalar value writes nested in affine loops after OpTree");
STATISTIC(NumPHIWrites, "Number of scalar phi writes after OpTree");
STATISTIC(NumPHIWritesInLoops,
          "Number of scalar phi writes nested in affine loops after OpTree");
STATISTIC(NumSingletonWrites, "Number of singleton writes after OpTree");
STATISTIC(NumSingletonWritesInLoops,
          "Number of singleton writes nested in affine loops after OpTree");

namespace {

/// The state of whether an operand tree was/can be forwarded.
///
/// The items apply to an instructions and its operand tree with the instruction
/// as the root element. If the value in question is not an instruction in the
/// SCoP, it can be a leaf of an instruction's operand tree.
enum ForwardingDecision {
  /// An uninitialized value.
  FD_Unknown,

  /// The root instruction or value cannot be forwarded at all.
  FD_CannotForward,

  /// The root instruction or value can be forwarded as a leaf of a larger
  /// operand tree.
  /// It does not make sense to move the value itself, it would just replace it
  /// by a use of itself. For instance, a constant "5" used in a statement can
  /// be forwarded, but it would just replace it by the same constant "5".
  /// However, it makes sense to move as an operand of
  ///
  ///   %add = add 5, 5
  ///
  /// where "5" is moved as part of a larger operand tree. "5" would be placed
  /// (disregarding for a moment that literal constants don't have a location
  /// and can be used anywhere) into the same statement as %add would.
  FD_CanForwardLeaf,

  /// The root instruction can be forwarded and doing so avoids a scalar
  /// dependency.
  ///
  /// This can be either because the operand tree can be moved to the target
  /// statement, or a memory access is redirected to read from a different
  /// location.
  FD_CanForwardProfitably,

  /// A forwarding method cannot be applied to the operand tree.
  /// The difference to FD_CannotForward is that there might be other methods
  /// that can handle it.
  FD_NotApplicable
};

/// Represents the evaluation of and action to taken when forwarding a value
/// from an operand tree.
struct ForwardingAction {
  using KeyTy = std::pair<Value *, ScopStmt *>;

  /// Evaluation of forwarding a value.
  ForwardingDecision Decision = FD_Unknown;

  /// Callback to execute the forwarding.
  /// Returning true allows deleting the polly::MemoryAccess if the value is the
  /// root of the operand tree (and its elimination the reason why the
  /// forwarding is done). Return false if the MemoryAccess is reused or there
  /// might be other users of the read accesses. In the letter case the
  /// polly::SimplifyPass can remove dead MemoryAccesses.
  std::function<bool()> Execute = []() -> bool {
    llvm_unreachable("unspecified how to forward");
  };

  /// Other values that need to be forwarded if this action is executed. Their
  /// actions are executed after this one.
  SmallVector<KeyTy, 4> Depends;

  /// Named ctor: The method creating this object does not apply to the kind of
  /// value, but other methods may.
  static ForwardingAction notApplicable() {
    ForwardingAction Result;
    Result.Decision = FD_NotApplicable;
    return Result;
  }

  /// Named ctor: The value cannot be forwarded.
  static ForwardingAction cannotForward() {
    ForwardingAction Result;
    Result.Decision = FD_CannotForward;
    return Result;
  }

  /// Named ctor: The value can just be used without any preparation.
  static ForwardingAction triviallyForwardable(bool IsProfitable, Value *Val) {
    ForwardingAction Result;
    Result.Decision =
        IsProfitable ? FD_CanForwardProfitably : FD_CanForwardLeaf;
    Result.Execute = [=]() {
      LLVM_DEBUG(dbgs() << "    trivially forwarded: " << *Val << "\n");
      return true;
    };
    return Result;
  }

  /// Name ctor: The value can be forwarded by executing an action.
  static ForwardingAction canForward(std::function<bool()> Execute,
                                     ArrayRef<KeyTy> Depends,
                                     bool IsProfitable) {
    ForwardingAction Result;
    Result.Decision =
        IsProfitable ? FD_CanForwardProfitably : FD_CanForwardLeaf;
    Result.Execute = std::move(Execute);
    Result.Depends.append(Depends.begin(), Depends.end());
    return Result;
  }
};

/// Implementation of operand tree forwarding for a specific SCoP.
///
/// For a statement that requires a scalar value (through a value read
/// MemoryAccess), see if its operand can be moved into the statement. If so,
/// the MemoryAccess is removed and the all the operand tree instructions are
/// moved into the statement. All original instructions are left in the source
/// statements. The simplification pass can clean these up.
class ForwardOpTreeImpl : ZoneAlgorithm {
private:
  using MemoizationTy = DenseMap<ForwardingAction::KeyTy, ForwardingAction>;

  /// Scope guard to limit the number of isl operations for this pass.
  IslMaxOperationsGuard &MaxOpGuard;

  /// How many instructions have been copied to other statements.
  int NumInstructionsCopied = 0;

  /// Number of loads forwarded because their value was known.
  int NumKnownLoadsForwarded = 0;

  /// Number of values reloaded from known array elements.
  int NumReloads = 0;

  /// How many read-only accesses have been copied.
  int NumReadOnlyCopied = 0;

  /// How many operand trees have been forwarded.
  int NumForwardedTrees = 0;

  /// Number of statements with at least one forwarded operand tree.
  int NumModifiedStmts = 0;

  /// Whether we carried out at least one change to the SCoP.
  bool Modified = false;

  /// Cache of how to forward values.
  /// The key of this map is the llvm::Value to be forwarded and the
  /// polly::ScopStmt it is forwarded from. This is because the same llvm::Value
  /// can evaluate differently depending on where it is evaluate. For instance,
  /// a synthesizable Scev represents a recurrence with an loop but the loop's
  /// exit value if evaluated after the loop.
  /// The cached results are only valid for the current TargetStmt.
  /// CHECKME: ScalarEvolution::getScevAtScope should take care for getting the
  /// exit value when instantiated outside of the loop. The primary concern is
  /// ambiguity when crossing PHI nodes, which currently is not supported.
  MemoizationTy ForwardingActions;

  /// Contains the zones where array elements are known to contain a specific
  /// value.
  /// { [Element[] -> Zone[]] -> ValInst[] }
  /// @see computeKnown()
  isl::union_map Known;

  /// Translator for newly introduced ValInsts to already existing ValInsts such
  /// that new introduced load instructions can reuse the Known analysis of its
  /// original load. { ValInst[] -> ValInst[] }
  isl::union_map Translator;

  /// Get list of array elements that do contain the same ValInst[] at Domain[].
  ///
  /// @param ValInst { Domain[] -> ValInst[] }
  ///                The values for which we search for alternative locations,
  ///                per statement instance.
  ///
  /// @return { Domain[] -> Element[] }
  ///         For each statement instance, the array elements that contain the
  ///         same ValInst.
  isl::union_map findSameContentElements(isl::union_map ValInst) {
    assert(!ValInst.is_single_valued().is_false());

    // { Domain[] }
    isl::union_set Domain = ValInst.domain();

    // { Domain[] -> Scatter[] }
    isl::union_map Schedule = getScatterFor(Domain);

    // { Element[] -> [Scatter[] -> ValInst[]] }
    isl::union_map MustKnownCurried =
        convertZoneToTimepoints(Known, isl::dim::in, false, true).curry();

    // { [Domain[] -> ValInst[]] -> Scatter[] }
    isl::union_map DomValSched = ValInst.domain_map().apply_range(Schedule);

    // { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] }
    isl::union_map SchedValDomVal =
        DomValSched.range_product(ValInst.range_map()).reverse();

    // { Element[] -> [Domain[] -> ValInst[]] }
    isl::union_map MustKnownInst = MustKnownCurried.apply_range(SchedValDomVal);

    // { Domain[] -> Element[] }
    isl::union_map MustKnownMap =
        MustKnownInst.uncurry().domain().unwrap().reverse();
    simplify(MustKnownMap);

    return MustKnownMap;
  }

  /// Find a single array element for each statement instance, within a single
  /// array.
  ///
  /// @param MustKnown { Domain[] -> Element[] }
  ///                  Set of candidate array elements.
  /// @param Domain    { Domain[] }
  ///                  The statement instance for which we need elements for.
  ///
  /// @return { Domain[] -> Element[] }
  ///         For each statement instance, an array element out of @p MustKnown.
  ///         All array elements must be in the same array (Polly does not yet
  ///         support reading from different accesses using the same
  ///         MemoryAccess). If no mapping for all of @p Domain exists, returns
  ///         null.
  isl::map singleLocation(isl::union_map MustKnown, isl::set Domain) {
    // { Domain[] -> Element[] }
    isl::map Result;

    // Make irrelevant elements not interfere.
    Domain = Domain.intersect_params(S->getContext());

    // MemoryAccesses can read only elements from a single array
    // (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }).
    // Look through all spaces until we find one that contains at least the
    // wanted statement instance.s
    for (isl::map Map : MustKnown.get_map_list()) {
      // Get the array this is accessing.
      isl::id ArrayId = Map.get_tuple_id(isl::dim::out);
      ScopArrayInfo *SAI = static_cast<ScopArrayInfo *>(ArrayId.get_user());

      // No support for generation of indirect array accesses.
      if (SAI->getBasePtrOriginSAI())
        continue;

      // Determine whether this map contains all wanted values.
      isl::set MapDom = Map.domain();
      if (!Domain.is_subset(MapDom).is_true())
        continue;

      // There might be multiple array elements that contain the same value, but
      // choose only one of them. lexmin is used because it returns a one-value
      // mapping, we do not care about which one.
      // TODO: Get the simplest access function.
      Result = Map.lexmin();
      break;
    }

    return Result;
  }

public:
  ForwardOpTreeImpl(Scop *S, LoopInfo *LI, IslMaxOperationsGuard &MaxOpGuard)
      : ZoneAlgorithm("polly-optree", S, LI), MaxOpGuard(MaxOpGuard) {}

  /// Compute the zones of known array element contents.
  ///
  /// @return True if the computed #Known is usable.
  bool computeKnownValues() {
    isl::union_map MustKnown, KnownFromLoad, KnownFromInit;

    // Check that nothing strange occurs.
    collectCompatibleElts();

    {
      IslQuotaScope QuotaScope = MaxOpGuard.enter();

      computeCommon();
      if (NormalizePHIs)
        computeNormalizedPHIs();
      Known = computeKnown(true, true);

      // Preexisting ValInsts use the known content analysis of themselves.
      Translator = makeIdentityMap(Known.range(), false);
    }

    if (!Known || !Translator || !NormalizeMap) {
      assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota);
      Known = nullptr;
      Translator = nullptr;
      NormalizeMap = nullptr;
      LLVM_DEBUG(dbgs() << "Known analysis exceeded max_operations\n");
      return false;
    }

    KnownAnalyzed++;
    LLVM_DEBUG(dbgs() << "All known: " << Known << "\n");

    return true;
  }

  void printStatistics(raw_ostream &OS, int Indent = 0) {
    OS.indent(Indent) << "Statistics {\n";
    OS.indent(Indent + 4) << "Instructions copied: " << NumInstructionsCopied
                          << '\n';
    OS.indent(Indent + 4) << "Known loads forwarded: " << NumKnownLoadsForwarded
                          << '\n';
    OS.indent(Indent + 4) << "Reloads: " << NumReloads << '\n';
    OS.indent(Indent + 4) << "Read-only accesses copied: " << NumReadOnlyCopied
                          << '\n';
    OS.indent(Indent + 4) << "Operand trees forwarded: " << NumForwardedTrees
                          << '\n';
    OS.indent(Indent + 4) << "Statements with forwarded operand trees: "
                          << NumModifiedStmts << '\n';
    OS.indent(Indent) << "}\n";
  }

  void printStatements(raw_ostream &OS, int Indent = 0) const {
    OS.indent(Indent) << "After statements {\n";
    for (auto &Stmt : *S) {
      OS.indent(Indent + 4) << Stmt.getBaseName() << "\n";
      for (auto *MA : Stmt)
        MA->print(OS);

      OS.indent(Indent + 12);
      Stmt.printInstructions(OS);
    }
    OS.indent(Indent) << "}\n";
  }

  /// Create a new MemoryAccess of type read and MemoryKind::Array.
  ///
  /// @param Stmt           The statement in which the access occurs.
  /// @param LI             The instruction that does the access.
  /// @param AccessRelation The array element that each statement instance
  ///                       accesses.
  ///
  /// @param The newly created access.
  MemoryAccess *makeReadArrayAccess(ScopStmt *Stmt, LoadInst *LI,
                                    isl::map AccessRelation) {
    isl::id ArrayId = AccessRelation.get_tuple_id(isl::dim::out);
    ScopArrayInfo *SAI = reinterpret_cast<ScopArrayInfo *>(ArrayId.get_user());

    // Create a dummy SCEV access, to be replaced anyway.
    SmallVector<const SCEV *, 4> Sizes;
    Sizes.reserve(SAI->getNumberOfDimensions());
    SmallVector<const SCEV *, 4> Subscripts;
    Subscripts.reserve(SAI->getNumberOfDimensions());
    for (unsigned i = 0; i < SAI->getNumberOfDimensions(); i += 1) {
      Sizes.push_back(SAI->getDimensionSize(i));
      Subscripts.push_back(nullptr);
    }

    MemoryAccess *Access =
        new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(),
                         LI->getType(), true, {}, Sizes, LI, MemoryKind::Array);
    S->addAccessFunction(Access);
    Stmt->addAccess(Access, true);

    Access->setNewAccessRelation(AccessRelation);

    return Access;
  }

  /// Forward a load by reading from an array element that contains the same
  /// value. Typically the location it was loaded from.
  ///
  /// @param TargetStmt  The statement the operand tree will be copied to.
  /// @param Inst        The (possibly speculatable) instruction to forward.
  /// @param UseStmt     The statement that uses @p Inst.
  /// @param UseLoop     The loop @p Inst is used in.
  /// @param DefStmt     The statement @p Inst is defined in.
  /// @param DefLoop     The loop which contains @p Inst.
  ///
  /// @return A ForwardingAction object describing the feasibility and
  ///         profitability evaluation and the callback carrying-out the value
  ///         forwarding.
  ForwardingAction forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst,
                                    ScopStmt *UseStmt, Loop *UseLoop,
                                    ScopStmt *DefStmt, Loop *DefLoop) {
    // Cannot do anything without successful known analysis.
    if (Known.is_null() || Translator.is_null() ||
        MaxOpGuard.hasQuotaExceeded())
      return ForwardingAction::notApplicable();

    LoadInst *LI = dyn_cast<LoadInst>(Inst);
    if (!LI)
      return ForwardingAction::notApplicable();

    ForwardingDecision OpDecision =
        forwardTree(TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop);
    switch (OpDecision) {
    case FD_CanForwardProfitably:
    case FD_CanForwardLeaf:
      break;
    case FD_CannotForward:
      return ForwardingAction::cannotForward();
    default:
      llvm_unreachable("Shouldn't return this");
    }

    MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI);
    if (Access) {
      // If the load is already in the statement, no forwarding is necessary.
      // However, it might happen that the LoadInst is already present in the
      // statement's instruction list. In that case we do as follows:
      // - For the evaluation, we can trivially forward it as it is
      //   benefit of forwarding an already present instruction.
      // - For the execution, prepend the instruction (to make it
      //   available to all instructions following in the instruction list), but
      //   do not add another MemoryAccess.
      auto ExecAction = [this, TargetStmt, LI, Access]() -> bool {
        TargetStmt->prependInstruction(LI);
        LLVM_DEBUG(
            dbgs() << "    forwarded known load with preexisting MemoryAccess"
                   << Access << "\n");
        (void)Access;

        NumKnownLoadsForwarded++;
        TotalKnownLoadsForwarded++;
        return true;
      };
      return ForwardingAction::canForward(
          ExecAction, {{LI->getPointerOperand(), DefStmt}}, true);
    }

    // Allow the following Isl calculations (until we return the
    // ForwardingAction, excluding the code inside the lambda that will be
    // executed later) to fail.
    IslQuotaScope QuotaScope = MaxOpGuard.enter();

    // { DomainDef[] -> ValInst[] }
    isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop);
    assert(!isNormalized(ExpectedVal).is_false() &&
           "LoadInsts are always normalized");

    // { DomainUse[] -> DomainTarget[] }
    isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt);

    // { DomainTarget[] -> ValInst[] }
    isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
    isl::union_map TranslatedExpectedVal =
        isl::union_map(TargetExpectedVal).apply_range(Translator);

    // { DomainTarget[] -> Element[] }
    isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);

    isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
    if (!SameVal)
      return ForwardingAction::notApplicable();

    LLVM_DEBUG(dbgs() << "      expected values where " << TargetExpectedVal
                      << "\n");
    LLVM_DEBUG(dbgs() << "      candidate elements where " << Candidates
                      << "\n");

    // { ValInst[] }
    isl::space ValInstSpace = ExpectedVal.get_space().range();

    // After adding a new load to the SCoP, also update the Known content
    // about it. The new load will have a known ValInst of
    // { [DomainTarget[] -> Value[]] }
    // but which -- because it is a copy of it -- has same value as the
    // { [DomainDef[] -> Value[]] }
    // that it replicates. Instead of  cloning the known content of
    // [DomainDef[] -> Value[]]
    // for DomainTarget[], we add a 'translator' that maps
    // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]]
    // before comparing to the known content.
    // TODO: 'Translator' could also be used to map PHINodes to their incoming
    // ValInsts.
    isl::map LocalTranslator;
    if (!ValInstSpace.is_wrapping().is_false()) {
      // { DefDomain[] -> Value[] }
      isl::map ValInsts = ExpectedVal.range().unwrap();

      // { DefDomain[] }
      isl::set DefDomain = ValInsts.domain();

      // { Value[] }
      isl::space ValSpace = ValInstSpace.unwrap().range();

      // { Value[] -> Value[] }
      isl::map ValToVal =
          isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace));

      // { DomainDef[] -> DomainTarget[] }
      isl::map DefToTarget = getDefToTarget(DefStmt, TargetStmt);

      // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value] }
      LocalTranslator = DefToTarget.reverse().product(ValToVal);
      LLVM_DEBUG(dbgs() << "      local translator is " << LocalTranslator
                        << "\n");

      if (!LocalTranslator)
        return ForwardingAction::notApplicable();
    }

    auto ExecAction = [this, TargetStmt, LI, SameVal,
                       LocalTranslator]() -> bool {
      TargetStmt->prependInstruction(LI);
      MemoryAccess *Access = makeReadArrayAccess(TargetStmt, LI, SameVal);
      LLVM_DEBUG(dbgs() << "    forwarded known load with new MemoryAccess"
                        << Access << "\n");
      (void)Access;

      if (LocalTranslator)
        Translator = Translator.add_map(LocalTranslator);

      NumKnownLoadsForwarded++;
      TotalKnownLoadsForwarded++;
      return true;
    };
    return ForwardingAction::canForward(
        ExecAction, {{LI->getPointerOperand(), DefStmt}}, true);
  }

  /// Forward a scalar by redirecting the access to an array element that stores
  /// the same value.
  ///
  /// @param TargetStmt  The statement the operand tree will be copied to.
  /// @param Inst        The scalar to forward.
  /// @param UseStmt     The statement that uses @p Inst.
  /// @param UseLoop     The loop @p Inst is used in.
  /// @param DefStmt     The statement @p Inst is defined in.
  /// @param DefLoop     The loop which contains @p Inst.
  ///
  /// @return A ForwardingAction object describing the feasibility and
  ///         profitability evaluation and the callback carrying-out the value
  ///         forwarding.
  ForwardingAction reloadKnownContent(ScopStmt *TargetStmt, Instruction *Inst,
                                      ScopStmt *UseStmt, Loop *UseLoop,
                                      ScopStmt *DefStmt, Loop *DefLoop) {
    // Cannot do anything without successful known analysis.
    if (Known.is_null() || Translator.is_null() ||
        MaxOpGuard.hasQuotaExceeded())
      return ForwardingAction::notApplicable();

    // Don't spend too much time analyzing whether it can be reloaded.
    IslQuotaScope QuotaScope = MaxOpGuard.enter();

    // { DomainDef[] -> ValInst[] }
    isl::union_map ExpectedVal = makeNormalizedValInst(Inst, UseStmt, UseLoop);

    // { DomainUse[] -> DomainTarget[] }
    isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt);

    // { DomainTarget[] -> ValInst[] }
    isl::union_map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
    isl::union_map TranslatedExpectedVal =
        TargetExpectedVal.apply_range(Translator);

    // { DomainTarget[] -> Element[] }
    isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal);

    isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt));
    simplify(SameVal);
    if (!SameVal)
      return ForwardingAction::notApplicable();

    auto ExecAction = [this, TargetStmt, Inst, SameVal]() {
      MemoryAccess *Access = TargetStmt->lookupInputAccessOf(Inst);
      if (!Access)
        Access = TargetStmt->ensureValueRead(Inst);
      Access->setNewAccessRelation(SameVal);

      LLVM_DEBUG(dbgs() << "    forwarded known content of " << *Inst
                        << " which is " << SameVal << "\n");
      TotalReloads++;
      NumReloads++;
      return false;
    };

    return ForwardingAction::canForward(ExecAction, {}, true);
  }

  /// Forwards a speculatively executable instruction.
  ///
  /// @param TargetStmt  The statement the operand tree will be copied to.
  /// @param UseInst     The (possibly speculatable) instruction to forward.
  /// @param DefStmt     The statement @p UseInst is defined in.
  /// @param DefLoop     The loop which contains @p UseInst.
  ///
  /// @return A ForwardingAction object describing the feasibility and
  ///         profitability evaluation and the callback carrying-out the value
  ///         forwarding.
  ForwardingAction forwardSpeculatable(ScopStmt *TargetStmt,
                                       Instruction *UseInst, ScopStmt *DefStmt,
                                       Loop *DefLoop) {
    // PHIs, unless synthesizable, are not yet supported.
    if (isa<PHINode>(UseInst))
      return ForwardingAction::notApplicable();

    // Compatible instructions must satisfy the following conditions:
    // 1. Idempotent (instruction will be copied, not moved; although its
    //    original instance might be removed by simplification)
    // 2. Not access memory (There might be memory writes between)
    // 3. Not cause undefined behaviour (we might copy to a location when the
    //    original instruction was no executed; this is currently not possible
    //    because we do not forward PHINodes)
    // 4. Not leak memory if executed multiple times (i.e. malloc)
    //
    // Instruction::mayHaveSideEffects is not sufficient because it considers
    // malloc to not have side-effects. llvm::isSafeToSpeculativelyExecute is
    // not sufficient because it allows memory accesses.
    if (mayBeMemoryDependent(*UseInst))
      return ForwardingAction::notApplicable();

    SmallVector<ForwardingAction::KeyTy, 4> Depends;
    Depends.reserve(UseInst->getNumOperands());
    for (Value *OpVal : UseInst->operand_values()) {
      ForwardingDecision OpDecision =
          forwardTree(TargetStmt, OpVal, DefStmt, DefLoop);
      switch (OpDecision) {
      case FD_CannotForward:
        return ForwardingAction::cannotForward();

      case FD_CanForwardLeaf:
      case FD_CanForwardProfitably:
        Depends.emplace_back(OpVal, DefStmt);
        break;

      case FD_NotApplicable:
      case FD_Unknown:
        llvm_unreachable(
            "forwardTree should never return FD_NotApplicable/FD_Unknown");
      }
    }

    auto ExecAction = [this, TargetStmt, UseInst]() {
      // To ensure the right order, prepend this instruction before its
      // operands. This ensures that its operands are inserted before the
      // instruction using them.
      TargetStmt->prependInstruction(UseInst);

      LLVM_DEBUG(dbgs() << "    forwarded speculable instruction: " << *UseInst
                        << "\n");
      NumInstructionsCopied++;
      TotalInstructionsCopied++;
      return true;
    };
    return ForwardingAction::canForward(ExecAction, Depends, true);
  }

  /// Determines whether an operand tree can be forwarded and returns
  /// instructions how to do so in the form of a ForwardingAction object.
  ///
  /// @param TargetStmt  The statement the operand tree will be copied to.
  /// @param UseVal      The value (usually an instruction) which is root of an
  ///                    operand tree.
  /// @param UseStmt     The statement that uses @p UseVal.
  /// @param UseLoop     The loop @p UseVal is used in.
  ///
  /// @return A ForwardingAction object describing the feasibility and
  ///         profitability evaluation and the callback carrying-out the value
  ///         forwarding.
  ForwardingAction forwardTreeImpl(ScopStmt *TargetStmt, Value *UseVal,
                                   ScopStmt *UseStmt, Loop *UseLoop) {
    ScopStmt *DefStmt = nullptr;
    Loop *DefLoop = nullptr;

    // { DefDomain[] -> TargetDomain[] }
    isl::map DefToTarget;

    VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true);
    switch (VUse.getKind()) {
    case VirtualUse::Constant:
    case VirtualUse::Block:
    case VirtualUse::Hoisted:
      // These can be used anywhere without special considerations.
      return ForwardingAction::triviallyForwardable(false, UseVal);

    case VirtualUse::Synthesizable: {
      // Check if the value is synthesizable at the new location as well. This
      // might be possible when leaving a loop for which ScalarEvolution is
      // unable to derive the exit value for.
      // TODO: If there is a LCSSA PHI at the loop exit, use that one.
      // If the SCEV contains a SCEVAddRecExpr, we currently depend on that we
      // do not forward past its loop header. This would require us to use a
      // previous loop induction variable instead the current one. We currently
      // do not allow forwarding PHI nodes, thus this should never occur (the
      // only exception where no phi is necessary being an unreachable loop
      // without edge from the outside).
      VirtualUse TargetUse = VirtualUse::create(
          S, TargetStmt, TargetStmt->getSurroundingLoop(), UseVal, true);
      if (TargetUse.getKind() == VirtualUse::Synthesizable)
        return ForwardingAction::triviallyForwardable(false, UseVal);

      LLVM_DEBUG(
          dbgs() << "    Synthesizable would not be synthesizable anymore: "
                 << *UseVal << "\n");
      return ForwardingAction::cannotForward();
    }

    case VirtualUse::ReadOnly: {
      if (!ModelReadOnlyScalars)
        return ForwardingAction::triviallyForwardable(false, UseVal);

      // If we model read-only scalars, we need to create a MemoryAccess for it.
      auto ExecAction = [this, TargetStmt, UseVal]() {
        TargetStmt->ensureValueRead(UseVal);

        LLVM_DEBUG(dbgs() << "    forwarded read-only value " << *UseVal
                          << "\n");
        NumReadOnlyCopied++;
        TotalReadOnlyCopied++;

        // Note that we cannot return true here. With a operand tree
        // depth of 0, UseVal is the use in TargetStmt that we try to replace.
        // With -polly-analyze-read-only-scalars=true we would ensure the
        // existence of a MemoryAccess (which already exists for a leaf) and be
        // removed again by tryForwardTree because it's goal is to remove this
        // scalar MemoryAccess. It interprets FD_CanForwardTree as the
        // permission to do so.
        return false;
      };
      return ForwardingAction::canForward(ExecAction, {}, false);
    }

    case VirtualUse::Intra:
      // Knowing that UseStmt and DefStmt are the same statement instance, just
      // reuse the information about UseStmt for DefStmt
      DefStmt = UseStmt;

      LLVM_FALLTHROUGH;
    case VirtualUse::Inter:
      Instruction *Inst = cast<Instruction>(UseVal);

      if (!DefStmt) {
        DefStmt = S->getStmtFor(Inst);
        if (!DefStmt)
          return ForwardingAction::cannotForward();
      }

      DefLoop = LI->getLoopFor(Inst->getParent());

      ForwardingAction SpeculativeResult =
          forwardSpeculatable(TargetStmt, Inst, DefStmt, DefLoop);
      if (SpeculativeResult.Decision != FD_NotApplicable)
        return SpeculativeResult;

      ForwardingAction KnownResult = forwardKnownLoad(
          TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop);
      if (KnownResult.Decision != FD_NotApplicable)
        return KnownResult;

      ForwardingAction ReloadResult = reloadKnownContent(
          TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop);
      if (ReloadResult.Decision != FD_NotApplicable)
        return ReloadResult;

      // When no method is found to forward the operand tree, we effectively
      // cannot handle it.
      LLVM_DEBUG(dbgs() << "    Cannot forward instruction: " << *Inst << "\n");
      return ForwardingAction::cannotForward();
    }

    llvm_unreachable("Case unhandled");
  }

  /// Determines whether an operand tree can be forwarded. Previous evaluations
  /// are cached.
  ///
  /// @param TargetStmt  The statement the operand tree will be copied to.
  /// @param UseVal      The value (usually an instruction) which is root of an
  ///                    operand tree.
  /// @param UseStmt     The statement that uses @p UseVal.
  /// @param UseLoop     The loop @p UseVal is used in.
  ///
  /// @return FD_CannotForward        if @p UseVal cannot be forwarded.
  ///         FD_CanForwardLeaf       if @p UseVal is forwardable, but not
  ///                                 profitable.
  ///         FD_CanForwardProfitably if @p UseVal is forwardable and useful to
  ///                                 do.
  ForwardingDecision forwardTree(ScopStmt *TargetStmt, Value *UseVal,
                                 ScopStmt *UseStmt, Loop *UseLoop) {
    // Lookup any cached evaluation.
    auto It = ForwardingActions.find({UseVal, UseStmt});
    if (It != ForwardingActions.end())
      return It->second.Decision;

    // Make a new evaluation.
    ForwardingAction Action =
        forwardTreeImpl(TargetStmt, UseVal, UseStmt, UseLoop);
    ForwardingDecision Result = Action.Decision;

    // Remember for the next time.
    assert(!ForwardingActions.count({UseVal, UseStmt}) &&
           "circular dependency?");
    ForwardingActions.insert({{UseVal, UseStmt}, std::move(Action)});

    return Result;
  }

  /// Forward an operand tree using cached actions.
  ///
  /// @param Stmt   Statement the operand tree is moved into.
  /// @param UseVal Root of the operand tree within @p Stmt.
  /// @param RA     The MemoryAccess for @p UseVal that the forwarding intends
  ///               to remove.
  void applyForwardingActions(ScopStmt *Stmt, Value *UseVal, MemoryAccess *RA) {
    using ChildItTy =
        decltype(std::declval<ForwardingAction>().Depends.begin());
    using EdgeTy = std::pair<ForwardingAction *, ChildItTy>;

    DenseSet<ForwardingAction::KeyTy> Visited;
    SmallVector<EdgeTy, 32> Stack;
    SmallVector<ForwardingAction *, 32> Ordered;

    // Seed the tree search using the root value.
    assert(ForwardingActions.count({UseVal, Stmt}));
    ForwardingAction *RootAction = &ForwardingActions[{UseVal, Stmt}];
    Stack.emplace_back(RootAction, RootAction->Depends.begin());

    // Compute the postorder of the operand tree: all operands of an instruction
    // must be visited before the instruction itself. As an additional
    // requirement, the topological ordering must be 'compact': Any subtree node
    // must not be interleaved with nodes from a non-shared subtree. This is
    // because the same llvm::Instruction can be materialized multiple times as
    // used at different ScopStmts which might be different values. Intersecting
    // these lifetimes may result in miscompilations.
    // FIXME: Intersecting lifetimes might still be possible for the roots
    // themselves, since instructions are just prepended to a ScopStmt's
    // instruction list.
    while (!Stack.empty()) {
      EdgeTy &Top = Stack.back();
      ForwardingAction *TopAction = Top.first;
      ChildItTy &TopEdge = Top.second;

      if (TopEdge == TopAction->Depends.end()) {
        // Postorder sorting
        Ordered.push_back(TopAction);
        Stack.pop_back();
        continue;
      }
      ForwardingAction::KeyTy Key = *TopEdge;

      // Next edge for this level
      ++TopEdge;

      auto VisitIt = Visited.insert(Key);
      if (!VisitIt.second)
        continue;

      assert(ForwardingActions.count(Key) &&
             "Must not insert new actions during execution phase");
      ForwardingAction *ChildAction = &ForwardingActions[Key];
      Stack.emplace_back(ChildAction, ChildAction->Depends.begin());
    }

    // Actually, we need the reverse postorder because actions prepend new
    // instructions. Therefore, the first one will always be the action for the
    // operand tree's root.
    assert(Ordered.back() == RootAction);
    if (RootAction->Execute())
      Stmt->removeSingleMemoryAccess(RA);
    Ordered.pop_back();
    for (auto DepAction : reverse(Ordered)) {
      assert(DepAction->Decision != FD_Unknown &&
             DepAction->Decision != FD_CannotForward);
      assert(DepAction != RootAction);
      DepAction->Execute();
    }
  }

  /// Try to forward an operand tree rooted in @p RA.
  bool tryForwardTree(MemoryAccess *RA) {
    assert(RA->isLatestScalarKind());
    LLVM_DEBUG(dbgs() << "Trying to forward operand tree " << RA << "...\n");

    ScopStmt *Stmt = RA->getStatement();
    Loop *InLoop = Stmt->getSurroundingLoop();

    isl::map TargetToUse;
    if (!Known.is_null()) {
      isl::space DomSpace = Stmt->getDomainSpace();
      TargetToUse =
          isl::map::identity(DomSpace.map_from_domain_and_range(DomSpace));
    }

    ForwardingDecision Assessment =
        forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop);

    // If considered feasible and profitable, forward it.
    bool Changed = false;
    if (Assessment == FD_CanForwardProfitably) {
      applyForwardingActions(Stmt, RA->getAccessValue(), RA);
      Changed = true;
    }

    ForwardingActions.clear();
    return Changed;
  }

  /// Return which SCoP this instance is processing.
  Scop *getScop() const { return S; }

  /// Run the algorithm: Use value read accesses as operand tree roots and try
  /// to forward them into the statement.
  bool forwardOperandTrees() {
    for (ScopStmt &Stmt : *S) {
      bool StmtModified = false;

      // Because we are modifying the MemoryAccess list, collect them first to
      // avoid iterator invalidation.
      SmallVector<MemoryAccess *, 16> Accs(Stmt.begin(), Stmt.end());

      for (MemoryAccess *RA : Accs) {
        if (!RA->isRead())
          continue;
        if (!RA->isLatestScalarKind())
          continue;

        if (tryForwardTree(RA)) {
          Modified = true;
          StmtModified = true;
          NumForwardedTrees++;
          TotalForwardedTrees++;
        }
      }

      if (StmtModified) {
        NumModifiedStmts++;
        TotalModifiedStmts++;
      }
    }

    if (Modified) {
      ScopsModified++;
      S->realignParams();
    }
    return Modified;
  }

  /// Print the pass result, performed transformations and the SCoP after the
  /// transformation.
  void print(raw_ostream &OS, int Indent = 0) {
    printStatistics(OS, Indent);

    if (!Modified) {
      // This line can easily be checked in regression tests.
      OS << "ForwardOpTree executed, but did not modify anything\n";
      return;
    }

    printStatements(OS, Indent);
  }
};

/// Pass that redirects scalar reads to array elements that are known to contain
/// the same value.
///
/// This reduces the number of scalar accesses and therefore potentially
/// increases the freedom of the scheduler. In the ideal case, all reads of a
/// scalar definition are redirected (We currently do not care about removing
/// the write in this case).  This is also useful for the main DeLICM pass as
/// there are less scalars to be mapped.
class ForwardOpTree : public ScopPass {
private:
  /// The pass implementation, also holding per-scop data.
  std::unique_ptr<ForwardOpTreeImpl> Impl;

public:
  static char ID;

  explicit ForwardOpTree() : ScopPass(ID) {}
  ForwardOpTree(const ForwardOpTree &) = delete;
  ForwardOpTree &operator=(const ForwardOpTree &) = delete;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequiredTransitive<ScopInfoRegionPass>();
    AU.addRequired<LoopInfoWrapperPass>();
    AU.setPreservesAll();
  }

  bool runOnScop(Scop &S) override {
    // Free resources for previous SCoP's computation, if not yet done.
    releaseMemory();

    LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();

    {
      IslMaxOperationsGuard MaxOpGuard(S.getIslCtx().get(), MaxOps, false);
      Impl = std::make_unique<ForwardOpTreeImpl>(&S, &LI, MaxOpGuard);

      if (AnalyzeKnown) {
        LLVM_DEBUG(dbgs() << "Prepare forwarders...\n");
        Impl->computeKnownValues();
      }

      LLVM_DEBUG(dbgs() << "Forwarding operand trees...\n");
      Impl->forwardOperandTrees();

      if (MaxOpGuard.hasQuotaExceeded()) {
        LLVM_DEBUG(dbgs() << "Not all operations completed because of "
                             "max_operations exceeded\n");
        KnownOutOfQuota++;
      }
    }

    LLVM_DEBUG(dbgs() << "\nFinal Scop:\n");
    LLVM_DEBUG(dbgs() << S);

    // Update statistics
    auto ScopStats = S.getStatistics();
    NumValueWrites += ScopStats.NumValueWrites;
    NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
    NumPHIWrites += ScopStats.NumPHIWrites;
    NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
    NumSingletonWrites += ScopStats.NumSingletonWrites;
    NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;

    return false;
  }

  void printScop(raw_ostream &OS, Scop &S) const override {
    if (!Impl)
      return;

    assert(Impl->getScop() == &S);
    Impl->print(OS);
  }

  void releaseMemory() override { Impl.reset(); }
}; // class ForwardOpTree

char ForwardOpTree::ID;
} // namespace

ScopPass *polly::createForwardOpTreePass() { return new ForwardOpTree(); }

INITIALIZE_PASS_BEGIN(ForwardOpTree, "polly-optree",
                      "Polly - Forward operand tree", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(ForwardOpTree, "polly-optree",
                    "Polly - Forward operand tree", false, false)