This repository has been archived by the owner on Apr 23, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2.1k
/
CodeGenPrepare.cpp
6951 lines (6141 loc) · 259 KB
/
CodeGenPrepare.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in it's
// basic-block-at-a-time approach. It should eventually be removed.
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/ValueMap.h"
#include "llvm/Pass.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <limits>
#include <memory>
#include <utility>
#include <vector>
using namespace llvm;
using namespace llvm::PatternMatch;
#define DEBUG_TYPE "codegenprepare"
STATISTIC(NumBlocksElim, "Number of blocks eliminated");
STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
"sunken Cmps");
STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
"of sunken Casts");
STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
"computations were sunk");
STATISTIC(NumMemoryInstsPhiCreated,
"Number of phis created when address "
"computations were sunk to memory instructions");
STATISTIC(NumMemoryInstsSelectCreated,
"Number of select created when address "
"computations were sunk to memory instructions");
STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
STATISTIC(NumAndsAdded,
"Number of and mask instructions added to form ext loads");
STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
STATISTIC(NumRetsDup, "Number of return instructions duplicated");
STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
static cl::opt<bool> DisableBranchOpts(
"disable-cgp-branch-opts", cl::Hidden, cl::init(false),
cl::desc("Disable branch optimizations in CodeGenPrepare"));
static cl::opt<bool>
DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
cl::desc("Disable GC optimizations in CodeGenPrepare"));
static cl::opt<bool> DisableSelectToBranch(
"disable-cgp-select2branch", cl::Hidden, cl::init(false),
cl::desc("Disable select to branch conversion."));
static cl::opt<bool> AddrSinkUsingGEPs(
"addr-sink-using-gep", cl::Hidden, cl::init(true),
cl::desc("Address sinking in CGP using GEPs."));
static cl::opt<bool> EnableAndCmpSinking(
"enable-andcmp-sinking", cl::Hidden, cl::init(true),
cl::desc("Enable sinkinig and/cmp into branches."));
static cl::opt<bool> DisableStoreExtract(
"disable-cgp-store-extract", cl::Hidden, cl::init(false),
cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));
static cl::opt<bool> StressStoreExtract(
"stress-cgp-store-extract", cl::Hidden, cl::init(false),
cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
static cl::opt<bool> DisableExtLdPromotion(
"disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
"CodeGenPrepare"));
static cl::opt<bool> StressExtLdPromotion(
"stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
"optimization in CodeGenPrepare"));
static cl::opt<bool> DisablePreheaderProtect(
"disable-preheader-prot", cl::Hidden, cl::init(false),
cl::desc("Disable protection against removing loop preheaders"));
static cl::opt<bool> ProfileGuidedSectionPrefix(
"profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::ZeroOrMore,
cl::desc("Use profile info to add section prefix for hot/cold functions"));
static cl::opt<unsigned> FreqRatioToSkipMerge(
"cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
cl::desc("Skip merging empty blocks if (frequency of empty block) / "
"(frequency of destination block) is greater than this ratio"));
static cl::opt<bool> ForceSplitStore(
"force-split-store", cl::Hidden, cl::init(false),
cl::desc("Force store splitting no matter what the target query says."));
static cl::opt<bool>
EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden,
cl::desc("Enable merging of redundant sexts when one is dominating"
" the other."), cl::init(true));
static cl::opt<bool> DisableComplexAddrModes(
"disable-complex-addr-modes", cl::Hidden, cl::init(false),
cl::desc("Disables combining addressing modes with different parts "
"in optimizeMemoryInst."));
static cl::opt<bool>
AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false),
cl::desc("Allow creation of Phis in Address sinking."));
static cl::opt<bool>
AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(true),
cl::desc("Allow creation of selects in Address sinking."));
static cl::opt<bool> AddrSinkCombineBaseReg(
"addr-sink-combine-base-reg", cl::Hidden, cl::init(true),
cl::desc("Allow combining of BaseReg field in Address sinking."));
static cl::opt<bool> AddrSinkCombineBaseGV(
"addr-sink-combine-base-gv", cl::Hidden, cl::init(true),
cl::desc("Allow combining of BaseGV field in Address sinking."));
static cl::opt<bool> AddrSinkCombineBaseOffs(
"addr-sink-combine-base-offs", cl::Hidden, cl::init(true),
cl::desc("Allow combining of BaseOffs field in Address sinking."));
static cl::opt<bool> AddrSinkCombineScaledReg(
"addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true),
cl::desc("Allow combining of ScaledReg field in Address sinking."));
static cl::opt<bool>
EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden,
cl::init(true),
cl::desc("Enable splitting large offset of GEP."));
namespace {
enum ExtType {
ZeroExtension, // Zero extension has been seen.
SignExtension, // Sign extension has been seen.
BothExtension // This extension type is used if we saw sext after
// ZeroExtension had been set, or if we saw zext after
// SignExtension had been set. It makes the type
// information of a promoted instruction invalid.
};
using SetOfInstrs = SmallPtrSet<Instruction *, 16>;
using TypeIsSExt = PointerIntPair<Type *, 2, ExtType>;
using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>;
using SExts = SmallVector<Instruction *, 16>;
using ValueToSExts = DenseMap<Value *, SExts>;
class TypePromotionTransaction;
class CodeGenPrepare : public FunctionPass {
const TargetMachine *TM = nullptr;
const TargetSubtargetInfo *SubtargetInfo;
const TargetLowering *TLI = nullptr;
const TargetRegisterInfo *TRI;
const TargetTransformInfo *TTI = nullptr;
const TargetLibraryInfo *TLInfo;
const LoopInfo *LI;
std::unique_ptr<BlockFrequencyInfo> BFI;
std::unique_ptr<BranchProbabilityInfo> BPI;
/// As we scan instructions optimizing them, this is the next instruction
/// to optimize. Transforms that can invalidate this should update it.
BasicBlock::iterator CurInstIterator;
/// Keeps track of non-local addresses that have been sunk into a block.
/// This allows us to avoid inserting duplicate code for blocks with
/// multiple load/stores of the same address. The usage of WeakTrackingVH
/// enables SunkAddrs to be treated as a cache whose entries can be
/// invalidated if a sunken address computation has been erased.
ValueMap<Value*, WeakTrackingVH> SunkAddrs;
/// Keeps track of all instructions inserted for the current function.
SetOfInstrs InsertedInsts;
/// Keeps track of the type of the related instruction before their
/// promotion for the current function.
InstrToOrigTy PromotedInsts;
/// Keep track of instructions removed during promotion.
SetOfInstrs RemovedInsts;
/// Keep track of sext chains based on their initial value.
DenseMap<Value *, Instruction *> SeenChainsForSExt;
/// Keep track of GEPs accessing the same data structures such as structs or
/// arrays that are candidates to be split later because of their large
/// size.
MapVector<
AssertingVH<Value>,
SmallVector<std::pair<AssertingVH<GetElementPtrInst>, int64_t>, 32>>
LargeOffsetGEPMap;
/// Keep track of new GEP base after splitting the GEPs having large offset.
SmallSet<AssertingVH<Value>, 2> NewGEPBases;
/// Map serial numbers to Large offset GEPs.
DenseMap<AssertingVH<GetElementPtrInst>, int> LargeOffsetGEPID;
/// Keep track of SExt promoted.
ValueToSExts ValToSExtendedUses;
/// True if CFG is modified in any way.
bool ModifiedDT;
/// True if optimizing for size.
bool OptSize;
/// DataLayout for the Function being processed.
const DataLayout *DL = nullptr;
public:
static char ID; // Pass identification, replacement for typeid
CodeGenPrepare() : FunctionPass(ID) {
initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override;
StringRef getPassName() const override { return "CodeGen Prepare"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
// FIXME: When we can selectively preserve passes, preserve the domtree.
AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
}
private:
bool eliminateFallThrough(Function &F);
bool eliminateMostlyEmptyBlocks(Function &F);
BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
void eliminateMostlyEmptyBlock(BasicBlock *BB);
bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
bool isPreheader);
bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT);
bool optimizeInst(Instruction *I, bool &ModifiedDT);
bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
Type *AccessTy, unsigned AddrSpace);
bool optimizeInlineAsmInst(CallInst *CS);
bool optimizeCallInst(CallInst *CI, bool &ModifiedDT);
bool optimizeExt(Instruction *&I);
bool optimizeExtUses(Instruction *I);
bool optimizeLoadExt(LoadInst *Load);
bool optimizeSelectInst(SelectInst *SI);
bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
bool optimizeSwitchInst(SwitchInst *SI);
bool optimizeExtractElementInst(Instruction *Inst);
bool dupRetToEnableTailCallOpts(BasicBlock *BB);
bool placeDbgValues(Function &F);
bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
bool tryToPromoteExts(TypePromotionTransaction &TPT,
const SmallVectorImpl<Instruction *> &Exts,
SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
unsigned CreatedInstsCost = 0);
bool mergeSExts(Function &F);
bool splitLargeGEPOffsets();
bool performAddressTypePromotion(
Instruction *&Inst,
bool AllowPromotionWithoutCommonHeader,
bool HasPromoted, TypePromotionTransaction &TPT,
SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
bool splitBranchCondition(Function &F);
bool simplifyOffsetableRelocate(Instruction &I);
};
} // end anonymous namespace
char CodeGenPrepare::ID = 0;
INITIALIZE_PASS_BEGIN(CodeGenPrepare, DEBUG_TYPE,
"Optimize for code generation", false, false)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(CodeGenPrepare, DEBUG_TYPE,
"Optimize for code generation", false, false)
FunctionPass *llvm::createCodeGenPreparePass() { return new CodeGenPrepare(); }
bool CodeGenPrepare::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
DL = &F.getParent()->getDataLayout();
bool EverMadeChange = false;
// Clear per function information.
InsertedInsts.clear();
PromotedInsts.clear();
ModifiedDT = false;
if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
TM = &TPC->getTM<TargetMachine>();
SubtargetInfo = TM->getSubtargetImpl(F);
TLI = SubtargetInfo->getTargetLowering();
TRI = SubtargetInfo->getRegisterInfo();
}
TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
BPI.reset(new BranchProbabilityInfo(F, *LI));
BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
OptSize = F.optForSize();
ProfileSummaryInfo *PSI =
getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
if (ProfileGuidedSectionPrefix) {
if (PSI->isFunctionHotInCallGraph(&F, *BFI))
F.setSectionPrefix(".hot");
else if (PSI->isFunctionColdInCallGraph(&F, *BFI))
F.setSectionPrefix(".unlikely");
}
/// This optimization identifies DIV instructions that can be
/// profitably bypassed and carried out with a shorter, faster divide.
if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI &&
TLI->isSlowDivBypassed()) {
const DenseMap<unsigned int, unsigned int> &BypassWidths =
TLI->getBypassSlowDivWidths();
BasicBlock* BB = &*F.begin();
while (BB != nullptr) {
// bypassSlowDivision may create new BBs, but we don't want to reapply the
// optimization to those blocks.
BasicBlock* Next = BB->getNextNode();
EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
BB = Next;
}
}
// Eliminate blocks that contain only PHI nodes and an
// unconditional branch.
EverMadeChange |= eliminateMostlyEmptyBlocks(F);
if (!DisableBranchOpts)
EverMadeChange |= splitBranchCondition(F);
// Split some critical edges where one of the sources is an indirect branch,
// to help generate sane code for PHIs involving such edges.
EverMadeChange |= SplitIndirectBrCriticalEdges(F);
bool MadeChange = true;
while (MadeChange) {
MadeChange = false;
for (Function::iterator I = F.begin(); I != F.end(); ) {
BasicBlock *BB = &*I++;
bool ModifiedDTOnIteration = false;
MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration);
// Restart BB iteration if the dominator tree of the Function was changed
if (ModifiedDTOnIteration)
break;
}
if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
MadeChange |= mergeSExts(F);
if (!LargeOffsetGEPMap.empty())
MadeChange |= splitLargeGEPOffsets();
// Really free removed instructions during promotion.
for (Instruction *I : RemovedInsts)
I->deleteValue();
EverMadeChange |= MadeChange;
SeenChainsForSExt.clear();
ValToSExtendedUses.clear();
RemovedInsts.clear();
LargeOffsetGEPMap.clear();
LargeOffsetGEPID.clear();
}
SunkAddrs.clear();
if (!DisableBranchOpts) {
MadeChange = false;
// Use a set vector to get deterministic iteration order. The order the
// blocks are removed may affect whether or not PHI nodes in successors
// are removed.
SmallSetVector<BasicBlock*, 8> WorkList;
for (BasicBlock &BB : F) {
SmallVector<BasicBlock *, 2> Successors(succ_begin(&BB), succ_end(&BB));
MadeChange |= ConstantFoldTerminator(&BB, true);
if (!MadeChange) continue;
for (SmallVectorImpl<BasicBlock*>::iterator
II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
if (pred_begin(*II) == pred_end(*II))
WorkList.insert(*II);
}
// Delete the dead blocks and any of their dead successors.
MadeChange |= !WorkList.empty();
while (!WorkList.empty()) {
BasicBlock *BB = WorkList.pop_back_val();
SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
DeleteDeadBlock(BB);
for (SmallVectorImpl<BasicBlock*>::iterator
II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
if (pred_begin(*II) == pred_end(*II))
WorkList.insert(*II);
}
// Merge pairs of basic blocks with unconditional branches, connected by
// a single edge.
if (EverMadeChange || MadeChange)
MadeChange |= eliminateFallThrough(F);
EverMadeChange |= MadeChange;
}
if (!DisableGCOpts) {
SmallVector<Instruction *, 2> Statepoints;
for (BasicBlock &BB : F)
for (Instruction &I : BB)
if (isStatepoint(I))
Statepoints.push_back(&I);
for (auto &I : Statepoints)
EverMadeChange |= simplifyOffsetableRelocate(*I);
}
// Do this last to clean up use-before-def scenarios introduced by other
// preparatory transforms.
EverMadeChange |= placeDbgValues(F);
return EverMadeChange;
}
/// Merge basic blocks which are connected by a single edge, where one of the
/// basic blocks has a single successor pointing to the other basic block,
/// which has a single predecessor.
bool CodeGenPrepare::eliminateFallThrough(Function &F) {
bool Changed = false;
// Scan all of the blocks in the function, except for the entry block.
// Use a temporary array to avoid iterator being invalidated when
// deleting blocks.
SmallVector<WeakTrackingVH, 16> Blocks;
for (auto &Block : llvm::make_range(std::next(F.begin()), F.end()))
Blocks.push_back(&Block);
for (auto &Block : Blocks) {
auto *BB = cast_or_null<BasicBlock>(Block);
if (!BB)
continue;
// If the destination block has a single pred, then this is a trivial
// edge, just collapse it.
BasicBlock *SinglePred = BB->getSinglePredecessor();
// Don't merge if BB's address is taken.
if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue;
BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
if (Term && !Term->isConditional()) {
Changed = true;
LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n");
// Merge BB into SinglePred and delete it.
MergeBlockIntoPredecessor(BB);
}
}
return Changed;
}
/// Find a destination block from BB if BB is mergeable empty block.
BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
// If this block doesn't end with an uncond branch, ignore it.
BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
if (!BI || !BI->isUnconditional())
return nullptr;
// If the instruction before the branch (skipping debug info) isn't a phi
// node, then other stuff is happening here.
BasicBlock::iterator BBI = BI->getIterator();
if (BBI != BB->begin()) {
--BBI;
while (isa<DbgInfoIntrinsic>(BBI)) {
if (BBI == BB->begin())
break;
--BBI;
}
if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
return nullptr;
}
// Do not break infinite loops.
BasicBlock *DestBB = BI->getSuccessor(0);
if (DestBB == BB)
return nullptr;
if (!canMergeBlocks(BB, DestBB))
DestBB = nullptr;
return DestBB;
}
/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
/// edges in ways that are non-optimal for isel. Start by eliminating these
/// blocks so we can split them the way we want them.
bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
SmallPtrSet<BasicBlock *, 16> Preheaders;
SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
while (!LoopList.empty()) {
Loop *L = LoopList.pop_back_val();
LoopList.insert(LoopList.end(), L->begin(), L->end());
if (BasicBlock *Preheader = L->getLoopPreheader())
Preheaders.insert(Preheader);
}
bool MadeChange = false;
// Copy blocks into a temporary array to avoid iterator invalidation issues
// as we remove them.
// Note that this intentionally skips the entry block.
SmallVector<WeakTrackingVH, 16> Blocks;
for (auto &Block : llvm::make_range(std::next(F.begin()), F.end()))
Blocks.push_back(&Block);
for (auto &Block : Blocks) {
BasicBlock *BB = cast_or_null<BasicBlock>(Block);
if (!BB)
continue;
BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
if (!DestBB ||
!isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB)))
continue;
eliminateMostlyEmptyBlock(BB);
MadeChange = true;
}
return MadeChange;
}
bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
BasicBlock *DestBB,
bool isPreheader) {
// Do not delete loop preheaders if doing so would create a critical edge.
// Loop preheaders can be good locations to spill registers. If the
// preheader is deleted and we create a critical edge, registers may be
// spilled in the loop body instead.
if (!DisablePreheaderProtect && isPreheader &&
!(BB->getSinglePredecessor() &&
BB->getSinglePredecessor()->getSingleSuccessor()))
return false;
// Try to skip merging if the unique predecessor of BB is terminated by a
// switch or indirect branch instruction, and BB is used as an incoming block
// of PHIs in DestBB. In such case, merging BB and DestBB would cause ISel to
// add COPY instructions in the predecessor of BB instead of BB (if it is not
// merged). Note that the critical edge created by merging such blocks wont be
// split in MachineSink because the jump table is not analyzable. By keeping
// such empty block (BB), ISel will place COPY instructions in BB, not in the
// predecessor of BB.
BasicBlock *Pred = BB->getUniquePredecessor();
if (!Pred ||
!(isa<SwitchInst>(Pred->getTerminator()) ||
isa<IndirectBrInst>(Pred->getTerminator())))
return true;
if (BB->getTerminator() != BB->getFirstNonPHIOrDbg())
return true;
// We use a simple cost heuristic which determine skipping merging is
// profitable if the cost of skipping merging is less than the cost of
// merging : Cost(skipping merging) < Cost(merging BB), where the
// Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and
// the Cost(merging BB) is Freq(Pred) * Cost(Copy).
// Assuming Cost(Copy) == Cost(Branch), we could simplify it to :
// Freq(Pred) / Freq(BB) > 2.
// Note that if there are multiple empty blocks sharing the same incoming
// value for the PHIs in the DestBB, we consider them together. In such
// case, Cost(merging BB) will be the sum of their frequencies.
if (!isa<PHINode>(DestBB->begin()))
return true;
SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs;
// Find all other incoming blocks from which incoming values of all PHIs in
// DestBB are the same as the ones from BB.
for (pred_iterator PI = pred_begin(DestBB), E = pred_end(DestBB); PI != E;
++PI) {
BasicBlock *DestBBPred = *PI;
if (DestBBPred == BB)
continue;
if (llvm::all_of(DestBB->phis(), [&](const PHINode &DestPN) {
return DestPN.getIncomingValueForBlock(BB) ==
DestPN.getIncomingValueForBlock(DestBBPred);
}))
SameIncomingValueBBs.insert(DestBBPred);
}
// See if all BB's incoming values are same as the value from Pred. In this
// case, no reason to skip merging because COPYs are expected to be place in
// Pred already.
if (SameIncomingValueBBs.count(Pred))
return true;
BlockFrequency PredFreq = BFI->getBlockFreq(Pred);
BlockFrequency BBFreq = BFI->getBlockFreq(BB);
for (auto SameValueBB : SameIncomingValueBBs)
if (SameValueBB->getUniquePredecessor() == Pred &&
DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
BBFreq += BFI->getBlockFreq(SameValueBB);
return PredFreq.getFrequency() <=
BBFreq.getFrequency() * FreqRatioToSkipMerge;
}
/// Return true if we can merge BB into DestBB if there is a single
/// unconditional branch between them, and BB contains no other non-phi
/// instructions.
bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
const BasicBlock *DestBB) const {
// We only want to eliminate blocks whose phi nodes are used by phi nodes in
// the successor. If there are more complex condition (e.g. preheaders),
// don't mess around with them.
for (const PHINode &PN : BB->phis()) {
for (const User *U : PN.users()) {
const Instruction *UI = cast<Instruction>(U);
if (UI->getParent() != DestBB || !isa<PHINode>(UI))
return false;
// If User is inside DestBB block and it is a PHINode then check
// incoming value. If incoming value is not from BB then this is
// a complex condition (e.g. preheaders) we want to avoid here.
if (UI->getParent() == DestBB) {
if (const PHINode *UPN = dyn_cast<PHINode>(UI))
for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
if (Insn && Insn->getParent() == BB &&
Insn->getParent() != UPN->getIncomingBlock(I))
return false;
}
}
}
}
// If BB and DestBB contain any common predecessors, then the phi nodes in BB
// and DestBB may have conflicting incoming values for the block. If so, we
// can't merge the block.
const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
if (!DestBBPN) return true; // no conflict.
// Collect the preds of BB.
SmallPtrSet<const BasicBlock*, 16> BBPreds;
if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
// It is faster to get preds from a PHI than with pred_iterator.
for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
BBPreds.insert(BBPN->getIncomingBlock(i));
} else {
BBPreds.insert(pred_begin(BB), pred_end(BB));
}
// Walk the preds of DestBB.
for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
if (BBPreds.count(Pred)) { // Common predecessor?
for (const PHINode &PN : DestBB->phis()) {
const Value *V1 = PN.getIncomingValueForBlock(Pred);
const Value *V2 = PN.getIncomingValueForBlock(BB);
// If V2 is a phi node in BB, look up what the mapped value will be.
if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
if (V2PN->getParent() == BB)
V2 = V2PN->getIncomingValueForBlock(Pred);
// If there is a conflict, bail out.
if (V1 != V2) return false;
}
}
}
return true;
}
/// Eliminate a basic block that has only phi's and an unconditional branch in
/// it.
void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
BranchInst *BI = cast<BranchInst>(BB->getTerminator());
BasicBlock *DestBB = BI->getSuccessor(0);
LLVM_DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n"
<< *BB << *DestBB);
// If the destination block has a single pred, then this is a trivial edge,
// just collapse it.
if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
if (SinglePred != DestBB) {
assert(SinglePred == BB &&
"Single predecessor not the same as predecessor");
// Merge DestBB into SinglePred/BB and delete it.
MergeBlockIntoPredecessor(DestBB);
// Note: BB(=SinglePred) will not be deleted on this path.
// DestBB(=its single successor) is the one that was deleted.
LLVM_DEBUG(dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n");
return;
}
}
// Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
// to handle the new incoming edges it is about to have.
for (PHINode &PN : DestBB->phis()) {
// Remove the incoming value for BB, and remember it.
Value *InVal = PN.removeIncomingValue(BB, false);
// Two options: either the InVal is a phi node defined in BB or it is some
// value that dominates BB.
PHINode *InValPhi = dyn_cast<PHINode>(InVal);
if (InValPhi && InValPhi->getParent() == BB) {
// Add all of the input values of the input PHI as inputs of this phi.
for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
PN.addIncoming(InValPhi->getIncomingValue(i),
InValPhi->getIncomingBlock(i));
} else {
// Otherwise, add one instance of the dominating value for each edge that
// we will be adding.
if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
PN.addIncoming(InVal, BBPN->getIncomingBlock(i));
} else {
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
PN.addIncoming(InVal, *PI);
}
}
}
// The PHIs are now updated, change everything that refers to BB to use
// DestBB and remove BB.
BB->replaceAllUsesWith(DestBB);
BB->eraseFromParent();
++NumBlocksElim;
LLVM_DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
}
// Computes a map of base pointer relocation instructions to corresponding
// derived pointer relocation instructions given a vector of all relocate calls
static void computeBaseDerivedRelocateMap(
const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>>
&RelocateInstMap) {
// Collect information in two maps: one primarily for locating the base object
// while filling the second map; the second map is the final structure holding
// a mapping between Base and corresponding Derived relocate calls
DenseMap<std::pair<unsigned, unsigned>, GCRelocateInst *> RelocateIdxMap;
for (auto *ThisRelocate : AllRelocateCalls) {
auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
ThisRelocate->getDerivedPtrIndex());
RelocateIdxMap.insert(std::make_pair(K, ThisRelocate));
}
for (auto &Item : RelocateIdxMap) {
std::pair<unsigned, unsigned> Key = Item.first;
if (Key.first == Key.second)
// Base relocation: nothing to insert
continue;
GCRelocateInst *I = Item.second;
auto BaseKey = std::make_pair(Key.first, Key.first);
// We're iterating over RelocateIdxMap so we cannot modify it.
auto MaybeBase = RelocateIdxMap.find(BaseKey);
if (MaybeBase == RelocateIdxMap.end())
// TODO: We might want to insert a new base object relocate and gep off
// that, if there are enough derived object relocates.
continue;
RelocateInstMap[MaybeBase->second].push_back(I);
}
}
// Accepts a GEP and extracts the operands into a vector provided they're all
// small integer constants
static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP,
SmallVectorImpl<Value *> &OffsetV) {
for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
// Only accept small constant integer operands
auto Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
if (!Op || Op->getZExtValue() > 20)
return false;
}
for (unsigned i = 1; i < GEP->getNumOperands(); i++)
OffsetV.push_back(GEP->getOperand(i));
return true;
}
// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
// replace, computes a replacement, and affects it.
static bool
simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase,
const SmallVectorImpl<GCRelocateInst *> &Targets) {
bool MadeChange = false;
// We must ensure the relocation of derived pointer is defined after
// relocation of base pointer. If we find a relocation corresponding to base
// defined earlier than relocation of base then we move relocation of base
// right before found relocation. We consider only relocation in the same
// basic block as relocation of base. Relocations from other basic block will
// be skipped by optimization and we do not care about them.
for (auto R = RelocatedBase->getParent()->getFirstInsertionPt();
&*R != RelocatedBase; ++R)
if (auto RI = dyn_cast<GCRelocateInst>(R))
if (RI->getStatepoint() == RelocatedBase->getStatepoint())
if (RI->getBasePtrIndex() == RelocatedBase->getBasePtrIndex()) {
RelocatedBase->moveBefore(RI);
break;
}
for (GCRelocateInst *ToReplace : Targets) {
assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
"Not relocating a derived object of the original base object");
if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
// A duplicate relocate call. TODO: coalesce duplicates.
continue;
}
if (RelocatedBase->getParent() != ToReplace->getParent()) {
// Base and derived relocates are in different basic blocks.
// In this case transform is only valid when base dominates derived
// relocate. However it would be too expensive to check dominance
// for each such relocate, so we skip the whole transformation.
continue;
}
Value *Base = ToReplace->getBasePtr();
auto Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
if (!Derived || Derived->getPointerOperand() != Base)
continue;
SmallVector<Value *, 2> OffsetV;
if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
continue;
// Create a Builder and replace the target callsite with a gep
assert(RelocatedBase->getNextNode() &&
"Should always have one since it's not a terminator");
// Insert after RelocatedBase
IRBuilder<> Builder(RelocatedBase->getNextNode());
Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
// If gc_relocate does not match the actual type, cast it to the right type.
// In theory, there must be a bitcast after gc_relocate if the type does not
// match, and we should reuse it to get the derived pointer. But it could be
// cases like this:
// bb1:
// ...
// %g1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
// br label %merge
//
// bb2:
// ...
// %g2 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
// br label %merge
//
// merge:
// %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
// %cast = bitcast i8 addrspace(1)* %p1 in to i32 addrspace(1)*
//
// In this case, we can not find the bitcast any more. So we insert a new bitcast
// no matter there is already one or not. In this way, we can handle all cases, and
// the extra bitcast should be optimized away in later passes.
Value *ActualRelocatedBase = RelocatedBase;
if (RelocatedBase->getType() != Base->getType()) {
ActualRelocatedBase =
Builder.CreateBitCast(RelocatedBase, Base->getType());
}
Value *Replacement = Builder.CreateGEP(
Derived->getSourceElementType(), ActualRelocatedBase, makeArrayRef(OffsetV));
Replacement->takeName(ToReplace);
// If the newly generated derived pointer's type does not match the original derived
// pointer's type, cast the new derived pointer to match it. Same reasoning as above.
Value *ActualReplacement = Replacement;
if (Replacement->getType() != ToReplace->getType()) {
ActualReplacement =
Builder.CreateBitCast(Replacement, ToReplace->getType());
}
ToReplace->replaceAllUsesWith(ActualReplacement);
ToReplace->eraseFromParent();
MadeChange = true;
}
return MadeChange;
}
// Turns this:
//
// %base = ...
// %ptr = gep %base + 15
// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
// %base' = relocate(%tok, i32 4, i32 4)
// %ptr' = relocate(%tok, i32 4, i32 5)
// %val = load %ptr'
//
// into this:
//
// %base = ...
// %ptr = gep %base + 15
// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
// %base' = gc.relocate(%tok, i32 4, i32 4)
// %ptr' = gep %base' + 15
// %val = load %ptr'
bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {
bool MadeChange = false;
SmallVector<GCRelocateInst *, 2> AllRelocateCalls;