Skip to content

Commit

Permalink
[LV][VPlan] Build plain CFG with simple VPInstructions for outer loops.
Browse files Browse the repository at this point in the history
Patch #3 from VPlan Outer Loop Vectorization Patch Series #1
(RFC: http://lists.llvm.org/pipermail/llvm-dev/2017-December/119523.html).

Expected to be NFC for the current inner loop vectorization path. It
introduces the basic algorithm to build the VPlan plain CFG (single-level
CFG, no hierarchical CFG (H-CFG), yet) in the VPlan-native vectorization
path using VPInstructions. It includes:
  - VPlanHCFGBuilder: Main class to build the VPlan H-CFG (plain CFG without nested regions, for now).
  - VPlanVerifier: Main class with utilities to check the consistency of an H-CFG.
  - VPlanBlockUtils: Main class with utilities to manipulate VPBlockBases in VPlan.

Reviewers: rengolin, fhahn, mkuper, mssimpso, a.elovikov, hfinkel, aprantl.

Differential Revision: https://reviews.llvm.org/D44338



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@332654 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
dcaballe authored and rocm-hcc committed May 18, 2018
1 parent 17ded78 commit 8f9ae9d
Show file tree
Hide file tree
Showing 10 changed files with 889 additions and 42 deletions.
2 changes: 2 additions & 0 deletions lib/Transforms/Vectorize/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ add_llvm_library(LLVMVectorize
SLPVectorizer.cpp
Vectorize.cpp
VPlan.cpp
VPlanHCFGBuilder.cpp
VPlanVerifier.cpp

ADDITIONAL_HEADER_DIRS
${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms
Expand Down
101 changes: 96 additions & 5 deletions lib/Transforms/Vectorize/LoopVectorizationPlanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,23 +39,94 @@ class VPBuilder {
VPBasicBlock::iterator InsertPt = VPBasicBlock::iterator();

VPInstruction *createInstruction(unsigned Opcode,
std::initializer_list<VPValue *> Operands) {
ArrayRef<VPValue *> Operands) {
VPInstruction *Instr = new VPInstruction(Opcode, Operands);
BB->insert(Instr, InsertPt);
if (BB)
BB->insert(Instr, InsertPt);
return Instr;
}

/// Convenience overload taking a braced operand list: wraps \p Operands in an
/// ArrayRef and forwards to the ArrayRef-based createInstruction.
VPInstruction *createInstruction(unsigned Opcode,
                                 std::initializer_list<VPValue *> Operands) {
  ArrayRef<VPValue *> OperandList(Operands);
  return createInstruction(Opcode, OperandList);
}

public:
VPBuilder() {}

/// This specifies that created VPInstructions should be appended to
/// the end of the specified block.
/// Drop the current insertion point: the insert block becomes null and the
/// insert iterator is reset to a default-constructed one, so created
/// instructions will not be inserted into a block.
void clearInsertionPoint() {
  InsertPt = VPBasicBlock::iterator();
  BB = nullptr;
}

/// Returns the current insert block (null once the insertion point has been
/// cleared).
VPBasicBlock *getInsertBlock() const {
  return BB;
}

/// Returns the iterator marking the current insert position.
VPBasicBlock::iterator getInsertPoint() const {
  return InsertPt;
}

/// VPInsertPoint - A saved insertion location: a block together with an
/// iterator.
class VPInsertPoint {
public:
  /// Builds an unset insertion point, i.e. one that records no block.
  VPInsertPoint() = default;

  /// Builds an insertion point recording \p InsertBlock and \p InsertPoint.
  VPInsertPoint(VPBasicBlock *InsertBlock, VPBasicBlock::iterator InsertPoint)
      : Block(InsertBlock), Point(InsertPoint) {}

  /// Returns the recorded block (null when the insert point is unset).
  VPBasicBlock *getBlock() const { return Block; }

  /// Returns the recorded iterator.
  VPBasicBlock::iterator getPoint() const { return Point; }

  /// An insert point counts as set exactly when it records a non-null block.
  bool isSet() const { return nullptr != Block; }

private:
  VPBasicBlock *Block = nullptr;
  VPBasicBlock::iterator Point;
};

/// Re-establishes a previously saved insertion location. An unset \p IP
/// clears the builder's insertion point instead.
void restoreIP(VPInsertPoint IP) {
  if (!IP.isSet()) {
    clearInsertionPoint();
    return;
  }
  setInsertPoint(IP.getBlock(), IP.getPoint());
}

/// Makes \p TheBB the insert block and positions the insert iterator at its
/// end, so created VPInstructions are appended to the block. \p TheBB must
/// not be null.
void setInsertPoint(VPBasicBlock *TheBB) {
  assert(TheBB && "Attempting to set a null insert point");
  InsertPt = TheBB->end();
  BB = TheBB;
}

/// Makes \p TheBB the insert block and \p IP the position at which created
/// instructions are inserted. Both values are stored as given; no null check
/// is performed here.
void setInsertPoint(VPBasicBlock *TheBB, VPBasicBlock::iterator IP) {
  InsertPt = IP;
  BB = TheBB;
}

/// Insert \p I at the current insertion point and return it.
///
/// When the builder has no insert block (e.g. after clearInsertionPoint()),
/// \p I is returned without being inserted anywhere. This avoids
/// dereferencing a null block and matches how createInstruction treats a
/// cleared insertion point.
VPInstruction *insert(VPInstruction *I) const {
  if (BB)
    BB->insert(I, InsertPt);
  return I;
}

/// Builds a VPInstruction with opcode \p Opcode over \p Operands via
/// createInstruction, records \p Inst (possibly null) as its underlying
/// value, and returns the new instruction.
VPValue *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands,
                      Instruction *Inst = nullptr) {
  VPInstruction *VPI = createInstruction(Opcode, Operands);
  VPI->setUnderlyingValue(Inst);
  return VPI;
}
/// Convenience overload taking a braced operand list: wraps \p Operands in an
/// ArrayRef and forwards to the ArrayRef-based createNaryOp.
VPValue *createNaryOp(unsigned Opcode,
                      std::initializer_list<VPValue *> Operands,
                      Instruction *Inst = nullptr) {
  ArrayRef<VPValue *> OperandList(Operands);
  return createNaryOp(Opcode, OperandList, Inst);
}

/// Creates a VPInstruction::Not instruction whose single operand is
/// \p Operand and returns it.
VPValue *createNot(VPValue *Operand) {
  VPInstruction *NotInst = createInstruction(VPInstruction::Not, {Operand});
  return NotInst;
}
Expand All @@ -67,8 +138,28 @@ class VPBuilder {
/// Creates an instruction with opcode Instruction::BinaryOps::Or over
/// \p LHS and \p RHS and returns it.
VPValue *createOr(VPValue *LHS, VPValue *RHS) {
  VPInstruction *OrInst =
      createInstruction(Instruction::BinaryOps::Or, {LHS, RHS});
  return OrInst;
}
};

//===--------------------------------------------------------------------===//
// RAII helpers.
//===--------------------------------------------------------------------===//

/// RAII object that stores the current insertion point and restores it when
/// the object is destroyed.
class InsertPointGuard {
VPBuilder &Builder;
VPBasicBlock *Block;
VPBasicBlock::iterator Point;

public:
InsertPointGuard(VPBuilder &B)
: Builder(B), Block(B.getInsertBlock()), Point(B.getInsertPoint()) {}

InsertPointGuard(const InsertPointGuard &) = delete;
InsertPointGuard &operator=(const InsertPointGuard &) = delete;

~InsertPointGuard() { Builder.restoreIP(VPInsertPoint(Block, Point)); }
};
};

/// TODO: The following VectorizationFactor was pulled out of
/// LoopVectorizationCostModel class. LV also deals with
Expand Down
54 changes: 43 additions & 11 deletions lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@

#include "llvm/Transforms/Vectorize/LoopVectorize.h"
#include "LoopVectorizationPlanner.h"
#include "VPlanHCFGBuilder.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
Expand Down Expand Up @@ -244,6 +245,17 @@ static cl::opt<bool> EnableVPlanNativePath(
cl::desc("Enable VPlan-native vectorization path with "
"support for outer loop vectorization."));

// This flag enables the stress testing of the VPlan H-CFG construction in the
// VPlan-native vectorization path. It must be used in conjunction with
// -enable-vplan-native-path. -vplan-verify-hcfg can also be used to enable the
// verification of the H-CFGs built.
static cl::opt<bool> VPlanBuildStressTest(
"vplan-build-stress-test", cl::init(false), cl::Hidden,
cl::desc(
"Build VPlan for every supported loop nest in the function and bail "
"out right after the build (stress test the VPlan H-CFG construction "
"in the VPlan-native vectorization path)."));

/// A helper function for converting Scalar types to vector types.
/// If the incoming type is void, we return void. If the VF is 1, we return
/// the scalar type.
Expand Down Expand Up @@ -1653,8 +1665,11 @@ static void collectSupportedLoops(Loop &L, LoopInfo *LI,
OptimizationRemarkEmitter *ORE,
SmallVectorImpl<Loop *> &V) {
// Collect inner loops and outer loops without irreducible control flow. For
// now, only collect outer loops that have explicit vectorization hints.
if (L.empty() || (EnableVPlanNativePath && isExplicitVecOuterLoop(&L, ORE))) {
// now, only collect outer loops that have explicit vectorization hints. If we
// are stress testing the VPlan H-CFG construction, we collect the outermost
// loop of every loop nest.
if (L.empty() || VPlanBuildStressTest ||
(EnableVPlanNativePath && isExplicitVecOuterLoop(&L, ORE))) {
LoopBlocksRPO RPOT(&L);
RPOT.perform(LI);
if (!containsIrreducibleCFG<const BasicBlock *>(RPOT, *LI)) {
Expand Down Expand Up @@ -6254,20 +6269,30 @@ void LoopVectorizationCostModel::collectValuesToIgnore() {
VectorizationFactor
LoopVectorizationPlanner::planInVPlanNativePath(bool OptForSize,
unsigned UserVF) {
// Width 1 means no vectorize, cost 0 means uncomputed cost.
// Width 1 means no vectorization, cost 0 means uncomputed cost.
const VectorizationFactor NoVectorization = {1U, 0U};

// Outer loop handling: They may require CFG and instruction level
// transformations before even evaluating whether vectorization is profitable.
// Since we cannot modify the incoming IR, we need to build VPlan upfront in
// the vectorization pipeline.
if (!OrigLoop->empty()) {
// TODO: If UserVF is not provided, we set UserVF to 4 for stress testing.
// This won't be necessary when UserVF is not required in the VPlan-native
// path.
if (VPlanBuildStressTest && !UserVF)
UserVF = 4;

assert(EnableVPlanNativePath && "VPlan-native path is not enabled.");
assert(UserVF && "Expected UserVF for outer loop vectorization.");
assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two");
LLVM_DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n");
buildVPlans(UserVF, UserVF);

// For VPlan build stress testing, we bail out after VPlan construction.
if (VPlanBuildStressTest)
return NoVectorization;

return {UserVF, 0};
}

Expand All @@ -6280,7 +6305,7 @@ LoopVectorizationPlanner::planInVPlanNativePath(bool OptForSize,
VectorizationFactor
LoopVectorizationPlanner::plan(bool OptForSize, unsigned UserVF) {
assert(OrigLoop->empty() && "Inner loop expected.");
// Width 1 means no vectorize, cost 0 means uncomputed cost.
// Width 1 means no vectorization, cost 0 means uncomputed cost.
const VectorizationFactor NoVectorization = {1U, 0U};
Optional<unsigned> MaybeMaxVF = CM.computeMaxVF(OptForSize);
if (!MaybeMaxVF.hasValue()) // Cases considered too costly to vectorize.
Expand Down Expand Up @@ -6806,9 +6831,11 @@ VPBasicBlock *LoopVectorizationPlanner::handleReplication(
"VPBB has successors when handling predicated replication.");
// Record predicated instructions for above packing optimizations.
PredInst2Recipe[I] = Recipe;
VPBlockBase *Region =
VPBB->setOneSuccessor(createReplicateRegion(I, Recipe, Plan));
return cast<VPBasicBlock>(Region->setOneSuccessor(new VPBasicBlock()));
VPBlockBase *Region = createReplicateRegion(I, Recipe, Plan);
VPBlockUtils::insertBlockAfter(Region, VPBB);
auto *RegSucc = new VPBasicBlock();
VPBlockUtils::insertBlockAfter(RegSucc, Region);
return RegSucc;
}

VPRegionBlock *
Expand All @@ -6834,8 +6861,8 @@ LoopVectorizationPlanner::createReplicateRegion(Instruction *Instr,

// Note: first set Entry as region entry and then connect successors starting
// from it in order, to propagate the "parent" of each VPBasicBlock.
Entry->setTwoSuccessors(Pred, Exit);
Pred->setOneSuccessor(Exit);
VPBlockUtils::insertTwoBlocksAfter(Pred, Exit, Entry);
VPBlockUtils::connectBlocks(Pred, Exit);

return Region;
}
Expand All @@ -6852,6 +6879,11 @@ LoopVectorizationPlanner::buildVPlan(VFRange &Range,

// Create new empty VPlan
auto Plan = llvm::make_unique<VPlan>();

// Build hierarchical CFG
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI);
HCFGBuilder.buildHierarchicalCFG(*Plan.get());

return Plan;
}

Expand Down Expand Up @@ -6893,7 +6925,7 @@ LoopVectorizationPlanner::buildVPlan(VFRange &Range,
// ingredients and fill a new VPBasicBlock.
unsigned VPBBsForBB = 0;
auto *FirstVPBBForBB = new VPBasicBlock(BB->getName());
VPBB->setOneSuccessor(FirstVPBBForBB);
VPBlockUtils::insertBlockAfter(FirstVPBBForBB, VPBB);
VPBB = FirstVPBBForBB;
Builder.setInsertPoint(VPBB);

Expand Down Expand Up @@ -6997,7 +7029,7 @@ LoopVectorizationPlanner::buildVPlan(VFRange &Range,
VPBasicBlock *PreEntry = cast<VPBasicBlock>(Plan->getEntry());
assert(PreEntry->empty() && "Expecting empty pre-entry block.");
VPBlockBase *Entry = Plan->setEntry(PreEntry->getSingleSuccessor());
PreEntry->disconnectSuccessor(Entry);
VPBlockUtils::disconnectBlocks(PreEntry, Entry);
delete PreEntry;

std::string PlanName;
Expand Down
Loading

0 comments on commit 8f9ae9d

Please sign in to comment.