Commit 032c3158 by Manasij Mukherjee

Subzero: Initial implementation of BB Local CSE

Adds Cfg::localCse for basic-block local common-subexpression elimination If we have t1 = op b c t2 = op b c This pass will replace future uses of t2 in a basic block by t1. To enable, use -enable-experimental in O2 BUG=none R=stichnot@chromium.org Review URL: https://codereview.chromium.org/1997443002 .
parent 5cce7619
...@@ -507,6 +507,132 @@ void Cfg::shuffleNodes() { ...@@ -507,6 +507,132 @@ void Cfg::shuffleNodes() {
dump("After basic block shuffling"); dump("After basic block shuffling");
} }
void Cfg::localCSE() {
// Performs basic-block local common-subexpression elimination
// If we have
// t1 = op b c
// t2 = op b c
// This pass will replace future references to t2 in a basic block by t1
// Points to note:
// 1. Does not assume SSA, but not tested on non-SSA input yet as it is run
// at the beginning.
// 2. Leaves removal of instructions to DCE.
// 3. Only enabled on arithmetic instructions. pnacl-clang (-O2) is expected
// to take care of cases not arising from GEP simplification.
// 4. By default, two passes are made over each basic block. Control this
// with -lcse-max-iters=N
TimerMarker T(TimerStack::TT_localCse, this);
struct VariableHash {
size_t operator()(const Variable *Var) const { return Var->hashValue(); }
};
struct InstHash {
size_t operator()(const Inst *Instr) const {
auto Kind = Instr->getKind();
auto Result =
std::hash<typename std::underlying_type<Inst::InstKind>::type>()(
Kind);
for (SizeT i = 0; i < Instr->getSrcSize(); ++i) {
Result ^= Instr->getSrc(i)->hashValue();
}
return Result;
}
};
struct InstEq {
bool srcEq(const Operand *A, const Operand *B) const {
if (llvm::isa<Variable>(A) || llvm::isa<Constant>(A))
return (A == B);
return false;
}
bool operator()(const Inst *InstrA, const Inst *InstrB) const {
if ((InstrA->getKind() != InstrB->getKind()) ||
(InstrA->getSrcSize() != InstrB->getSrcSize()))
return false;
if (auto *A = llvm::dyn_cast<InstArithmetic>(InstrA)) {
auto *B = llvm::cast<InstArithmetic>(InstrB);
// A, B are guaranteed to be of the same 'kind' at this point
// So, dyn_cast is not needed
if (A->getOp() != B->getOp())
return false;
}
// Does not enter loop if different kind or number of operands
for (SizeT i = 0; i < InstrA->getSrcSize(); ++i) {
if (!srcEq(InstrA->getSrc(i), InstrB->getSrc(i)))
return false;
}
return true;
}
};
for (CfgNode *Node : getNodes()) {
CfgUnorderedSet<Inst *, InstHash, InstEq> Seen;
CfgUnorderedMap<Variable *, Variable *, VariableHash> Replacements;
// Combining the above two into a single data structure might consume less
// memory but will be slower i.e map of Instruction -> Set of Variables
CfgUnorderedMap<Variable *, std::vector<Inst *>, VariableHash> Dependency;
// Not necessary for SSA, still keeping it in case this pass is not run at
// the beginning. Remove to improve performace.
int IterCount = getFlags().getLocalCseMaxIterations();
while (IterCount--) {
// TODO : Stats on IterCount -> performance
for (Inst &Instr : Node->getInsts()) {
if (Instr.isDeleted() || !llvm::isa<InstArithmetic>(&Instr))
continue;
// Invalidate replacements
auto Iter = Replacements.find(Instr.getDest());
if (Iter != Replacements.end()) {
Replacements.erase(Iter);
}
// Invalidate 'seen' instructions whose operands were just updated.
auto DepIter = Dependency.find(Instr.getDest());
if (DepIter != Dependency.end()) {
for (auto DepInst : DepIter->second) {
Seen.erase(DepInst);
}
}
// The above two can be removed if SSA is assumed.
// Replace - doing this before checking for repetitions might enable
// more
// optimizations
for (SizeT i = 0; i < Instr.getSrcSize(); ++i) {
auto *Opnd = Instr.getSrc(i);
if (auto *Var = llvm::dyn_cast<Variable>(Opnd)) {
if (Replacements.find(Var) != Replacements.end()) {
Instr.replaceSource(i, Replacements[Var]);
}
}
}
// Check for repetitions
auto SeenIter = Seen.find(&Instr);
if (SeenIter != Seen.end()) { // seen before
const Inst *Found = *SeenIter;
Replacements[Instr.getDest()] = Found->getDest();
} else { // new
Seen.insert(&Instr);
// Update dependencies
for (SizeT i = 0; i < Instr.getSrcSize(); ++i) {
auto *Opnd = Instr.getSrc(i);
if (auto *Var = llvm::dyn_cast<Variable>(Opnd)) {
Dependency[Var].push_back(&Instr);
}
}
}
}
}
}
}
void Cfg::doArgLowering() { void Cfg::doArgLowering() {
TimerMarker T(TimerStack::TT_doArgLowering, this); TimerMarker T(TimerStack::TT_doArgLowering, this);
getTarget()->lowerArguments(); getTarget()->lowerArguments();
......
...@@ -200,6 +200,7 @@ public: ...@@ -200,6 +200,7 @@ public:
void advancedPhiLowering(); void advancedPhiLowering();
void reorderNodes(); void reorderNodes();
void shuffleNodes(); void shuffleNodes();
void localCSE();
/// Scan allocas to determine whether we need to use a frame pointer. /// Scan allocas to determine whether we need to use a frame pointer.
/// If SortAndCombine == true, merge all the fixed-size allocas in the /// If SortAndCombine == true, merge all the fixed-size allocas in the
......
...@@ -140,6 +140,10 @@ struct dev_list_flag {}; ...@@ -140,6 +140,10 @@ struct dev_list_flag {};
"information to stdout at the end of program execution."), \ "information to stdout at the end of program execution."), \
cl::init(false)) \ cl::init(false)) \
\ \
X(EnableExperimental, bool, dev_opt_flag, "enable-experimental", \
cl::desc("Enable Optimizations not yet part of O2"), \
cl::init(false)) \
\
X(EnablePhiEdgeSplit, bool, dev_opt_flag, "phi-edge-split", \ X(EnablePhiEdgeSplit, bool, dev_opt_flag, "phi-edge-split", \
cl::desc("Enable edge splitting for Phi lowering"), cl::init(true)) \ cl::desc("Enable edge splitting for Phi lowering"), cl::init(true)) \
\ \
...@@ -176,6 +180,9 @@ struct dev_list_flag {}; ...@@ -176,6 +180,9 @@ struct dev_list_flag {};
"building LLVM IR first"), \ "building LLVM IR first"), \
cl::init(false)) \ cl::init(false)) \
\ \
X(LocalCseMaxIterations, int, dev_opt_flag, "lcse-max-iters", \
cl::desc("Number of times local-cse is run on a block"), cl::init(2)) \
\
X(LogFilename, std::string, dev_opt_flag, "log", \ X(LogFilename, std::string, dev_opt_flag, "log", \
cl::desc("Set log filename"), cl::init("-"), cl::value_desc("filename")) \ cl::desc("Set log filename"), cl::init("-"), cl::value_desc("filename")) \
\ \
......
...@@ -108,6 +108,13 @@ public: ...@@ -108,6 +108,13 @@ public:
assert(I < getSrcSize()); assert(I < getSrcSize());
return Srcs[I]; return Srcs[I];
} }
void replaceSource(SizeT Index, Operand *Replacement) {
assert(Index < NumSrcs);
assert(!isDeleted());
assert(LiveRangesEnded == 0);
//Invalidates liveness info because the use Srcs[Index] is removed.
Srcs[Index] = Replacement;
}
bool isLastUse(const Operand *Src) const; bool isLastUse(const Operand *Src) const;
void spliceLivenessInfo(Inst *OrigInst, Inst *SpliceAssn); void spliceLivenessInfo(Inst *OrigInst, Inst *SpliceAssn);
......
...@@ -98,6 +98,12 @@ public: ...@@ -98,6 +98,12 @@ public:
virtual Variable *asBoolean() { return nullptr; } virtual Variable *asBoolean() { return nullptr; }
virtual SizeT hashValue() const {
llvm::report_fatal_error("Tried to hash unsupported operand type : " +
std::to_string(Kind));
return 0;
}
protected: protected:
Operand(OperandKind Kind, Type Ty) : Ty(Ty), Kind(Kind) { Operand(OperandKind Kind, Type Ty) : Ty(Ty), Kind(Kind) {
// It is undefined behavior to have a larger value in the enum // It is undefined behavior to have a larger value in the enum
...@@ -153,6 +159,7 @@ public: ...@@ -153,6 +159,7 @@ public:
++LookupCount; ++LookupCount;
} }
CounterType getLookupCount() const { return LookupCount; } CounterType getLookupCount() const { return LookupCount; }
SizeT hashValue() const override { return 0; }
protected: protected:
Constant(OperandKind Kind, Type Ty) : Operand(Kind, Ty) { Constant(OperandKind Kind, Type Ty) : Operand(Kind, Ty) {
...@@ -204,6 +211,10 @@ public: ...@@ -204,6 +211,10 @@ public:
return Operand->getKind() == K; return Operand->getKind() == K;
} }
SizeT hashValue() const override {
return std::hash<PrimType>()(Value);
}
virtual bool shouldBeRandomizedOrPooled() const override { return false; } virtual bool shouldBeRandomizedOrPooled() const override { return false; }
private: private:
...@@ -769,6 +780,10 @@ public: ...@@ -769,6 +780,10 @@ public:
return Kind >= kVariable && Kind <= kVariable_Max; return Kind >= kVariable && Kind <= kVariable_Max;
} }
SizeT hashValue() const override {
return std::hash<SizeT>()(getIndex());
}
protected: protected:
Variable(const Cfg *Func, OperandKind K, Type Ty, SizeT Index) Variable(const Cfg *Func, OperandKind K, Type Ty, SizeT Index)
: Operand(K, Ty), Number(Index), : Operand(K, Ty), Number(Index),
......
...@@ -443,6 +443,11 @@ template <typename TraitsType> void TargetX86Base<TraitsType>::translateO2() { ...@@ -443,6 +443,11 @@ template <typename TraitsType> void TargetX86Base<TraitsType>::translateO2() {
Func->processAllocas(SortAndCombineAllocas); Func->processAllocas(SortAndCombineAllocas);
Func->dump("After Alloca processing"); Func->dump("After Alloca processing");
if (getFlags().getEnableExperimental()) {
Func->localCSE();
Func->dump("After Local CSE");
}
if (!getFlags().getEnablePhiEdgeSplit()) { if (!getFlags().getEnablePhiEdgeSplit()) {
// Lower Phi instructions. // Lower Phi instructions.
Func->placePhiLoads(); Func->placePhiLoads();
......
...@@ -41,6 +41,7 @@ ...@@ -41,6 +41,7 @@
X(livenessLightweight) \ X(livenessLightweight) \
X(llvmConvert) \ X(llvmConvert) \
X(loadOpt) \ X(loadOpt) \
X(localCse) \
X(lowerPhiAssignments) \ X(lowerPhiAssignments) \
X(materializeVectorShuffles) \ X(materializeVectorShuffles) \
X(parse) \ X(parse) \
......
; Tests local-cse on x8632 and x8664
; RUN: %p2i -i %s --filetype=obj --disassemble --target x8632 --args \
; RUN: -O2 -enable-experimental | FileCheck --check-prefix=X8632 \
; RUN: --check-prefix=X8632EXP %s
; RUN: %p2i -i %s --filetype=obj --disassemble --target x8632 --args \
; RUN: -O2 | FileCheck --check-prefix=X8632 --check-prefix X8632NOEXP %s
; RUN: %p2i -i %s --filetype=obj --disassemble --target x8664 --args \
; RUN: -O2 -enable-experimental | FileCheck --check-prefix=X8664 \
; RUN: --check-prefix=X8664EXP %s
; RUN: %p2i -i %s --filetype=obj --disassemble --target x8664 --args \
; RUN: -O2 | FileCheck --check-prefix=X8664 --check-prefix X8664NOEXP %s
define internal i32 @local_cse_test(i32 %a, i32 %b) {
entry:
%add1 = add i32 %b, %a
%add2 = add i32 %b, %a
%add3 = add i32 %add1, %add2
ret i32 %add3
}
; X8632: add
; X8632: add
; X8632NOEXP: add
; X8632EXP-NOT: add
; X8632: ret
; X8664: add
; X8664: add
; X8664NOEXP: add
; X8664EXP-NOT: add
; X8664: ret
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment