Commit b9a84728 by Jim Stichnoth

Subzero: Local variable splitting.

The linear-scan register allocator takes an all-or-nothing approach -- either the variable's entire live range gets a register, or none of it does.

To help with this, we add a pass that splits successive uses of a variable within a basic block into a chain of linked variables. This gives the register allocator the chance to allocate registers to subsets of the original live range.

The split variables are linked to each other so that if they don't get a register, they share a stack slot with the original variable, and redundant writes to that stack slot are recognized and elided.

This pass is executed after target lowering and right before register allocation. As such, it has to deal with some idiosyncrasies of target lowering, specifically the possibility of intra-block control flow. We experimented with doing this as a pre-lowering pass. However, the transformations interfered with some of the target lowering's pattern matching, such as bool folding, so we concluded that post-lowering was a better place for it.

Note: Some of the lit tests are overly specific about registers, and in these cases it was the path of least resistance to just disable local variable splitting.

BUG= none
R=eholk@chromium.org, jpp@chromium.org

Review URL: https://codereview.chromium.org/2177033002 .
parent 181a9bcb
...@@ -326,6 +326,7 @@ SRCS = \ ...@@ -326,6 +326,7 @@ SRCS = \
IceTimerTree.cpp \ IceTimerTree.cpp \
IceTranslator.cpp \ IceTranslator.cpp \
IceTypes.cpp \ IceTypes.cpp \
IceVariableSplitting.cpp \
LinuxMallocProfiling.cpp \ LinuxMallocProfiling.cpp \
main.cpp \ main.cpp \
PNaClTranslator.cpp PNaClTranslator.cpp
......
...@@ -1661,7 +1661,7 @@ void Cfg::emit() { ...@@ -1661,7 +1661,7 @@ void Cfg::emit() {
emitTextHeader(FunctionName, Ctx, Asm); emitTextHeader(FunctionName, Ctx, Asm);
if (getFlags().getDecorateAsm()) { if (getFlags().getDecorateAsm()) {
for (Variable *Var : getVariables()) { for (Variable *Var : getVariables()) {
if (Var->hasStackOffset() && !Var->isRematerializable()) { if (Var->hasKnownStackOffset() && !Var->isRematerializable()) {
Str << "\t" << Var->getSymbolicStackOffset() << " = " Str << "\t" << Var->getSymbolicStackOffset() << " = "
<< Var->getStackOffset() << "\n"; << Var->getStackOffset() << "\n";
} }
......
...@@ -76,6 +76,8 @@ public: ...@@ -76,6 +76,8 @@ public:
/// @{ /// @{
InstList &getInsts() { return Insts; } InstList &getInsts() { return Insts; }
PhiList &getPhis() { return Phis; } PhiList &getPhis() { return Phis; }
const InstList &getInsts() const { return Insts; }
const PhiList &getPhis() const { return Phis; }
void appendInst(Inst *Instr); void appendInst(Inst *Instr);
void renumberInstructions(); void renumberInstructions();
/// Rough and generally conservative estimate of the number of instructions in /// Rough and generally conservative estimate of the number of instructions in
......
...@@ -204,6 +204,7 @@ void ClFlags::getParsedClFlags(ClFlags &OutFlags) { ...@@ -204,6 +204,7 @@ void ClFlags::getParsedClFlags(ClFlags &OutFlags) {
OutFlags.setDisableHybridAssembly(DisableHybridAssemblyObj || OutFlags.setDisableHybridAssembly(DisableHybridAssemblyObj ||
(OutFileTypeObj != Ice::FT_Iasm)); (OutFileTypeObj != Ice::FT_Iasm));
OutFlags.ForceO2.init(OutFlags.getForceO2String()); OutFlags.ForceO2.init(OutFlags.getForceO2String());
OutFlags.SplitInsts.init(OutFlags.getSplitInstString());
OutFlags.TestStatus.init(OutFlags.getTestStatusString()); OutFlags.TestStatus.init(OutFlags.getTestStatusString());
OutFlags.TimingFocus.init(OutFlags.getTimingFocusOnString()); OutFlags.TimingFocus.init(OutFlags.getTimingFocusOnString());
OutFlags.TranslateOnly.init(OutFlags.getTranslateOnlyString()); OutFlags.TranslateOnly.init(OutFlags.getTranslateOnlyString());
......
...@@ -159,6 +159,9 @@ struct dev_list_flag {}; ...@@ -159,6 +159,9 @@ struct dev_list_flag {};
X(ForceO2String, std::string, dev_opt_flag, "force-O2", \ X(ForceO2String, std::string, dev_opt_flag, "force-O2", \
cl::desc("Force -O2 for certain functions (assumes -Om1)"), cl::init("")) \ cl::desc("Force -O2 for certain functions (assumes -Om1)"), cl::init("")) \
\ \
X(SplitInstString, std::string, dev_opt_flag, "split-inst", \
cl::desc("Restrict local var splitting to specific insts"), cl::init(":")) \
\
X(FunctionSections, bool, dev_opt_flag, "ffunction-sections", \ X(FunctionSections, bool, dev_opt_flag, "ffunction-sections", \
cl::desc("Emit functions into separate sections")) \ cl::desc("Emit functions into separate sections")) \
\ \
...@@ -233,6 +236,9 @@ struct dev_list_flag {}; ...@@ -233,6 +236,9 @@ struct dev_list_flag {};
X(RandomizeRegisterAllocation, bool, dev_opt_flag, "randomize-regalloc", \ X(RandomizeRegisterAllocation, bool, dev_opt_flag, "randomize-regalloc", \
cl::desc("Randomize register allocation"), cl::init(false)) \ cl::desc("Randomize register allocation"), cl::init(false)) \
\ \
X(SplitLocalVars, bool, dev_opt_flag, "split-local-vars", cl::init(true), \
cl::desc("Block-local variable splitting (O2 only)")) \
\
X(RandomSeed, unsigned long long, dev_opt_flag, "sz-seed", \ X(RandomSeed, unsigned long long, dev_opt_flag, "sz-seed", \
cl::desc("Seed the random number generator"), cl::init(1)) \ cl::desc("Seed the random number generator"), cl::init(1)) \
\ \
......
...@@ -168,6 +168,9 @@ public: ...@@ -168,6 +168,9 @@ public:
bool matchForceO2(GlobalString Name, uint32_t Number) const { bool matchForceO2(GlobalString Name, uint32_t Number) const {
return ForceO2.match(Name, Number); return ForceO2.match(Name, Number);
} }
/// Returns true if the (Name, Number) pair is matched by the SplitInsts range
/// specification.
bool matchSplitInsts(const std::string &Name, uint32_t Number) const {
  const bool IsMatch = SplitInsts.match(Name, Number);
  return IsMatch;
}
bool matchTestStatus(GlobalString Name, uint32_t Number) const { bool matchTestStatus(GlobalString Name, uint32_t Number) const {
return TestStatus.match(Name, Number); return TestStatus.match(Name, Number);
} }
...@@ -191,6 +194,7 @@ private: ...@@ -191,6 +194,7 @@ private:
bool GenerateUnitTestMessages; bool GenerateUnitTestMessages;
RangeSpec ForceO2; RangeSpec ForceO2;
RangeSpec SplitInsts;
RangeSpec TestStatus; RangeSpec TestStatus;
RangeSpec TimingFocus; RangeSpec TimingFocus;
RangeSpec TranslateOnly; RangeSpec TranslateOnly;
......
...@@ -1090,9 +1090,10 @@ void InstIcmp::reverseConditionAndOperands() { ...@@ -1090,9 +1090,10 @@ void InstIcmp::reverseConditionAndOperands() {
Condition = InstIcmpAttributes[Condition].Reverse; Condition = InstIcmpAttributes[Condition].Reverse;
std::swap(Srcs[0], Srcs[1]); std::swap(Srcs[0], Srcs[1]);
} }
bool checkForRedundantAssign(const Variable *Dest, const Operand *Source) { bool checkForRedundantAssign(const Variable *Dest, const Operand *Source) {
const auto *SrcVar = llvm::dyn_cast<const Variable>(Source); const auto *SrcVar = llvm::dyn_cast<const Variable>(Source);
if (!SrcVar) if (SrcVar == nullptr)
return false; return false;
if (Dest->hasReg() && Dest->getRegNum() == SrcVar->getRegNum()) { if (Dest->hasReg() && Dest->getRegNum() == SrcVar->getRegNum()) {
// TODO: On x86-64, instructions like "mov eax, eax" are used to clear the // TODO: On x86-64, instructions like "mov eax, eax" are used to clear the
...@@ -1101,6 +1102,8 @@ bool checkForRedundantAssign(const Variable *Dest, const Operand *Source) { ...@@ -1101,6 +1102,8 @@ bool checkForRedundantAssign(const Variable *Dest, const Operand *Source) {
} }
if (!Dest->hasReg() && !SrcVar->hasReg()) { if (!Dest->hasReg() && !SrcVar->hasReg()) {
if (!Dest->hasStackOffset() || !SrcVar->hasStackOffset()) { if (!Dest->hasStackOffset() || !SrcVar->hasStackOffset()) {
// If called before stack slots have been assigned (i.e. as part of the
// dump() routine), conservatively return false.
return false; return false;
} }
if (Dest->getStackOffset() != SrcVar->getStackOffset()) { if (Dest->getStackOffset() != SrcVar->getStackOffset()) {
...@@ -1108,6 +1111,15 @@ bool checkForRedundantAssign(const Variable *Dest, const Operand *Source) { ...@@ -1108,6 +1111,15 @@ bool checkForRedundantAssign(const Variable *Dest, const Operand *Source) {
} }
return true; return true;
} }
// For a "v=t" assignment where t has a register, v has a stack slot, and v
// has a LinkedTo stack root, and v and t share the same LinkedTo root, return
// true. This is because this assignment is effectively reassigning the same
// value to the original LinkedTo stack root.
if (SrcVar->hasReg() && Dest->hasStackOffset() &&
Dest->getLinkedToStackRoot() != nullptr &&
Dest->getLinkedToRoot() == SrcVar->getLinkedToRoot()) {
return true;
}
return false; return false;
} }
......
...@@ -111,8 +111,6 @@ public: ...@@ -111,8 +111,6 @@ public:
void replaceSource(SizeT Index, Operand *Replacement) { void replaceSource(SizeT Index, Operand *Replacement) {
assert(Index < getSrcSize()); assert(Index < getSrcSize());
assert(!isDeleted()); assert(!isDeleted());
assert(LiveRangesEnded == 0);
// Invalidates liveness info because the use Srcs[Index] is removed.
Srcs[Index] = Replacement; Srcs[Index] = Replacement;
} }
...@@ -151,6 +149,15 @@ public: ...@@ -151,6 +149,15 @@ public:
/// report_fatal_error(). /// report_fatal_error().
virtual bool isMemoryWrite() const; virtual bool isMemoryWrite() const;
/// Returns true if the (target-specific) instruction represents an
/// intra-block label, i.e. branch target. The base implementation returns
/// false; instructions that act as labels override it. This is meant
/// primarily for the block-local variable splitting pass
/// (splitBlockLocalVariables()).
virtual bool isLabel() const { return false; }
/// If the (target-specific) instruction represents an intra-block branch to
/// some Label instruction, return that Label branch target instruction;
/// otherwise return nullptr. The base implementation always returns nullptr,
/// so only instructions that override this report a target.
virtual const Inst *getIntraBlockBranchTarget() const { return nullptr; }
void livenessLightweight(Cfg *Func, LivenessBV &Live); void livenessLightweight(Cfg *Func, LivenessBV &Live);
/// Calculates liveness for this instruction. Returns true if this instruction /// Calculates liveness for this instruction. Returns true if this instruction
/// is (tentatively) still live and should be retained, and false if this /// is (tentatively) still live and should be retained, and false if this
......
...@@ -337,6 +337,7 @@ template <typename TraitsType> struct InstImpl { ...@@ -337,6 +337,7 @@ template <typename TraitsType> struct InstImpl {
uint32_t getEmitInstCount() const override { return 0; } uint32_t getEmitInstCount() const override { return 0; }
GlobalString getLabelName() const { return Name; } GlobalString getLabelName() const { return Name; }
SizeT getLabelNumber() const { return LabelNumber; } SizeT getLabelNumber() const { return LabelNumber; }
bool isLabel() const override { return true; }
void emit(const Cfg *Func) const override; void emit(const Cfg *Func) const override;
void emitIAS(const Cfg *Func) const override; void emitIAS(const Cfg *Func) const override;
void dump(const Cfg *Func) const override; void dump(const Cfg *Func) const override;
...@@ -412,6 +413,7 @@ template <typename TraitsType> struct InstImpl { ...@@ -412,6 +413,7 @@ template <typename TraitsType> struct InstImpl {
bool isUnconditionalBranch() const override { bool isUnconditionalBranch() const override {
return !Label && Condition == Cond::Br_None; return !Label && Condition == Cond::Br_None;
} }
const Inst *getIntraBlockBranchTarget() const override { return Label; }
bool repointEdges(CfgNode *OldNode, CfgNode *NewNode) override; bool repointEdges(CfgNode *OldNode, CfgNode *NewNode) override;
void emit(const Cfg *Func) const override; void emit(const Cfg *Func) const override;
void emitIAS(const Cfg *Func) const override; void emitIAS(const Cfg *Func) const override;
......
...@@ -208,10 +208,11 @@ const Variable *Variable::asType(const Cfg *Func, Type Ty, ...@@ -208,10 +208,11 @@ const Variable *Variable::asType(const Cfg *Func, Type Ty,
} }
RegWeight Variable::getWeight(const Cfg *Func) const { RegWeight Variable::getWeight(const Cfg *Func) const {
VariablesMetadata *VMetadata = Func->getVMetadata(); if (mustHaveReg())
return mustHaveReg() ? RegWeight(RegWeight::Inf) return RegWeight(RegWeight::Inf);
: mustNotHaveReg() ? RegWeight(RegWeight::Zero) if (mustNotHaveReg())
: VMetadata->getUseWeight(this); return RegWeight(RegWeight::Zero);
return Func->getVMetadata()->getUseWeight(this);
} }
void VariableTracking::markUse(MetadataKind TrackingKind, const Inst *Instr, void VariableTracking::markUse(MetadataKind TrackingKind, const Inst *Instr,
...@@ -540,8 +541,10 @@ void Variable::dump(const Cfg *Func, Ostream &Str) const { ...@@ -540,8 +541,10 @@ void Variable::dump(const Cfg *Func, Ostream &Str) const {
if (Func->isVerbose(IceV_RegOrigins) || if (Func->isVerbose(IceV_RegOrigins) ||
(!hasReg() && !Func->getTarget()->hasComputedFrame())) { (!hasReg() && !Func->getTarget()->hasComputedFrame())) {
Str << "%" << getName(); Str << "%" << getName();
if (getLinkedTo() != nullptr) for (Variable *Link = getLinkedTo(); Link != nullptr;
Str << ":%" << getLinkedTo()->getName(); Link = Link->getLinkedTo()) {
Str << ":%" << Link->getName();
}
} }
if (hasReg()) { if (hasReg()) {
if (Func->isVerbose(IceV_RegOrigins)) if (Func->isVerbose(IceV_RegOrigins))
...@@ -554,7 +557,7 @@ void Variable::dump(const Cfg *Func, Ostream &Str) const { ...@@ -554,7 +557,7 @@ void Variable::dump(const Cfg *Func, Ostream &Str) const {
hasReg() ? getBaseRegNum() : Func->getTarget()->getFrameOrStackReg(); hasReg() ? getBaseRegNum() : Func->getTarget()->getFrameOrStackReg();
Str << "[" Str << "["
<< Func->getTarget()->getRegName(BaseRegisterNumber, IceType_i32); << Func->getTarget()->getRegName(BaseRegisterNumber, IceType_i32);
if (hasStackOffset()) { if (hasKnownStackOffset()) {
int32_t Offset = getStackOffset(); int32_t Offset = getStackOffset();
if (Offset) { if (Offset) {
if (Offset > 0) if (Offset > 0)
......
...@@ -696,12 +696,27 @@ public: ...@@ -696,12 +696,27 @@ public:
return IgnoreLiveness || IsRematerializable; return IgnoreLiveness || IsRematerializable;
} }
/// Returns true if the variable either has a definite stack offset, or has
/// the UndeterminedStackOffset such that it is guaranteed to have a definite
/// stack offset at emission time.
bool hasStackOffset() const { return StackOffset != InvalidStackOffset; } bool hasStackOffset() const { return StackOffset != InvalidStackOffset; }
/// Returns true only when StackOffset holds a concrete value, i.e. it is
/// neither the InvalidStackOffset sentinel nor the UndeterminedStackOffset
/// placeholder.
bool hasKnownStackOffset() const {
  if (StackOffset == InvalidStackOffset)
    return false;
  return StackOffset != UndeterminedStackOffset;
}
int32_t getStackOffset() const { int32_t getStackOffset() const {
assert(hasStackOffset()); assert(hasKnownStackOffset());
return StackOffset; return StackOffset;
} }
void setStackOffset(int32_t Offset) { StackOffset = Offset; } void setStackOffset(int32_t Offset) { StackOffset = Offset; }
/// Marks the variable as one that will have a stack offset by storing the
/// UndeterminedStackOffset placeholder. A variable for which hasStackOffset()
/// is already true is left unchanged.
void setHasStackOffset() {
  if (hasStackOffset())
    return;
  StackOffset = UndeterminedStackOffset;
}
/// Returns the variable's stack offset in symbolic form, to improve /// Returns the variable's stack offset in symbolic form, to improve
/// readability in DecorateAsm mode. /// readability in DecorateAsm mode.
std::string getSymbolicStackOffset() const { std::string getSymbolicStackOffset() const {
...@@ -729,6 +744,7 @@ public: ...@@ -729,6 +744,7 @@ public:
bool mustNotHaveReg() const { bool mustNotHaveReg() const {
return RegRequirement == RR_MustNotHaveRegister; return RegRequirement == RR_MustNotHaveRegister;
} }
/// Returns true if the variable's register requirement is
/// RR_MayHaveRegister.
bool mayHaveReg() const { return RegRequirement == RR_MayHaveRegister; }
void setRematerializable(RegNumT NewRegNum, int32_t NewOffset) { void setRematerializable(RegNumT NewRegNum, int32_t NewOffset) {
IsRematerializable = true; IsRematerializable = true;
setRegNum(NewRegNum); setRegNum(NewRegNum);
...@@ -789,6 +805,18 @@ public: ...@@ -789,6 +805,18 @@ public:
Root = Root->LinkedTo; Root = Root->LinkedTo;
return Root; return Root;
} }
/// Walks the LinkedTo chain, starting at this variable's LinkedTo (never at
/// this variable itself), and returns the last ancestor encountered that has
/// no register but does have a stack offset. Returns nullptr when no ancestor
/// qualifies. The answer is only certain to be accurate after register
/// allocation and stack slot assignment have completed.
Variable *getLinkedToStackRoot() const {
  Variable *Result = nullptr;
  Variable *Cursor = LinkedTo;
  while (Cursor != nullptr) {
    const bool LivesOnStack = !Cursor->hasReg() && Cursor->hasStackOffset();
    if (LivesOnStack)
      Result = Cursor; // Keep overwriting so the furthest match wins.
    Cursor = Cursor->LinkedTo;
  }
  return Result;
}
static bool classof(const Operand *Operand) { static bool classof(const Operand *Operand) {
OperandKind Kind = Operand->getKind(); OperandKind Kind = Operand->getKind();
...@@ -825,7 +853,10 @@ protected: ...@@ -825,7 +853,10 @@ protected:
RegNumT RegNum; RegNumT RegNum;
/// RegNumTmp is the tentative assignment during register allocation. /// RegNumTmp is the tentative assignment during register allocation.
RegNumT RegNumTmp; RegNumT RegNumTmp;
static constexpr int32_t InvalidStackOffset = -1; static constexpr int32_t InvalidStackOffset =
std::numeric_limits<int32_t>::min();
static constexpr int32_t UndeterminedStackOffset =
1 + std::numeric_limits<int32_t>::min();
/// StackOffset is the canonical location on stack (only if /// StackOffset is the canonical location on stack (only if
/// RegNum.hasNoValue() || IsArgument). /// RegNum.hasNoValue() || IsArgument).
int32_t StackOffset = InvalidStackOffset; int32_t StackOffset = InvalidStackOffset;
......
...@@ -565,16 +565,6 @@ void TargetLowering::sortVarsByAlignment(VarList &Dest, ...@@ -565,16 +565,6 @@ void TargetLowering::sortVarsByAlignment(VarList &Dest,
}); });
} }
namespace {
bool mightHaveStackSlot(const Variable *Var, const BitVector &IsVarReferenced) {
if (!IsVarReferenced[Var->getIndex()])
return false;
if (Var->hasReg())
return false;
return true;
}
} // end of anonymous namespace
void TargetLowering::getVarStackSlotParams( void TargetLowering::getVarStackSlotParams(
VarList &SortedSpilledVariables, SmallBitVector &RegsUsed, VarList &SortedSpilledVariables, SmallBitVector &RegsUsed,
size_t *GlobalsSize, size_t *SpillAreaSizeBytes, size_t *GlobalsSize, size_t *SpillAreaSizeBytes,
...@@ -594,30 +584,6 @@ void TargetLowering::getVarStackSlotParams( ...@@ -594,30 +584,6 @@ void TargetLowering::getVarStackSlotParams(
} }
} }
// Find each variable Var where:
// - Var is actively referenced
// - Var does not have a register
// - Var's furthest ancestor through LinkedTo: Root
// - Root has no active references, or has a register
//
// When any such Var is found, rotate the LinkedTo tree by swapping
// Var->LinkedTo and Root->LinkedTo. This ensures that when Var needs a stack
// slot, either its LinkedTo field is nullptr, or Var->getLinkedToRoot()
// returns a variable with a stack slot.
for (Variable *Var : Func->getVariables()) {
if (!mightHaveStackSlot(Var, IsVarReferenced))
continue;
if (Variable *Root = Var->getLinkedToRoot()) {
assert(Root->getLinkedTo() == nullptr);
if (mightHaveStackSlot(Root, IsVarReferenced)) {
// Found a "safe" root, no need to rotate the tree.
continue;
}
Var->setLinkedTo(nullptr);
Root->setLinkedTo(Var);
}
}
// If SimpleCoalescing is false, each variable without a register gets its // If SimpleCoalescing is false, each variable without a register gets its
// own unique stack slot, which leads to large stack frames. If // own unique stack slot, which leads to large stack frames. If
// SimpleCoalescing is true, then each "global" variable without a register // SimpleCoalescing is true, then each "global" variable without a register
...@@ -647,8 +613,13 @@ void TargetLowering::getVarStackSlotParams( ...@@ -647,8 +613,13 @@ void TargetLowering::getVarStackSlotParams(
} }
// An argument either does not need a stack slot (if passed in a register) // An argument either does not need a stack slot (if passed in a register)
// or already has one (if passed on the stack). // or already has one (if passed on the stack).
if (Var->getIsArg()) if (Var->getIsArg()) {
if (!Var->hasReg()) {
assert(!Var->hasStackOffset());
Var->setHasStackOffset();
}
continue; continue;
}
// An unreferenced variable doesn't need a stack slot. // An unreferenced variable doesn't need a stack slot.
if (!IsVarReferenced[Var->getIndex()]) if (!IsVarReferenced[Var->getIndex()])
continue; continue;
...@@ -656,6 +627,8 @@ void TargetLowering::getVarStackSlotParams( ...@@ -656,6 +627,8 @@ void TargetLowering::getVarStackSlotParams(
// not need accounting here. // not need accounting here.
if (TargetVarHook(Var)) if (TargetVarHook(Var))
continue; continue;
assert(!Var->hasStackOffset());
Var->setHasStackOffset();
SpilledVariables.push_back(Var); SpilledVariables.push_back(Var);
} }
......
...@@ -321,6 +321,16 @@ public: ...@@ -321,6 +321,16 @@ public:
virtual void addProlog(CfgNode *Node) = 0; virtual void addProlog(CfgNode *Node) = 0;
virtual void addEpilog(CfgNode *Node) = 0; virtual void addEpilog(CfgNode *Node) = 0;
/// Builds a correctly-typed "mov" instruction from SrcVar to Dest; used
/// mainly by local variable splitting. This default implementation reports a
/// fatal error, so a target must override it to support the operation.
virtual Inst *createLoweredMove(Variable *Dest, Variable *SrcVar) {
  // TODO(stichnot): make pure virtual by implementing for all targets
  static_cast<void>(Dest);
  static_cast<void>(SrcVar);
  llvm::report_fatal_error("createLoweredMove() unimplemented");
  return nullptr;
}
virtual ~TargetLowering() = default; virtual ~TargetLowering() = default;
private: private:
......
...@@ -220,6 +220,8 @@ public: ...@@ -220,6 +220,8 @@ public:
InstructionSetEnum getInstructionSet() const { return InstructionSet; } InstructionSetEnum getInstructionSet() const { return InstructionSet; }
Operand *legalizeUndef(Operand *From, RegNumT RegNum = RegNumT()); Operand *legalizeUndef(Operand *From, RegNumT RegNum = RegNumT());
Inst *createLoweredMove(Variable *Dest, Variable *SrcVar) override;
protected: protected:
const bool NeedSandboxing; const bool NeedSandboxing;
......
...@@ -23,11 +23,13 @@ ...@@ -23,11 +23,13 @@
#include "IceELFObjectWriter.h" #include "IceELFObjectWriter.h"
#include "IceGlobalInits.h" #include "IceGlobalInits.h"
#include "IceInstVarIter.h" #include "IceInstVarIter.h"
#include "IceInstX86Base.h"
#include "IceLiveness.h" #include "IceLiveness.h"
#include "IceOperand.h" #include "IceOperand.h"
#include "IcePhiLoweringImpl.h" #include "IcePhiLoweringImpl.h"
#include "IceUtils.h" #include "IceUtils.h"
#include "IceInstX86Base.h" #include "IceVariableSplitting.h"
#include "llvm/Support/MathExtras.h" #include "llvm/Support/MathExtras.h"
#include <stack> #include <stack>
...@@ -521,6 +523,7 @@ template <typename TraitsType> void TargetX86Base<TraitsType>::translateO2() { ...@@ -521,6 +523,7 @@ template <typename TraitsType> void TargetX86Base<TraitsType>::translateO2() {
initSandbox(); initSandbox();
} }
Func->dump("After x86 codegen"); Func->dump("After x86 codegen");
splitBlockLocalVariables(Func);
// Register allocation. This requires instruction renumbering and full // Register allocation. This requires instruction renumbering and full
// liveness analysis. Loops must be identified before liveness so variable // liveness analysis. Loops must be identified before liveness so variable
...@@ -1042,11 +1045,11 @@ void TargetX86Base<TraitsType>::addProlog(CfgNode *Node) { ...@@ -1042,11 +1045,11 @@ void TargetX86Base<TraitsType>::addProlog(CfgNode *Node) {
// stack slot. // stack slot.
std::function<bool(Variable *)> TargetVarHook = std::function<bool(Variable *)> TargetVarHook =
[&VariablesLinkedToSpillSlots](Variable *Var) { [&VariablesLinkedToSpillSlots](Variable *Var) {
if (Var->getLinkedTo() != nullptr) { // TODO(stichnot): Refactor this into the base class.
// TODO(stichnot): This assert won't necessarily be true in the Variable *Root = Var->getLinkedToStackRoot();
// future. if (Root != nullptr) {
assert(Var->mustNotHaveReg()); assert(!Root->hasReg());
if (!Var->getLinkedTo()->hasReg()) { if (!Root->hasReg()) {
VariablesLinkedToSpillSlots.push_back(Var); VariablesLinkedToSpillSlots.push_back(Var);
return true; return true;
} }
...@@ -1210,7 +1213,7 @@ void TargetX86Base<TraitsType>::addProlog(CfgNode *Node) { ...@@ -1210,7 +1213,7 @@ void TargetX86Base<TraitsType>::addProlog(CfgNode *Node) {
// Assign stack offsets to variables that have been linked to spilled // Assign stack offsets to variables that have been linked to spilled
// variables. // variables.
for (Variable *Var : VariablesLinkedToSpillSlots) { for (Variable *Var : VariablesLinkedToSpillSlots) {
const Variable *Root = Var->getLinkedToRoot(); const Variable *Root = Var->getLinkedToStackRoot();
assert(Root != nullptr); assert(Root != nullptr);
Var->setStackOffset(Root->getStackOffset()); Var->setStackOffset(Root->getStackOffset());
} }
...@@ -1350,6 +1353,15 @@ void TargetX86Base<TraitsType>::addEpilog(CfgNode *Node) { ...@@ -1350,6 +1353,15 @@ void TargetX86Base<TraitsType>::addEpilog(CfgNode *Node) {
RI->setDeleted(); RI->setDeleted();
} }
/// Creates the x86 move instruction copying SrcVar into Dest: a Movp when
/// Dest has a vector type, and a plain Mov otherwise.
template <typename TraitsType>
Inst *TargetX86Base<TraitsType>::createLoweredMove(Variable *Dest,
                                                   Variable *SrcVar) {
  const bool IsVector = isVectorType(Dest->getType());
  if (!IsVector)
    return Traits::Insts::Mov::create(Func, Dest, SrcVar);
  return Traits::Insts::Movp::create(Func, Dest, SrcVar);
}
template <typename TraitsType> Type TargetX86Base<TraitsType>::stackSlotType() { template <typename TraitsType> Type TargetX86Base<TraitsType>::stackSlotType() {
return Traits::WordType; return Traits::WordType;
} }
...@@ -3124,7 +3136,7 @@ void TargetX86Base<TraitsType>::lowerCast(const InstCast *Instr) { ...@@ -3124,7 +3136,7 @@ void TargetX86Base<TraitsType>::lowerCast(const InstCast *Instr) {
} else { } else {
Src0 = legalize(Src0); Src0 = legalize(Src0);
if (llvm::isa<X86OperandMem>(Src0)) { if (llvm::isa<X86OperandMem>(Src0)) {
Variable *T = Func->makeVariable(DestTy); Variable *T = makeReg(DestTy);
_movq(T, Src0); _movq(T, Src0);
_movq(Dest, T); _movq(Dest, T);
break; break;
......
...@@ -63,6 +63,7 @@ ...@@ -63,6 +63,7 @@
X(regAlloc) \ X(regAlloc) \
X(renumberInstructions) \ X(renumberInstructions) \
X(shortCircuit) \ X(shortCircuit) \
X(splitLocalVars) \
X(szmain) \ X(szmain) \
X(translate) \ X(translate) \
X(translateFunctions) \ X(translateFunctions) \
......
//===- subzero/src/IceVariableSplitting.h - Local var splitting -*- C++ -*-===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Aggressive block-local variable splitting to improve linear-scan
/// register allocation.
///
//===----------------------------------------------------------------------===//
#ifndef SUBZERO_SRC_ICEVARIABLESPLITTING_H
#define SUBZERO_SRC_ICEVARIABLESPLITTING_H
namespace Ice {
/// Entry point for the block-local variable splitting pass, applied to Func.
/// The x86 O2 translation pipeline invokes it after target lowering and
/// before register allocation.
void splitBlockLocalVariables(class Cfg *Func);
} // end of namespace Ice
#endif // SUBZERO_SRC_ICEVARIABLESPLITTING_H
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
; follow a usual pattern). ; follow a usual pattern).
; RUN: %p2i --filetype=obj --disassemble --sandbox -i %s --args -O2 \ ; RUN: %p2i --filetype=obj --disassemble --sandbox -i %s --args -O2 \
; RUN: -mattr=sse4.1 | FileCheck %s ; RUN: -mattr=sse4.1 -split-local-vars=0 | FileCheck %s
define internal <8 x i16> @test_mul_v8i16(<8 x i16> %arg0, <8 x i16> %arg1) { define internal <8 x i16> @test_mul_v8i16(<8 x i16> %arg0, <8 x i16> %arg1) {
entry: entry:
......
...@@ -51,9 +51,13 @@ entry: ...@@ -51,9 +51,13 @@ entry:
call void %__1() call void %__1()
call void %__1() call void %__1()
call void %__1() call void %__1()
call void %__1()
ret void ret void
} }
; CHECK-LABEL: CallIndirect ; CHECK-LABEL: CallIndirect
; Use the first call as a barrier in case the register allocator decides to use
; a scratch register for it but a common preserved register for the rest.
; CHECK: call
; CHECK: call [[REGISTER:[a-z]+]] ; CHECK: call [[REGISTER:[a-z]+]]
; CHECK: call [[REGISTER]] ; CHECK: call [[REGISTER]]
; CHECK: call [[REGISTER]] ; CHECK: call [[REGISTER]]
......
...@@ -150,12 +150,12 @@ next: ...@@ -150,12 +150,12 @@ next:
; CHECK-NEXT: sub esp,0x40 ; CHECK-NEXT: sub esp,0x40
; CHECK-NEXT: mov ecx,esp ; CHECK-NEXT: mov ecx,esp
; CHECK-NEXT: mov edx,ecx ; CHECK-NEXT: mov edx,ecx
; CHECK-NEXT: add edx,0x20 ; CHECK-NEXT: add ecx,0x20
; CHECK-NEXT: add ecx,0x0 ; CHECK-NEXT: add edx,0x0
; CHECK-NEXT: sub esp,0x10 ; CHECK-NEXT: sub esp,0x10
; CHECK-NEXT: mov ebx,esp ; CHECK-NEXT: mov ebx,esp
; CHECK-NEXT: mov DWORD PTR [edx],eax
; CHECK-NEXT: mov DWORD PTR [ecx],eax ; CHECK-NEXT: mov DWORD PTR [ecx],eax
; CHECK-NEXT: mov DWORD PTR [edx],eax
; CHECK-NEXT: mov DWORD PTR [ebp-0x24],eax ; CHECK-NEXT: mov DWORD PTR [ebp-0x24],eax
; CHECK-NEXT: mov DWORD PTR [ebp-0x14],eax ; CHECK-NEXT: mov DWORD PTR [ebp-0x14],eax
; CHECK-NEXT: mov DWORD PTR [ebx],eax ; CHECK-NEXT: mov DWORD PTR [ebx],eax
......
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
; REQUIRES: allow_dump ; REQUIRES: allow_dump
; RUN: %p2i --target x8632 -i %s --filetype=asm --args -O2 -asm-verbose \ ; RUN: %p2i --target x8632 -i %s --filetype=asm --args -O2 -asm-verbose \
; RUN: --split-local-vars=0 \
; RUN: --reg-use=eax,edx -reg-reserve | FileCheck --check-prefix=X8632 %s ; RUN: --reg-use=eax,edx -reg-reserve | FileCheck --check-prefix=X8632 %s
; RUN: %p2i --target arm32 -i %s --filetype=asm --args -O2 -asm-verbose \ ; RUN: %p2i --target arm32 -i %s --filetype=asm --args -O2 -asm-verbose \
; RUN: | FileCheck --check-prefix=ARM32 %s ; RUN: | FileCheck --check-prefix=ARM32 %s
......
...@@ -1402,7 +1402,7 @@ done: ...@@ -1402,7 +1402,7 @@ done:
;;; Some register will be used in the xadd instruction. ;;; Some register will be used in the xadd instruction.
; O2: lock xadd DWORD PTR {{.*}},[[REG:e..]] ; O2: lock xadd DWORD PTR {{.*}},[[REG:e..]]
;;; Make sure that register isn't used again, e.g. as the induction variable. ;;; Make sure that register isn't used again, e.g. as the induction variable.
; O2-NOT: [[REG]] ; O2-NOT: ,[[REG]]
; O2: ret ; O2: ret
; Do the same test for the xchg instruction instead of xadd. ; Do the same test for the xchg instruction instead of xadd.
...@@ -1423,7 +1423,7 @@ done: ...@@ -1423,7 +1423,7 @@ done:
;;; Some register will be used in the xchg instruction. ;;; Some register will be used in the xchg instruction.
; O2: xchg DWORD PTR {{.*}},[[REG:e..]] ; O2: xchg DWORD PTR {{.*}},[[REG:e..]]
;;; Make sure that register isn't used again, e.g. as the induction variable. ;;; Make sure that register isn't used again, e.g. as the induction variable.
; O2-NOT: [[REG]] ; O2-NOT: ,[[REG]]
; O2: ret ; O2: ret
; Same test for cmpxchg. ; Same test for cmpxchg.
...@@ -1444,7 +1444,7 @@ done: ...@@ -1444,7 +1444,7 @@ done:
;;; eax and some other register will be used in the cmpxchg instruction. ;;; eax and some other register will be used in the cmpxchg instruction.
; O2: lock cmpxchg DWORD PTR {{.*}},[[REG:e..]] ; O2: lock cmpxchg DWORD PTR {{.*}},[[REG:e..]]
;;; Make sure eax isn't used again, e.g. as the induction variable. ;;; Make sure eax isn't used again, e.g. as the induction variable.
; O2-NOT: eax ; O2-NOT: ,eax
; O2: ret ; O2: ret
; Same test for cmpxchg8b. ; Same test for cmpxchg8b.
...@@ -1466,5 +1466,5 @@ done: ...@@ -1466,5 +1466,5 @@ done:
;;; eax and some other register will be used in the cmpxchg instruction. ;;; eax and some other register will be used in the cmpxchg instruction.
; O2: lock cmpxchg8b QWORD PTR ; O2: lock cmpxchg8b QWORD PTR
;;; Make sure eax/ecx/edx/ebx aren't used again, e.g. as the induction variable. ;;; Make sure eax/ecx/edx/ebx aren't used again, e.g. as the induction variable.
; O2-NOT: {{eax|ecx|edx|ebx}} ; O2-NOT: ,{{eax|ecx|edx|ebx}}
; O2: pop ebx ; O2: pop ebx
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
; implementation. ; implementation.
; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 -sz-seed=1 \ ; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 -sz-seed=1 \
; RUN: -randomize-regalloc \ ; RUN: -randomize-regalloc -split-local-vars=0 \
; RUN: | FileCheck %s --check-prefix=CHECK_1 ; RUN: | FileCheck %s --check-prefix=CHECK_1
; RUN: %p2i -i %s --filetype=obj --disassemble --args -Om1 -sz-seed=1 \ ; RUN: %p2i -i %s --filetype=obj --disassemble --args -Om1 -sz-seed=1 \
; RUN: -randomize-regalloc \ ; RUN: -randomize-regalloc \
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
; Same tests but with a different seed, just to verify randomness. ; Same tests but with a different seed, just to verify randomness.
; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 -sz-seed=123 \ ; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 -sz-seed=123 \
; RUN: -randomize-regalloc \ ; RUN: -randomize-regalloc -split-local-vars=0 \
; RUN: | FileCheck %s --check-prefix=CHECK_123 ; RUN: | FileCheck %s --check-prefix=CHECK_123
; RUN: %p2i -i %s --filetype=obj --disassemble --args -Om1 -sz-seed=123 \ ; RUN: %p2i -i %s --filetype=obj --disassemble --args -Om1 -sz-seed=123 \
; RUN: -randomize-regalloc \ ; RUN: -randomize-regalloc \
......
...@@ -65,6 +65,7 @@ ...@@ -65,6 +65,7 @@
; RUN: -randomize-regalloc \ ; RUN: -randomize-regalloc \
; RUN: -nop-insertion \ ; RUN: -nop-insertion \
; RUN: -reorder-pooled-constants \ ; RUN: -reorder-pooled-constants \
; RUN: -split-local-vars=0 \
; RUN: | FileCheck %s --check-prefix=REGALLOC ; RUN: | FileCheck %s --check-prefix=REGALLOC
; Command for checking nop insertion (Need to turn off randomize-regalloc) ; Command for checking nop insertion (Need to turn off randomize-regalloc)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment