Commit b40595a1 by Jim Stichnoth

Subzero: Make the register allocator more robust with -reg-use and -reg-exclude.

The problem is that if you use -reg-use or -reg-exclude too aggressively, you can get failures because of inherently high register pressure, and there are also contributions from the "specialty" register classes. For example, when you combine load optimization, address mode inference, local register availability optimization, and the div instruction, you can end up needing 5 simultaneously live infinite-weight registers. The fix/enhancement here is to keep track of the "reserve" set of registers for each register class, and allow the register allocator to draw from that as a last resort. This behavior is guarded by the -reg-reserve flag. This CL also includes two improvements in lowering sequences to reduce register pressure. BUG= none R=kschimpf@google.com Review URL: https://codereview.chromium.org/1641653004 .
parent 029bed9c
...@@ -55,6 +55,30 @@ Cfg::Cfg(GlobalContext *Ctx, uint32_t SequenceNumber) ...@@ -55,6 +55,30 @@ Cfg::Cfg(GlobalContext *Ctx, uint32_t SequenceNumber)
Cfg::~Cfg() { assert(ICE_TLS_GET_FIELD(CurrentCfg) == nullptr); } Cfg::~Cfg() { assert(ICE_TLS_GET_FIELD(CurrentCfg) == nullptr); }
/// Produce a diagnostic label of the form "foo(i=123:b=9)": the function name
/// followed by its high-level instruction count and its basic block count.
/// The label is meant only for dumps and other diagnostics, where it makes it
/// easy to pick the smallest or simplest function out of a set being debugged.
/// The counts may differ somewhat depending on the point during the
/// translation passes at which this is called. When BuildDefs::dump() is
/// false, the plain function name is returned instead.
IceString Cfg::getFunctionNameAndSize() const {
  if (!BuildDefs::dump())
    return getFunctionName();
  SizeT NumBlocks = 0;
  SizeT NumInsts = 0;
  for (CfgNode *Block : getNodes()) {
    NumBlocks += 1;
    // Every phi is counted; instructions marked deleted are *not* skipped.
    NumInsts += Block->getPhis().size();
    for (Inst &Instr : Block->getInsts()) {
      // InstTarget instructions are left out of the count.
      if (llvm::isa<InstTarget>(&Instr))
        continue;
      NumInsts += 1;
    }
  }
  const std::string InstText = std::to_string(NumInsts);
  const std::string BlockText = std::to_string(NumBlocks);
  return getFunctionName() + "(i=" + InstText + ":b=" + BlockText + ")";
}
void Cfg::setError(const IceString &Message) { void Cfg::setError(const IceString &Message) {
HasError = true; HasError = true;
ErrorMessage = Message; ErrorMessage = Message;
...@@ -1075,7 +1099,9 @@ void Cfg::dump(const IceString &Message) { ...@@ -1075,7 +1099,9 @@ void Cfg::dump(const IceString &Message) {
Str << Args[i]->getType() << " "; Str << Args[i]->getType() << " ";
Args[i]->dump(this); Args[i]->dump(this);
} }
Str << ") {\n"; // Append an extra copy of the function name here, in order to print its
// size stats but not mess up lit tests.
Str << ") { # " << getFunctionNameAndSize() << "\n";
} }
resetCurrentNode(); resetCurrentNode();
if (isVerbose(IceV_Liveness)) { if (isVerbose(IceV_Liveness)) {
......
...@@ -64,7 +64,8 @@ public: ...@@ -64,7 +64,8 @@ public:
/// \name Manage the name and return type of the function being translated. /// \name Manage the name and return type of the function being translated.
/// @{ /// @{
void setFunctionName(const IceString &Name) { FunctionName = Name; } void setFunctionName(const IceString &Name) { FunctionName = Name; }
IceString getFunctionName() const { return FunctionName; } const IceString &getFunctionName() const { return FunctionName; }
IceString getFunctionNameAndSize() const;
void setReturnType(Type Ty) { ReturnType = Ty; } void setReturnType(Type Ty) { ReturnType = Ty; }
Type getReturnType() const { return ReturnType; } Type getReturnType() const { return ReturnType; }
/// @} /// @}
......
...@@ -185,6 +185,13 @@ cl::opt<bool> ...@@ -185,6 +185,13 @@ cl::opt<bool>
cl::desc("Randomize register allocation"), cl::desc("Randomize register allocation"),
cl::init(false)); cl::init(false));
/// Allow failsafe access to registers that were restricted via -reg-use or
/// -reg-exclude. Exposed on the command line as -reg-reserve; off by default.
cl::opt<bool>
RegAllocReserve("reg-reserve",
cl::desc("Let register allocation use reserve registers"),
cl::init(false));
/// Repeat register allocation until convergence. /// Repeat register allocation until convergence.
cl::opt<bool> cl::opt<bool>
RepeatRegAlloc("regalloc-repeat", RepeatRegAlloc("regalloc-repeat",
...@@ -545,6 +552,7 @@ void ClFlags::getParsedClFlags(ClFlags &OutFlags) { ...@@ -545,6 +552,7 @@ void ClFlags::getParsedClFlags(ClFlags &OutFlags) {
OutFlags.setShouldReorderBasicBlocks(::ReorderBasicBlocks); OutFlags.setShouldReorderBasicBlocks(::ReorderBasicBlocks);
OutFlags.setShouldDoNopInsertion(::ShouldDoNopInsertion); OutFlags.setShouldDoNopInsertion(::ShouldDoNopInsertion);
OutFlags.setShouldRandomizeRegAlloc(::RandomizeRegisterAllocation); OutFlags.setShouldRandomizeRegAlloc(::RandomizeRegisterAllocation);
OutFlags.setRegAllocReserve(::RegAllocReserve);
OutFlags.setShouldRepeatRegAlloc(::RepeatRegAlloc); OutFlags.setShouldRepeatRegAlloc(::RepeatRegAlloc);
OutFlags.setShouldReorderFunctions(::ReorderFunctions); OutFlags.setShouldReorderFunctions(::ReorderFunctions);
OutFlags.setShouldReorderGlobalVariables(::ReorderGlobalVariables); OutFlags.setShouldReorderGlobalVariables(::ReorderGlobalVariables);
......
...@@ -169,6 +169,11 @@ public: ...@@ -169,6 +169,11 @@ public:
/// Set ClFlags::RandomRegAlloc to a new value /// Set ClFlags::RandomRegAlloc to a new value
void setShouldRandomizeRegAlloc(bool NewValue) { RandomRegAlloc = NewValue; } void setShouldRandomizeRegAlloc(bool NewValue) { RandomRegAlloc = NewValue; }
/// Get the value of ClFlags::RegAllocReserve, the flag populated from the
/// -reg-reserve command-line option.
bool getRegAllocReserve() const { return RegAllocReserve; }
/// Set ClFlags::RegAllocReserve (the flag behind the -reg-reserve
/// command-line option) to a new value.
void setRegAllocReserve(bool NewValue) { RegAllocReserve = NewValue; }
/// Get the value of ClFlags::RepeatRegAlloc /// Get the value of ClFlags::RepeatRegAlloc
bool shouldRepeatRegAlloc() const { return RepeatRegAlloc; } bool shouldRepeatRegAlloc() const { return RepeatRegAlloc; }
/// Set ClFlags::RepeatRegAlloc to a new value /// Set ClFlags::RepeatRegAlloc to a new value
...@@ -425,6 +430,8 @@ private: ...@@ -425,6 +430,8 @@ private:
bool RandomNopInsertion; bool RandomNopInsertion;
/// see anonymous_namespace{IceClFlags.cpp}::RandomizeRegisterAllocation /// see anonymous_namespace{IceClFlags.cpp}::RandomizeRegisterAllocation
bool RandomRegAlloc; bool RandomRegAlloc;
/// see anonymous_namespace{IceClFlags.cpp}::RegAllocReserve
bool RegAllocReserve;
/// see anonymous_namespace{IceClFlags.cpp}::RepeatRegAlloc /// see anonymous_namespace{IceClFlags.cpp}::RepeatRegAlloc
bool RepeatRegAlloc; bool RepeatRegAlloc;
/// see anonymous_namespace{IceClFlags.cpp}::ReorderBasicBlocks /// see anonymous_namespace{IceClFlags.cpp}::ReorderBasicBlocks
......
...@@ -309,7 +309,8 @@ void GlobalContext::translateFunctions() { ...@@ -309,7 +309,8 @@ void GlobalContext::translateFunctions() {
getErrorStatus()->assign(EC_Translation); getErrorStatus()->assign(EC_Translation);
OstreamLocker L(this); OstreamLocker L(this);
getStrError() << "ICE translation error: " << Func->getFunctionName() getStrError() << "ICE translation error: " << Func->getFunctionName()
<< ": " << Func->getError() << "\n"; << ": " << Func->getError() << ": "
<< Func->getFunctionNameAndSize() << "\n";
Item = new EmitterWorkItem(Func->getSequenceNumber()); Item = new EmitterWorkItem(Func->getSequenceNumber());
} else { } else {
Func->getAssembler<>()->setInternal(Func->getInternal()); Func->getAssembler<>()->setInternal(Func->getInternal());
...@@ -320,7 +321,7 @@ void GlobalContext::translateFunctions() { ...@@ -320,7 +321,7 @@ void GlobalContext::translateFunctions() {
// The Cfg has already emitted into the assembly buffer, so // The Cfg has already emitted into the assembly buffer, so
// stats have been fully collected into this thread's TLS. // stats have been fully collected into this thread's TLS.
// Dump them before TLS is reset for the next Cfg. // Dump them before TLS is reset for the next Cfg.
dumpStats(Func->getFunctionName()); dumpStats(Func->getFunctionNameAndSize());
Assembler *Asm = Func->releaseAssembler(); Assembler *Asm = Func->releaseAssembler();
// Copy relevant fields into Asm before Func is deleted. // Copy relevant fields into Asm before Func is deleted.
Asm->setFunctionName(Func->getFunctionName()); Asm->setFunctionName(Func->getFunctionName());
...@@ -549,7 +550,7 @@ void GlobalContext::emitItems() { ...@@ -549,7 +550,7 @@ void GlobalContext::emitItems() {
Cfg::setCurrentCfg(Func.get()); Cfg::setCurrentCfg(Func.get());
Func->emit(); Func->emit();
Cfg::setCurrentCfg(nullptr); Cfg::setCurrentCfg(nullptr);
dumpStats(Func->getFunctionName()); dumpStats(Func->getFunctionNameAndSize());
} break; } break;
} }
} }
......
...@@ -61,7 +61,9 @@ private: ...@@ -61,7 +61,9 @@ private:
int32_t PreferReg = Variable::NoRegister; int32_t PreferReg = Variable::NoRegister;
bool AllowOverlap = false; bool AllowOverlap = false;
llvm::SmallBitVector RegMask; llvm::SmallBitVector RegMask;
llvm::SmallBitVector RegMaskUnfiltered;
llvm::SmallBitVector Free; llvm::SmallBitVector Free;
llvm::SmallBitVector FreeUnfiltered;
llvm::SmallBitVector PrecoloredUnhandledMask; // Note: only used for dumping llvm::SmallBitVector PrecoloredUnhandledMask; // Note: only used for dumping
llvm::SmallVector<RegWeight, REGS_SIZE> Weights; llvm::SmallVector<RegWeight, REGS_SIZE> Weights;
}; };
...@@ -98,7 +100,7 @@ private: ...@@ -98,7 +100,7 @@ private:
void filterFreeWithPrecoloredRanges(IterationState &Iter); void filterFreeWithPrecoloredRanges(IterationState &Iter);
void allocatePrecoloredRegister(Variable *Cur); void allocatePrecoloredRegister(Variable *Cur);
void allocatePreferredRegister(IterationState &Iter); void allocatePreferredRegister(IterationState &Iter);
void allocateFreeRegister(IterationState &Iter); void allocateFreeRegister(IterationState &Iter, bool Filtered);
void handleNoFreeRegisters(IterationState &Iter); void handleNoFreeRegisters(IterationState &Iter);
void assignFinalRegisters(const llvm::SmallBitVector &RegMaskFull, void assignFinalRegisters(const llvm::SmallBitVector &RegMaskFull,
const llvm::SmallBitVector &PreDefinedRegisters, const llvm::SmallBitVector &PreDefinedRegisters,
...@@ -130,6 +132,7 @@ private: ...@@ -130,6 +132,7 @@ private:
bool FindOverlap = false; bool FindOverlap = false;
const bool Verbose; const bool Verbose;
const bool UseReserve;
}; };
} // end of namespace Ice } // end of namespace Ice
......
...@@ -275,8 +275,15 @@ public: ...@@ -275,8 +275,15 @@ public:
virtual llvm::SmallBitVector getRegisterSet(RegSetMask Include, virtual llvm::SmallBitVector getRegisterSet(RegSetMask Include,
RegSetMask Exclude) const = 0; RegSetMask Exclude) const = 0;
/// Get the set of physical registers available for the specified Variable's
/// register class, applying register restrictions from the command line.
virtual const llvm::SmallBitVector & virtual const llvm::SmallBitVector &
getRegistersForVariable(const Variable *Var) const = 0; getRegistersForVariable(const Variable *Var) const = 0;
/// Get the set of *all* physical registers available for the specified
/// Variable's register class, *not* applying register restrictions from the
/// command line.
virtual const llvm::SmallBitVector &
getAllRegistersForVariable(const Variable *Var) const = 0;
virtual const llvm::SmallBitVector &getAliasesForRegister(SizeT) const = 0; virtual const llvm::SmallBitVector &getAliasesForRegister(SizeT) const = 0;
void regAlloc(RegAllocKind Kind); void regAlloc(RegAllocKind Kind);
......
...@@ -342,6 +342,9 @@ void TargetARM32::staticInit(GlobalContext *Ctx) { ...@@ -342,6 +342,9 @@ void TargetARM32::staticInit(GlobalContext *Ctx) {
TypeToRegisterSet[IceType_v4i32] = VectorRegisters; TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
TypeToRegisterSet[IceType_v4f32] = VectorRegisters; TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
for (size_t i = 0; i < llvm::array_lengthof(TypeToRegisterSet); ++i)
TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i];
filterTypeToRegisterSet( filterTypeToRegisterSet(
Ctx, RegARM32::Reg_NUM, TypeToRegisterSet, Ctx, RegARM32::Reg_NUM, TypeToRegisterSet,
llvm::array_lengthof(TypeToRegisterSet), [](int32_t RegNum) -> IceString { llvm::array_lengthof(TypeToRegisterSet), [](int32_t RegNum) -> IceString {
...@@ -6514,6 +6517,8 @@ void TargetHeaderARM32::lower() { ...@@ -6514,6 +6517,8 @@ void TargetHeaderARM32::lower() {
} }
llvm::SmallBitVector TargetARM32::TypeToRegisterSet[RegARM32::RCARM32_NUM]; llvm::SmallBitVector TargetARM32::TypeToRegisterSet[RegARM32::RCARM32_NUM];
llvm::SmallBitVector
TargetARM32::TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM];
llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM]; llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM];
} // end of namespace ARM32 } // end of namespace ARM32
......
...@@ -88,6 +88,12 @@ public: ...@@ -88,6 +88,12 @@ public:
assert(RC < RC_Target); assert(RC < RC_Target);
return TypeToRegisterSet[RC]; return TypeToRegisterSet[RC];
} }
/// Return the register set recorded in TypeToRegisterSetUnfiltered for the
/// register class of Var, i.e. the set saved before command-line register
/// restrictions were applied to TypeToRegisterSet.
const llvm::SmallBitVector &
getAllRegistersForVariable(const Variable *Var) const override {
  const RegClass Class = Var->getRegClass();
  assert(Class < RC_Target);
  return TypeToRegisterSetUnfiltered[Class];
}
const llvm::SmallBitVector &getAliasesForRegister(SizeT Reg) const override { const llvm::SmallBitVector &getAliasesForRegister(SizeT Reg) const override {
return RegisterAliases[Reg]; return RegisterAliases[Reg];
} }
...@@ -1020,6 +1026,8 @@ protected: ...@@ -1020,6 +1026,8 @@ protected:
uint32_t MaxOutArgsSizeBytes = 0; uint32_t MaxOutArgsSizeBytes = 0;
// TODO(jpp): std::array instead of array. // TODO(jpp): std::array instead of array.
static llvm::SmallBitVector TypeToRegisterSet[RegARM32::RCARM32_NUM]; static llvm::SmallBitVector TypeToRegisterSet[RegARM32::RCARM32_NUM];
static llvm::SmallBitVector
TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM];
static llvm::SmallBitVector RegisterAliases[RegARM32::Reg_NUM]; static llvm::SmallBitVector RegisterAliases[RegARM32::Reg_NUM];
llvm::SmallBitVector RegsUsed; llvm::SmallBitVector RegsUsed;
VarList PhysicalRegisters[IceType_NUM]; VarList PhysicalRegisters[IceType_NUM];
......
...@@ -116,6 +116,9 @@ void TargetMIPS32::staticInit(GlobalContext *Ctx) { ...@@ -116,6 +116,9 @@ void TargetMIPS32::staticInit(GlobalContext *Ctx) {
TypeToRegisterSet[IceType_v4i32] = VectorRegisters; TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
TypeToRegisterSet[IceType_v4f32] = VectorRegisters; TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
for (size_t i = 0; i < llvm::array_lengthof(TypeToRegisterSet); ++i)
TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i];
filterTypeToRegisterSet(Ctx, RegMIPS32::Reg_NUM, TypeToRegisterSet, filterTypeToRegisterSet(Ctx, RegMIPS32::Reg_NUM, TypeToRegisterSet,
llvm::array_lengthof(TypeToRegisterSet), llvm::array_lengthof(TypeToRegisterSet),
RegMIPS32::getRegName, getRegClassName); RegMIPS32::getRegName, getRegClassName);
...@@ -1126,6 +1129,7 @@ void TargetHeaderMIPS32::lower() { ...@@ -1126,6 +1129,7 @@ void TargetHeaderMIPS32::lower() {
} }
llvm::SmallBitVector TargetMIPS32::TypeToRegisterSet[RCMIPS32_NUM]; llvm::SmallBitVector TargetMIPS32::TypeToRegisterSet[RCMIPS32_NUM];
llvm::SmallBitVector TargetMIPS32::TypeToRegisterSetUnfiltered[RCMIPS32_NUM];
llvm::SmallBitVector TargetMIPS32::RegisterAliases[RegMIPS32::Reg_NUM]; llvm::SmallBitVector TargetMIPS32::RegisterAliases[RegMIPS32::Reg_NUM];
} // end of namespace MIPS32 } // end of namespace MIPS32
......
...@@ -57,6 +57,12 @@ public: ...@@ -57,6 +57,12 @@ public:
assert(RC < RC_Target); assert(RC < RC_Target);
return TypeToRegisterSet[RC]; return TypeToRegisterSet[RC];
} }
/// Return the register set recorded in TypeToRegisterSetUnfiltered for the
/// register class of Var, i.e. the set saved before command-line register
/// restrictions were applied to TypeToRegisterSet.
const llvm::SmallBitVector &
getAllRegistersForVariable(const Variable *Var) const override {
  const RegClass Class = Var->getRegClass();
  assert(Class < RC_Target);
  return TypeToRegisterSetUnfiltered[Class];
}
const llvm::SmallBitVector &getAliasesForRegister(SizeT Reg) const override { const llvm::SmallBitVector &getAliasesForRegister(SizeT Reg) const override {
return RegisterAliases[Reg]; return RegisterAliases[Reg];
} }
...@@ -263,6 +269,7 @@ protected: ...@@ -263,6 +269,7 @@ protected:
bool UsesFramePointer = false; bool UsesFramePointer = false;
bool NeedsStackAlignment = false; bool NeedsStackAlignment = false;
static llvm::SmallBitVector TypeToRegisterSet[RCMIPS32_NUM]; static llvm::SmallBitVector TypeToRegisterSet[RCMIPS32_NUM];
static llvm::SmallBitVector TypeToRegisterSetUnfiltered[RCMIPS32_NUM];
static llvm::SmallBitVector RegisterAliases[RegMIPS32::Reg_NUM]; static llvm::SmallBitVector RegisterAliases[RegMIPS32::Reg_NUM];
llvm::SmallBitVector RegsUsed; llvm::SmallBitVector RegsUsed;
VarList PhysicalRegisters[IceType_NUM]; VarList PhysicalRegisters[IceType_NUM];
......
...@@ -107,6 +107,10 @@ std::array<llvm::SmallBitVector, RCX86_NUM> ...@@ -107,6 +107,10 @@ std::array<llvm::SmallBitVector, RCX86_NUM>
TargetX86Base<X8632::Traits>::TypeToRegisterSet = {{}}; TargetX86Base<X8632::Traits>::TypeToRegisterSet = {{}};
template <> template <>
std::array<llvm::SmallBitVector, RCX86_NUM>
TargetX86Base<X8632::Traits>::TypeToRegisterSetUnfiltered = {{}};
template <>
std::array<llvm::SmallBitVector, std::array<llvm::SmallBitVector,
TargetX86Base<X8632::Traits>::Traits::RegisterSet::Reg_NUM> TargetX86Base<X8632::Traits>::Traits::RegisterSet::Reg_NUM>
TargetX86Base<X8632::Traits>::RegisterAliases = {{}}; TargetX86Base<X8632::Traits>::RegisterAliases = {{}};
......
...@@ -107,6 +107,10 @@ std::array<llvm::SmallBitVector, RCX86_NUM> ...@@ -107,6 +107,10 @@ std::array<llvm::SmallBitVector, RCX86_NUM>
TargetX86Base<X8664::Traits>::TypeToRegisterSet = {{}}; TargetX86Base<X8664::Traits>::TypeToRegisterSet = {{}};
template <> template <>
std::array<llvm::SmallBitVector, RCX86_NUM>
TargetX86Base<X8664::Traits>::TypeToRegisterSetUnfiltered = {{}};
template <>
std::array<llvm::SmallBitVector, std::array<llvm::SmallBitVector,
TargetX86Base<X8664::Traits>::Traits::RegisterSet::Reg_NUM> TargetX86Base<X8664::Traits>::Traits::RegisterSet::Reg_NUM>
TargetX86Base<X8664::Traits>::RegisterAliases = {{}}; TargetX86Base<X8664::Traits>::RegisterAliases = {{}};
......
...@@ -124,6 +124,13 @@ public: ...@@ -124,6 +124,13 @@ public:
return TypeToRegisterSet[RC]; return TypeToRegisterSet[RC];
} }
/// Return the register set recorded in TypeToRegisterSetUnfiltered for the
/// register class of Var, i.e. the set saved before command-line register
/// restrictions were applied to TypeToRegisterSet.
const llvm::SmallBitVector &
getAllRegistersForVariable(const Variable *Var) const override {
  const RegClass Class = Var->getRegClass();
  assert(static_cast<RegClassX86>(Class) < RCX86_NUM);
  return TypeToRegisterSetUnfiltered[Class];
}
const llvm::SmallBitVector &getAliasesForRegister(SizeT Reg) const override { const llvm::SmallBitVector &getAliasesForRegister(SizeT Reg) const override {
assert(Reg < Traits::RegisterSet::Reg_NUM); assert(Reg < Traits::RegisterSet::Reg_NUM);
return RegisterAliases[Reg]; return RegisterAliases[Reg];
...@@ -974,6 +981,8 @@ protected: ...@@ -974,6 +981,8 @@ protected:
bool PrologEmitsFixedAllocas = false; bool PrologEmitsFixedAllocas = false;
uint32_t MaxOutArgsSizeBytes = 0; uint32_t MaxOutArgsSizeBytes = 0;
static std::array<llvm::SmallBitVector, RCX86_NUM> TypeToRegisterSet; static std::array<llvm::SmallBitVector, RCX86_NUM> TypeToRegisterSet;
static std::array<llvm::SmallBitVector, RCX86_NUM>
TypeToRegisterSetUnfiltered;
static std::array<llvm::SmallBitVector, Traits::RegisterSet::Reg_NUM> static std::array<llvm::SmallBitVector, Traits::RegisterSet::Reg_NUM>
RegisterAliases; RegisterAliases;
llvm::SmallBitVector RegsUsed; llvm::SmallBitVector RegsUsed;
......
...@@ -379,6 +379,8 @@ template <typename TraitsType> ...@@ -379,6 +379,8 @@ template <typename TraitsType>
void TargetX86Base<TraitsType>::staticInit(GlobalContext *Ctx) { void TargetX86Base<TraitsType>::staticInit(GlobalContext *Ctx) {
Traits::initRegisterSet(Ctx->getFlags(), &TypeToRegisterSet, Traits::initRegisterSet(Ctx->getFlags(), &TypeToRegisterSet,
&RegisterAliases); &RegisterAliases);
for (size_t i = 0; i < TypeToRegisterSet.size(); ++i)
TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i];
filterTypeToRegisterSet(Ctx, Traits::RegisterSet::Reg_NUM, filterTypeToRegisterSet(Ctx, Traits::RegisterSet::Reg_NUM,
TypeToRegisterSet.data(), TypeToRegisterSet.size(), TypeToRegisterSet.data(), TypeToRegisterSet.size(),
Traits::getRegName, getRegClassName); Traits::getRegName, getRegClassName);
...@@ -1945,8 +1947,6 @@ void TargetX86Base<TraitsType>::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -1945,8 +1947,6 @@ void TargetX86Base<TraitsType>::lowerArithmetic(const InstArithmetic *Inst) {
Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Mem); Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Mem);
_mov(T_1, Src0Hi); _mov(T_1, Src0Hi);
_imul(T_1, Src1Lo); _imul(T_1, Src1Lo);
_mov(T_2, Src1Hi);
_imul(T_2, Src0Lo);
_mov(T_3, Src0Lo, Traits::RegisterSet::Reg_eax); _mov(T_3, Src0Lo, Traits::RegisterSet::Reg_eax);
_mul(T_4Lo, T_3, Src1Lo); _mul(T_4Lo, T_3, Src1Lo);
// The mul instruction produces two dest variables, edx:eax. We create a // The mul instruction produces two dest variables, edx:eax. We create a
...@@ -1954,6 +1954,8 @@ void TargetX86Base<TraitsType>::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -1954,6 +1954,8 @@ void TargetX86Base<TraitsType>::lowerArithmetic(const InstArithmetic *Inst) {
Context.insert<InstFakeDef>(T_4Hi, T_4Lo); Context.insert<InstFakeDef>(T_4Hi, T_4Lo);
_mov(DestLo, T_4Lo); _mov(DestLo, T_4Lo);
_add(T_4Hi, T_1); _add(T_4Hi, T_1);
_mov(T_2, Src1Hi);
_imul(T_2, Src0Lo);
_add(T_4Hi, T_2); _add(T_4Hi, T_2);
_mov(DestHi, T_4Hi); _mov(DestHi, T_4Hi);
} break; } break;
...@@ -5801,8 +5803,8 @@ void TargetX86Base<TraitsType>::lowerStore(const InstStore *Inst) { ...@@ -5801,8 +5803,8 @@ void TargetX86Base<TraitsType>::lowerStore(const InstStore *Inst) {
if (!Traits::Is64Bit && Ty == IceType_i64) { if (!Traits::Is64Bit && Ty == IceType_i64) {
Value = legalizeUndef(Value); Value = legalizeUndef(Value);
Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm); Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm);
Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm);
_store(ValueHi, llvm::cast<X86OperandMem>(hiOperand(NewAddr))); _store(ValueHi, llvm::cast<X86OperandMem>(hiOperand(NewAddr)));
Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm);
_store(ValueLo, llvm::cast<X86OperandMem>(loOperand(NewAddr))); _store(ValueLo, llvm::cast<X86OperandMem>(loOperand(NewAddr)));
} else if (isVectorType(Ty)) { } else if (isVectorType(Ty)) {
_storep(legalizeToReg(Value), NewAddr); _storep(legalizeToReg(Value), NewAddr);
......
...@@ -125,7 +125,7 @@ entry: ...@@ -125,7 +125,7 @@ entry:
; OPTM1-LABEL: pass64BitConstArg ; OPTM1-LABEL: pass64BitConstArg
; OPTM1: sub esp ; OPTM1: sub esp
; OPTM1: mov DWORD PTR [esp+0x4] ; OPTM1: mov DWORD PTR [esp+0x4]
; OPTM1-NEXT: mov DWORD PTR [esp] ; OPTM1: mov DWORD PTR [esp]
; OPTM1-NEXT: mov DWORD PTR [esp+0x8],0x7b ; OPTM1-NEXT: mov DWORD PTR [esp+0x8],0x7b
; Bundle padding might be added (so not using -NEXT). ; Bundle padding might be added (so not using -NEXT).
; OPTM1: mov DWORD PTR [esp+0x10],0xdeadbeef ; OPTM1: mov DWORD PTR [esp+0x10],0xdeadbeef
...@@ -277,16 +277,16 @@ entry: ...@@ -277,16 +277,16 @@ entry:
} }
; CHECK-LABEL: mul64BitSigned ; CHECK-LABEL: mul64BitSigned
; CHECK: imul ; CHECK: imul
; CHECK: imul
; CHECK: mul ; CHECK: mul
; CHECK: add ; CHECK: add
; CHECK: imul
; CHECK: add ; CHECK: add
; ;
; OPTM1-LABEL: mul64BitSigned ; OPTM1-LABEL: mul64BitSigned
; OPTM1: imul ; OPTM1: imul
; OPTM1: imul
; OPTM1: mul ; OPTM1: mul
; OPTM1: add ; OPTM1: add
; OPTM1: imul
; OPTM1: add ; OPTM1: add
; ARM32-LABEL: mul64BitSigned ; ARM32-LABEL: mul64BitSigned
...@@ -302,16 +302,16 @@ entry: ...@@ -302,16 +302,16 @@ entry:
} }
; CHECK-LABEL: mul64BitUnsigned ; CHECK-LABEL: mul64BitUnsigned
; CHECK: imul ; CHECK: imul
; CHECK: imul
; CHECK: mul ; CHECK: mul
; CHECK: add ; CHECK: add
; CHECK: imul
; CHECK: add ; CHECK: add
; ;
; OPTM1-LABEL: mul64BitUnsigned ; OPTM1-LABEL: mul64BitUnsigned
; OPTM1: imul ; OPTM1: imul
; OPTM1: imul
; OPTM1: mul ; OPTM1: mul
; OPTM1: add ; OPTM1: add
; OPTM1: imul
; OPTM1: add ; OPTM1: add
; ARM32-LABEL: mul64BitUnsigned ; ARM32-LABEL: mul64BitUnsigned
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment