Commit 21fd103c by David Sehr

Eliminate stack adjustment for float-returning functions

This changes AdjustStack so that it can both grow and shrink the stack, and uses that operation exclusively to update the StackAdjustment variable during lowering, rather than during call emission as before. BUG= R=stichnot@chromium.org Review URL: https://codereview.chromium.org/1449523002 .
parent 3607b6c9
...@@ -157,7 +157,7 @@ public: ...@@ -157,7 +157,7 @@ public:
const Operand *Src, const Operand *Src,
const typename Traits::Assembler::GPREmitterShiftOp &Emitter); const typename Traits::Assembler::GPREmitterShiftOp &Emitter);
static X86TargetLowering *getTarget(const Cfg* Func) { static X86TargetLowering *getTarget(const Cfg *Func) {
return static_cast<X86TargetLowering *>(Func->getTarget()); return static_cast<X86TargetLowering *>(Func->getTarget());
} }
...@@ -405,8 +405,9 @@ private: ...@@ -405,8 +405,9 @@ private:
InstX86Jmp(Cfg *Func, Operand *Target); InstX86Jmp(Cfg *Func, Operand *Target);
}; };
/// AdjustStack instruction - subtracts esp by the given amount and updates the /// AdjustStack instruction - grows the stack (moves esp down) by the given
/// stack offset during code emission. /// amount. If the amount is negative, it shrinks the stack (moves esp up).
/// It also updates the target lowering StackAdjustment during code emission.
template <class Machine> template <class Machine>
class InstX86AdjustStack final : public InstX86Base<Machine> { class InstX86AdjustStack final : public InstX86Base<Machine> {
InstX86AdjustStack() = delete; InstX86AdjustStack() = delete;
...@@ -414,7 +415,7 @@ class InstX86AdjustStack final : public InstX86Base<Machine> { ...@@ -414,7 +415,7 @@ class InstX86AdjustStack final : public InstX86Base<Machine> {
InstX86AdjustStack &operator=(const InstX86AdjustStack &) = delete; InstX86AdjustStack &operator=(const InstX86AdjustStack &) = delete;
public: public:
static InstX86AdjustStack *create(Cfg *Func, SizeT Amount, Variable *Esp) { static InstX86AdjustStack *create(Cfg *Func, int32_t Amount, Variable *Esp) {
return new (Func->allocate<InstX86AdjustStack>()) return new (Func->allocate<InstX86AdjustStack>())
InstX86AdjustStack(Func, Amount, Esp); InstX86AdjustStack(Func, Amount, Esp);
} }
...@@ -427,8 +428,8 @@ public: ...@@ -427,8 +428,8 @@ public:
} }
private: private:
InstX86AdjustStack(Cfg *Func, SizeT Amount, Variable *Esp); InstX86AdjustStack(Cfg *Func, int32_t Amount, Variable *Esp);
SizeT Amount; const int32_t Amount;
}; };
/// Call instruction. Arguments should have already been pushed. /// Call instruction. Arguments should have already been pushed.
......
...@@ -57,7 +57,7 @@ InstX86FakeRMW<Machine>::InstX86FakeRMW(Cfg *Func, Operand *Data, Operand *Addr, ...@@ -57,7 +57,7 @@ InstX86FakeRMW<Machine>::InstX86FakeRMW(Cfg *Func, Operand *Data, Operand *Addr,
} }
template <class Machine> template <class Machine>
InstX86AdjustStack<Machine>::InstX86AdjustStack(Cfg *Func, SizeT Amount, InstX86AdjustStack<Machine>::InstX86AdjustStack(Cfg *Func, int32_t Amount,
Variable *Esp) Variable *Esp)
: InstX86Base<Machine>(Func, InstX86Base<Machine>::Adjuststack, 1, Esp), : InstX86Base<Machine>(Func, InstX86Base<Machine>::Adjuststack, 1, Esp),
Amount(Amount) { Amount(Amount) {
...@@ -581,7 +581,6 @@ void InstX86Call<Machine>::emit(const Cfg *Func) const { ...@@ -581,7 +581,6 @@ void InstX86Call<Machine>::emit(const Cfg *Func) const {
Str << "*"; Str << "*";
CallTarget->emit(Func); CallTarget->emit(Func);
} }
Target->resetStackAdjustment();
} }
template <class Machine> template <class Machine>
...@@ -610,7 +609,6 @@ void InstX86Call<Machine>::emitIAS(const Cfg *Func) const { ...@@ -610,7 +609,6 @@ void InstX86Call<Machine>::emitIAS(const Cfg *Func) const {
} else { } else {
llvm_unreachable("Unexpected operand type"); llvm_unreachable("Unexpected operand type");
} }
Target->resetStackAdjustment();
} }
template <class Machine> template <class Machine>
...@@ -1597,8 +1595,7 @@ void InstX86Cmov<Machine>::emitIAS(const Cfg *Func) const { ...@@ -1597,8 +1595,7 @@ void InstX86Cmov<Machine>::emitIAS(const Cfg *Func) const {
this->getDest()->getRegNum()), this->getDest()->getRegNum()),
InstX86Base<Machine>::Traits::getEncodedGPR(SrcVar->getRegNum())); InstX86Base<Machine>::Traits::getEncodedGPR(SrcVar->getRegNum()));
} else { } else {
Asm->cmov( Asm->cmov(SrcTy, Condition, InstX86Base<Machine>::Traits::getEncodedGPR(
SrcTy, Condition, InstX86Base<Machine>::Traits::getEncodedGPR(
this->getDest()->getRegNum()), this->getDest()->getRegNum()),
Target->stackVarToAsmOperand(SrcVar)); Target->stackVarToAsmOperand(SrcVar));
} }
...@@ -2635,16 +2632,14 @@ void InstX86Fstp<Machine>::emit(const Cfg *Func) const { ...@@ -2635,16 +2632,14 @@ void InstX86Fstp<Machine>::emit(const Cfg *Func) const {
return; return;
} }
Type Ty = this->getDest()->getType(); Type Ty = this->getDest()->getType();
size_t Width = typeWidthInBytes(Ty);
if (!this->getDest()->hasReg()) { if (!this->getDest()->hasReg()) {
Str << "\tfstp" << this->getFldString(Ty) << "\t"; Str << "\tfstp" << this->getFldString(Ty) << "\t";
this->getDest()->emit(Func); this->getDest()->emit(Func);
return; return;
} }
// Dest is a physical (xmm) register, so st(0) needs to go through memory. // Dest is a physical (xmm) register, so st(0) needs to go through memory.
// Hack this by creating a temporary stack slot, spilling st(0) there, // Hack this by using caller-reserved memory at the top of stack, spilling
// loading it into the xmm register, and deallocating the stack slot. // st(0) there, and loading it into the xmm register.
Str << "\tsubl\t$" << Width << ", %esp\n";
Str << "\tfstp" << this->getFldString(Ty) << "\t" Str << "\tfstp" << this->getFldString(Ty) << "\t"
<< "(%esp)\n"; << "(%esp)\n";
Str << "\tmov" << InstX86Base<Machine>::Traits::TypeAttributes[Ty].SdSsString Str << "\tmov" << InstX86Base<Machine>::Traits::TypeAttributes[Ty].SdSsString
...@@ -2652,7 +2647,6 @@ void InstX86Fstp<Machine>::emit(const Cfg *Func) const { ...@@ -2652,7 +2647,6 @@ void InstX86Fstp<Machine>::emit(const Cfg *Func) const {
<< "(%esp), "; << "(%esp), ";
this->getDest()->emit(Func); this->getDest()->emit(Func);
Str << "\n"; Str << "\n";
Str << "\taddl\t$" << Width << ", %esp";
} }
template <class Machine> template <class Machine>
...@@ -2676,11 +2670,8 @@ void InstX86Fstp<Machine>::emitIAS(const Cfg *Func) const { ...@@ -2676,11 +2670,8 @@ void InstX86Fstp<Machine>::emitIAS(const Cfg *Func) const {
Asm->fstp(Ty, StackAddr); Asm->fstp(Ty, StackAddr);
} else { } else {
// Dest is a physical (xmm) register, so st(0) needs to go through memory. // Dest is a physical (xmm) register, so st(0) needs to go through memory.
// Hack this by creating a temporary stack slot, spilling st(0) there, // Hack this by using caller-reserved memory at the top of stack, spilling
// loading it into the xmm register, and deallocating the stack slot. // st(0) there, and loading it into the xmm register.
Immediate Width(typeWidthInBytes(Ty));
Asm->sub(IceType_i32,
InstX86Base<Machine>::Traits::RegisterSet::Encoded_Reg_esp, Width);
typename InstX86Base<Machine>::Traits::Address StackSlot = typename InstX86Base<Machine>::Traits::Address StackSlot =
typename InstX86Base<Machine>::Traits::Address( typename InstX86Base<Machine>::Traits::Address(
InstX86Base<Machine>::Traits::RegisterSet::Encoded_Reg_esp, 0, InstX86Base<Machine>::Traits::RegisterSet::Encoded_Reg_esp, 0,
...@@ -2689,8 +2680,6 @@ void InstX86Fstp<Machine>::emitIAS(const Cfg *Func) const { ...@@ -2689,8 +2680,6 @@ void InstX86Fstp<Machine>::emitIAS(const Cfg *Func) const {
Asm->movss(Ty, Asm->movss(Ty,
InstX86Base<Machine>::Traits::getEncodedXmm(Dest->getRegNum()), InstX86Base<Machine>::Traits::getEncodedXmm(Dest->getRegNum()),
StackSlot); StackSlot);
Asm->add(IceType_i32,
InstX86Base<Machine>::Traits::RegisterSet::Encoded_Reg_esp, Width);
} }
} }
...@@ -2932,7 +2921,10 @@ void InstX86AdjustStack<Machine>::emit(const Cfg *Func) const { ...@@ -2932,7 +2921,10 @@ void InstX86AdjustStack<Machine>::emit(const Cfg *Func) const {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
Ostream &Str = Func->getContext()->getStrEmit(); Ostream &Str = Func->getContext()->getStrEmit();
if (Amount > 0)
Str << "\tsubl\t$" << Amount << ", %esp"; Str << "\tsubl\t$" << Amount << ", %esp";
else
Str << "\taddl\t$" << -Amount << ", %esp";
auto *Target = InstX86Base<Machine>::getTarget(Func); auto *Target = InstX86Base<Machine>::getTarget(Func);
Target->updateStackAdjustment(Amount); Target->updateStackAdjustment(Amount);
} }
...@@ -2941,9 +2933,14 @@ template <class Machine> ...@@ -2941,9 +2933,14 @@ template <class Machine>
void InstX86AdjustStack<Machine>::emitIAS(const Cfg *Func) const { void InstX86AdjustStack<Machine>::emitIAS(const Cfg *Func) const {
typename InstX86Base<Machine>::Traits::Assembler *Asm = typename InstX86Base<Machine>::Traits::Assembler *Asm =
Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>(); Func->getAssembler<typename InstX86Base<Machine>::Traits::Assembler>();
if (Amount > 0)
Asm->sub(IceType_i32, Asm->sub(IceType_i32,
InstX86Base<Machine>::Traits::RegisterSet::Encoded_Reg_esp, InstX86Base<Machine>::Traits::RegisterSet::Encoded_Reg_esp,
Immediate(Amount)); Immediate(Amount));
else
Asm->add(IceType_i32,
InstX86Base<Machine>::Traits::RegisterSet::Encoded_Reg_esp,
Immediate(-Amount));
auto *Target = InstX86Base<Machine>::getTarget(Func); auto *Target = InstX86Base<Machine>::getTarget(Func);
Target->updateStackAdjustment(Amount); Target->updateStackAdjustment(Amount);
} }
...@@ -2953,7 +2950,10 @@ void InstX86AdjustStack<Machine>::dump(const Cfg *Func) const { ...@@ -2953,7 +2950,10 @@ void InstX86AdjustStack<Machine>::dump(const Cfg *Func) const {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
Ostream &Str = Func->getContext()->getStrDump(); Ostream &Str = Func->getContext()->getStrDump();
if (Amount > 0)
Str << "esp = sub.i32 esp, " << Amount; Str << "esp = sub.i32 esp, " << Amount;
else
Str << "esp = add.i32 esp, " << -Amount;
} }
template <class Machine> template <class Machine>
......
...@@ -131,7 +131,7 @@ void TargetX8632::lowerCall(const InstCall *Instr) { ...@@ -131,7 +131,7 @@ void TargetX8632::lowerCall(const InstCall *Instr) {
OperandList XmmArgs; OperandList XmmArgs;
OperandList StackArgs, StackArgLocations; OperandList StackArgs, StackArgLocations;
uint32_t ParameterAreaSizeBytes = 0; int32_t ParameterAreaSizeBytes = 0;
// Classify each argument operand according to the location where the // Classify each argument operand according to the location where the
// argument is passed. // argument is passed.
...@@ -158,6 +158,13 @@ void TargetX8632::lowerCall(const InstCall *Instr) { ...@@ -158,6 +158,13 @@ void TargetX8632::lowerCall(const InstCall *Instr) {
ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
} }
} }
// Ensure there is enough space for the fstp/movs for floating returns.
Variable *Dest = Instr->getDest();
if (Dest != nullptr && isScalarFloatingType(Dest->getType())) {
ParameterAreaSizeBytes =
std::max(static_cast<size_t>(ParameterAreaSizeBytes),
typeWidthInBytesOnStack(Dest->getType()));
}
// Adjust the parameter area so that the stack is aligned. It is assumed that // Adjust the parameter area so that the stack is aligned. It is assumed that
// the stack is already aligned at the start of the calling sequence. // the stack is already aligned at the start of the calling sequence.
...@@ -197,7 +204,6 @@ void TargetX8632::lowerCall(const InstCall *Instr) { ...@@ -197,7 +204,6 @@ void TargetX8632::lowerCall(const InstCall *Instr) {
} }
// Generate the call instruction. Assign its result to a temporary with high // Generate the call instruction. Assign its result to a temporary with high
// register allocation weight. // register allocation weight.
Variable *Dest = Instr->getDest();
// ReturnReg doubles as ReturnRegLo as necessary. // ReturnReg doubles as ReturnRegLo as necessary.
Variable *ReturnReg = nullptr; Variable *ReturnReg = nullptr;
Variable *ReturnRegHi = nullptr; Variable *ReturnRegHi = nullptr;
...@@ -255,17 +261,24 @@ void TargetX8632::lowerCall(const InstCall *Instr) { ...@@ -255,17 +261,24 @@ void TargetX8632::lowerCall(const InstCall *Instr) {
if (ReturnRegHi) if (ReturnRegHi)
Context.insert(InstFakeDef::create(Func, ReturnRegHi)); Context.insert(InstFakeDef::create(Func, ReturnRegHi));
// Add the appropriate offset to esp. The call instruction takes care of
// resetting the stack offset during emission.
if (ParameterAreaSizeBytes) {
Variable *esp =
Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
_add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
}
// Insert a register-kill pseudo instruction. // Insert a register-kill pseudo instruction.
Context.insert(InstFakeKill::create(Func, NewCall)); Context.insert(InstFakeKill::create(Func, NewCall));
if (Dest != nullptr && isScalarFloatingType(Dest->getType())) {
// Special treatment for an FP function which returns its result in st(0).
// If Dest ends up being a physical xmm register, the fstp emit code will
// route st(0) through the space reserved in the function argument area
// we allocated.
_fstp(Dest);
// Create a fake use of Dest in case it actually isn't used, because st(0)
// still needs to be popped.
Context.insert(InstFakeUse::create(Func, Dest));
}
// Add the appropriate offset to esp.
if (ParameterAreaSizeBytes)
_adjust_stack(-ParameterAreaSizeBytes);
// Generate a FakeUse to keep the call live if necessary. // Generate a FakeUse to keep the call live if necessary.
if (Instr->hasSideEffects() && ReturnReg) { if (Instr->hasSideEffects() && ReturnReg) {
Inst *FakeUse = InstFakeUse::create(Func, ReturnReg); Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
...@@ -293,14 +306,6 @@ void TargetX8632::lowerCall(const InstCall *Instr) { ...@@ -293,14 +306,6 @@ void TargetX8632::lowerCall(const InstCall *Instr) {
_mov(Dest, ReturnReg); _mov(Dest, ReturnReg);
} }
} }
} else if (isScalarFloatingType(Dest->getType())) {
// Special treatment for an FP function which returns its result in st(0).
// If Dest ends up being a physical xmm register, the fstp emit code will
// route st(0) through a temporary stack slot.
_fstp(Dest);
// Create a fake use of Dest in case it actually isn't used, because st(0)
// still needs to be popped.
Context.insert(InstFakeUse::create(Func, Dest));
} }
} }
...@@ -363,11 +368,7 @@ void TargetX8632::lowerRet(const InstRet *Inst) { ...@@ -363,11 +368,7 @@ void TargetX8632::lowerRet(const InstRet *Inst) {
_ret(Reg); _ret(Reg);
// Add a fake use of esp to make sure esp stays alive for the entire // Add a fake use of esp to make sure esp stays alive for the entire
// function. Otherwise post-call esp adjustments get dead-code eliminated. // function. Otherwise post-call esp adjustments get dead-code eliminated.
// TODO: Are there more places where the fake use should be inserted? E.g. keepEspLiveAtExit();
// "void f(int n){while(1) g(n);}" may not have a ret instruction.
Variable *esp =
Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
Context.insert(InstFakeUse::create(Func, esp));
} }
void TargetX8632::addProlog(CfgNode *Node) { void TargetX8632::addProlog(CfgNode *Node) {
......
...@@ -386,11 +386,7 @@ void TargetX8664::lowerRet(const InstRet *Inst) { ...@@ -386,11 +386,7 @@ void TargetX8664::lowerRet(const InstRet *Inst) {
_ret(Reg); _ret(Reg);
// Add a fake use of esp to make sure esp stays alive for the entire // Add a fake use of esp to make sure esp stays alive for the entire
// function. Otherwise post-call esp adjustments get dead-code eliminated. // function. Otherwise post-call esp adjustments get dead-code eliminated.
// TODO: Are there more places where the fake use should be inserted? E.g. keepEspLiveAtExit();
// "void f(int n){while(1) g(n);}" may not have a ret instruction.
Variable *esp =
Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
Context.insert(InstFakeUse::create(Func, esp));
} }
void TargetX8664::addProlog(CfgNode *Node) { void TargetX8664::addProlog(CfgNode *Node) {
......
...@@ -226,6 +226,13 @@ protected: ...@@ -226,6 +226,13 @@ protected:
void scalarizeArithmetic(InstArithmetic::OpKind K, Variable *Dest, void scalarizeArithmetic(InstArithmetic::OpKind K, Variable *Dest,
Operand *Src0, Operand *Src1); Operand *Src0, Operand *Src1);
/// Emit a fake use of esp to make sure esp stays alive for the entire
/// function. Otherwise some esp adjustments get dead-code eliminated.
///
/// Looks up the physical register identified by getStackReg() and inserts an
/// InstFakeUse of it at the current position of the lowering Context. In this
/// change it is called immediately after the instruction that ends control
/// flow (after _ret in lowerRet and after _ud2 in lowerUnreachable).
void keepEspLiveAtExit() {
// The stack register is obtained through getStackReg() instead of naming a
// specific register here; presumably this lets the x86-32 and x86-64
// lowerings share this helper -- confirm against the target traits.
Variable *esp = Func->getTarget()->getPhysicalRegister(getStackReg());
// Insert the fake use so liveness analysis sees the stack register as used at
// this point.
Context.insert(InstFakeUse::create(Func, esp));
}
/// Operand legalization helpers. To deal with address mode constraints, the /// Operand legalization helpers. To deal with address mode constraints, the
/// helpers will create a new Operand and emit instructions that guarantee /// helpers will create a new Operand and emit instructions that guarantee
/// that the Operand kind is one of those indicated by the LegalMask (a /// that the Operand kind is one of those indicated by the LegalMask (a
......
...@@ -5207,6 +5207,9 @@ template <class Machine> ...@@ -5207,6 +5207,9 @@ template <class Machine>
void TargetX86Base<Machine>::lowerUnreachable( void TargetX86Base<Machine>::lowerUnreachable(
const InstUnreachable * /*Inst*/) { const InstUnreachable * /*Inst*/) {
_ud2(); _ud2();
// Add a fake use of esp to make sure esp adjustments after the unreachable
// do not get dead-code eliminated.
keepEspLiveAtExit();
} }
template <class Machine> template <class Machine>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment