Commit f531931f by Jim Stichnoth

Subzero: Improve effectiveness of local register availability peephole.

X86 only. The register availability peephole optimization during lowering disallows available register substitution when the variable is pre-colored. This is for good reasons (too complex to be discussed here). However, that leaves some potential substitutions on the table. Specifically, this happens a lot around register arguments to function calls, both at the call site and in the prolog. The simplest solution seems to be to launder the pre-colored variable through a separate infinite-weight variable, as implemented in this CL through a combination of such copies and extra legalize() calls. There are other situations where this technique can also work, which may be handled in a separate CL. This CL also fixes a problem where the stack pointer adjustment in the prolog is subject to dead-code elimination if the function has no epilog. This would only happen in asm-verbose mode, in the final liveness analysis pass prior to code emission. BUG= none R=eholk@chromium.org Review URL: https://codereview.chromium.org/2052683003 .
parent b684f2b7
...@@ -195,6 +195,9 @@ Traits::X86OperandMem *TargetX8632::_sandbox_mem_reference(X86OperandMem *Mem) { ...@@ -195,6 +195,9 @@ Traits::X86OperandMem *TargetX8632::_sandbox_mem_reference(X86OperandMem *Mem) {
void TargetX8632::_sub_sp(Operand *Adjustment) { void TargetX8632::_sub_sp(Operand *Adjustment) {
Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
_sub(esp, Adjustment); _sub(esp, Adjustment);
// Add a fake use of the stack pointer, to prevent the stack pointer adjustment
// from being dead-code eliminated in a function that doesn't return.
Context.insert<InstFakeUse>(esp);
} }
void TargetX8632::_link_bp() { void TargetX8632::_link_bp() {
......
...@@ -485,11 +485,8 @@ Traits::X86OperandMem *TargetX8664::_sandbox_mem_reference(X86OperandMem *Mem) { ...@@ -485,11 +485,8 @@ Traits::X86OperandMem *TargetX8664::_sandbox_mem_reference(X86OperandMem *Mem) {
void TargetX8664::_sub_sp(Operand *Adjustment) { void TargetX8664::_sub_sp(Operand *Adjustment) {
Variable *rsp = Variable *rsp =
getPhysicalRegister(Traits::RegisterSet::Reg_rsp, Traits::WordType); getPhysicalRegister(Traits::RegisterSet::Reg_rsp, Traits::WordType);
if (!NeedSandboxing) {
_sub(rsp, Adjustment);
return;
}
if (NeedSandboxing) {
Variable *esp = Variable *esp =
getPhysicalRegister(Traits::RegisterSet::Reg_esp, IceType_i32); getPhysicalRegister(Traits::RegisterSet::Reg_esp, IceType_i32);
Variable *r15 = Variable *r15 =
...@@ -504,6 +501,13 @@ void TargetX8664::_sub_sp(Operand *Adjustment) { ...@@ -504,6 +501,13 @@ void TargetX8664::_sub_sp(Operand *Adjustment) {
_sub(esp, Adjustment); _sub(esp, Adjustment);
_redefined(Context.insert<InstFakeDef>(rsp, esp)); _redefined(Context.insert<InstFakeDef>(rsp, esp));
_add(rsp, r15); _add(rsp, r15);
} else {
_sub(rsp, Adjustment);
}
// Add a fake use of the stack pointer, to prevent the stack pointer adjustment
// from being dead-code eliminated in a function that doesn't return.
Context.insert<InstFakeUse>(rsp);
} }
void TargetX8664::initRebasePtr() { void TargetX8664::initRebasePtr() {
......
...@@ -1506,6 +1506,7 @@ void TargetX86Base<TraitsType>::lowerAlloca(const InstAlloca *Instr) { ...@@ -1506,6 +1506,7 @@ void TargetX86Base<TraitsType>::lowerAlloca(const InstAlloca *Instr) {
template <typename TraitsType> template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerArguments() { void TargetX86Base<TraitsType>::lowerArguments() {
const bool OptM1 = Func->getOptLevel() == Opt_m1;
VarList &Args = Func->getArgs(); VarList &Args = Func->getArgs();
unsigned NumXmmArgs = 0; unsigned NumXmmArgs = 0;
bool XmmSlotsRemain = true; bool XmmSlotsRemain = true;
...@@ -1561,8 +1562,20 @@ void TargetX86Base<TraitsType>::lowerArguments() { ...@@ -1561,8 +1562,20 @@ void TargetX86Base<TraitsType>::lowerArguments() {
Arg->setIsArg(false); Arg->setIsArg(false);
Args[i] = RegisterArg; Args[i] = RegisterArg;
// When not Om1, do the assignment through a temporary, instead of directly
// from the pre-colored variable, so that a subsequent availabilityGet()
// call has a chance to work. (In Om1, don't bother creating extra
// instructions with extra variables to register-allocate.)
if (OptM1) {
Context.insert<InstAssign>(Arg, RegisterArg); Context.insert<InstAssign>(Arg, RegisterArg);
} else {
Variable *Tmp = makeReg(RegisterArg->getType());
Context.insert<InstAssign>(Tmp, RegisterArg);
Context.insert<InstAssign>(Arg, Tmp);
}
} }
if (!OptM1)
Context.availabilityUpdate();
} }
/// Strength-reduce scalar integer multiplication by a constant (for i32 or /// Strength-reduce scalar integer multiplication by a constant (for i32 or
...@@ -2588,29 +2601,35 @@ void TargetX86Base<TraitsType>::lowerCall(const InstCall *Instr) { ...@@ -2588,29 +2601,35 @@ void TargetX86Base<TraitsType>::lowerCall(const InstCall *Instr) {
ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
assert(ParameterAreaSizeBytes <= maxOutArgsSizeBytes()); assert(ParameterAreaSizeBytes <= maxOutArgsSizeBytes());
// Copy arguments that are passed on the stack to the appropriate stack // Copy arguments that are passed on the stack to the appropriate stack
// locations. // locations. We make sure legalize() is called on each argument at this
// point, to allow availabilityGet() to work.
for (SizeT i = 0, NumStackArgs = StackArgs.size(); i < NumStackArgs; ++i) { for (SizeT i = 0, NumStackArgs = StackArgs.size(); i < NumStackArgs; ++i) {
lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); lowerStore(
InstStore::create(Func, legalize(StackArgs[i]), StackArgLocations[i]));
} }
// Copy arguments to be passed in registers to the appropriate registers. // Copy arguments to be passed in registers to the appropriate registers.
for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
Variable *Reg = XmmArgs[i] =
legalizeToReg(XmmArgs[i], Traits::getRegisterForXmmArgNum(i)); legalizeToReg(legalize(XmmArgs[i]), Traits::getRegisterForXmmArgNum(i));
// Generate a FakeUse of register arguments so that they do not get dead
// code eliminated as a result of the FakeKill of scratch registers after
// the call.
Context.insert<InstFakeUse>(Reg);
} }
// Materialize moves for arguments passed in GPRs. // Materialize moves for arguments passed in GPRs.
for (SizeT i = 0, NumGprArgs = GprArgs.size(); i < NumGprArgs; ++i) { for (SizeT i = 0, NumGprArgs = GprArgs.size(); i < NumGprArgs; ++i) {
const Type SignatureTy = GprArgs[i].first; const Type SignatureTy = GprArgs[i].first;
Operand *Arg = GprArgs[i].second; Operand *Arg = legalize(GprArgs[i].second);
Variable *Reg = GprArgs[i].second =
legalizeToReg(Arg, Traits::getRegisterForGprArgNum(Arg->getType(), i)); legalizeToReg(Arg, Traits::getRegisterForGprArgNum(Arg->getType(), i));
assert(SignatureTy == IceType_i64 || SignatureTy == IceType_i32); assert(SignatureTy == IceType_i64 || SignatureTy == IceType_i32);
assert(SignatureTy == Arg->getType()); assert(SignatureTy == Arg->getType());
(void)SignatureTy; (void)SignatureTy;
Context.insert<InstFakeUse>(Reg); }
// Generate a FakeUse of register arguments so that they do not get dead code
// eliminated as a result of the FakeKill of scratch registers after the call.
// These need to be right before the call instruction.
for (auto *Arg : XmmArgs) {
Context.insert<InstFakeUse>(llvm::cast<Variable>(Arg));
}
for (auto &ArgPair : GprArgs) {
Context.insert<InstFakeUse>(llvm::cast<Variable>(ArgPair.second));
} }
// Generate the call instruction. Assign its result to a temporary with high // Generate the call instruction. Assign its result to a temporary with high
// register allocation weight. // register allocation weight.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment