Commit f531931f by Jim Stichnoth

Subzero: Improve effectiveness of local register availability peephole.

X86 only.

The register availability peephole optimization during lowering disallows available register substitution when the variable is pre-colored. This is for good reasons (too complex to be discussed here). However, that leaves some potential substitutions on the table.

Specifically, this happens a lot around register arguments to function calls, both at the call site and in the prolog.

The simplest solution seems to be to launder the pre-colored variable through a separate infinite-weight variable, as implemented in this CL through a combination of such copies and extra legalize() calls.

There are other situations where this technique can also work, which may be handled in a separate CL.

This CL also fixes a problem where the stack pointer adjustment in the prolog is subject to dead-code elimination if the function has no epilog. This would only happen in asm-verbose mode, in the final liveness analysis pass prior to code emission.

BUG= none
R=eholk@chromium.org

Review URL: https://codereview.chromium.org/2052683003 .
parent b684f2b7
......@@ -195,6 +195,9 @@ Traits::X86OperandMem *TargetX8632::_sandbox_mem_reference(X86OperandMem *Mem) {
/// Emits a subtraction of Adjustment from the esp physical register, followed
/// by a fake use of esp.
void TargetX8632::_sub_sp(Operand *Adjustment) {
Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
_sub(esp, Adjustment);
// Add a fake use of the stack pointer, to prevent the stack pointer adjustment
// from being dead-code eliminated in a function that doesn't return.
Context.insert<InstFakeUse>(esp);
}
void TargetX8632::_link_bp() {
......
......@@ -485,25 +485,29 @@ Traits::X86OperandMem *TargetX8664::_sandbox_mem_reference(X86OperandMem *Mem) {
void TargetX8664::_sub_sp(Operand *Adjustment) {
Variable *rsp =
getPhysicalRegister(Traits::RegisterSet::Reg_rsp, Traits::WordType);
if (!NeedSandboxing) {
if (NeedSandboxing) {
Variable *esp =
getPhysicalRegister(Traits::RegisterSet::Reg_esp, IceType_i32);
Variable *r15 =
getPhysicalRegister(Traits::RegisterSet::Reg_r15, IceType_i64);
// .bundle_start
// sub Adjustment, %esp
// add %r15, %rsp
// .bundle_end
AutoBundle _(this);
_redefined(Context.insert<InstFakeDef>(esp, rsp));
_sub(esp, Adjustment);
_redefined(Context.insert<InstFakeDef>(rsp, esp));
_add(rsp, r15);
} else {
_sub(rsp, Adjustment);
return;
}
Variable *esp =
getPhysicalRegister(Traits::RegisterSet::Reg_esp, IceType_i32);
Variable *r15 =
getPhysicalRegister(Traits::RegisterSet::Reg_r15, IceType_i64);
// .bundle_start
// sub Adjustment, %esp
// add %r15, %rsp
// .bundle_end
AutoBundle _(this);
_redefined(Context.insert<InstFakeDef>(esp, rsp));
_sub(esp, Adjustment);
_redefined(Context.insert<InstFakeDef>(rsp, esp));
_add(rsp, r15);
// Add a fake use of the stack pointer, to prevent the stack pointer adjustment
// from being dead-code eliminated in a function that doesn't return.
Context.insert<InstFakeUse>(rsp);
}
void TargetX8664::initRebasePtr() {
......
......@@ -1506,6 +1506,7 @@ void TargetX86Base<TraitsType>::lowerAlloca(const InstAlloca *Instr) {
template <typename TraitsType>
void TargetX86Base<TraitsType>::lowerArguments() {
const bool OptM1 = Func->getOptLevel() == Opt_m1;
VarList &Args = Func->getArgs();
unsigned NumXmmArgs = 0;
bool XmmSlotsRemain = true;
......@@ -1561,8 +1562,20 @@ void TargetX86Base<TraitsType>::lowerArguments() {
Arg->setIsArg(false);
Args[i] = RegisterArg;
Context.insert<InstAssign>(Arg, RegisterArg);
// When not Om1, do the assignment through a temporary, instead of directly
// from the pre-colored variable, so that a subsequent availabilityGet()
// call has a chance to work. (In Om1, don't bother creating extra
// instructions with extra variables to register-allocate.)
if (OptM1) {
Context.insert<InstAssign>(Arg, RegisterArg);
} else {
Variable *Tmp = makeReg(RegisterArg->getType());
Context.insert<InstAssign>(Tmp, RegisterArg);
Context.insert<InstAssign>(Arg, Tmp);
}
}
if (!OptM1)
Context.availabilityUpdate();
}
/// Strength-reduce scalar integer multiplication by a constant (for i32 or
......@@ -2588,29 +2601,35 @@ void TargetX86Base<TraitsType>::lowerCall(const InstCall *Instr) {
ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
assert(ParameterAreaSizeBytes <= maxOutArgsSizeBytes());
// Copy arguments that are passed on the stack to the appropriate stack
// locations.
// locations. We make sure legalize() is called on each argument at this
// point, to allow availabilityGet() to work.
for (SizeT i = 0, NumStackArgs = StackArgs.size(); i < NumStackArgs; ++i) {
lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
lowerStore(
InstStore::create(Func, legalize(StackArgs[i]), StackArgLocations[i]));
}
// Copy arguments to be passed in registers to the appropriate registers.
for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
Variable *Reg =
legalizeToReg(XmmArgs[i], Traits::getRegisterForXmmArgNum(i));
// Generate a FakeUse of register arguments so that they do not get dead
// code eliminated as a result of the FakeKill of scratch registers after
// the call.
Context.insert<InstFakeUse>(Reg);
XmmArgs[i] =
legalizeToReg(legalize(XmmArgs[i]), Traits::getRegisterForXmmArgNum(i));
}
// Materialize moves for arguments passed in GPRs.
for (SizeT i = 0, NumGprArgs = GprArgs.size(); i < NumGprArgs; ++i) {
const Type SignatureTy = GprArgs[i].first;
Operand *Arg = GprArgs[i].second;
Variable *Reg =
Operand *Arg = legalize(GprArgs[i].second);
GprArgs[i].second =
legalizeToReg(Arg, Traits::getRegisterForGprArgNum(Arg->getType(), i));
assert(SignatureTy == IceType_i64 || SignatureTy == IceType_i32);
assert(SignatureTy == Arg->getType());
(void)SignatureTy;
Context.insert<InstFakeUse>(Reg);
}
// Generate a FakeUse of register arguments so that they do not get dead code
// eliminated as a result of the FakeKill of scratch registers after the call.
// These need to be right before the call instruction.
for (auto *Arg : XmmArgs) {
Context.insert<InstFakeUse>(llvm::cast<Variable>(Arg));
}
for (auto &ArgPair : GprArgs) {
Context.insert<InstFakeUse>(llvm::cast<Variable>(ArgPair.second));
}
// Generate the call instruction. Assign its result to a temporary with high
// register allocation weight.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment