Commit b9a404d4 by David Sehr

Merged the x86-32 and x86-64 addProlog/addEpilog implementations into shared code; each target now supplies small hooks (_link_bp, _unlink_bp, _push_reg, emitGetIP, emitSandboxedReturn) instead of a full prolog/epilog.

BUG= R=jpp@chromium.org Review URL: https://codereview.chromium.org/1616483003 .
parent bc3bd502
...@@ -141,249 +141,29 @@ void TargetX8632::_sub_sp(Operand *Adjustment) { ...@@ -141,249 +141,29 @@ void TargetX8632::_sub_sp(Operand *Adjustment) {
_sub(esp, Adjustment); _sub(esp, Adjustment);
} }
void TargetX8632::lowerIndirectJump(Variable *JumpTarget) { void TargetX8632::_link_bp() {
AutoBundle _(this);
if (NeedSandboxing) {
const SizeT BundleSize =
1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
_and(JumpTarget, Ctx->getConstantInt32(~(BundleSize - 1)));
}
_jmp(JumpTarget);
}
// Emits the call instruction, applying NaCl sandboxing when required.
// Sandboxed calls are placed in a bundle with Opt_AlignToEnd so the call
// ends exactly at a bundle boundary (making the return address
// bundle-aligned); an indirect target is additionally masked down to a
// bundle-aligned address before the call.
Inst *TargetX8632::emitCallToTarget(Operand *CallTarget, Variable *ReturnReg) {
std::unique_ptr<AutoBundle> Bundle;
if (NeedSandboxing) {
if (llvm::isa<Constant>(CallTarget)) {
// Direct (constant) target: only the align-to-end bundle is needed.
Bundle = makeUnique<AutoBundle>(this, InstBundleLock::Opt_AlignToEnd);
} else {
// Indirect target: copy it into a register first, then open the
// bundle so the address mask and the call stay in the same bundle.
Variable *CallTargetVar = nullptr;
_mov(CallTargetVar, CallTarget);
Bundle = makeUnique<AutoBundle>(this, InstBundleLock::Opt_AlignToEnd);
// Clear the low bits of the target so it is bundle-aligned.
const SizeT BundleSize =
1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
_and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
CallTarget = CallTargetVar;
}
}
// The Bundle (if any) closes when this function returns, after the call.
return Context.insert<Traits::Insts::Call>(ReturnReg, CallTarget);
}
// Moves a lowered return value into the location dictated by the x86-32
// calling convention: vectors in xmm0, scalar floats on the x87 stack
// (via fld, returning nullptr since no GPR holds the result), i64 split
// across eax/edx, and i32 in eax. Returns the register variable holding
// the (low half of the) result, or nullptr for x87 returns.
Variable *TargetX8632::moveReturnValueToRegister(Operand *Value,
                                                Type ReturnType) {
  // Vector results come back in xmm0.
  if (isVectorType(ReturnType))
    return legalizeToReg(Value, Traits::RegisterSet::Reg_xmm0);
  // Scalar float/double results are pushed onto the x87 stack; there is
  // no GPR to report.
  if (isScalarFloatingType(ReturnType)) {
    _fld(Value);
    return nullptr;
  }
  assert(ReturnType == IceType_i32 || ReturnType == IceType_i64);
  if (ReturnType == IceType_i64) {
    // 64-bit integers are returned in the eax (low) / edx (high) pair.
    // The FakeUse keeps edx live through the return.
    Variable *Lo = legalizeToReg(loOperand(Value), Traits::RegisterSet::Reg_eax);
    Variable *Hi = legalizeToReg(hiOperand(Value), Traits::RegisterSet::Reg_edx);
    Context.insert<InstFakeUse>(Hi);
    return Lo;
  }
  // 32-bit integers are returned in eax.
  Variable *Result = nullptr;
  _mov(Result, Value, Traits::RegisterSet::Reg_eax);
  return Result;
}
void TargetX8632::addProlog(CfgNode *Node) {
// Stack frame layout:
//
// +------------------------+
// | 1. return address |
// +------------------------+
// | 2. preserved registers |
// +------------------------+
// | 3. padding |
// +------------------------+
// | 4. global spill area |
// +------------------------+
// | 5. padding |
// +------------------------+
// | 6. local spill area |
// +------------------------+
// | 7. padding |
// +------------------------+
// | 8. allocas |
// +------------------------+
// | 9. padding |
// +------------------------+
// | 10. out args |
// +------------------------+ <--- StackPointer
//
// The following variables record the size in bytes of the given areas:
// * X86_RET_IP_SIZE_BYTES: area 1
// * PreservedRegsSizeBytes: area 2
// * SpillAreaPaddingBytes: area 3
// * GlobalsSize: area 4
// * GlobalsAndSubsequentPaddingSize: areas 4 - 5
// * LocalsSpillAreaSize: area 6
// * SpillAreaSizeBytes: areas 3 - 10
// * maxOutArgsSizeBytes(): area 10
// Determine stack frame offsets for each Variable without a register
// assignment. This can be done as one variable per stack slot. Or, do
// coalescing by running the register allocator again with an infinite set of
// registers (as a side effect, this gives variables a second chance at
// physical register assignment).
//
// A middle ground approach is to leverage sparsity and allocate one block of
// space on the frame for globals (variables with multi-block lifetime), and
// one block to share for locals (single-block lifetime).
Context.init(Node);
Context.setInsertPoint(Context.getCur());
llvm::SmallBitVector CalleeSaves =
getRegisterSet(RegSet_CalleeSave, RegSet_None);
RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
VarList SortedSpilledVariables, VariablesLinkedToSpillSlots;
size_t GlobalsSize = 0;
// If there is a separate locals area, this represents that area. Otherwise
// it counts any variable not counted by GlobalsSize.
SpillAreaSizeBytes = 0;
// If there is a separate locals area, this specifies the alignment for it.
uint32_t LocalsSlotsAlignmentBytes = 0;
// The entire spill locations area gets aligned to largest natural alignment
// of the variables that have a spill slot.
uint32_t SpillAreaAlignmentBytes = 0;
// A spill slot linked to a variable with a stack slot should reuse that
// stack slot.
std::function<bool(Variable *)> TargetVarHook =
[&VariablesLinkedToSpillSlots](Variable *Var) {
if (auto *SpillVar =
llvm::dyn_cast<typename Traits::SpillVariable>(Var)) {
assert(Var->mustNotHaveReg());
if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) {
VariablesLinkedToSpillSlots.push_back(Var);
return true;
}
}
return false;
};
// Compute the list of spilled variables and bounds for GlobalsSize, etc.
getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
&SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
&LocalsSlotsAlignmentBytes, TargetVarHook);
uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
SpillAreaSizeBytes += GlobalsSize;
// Add push instructions for preserved registers.
uint32_t NumCallee = 0;
size_t PreservedRegsSizeBytes = 0;
llvm::SmallBitVector Pushed(CalleeSaves.size());
for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
SizeT Canonical = Traits::getBaseReg(i);
if (CalleeSaves[i] && RegsUsed[i]) {
Pushed[Canonical] = true;
}
}
for (SizeT i = 0; i < Pushed.size(); ++i) {
if (Pushed[i]) {
++NumCallee;
PreservedRegsSizeBytes += typeWidthInBytes(IceType_i32);
_push(getPhysicalRegister(i));
}
}
Ctx->statsUpdateRegistersSaved(NumCallee);
// Generate "push ebp; mov ebp, esp"
if (IsEbpBasedFrame) {
assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
.count() == 0);
PreservedRegsSizeBytes += typeWidthInBytes(IceType_i32);
Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp); Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);
Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
_push(ebp); _push(ebp);
_mov(ebp, esp); _mov(ebp, esp);
// Keep ebp live for late-stage liveness analysis (e.g. asm-verbose mode). // Keep ebp live for late-stage liveness analysis (e.g. asm-verbose mode).
Context.insert<InstFakeUse>(ebp); Context.insert<InstFakeUse>(ebp);
} }
// Align the variables area. SpillAreaPaddingBytes is the size of the region
// after the preserved registers and before the spill areas.
// LocalsSlotsPaddingBytes is the amount of padding between the globals and
// locals area if they are separate.
assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES);
assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
uint32_t SpillAreaPaddingBytes = 0;
uint32_t LocalsSlotsPaddingBytes = 0;
alignStackSpillAreas(Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes,
SpillAreaAlignmentBytes, GlobalsSize,
LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes,
&LocalsSlotsPaddingBytes);
SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
uint32_t GlobalsAndSubsequentPaddingSize =
GlobalsSize + LocalsSlotsPaddingBytes;
// Functions returning scalar floating point types may need to convert values
// from an in-register xmm value to the top of the x87 floating point stack.
// This is done by a movp[sd] and an fld[sd]. Ensure there is enough scratch
// space on the stack for this.
const Type ReturnType = Func->getReturnType();
if (isScalarFloatingType(ReturnType)) {
// Avoid misaligned double-precicion load/store.
NeedsStackAlignment = true;
SpillAreaSizeBytes =
std::max(typeWidthInBytesOnStack(ReturnType), SpillAreaSizeBytes);
}
// Align esp if necessary.
if (NeedsStackAlignment) {
uint32_t StackOffset =
Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
uint32_t StackSize =
Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes);
StackSize = Traits::applyStackAlignment(StackSize + maxOutArgsSizeBytes());
SpillAreaSizeBytes = StackSize - StackOffset;
} else {
SpillAreaSizeBytes += maxOutArgsSizeBytes();
}
// Combine fixed allocations into SpillAreaSizeBytes if we are emitting the
// fixed allocations in the prolog.
if (PrologEmitsFixedAllocas)
SpillAreaSizeBytes += FixedAllocaSizeBytes;
if (SpillAreaSizeBytes) {
// Generate "sub esp, SpillAreaSizeBytes"
_sub(getPhysicalRegister(Traits::RegisterSet::Reg_esp),
Ctx->getConstantInt32(SpillAreaSizeBytes));
// If the fixed allocas are aligned more than the stack frame, align the
// stack pointer accordingly.
if (PrologEmitsFixedAllocas &&
FixedAllocaAlignBytes > Traits::X86_STACK_ALIGNMENT_BYTES) {
assert(IsEbpBasedFrame);
_and(getPhysicalRegister(Traits::RegisterSet::Reg_esp),
Ctx->getConstantInt32(-FixedAllocaAlignBytes));
}
}
// Account for known-frame-offset alloca instructions that were not already
// combined into the prolog.
if (!PrologEmitsFixedAllocas)
SpillAreaSizeBytes += FixedAllocaSizeBytes;
Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); void TargetX8632::_unlink_bp() {
Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);
// For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake
// use of esp before the assignment of esp=ebp keeps previous esp
// adjustments from being dead-code eliminated.
Context.insert<InstFakeUse>(esp);
_mov(esp, ebp);
_pop(ebp);
}
// Fill in stack offsets for stack args, and copy args into registers for void TargetX8632::_push_reg(Variable *Reg) { _push(Reg); }
// those that were register-allocated. Args are pushed right to left, so
// Arg[0] is closest to the stack/frame pointer.
Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
size_t BasicFrameOffset =
PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES;
if (!IsEbpBasedFrame)
BasicFrameOffset += SpillAreaSizeBytes;
void TargetX8632::emitGetIP(CfgNode *Node) {
// If there is a non-deleted InstX86GetIP instruction, we need to move it to // If there is a non-deleted InstX86GetIP instruction, we need to move it to
// the point after the stack frame has stabilized but before // the point after the stack frame has stabilized but before
// register-allocated in-args are copied into their home registers. It would // register-allocated in-args are copied into their home registers. It would
...@@ -420,127 +200,63 @@ void TargetX8632::addProlog(CfgNode *Node) { ...@@ -420,127 +200,63 @@ void TargetX8632::addProlog(CfgNode *Node) {
_mov(Dest, CallDest); _mov(Dest, CallDest);
} }
} }
const VarList &Args = Func->getArgs();
size_t InArgsSizeBytes = 0;
unsigned NumXmmArgs = 0;
for (Variable *Arg : Args) {
// Skip arguments passed in registers.
if (isVectorType(Arg->getType()) && NumXmmArgs < Traits::X86_MAX_XMM_ARGS) {
++NumXmmArgs;
continue;
}
// For esp-based frames where the allocas are done outside the prolog, the
// esp value may not stabilize to its home value until after all the
// fixed-size alloca instructions have executed. In this case, a stack
// adjustment is needed when accessing in-args in order to copy them into
// registers.
size_t StackAdjBytes = 0;
if (!IsEbpBasedFrame && !PrologEmitsFixedAllocas)
StackAdjBytes -= FixedAllocaSizeBytes;
finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, StackAdjBytes,
InArgsSizeBytes);
}
// Fill in stack offsets for locals.
assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
IsEbpBasedFrame);
// Assign stack offsets to variables that have been linked to spilled
// variables.
for (Variable *Var : VariablesLinkedToSpillSlots) {
Variable *Linked =
(llvm::cast<typename Traits::SpillVariable>(Var))->getLinkedTo();
Var->setStackOffset(Linked->getStackOffset());
}
this->HasComputedFrame = true;
if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
OstreamLocker L(Func->getContext());
Ostream &Str = Func->getContext()->getStrDump();
Str << "Stack layout:\n";
uint32_t EspAdjustmentPaddingSize =
SpillAreaSizeBytes - LocalsSpillAreaSize -
GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes -
maxOutArgsSizeBytes();
Str << " in-args = " << InArgsSizeBytes << " bytes\n"
<< " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n"
<< " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
<< " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
<< " globals spill area = " << GlobalsSize << " bytes\n"
<< " globals-locals spill areas intermediate padding = "
<< GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
<< " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
<< " esp alignment padding = " << EspAdjustmentPaddingSize
<< " bytes\n";
Str << "Stack details:\n"
<< " esp adjustment = " << SpillAreaSizeBytes << " bytes\n"
<< " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
<< " outgoing args size = " << maxOutArgsSizeBytes() << " bytes\n"
<< " locals spill area alignment = " << LocalsSlotsAlignmentBytes
<< " bytes\n"
<< " is ebp based = " << IsEbpBasedFrame << "\n";
}
} }
void TargetX8632::addEpilog(CfgNode *Node) { void TargetX8632::lowerIndirectJump(Variable *JumpTarget) {
InstList &Insts = Node->getInsts(); AutoBundle _(this);
InstList::reverse_iterator RI, E;
for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { if (NeedSandboxing) {
if (llvm::isa<typename Traits::Insts::Ret>(*RI)) const SizeT BundleSize =
break; 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
_and(JumpTarget, Ctx->getConstantInt32(~(BundleSize - 1)));
} }
if (RI == E)
return;
// Convert the reverse_iterator position into its corresponding (forward) _jmp(JumpTarget);
// iterator position. }
InstList::iterator InsertPoint = RI.base();
--InsertPoint;
Context.init(Node);
Context.setInsertPoint(InsertPoint);
Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); Inst *TargetX8632::emitCallToTarget(Operand *CallTarget, Variable *ReturnReg) {
if (IsEbpBasedFrame) { std::unique_ptr<AutoBundle> Bundle;
Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp); if (NeedSandboxing) {
// For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake if (llvm::isa<Constant>(CallTarget)) {
// use of esp before the assignment of esp=ebp keeps previous esp Bundle = makeUnique<AutoBundle>(this, InstBundleLock::Opt_AlignToEnd);
// adjustments from being dead-code eliminated.
Context.insert<InstFakeUse>(esp);
_mov(esp, ebp);
_pop(ebp);
} else { } else {
// add esp, SpillAreaSizeBytes Variable *CallTargetVar = nullptr;
if (SpillAreaSizeBytes) _mov(CallTargetVar, CallTarget);
_add(esp, Ctx->getConstantInt32(SpillAreaSizeBytes)); Bundle = makeUnique<AutoBundle>(this, InstBundleLock::Opt_AlignToEnd);
} const SizeT BundleSize =
1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
// Add pop instructions for preserved registers. _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
llvm::SmallBitVector CalleeSaves = CallTarget = CallTargetVar;
getRegisterSet(RegSet_CalleeSave, RegSet_None);
llvm::SmallBitVector Popped(CalleeSaves.size());
for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
SizeT Canonical = Traits::getBaseReg(i);
if (CalleeSaves[i] && RegsUsed[i]) {
Popped[Canonical] = true;
}
}
for (SizeT i = 0; i < Popped.size(); ++i) {
SizeT j = Popped.size() - i - 1;
SizeT Canonical = Traits::getBaseReg(j);
if (j == Traits::RegisterSet::Reg_ebp && IsEbpBasedFrame)
continue;
if (Popped[j]) {
_pop(getPhysicalRegister(Canonical));
} }
} }
return Context.insert<Traits::Insts::Call>(ReturnReg, CallTarget);
}
if (!NeedSandboxing) { Variable *TargetX8632::moveReturnValueToRegister(Operand *Value,
return; Type ReturnType) {
if (isVectorType(ReturnType)) {
return legalizeToReg(Value, Traits::RegisterSet::Reg_xmm0);
} else if (isScalarFloatingType(ReturnType)) {
_fld(Value);
return nullptr;
} else {
assert(ReturnType == IceType_i32 || ReturnType == IceType_i64);
if (ReturnType == IceType_i64) {
Variable *eax =
legalizeToReg(loOperand(Value), Traits::RegisterSet::Reg_eax);
Variable *edx =
legalizeToReg(hiOperand(Value), Traits::RegisterSet::Reg_edx);
Context.insert<InstFakeUse>(edx);
return eax;
} else {
Variable *Reg = nullptr;
_mov(Reg, Value, Traits::RegisterSet::Reg_eax);
return Reg;
}
} }
}
void TargetX8632::emitSandboxedReturn() {
// Change the original ret instruction into a sandboxed return sequence. // Change the original ret instruction into a sandboxed return sequence.
// t:ecx = pop // t:ecx = pop
// bundle_lock // bundle_lock
...@@ -551,11 +267,6 @@ void TargetX8632::addEpilog(CfgNode *Node) { ...@@ -551,11 +267,6 @@ void TargetX8632::addEpilog(CfgNode *Node) {
Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
_pop(T_ecx); _pop(T_ecx);
lowerIndirectJump(T_ecx); lowerIndirectJump(T_ecx);
if (RI->getSrcSize()) {
auto *RetValue = llvm::cast<Variable>(RI->getSrc(0));
Context.insert<InstFakeUse>(RetValue);
}
RI->setDeleted();
} }
void TargetX8632::emitJumpTable(const Cfg *Func, void TargetX8632::emitJumpTable(const Cfg *Func,
......
...@@ -55,13 +55,16 @@ protected: ...@@ -55,13 +55,16 @@ protected:
llvm::report_fatal_error("sandbox mem reference for x86-32."); llvm::report_fatal_error("sandbox mem reference for x86-32.");
} }
void _sub_sp(Operand *Adjustment); void _sub_sp(Operand *Adjustment);
void _link_bp();
void _unlink_bp();
void _push_reg(Variable *Reg);
void initSandbox() {} void initSandbox() {}
void emitSandboxedReturn();
void lowerIndirectJump(Variable *JumpTarget); void lowerIndirectJump(Variable *JumpTarget);
void emitGetIP(CfgNode *Node);
Inst *emitCallToTarget(Operand *CallTarget, Variable *ReturnReg) override; Inst *emitCallToTarget(Operand *CallTarget, Variable *ReturnReg) override;
Variable *moveReturnValueToRegister(Operand *Value, Type ReturnType) override; Variable *moveReturnValueToRegister(Operand *Value, Type ReturnType) override;
void addProlog(CfgNode *Node) override;
void addEpilog(CfgNode *Node) override;
private: private:
ENABLE_MAKE_UNIQUE; ENABLE_MAKE_UNIQUE;
......
...@@ -215,6 +215,84 @@ void TargetX8664::_push_rbp() { ...@@ -215,6 +215,84 @@ void TargetX8664::_push_rbp() {
Context.insert<typename Traits::Insts::Store>(ebp, TopOfStack); Context.insert<typename Traits::Insts::Store>(ebp, TopOfStack);
} }
// Prolog helper: links the frame pointer (the moral equivalent of
// "push rbp; mov rbp, rsp"). Under NaCl sandboxing the frame pointer
// must be rebuilt as a 32-bit offset rebased off r15 (the sandbox base
// register), with the rebasing instructions kept in a single bundle.
void TargetX8664::_link_bp() {
Variable *esp =
getPhysicalRegister(Traits::RegisterSet::Reg_esp, IceType_i32);
Variable *rsp =
getPhysicalRegister(Traits::RegisterSet::Reg_rsp, Traits::WordType);
Variable *ebp =
getPhysicalRegister(Traits::RegisterSet::Reg_ebp, IceType_i32);
Variable *rbp =
getPhysicalRegister(Traits::RegisterSet::Reg_rbp, Traits::WordType);
Variable *r15 =
getPhysicalRegister(Traits::RegisterSet::Reg_r15, Traits::WordType);
if (!NeedSandboxing) {
// No sandbox: plain frame-pointer link.
_push(rbp);
_mov(rbp, rsp);
} else {
// Sandboxed push of rbp (see _push_rbp()).
_push_rbp();
// The 32-bit mov and the r15 add must stay together in one bundle.
AutoBundle _(this);
// The FakeDefs tie the 32-bit and 64-bit views of the same physical
// register together for liveness; _redefined presumably marks them as
// redefinitions of already-live registers — NOTE(review): confirm
// against InstFakeDef/_redefined semantics.
_redefined(Context.insert<InstFakeDef>(ebp, rbp));
_redefined(Context.insert<InstFakeDef>(esp, rsp));
// ebp = low 32 bits of the stack pointer...
_mov(ebp, esp);
_redefined(Context.insert<InstFakeDef>(rsp, esp));
// ...then rebase off r15 so rbp is a valid 64-bit sandboxed address.
_add(rbp, r15);
}
// Keep rbp live for late-stage liveness analysis (e.g. asm-verbose mode).
Context.insert<InstFakeUse>(rbp);
}
// Epilog helper: unlinks the frame pointer, restoring the stack pointer
// from rbp and popping the caller's saved rbp. Under NaCl sandboxing the
// restore works on the 32-bit register views and rebuilds rbp by adding
// r15 (the sandbox base) inside a single bundle.
void TargetX8664::_unlink_bp() {
  // Use Traits::WordType (i64 on x86-64) for the 64-bit registers, for
  // consistency with _link_bp() and _sub_sp().
  Variable *rsp =
      getPhysicalRegister(Traits::RegisterSet::Reg_rsp, Traits::WordType);
  Variable *rbp =
      getPhysicalRegister(Traits::RegisterSet::Reg_rbp, Traits::WordType);
  Variable *ebp =
      getPhysicalRegister(Traits::RegisterSet::Reg_ebp, IceType_i32);
  // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake
  // use of rsp before the assignment of rsp=rbp keeps previous rsp
  // adjustments from being dead-code eliminated.
  Context.insert<InstFakeUse>(rsp);
  if (!NeedSandboxing) {
    _mov(rsp, rbp);
    _pop(rbp);
  } else {
    // Sandboxed unlink: restore the stack pointer from ebp (32-bit),
    // pop the saved frame pointer into rcx, then rebuild a sandboxed
    // 64-bit rbp by adding r15 inside one bundle.
    _mov_sp(ebp);
    Variable *r15 =
        getPhysicalRegister(Traits::RegisterSet::Reg_r15, Traits::WordType);
    Variable *rcx =
        getPhysicalRegister(Traits::RegisterSet::Reg_rcx, Traits::WordType);
    Variable *ecx =
        getPhysicalRegister(Traits::RegisterSet::Reg_ecx, IceType_i32);
    _pop(rcx);
    // Tie ecx to rcx for liveness (two widths of the same register).
    Context.insert<InstFakeDef>(ecx, rcx);
    AutoBundle _(this);
    _mov(ebp, ecx);
    _redefined(Context.insert<InstFakeDef>(rbp, ebp));
    _add(rbp, r15);
  }
}
// Pushes Reg onto the stack. Under NaCl sandboxing, pushing rbp requires
// the special sandboxed sequence (_push_rbp); any other register — or any
// register when not sandboxing — is a plain push.
void TargetX8664::_push_reg(Variable *Reg) {
  Variable *rbp =
      getPhysicalRegister(Traits::RegisterSet::Reg_rbp, Traits::WordType);
  if (NeedSandboxing && Reg == rbp) {
    _push_rbp();
  } else {
    _push(Reg);
  }
}
// No IP base register is needed on x86-64, so this hook intentionally
// emits nothing.
void TargetX8664::emitGetIP(CfgNode *Node) {
  (void)Node; // Unused: there is nothing to emit for this target.
}
Traits::X86OperandMem *TargetX8664::_sandbox_mem_reference(X86OperandMem *Mem) { Traits::X86OperandMem *TargetX8664::_sandbox_mem_reference(X86OperandMem *Mem) {
// In x86_64-nacl, all memory references are relative to %r15 (i.e., %rzp.) // In x86_64-nacl, all memory references are relative to %r15 (i.e., %rzp.)
// NaCl sandboxing also requires that any registers that are not %rsp and // NaCl sandboxing also requires that any registers that are not %rsp and
...@@ -326,7 +404,7 @@ Traits::X86OperandMem *TargetX8664::_sandbox_mem_reference(X86OperandMem *Mem) { ...@@ -326,7 +404,7 @@ Traits::X86OperandMem *TargetX8664::_sandbox_mem_reference(X86OperandMem *Mem) {
void TargetX8664::_sub_sp(Operand *Adjustment) { void TargetX8664::_sub_sp(Operand *Adjustment) {
Variable *rsp = Variable *rsp =
getPhysicalRegister(Traits::RegisterSet::Reg_rsp, IceType_i64); getPhysicalRegister(Traits::RegisterSet::Reg_rsp, Traits::WordType);
if (!NeedSandboxing) { if (!NeedSandboxing) {
_sub(rsp, Adjustment); _sub(rsp, Adjustment);
return; return;
...@@ -448,371 +526,11 @@ Variable *TargetX8664::moveReturnValueToRegister(Operand *Value, ...@@ -448,371 +526,11 @@ Variable *TargetX8664::moveReturnValueToRegister(Operand *Value,
} }
} }
void TargetX8664::addProlog(CfgNode *Node) { void TargetX8664::emitSandboxedReturn() {
// Stack frame layout:
//
// +------------------------+
// | 1. return address |
// +------------------------+
// | 2. preserved registers |
// +------------------------+
// | 3. padding |
// +------------------------+
// | 4. global spill area |
// +------------------------+
// | 5. padding |
// +------------------------+
// | 6. local spill area |
// +------------------------+
// | 7. padding |
// +------------------------+
// | 8. allocas |
// +------------------------+
// | 9. padding |
// +------------------------+
// | 10. out args |
// +------------------------+ <--- StackPointer
//
// The following variables record the size in bytes of the given areas:
// * X86_RET_IP_SIZE_BYTES: area 1
// * PreservedRegsSizeBytes: area 2
// * SpillAreaPaddingBytes: area 3
// * GlobalsSize: area 4
// * GlobalsAndSubsequentPaddingSize: areas 4 - 5
// * LocalsSpillAreaSize: area 6
// * SpillAreaSizeBytes: areas 3 - 10
// * maxOutArgsSizeBytes(): area 10
// Determine stack frame offsets for each Variable without a register
// assignment. This can be done as one variable per stack slot. Or, do
// coalescing by running the register allocator again with an infinite set of
// registers (as a side effect, this gives variables a second chance at
// physical register assignment).
//
// A middle ground approach is to leverage sparsity and allocate one block of
// space on the frame for globals (variables with multi-block lifetime), and
// one block to share for locals (single-block lifetime).
Context.init(Node);
Context.setInsertPoint(Context.getCur());
llvm::SmallBitVector CalleeSaves =
getRegisterSet(RegSet_CalleeSave, RegSet_None);
RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
VarList SortedSpilledVariables, VariablesLinkedToSpillSlots;
size_t GlobalsSize = 0;
// If there is a separate locals area, this represents that area. Otherwise
// it counts any variable not counted by GlobalsSize.
SpillAreaSizeBytes = 0;
// If there is a separate locals area, this specifies the alignment for it.
uint32_t LocalsSlotsAlignmentBytes = 0;
// The entire spill locations area gets aligned to largest natural alignment
// of the variables that have a spill slot.
uint32_t SpillAreaAlignmentBytes = 0;
// A spill slot linked to a variable with a stack slot should reuse that
// stack slot.
std::function<bool(Variable *)> TargetVarHook =
[&VariablesLinkedToSpillSlots](Variable *Var) {
if (auto *SpillVar =
llvm::dyn_cast<typename Traits::SpillVariable>(Var)) {
assert(Var->mustNotHaveReg());
if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) {
VariablesLinkedToSpillSlots.push_back(Var);
return true;
}
}
return false;
};
// Compute the list of spilled variables and bounds for GlobalsSize, etc.
getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
&SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
&LocalsSlotsAlignmentBytes, TargetVarHook);
uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
SpillAreaSizeBytes += GlobalsSize;
// Add push instructions for preserved registers.
uint32_t NumCallee = 0;
size_t PreservedRegsSizeBytes = 0;
llvm::SmallBitVector Pushed(CalleeSaves.size());
for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
const int32_t Canonical = Traits::getBaseReg(i);
assert(Canonical == Traits::getBaseReg(Canonical));
if (CalleeSaves[i] && RegsUsed[i])
Pushed[Canonical] = true;
}
Variable *rbp =
getPhysicalRegister(Traits::RegisterSet::Reg_rbp, IceType_i64);
Variable *ebp =
getPhysicalRegister(Traits::RegisterSet::Reg_ebp, IceType_i32);
Variable *rsp =
getPhysicalRegister(Traits::RegisterSet::Reg_rsp, IceType_i64);
for (SizeT i = 0; i < Pushed.size(); ++i) {
if (!Pushed[i])
continue;
assert(static_cast<int32_t>(i) == Traits::getBaseReg(i));
++NumCallee;
PreservedRegsSizeBytes += typeWidthInBytes(IceType_i64);
Variable *Src = getPhysicalRegister(i, IceType_i64);
if (Src != rbp || !NeedSandboxing) {
_push(getPhysicalRegister(i, IceType_i64));
} else {
_push_rbp();
}
}
Ctx->statsUpdateRegistersSaved(NumCallee);
// Generate "push ebp; mov ebp, esp"
if (IsEbpBasedFrame) {
assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
.count() == 0);
PreservedRegsSizeBytes += typeWidthInBytes(IceType_i64);
Variable *esp =
getPhysicalRegister(Traits::RegisterSet::Reg_esp, IceType_i32);
Variable *r15 =
getPhysicalRegister(Traits::RegisterSet::Reg_r15, IceType_i64);
if (!NeedSandboxing) {
_push(rbp);
_mov(rbp, rsp);
} else {
_push_rbp();
AutoBundle _(this);
_redefined(Context.insert<InstFakeDef>(ebp, rbp));
_redefined(Context.insert<InstFakeDef>(esp, rsp));
_mov(ebp, esp);
_redefined(Context.insert<InstFakeDef>(rsp, esp));
_add(rbp, r15);
}
// Keep ebp live for late-stage liveness analysis (e.g. asm-verbose mode).
Context.insert<InstFakeUse>(rbp);
}
// Align the variables area. SpillAreaPaddingBytes is the size of the region
// after the preserved registers and before the spill areas.
// LocalsSlotsPaddingBytes is the amount of padding between the globals and
// locals area if they are separate.
assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES);
assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
uint32_t SpillAreaPaddingBytes = 0;
uint32_t LocalsSlotsPaddingBytes = 0;
alignStackSpillAreas(Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes,
SpillAreaAlignmentBytes, GlobalsSize,
LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes,
&LocalsSlotsPaddingBytes);
SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
uint32_t GlobalsAndSubsequentPaddingSize =
GlobalsSize + LocalsSlotsPaddingBytes;
// Align esp if necessary.
if (NeedsStackAlignment) {
uint32_t StackOffset =
Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
uint32_t StackSize =
Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes);
StackSize = Traits::applyStackAlignment(StackSize + maxOutArgsSizeBytes());
SpillAreaSizeBytes = StackSize - StackOffset;
} else {
SpillAreaSizeBytes += maxOutArgsSizeBytes();
}
// Combine fixed allocations into SpillAreaSizeBytes if we are emitting the
// fixed allocations in the prolog.
if (PrologEmitsFixedAllocas)
SpillAreaSizeBytes += FixedAllocaSizeBytes;
// Generate "sub esp, SpillAreaSizeBytes"
if (SpillAreaSizeBytes) {
if (NeedSandboxing) {
_sub_sp(Ctx->getConstantInt32(SpillAreaSizeBytes));
} else {
_sub(getPhysicalRegister(getStackReg(), IceType_i64),
Ctx->getConstantInt32(SpillAreaSizeBytes));
}
// If the fixed allocas are aligned more than the stack frame, align the
// stack pointer accordingly.
if (PrologEmitsFixedAllocas &&
FixedAllocaAlignBytes > Traits::X86_STACK_ALIGNMENT_BYTES) {
assert(IsEbpBasedFrame);
_and(getPhysicalRegister(Traits::RegisterSet::Reg_rsp, IceType_i64),
Ctx->getConstantInt32(-FixedAllocaAlignBytes));
}
}
// Account for alloca instructions with known frame offsets.
if (!PrologEmitsFixedAllocas)
SpillAreaSizeBytes += FixedAllocaSizeBytes;
Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
// Fill in stack offsets for stack args, and copy args into registers for
// those that were register-allocated. Args are pushed right to left, so
// Arg[0] is closest to the stack/frame pointer.
Variable *FramePtr =
getPhysicalRegister(getFrameOrStackReg(), Traits::WordType);
size_t BasicFrameOffset =
PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES;
if (!IsEbpBasedFrame)
BasicFrameOffset += SpillAreaSizeBytes;
const VarList &Args = Func->getArgs();
size_t InArgsSizeBytes = 0;
unsigned NumXmmArgs = 0;
unsigned NumGPRArgs = 0;
for (Variable *Arg : Args) {
// Skip arguments passed in registers.
if (isVectorType(Arg->getType()) || isScalarFloatingType(Arg->getType())) {
if (NumXmmArgs < Traits::X86_MAX_XMM_ARGS) {
++NumXmmArgs;
continue;
}
} else {
assert(isScalarIntegerType(Arg->getType()));
if (NumGPRArgs < Traits::X86_MAX_GPR_ARGS) {
++NumGPRArgs;
continue;
}
}
// For esp-based frames, the esp value may not stabilize to its home value
// until after all the fixed-size alloca instructions have executed. In
// this case, a stack adjustment is needed when accessing in-args in order
// to copy them into registers.
size_t StackAdjBytes = 0;
if (!IsEbpBasedFrame && !PrologEmitsFixedAllocas)
StackAdjBytes -= FixedAllocaSizeBytes;
finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, StackAdjBytes,
InArgsSizeBytes);
}
// Fill in stack offsets for locals.
assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
IsEbpBasedFrame);
// Assign stack offsets to variables that have been linked to spilled
// variables.
for (Variable *Var : VariablesLinkedToSpillSlots) {
Variable *Linked =
(llvm::cast<typename Traits::SpillVariable>(Var))->getLinkedTo();
Var->setStackOffset(Linked->getStackOffset());
}
this->HasComputedFrame = true;
if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
OstreamLocker L(Func->getContext());
Ostream &Str = Func->getContext()->getStrDump();
Str << "Stack layout:\n";
uint32_t EspAdjustmentPaddingSize =
SpillAreaSizeBytes - LocalsSpillAreaSize -
GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes -
maxOutArgsSizeBytes();
Str << " in-args = " << InArgsSizeBytes << " bytes\n"
<< " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n"
<< " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
<< " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
<< " globals spill area = " << GlobalsSize << " bytes\n"
<< " globals-locals spill areas intermediate padding = "
<< GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
<< " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
<< " esp alignment padding = " << EspAdjustmentPaddingSize
<< " bytes\n";
Str << "Stack details:\n"
<< " esp adjustment = " << SpillAreaSizeBytes << " bytes\n"
<< " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
<< " outgoing args size = " << maxOutArgsSizeBytes() << " bytes\n"
<< " locals spill area alignment = " << LocalsSlotsAlignmentBytes
<< " bytes\n"
<< " is ebp based = " << IsEbpBasedFrame << "\n";
}
}
void TargetX8664::addEpilog(CfgNode *Node) {
InstList &Insts = Node->getInsts();
InstList::reverse_iterator RI, E;
for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
if (llvm::isa<typename Traits::Insts::Ret>(*RI))
break;
}
if (RI == E)
return;
// Convert the reverse_iterator position into its corresponding (forward)
// iterator position.
InstList::iterator InsertPoint = RI.base();
--InsertPoint;
Context.init(Node);
Context.setInsertPoint(InsertPoint);
Variable *rsp =
getPhysicalRegister(Traits::RegisterSet::Reg_rsp, IceType_i64);
if (!IsEbpBasedFrame) {
// add rsp, SpillAreaSizeBytes
if (SpillAreaSizeBytes != 0) {
_add_sp(Ctx->getConstantInt32(SpillAreaSizeBytes));
}
} else {
Variable *rbp =
getPhysicalRegister(Traits::RegisterSet::Reg_rbp, IceType_i64);
Variable *ebp =
getPhysicalRegister(Traits::RegisterSet::Reg_ebp, IceType_i32);
// For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake
// use of rsp before the assignment of rsp=rbp keeps previous rsp
// adjustments from being dead-code eliminated.
Context.insert<InstFakeUse>(rsp);
if (!NeedSandboxing) {
_mov(rsp, rbp);
_pop(rbp);
} else {
_mov_sp(ebp);
Variable *r15 =
getPhysicalRegister(Traits::RegisterSet::Reg_r15, IceType_i64);
Variable *rcx =
getPhysicalRegister(Traits::RegisterSet::Reg_rcx, IceType_i64);
Variable *ecx =
getPhysicalRegister(Traits::RegisterSet::Reg_ecx, IceType_i32);
_pop(rcx);
Context.insert<InstFakeDef>(ecx, rcx);
AutoBundle _(this);
_mov(ebp, ecx);
_redefined(Context.insert<InstFakeDef>(rbp, ebp));
_add(rbp, r15);
}
}
// Add pop instructions for preserved registers.
llvm::SmallBitVector CalleeSaves =
getRegisterSet(RegSet_CalleeSave, RegSet_None);
llvm::SmallBitVector Popped(CalleeSaves.size());
for (int32_t i = CalleeSaves.size() - 1; i >= 0; --i) {
if (i == Traits::RegisterSet::Reg_rbp && IsEbpBasedFrame)
continue;
const SizeT Canonical = Traits::getBaseReg(i);
if (CalleeSaves[i] && RegsUsed[i])
Popped[Canonical] = true;
}
for (int32_t i = Popped.size() - 1; i >= 0; --i) {
if (!Popped[i])
continue;
assert(i == Traits::getBaseReg(i));
_pop(getPhysicalRegister(i, IceType_i64));
}
if (!NeedSandboxing) {
return;
}
Variable *T_rcx = makeReg(IceType_i64, Traits::RegisterSet::Reg_rcx); Variable *T_rcx = makeReg(IceType_i64, Traits::RegisterSet::Reg_rcx);
Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
_pop(T_rcx); _pop(T_rcx);
_mov(T_ecx, T_rcx); _mov(T_ecx, T_rcx);
// lowerIndirectJump(T_ecx); // lowerIndirectJump(T_ecx);
Variable *r15 = Variable *r15 =
getPhysicalRegister(Traits::RegisterSet::Reg_r15, IceType_i64); getPhysicalRegister(Traits::RegisterSet::Reg_r15, IceType_i64);
...@@ -827,12 +545,6 @@ void TargetX8664::addEpilog(CfgNode *Node) { ...@@ -827,12 +545,6 @@ void TargetX8664::addEpilog(CfgNode *Node) {
_jmp(T_rcx); _jmp(T_rcx);
} }
if (RI->getSrcSize()) {
auto *RetValue = llvm::cast<Variable>(RI->getSrc(0));
Context.insert<InstFakeUse>(RetValue);
}
RI->setDeleted();
} }
void TargetX8664::emitJumpTable(const Cfg *Func, void TargetX8664::emitJumpTable(const Cfg *Func,
......
...@@ -57,13 +57,16 @@ protected: ...@@ -57,13 +57,16 @@ protected:
void _push_rbp(); void _push_rbp();
Traits::X86OperandMem *_sandbox_mem_reference(X86OperandMem *Mem); Traits::X86OperandMem *_sandbox_mem_reference(X86OperandMem *Mem);
void _sub_sp(Operand *Adjustment); void _sub_sp(Operand *Adjustment);
void _link_bp();
void _unlink_bp();
void _push_reg(Variable *Reg);
void initSandbox(); void initSandbox();
void emitSandboxedReturn();
void lowerIndirectJump(Variable *JumpTarget); void lowerIndirectJump(Variable *JumpTarget);
void emitGetIP(CfgNode *Node);
Inst *emitCallToTarget(Operand *CallTarget, Variable *ReturnReg) override; Inst *emitCallToTarget(Operand *CallTarget, Variable *ReturnReg) override;
Variable *moveReturnValueToRegister(Operand *Value, Type ReturnType) override; Variable *moveReturnValueToRegister(Operand *Value, Type ReturnType) override;
void addProlog(CfgNode *Node) override;
void addEpilog(CfgNode *Node) override;
private: private:
ENABLE_MAKE_UNIQUE; ENABLE_MAKE_UNIQUE;
......
...@@ -177,9 +177,11 @@ public: ...@@ -177,9 +177,11 @@ public:
"Hey, yo! This is x86-64. Watcha doin'? (hiOperand)"); "Hey, yo! This is x86-64. Watcha doin'? (hiOperand)");
} }
void addProlog(CfgNode *Node) override;
void finishArgumentLowering(Variable *Arg, Variable *FramePtr, void finishArgumentLowering(Variable *Arg, Variable *FramePtr,
size_t BasicFrameOffset, size_t StackAdjBytes, size_t BasicFrameOffset, size_t StackAdjBytes,
size_t &InArgsSizeBytes); size_t &InArgsSizeBytes);
void addEpilog(CfgNode *Node) override;
X86Address stackVarToAsmOperand(const Variable *Var) const; X86Address stackVarToAsmOperand(const Variable *Var) const;
InstructionSetEnum getInstructionSet() const { return InstructionSet; } InstructionSetEnum getInstructionSet() const { return InstructionSet; }
...@@ -282,6 +284,13 @@ protected: ...@@ -282,6 +284,13 @@ protected:
void scalarizeArithmetic(InstArithmetic::OpKind K, Variable *Dest, void scalarizeArithmetic(InstArithmetic::OpKind K, Variable *Dest,
Operand *Src0, Operand *Src1); Operand *Src0, Operand *Src1);
void emitGetIP(CfgNode *Node) {
dispatchToConcrete(&Traits::ConcreteTarget::emitGetIP, std::move(Node));
}
/// Emit a sandboxed return sequence rather than a return.
void emitSandboxedReturn() {
dispatchToConcrete(&Traits::ConcreteTarget::emitSandboxedReturn);
}
/// Emit just the call instruction (without argument or return variable /// Emit just the call instruction (without argument or return variable
/// processing), sandboxing if needed. /// processing), sandboxing if needed.
virtual Inst *emitCallToTarget(Operand *CallTarget, Variable *ReturnReg) = 0; virtual Inst *emitCallToTarget(Operand *CallTarget, Variable *ReturnReg) = 0;
...@@ -588,6 +597,10 @@ protected: ...@@ -588,6 +597,10 @@ protected:
void _lea(Variable *Dest, Operand *Src0) { void _lea(Variable *Dest, Operand *Src0) {
Context.insert<typename Traits::Insts::Lea>(Dest, Src0); Context.insert<typename Traits::Insts::Lea>(Dest, Src0);
} }
void _link_bp() { dispatchToConcrete(&Traits::ConcreteTarget::_link_bp); }
void _push_reg(Variable *Reg) {
dispatchToConcrete(&Traits::ConcreteTarget::_push_reg, std::move(Reg));
}
void _mfence() { Context.insert<typename Traits::Insts::Mfence>(); } void _mfence() { Context.insert<typename Traits::Insts::Mfence>(); }
/// Moves can be used to redefine registers, creating "partial kills" for /// Moves can be used to redefine registers, creating "partial kills" for
/// liveness. Mark where moves are used in this way. /// liveness. Mark where moves are used in this way.
...@@ -836,6 +849,7 @@ protected: ...@@ -836,6 +849,7 @@ protected:
Context.insert<typename Traits::Insts::Ucomiss>(Src0, Src1); Context.insert<typename Traits::Insts::Ucomiss>(Src0, Src1);
} }
void _ud2() { Context.insert<typename Traits::Insts::UD2>(); } void _ud2() { Context.insert<typename Traits::Insts::UD2>(); }
void _unlink_bp() { dispatchToConcrete(&Traits::ConcreteTarget::_unlink_bp); }
void _xadd(Operand *Dest, Variable *Src, bool Locked) { void _xadd(Operand *Dest, Variable *Src, bool Locked) {
AutoMemorySandboxer<> _(this, &Dest, &Src); AutoMemorySandboxer<> _(this, &Dest, &Src);
Context.insert<typename Traits::Insts::Xadd>(Dest, Src, Locked); Context.insert<typename Traits::Insts::Xadd>(Dest, Src, Locked);
......
...@@ -874,6 +874,277 @@ TargetX86Base<TraitsType>::stackVarToAsmOperand(const Variable *Var) const { ...@@ -874,6 +874,277 @@ TargetX86Base<TraitsType>::stackVarToAsmOperand(const Variable *Var) const {
AssemblerFixup::NoFixup); AssemblerFixup::NoFixup);
} }
template <typename TraitsType>
void TargetX86Base<TraitsType>::addProlog(CfgNode *Node) {
  // Emits the function prolog into Node: pushes the used callee-save
  // registers, links the frame pointer when an ebp-based frame is in use,
  // reserves the combined spill/alloca/out-args area on the stack, and
  // assigns stack offsets to spilled variables and stack-passed arguments.
  //
  // Stack frame layout:
  //
  // +------------------------+
  // | 1. return address |
  // +------------------------+
  // | 2. preserved registers |
  // +------------------------+
  // | 3. padding |
  // +------------------------+
  // | 4. global spill area |
  // +------------------------+
  // | 5. padding |
  // +------------------------+
  // | 6. local spill area |
  // +------------------------+
  // | 7. padding |
  // +------------------------+
  // | 8. allocas |
  // +------------------------+
  // | 9. padding |
  // +------------------------+
  // | 10. out args |
  // +------------------------+ <--- StackPointer
  //
  // The following variables record the size in bytes of the given areas:
  // * X86_RET_IP_SIZE_BYTES: area 1
  // * PreservedRegsSizeBytes: area 2
  // * SpillAreaPaddingBytes: area 3
  // * GlobalsSize: area 4
  // * GlobalsAndSubsequentPaddingSize: areas 4 - 5
  // * LocalsSpillAreaSize: area 6
  // * SpillAreaSizeBytes: areas 3 - 10
  // * maxOutArgsSizeBytes(): area 10

  // Determine stack frame offsets for each Variable without a register
  // assignment. This can be done as one variable per stack slot. Or, do
  // coalescing by running the register allocator again with an infinite set of
  // registers (as a side effect, this gives variables a second chance at
  // physical register assignment).
  //
  // A middle ground approach is to leverage sparsity and allocate one block of
  // space on the frame for globals (variables with multi-block lifetime), and
  // one block to share for locals (single-block lifetime).
  Context.init(Node);
  Context.setInsertPoint(Context.getCur());

  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);
  RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
  VarList SortedSpilledVariables, VariablesLinkedToSpillSlots;
  size_t GlobalsSize = 0;
  // If there is a separate locals area, this represents that area. Otherwise
  // it counts any variable not counted by GlobalsSize.
  SpillAreaSizeBytes = 0;
  // If there is a separate locals area, this specifies the alignment for it.
  uint32_t LocalsSlotsAlignmentBytes = 0;
  // The entire spill locations area gets aligned to largest natural alignment
  // of the variables that have a spill slot.
  uint32_t SpillAreaAlignmentBytes = 0;
  // A spill slot linked to a variable with a stack slot should reuse that
  // stack slot.
  std::function<bool(Variable *)> TargetVarHook =
      [&VariablesLinkedToSpillSlots](Variable *Var) {
        // Returns true when Var can share its linked-to variable's slot, in
        // which case no dedicated spill slot is allocated for it here.
        if (auto *SpillVar =
                llvm::dyn_cast<typename Traits::SpillVariable>(Var)) {
          assert(Var->mustNotHaveReg());
          if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) {
            VariablesLinkedToSpillSlots.push_back(Var);
            return true;
          }
        }
        return false;
      };

  // Compute the list of spilled variables and bounds for GlobalsSize, etc.
  getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
                        &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
                        &LocalsSlotsAlignmentBytes, TargetVarHook);
  uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
  SpillAreaSizeBytes += GlobalsSize;

  // Add push instructions for preserved registers. Only the canonical (base)
  // register of each alias group is pushed, once, regardless of how many of
  // its aliases are used.
  uint32_t NumCallee = 0;
  size_t PreservedRegsSizeBytes = 0;
  llvm::SmallBitVector Pushed(CalleeSaves.size());
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    const int32_t Canonical = Traits::getBaseReg(i);
    assert(Canonical == Traits::getBaseReg(Canonical));
    if (CalleeSaves[i] && RegsUsed[i]) {
      Pushed[Canonical] = true;
    }
  }
  for (SizeT i = 0; i < Pushed.size(); ++i) {
    if (!Pushed[i])
      continue;
    assert(static_cast<int32_t>(i) == Traits::getBaseReg(i));
    ++NumCallee;
    PreservedRegsSizeBytes += typeWidthInBytes(Traits::WordType);
    _push_reg(getPhysicalRegister(i, Traits::WordType));
  }
  Ctx->statsUpdateRegistersSaved(NumCallee);

  // Generate "push frameptr; mov frameptr, stackptr" (dispatched to the
  // concrete target via _link_bp()).
  if (IsEbpBasedFrame) {
    assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
               .count() == 0);
    PreservedRegsSizeBytes += typeWidthInBytes(Traits::WordType);
    _link_bp();
  }

  // Align the variables area. SpillAreaPaddingBytes is the size of the region
  // after the preserved registers and before the spill areas.
  // LocalsSlotsPaddingBytes is the amount of padding between the globals and
  // locals area if they are separate.
  assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES);
  assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
  uint32_t SpillAreaPaddingBytes = 0;
  uint32_t LocalsSlotsPaddingBytes = 0;
  alignStackSpillAreas(Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes,
                       SpillAreaAlignmentBytes, GlobalsSize,
                       LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes,
                       &LocalsSlotsPaddingBytes);
  SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
  uint32_t GlobalsAndSubsequentPaddingSize =
      GlobalsSize + LocalsSlotsPaddingBytes;

  // Functions returning scalar floating point types may need to convert values
  // from an in-register xmm value to the top of the x87 floating point stack.
  // This is done by a movp[sd] and an fld[sd]. Ensure there is enough scratch
  // space on the stack for this.
  const Type ReturnType = Func->getReturnType();
  if (!Traits::X86_PASS_SCALAR_FP_IN_XMM) {
    if (isScalarFloatingType(ReturnType)) {
      // Avoid misaligned double-precision load/store.
      NeedsStackAlignment = true;
      SpillAreaSizeBytes =
          std::max(typeWidthInBytesOnStack(ReturnType), SpillAreaSizeBytes);
    }
  }

  // Align esp if necessary.
  if (NeedsStackAlignment) {
    uint32_t StackOffset =
        Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
    uint32_t StackSize =
        Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes);
    StackSize = Traits::applyStackAlignment(StackSize + maxOutArgsSizeBytes());
    SpillAreaSizeBytes = StackSize - StackOffset;
  } else {
    SpillAreaSizeBytes += maxOutArgsSizeBytes();
  }

  // Combine fixed allocations into SpillAreaSizeBytes if we are emitting the
  // fixed allocations in the prolog.
  if (PrologEmitsFixedAllocas)
    SpillAreaSizeBytes += FixedAllocaSizeBytes;
  if (SpillAreaSizeBytes) {
    // Generate "sub stackptr, SpillAreaSizeBytes"
    _sub_sp(Ctx->getConstantInt32(SpillAreaSizeBytes));
    // If the fixed allocas are aligned more than the stack frame, align the
    // stack pointer accordingly.
    if (PrologEmitsFixedAllocas &&
        FixedAllocaAlignBytes > Traits::X86_STACK_ALIGNMENT_BYTES) {
      assert(IsEbpBasedFrame);
      _and(getPhysicalRegister(getStackReg(), Traits::WordType),
           Ctx->getConstantInt32(-FixedAllocaAlignBytes));
    }
  }

  // Account for known-frame-offset alloca instructions that were not already
  // combined into the prolog.
  if (!PrologEmitsFixedAllocas)
    SpillAreaSizeBytes += FixedAllocaSizeBytes;

  Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);

  // Fill in stack offsets for stack args, and copy args into registers for
  // those that were register-allocated. Args are pushed right to left, so
  // Arg[0] is closest to the stack/frame pointer.
  Variable *FramePtr =
      getPhysicalRegister(getFrameOrStackReg(), Traits::WordType);
  size_t BasicFrameOffset =
      PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES;
  if (!IsEbpBasedFrame)
    BasicFrameOffset += SpillAreaSizeBytes;

  // Dispatches to the concrete target's emitGetIP hook.
  emitGetIP(Node);

  const VarList &Args = Func->getArgs();
  size_t InArgsSizeBytes = 0;
  unsigned NumXmmArgs = 0;
  unsigned NumGPRArgs = 0;
  for (Variable *Arg : Args) {
    // Skip arguments passed in registers.
    if (isVectorType(Arg->getType())) {
      if (Traits::getRegisterForXmmArgNum(NumXmmArgs) != Variable::NoRegister) {
        ++NumXmmArgs;
        continue;
      }
    } else if (isScalarFloatingType(Arg->getType())) {
      if (Traits::X86_PASS_SCALAR_FP_IN_XMM &&
          Traits::getRegisterForXmmArgNum(NumXmmArgs) != Variable::NoRegister) {
        ++NumXmmArgs;
        continue;
      }
    } else {
      assert(isScalarIntegerType(Arg->getType()));
      if (Traits::getRegisterForGprArgNum(Traits::WordType, NumGPRArgs) !=
          Variable::NoRegister) {
        ++NumGPRArgs;
        continue;
      }
    }
    // For esp-based frames where the allocas are done outside the prolog, the
    // esp value may not stabilize to its home value until after all the
    // fixed-size alloca instructions have executed. In this case, a stack
    // adjustment is needed when accessing in-args in order to copy them into
    // registers.
    // NOTE(review): StackAdjBytes is unsigned (size_t), so the subtraction
    // below wraps when FixedAllocaSizeBytes > 0 and relies on modular
    // arithmetic in the later offset addition to produce the intended
    // negative adjustment. Consider a signed type — TODO confirm.
    size_t StackAdjBytes = 0;
    if (!IsEbpBasedFrame && !PrologEmitsFixedAllocas)
      StackAdjBytes -= FixedAllocaSizeBytes;
    finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, StackAdjBytes,
                           InArgsSizeBytes);
  }

  // Fill in stack offsets for locals.
  assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
                      SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
                      IsEbpBasedFrame);
  // Assign stack offsets to variables that have been linked to spilled
  // variables.
  for (Variable *Var : VariablesLinkedToSpillSlots) {
    Variable *Linked =
        (llvm::cast<typename Traits::SpillVariable>(Var))->getLinkedTo();
    Var->setStackOffset(Linked->getStackOffset());
  }
  this->HasComputedFrame = true;

  // Optional verbose dump of the computed frame layout.
  if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
    OstreamLocker L(Func->getContext());
    Ostream &Str = Func->getContext()->getStrDump();

    Str << "Stack layout:\n";
    uint32_t EspAdjustmentPaddingSize =
        SpillAreaSizeBytes - LocalsSpillAreaSize -
        GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes -
        maxOutArgsSizeBytes();
    Str << " in-args = " << InArgsSizeBytes << " bytes\n"
        << " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n"
        << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
        << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
        << " globals spill area = " << GlobalsSize << " bytes\n"
        << " globals-locals spill areas intermediate padding = "
        << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
        << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
        << " esp alignment padding = " << EspAdjustmentPaddingSize
        << " bytes\n";

    Str << "Stack details:\n"
        << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n"
        << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
        << " outgoing args size = " << maxOutArgsSizeBytes() << " bytes\n"
        << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
        << " bytes\n"
        << " is ebp based = " << IsEbpBasedFrame << "\n";
  }
}
/// Helper function for addProlog(). /// Helper function for addProlog().
/// ///
/// This assumes Arg is an argument passed on the stack. This sets the frame /// This assumes Arg is an argument passed on the stack. This sets the frame
...@@ -920,6 +1191,63 @@ void TargetX86Base<TraitsType>::finishArgumentLowering( ...@@ -920,6 +1191,63 @@ void TargetX86Base<TraitsType>::finishArgumentLowering(
} }
} }
template <typename TraitsType>
void TargetX86Base<TraitsType>::addEpilog(CfgNode *Node) {
  // Emits the function epilog just before the node's ret instruction:
  // tears down the stack frame, restores preserved registers, and (when
  // sandboxing) replaces the ret with a sandboxed return sequence.
  InstList &Insts = Node->getInsts();
  // Scan backwards for the ret; a node with no ret needs no epilog.
  auto RetIter = Insts.rbegin();
  const auto SearchEnd = Insts.rend();
  while (RetIter != SearchEnd &&
         !llvm::isa<typename Traits::Insts::Ret>(*RetIter))
    ++RetIter;
  if (RetIter == SearchEnd)
    return;
  // A reverse_iterator's base() points one element past it in forward order;
  // step back so insertion happens immediately before the ret.
  InstList::iterator InsertPoint = RetIter.base();
  --InsertPoint;
  Context.init(Node);
  Context.setInsertPoint(InsertPoint);

  // Undo the frame setup: either unlink the frame pointer, or pop the spill
  // area off the stack pointer.
  if (IsEbpBasedFrame) {
    _unlink_bp();
  } else if (SpillAreaSizeBytes != 0) {
    // add stackptr, SpillAreaSizeBytes
    _add_sp(Ctx->getConstantInt32(SpillAreaSizeBytes));
  }

  // Restore preserved registers in reverse order of the prolog's pushes.
  // Only the canonical (base) register of each alias group is popped.
  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);
  llvm::SmallBitVector Popped(CalleeSaves.size());
  for (int32_t Reg = CalleeSaves.size() - 1; Reg >= 0; --Reg) {
    // The frame pointer is handled by _unlink_bp() above.
    if (IsEbpBasedFrame && static_cast<SizeT>(Reg) == getFrameReg())
      continue;
    if (CalleeSaves[Reg] && RegsUsed[Reg])
      Popped[Traits::getBaseReg(Reg)] = true;
  }
  for (int32_t Reg = Popped.size() - 1; Reg >= 0; --Reg) {
    if (Popped[Reg]) {
      assert(Reg == Traits::getBaseReg(Reg));
      _pop(getPhysicalRegister(Reg, Traits::WordType));
    }
  }

  if (!NeedSandboxing)
    return;
  // Emit a sandboxed return sequence rather than a plain ret. Keep the
  // return value alive across it with a fake use, then drop the original
  // ret instruction.
  emitSandboxedReturn();
  if (RetIter->getSrcSize()) {
    auto *RetValue = llvm::cast<Variable>(RetIter->getSrc(0));
    Context.insert<InstFakeUse>(RetValue);
  }
  RetIter->setDeleted();
}
template <typename TraitsType> Type TargetX86Base<TraitsType>::stackSlotType() { template <typename TraitsType> Type TargetX86Base<TraitsType>::stackSlotType() {
return Traits::WordType; return Traits::WordType;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment