Commit 729b5f6c by John Porto

Subzero. Moves code around in preparations for 64-bit lowering.

Specifically, it moves lowerArguments lowerRet addProlog addEpilog from the x86 lowering template to the concrete lowering implementations. BUG= https://code.google.com/p/nativeclient/issues/detail?id=4077 R=stichnot@chromium.org Review URL: https://codereview.chromium.org/1261383002.
parent f6f9825e
...@@ -8,9 +8,8 @@ ...@@ -8,9 +8,8 @@
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
/// ///
/// \file /// \file
/// This file used to house all the X8664 instructions. Subzero has been /// (Note: x86 instructions are templates, and they are defined in
/// modified to use templates for X86 instructions, so all those definitions are /// src/IceInstX86Base.)
/// are in IceInstX86Base.h
/// ///
/// When interacting with the X8664 target (which should only happen in the /// When interacting with the X8664 target (which should only happen in the
/// X8664 TargetLowering) clients have should use the Ice::X8664::Traits::Insts /// X8664 TargetLowering) clients have should use the Ice::X8664::Traits::Insts
......
...@@ -89,6 +89,563 @@ const char *MachineTraits<TargetX8632>::TargetName = "X8632"; ...@@ -89,6 +89,563 @@ const char *MachineTraits<TargetX8632>::TargetName = "X8632";
} // end of namespace X86Internal } // end of namespace X86Internal
//------------------------------------------------------------------------------
// __ ______ __ __ ______ ______ __ __ __ ______
// /\ \ /\ __ \/\ \ _ \ \/\ ___\/\ == \/\ \/\ "-.\ \/\ ___\
// \ \ \___\ \ \/\ \ \ \/ ".\ \ \ __\\ \ __<\ \ \ \ \-. \ \ \__ \
// \ \_____\ \_____\ \__/".~\_\ \_____\ \_\ \_\ \_\ \_\\"\_\ \_____\
// \/_____/\/_____/\/_/ \/_/\/_____/\/_/ /_/\/_/\/_/ \/_/\/_____/
//
//------------------------------------------------------------------------------
void TargetX8632::lowerCall(const InstCall *Instr) {
// x86-32 calling convention:
//
// * At the point before the call, the stack must be aligned to 16
// bytes.
//
// * The first four arguments of vector type, regardless of their
// position relative to the other arguments in the argument list, are
// placed in registers xmm0 - xmm3.
//
// * Other arguments are pushed onto the stack in right-to-left order,
// such that the left-most argument ends up on the top of the stack at
// the lowest memory address.
//
// * Stack arguments of vector type are aligned to start at the next
// highest multiple of 16 bytes. Other stack arguments are aligned to
// 4 bytes.
//
// This intends to match the section "IA-32 Function Calling
// Convention" of the document "OS X ABI Function Call Guide" by
// Apple.
NeedsStackAlignment = true;
typedef std::vector<Operand *> OperandList;
OperandList XmmArgs;
OperandList StackArgs, StackArgLocations;
uint32_t ParameterAreaSizeBytes = 0;
// Classify each argument operand according to the location where the
// argument is passed.
for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
Operand *Arg = Instr->getArg(i);
Type Ty = Arg->getType();
// The PNaCl ABI requires the width of arguments to be at least 32 bits.
assert(typeWidthInBytes(Ty) >= 4);
if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
XmmArgs.push_back(Arg);
} else {
StackArgs.push_back(Arg);
if (isVectorType(Arg->getType())) {
ParameterAreaSizeBytes =
Traits::applyStackAlignment(ParameterAreaSizeBytes);
}
Variable *esp =
Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
StackArgLocations.push_back(
Traits::X86OperandMem::create(Func, Ty, esp, Loc));
ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
}
}
// Adjust the parameter area so that the stack is aligned. It is
// assumed that the stack is already aligned at the start of the
// calling sequence.
ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
// Subtract the appropriate amount for the argument area. This also
// takes care of setting the stack adjustment during emission.
//
// TODO: If for some reason the call instruction gets dead-code
// eliminated after lowering, we would need to ensure that the
// pre-call and the post-call esp adjustment get eliminated as well.
if (ParameterAreaSizeBytes) {
_adjust_stack(ParameterAreaSizeBytes);
}
// Copy arguments that are passed on the stack to the appropriate
// stack locations.
for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
}
// Copy arguments to be passed in registers to the appropriate
// registers.
// TODO: Investigate the impact of lowering arguments passed in
// registers after lowering stack arguments as opposed to the other
// way around. Lowering register arguments after stack arguments may
// reduce register pressure. On the other hand, lowering register
// arguments first (before stack arguments) may result in more compact
// code, as the memory operand displacements may end up being smaller
// before any stack adjustment is done.
for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
Variable *Reg =
legalizeToReg(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i);
// Generate a FakeUse of register arguments so that they do not get
// dead code eliminated as a result of the FakeKill of scratch
// registers after the call.
Context.insert(InstFakeUse::create(Func, Reg));
}
// Generate the call instruction. Assign its result to a temporary
// with high register allocation weight.
Variable *Dest = Instr->getDest();
// ReturnReg doubles as ReturnRegLo as necessary.
Variable *ReturnReg = nullptr;
Variable *ReturnRegHi = nullptr;
if (Dest) {
switch (Dest->getType()) {
case IceType_NUM:
case IceType_void:
llvm::report_fatal_error("Invalid Call dest type");
break;
case IceType_i1:
case IceType_i8:
case IceType_i16:
case IceType_i32:
ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
break;
case IceType_i64:
ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
break;
case IceType_f32:
case IceType_f64:
// Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with
// the fstp instruction.
break;
case IceType_v4i1:
case IceType_v8i1:
case IceType_v16i1:
case IceType_v16i8:
case IceType_v8i16:
case IceType_v4i32:
case IceType_v4f32:
ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0);
break;
}
}
Operand *CallTarget = legalize(Instr->getCallTarget());
const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
if (NeedSandboxing) {
if (llvm::isa<Constant>(CallTarget)) {
_bundle_lock(InstBundleLock::Opt_AlignToEnd);
} else {
Variable *CallTargetVar = nullptr;
_mov(CallTargetVar, CallTarget);
_bundle_lock(InstBundleLock::Opt_AlignToEnd);
const SizeT BundleSize =
1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
_and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
CallTarget = CallTargetVar;
}
}
Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget);
Context.insert(NewCall);
if (NeedSandboxing)
_bundle_unlock();
if (ReturnRegHi)
Context.insert(InstFakeDef::create(Func, ReturnRegHi));
// Add the appropriate offset to esp. The call instruction takes care
// of resetting the stack offset during emission.
if (ParameterAreaSizeBytes) {
Variable *esp =
Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
_add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
}
// Insert a register-kill pseudo instruction.
Context.insert(InstFakeKill::create(Func, NewCall));
// Generate a FakeUse to keep the call live if necessary.
if (Instr->hasSideEffects() && ReturnReg) {
Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
Context.insert(FakeUse);
}
if (!Dest)
return;
// Assign the result of the call to Dest.
if (ReturnReg) {
if (ReturnRegHi) {
assert(Dest->getType() == IceType_i64);
split64(Dest);
Variable *DestLo = Dest->getLo();
Variable *DestHi = Dest->getHi();
_mov(DestLo, ReturnReg);
_mov(DestHi, ReturnRegHi);
} else {
assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
isVectorType(Dest->getType()));
if (isVectorType(Dest->getType())) {
_movp(Dest, ReturnReg);
} else {
_mov(Dest, ReturnReg);
}
}
} else if (isScalarFloatingType(Dest->getType())) {
// Special treatment for an FP function which returns its result in
// st(0).
// If Dest ends up being a physical xmm register, the fstp emit code
// will route st(0) through a temporary stack slot.
_fstp(Dest);
// Create a fake use of Dest in case it actually isn't used,
// because st(0) still needs to be popped.
Context.insert(InstFakeUse::create(Func, Dest));
}
}
void TargetX8632::lowerArguments() {
VarList &Args = Func->getArgs();
// The first four arguments of vector type, regardless of their
// position relative to the other arguments in the argument list, are
// passed in registers xmm0 - xmm3.
unsigned NumXmmArgs = 0;
Context.init(Func->getEntryNode());
Context.setInsertPoint(Context.getCur());
for (SizeT I = 0, E = Args.size();
I < E && NumXmmArgs < Traits::X86_MAX_XMM_ARGS; ++I) {
Variable *Arg = Args[I];
Type Ty = Arg->getType();
if (!isVectorType(Ty))
continue;
// Replace Arg in the argument list with the home register. Then
// generate an instruction in the prolog to copy the home register
// to the assigned location of Arg.
int32_t RegNum = Traits::RegisterSet::Reg_xmm0 + NumXmmArgs;
++NumXmmArgs;
Variable *RegisterArg = Func->makeVariable(Ty);
if (BuildDefs::dump())
RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
RegisterArg->setRegNum(RegNum);
RegisterArg->setIsArg();
Arg->setIsArg(false);
Args[I] = RegisterArg;
Context.insert(InstAssign::create(Func, Arg, RegisterArg));
}
}
void TargetX8632::lowerRet(const InstRet *Inst) {
Variable *Reg = nullptr;
if (Inst->hasRetValue()) {
Operand *Src0 = legalize(Inst->getRetValue());
// TODO(jpp): this is not needed.
if (Src0->getType() == IceType_i64) {
Variable *eax =
legalizeToReg(loOperand(Src0), Traits::RegisterSet::Reg_eax);
Variable *edx =
legalizeToReg(hiOperand(Src0), Traits::RegisterSet::Reg_edx);
Reg = eax;
Context.insert(InstFakeUse::create(Func, edx));
} else if (isScalarFloatingType(Src0->getType())) {
_fld(Src0);
} else if (isVectorType(Src0->getType())) {
Reg = legalizeToReg(Src0, Traits::RegisterSet::Reg_xmm0);
} else {
_mov(Reg, Src0, Traits::RegisterSet::Reg_eax);
}
}
// Add a ret instruction even if sandboxing is enabled, because
// addEpilog explicitly looks for a ret instruction as a marker for
// where to insert the frame removal instructions.
_ret(Reg);
// Add a fake use of esp to make sure esp stays alive for the entire
// function. Otherwise post-call esp adjustments get dead-code
// eliminated. TODO: Are there more places where the fake use
// should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
// have a ret instruction.
Variable *esp =
Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
Context.insert(InstFakeUse::create(Func, esp));
}
void TargetX8632::addProlog(CfgNode *Node) {
// Stack frame layout:
//
// +------------------------+
// | 1. return address |
// +------------------------+
// | 2. preserved registers |
// +------------------------+
// | 3. padding |
// +------------------------+
// | 4. global spill area |
// +------------------------+
// | 5. padding |
// +------------------------+
// | 6. local spill area |
// +------------------------+
// | 7. padding |
// +------------------------+
// | 8. allocas |
// +------------------------+
//
// The following variables record the size in bytes of the given areas:
// * X86_RET_IP_SIZE_BYTES: area 1
// * PreservedRegsSizeBytes: area 2
// * SpillAreaPaddingBytes: area 3
// * GlobalsSize: area 4
// * GlobalsAndSubsequentPaddingSize: areas 4 - 5
// * LocalsSpillAreaSize: area 6
// * SpillAreaSizeBytes: areas 3 - 7
// Determine stack frame offsets for each Variable without a
// register assignment. This can be done as one variable per stack
// slot. Or, do coalescing by running the register allocator again
// with an infinite set of registers (as a side effect, this gives
// variables a second chance at physical register assignment).
//
// A middle ground approach is to leverage sparsity and allocate one
// block of space on the frame for globals (variables with
// multi-block lifetime), and one block to share for locals
// (single-block lifetime).
Context.init(Node);
Context.setInsertPoint(Context.getCur());
llvm::SmallBitVector CalleeSaves =
getRegisterSet(RegSet_CalleeSave, RegSet_None);
RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
VarList SortedSpilledVariables, VariablesLinkedToSpillSlots;
size_t GlobalsSize = 0;
// If there is a separate locals area, this represents that area.
// Otherwise it counts any variable not counted by GlobalsSize.
SpillAreaSizeBytes = 0;
// If there is a separate locals area, this specifies the alignment
// for it.
uint32_t LocalsSlotsAlignmentBytes = 0;
// The entire spill locations area gets aligned to largest natural
// alignment of the variables that have a spill slot.
uint32_t SpillAreaAlignmentBytes = 0;
// A spill slot linked to a variable with a stack slot should reuse
// that stack slot.
std::function<bool(Variable *)> TargetVarHook =
[&VariablesLinkedToSpillSlots](Variable *Var) {
if (auto *SpillVar =
llvm::dyn_cast<typename Traits::SpillVariable>(Var)) {
assert(Var->getWeight().isZero());
if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) {
VariablesLinkedToSpillSlots.push_back(Var);
return true;
}
}
return false;
};
// Compute the list of spilled variables and bounds for GlobalsSize, etc.
getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
&SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
&LocalsSlotsAlignmentBytes, TargetVarHook);
uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
SpillAreaSizeBytes += GlobalsSize;
// Add push instructions for preserved registers.
uint32_t NumCallee = 0;
size_t PreservedRegsSizeBytes = 0;
for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
if (CalleeSaves[i] && RegsUsed[i]) {
++NumCallee;
PreservedRegsSizeBytes += typeWidthInBytes(IceType_i32);
_push(getPhysicalRegister(i));
}
}
Ctx->statsUpdateRegistersSaved(NumCallee);
// Generate "push ebp; mov ebp, esp"
if (IsEbpBasedFrame) {
assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
.count() == 0);
PreservedRegsSizeBytes += typeWidthInBytes(IceType_i32);
Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);
Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
_push(ebp);
_mov(ebp, esp);
// Keep ebp live for late-stage liveness analysis
// (e.g. asm-verbose mode).
Context.insert(InstFakeUse::create(Func, ebp));
}
// Align the variables area. SpillAreaPaddingBytes is the size of
// the region after the preserved registers and before the spill areas.
// LocalsSlotsPaddingBytes is the amount of padding between the globals
// and locals area if they are separate.
assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES);
assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
uint32_t SpillAreaPaddingBytes = 0;
uint32_t LocalsSlotsPaddingBytes = 0;
alignStackSpillAreas(Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes,
SpillAreaAlignmentBytes, GlobalsSize,
LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes,
&LocalsSlotsPaddingBytes);
SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
uint32_t GlobalsAndSubsequentPaddingSize =
GlobalsSize + LocalsSlotsPaddingBytes;
// Align esp if necessary.
if (NeedsStackAlignment) {
uint32_t StackOffset =
Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
uint32_t StackSize =
Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes);
SpillAreaSizeBytes = StackSize - StackOffset;
}
// Generate "sub esp, SpillAreaSizeBytes"
if (SpillAreaSizeBytes)
_sub(getPhysicalRegister(Traits::RegisterSet::Reg_esp),
Ctx->getConstantInt32(SpillAreaSizeBytes));
Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
resetStackAdjustment();
// Fill in stack offsets for stack args, and copy args into registers
// for those that were register-allocated. Args are pushed right to
// left, so Arg[0] is closest to the stack/frame pointer.
Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
size_t BasicFrameOffset =
PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES;
if (!IsEbpBasedFrame)
BasicFrameOffset += SpillAreaSizeBytes;
const VarList &Args = Func->getArgs();
size_t InArgsSizeBytes = 0;
unsigned NumXmmArgs = 0;
for (Variable *Arg : Args) {
// Skip arguments passed in registers.
if (isVectorType(Arg->getType()) && NumXmmArgs < Traits::X86_MAX_XMM_ARGS) {
++NumXmmArgs;
continue;
}
finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
}
// Fill in stack offsets for locals.
assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
IsEbpBasedFrame);
// Assign stack offsets to variables that have been linked to spilled
// variables.
for (Variable *Var : VariablesLinkedToSpillSlots) {
Variable *Linked =
(llvm::cast<typename Traits::SpillVariable>(Var))->getLinkedTo();
Var->setStackOffset(Linked->getStackOffset());
}
this->HasComputedFrame = true;
if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
OstreamLocker L(Func->getContext());
Ostream &Str = Func->getContext()->getStrDump();
Str << "Stack layout:\n";
uint32_t EspAdjustmentPaddingSize =
SpillAreaSizeBytes - LocalsSpillAreaSize -
GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
Str << " in-args = " << InArgsSizeBytes << " bytes\n"
<< " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n"
<< " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
<< " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
<< " globals spill area = " << GlobalsSize << " bytes\n"
<< " globals-locals spill areas intermediate padding = "
<< GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
<< " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
<< " esp alignment padding = " << EspAdjustmentPaddingSize
<< " bytes\n";
Str << "Stack details:\n"
<< " esp adjustment = " << SpillAreaSizeBytes << " bytes\n"
<< " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
<< " locals spill area alignment = " << LocalsSlotsAlignmentBytes
<< " bytes\n"
<< " is ebp based = " << IsEbpBasedFrame << "\n";
}
}
void TargetX8632::addEpilog(CfgNode *Node) {
InstList &Insts = Node->getInsts();
InstList::reverse_iterator RI, E;
for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
if (llvm::isa<typename Traits::Insts::Ret>(*RI))
break;
}
if (RI == E)
return;
// Convert the reverse_iterator position into its corresponding
// (forward) iterator position.
InstList::iterator InsertPoint = RI.base();
--InsertPoint;
Context.init(Node);
Context.setInsertPoint(InsertPoint);
Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
if (IsEbpBasedFrame) {
Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);
// For late-stage liveness analysis (e.g. asm-verbose mode),
// adding a fake use of esp before the assignment of esp=ebp keeps
// previous esp adjustments from being dead-code eliminated.
Context.insert(InstFakeUse::create(Func, esp));
_mov(esp, ebp);
_pop(ebp);
} else {
// add esp, SpillAreaSizeBytes
if (SpillAreaSizeBytes)
_add(esp, Ctx->getConstantInt32(SpillAreaSizeBytes));
}
// Add pop instructions for preserved registers.
llvm::SmallBitVector CalleeSaves =
getRegisterSet(RegSet_CalleeSave, RegSet_None);
for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
SizeT j = CalleeSaves.size() - i - 1;
if (j == Traits::RegisterSet::Reg_ebp && IsEbpBasedFrame)
continue;
if (CalleeSaves[j] && RegsUsed[j]) {
_pop(getPhysicalRegister(j));
}
}
if (!Ctx->getFlags().getUseSandboxing())
return;
// Change the original ret instruction into a sandboxed return sequence.
// t:ecx = pop
// bundle_lock
// and t, ~31
// jmp *t
// bundle_unlock
// FakeUse <original_ret_operand>
Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
_pop(T_ecx);
lowerIndirectJump(T_ecx);
if (RI->getSrcSize()) {
Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0));
Context.insert(InstFakeUse::create(Func, RetValue));
}
RI->setDeleted();
}
void TargetX8632::emitJumpTable(const Cfg *Func,
const InstJumpTable *JumpTable) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Ctx->getStrEmit();
IceString MangledName = Ctx->mangleName(Func->getFunctionName());
Str << "\t.section\t.rodata." << MangledName
<< "$jumptable,\"a\",@progbits\n";
Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n";
Str << InstJumpTable::makeName(MangledName, JumpTable->getId()) << ":";
// On X8632 pointers are 32-bit hence the use of .long
for (SizeT I = 0; I < JumpTable->getNumTargets(); ++I)
Str << "\n\t.long\t" << JumpTable->getTarget(I)->getAsmName();
Str << "\n";
}
TargetDataX8632::TargetDataX8632(GlobalContext *Ctx) TargetDataX8632::TargetDataX8632(GlobalContext *Ctx)
: TargetDataLowering(Ctx) {} : TargetDataLowering(Ctx) {}
...@@ -159,23 +716,6 @@ const char *PoolTypeConverter<uint8_t>::AsmTag = ".byte"; ...@@ -159,23 +716,6 @@ const char *PoolTypeConverter<uint8_t>::AsmTag = ".byte";
const char *PoolTypeConverter<uint8_t>::PrintfString = "0x%x"; const char *PoolTypeConverter<uint8_t>::PrintfString = "0x%x";
} // end of anonymous namespace } // end of anonymous namespace
void TargetX8632::emitJumpTable(const Cfg *Func,
const InstJumpTable *JumpTable) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Ctx->getStrEmit();
IceString MangledName = Ctx->mangleName(Func->getFunctionName());
Str << "\t.section\t.rodata." << MangledName
<< "$jumptable,\"a\",@progbits\n";
Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n";
Str << InstJumpTable::makeName(MangledName, JumpTable->getId()) << ":";
// On X8632 pointers are 32-bit hence the use of .long
for (SizeT I = 0; I < JumpTable->getNumTargets(); ++I)
Str << "\n\t.long\t" << JumpTable->getTarget(I)->getAsmName();
Str << "\n";
}
template <typename T> template <typename T>
void TargetDataX8632::emitConstantPool(GlobalContext *Ctx) { void TargetDataX8632::emitConstantPool(GlobalContext *Ctx) {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
...@@ -407,214 +947,4 @@ ICETYPE_TABLE ...@@ -407,214 +947,4 @@ ICETYPE_TABLE
} // end of namespace dummy3 } // end of namespace dummy3
} // end of anonymous namespace } // end of anonymous namespace
//------------------------------------------------------------------------------
// __ ______ __ __ ______ ______ __ __ __ ______
// /\ \ /\ __ \/\ \ _ \ \/\ ___\/\ == \/\ \/\ "-.\ \/\ ___\
// \ \ \___\ \ \/\ \ \ \/ ".\ \ \ __\\ \ __<\ \ \ \ \-. \ \ \__ \
// \ \_____\ \_____\ \__/".~\_\ \_____\ \_\ \_\ \_\ \_\\"\_\ \_____\
// \/_____/\/_____/\/_/ \/_/\/_____/\/_/ /_/\/_/\/_/ \/_/\/_____/
//
//------------------------------------------------------------------------------
void TargetX8632::lowerCall(const InstCall *Instr) {
// x86-32 calling convention:
//
// * At the point before the call, the stack must be aligned to 16
// bytes.
//
// * The first four arguments of vector type, regardless of their
// position relative to the other arguments in the argument list, are
// placed in registers xmm0 - xmm3.
//
// * Other arguments are pushed onto the stack in right-to-left order,
// such that the left-most argument ends up on the top of the stack at
// the lowest memory address.
//
// * Stack arguments of vector type are aligned to start at the next
// highest multiple of 16 bytes. Other stack arguments are aligned to
// 4 bytes.
//
// This intends to match the section "IA-32 Function Calling
// Convention" of the document "OS X ABI Function Call Guide" by
// Apple.
NeedsStackAlignment = true;
typedef std::vector<Operand *> OperandList;
OperandList XmmArgs;
OperandList StackArgs, StackArgLocations;
uint32_t ParameterAreaSizeBytes = 0;
// Classify each argument operand according to the location where the
// argument is passed.
for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
Operand *Arg = Instr->getArg(i);
Type Ty = Arg->getType();
// The PNaCl ABI requires the width of arguments to be at least 32 bits.
assert(typeWidthInBytes(Ty) >= 4);
if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
XmmArgs.push_back(Arg);
} else {
StackArgs.push_back(Arg);
if (isVectorType(Arg->getType())) {
ParameterAreaSizeBytes =
Traits::applyStackAlignment(ParameterAreaSizeBytes);
}
Variable *esp =
Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
StackArgLocations.push_back(
Traits::X86OperandMem::create(Func, Ty, esp, Loc));
ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
}
}
// Adjust the parameter area so that the stack is aligned. It is
// assumed that the stack is already aligned at the start of the
// calling sequence.
ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
// Subtract the appropriate amount for the argument area. This also
// takes care of setting the stack adjustment during emission.
//
// TODO: If for some reason the call instruction gets dead-code
// eliminated after lowering, we would need to ensure that the
// pre-call and the post-call esp adjustment get eliminated as well.
if (ParameterAreaSizeBytes) {
_adjust_stack(ParameterAreaSizeBytes);
}
// Copy arguments that are passed on the stack to the appropriate
// stack locations.
for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
}
// Copy arguments to be passed in registers to the appropriate
// registers.
// TODO: Investigate the impact of lowering arguments passed in
// registers after lowering stack arguments as opposed to the other
// way around. Lowering register arguments after stack arguments may
// reduce register pressure. On the other hand, lowering register
// arguments first (before stack arguments) may result in more compact
// code, as the memory operand displacements may end up being smaller
// before any stack adjustment is done.
for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
Variable *Reg =
legalizeToReg(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i);
// Generate a FakeUse of register arguments so that they do not get
// dead code eliminated as a result of the FakeKill of scratch
// registers after the call.
Context.insert(InstFakeUse::create(Func, Reg));
}
// Generate the call instruction. Assign its result to a temporary
// with high register allocation weight.
Variable *Dest = Instr->getDest();
// ReturnReg doubles as ReturnRegLo as necessary.
Variable *ReturnReg = nullptr;
Variable *ReturnRegHi = nullptr;
if (Dest) {
switch (Dest->getType()) {
case IceType_NUM:
llvm_unreachable("Invalid Call dest type");
break;
case IceType_void:
break;
case IceType_i1:
case IceType_i8:
case IceType_i16:
case IceType_i32:
ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
break;
case IceType_i64:
ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
break;
case IceType_f32:
case IceType_f64:
// Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with
// the fstp instruction.
break;
case IceType_v4i1:
case IceType_v8i1:
case IceType_v16i1:
case IceType_v16i8:
case IceType_v8i16:
case IceType_v4i32:
case IceType_v4f32:
ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0);
break;
}
}
Operand *CallTarget = legalize(Instr->getCallTarget());
const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
if (NeedSandboxing) {
if (llvm::isa<Constant>(CallTarget)) {
_bundle_lock(InstBundleLock::Opt_AlignToEnd);
} else {
Variable *CallTargetVar = nullptr;
_mov(CallTargetVar, CallTarget);
_bundle_lock(InstBundleLock::Opt_AlignToEnd);
const SizeT BundleSize =
1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
_and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
CallTarget = CallTargetVar;
}
}
Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget);
Context.insert(NewCall);
if (NeedSandboxing)
_bundle_unlock();
if (ReturnRegHi)
Context.insert(InstFakeDef::create(Func, ReturnRegHi));
// Add the appropriate offset to esp. The call instruction takes care
// of resetting the stack offset during emission.
if (ParameterAreaSizeBytes) {
Variable *esp =
Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
_add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
}
// Insert a register-kill pseudo instruction.
Context.insert(InstFakeKill::create(Func, NewCall));
// Generate a FakeUse to keep the call live if necessary.
if (Instr->hasSideEffects() && ReturnReg) {
Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
Context.insert(FakeUse);
}
if (!Dest)
return;
// Assign the result of the call to Dest.
if (ReturnReg) {
if (ReturnRegHi) {
assert(Dest->getType() == IceType_i64);
split64(Dest);
Variable *DestLo = Dest->getLo();
Variable *DestHi = Dest->getHi();
_mov(DestLo, ReturnReg);
_mov(DestHi, ReturnRegHi);
} else {
assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
isVectorType(Dest->getType()));
if (isVectorType(Dest->getType())) {
_movp(Dest, ReturnReg);
} else {
_mov(Dest, ReturnReg);
}
}
} else if (isScalarFloatingType(Dest->getType())) {
// Special treatment for an FP function which returns its result in
// st(0).
// If Dest ends up being a physical xmm register, the fstp emit code
// will route st(0) through a temporary stack slot.
_fstp(Dest);
// Create a fake use of Dest in case it actually isn't used,
// because st(0) still needs to be popped.
Context.insert(InstFakeUse::create(Func, Dest));
}
}
} // end of namespace Ice } // end of namespace Ice
...@@ -43,6 +43,10 @@ public: ...@@ -43,6 +43,10 @@ public:
protected: protected:
void lowerCall(const InstCall *Instr) override; void lowerCall(const InstCall *Instr) override;
void lowerArguments() override;
void lowerRet(const InstRet *Inst) override;
void addProlog(CfgNode *Node) override;
void addEpilog(CfgNode *Node) override;
private: private:
friend class ::Ice::X86Internal::TargetX86Base<TargetX8632>; friend class ::Ice::X86Internal::TargetX86Base<TargetX8632>;
......
...@@ -243,9 +243,8 @@ void TargetX8664::lowerCall(const InstCall *Instr) { ...@@ -243,9 +243,8 @@ void TargetX8664::lowerCall(const InstCall *Instr) {
if (Dest) { if (Dest) {
switch (Dest->getType()) { switch (Dest->getType()) {
case IceType_NUM: case IceType_NUM:
llvm_unreachable("Invalid Call dest type");
break;
case IceType_void: case IceType_void:
llvm::report_fatal_error("Invalid Call dest type");
break; break;
case IceType_i1: case IceType_i1:
case IceType_i8: case IceType_i8:
...@@ -339,34 +338,381 @@ void TargetX8664::lowerCall(const InstCall *Instr) { ...@@ -339,34 +338,381 @@ void TargetX8664::lowerCall(const InstCall *Instr) {
} }
} }
void TargetDataX8664::lowerJumpTables() { void TargetX8664::lowerArguments() {
switch (Ctx->getFlags().getOutFileType()) { VarList &Args = Func->getArgs();
case FT_Elf: { // The first eight vetcor typed arguments (as well as fp arguments) are passed
ELFObjectWriter *Writer = Ctx->getObjectWriter(); // in %xmm0 through %xmm7 regardless of their position in the argument list.
for (const JumpTableData &JumpTable : Ctx->getJumpTables()) unsigned NumXmmArgs = 0;
// TODO(jpp): not 386. // The first six integer typed arguments are passed in %rdi, %rsi, %rdx, %rcx,
Writer->writeJumpTable(JumpTable, llvm::ELF::R_386_32); // %r8, and %r9 regardless of their position in the argument list.
} break; unsigned NumGprArgs = 0;
case FT_Asm:
// Already emitted from Cfg Context.init(Func->getEntryNode());
Context.setInsertPoint(Context.getCur());
for (SizeT i = 0, End = Args.size();
i < End && (NumXmmArgs < Traits::X86_MAX_XMM_ARGS ||
NumGprArgs < Traits::X86_MAX_XMM_ARGS);
++i) {
Variable *Arg = Args[i];
Type Ty = Arg->getType();
if ((isVectorType(Ty) || isScalarFloatingType(Ty)) &&
NumXmmArgs < Traits::X86_MAX_XMM_ARGS) {
// Replace Arg in the argument list with the home register. Then
// generate an instruction in the prolog to copy the home register
// to the assigned location of Arg.
int32_t RegNum = getRegisterForXmmArgNum(NumXmmArgs);
++NumXmmArgs;
Variable *RegisterArg = Func->makeVariable(Ty);
if (BuildDefs::dump())
RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
RegisterArg->setRegNum(RegNum);
RegisterArg->setIsArg();
Arg->setIsArg(false);
Args[i] = RegisterArg;
Context.insert(InstAssign::create(Func, Arg, RegisterArg));
} else if (isScalarIntegerType(Ty) &&
NumGprArgs < Traits::X86_MAX_GPR_ARGS) {
int32_t RegNum = getRegisterForGprArgNum(NumGprArgs);
++NumGprArgs;
Variable *RegisterArg = Func->makeVariable(Ty);
if (BuildDefs::dump())
RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
RegisterArg->setRegNum(RegNum);
RegisterArg->setIsArg();
Arg->setIsArg(false);
Args[i] = RegisterArg;
Context.insert(InstAssign::create(Func, Arg, RegisterArg));
}
}
}
void TargetX8664::lowerRet(const InstRet *Inst) {
Variable *Reg = nullptr;
if (Inst->hasRetValue()) {
Operand *Src0 = legalize(Inst->getRetValue());
// TODO(jpp): this is not needed.
if (Src0->getType() == IceType_i64) {
Variable *eax =
legalizeToReg(loOperand(Src0), Traits::RegisterSet::Reg_eax);
Variable *edx =
legalizeToReg(hiOperand(Src0), Traits::RegisterSet::Reg_edx);
Reg = eax;
Context.insert(InstFakeUse::create(Func, edx));
} else if (isScalarFloatingType(Src0->getType())) {
_fld(Src0);
} else if (isVectorType(Src0->getType())) {
Reg = legalizeToReg(Src0, Traits::RegisterSet::Reg_xmm0);
} else {
_mov(Reg, Src0, Traits::RegisterSet::Reg_eax);
}
}
// Add a ret instruction even if sandboxing is enabled, because
// addEpilog explicitly looks for a ret instruction as a marker for
// where to insert the frame removal instructions.
_ret(Reg);
// Add a fake use of esp to make sure esp stays alive for the entire
// function. Otherwise post-call esp adjustments get dead-code
// eliminated. TODO: Are there more places where the fake use
// should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
// have a ret instruction.
Variable *esp =
Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
Context.insert(InstFakeUse::create(Func, esp));
}
void TargetX8664::addProlog(CfgNode *Node) {
// Stack frame layout:
//
// +------------------------+
// | 1. return address |
// +------------------------+
// | 2. preserved registers |
// +------------------------+
// | 3. padding |
// +------------------------+
// | 4. global spill area |
// +------------------------+
// | 5. padding |
// +------------------------+
// | 6. local spill area |
// +------------------------+
// | 7. padding |
// +------------------------+
// | 8. allocas |
// +------------------------+
//
// The following variables record the size in bytes of the given areas:
// * X86_RET_IP_SIZE_BYTES: area 1
// * PreservedRegsSizeBytes: area 2
// * SpillAreaPaddingBytes: area 3
// * GlobalsSize: area 4
// * GlobalsAndSubsequentPaddingSize: areas 4 - 5
// * LocalsSpillAreaSize: area 6
// * SpillAreaSizeBytes: areas 3 - 7
// Determine stack frame offsets for each Variable without a
// register assignment. This can be done as one variable per stack
// slot. Or, do coalescing by running the register allocator again
// with an infinite set of registers (as a side effect, this gives
// variables a second chance at physical register assignment).
//
// A middle ground approach is to leverage sparsity and allocate one
// block of space on the frame for globals (variables with
// multi-block lifetime), and one block to share for locals
// (single-block lifetime).
Context.init(Node);
Context.setInsertPoint(Context.getCur());
llvm::SmallBitVector CalleeSaves =
getRegisterSet(RegSet_CalleeSave, RegSet_None);
RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
VarList SortedSpilledVariables, VariablesLinkedToSpillSlots;
size_t GlobalsSize = 0;
// If there is a separate locals area, this represents that area.
// Otherwise it counts any variable not counted by GlobalsSize.
SpillAreaSizeBytes = 0;
// If there is a separate locals area, this specifies the alignment
// for it.
uint32_t LocalsSlotsAlignmentBytes = 0;
// The entire spill locations area gets aligned to largest natural
// alignment of the variables that have a spill slot.
uint32_t SpillAreaAlignmentBytes = 0;
// A spill slot linked to a variable with a stack slot should reuse
// that stack slot.
std::function<bool(Variable *)> TargetVarHook =
[&VariablesLinkedToSpillSlots](Variable *Var) {
if (auto *SpillVar =
llvm::dyn_cast<typename Traits::SpillVariable>(Var)) {
assert(Var->getWeight().isZero());
if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) {
VariablesLinkedToSpillSlots.push_back(Var);
return true;
}
}
return false;
};
// Compute the list of spilled variables and bounds for GlobalsSize, etc.
getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
&SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
&LocalsSlotsAlignmentBytes, TargetVarHook);
uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
SpillAreaSizeBytes += GlobalsSize;
// Add push instructions for preserved registers.
uint32_t NumCallee = 0;
size_t PreservedRegsSizeBytes = 0;
for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
if (CalleeSaves[i] && RegsUsed[i]) {
++NumCallee;
PreservedRegsSizeBytes += typeWidthInBytes(IceType_i64);
_push(getPhysicalRegister(i));
}
}
Ctx->statsUpdateRegistersSaved(NumCallee);
// Generate "push ebp; mov ebp, esp"
if (IsEbpBasedFrame) {
assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
.count() == 0);
PreservedRegsSizeBytes += typeWidthInBytes(IceType_i64);
Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);
Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
_push(ebp);
_mov(ebp, esp);
// Keep ebp live for late-stage liveness analysis
// (e.g. asm-verbose mode).
Context.insert(InstFakeUse::create(Func, ebp));
}
// Align the variables area. SpillAreaPaddingBytes is the size of
// the region after the preserved registers and before the spill areas.
// LocalsSlotsPaddingBytes is the amount of padding between the globals
// and locals area if they are separate.
assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES);
assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
uint32_t SpillAreaPaddingBytes = 0;
uint32_t LocalsSlotsPaddingBytes = 0;
alignStackSpillAreas(Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes,
SpillAreaAlignmentBytes, GlobalsSize,
LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes,
&LocalsSlotsPaddingBytes);
SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
uint32_t GlobalsAndSubsequentPaddingSize =
GlobalsSize + LocalsSlotsPaddingBytes;
// Align esp if necessary.
if (NeedsStackAlignment) {
uint32_t StackOffset =
Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
uint32_t StackSize =
Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes);
SpillAreaSizeBytes = StackSize - StackOffset;
}
// Generate "sub esp, SpillAreaSizeBytes"
if (SpillAreaSizeBytes)
_sub(getPhysicalRegister(Traits::RegisterSet::Reg_esp),
Ctx->getConstantInt32(SpillAreaSizeBytes));
Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
resetStackAdjustment();
// Fill in stack offsets for stack args, and copy args into registers
// for those that were register-allocated. Args are pushed right to
// left, so Arg[0] is closest to the stack/frame pointer.
Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
size_t BasicFrameOffset =
PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES;
if (!IsEbpBasedFrame)
BasicFrameOffset += SpillAreaSizeBytes;
const VarList &Args = Func->getArgs();
size_t InArgsSizeBytes = 0;
unsigned NumXmmArgs = 0;
unsigned NumGPRArgs = 0;
for (Variable *Arg : Args) {
// Skip arguments passed in registers.
if (isVectorType(Arg->getType()) && NumXmmArgs < Traits::X86_MAX_XMM_ARGS) {
++NumXmmArgs;
continue;
}
if (isScalarFloatingType(Arg->getType()) &&
NumXmmArgs < Traits::X86_MAX_XMM_ARGS) {
++NumXmmArgs;
continue;
}
if (isScalarIntegerType(Arg->getType()) &&
NumGPRArgs < Traits::X86_MAX_GPR_ARGS) {
++NumGPRArgs;
continue;
}
finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
}
// Fill in stack offsets for locals.
assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
IsEbpBasedFrame);
// Assign stack offsets to variables that have been linked to spilled
// variables.
for (Variable *Var : VariablesLinkedToSpillSlots) {
Variable *Linked =
(llvm::cast<typename Traits::SpillVariable>(Var))->getLinkedTo();
Var->setStackOffset(Linked->getStackOffset());
}
this->HasComputedFrame = true;
if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
OstreamLocker L(Func->getContext());
Ostream &Str = Func->getContext()->getStrDump();
Str << "Stack layout:\n";
uint32_t EspAdjustmentPaddingSize =
SpillAreaSizeBytes - LocalsSpillAreaSize -
GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
Str << " in-args = " << InArgsSizeBytes << " bytes\n"
<< " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n"
<< " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
<< " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
<< " globals spill area = " << GlobalsSize << " bytes\n"
<< " globals-locals spill areas intermediate padding = "
<< GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
<< " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
<< " esp alignment padding = " << EspAdjustmentPaddingSize
<< " bytes\n";
Str << "Stack details:\n"
<< " esp adjustment = " << SpillAreaSizeBytes << " bytes\n"
<< " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
<< " locals spill area alignment = " << LocalsSlotsAlignmentBytes
<< " bytes\n"
<< " is ebp based = " << IsEbpBasedFrame << "\n";
}
}
void TargetX8664::addEpilog(CfgNode *Node) {
InstList &Insts = Node->getInsts();
InstList::reverse_iterator RI, E;
for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
if (llvm::isa<typename Traits::Insts::Ret>(*RI))
break; break;
case FT_Iasm: { }
if (RI == E)
return;
// Convert the reverse_iterator position into its corresponding
// (forward) iterator position.
InstList::iterator InsertPoint = RI.base();
--InsertPoint;
Context.init(Node);
Context.setInsertPoint(InsertPoint);
Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
if (IsEbpBasedFrame) {
Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);
// For late-stage liveness analysis (e.g. asm-verbose mode),
// adding a fake use of esp before the assignment of esp=ebp keeps
// previous esp adjustments from being dead-code eliminated.
Context.insert(InstFakeUse::create(Func, esp));
_mov(esp, ebp);
_pop(ebp);
} else {
// add esp, SpillAreaSizeBytes
if (SpillAreaSizeBytes)
_add(esp, Ctx->getConstantInt32(SpillAreaSizeBytes));
}
// Add pop instructions for preserved registers.
llvm::SmallBitVector CalleeSaves =
getRegisterSet(RegSet_CalleeSave, RegSet_None);
for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
SizeT j = CalleeSaves.size() - i - 1;
if (j == Traits::RegisterSet::Reg_ebp && IsEbpBasedFrame)
continue;
if (CalleeSaves[j] && RegsUsed[j]) {
_pop(getPhysicalRegister(j));
}
}
if (!Ctx->getFlags().getUseSandboxing())
return;
// Change the original ret instruction into a sandboxed return sequence.
// t:ecx = pop
// bundle_lock
// and t, ~31
// jmp *t
// bundle_unlock
// FakeUse <original_ret_operand>
Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
_pop(T_ecx);
lowerIndirectJump(T_ecx);
if (RI->getSrcSize()) {
Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0));
Context.insert(InstFakeUse::create(Func, RetValue));
}
RI->setDeleted();
}
void TargetX8664::emitJumpTable(const Cfg *Func,
const InstJumpTable *JumpTable) const {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
Ostream &Str = Ctx->getStrEmit(); Ostream &Str = Ctx->getStrEmit();
for (const JumpTableData &JT : Ctx->getJumpTables()) { IceString MangledName = Ctx->mangleName(Func->getFunctionName());
Str << "\t.section\t.rodata." << JT.getFunctionName() Str << "\t.section\t.rodata." << MangledName
<< "$jumptable,\"a\",@progbits\n"; << "$jumptable,\"a\",@progbits\n";
Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n"; Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n";
Str << InstJumpTable::makeName(JT.getFunctionName(), JT.getId()) << ":"; Str << InstJumpTable::makeName(MangledName, JumpTable->getId()) << ":";
// On X8664 ILP32 pointers are 32-bit hence the use of .long // On X8664 ILP32 pointers are 32-bit hence the use of .long
for (intptr_t TargetOffset : JT.getTargetOffsets()) for (SizeT I = 0; I < JumpTable->getNumTargets(); ++I)
Str << "\n\t.long\t" << JT.getFunctionName() << "+" << TargetOffset; Str << "\n\t.long\t" << JumpTable->getTarget(I)->getAsmName();
Str << "\n"; Str << "\n";
}
} break;
}
} }
namespace { namespace {
...@@ -507,21 +853,34 @@ void TargetDataX8664::lowerConstants() { ...@@ -507,21 +853,34 @@ void TargetDataX8664::lowerConstants() {
} }
} }
void TargetX8664::emitJumpTable(const Cfg *Func, void TargetDataX8664::lowerJumpTables() {
const InstJumpTable *JumpTable) const { switch (Ctx->getFlags().getOutFileType()) {
case FT_Elf: {
ELFObjectWriter *Writer = Ctx->getObjectWriter();
for (const JumpTableData &JumpTable : Ctx->getJumpTables())
// TODO(jpp): not 386.
Writer->writeJumpTable(JumpTable, llvm::ELF::R_386_32);
} break;
case FT_Asm:
// Already emitted from Cfg
break;
case FT_Iasm: {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
Ostream &Str = Ctx->getStrEmit(); Ostream &Str = Ctx->getStrEmit();
IceString MangledName = Ctx->mangleName(Func->getFunctionName()); for (const JumpTableData &JT : Ctx->getJumpTables()) {
Str << "\t.section\t.rodata." << MangledName Str << "\t.section\t.rodata." << JT.getFunctionName()
<< "$jumptable,\"a\",@progbits\n"; << "$jumptable,\"a\",@progbits\n";
Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n"; Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n";
Str << InstJumpTable::makeName(MangledName, JumpTable->getId()) << ":"; Str << InstJumpTable::makeName(JT.getFunctionName(), JT.getId()) << ":";
// On X8664 ILP32 pointers are 32-bit hence the use of .long // On X8664 ILP32 pointers are 32-bit hence the use of .long
for (SizeT I = 0; I < JumpTable->getNumTargets(); ++I) for (intptr_t TargetOffset : JT.getTargetOffsets())
Str << "\n\t.long\t" << JumpTable->getTarget(I)->getAsmName(); Str << "\n\t.long\t" << JT.getFunctionName() << "+" << TargetOffset;
Str << "\n"; Str << "\n";
}
} break;
}
} }
void TargetDataX8664::lowerGlobals(const VariableDeclarationList &Vars, void TargetDataX8664::lowerGlobals(const VariableDeclarationList &Vars,
......
...@@ -40,6 +40,10 @@ public: ...@@ -40,6 +40,10 @@ public:
protected: protected:
void lowerCall(const InstCall *Instr) override; void lowerCall(const InstCall *Instr) override;
void lowerArguments() override;
void lowerRet(const InstRet *Inst) override;
void addProlog(CfgNode *Node) override;
void addEpilog(CfgNode *Node) override;
private: private:
friend class ::Ice::X86Internal::TargetX86Base<TargetX8664>; friend class ::Ice::X86Internal::TargetX86Base<TargetX8664>;
......
...@@ -98,10 +98,7 @@ public: ...@@ -98,10 +98,7 @@ public:
void emit(const ConstantFloat *C) const final; void emit(const ConstantFloat *C) const final;
void emit(const ConstantDouble *C) const final; void emit(const ConstantDouble *C) const final;
void lowerArguments() override;
void initNodeForLowering(CfgNode *Node) override; void initNodeForLowering(CfgNode *Node) override;
void addProlog(CfgNode *Node) override;
void addEpilog(CfgNode *Node) override;
/// Ensure that a 64-bit Variable has been split into 2 32-bit /// Ensure that a 64-bit Variable has been split into 2 32-bit
/// Variables, creating them if necessary. This is needed for all /// Variables, creating them if necessary. This is needed for all
/// I64 operations, and it is needed for pushing F64 arguments for /// I64 operations, and it is needed for pushing F64 arguments for
...@@ -136,7 +133,6 @@ protected: ...@@ -136,7 +133,6 @@ protected:
void lowerInsertElement(const InstInsertElement *Inst) override; void lowerInsertElement(const InstInsertElement *Inst) override;
void lowerLoad(const InstLoad *Inst) override; void lowerLoad(const InstLoad *Inst) override;
void lowerPhi(const InstPhi *Inst) override; void lowerPhi(const InstPhi *Inst) override;
void lowerRet(const InstRet *Inst) override;
void lowerSelect(const InstSelect *Inst) override; void lowerSelect(const InstSelect *Inst) override;
void lowerStore(const InstStore *Inst) override; void lowerStore(const InstStore *Inst) override;
void lowerSwitch(const InstSwitch *Inst) override; void lowerSwitch(const InstSwitch *Inst) override;
......
...@@ -793,39 +793,6 @@ TargetX86Base<Machine>::stackVarToAsmOperand(const Variable *Var) const { ...@@ -793,39 +793,6 @@ TargetX86Base<Machine>::stackVarToAsmOperand(const Variable *Var) const {
Traits::RegisterSet::getEncodedGPR(BaseRegNum), Offset); Traits::RegisterSet::getEncodedGPR(BaseRegNum), Offset);
} }
template <class Machine> void TargetX86Base<Machine>::lowerArguments() {
VarList &Args = Func->getArgs();
// The first four arguments of vector type, regardless of their
// position relative to the other arguments in the argument list, are
// passed in registers xmm0 - xmm3.
unsigned NumXmmArgs = 0;
Context.init(Func->getEntryNode());
Context.setInsertPoint(Context.getCur());
for (SizeT I = 0, E = Args.size();
I < E && NumXmmArgs < Traits::X86_MAX_XMM_ARGS; ++I) {
Variable *Arg = Args[I];
Type Ty = Arg->getType();
if (!isVectorType(Ty))
continue;
// Replace Arg in the argument list with the home register. Then
// generate an instruction in the prolog to copy the home register
// to the assigned location of Arg.
int32_t RegNum = Traits::RegisterSet::Reg_xmm0 + NumXmmArgs;
++NumXmmArgs;
Variable *RegisterArg = Func->makeVariable(Ty);
if (BuildDefs::dump())
RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
RegisterArg->setRegNum(RegNum);
RegisterArg->setIsArg();
Arg->setIsArg(false);
Args[I] = RegisterArg;
Context.insert(InstAssign::create(Func, Arg, RegisterArg));
}
}
/// Helper function for addProlog(). /// Helper function for addProlog().
/// ///
/// This assumes Arg is an argument passed on the stack. This sets the /// This assumes Arg is an argument passed on the stack. This sets the
...@@ -844,6 +811,7 @@ void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg, ...@@ -844,6 +811,7 @@ void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg,
Variable *Hi = Arg->getHi(); Variable *Hi = Arg->getHi();
Type Ty = Arg->getType(); Type Ty = Arg->getType();
if (Lo && Hi && Ty == IceType_i64) { if (Lo && Hi && Ty == IceType_i64) {
// TODO(jpp): This special case is not needed for x86-64.
assert(Lo->getType() != IceType_i64); // don't want infinite recursion assert(Lo->getType() != IceType_i64); // don't want infinite recursion
assert(Hi->getType() != IceType_i64); // don't want infinite recursion assert(Hi->getType() != IceType_i64); // don't want infinite recursion
finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
...@@ -872,273 +840,10 @@ void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg, ...@@ -872,273 +840,10 @@ void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg,
} }
template <class Machine> Type TargetX86Base<Machine>::stackSlotType() { template <class Machine> Type TargetX86Base<Machine>::stackSlotType() {
// TODO(jpp): this is wrong for x86-64.
return IceType_i32; return IceType_i32;
} }
template <class Machine> void TargetX86Base<Machine>::addProlog(CfgNode *Node) {
// Stack frame layout:
//
// +------------------------+
// | 1. return address |
// +------------------------+
// | 2. preserved registers |
// +------------------------+
// | 3. padding |
// +------------------------+
// | 4. global spill area |
// +------------------------+
// | 5. padding |
// +------------------------+
// | 6. local spill area |
// +------------------------+
// | 7. padding |
// +------------------------+
// | 8. allocas |
// +------------------------+
//
// The following variables record the size in bytes of the given areas:
// * X86_RET_IP_SIZE_BYTES: area 1
// * PreservedRegsSizeBytes: area 2
// * SpillAreaPaddingBytes: area 3
// * GlobalsSize: area 4
// * GlobalsAndSubsequentPaddingSize: areas 4 - 5
// * LocalsSpillAreaSize: area 6
// * SpillAreaSizeBytes: areas 3 - 7
// Determine stack frame offsets for each Variable without a
// register assignment. This can be done as one variable per stack
// slot. Or, do coalescing by running the register allocator again
// with an infinite set of registers (as a side effect, this gives
// variables a second chance at physical register assignment).
//
// A middle ground approach is to leverage sparsity and allocate one
// block of space on the frame for globals (variables with
// multi-block lifetime), and one block to share for locals
// (single-block lifetime).
Context.init(Node);
Context.setInsertPoint(Context.getCur());
llvm::SmallBitVector CalleeSaves =
getRegisterSet(RegSet_CalleeSave, RegSet_None);
RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
VarList SortedSpilledVariables, VariablesLinkedToSpillSlots;
size_t GlobalsSize = 0;
// If there is a separate locals area, this represents that area.
// Otherwise it counts any variable not counted by GlobalsSize.
SpillAreaSizeBytes = 0;
// If there is a separate locals area, this specifies the alignment
// for it.
uint32_t LocalsSlotsAlignmentBytes = 0;
// The entire spill locations area gets aligned to largest natural
// alignment of the variables that have a spill slot.
uint32_t SpillAreaAlignmentBytes = 0;
// A spill slot linked to a variable with a stack slot should reuse
// that stack slot.
std::function<bool(Variable *)> TargetVarHook =
[&VariablesLinkedToSpillSlots](Variable *Var) {
if (auto *SpillVar =
llvm::dyn_cast<typename Traits::SpillVariable>(Var)) {
assert(Var->getWeight().isZero());
if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) {
VariablesLinkedToSpillSlots.push_back(Var);
return true;
}
}
return false;
};
// Compute the list of spilled variables and bounds for GlobalsSize, etc.
getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
&SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
&LocalsSlotsAlignmentBytes, TargetVarHook);
uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
SpillAreaSizeBytes += GlobalsSize;
// Add push instructions for preserved registers.
uint32_t NumCallee = 0;
size_t PreservedRegsSizeBytes = 0;
for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
if (CalleeSaves[i] && RegsUsed[i]) {
++NumCallee;
PreservedRegsSizeBytes += 4;
_push(getPhysicalRegister(i));
}
}
Ctx->statsUpdateRegistersSaved(NumCallee);
// Generate "push ebp; mov ebp, esp"
if (IsEbpBasedFrame) {
assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
.count() == 0);
PreservedRegsSizeBytes += 4;
Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);
Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
_push(ebp);
_mov(ebp, esp);
// Keep ebp live for late-stage liveness analysis
// (e.g. asm-verbose mode).
Context.insert(InstFakeUse::create(Func, ebp));
}
// Align the variables area. SpillAreaPaddingBytes is the size of
// the region after the preserved registers and before the spill areas.
// LocalsSlotsPaddingBytes is the amount of padding between the globals
// and locals area if they are separate.
assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES);
assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
uint32_t SpillAreaPaddingBytes = 0;
uint32_t LocalsSlotsPaddingBytes = 0;
alignStackSpillAreas(Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes,
SpillAreaAlignmentBytes, GlobalsSize,
LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes,
&LocalsSlotsPaddingBytes);
SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
uint32_t GlobalsAndSubsequentPaddingSize =
GlobalsSize + LocalsSlotsPaddingBytes;
// Align esp if necessary.
if (NeedsStackAlignment) {
uint32_t StackOffset =
Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
uint32_t StackSize =
Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes);
SpillAreaSizeBytes = StackSize - StackOffset;
}
// Generate "sub esp, SpillAreaSizeBytes"
if (SpillAreaSizeBytes)
_sub(getPhysicalRegister(Traits::RegisterSet::Reg_esp),
Ctx->getConstantInt32(SpillAreaSizeBytes));
Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
resetStackAdjustment();
// Fill in stack offsets for stack args, and copy args into registers
// for those that were register-allocated. Args are pushed right to
// left, so Arg[0] is closest to the stack/frame pointer.
Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
size_t BasicFrameOffset =
PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES;
if (!IsEbpBasedFrame)
BasicFrameOffset += SpillAreaSizeBytes;
const VarList &Args = Func->getArgs();
size_t InArgsSizeBytes = 0;
unsigned NumXmmArgs = 0;
for (Variable *Arg : Args) {
// Skip arguments passed in registers.
if (isVectorType(Arg->getType()) && NumXmmArgs < Traits::X86_MAX_XMM_ARGS) {
++NumXmmArgs;
continue;
}
finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
}
// Fill in stack offsets for locals.
assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
IsEbpBasedFrame);
// Assign stack offsets to variables that have been linked to spilled
// variables.
for (Variable *Var : VariablesLinkedToSpillSlots) {
Variable *Linked =
(llvm::cast<typename Traits::SpillVariable>(Var))->getLinkedTo();
Var->setStackOffset(Linked->getStackOffset());
}
this->HasComputedFrame = true;
if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
OstreamLocker L(Func->getContext());
Ostream &Str = Func->getContext()->getStrDump();
Str << "Stack layout:\n";
uint32_t EspAdjustmentPaddingSize =
SpillAreaSizeBytes - LocalsSpillAreaSize -
GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
Str << " in-args = " << InArgsSizeBytes << " bytes\n"
<< " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n"
<< " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
<< " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
<< " globals spill area = " << GlobalsSize << " bytes\n"
<< " globals-locals spill areas intermediate padding = "
<< GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
<< " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
<< " esp alignment padding = " << EspAdjustmentPaddingSize
<< " bytes\n";
Str << "Stack details:\n"
<< " esp adjustment = " << SpillAreaSizeBytes << " bytes\n"
<< " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
<< " locals spill area alignment = " << LocalsSlotsAlignmentBytes
<< " bytes\n"
<< " is ebp based = " << IsEbpBasedFrame << "\n";
}
}
template <class Machine> void TargetX86Base<Machine>::addEpilog(CfgNode *Node) {
InstList &Insts = Node->getInsts();
InstList::reverse_iterator RI, E;
for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
if (llvm::isa<typename Traits::Insts::Ret>(*RI))
break;
}
if (RI == E)
return;
// Convert the reverse_iterator position into its corresponding
// (forward) iterator position.
InstList::iterator InsertPoint = RI.base();
--InsertPoint;
Context.init(Node);
Context.setInsertPoint(InsertPoint);
Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
if (IsEbpBasedFrame) {
Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);
// For late-stage liveness analysis (e.g. asm-verbose mode),
// adding a fake use of esp before the assignment of esp=ebp keeps
// previous esp adjustments from being dead-code eliminated.
Context.insert(InstFakeUse::create(Func, esp));
_mov(esp, ebp);
_pop(ebp);
} else {
// add esp, SpillAreaSizeBytes
if (SpillAreaSizeBytes)
_add(esp, Ctx->getConstantInt32(SpillAreaSizeBytes));
}
// Add pop instructions for preserved registers.
llvm::SmallBitVector CalleeSaves =
getRegisterSet(RegSet_CalleeSave, RegSet_None);
for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
SizeT j = CalleeSaves.size() - i - 1;
if (j == Traits::RegisterSet::Reg_ebp && IsEbpBasedFrame)
continue;
if (CalleeSaves[j] && RegsUsed[j]) {
_pop(getPhysicalRegister(j));
}
}
if (!Ctx->getFlags().getUseSandboxing())
return;
// Change the original ret instruction into a sandboxed return sequence.
// t:ecx = pop
// bundle_lock
// and t, ~31
// jmp *t
// bundle_unlock
// FakeUse <original_ret_operand>
Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
_pop(T_ecx);
lowerIndirectJump(T_ecx);
if (RI->getSrcSize()) {
Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0));
Context.insert(InstFakeUse::create(Func, RetValue));
}
RI->setDeleted();
}
template <class Machine> void TargetX86Base<Machine>::split64(Variable *Var) { template <class Machine> void TargetX86Base<Machine>::split64(Variable *Var) {
switch (Var->getType()) { switch (Var->getType()) {
default: default:
...@@ -4236,40 +3941,6 @@ void TargetX86Base<Machine>::lowerPhi(const InstPhi * /*Inst*/) { ...@@ -4236,40 +3941,6 @@ void TargetX86Base<Machine>::lowerPhi(const InstPhi * /*Inst*/) {
} }
template <class Machine> template <class Machine>
void TargetX86Base<Machine>::lowerRet(const InstRet *Inst) {
Variable *Reg = nullptr;
if (Inst->hasRetValue()) {
Operand *Src0 = legalize(Inst->getRetValue());
if (Src0->getType() == IceType_i64) {
Variable *eax =
legalizeToReg(loOperand(Src0), Traits::RegisterSet::Reg_eax);
Variable *edx =
legalizeToReg(hiOperand(Src0), Traits::RegisterSet::Reg_edx);
Reg = eax;
Context.insert(InstFakeUse::create(Func, edx));
} else if (isScalarFloatingType(Src0->getType())) {
_fld(Src0);
} else if (isVectorType(Src0->getType())) {
Reg = legalizeToReg(Src0, Traits::RegisterSet::Reg_xmm0);
} else {
_mov(Reg, Src0, Traits::RegisterSet::Reg_eax);
}
}
// Add a ret instruction even if sandboxing is enabled, because
// addEpilog explicitly looks for a ret instruction as a marker for
// where to insert the frame removal instructions.
_ret(Reg);
// Add a fake use of esp to make sure esp stays alive for the entire
// function. Otherwise post-call esp adjustments get dead-code
// eliminated. TODO: Are there more places where the fake use
// should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
// have a ret instruction.
Variable *esp =
Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
Context.insert(InstFakeUse::create(Func, esp));
}
template <class Machine>
void TargetX86Base<Machine>::lowerSelect(const InstSelect *Inst) { void TargetX86Base<Machine>::lowerSelect(const InstSelect *Inst) {
Variable *Dest = Inst->getDest(); Variable *Dest = Inst->getDest();
Type DestTy = Dest->getType(); Type DestTy = Dest->getType();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment