Commit e0d9afa8 by John Porto

Subzero. Implements x86-64 lowerCall.

parent c2ec5817
......@@ -881,20 +881,19 @@ ConstantList GlobalContext::getConstantExternSyms() {
/// Returns a copy of the context's jump table list. The copy is randomly
/// shuffled when the reorder-pooled-constants flag is set; otherwise it is
/// sorted by function name and then by jump table id so that the order is
/// deterministic.
// NOTE(review): the shuffle and the sort below each appear twice with
// different line wrapping. This looks like the pre- and post-reformat lines of
// a diff rendered together -- confirm that only one copy of each belongs here.
JumpTableDataList GlobalContext::getJumpTables() {
// Work on a copy so the list owned by the context is left untouched.
JumpTableDataList JumpTables(*getJumpTableList());
if (getFlags().shouldReorderPooledConstants()) {
// If reorder-pooled-constants option is set to true, we need to shuffle the
// constant pool before emitting it.
RandomShuffle(JumpTables.begin(), JumpTables.end(), [this](uint64_t N) {
return (uint32_t)getRNG().next(N);
});
// If reorder-pooled-constants option is set to true, we need to shuffle the
// constant pool before emitting it.
RandomShuffle(JumpTables.begin(), JumpTables.end(),
[this](uint64_t N) { return (uint32_t)getRNG().next(N); });
} else {
// Make order deterministic by sorting into functions and then ID of the
// jump table within that function.
std::sort(JumpTables.begin(), JumpTables.end(), [](const JumpTableData &A,
const JumpTableData &B) {
if (A.getFunctionName() != B.getFunctionName())
return A.getFunctionName() < B.getFunctionName();
return A.getId() < B.getId();
});
std::sort(JumpTables.begin(), JumpTables.end(),
[](const JumpTableData &A, const JumpTableData &B) {
if (A.getFunctionName() != B.getFunctionName())
return A.getFunctionName() < B.getFunctionName();
return A.getId() < B.getId();
});
}
return JumpTables;
}
......
......@@ -20,9 +20,6 @@
///
/// ::Ice::X8632::Traits::Insts::Mov::create
///
/// In the future, this file might be used to declare X8632 specific
/// instructions (e.g., FLD, and FSTP.)
///
//===----------------------------------------------------------------------===//
#ifndef SUBZERO_SRC_ICEINSTX8632_H
......
......@@ -21,6 +21,14 @@
namespace Ice {
//------------------------------------------------------------------------------
// ______ ______ ______ __ ______ ______
// /\__ _\ /\ == \ /\ __ \ /\ \ /\__ _\ /\ ___\
// \/_/\ \/ \ \ __< \ \ __ \ \ \ \ \/_/\ \/ \ \___ \
// \ \_\ \ \_\ \_\ \ \_\ \_\ \ \_\ \ \_\ \/\_____\
// \/_/ \/_/ /_/ \/_/\/_/ \/_/ \/_/ \/_____/
//
//------------------------------------------------------------------------------
namespace X86Internal {
const MachineTraits<TargetX8632>::TableFcmpType
MachineTraits<TargetX8632>::TableFcmp[] = {
......@@ -399,4 +407,214 @@ ICETYPE_TABLE
} // end of namespace dummy3
} // end of anonymous namespace
//------------------------------------------------------------------------------
// __ ______ __ __ ______ ______ __ __ __ ______
// /\ \ /\ __ \/\ \ _ \ \/\ ___\/\ == \/\ \/\ "-.\ \/\ ___\
// \ \ \___\ \ \/\ \ \ \/ ".\ \ \ __\\ \ __<\ \ \ \ \-. \ \ \__ \
// \ \_____\ \_____\ \__/".~\_\ \_____\ \_\ \_\ \_\ \_\\"\_\ \_____\
// \/_____/\/_____/\/_/ \/_/\/_____/\/_/ /_/\/_/\/_/ \/_/\/_____/
//
//------------------------------------------------------------------------------
/// Lowers the high-level call instruction Instr for the x86-32 target.
///
/// Vector arguments are assigned to xmm registers until
/// Traits::X86_MAX_XMM_ARGS of them have been placed; every other argument is
/// stored into a parameter area addressed relative to esp. The call is wrapped
/// in a bundle lock when sandboxing is enabled. After the call, esp is
/// restored, a register-kill pseudo instruction is inserted, and the result
/// (if any) is copied from eax, eax:edx, xmm0 or st(0) into the call's Dest.
void TargetX8632::lowerCall(const InstCall *Instr) {
// x86-32 calling convention:
//
// * At the point before the call, the stack must be aligned to 16
// bytes.
//
// * The first four arguments of vector type, regardless of their
// position relative to the other arguments in the argument list, are
// placed in registers xmm0 - xmm3.
//
// * Other arguments are pushed onto the stack in right-to-left order,
// such that the left-most argument ends up on the top of the stack at
// the lowest memory address.
//
// * Stack arguments of vector type are aligned to start at the next
// highest multiple of 16 bytes. Other stack arguments are aligned to
// 4 bytes.
//
// This intends to match the section "IA-32 Function Calling
// Convention" of the document "OS X ABI Function Call Guide" by
// Apple.
NeedsStackAlignment = true;
typedef std::vector<Operand *> OperandList;
OperandList XmmArgs;
// StackArgs[i] is stored to the memory operand StackArgLocations[i].
OperandList StackArgs, StackArgLocations;
// Running size of the parameter area; also the esp-relative offset of the
// next stack argument.
uint32_t ParameterAreaSizeBytes = 0;
// Classify each argument operand according to the location where the
// argument is passed.
for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
Operand *Arg = Instr->getArg(i);
Type Ty = Arg->getType();
// The PNaCl ABI requires the width of arguments to be at least 32 bits.
assert(typeWidthInBytes(Ty) >= 4);
if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
XmmArgs.push_back(Arg);
} else {
StackArgs.push_back(Arg);
// A vector stack argument starts at the next stack-aligned offset.
if (isVectorType(Arg->getType())) {
ParameterAreaSizeBytes =
Traits::applyStackAlignment(ParameterAreaSizeBytes);
}
Variable *esp =
Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
// Loc is the argument's offset from esp within the parameter area.
Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
StackArgLocations.push_back(
Traits::X86OperandMem::create(Func, Ty, esp, Loc));
ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
}
}
// Adjust the parameter area so that the stack is aligned. It is
// assumed that the stack is already aligned at the start of the
// calling sequence.
ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
// Subtract the appropriate amount for the argument area. This also
// takes care of setting the stack adjustment during emission.
//
// TODO: If for some reason the call instruction gets dead-code
// eliminated after lowering, we would need to ensure that the
// pre-call and the post-call esp adjustment get eliminated as well.
if (ParameterAreaSizeBytes) {
_adjust_stack(ParameterAreaSizeBytes);
}
// Copy arguments that are passed on the stack to the appropriate
// stack locations.
for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
}
// Copy arguments to be passed in registers to the appropriate
// registers.
// TODO: Investigate the impact of lowering arguments passed in
// registers after lowering stack arguments as opposed to the other
// way around. Lowering register arguments after stack arguments may
// reduce register pressure. On the other hand, lowering register
// arguments first (before stack arguments) may result in more compact
// code, as the memory operand displacements may end up being smaller
// before any stack adjustment is done.
for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
// The i-th xmm argument goes into register xmm<i>.
Variable *Reg =
legalizeToReg(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i);
// Generate a FakeUse of register arguments so that they do not get
// dead code eliminated as a result of the FakeKill of scratch
// registers after the call.
Context.insert(InstFakeUse::create(Func, Reg));
}
// Generate the call instruction. Assign its result to a temporary
// with high register allocation weight.
Variable *Dest = Instr->getDest();
// ReturnReg doubles as ReturnRegLo as necessary.
Variable *ReturnReg = nullptr;
Variable *ReturnRegHi = nullptr;
// Select the register(s) the result is read from, keyed on Dest's type.
if (Dest) {
switch (Dest->getType()) {
case IceType_NUM:
llvm_unreachable("Invalid Call dest type");
break;
case IceType_void:
break;
case IceType_i1:
case IceType_i8:
case IceType_i16:
case IceType_i32:
ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
break;
case IceType_i64:
// A 64-bit result comes back as two 32-bit halves: eax (low), edx (high).
ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
break;
case IceType_f32:
case IceType_f64:
// Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with
// the fstp instruction.
break;
case IceType_v4i1:
case IceType_v8i1:
case IceType_v16i1:
case IceType_v16i8:
case IceType_v8i16:
case IceType_v4i32:
case IceType_v4f32:
ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0);
break;
}
}
Operand *CallTarget = legalize(Instr->getCallTarget());
const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
// With sandboxing, the call is placed inside a bundle lock that uses the
// align-to-end option. A non-constant target is first copied into a variable
// whose low bits are cleared with the mask ~(BundleSize - 1); the masking is
// emitted inside the bundle lock, right before the call.
if (NeedSandboxing) {
if (llvm::isa<Constant>(CallTarget)) {
_bundle_lock(InstBundleLock::Opt_AlignToEnd);
} else {
Variable *CallTargetVar = nullptr;
_mov(CallTargetVar, CallTarget);
_bundle_lock(InstBundleLock::Opt_AlignToEnd);
const SizeT BundleSize =
1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
_and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
CallTarget = CallTargetVar;
}
}
Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget);
Context.insert(NewCall);
if (NeedSandboxing)
_bundle_unlock();
// The call instruction is created with only ReturnReg as its destination;
// ReturnRegHi is given a FakeDef right after it.
if (ReturnRegHi)
Context.insert(InstFakeDef::create(Func, ReturnRegHi));
// Add the appropriate offset to esp. The call instruction takes care
// of resetting the stack offset during emission.
if (ParameterAreaSizeBytes) {
Variable *esp =
Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
_add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
}
// Insert a register-kill pseudo instruction.
Context.insert(InstFakeKill::create(Func, NewCall));
// Generate a FakeUse to keep the call live if necessary.
if (Instr->hasSideEffects() && ReturnReg) {
Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
Context.insert(FakeUse);
}
// Nothing to copy back when the call's result is not captured.
if (!Dest)
return;
// Assign the result of the call to Dest.
if (ReturnReg) {
if (ReturnRegHi) {
assert(Dest->getType() == IceType_i64);
// Copy the two 32-bit halves into the lo/hi parts of the 64-bit Dest.
split64(Dest);
Variable *DestLo = Dest->getLo();
Variable *DestHi = Dest->getHi();
_mov(DestLo, ReturnReg);
_mov(DestHi, ReturnRegHi);
} else {
assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
isVectorType(Dest->getType()));
if (isVectorType(Dest->getType())) {
_movp(Dest, ReturnReg);
} else {
_mov(Dest, ReturnReg);
}
}
} else if (isScalarFloatingType(Dest->getType())) {
// Special treatment for an FP function which returns its result in
// st(0).
// If Dest ends up being a physical xmm register, the fstp emit code
// will route st(0) through a temporary stack slot.
_fstp(Dest);
// Create a fake use of Dest in case it actually isn't used,
// because st(0) still needs to be popped.
Context.insert(InstFakeUse::create(Func, Dest));
}
}
} // end of namespace Ice
......@@ -41,6 +41,9 @@ public:
static TargetX8632 *create(Cfg *Func) { return new TargetX8632(Func); }
protected:
void lowerCall(const InstCall *Instr) override;
private:
friend class ::Ice::X86Internal::TargetX86Base<TargetX8632>;
......
......@@ -21,6 +21,14 @@
namespace Ice {
//------------------------------------------------------------------------------
// ______ ______ ______ __ ______ ______
// /\__ _\ /\ == \ /\ __ \ /\ \ /\__ _\ /\ ___\
// \/_/\ \/ \ \ __< \ \ __ \ \ \ \ \/_/\ \/ \ \___ \
// \ \_\ \ \_\ \_\ \ \_\ \_\ \ \_\ \ \_\ \/\_____\
// \/_/ \/_/ /_/ \/_/\/_/ \/_/ \/_/ \/_____/
//
//------------------------------------------------------------------------------
namespace X86Internal {
const MachineTraits<TargetX8664>::TableFcmpType
MachineTraits<TargetX8664>::TableFcmp[] = {
......@@ -81,6 +89,286 @@ const char *MachineTraits<TargetX8664>::TargetName = "X8664";
} // end of namespace X86Internal
//------------------------------------------------------------------------------
// __ ______ __ __ ______ ______ __ __ __ ______
// /\ \ /\ __ \/\ \ _ \ \/\ ___\/\ == \/\ \/\ "-.\ \/\ ___\
// \ \ \___\ \ \/\ \ \ \/ ".\ \ \ __\\ \ __<\ \ \ \ \-. \ \ \__ \
// \ \_____\ \_____\ \__/".~\_\ \_____\ \_\ \_\ \_\ \_\\"\_\ \_____\
// \/_____/\/_____/\/_/ \/_/\/_____/\/_/ /_/\/_/\/_/ \/_/\/_____/
//
//------------------------------------------------------------------------------
namespace {
/// Returns the register that carries the ArgNum-th argument passed in an xmm
/// register: Reg_xmm0 offset by ArgNum. ArgNum must be smaller than
/// X86_MAX_XMM_ARGS.
static inline TargetX8664::Traits::RegisterSet::AllRegisters
getRegisterForXmmArgNum(uint32_t ArgNum) {
  using Traits = TargetX8664::Traits;
  using RegNum = Traits::RegisterSet::AllRegisters;
  assert(ArgNum < Traits::X86_MAX_XMM_ARGS);
  // The register is obtained by offsetting the Reg_xmm0 enumerator by ArgNum.
  const auto XmmRegIndex = Traits::RegisterSet::Reg_xmm0 + ArgNum;
  return static_cast<RegNum>(XmmRegIndex);
}
/// Returns the register that carries the ArgNum-th argument passed in a
/// general purpose register. ArgNum must be smaller than X86_MAX_GPR_ARGS.
static inline TargetX8664::Traits::RegisterSet::AllRegisters
getRegisterForGprArgNum(uint32_t ArgNum) {
  using Traits = TargetX8664::Traits;
  using RegisterSet = Traits::RegisterSet;
  assert(ArgNum < Traits::X86_MAX_GPR_ARGS);
  // Argument position -> register, listed in the order in which the GPR
  // arguments are assigned.
  static const RegisterSet::AllRegisters GprForArgNum[] = {
      RegisterSet::Reg_edi, RegisterSet::Reg_esi, RegisterSet::Reg_edx,
      RegisterSet::Reg_ecx, RegisterSet::Reg_r8d, RegisterSet::Reg_r9d,
  };
  // Keep the table and the argument limit in lock step.
  static_assert(llvm::array_lengthof(GprForArgNum) == Traits::X86_MAX_GPR_ARGS,
                "Mismatch between MAX_GPR_ARGS and GprForArgNum.");
  return GprForArgNum[ArgNum];
}
// constexprMax yields the larger of S0 and S1 as a constant expression. It is
// used to size OperandList in lowerCall, where std::max() was expected to be
// usable but is not.
// NOTE(review): presumably std::max() is not constexpr in the C++ dialect this
// file is built with -- confirm.
constexpr SizeT constexprMax(SizeT S0, SizeT S1) {
  return (S1 > S0) ? S1 : S0;
}
} // end of anonymous namespace
/// Lowers the high-level call instruction Instr for the x86-64 target.
///
/// Vector and scalar floating-point arguments share the xmm argument
/// registers (up to Traits::X86_MAX_XMM_ARGS), scalar integer arguments use
/// the general purpose argument registers (up to Traits::X86_MAX_GPR_ARGS),
/// and every remaining argument is stored into a parameter area addressed
/// relative to the stack pointer. The call is wrapped in a bundle lock when
/// sandboxing is enabled. After the call, the stack pointer is restored, a
/// register-kill pseudo instruction is inserted, and the result (if any) is
/// copied from its return register(s) into the call's Dest.
void TargetX8664::lowerCall(const InstCall *Instr) {
// x86-64 calling convention:
//
// * At the point before the call, the stack must be aligned to 16
// bytes.
//
// * The first eight arguments of vector/fp type, regardless of their
// position relative to the other arguments in the argument list, are
// placed in registers %xmm0 - %xmm7.
//
// * The first six arguments of integer types, regardless of their
// position relative to the other arguments in the argument list, are
// placed in registers %rdi, %rsi, %rdx, %rcx, %r8, and %r9.
//
// * Other arguments are pushed onto the stack in right-to-left order,
// such that the left-most argument ends up on the top of the stack at
// the lowest memory address.
//
// * Stack arguments of vector type are aligned to start at the next
// highest multiple of 16 bytes. Other stack arguments are aligned to
// 8 bytes.
//
// This intends to match the section "Function Calling Sequence" of the
// document "System V Application Binary Interface."
NeedsStackAlignment = true;
// The inline capacity is sized for the larger of the two register argument
// limits; the same list type is also used for the stack argument lists.
using OperandList =
llvm::SmallVector<Operand *, constexprMax(Traits::X86_MAX_XMM_ARGS,
Traits::X86_MAX_GPR_ARGS)>;
OperandList XmmArgs;
OperandList GprArgs;
// StackArgs[i] is stored to the memory operand StackArgLocations[i].
OperandList StackArgs, StackArgLocations;
// Running size of the parameter area; also the stack-pointer-relative offset
// of the next stack argument.
uint32_t ParameterAreaSizeBytes = 0;
// Classify each argument operand according to the location where the
// argument is passed.
for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
Operand *Arg = Instr->getArg(i);
Type Ty = Arg->getType();
// The PNaCl ABI requires the width of arguments to be at least 32 bits.
assert(typeWidthInBytes(Ty) >= 4);
if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
XmmArgs.push_back(Arg);
} else if (isScalarFloatingType(Ty) &&
XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
// Scalar fp arguments draw from the same xmm registers as vectors.
XmmArgs.push_back(Arg);
} else if (isScalarIntegerType(Ty) &&
GprArgs.size() < Traits::X86_MAX_GPR_ARGS) {
GprArgs.push_back(Arg);
} else {
StackArgs.push_back(Arg);
// A vector stack argument starts at the next stack-aligned offset.
if (isVectorType(Arg->getType())) {
ParameterAreaSizeBytes =
Traits::applyStackAlignment(ParameterAreaSizeBytes);
}
Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
// Loc is the argument's offset from the stack pointer within the parameter
// area.
Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
StackArgLocations.push_back(
Traits::X86OperandMem::create(Func, Ty, esp, Loc));
// NOTE(review): the convention described above aligns non-vector stack
// arguments to 8 bytes; confirm typeWidthInBytesOnStack() rounds to 8 bytes
// for this target.
ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
}
}
// Adjust the parameter area so that the stack is aligned. It is
// assumed that the stack is already aligned at the start of the
// calling sequence.
ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
// Subtract the appropriate amount for the argument area. This also
// takes care of setting the stack adjustment during emission.
//
// TODO: If for some reason the call instruction gets dead-code
// eliminated after lowering, we would need to ensure that the
// pre-call and the post-call esp adjustment get eliminated as well.
if (ParameterAreaSizeBytes) {
_adjust_stack(ParameterAreaSizeBytes);
}
// Copy arguments that are passed on the stack to the appropriate
// stack locations.
for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
}
// Copy arguments to be passed in registers to the appropriate
// registers.
// TODO: Investigate the impact of lowering arguments passed in
// registers after lowering stack arguments as opposed to the other
// way around. Lowering register arguments after stack arguments may
// reduce register pressure. On the other hand, lowering register
// arguments first (before stack arguments) may result in more compact
// code, as the memory operand displacements may end up being smaller
// before any stack adjustment is done.
for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
Variable *Reg = legalizeToReg(XmmArgs[i], getRegisterForXmmArgNum(i));
// Generate a FakeUse of register arguments so that they do not get
// dead code eliminated as a result of the FakeKill of scratch
// registers after the call.
Context.insert(InstFakeUse::create(Func, Reg));
}
for (SizeT i = 0, NumGprArgs = GprArgs.size(); i < NumGprArgs; ++i) {
Variable *Reg = legalizeToReg(GprArgs[i], getRegisterForGprArgNum(i));
// Same FakeUse treatment as for the xmm register arguments above.
Context.insert(InstFakeUse::create(Func, Reg));
}
// Generate the call instruction. Assign its result to a temporary
// with high register allocation weight.
Variable *Dest = Instr->getDest();
// ReturnReg doubles as ReturnRegLo as necessary.
Variable *ReturnReg = nullptr;
Variable *ReturnRegHi = nullptr;
// Select the register(s) the result is read from, keyed on Dest's type.
if (Dest) {
switch (Dest->getType()) {
case IceType_NUM:
llvm_unreachable("Invalid Call dest type");
break;
case IceType_void:
break;
case IceType_i1:
case IceType_i8:
case IceType_i16:
case IceType_i32:
ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
break;
case IceType_i64:
// TODO(jpp): return i64 in a GPR.
// For now a 64-bit result is read as two 32-bit halves from eax and edx.
ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
break;
case IceType_f32:
case IceType_f64:
case IceType_v4i1:
case IceType_v8i1:
case IceType_v16i1:
case IceType_v16i8:
case IceType_v8i16:
case IceType_v4i32:
case IceType_v4f32:
// Scalar fp and vector results are both read from xmm0.
ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0);
break;
}
}
Operand *CallTarget = legalize(Instr->getCallTarget());
const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
// With sandboxing, the call is placed inside a bundle lock that uses the
// align-to-end option. A non-constant target is first copied into a variable
// whose low bits are cleared with the mask ~(BundleSize - 1); the masking is
// emitted inside the bundle lock, right before the call.
if (NeedSandboxing) {
if (llvm::isa<Constant>(CallTarget)) {
_bundle_lock(InstBundleLock::Opt_AlignToEnd);
} else {
Variable *CallTargetVar = nullptr;
_mov(CallTargetVar, CallTarget);
_bundle_lock(InstBundleLock::Opt_AlignToEnd);
const SizeT BundleSize =
1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
_and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
CallTarget = CallTargetVar;
}
}
Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget);
Context.insert(NewCall);
if (NeedSandboxing)
_bundle_unlock();
// The call instruction is created with only ReturnReg as its destination;
// ReturnRegHi is given a FakeDef right after it.
if (ReturnRegHi)
Context.insert(InstFakeDef::create(Func, ReturnRegHi));
// Add the appropriate offset to esp. The call instruction takes care
// of resetting the stack offset during emission.
if (ParameterAreaSizeBytes) {
Variable *Esp =
Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
_add(Esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
}
// Insert a register-kill pseudo instruction.
Context.insert(InstFakeKill::create(Func, NewCall));
// Generate a FakeUse to keep the call live if necessary.
if (Instr->hasSideEffects() && ReturnReg) {
Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
Context.insert(FakeUse);
}
// Nothing to copy back when the call's result is not captured.
if (!Dest)
return;
assert(ReturnReg && "x86-64 always returns value on registers.");
// Assign the result of the call to Dest.
if (ReturnRegHi) {
assert(Dest->getType() == IceType_i64);
// Copy the two 32-bit halves into the lo/hi parts of the 64-bit Dest.
split64(Dest);
Variable *DestLo = Dest->getLo();
Variable *DestHi = Dest->getHi();
_mov(DestLo, ReturnReg);
_mov(DestHi, ReturnRegHi);
return;
}
assert(Dest->getType() == IceType_f32 || Dest->getType() == IceType_f64 ||
Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
isVectorType(Dest->getType()));
// Results that were read from xmm0 (scalar fp and vector) are copied with
// _movp; integer results are copied with _mov.
if (isScalarFloatingType(Dest->getType()) || isVectorType(Dest->getType())) {
_movp(Dest, ReturnReg);
} else {
_mov(Dest, ReturnReg);
}
}
void TargetDataX8664::lowerJumpTables() {
switch (Ctx->getFlags().getOutFileType()) {
case FT_Elf: {
ELFObjectWriter *Writer = Ctx->getObjectWriter();
for (const JumpTableData &JumpTable : Ctx->getJumpTables())
// TODO(jpp): not 386.
Writer->writeJumpTable(JumpTable, llvm::ELF::R_386_32);
} break;
case FT_Asm:
// Already emitted from Cfg
break;
case FT_Iasm: {
if (!BuildDefs::dump())
return;
Ostream &Str = Ctx->getStrEmit();
for (const JumpTableData &JT : Ctx->getJumpTables()) {
Str << "\t.section\t.rodata." << JT.getFunctionName()
<< "$jumptable,\"a\",@progbits\n";
Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n";
Str << InstJumpTable::makeName(JT.getFunctionName(), JT.getId()) << ":";
// On X8664 ILP32 pointers are 32-bit hence the use of .long
for (intptr_t TargetOffset : JT.getTargetOffsets())
Str << "\n\t.long\t" << JT.getFunctionName() << "+" << TargetOffset;
Str << "\n";
}
} break;
}
}
namespace {
template <typename T> struct PoolTypeConverter {};
......@@ -236,36 +524,6 @@ void TargetX8664::emitJumpTable(const Cfg *Func,
Str << "\n";
}
void TargetDataX8664::lowerJumpTables() {
switch (Ctx->getFlags().getOutFileType()) {
case FT_Elf: {
ELFObjectWriter *Writer = Ctx->getObjectWriter();
for (const JumpTableData &JT : Ctx->getJumpTables())
// TODO(jpp): not 386.
Writer->writeJumpTable(JT, llvm::ELF::R_386_32);
} break;
case FT_Asm:
// Already emitted from Cfg
break;
case FT_Iasm: {
if (!BuildDefs::dump())
return;
Ostream &Str = Ctx->getStrEmit();
for (const JumpTableData &JT : Ctx->getJumpTables()) {
Str << "\t.section\t.rodata." << JT.getFunctionName()
<< "$jumptable,\"a\",@progbits\n";
Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n";
Str << InstJumpTable::makeName(JT.getFunctionName(), JT.getId()) << ":";
// On X8664 ILP32 pointers are 32-bit hence the use of .long
for (intptr_t TargetOffset : JT.getTargetOffsets())
Str << "\n\t.long\t" << JT.getFunctionName() << "+" << TargetOffset;
Str << "\n";
}
} break;
}
}
void TargetDataX8664::lowerGlobals(const VariableDeclarationList &Vars,
const IceString &SectionSuffix) {
switch (Ctx->getFlags().getOutFileType()) {
......
......@@ -38,6 +38,9 @@ class TargetX8664 final
public:
static TargetX8664 *create(Cfg *Func) { return new TargetX8664(Func); }
protected:
void lowerCall(const InstCall *Instr) override;
private:
friend class ::Ice::X86Internal::TargetX86Base<TargetX8664>;
......
......@@ -439,7 +439,9 @@ template <> struct MachineTraits<TargetX8664> {
}
/// The maximum number of arguments to pass in XMM registers
static const uint32_t X86_MAX_XMM_ARGS = 4;
static const uint32_t X86_MAX_XMM_ARGS = 8;
/// The maximum number of arguments to pass in GPR registers
static const uint32_t X86_MAX_GPR_ARGS = 6;
/// The number of bits in a byte
static const uint32_t X86_CHAR_BIT = 8;
/// Stack alignment. This is defined in IceTargetLoweringX8664.cpp because it
......
......@@ -83,6 +83,7 @@ public:
size_t typeWidthInBytesOnStack(Type Ty) const override {
  // Stack slots are sized in multiples of 4 bytes, so the width of Ty is
  // rounded up to the next such multiple. In particular, i1, i8, and i16 are
  // rounded up to 4 bytes.
  // TODO(jpp): this needs to round to multiples of 8 bytes in x86-64.
  constexpr size_t SlotGranuleBytes = 4;
  const size_t WidthBytes = typeWidthInBytes(Ty);
  return (WidthBytes + (SlotGranuleBytes - 1)) & ~(SlotGranuleBytes - 1);
}
......@@ -127,7 +128,6 @@ protected:
void lowerArithmetic(const InstArithmetic *Inst) override;
void lowerAssign(const InstAssign *Inst) override;
void lowerBr(const InstBr *Inst) override;
void lowerCall(const InstCall *Inst) override;
void lowerCast(const InstCast *Inst) override;
void lowerExtractElement(const InstExtractElement *Inst) override;
void lowerFcmp(const InstFcmp *Inst) override;
......
......@@ -2126,209 +2126,6 @@ void TargetX86Base<Machine>::lowerBr(const InstBr *Inst) {
}
/// Lowers the high-level call instruction Instr following the x86-32 calling
/// convention described below.
///
/// Vector arguments are assigned to xmm registers until
/// Traits::X86_MAX_XMM_ARGS of them have been placed; every other argument is
/// stored into a parameter area addressed relative to esp. The call is wrapped
/// in a bundle lock when sandboxing is enabled. After the call, esp is
/// restored, a register-kill pseudo instruction is inserted, and the result
/// (if any) is copied from eax, eax:edx, xmm0 or st(0) into the call's Dest.
template <class Machine>
void TargetX86Base<Machine>::lowerCall(const InstCall *Instr) {
// x86-32 calling convention:
//
// * At the point before the call, the stack must be aligned to 16
// bytes.
//
// * The first four arguments of vector type, regardless of their
// position relative to the other arguments in the argument list, are
// placed in registers xmm0 - xmm3.
//
// * Other arguments are pushed onto the stack in right-to-left order,
// such that the left-most argument ends up on the top of the stack at
// the lowest memory address.
//
// * Stack arguments of vector type are aligned to start at the next
// highest multiple of 16 bytes. Other stack arguments are aligned to
// 4 bytes.
//
// This intends to match the section "IA-32 Function Calling
// Convention" of the document "OS X ABI Function Call Guide" by
// Apple.
NeedsStackAlignment = true;
typedef std::vector<Operand *> OperandList;
OperandList XmmArgs;
// StackArgs[i] is stored to the memory operand StackArgLocations[i].
OperandList StackArgs, StackArgLocations;
// Running size of the parameter area; also the esp-relative offset of the
// next stack argument.
uint32_t ParameterAreaSizeBytes = 0;
// Classify each argument operand according to the location where the
// argument is passed.
for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
Operand *Arg = Instr->getArg(i);
Type Ty = Arg->getType();
// The PNaCl ABI requires the width of arguments to be at least 32 bits.
assert(typeWidthInBytes(Ty) >= 4);
if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
XmmArgs.push_back(Arg);
} else {
StackArgs.push_back(Arg);
// A vector stack argument starts at the next stack-aligned offset.
if (isVectorType(Arg->getType())) {
ParameterAreaSizeBytes =
Traits::applyStackAlignment(ParameterAreaSizeBytes);
}
Variable *esp =
Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
// Loc is the argument's offset from esp within the parameter area.
Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
StackArgLocations.push_back(
Traits::X86OperandMem::create(Func, Ty, esp, Loc));
ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
}
}
// Adjust the parameter area so that the stack is aligned. It is
// assumed that the stack is already aligned at the start of the
// calling sequence.
ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
// Subtract the appropriate amount for the argument area. This also
// takes care of setting the stack adjustment during emission.
//
// TODO: If for some reason the call instruction gets dead-code
// eliminated after lowering, we would need to ensure that the
// pre-call and the post-call esp adjustment get eliminated as well.
if (ParameterAreaSizeBytes) {
_adjust_stack(ParameterAreaSizeBytes);
}
// Copy arguments that are passed on the stack to the appropriate
// stack locations.
for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
}
// Copy arguments to be passed in registers to the appropriate
// registers.
// TODO: Investigate the impact of lowering arguments passed in
// registers after lowering stack arguments as opposed to the other
// way around. Lowering register arguments after stack arguments may
// reduce register pressure. On the other hand, lowering register
// arguments first (before stack arguments) may result in more compact
// code, as the memory operand displacements may end up being smaller
// before any stack adjustment is done.
for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
// The i-th xmm argument goes into register xmm<i>.
Variable *Reg =
legalizeToReg(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i);
// Generate a FakeUse of register arguments so that they do not get
// dead code eliminated as a result of the FakeKill of scratch
// registers after the call.
Context.insert(InstFakeUse::create(Func, Reg));
}
// Generate the call instruction. Assign its result to a temporary
// with high register allocation weight.
Variable *Dest = Instr->getDest();
// ReturnReg doubles as ReturnRegLo as necessary.
Variable *ReturnReg = nullptr;
Variable *ReturnRegHi = nullptr;
// Select the register(s) the result is read from, keyed on Dest's type.
if (Dest) {
switch (Dest->getType()) {
case IceType_NUM:
llvm_unreachable("Invalid Call dest type");
break;
case IceType_void:
break;
case IceType_i1:
case IceType_i8:
case IceType_i16:
case IceType_i32:
ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
break;
case IceType_i64:
// A 64-bit result comes back as two 32-bit halves: eax (low), edx (high).
ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
break;
case IceType_f32:
case IceType_f64:
// Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with
// the fstp instruction.
break;
case IceType_v4i1:
case IceType_v8i1:
case IceType_v16i1:
case IceType_v16i8:
case IceType_v8i16:
case IceType_v4i32:
case IceType_v4f32:
ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0);
break;
}
}
Operand *CallTarget = legalize(Instr->getCallTarget());
const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
// With sandboxing, the call is placed inside a bundle lock that uses the
// align-to-end option. A non-constant target is first copied into a variable
// whose low bits are cleared with the mask ~(BundleSize - 1); the masking is
// emitted inside the bundle lock, right before the call.
if (NeedSandboxing) {
if (llvm::isa<Constant>(CallTarget)) {
_bundle_lock(InstBundleLock::Opt_AlignToEnd);
} else {
Variable *CallTargetVar = nullptr;
_mov(CallTargetVar, CallTarget);
_bundle_lock(InstBundleLock::Opt_AlignToEnd);
const SizeT BundleSize =
1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
_and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
CallTarget = CallTargetVar;
}
}
Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget);
Context.insert(NewCall);
if (NeedSandboxing)
_bundle_unlock();
// The call instruction is created with only ReturnReg as its destination;
// ReturnRegHi is given a FakeDef right after it.
if (ReturnRegHi)
Context.insert(InstFakeDef::create(Func, ReturnRegHi));
// Add the appropriate offset to esp. The call instruction takes care
// of resetting the stack offset during emission.
if (ParameterAreaSizeBytes) {
Variable *esp =
Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
_add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
}
// Insert a register-kill pseudo instruction.
Context.insert(InstFakeKill::create(Func, NewCall));
// Generate a FakeUse to keep the call live if necessary.
if (Instr->hasSideEffects() && ReturnReg) {
Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
Context.insert(FakeUse);
}
// Nothing to copy back when the call's result is not captured.
if (!Dest)
return;
// Assign the result of the call to Dest.
if (ReturnReg) {
if (ReturnRegHi) {
assert(Dest->getType() == IceType_i64);
// Copy the two 32-bit halves into the lo/hi parts of the 64-bit Dest.
split64(Dest);
Variable *DestLo = Dest->getLo();
Variable *DestHi = Dest->getHi();
_mov(DestLo, ReturnReg);
_mov(DestHi, ReturnRegHi);
} else {
assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
isVectorType(Dest->getType()));
if (isVectorType(Dest->getType())) {
_movp(Dest, ReturnReg);
} else {
_mov(Dest, ReturnReg);
}
}
} else if (isScalarFloatingType(Dest->getType())) {
// Special treatment for an FP function which returns its result in
// st(0).
// If Dest ends up being a physical xmm register, the fstp emit code
// will route st(0) through a temporary stack slot.
_fstp(Dest);
// Create a fake use of Dest in case it actually isn't used,
// because st(0) still needs to be popped.
Context.insert(InstFakeUse::create(Func, Dest));
}
}
template <class Machine>
void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
// a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
InstCast::OpKind CastKind = Inst->getCastKind();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment