Commit f4198548 by John Porto

Subzero. ARM32. No more SP frobbing.

Pre-computes the maximum stack size needed for outgoing arguments, pre-allocates it during the prolog, and deallocates it during the epilog. With this CL, StackAdjustments are no longer needed for ARM32, which will simplify rematerializing alloca'd variables. BUG= https://code.google.com/p/nativeclient/issues/detail?id=4076 R=sehr@chromium.org Review URL: https://codereview.chromium.org/1467473003 .
parent 5e0a8a71
...@@ -382,13 +382,6 @@ OperandARM32FlexReg::OperandARM32FlexReg(Cfg *Func, Type Ty, Variable *Reg, ...@@ -382,13 +382,6 @@ OperandARM32FlexReg::OperandARM32FlexReg(Cfg *Func, Type Ty, Variable *Reg,
Vars[1] = ShiftVar; Vars[1] = ShiftVar;
} }
/// Builds an AdjustStack instruction. SP is handed to the InstARM32 base
/// constructor (emit()/dump() later print it via getDest()), and the numeric
/// Amount is stored in the const member of the same name. Two sources are
/// registered, in this order: SP, then SrcAmount.
InstARM32AdjustStack::InstARM32AdjustStack(Cfg *Func, Variable *SP,
SizeT Amount, Operand *SrcAmount)
: InstARM32(Func, InstARM32::Adjuststack, 2, SP), Amount(Amount) {
// Source 0: the stack pointer being adjusted.
addSource(SP);
// Source 1: the operand holding the amount to subtract.
addSource(SrcAmount);
}
InstARM32Br::InstARM32Br(Cfg *Func, const CfgNode *TargetTrue, InstARM32Br::InstARM32Br(Cfg *Func, const CfgNode *TargetTrue,
const CfgNode *TargetFalse, const CfgNode *TargetFalse,
const InstARM32Label *Label, CondARM32::Cond Pred) const InstARM32Label *Label, CondARM32::Cond Pred)
...@@ -1272,33 +1265,6 @@ void InstARM32Pop::dump(const Cfg *Func) const { ...@@ -1272,33 +1265,6 @@ void InstARM32Pop::dump(const Cfg *Func) const {
} }
} }
/// Writes the textual assembly for the stack adjustment to the emit stream in
/// the form "\tsub\t<dest>, <src0>, <src1>", then reports Amount to the target
/// through updateStackAdjustment(). Does nothing when BuildDefs::dump() is
/// false.
void InstARM32AdjustStack::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrEmit();
// The constructor registers exactly two sources (SP and SrcAmount).
assert(getSrcSize() == 2);
Str << "\t"
<< "sub"
<< "\t";
getDest()->emit(Func);
Str << ", ";
getSrc(0)->emit(Func);
Str << ", ";
getSrc(1)->emit(Func);
// Bookkeeping side effect: the target is told about the adjustment while the
// instruction is being emitted.
Func->getTarget()->updateStackAdjustment(Amount);
}
/// Writes a human-readable form of the instruction to the dump stream:
/// "<dest> = sub.i32 <src0>, <Amount> ; <src1>". Does nothing when
/// BuildDefs::dump() is false.
void InstARM32AdjustStack::dump(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrDump();
getDest()->dump(Func);
Str << " = sub.i32 ";
getSrc(0)->dump(Func);
// The numeric Amount is printed first; the SrcAmount operand follows the
// " ; " separator.
Str << ", " << Amount << " ; ";
getSrc(1)->dump(Func);
}
void InstARM32Push::emit(const Cfg *Func) const { void InstARM32Push::emit(const Cfg *Func) const {
// TODO(jpp): Improve FP register save/restore. // TODO(jpp): Improve FP register save/restore.
if (!BuildDefs::dump()) if (!BuildDefs::dump())
......
...@@ -980,34 +980,6 @@ private: ...@@ -980,34 +980,6 @@ private:
const InstARM32Label *Label; // Intra-block branch target const InstARM32Label *Label; // Intra-block branch target
}; };
/// AdjustStack instruction - subtracts SP by the given amount and updates the
/// stack offset during code emission.
///
/// Instances are obtained through create(); the constructor is private and the
/// default constructor, copy constructor and copy assignment are deleted.
class InstARM32AdjustStack : public InstARM32 {
InstARM32AdjustStack() = delete;
InstARM32AdjustStack(const InstARM32AdjustStack &) = delete;
InstARM32AdjustStack &operator=(const InstARM32AdjustStack &) = delete;
public:
/// Note: We need both Amount and SrcAmount. If Amount is too large then it
/// needs to be copied to a register (so SrcAmount could be a register).
/// However, we also need the numeric Amount for bookkeeping, and it's hard to
/// pull that from the generic SrcAmount operand.
///
/// The instruction is placement-constructed in storage returned by
/// Func->allocate<InstARM32AdjustStack>().
static InstARM32AdjustStack *create(Cfg *Func, Variable *SP, SizeT Amount,
Operand *SrcAmount) {
return new (Func->allocate<InstARM32AdjustStack>())
InstARM32AdjustStack(Func, SP, Amount, SrcAmount);
}
/// Emits the instruction as assembly text.
void emit(const Cfg *Func) const override;
/// Dumps the instruction in human-readable form.
void dump(const Cfg *Func) const override;
/// Type test used by the isa/cast/dyn_cast helpers: checks Inst against the
/// Adjuststack kind.
static bool classof(const Inst *Inst) { return isClassof(Inst, Adjuststack); }
/// Returns the numeric adjustment amount given at creation.
SizeT getAmount() const { return Amount; }
private:
InstARM32AdjustStack(Cfg *Func, Variable *SP, SizeT Amount,
Operand *SrcAmount);
// Numeric adjustment amount, fixed at construction.
const SizeT Amount;
};
/// Call instruction (bl/blx). Arguments should have already been pushed. /// Call instruction (bl/blx). Arguments should have already been pushed.
/// Technically bl and the register form of blx can be predicated, but we'll /// Technically bl and the register form of blx can be predicated, but we'll
/// leave that out until needed. /// leave that out until needed.
......
...@@ -311,6 +311,7 @@ protected: ...@@ -311,6 +311,7 @@ protected:
virtual void lowerOther(const Inst *Instr); virtual void lowerOther(const Inst *Instr);
virtual void genTargetHelperCallFor(Inst *Instr) = 0; virtual void genTargetHelperCallFor(Inst *Instr) = 0;
virtual uint32_t getCallStackArgumentsSizeBytes(const InstCall *Instr) = 0;
virtual void doAddressOptLoad() {} virtual void doAddressOptLoad() {}
virtual void doAddressOptStore() {} virtual void doAddressOptStore() {}
......
...@@ -233,12 +233,62 @@ void copyRegAllocFromInfWeightVariable64On32(const VarList &Vars) { ...@@ -233,12 +233,62 @@ void copyRegAllocFromInfWeightVariable64On32(const VarList &Vars) {
} }
} // end of anonymous namespace } // end of anonymous namespace
/// Computes how many bytes of stack the arguments of Call occupy, counting
/// only the arguments that are not skipped by the calling-convention checks
/// below. The total is passed through applyStackAlignment() before being
/// returned.
///
/// Arguments are visited in order with a single CallingConv object, so the
/// outcome for one argument can depend on the ones before it.
uint32_t TargetARM32::getCallStackArgumentsSizeBytes(const InstCall *Call) {
TargetARM32::CallingConv CC;
size_t OutArgsSizeBytes = 0;
for (SizeT i = 0, NumArgs = Call->getNumArgs(); i < NumArgs; ++i) {
Operand *Arg = legalizeUndef(Call->getArg(i));
Type Ty = Arg->getType();
// Each branch asks CC whether this argument goes in register(s); when the
// query returns true the argument contributes nothing to the stack size.
// NOTE(review): presumably a true result also reserves the register(s) in
// CC -- confirm against CallingConv.
if (Ty == IceType_i64) {
std::pair<int32_t, int32_t> Regs;
if (CC.I64InRegs(&Regs)) {
continue;
}
} else if (isVectorType(Ty) || isFloatingType(Ty)) {
int32_t Reg;
if (CC.FPInReg(Ty, &Reg)) {
continue;
}
} else {
// Any other argument type is expected to be i32 at this point.
assert(Ty == IceType_i32);
int32_t Reg;
if (CC.I32InReg(&Reg)) {
continue;
}
}
// Stack-passed argument: align the running size for Ty first, then add the
// argument's on-stack width.
OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, Ty);
OutArgsSizeBytes += typeWidthInBytesOnStack(Ty);
}
return applyStackAlignment(OutArgsSizeBytes);
}
/// Scans every instruction of every node in Func and records in
/// MaxOutArgsSizeBytes the largest value getCallStackArgumentsSizeBytes()
/// returns for any InstCall found (never less than MinNeededOutArgsBytes).
void TargetARM32::findMaxStackOutArgsSize() {
// MinNeededOutArgsBytes should be updated if the Target ever creates a
// high-level InstCall that requires more stack bytes.
constexpr size_t MinNeededOutArgsBytes = 0;
MaxOutArgsSizeBytes = MinNeededOutArgsBytes;
for (CfgNode *Node : Func->getNodes()) {
Context.init(Node);
while (!Context.atEnd()) {
// NOTE(review): presumably PostIncrement advances Context to the next
// instruction when it goes out of scope at the end of each iteration --
// confirm against PostIncrLoweringContext.
PostIncrLoweringContext PostIncrement(Context);
Inst *CurInstr = Context.getCur();
if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) {
SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call);
MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes);
}
}
}
}
void TargetARM32::translateO2() { void TargetARM32::translateO2() {
TimerMarker T(TimerStack::TT_O2, Func); TimerMarker T(TimerStack::TT_O2, Func);
// TODO(stichnot): share passes with X86? // TODO(stichnot): share passes with X86?
// https://code.google.com/p/nativeclient/issues/detail?id=4094 // https://code.google.com/p/nativeclient/issues/detail?id=4094
genTargetHelperCalls(); genTargetHelperCalls();
findMaxStackOutArgsSize();
// Do not merge Alloca instructions, and lay out the stack. // Do not merge Alloca instructions, and lay out the stack.
static constexpr bool SortAndCombineAllocas = false; static constexpr bool SortAndCombineAllocas = false;
...@@ -346,6 +396,7 @@ void TargetARM32::translateOm1() { ...@@ -346,6 +396,7 @@ void TargetARM32::translateOm1() {
// TODO: share passes with X86? // TODO: share passes with X86?
genTargetHelperCalls(); genTargetHelperCalls();
findMaxStackOutArgsSize();
// Do not merge Alloca instructions, and lay out the stack. // Do not merge Alloca instructions, and lay out the stack.
static constexpr bool SortAndCombineAllocas = false; static constexpr bool SortAndCombineAllocas = false;
...@@ -473,8 +524,6 @@ void TargetARM32::emitVariable(const Variable *Var) const { ...@@ -473,8 +524,6 @@ void TargetARM32::emitVariable(const Variable *Var) const {
int32_t BaseRegNum = Var->getBaseRegNum(); int32_t BaseRegNum = Var->getBaseRegNum();
if (BaseRegNum == Variable::NoRegister) { if (BaseRegNum == Variable::NoRegister) {
BaseRegNum = getFrameOrStackReg(); BaseRegNum = getFrameOrStackReg();
if (!hasFramePointer())
Offset += getStackAdjustment();
} }
const Type VarTy = Var->getType(); const Type VarTy = Var->getType();
Str << "[" << getRegName(BaseRegNum, VarTy); Str << "[" << getRegName(BaseRegNum, VarTy);
...@@ -670,7 +719,11 @@ void TargetARM32::addProlog(CfgNode *Node) { ...@@ -670,7 +719,11 @@ void TargetARM32::addProlog(CfgNode *Node) {
// +------------------------+ // +------------------------+
// | 6. padding | // | 6. padding |
// +------------------------+ // +------------------------+
// | 7. allocas | // | 7. allocas (variable) |
// +------------------------+
// | 8. padding |
// +------------------------+
// | 9. out args |
// +------------------------+ <--- StackPointer // +------------------------+ <--- StackPointer
// //
// The following variables record the size in bytes of the given areas: // The following variables record the size in bytes of the given areas:
...@@ -679,7 +732,9 @@ void TargetARM32::addProlog(CfgNode *Node) { ...@@ -679,7 +732,9 @@ void TargetARM32::addProlog(CfgNode *Node) {
// * GlobalsSize: area 3 // * GlobalsSize: area 3
// * GlobalsAndSubsequentPaddingSize: areas 3 - 4 // * GlobalsAndSubsequentPaddingSize: areas 3 - 4
// * LocalsSpillAreaSize: area 5 // * LocalsSpillAreaSize: area 5
// * SpillAreaSizeBytes: areas 2 - 6 // * SpillAreaSizeBytes: areas 2 - 6, and 9
// * MaxOutArgsSizeBytes: area 9
//
// Determine stack frame offsets for each Variable without a register // Determine stack frame offsets for each Variable without a register
// assignment. This can be done as one variable per stack slot. Or, do // assignment. This can be done as one variable per stack slot. Or, do
// coalescing by running the register allocator again with an infinite set of // coalescing by running the register allocator again with an infinite set of
...@@ -785,10 +840,13 @@ void TargetARM32::addProlog(CfgNode *Node) { ...@@ -785,10 +840,13 @@ void TargetARM32::addProlog(CfgNode *Node) {
uint32_t GlobalsAndSubsequentPaddingSize = uint32_t GlobalsAndSubsequentPaddingSize =
GlobalsSize + LocalsSlotsPaddingBytes; GlobalsSize + LocalsSlotsPaddingBytes;
// Align SP if necessary. // Adds the out args space to the stack, and align SP if necessary.
if (NeedsStackAlignment) { if (!NeedsStackAlignment) {
SpillAreaSizeBytes += MaxOutArgsSizeBytes;
} else {
uint32_t StackOffset = PreservedRegsSizeBytes; uint32_t StackOffset = PreservedRegsSizeBytes;
uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes); uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
StackSize = applyStackAlignment(StackSize + MaxOutArgsSizeBytes);
SpillAreaSizeBytes = StackSize - StackOffset; SpillAreaSizeBytes = StackSize - StackOffset;
} }
...@@ -802,8 +860,6 @@ void TargetARM32::addProlog(CfgNode *Node) { ...@@ -802,8 +860,6 @@ void TargetARM32::addProlog(CfgNode *Node) {
} }
Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
resetStackAdjustment();
// Fill in stack offsets for stack args, and copy args into registers for // Fill in stack offsets for stack args, and copy args into registers for
// those that were register-allocated. Args are pushed right to left, so // those that were register-allocated. Args are pushed right to left, so
// Arg[0] is closest to the stack/frame pointer. // Arg[0] is closest to the stack/frame pointer.
...@@ -847,7 +903,8 @@ void TargetARM32::addProlog(CfgNode *Node) { ...@@ -847,7 +903,8 @@ void TargetARM32::addProlog(CfgNode *Node) {
Str << "Stack layout:\n"; Str << "Stack layout:\n";
uint32_t SPAdjustmentPaddingSize = uint32_t SPAdjustmentPaddingSize =
SpillAreaSizeBytes - LocalsSpillAreaSize - SpillAreaSizeBytes - LocalsSpillAreaSize -
GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes; GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes -
MaxOutArgsSizeBytes;
Str << " in-args = " << InArgsSizeBytes << " bytes\n" Str << " in-args = " << InArgsSizeBytes << " bytes\n"
<< " preserved registers = " << PreservedRegsSizeBytes << " bytes\n" << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
<< " spill area padding = " << SpillAreaPaddingBytes << " bytes\n" << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
...@@ -860,6 +917,7 @@ void TargetARM32::addProlog(CfgNode *Node) { ...@@ -860,6 +917,7 @@ void TargetARM32::addProlog(CfgNode *Node) {
Str << "Stack details:\n" Str << "Stack details:\n"
<< " SP adjustment = " << SpillAreaSizeBytes << " bytes\n" << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n"
<< " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n" << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
<< " outgoing args size = " << MaxOutArgsSizeBytes << " bytes\n"
<< " locals spill area alignment = " << LocalsSlotsAlignmentBytes << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
<< " bytes\n" << " bytes\n"
<< " is FP based = " << UsesFramePointer << "\n"; << " is FP based = " << UsesFramePointer << "\n";
...@@ -956,10 +1014,7 @@ bool TargetARM32::isLegalMemOffset(Type Ty, int32_t Offset) const { ...@@ -956,10 +1014,7 @@ bool TargetARM32::isLegalMemOffset(Type Ty, int32_t Offset) const {
return OperandARM32Mem::canHoldOffset(Ty, ZeroExt, Offset); return OperandARM32Mem::canHoldOffset(Ty, ZeroExt, Offset);
} }
Variable *TargetARM32::newBaseRegister(int32_t OriginalOffset, Variable *TargetARM32::newBaseRegister(int32_t Offset, Variable *OrigBaseReg) {
int32_t StackAdjust,
Variable *OrigBaseReg) {
int32_t Offset = OriginalOffset + StackAdjust;
// Legalize will likely need a movw/movt combination, but if the top bits are // Legalize will likely need a movw/movt combination, but if the top bits are
// all 0 from negating the offset and subtracting, we could use that instead. // all 0 from negating the offset and subtracting, we could use that instead.
bool ShouldSub = (-Offset & 0xFFFF0000) == 0; bool ShouldSub = (-Offset & 0xFFFF0000) == 0;
...@@ -976,26 +1031,25 @@ Variable *TargetARM32::newBaseRegister(int32_t OriginalOffset, ...@@ -976,26 +1031,25 @@ Variable *TargetARM32::newBaseRegister(int32_t OriginalOffset,
} }
OperandARM32Mem *TargetARM32::createMemOperand(Type Ty, int32_t Offset, OperandARM32Mem *TargetARM32::createMemOperand(Type Ty, int32_t Offset,
int32_t StackAdjust,
Variable *OrigBaseReg, Variable *OrigBaseReg,
Variable **NewBaseReg, Variable **NewBaseReg,
int32_t *NewBaseOffset) { int32_t *NewBaseOffset) {
if (isLegalMemOffset(Ty, Offset + StackAdjust)) { if (isLegalMemOffset(Ty, Offset)) {
return OperandARM32Mem::create( return OperandARM32Mem::create(
Func, Ty, OrigBaseReg, llvm::cast<ConstantInteger32>( Func, Ty, OrigBaseReg,
Ctx->getConstantInt32(Offset + StackAdjust)), llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(Offset)),
OperandARM32Mem::Offset); OperandARM32Mem::Offset);
} }
if (*NewBaseReg == nullptr) { if (*NewBaseReg == nullptr) {
*NewBaseReg = newBaseRegister(Offset, StackAdjust, OrigBaseReg); *NewBaseReg = newBaseRegister(Offset, OrigBaseReg);
*NewBaseOffset = Offset + StackAdjust; *NewBaseOffset = Offset;
} }
int32_t OffsetDiff = Offset + StackAdjust - *NewBaseOffset; int32_t OffsetDiff = Offset - *NewBaseOffset;
if (!isLegalMemOffset(Ty, OffsetDiff)) { if (!isLegalMemOffset(Ty, OffsetDiff)) {
*NewBaseReg = newBaseRegister(Offset, StackAdjust, OrigBaseReg); *NewBaseReg = newBaseRegister(Offset, OrigBaseReg);
*NewBaseOffset = Offset + StackAdjust; *NewBaseOffset = Offset;
OffsetDiff = 0; OffsetDiff = 0;
} }
...@@ -1005,9 +1059,8 @@ OperandARM32Mem *TargetARM32::createMemOperand(Type Ty, int32_t Offset, ...@@ -1005,9 +1059,8 @@ OperandARM32Mem *TargetARM32::createMemOperand(Type Ty, int32_t Offset,
OperandARM32Mem::Offset); OperandARM32Mem::Offset);
} }
void TargetARM32::legalizeMov(InstARM32Mov *MovInstr, int32_t StackAdjust, void TargetARM32::legalizeMov(InstARM32Mov *MovInstr, Variable *OrigBaseReg,
Variable *OrigBaseReg, Variable **NewBaseReg, Variable **NewBaseReg, int32_t *NewBaseOffset) {
int32_t *NewBaseOffset) {
Variable *Dest = MovInstr->getDest(); Variable *Dest = MovInstr->getDest();
assert(Dest != nullptr); assert(Dest != nullptr);
Type DestTy = Dest->getType(); Type DestTy = Dest->getType();
...@@ -1027,8 +1080,8 @@ void TargetARM32::legalizeMov(InstARM32Mov *MovInstr, int32_t StackAdjust, ...@@ -1027,8 +1080,8 @@ void TargetARM32::legalizeMov(InstARM32Mov *MovInstr, int32_t StackAdjust,
assert(SrcR->hasReg()); assert(SrcR->hasReg());
const int32_t Offset = Dest->getStackOffset(); const int32_t Offset = Dest->getStackOffset();
// This is a _mov(Mem(), Variable), i.e., a store. // This is a _mov(Mem(), Variable), i.e., a store.
_str(SrcR, createMemOperand(DestTy, Offset, StackAdjust, OrigBaseReg, _str(SrcR, createMemOperand(DestTy, Offset, OrigBaseReg, NewBaseReg,
NewBaseReg, NewBaseOffset), NewBaseOffset),
MovInstr->getPredicate()); MovInstr->getPredicate());
// _str() does not have a Dest, so we add a fake-def(Dest). // _str() does not have a Dest, so we add a fake-def(Dest).
Context.insert(InstFakeDef::create(Func, Dest)); Context.insert(InstFakeDef::create(Func, Dest));
...@@ -1036,8 +1089,8 @@ void TargetARM32::legalizeMov(InstARM32Mov *MovInstr, int32_t StackAdjust, ...@@ -1036,8 +1089,8 @@ void TargetARM32::legalizeMov(InstARM32Mov *MovInstr, int32_t StackAdjust,
} else if (auto *Var = llvm::dyn_cast<Variable>(Src)) { } else if (auto *Var = llvm::dyn_cast<Variable>(Src)) {
if (!Var->hasReg()) { if (!Var->hasReg()) {
const int32_t Offset = Var->getStackOffset(); const int32_t Offset = Var->getStackOffset();
_ldr(Dest, createMemOperand(DestTy, Offset, StackAdjust, OrigBaseReg, _ldr(Dest, createMemOperand(DestTy, Offset, OrigBaseReg, NewBaseReg,
NewBaseReg, NewBaseOffset), NewBaseOffset),
MovInstr->getPredicate()); MovInstr->getPredicate());
Legalized = true; Legalized = true;
} }
...@@ -1064,7 +1117,6 @@ void TargetARM32::legalizeStackSlots() { ...@@ -1064,7 +1117,6 @@ void TargetARM32::legalizeStackSlots() {
Func->dump("Before legalizeStackSlots"); Func->dump("Before legalizeStackSlots");
assert(hasComputedFrame()); assert(hasComputedFrame());
Variable *OrigBaseReg = getPhysicalRegister(getFrameOrStackReg()); Variable *OrigBaseReg = getPhysicalRegister(getFrameOrStackReg());
int32_t StackAdjust = 0;
// Do a fairly naive greedy clustering for now. Pick the first stack slot // Do a fairly naive greedy clustering for now. Pick the first stack slot
// that's out of bounds and make a new base reg using the architecture's temp // that's out of bounds and make a new base reg using the architecture's temp
// register. If that works for the next slot, then great. Otherwise, create a // register. If that works for the next slot, then great. Otherwise, create a
...@@ -1091,23 +1143,8 @@ void TargetARM32::legalizeStackSlots() { ...@@ -1091,23 +1143,8 @@ void TargetARM32::legalizeStackSlots() {
NewBaseOffset = 0; NewBaseOffset = 0;
} }
// The stack adjustment only matters if we are using SP instead of FP.
if (!hasFramePointer()) {
if (auto *AdjInst = llvm::dyn_cast<InstARM32AdjustStack>(CurInstr)) {
StackAdjust += AdjInst->getAmount();
NewBaseOffset += AdjInst->getAmount();
continue;
}
if (llvm::isa<InstARM32Call>(CurInstr)) {
NewBaseOffset -= StackAdjust;
StackAdjust = 0;
continue;
}
}
if (auto *MovInstr = llvm::dyn_cast<InstARM32Mov>(CurInstr)) { if (auto *MovInstr = llvm::dyn_cast<InstARM32Mov>(CurInstr)) {
legalizeMov(MovInstr, StackAdjust, OrigBaseReg, &NewBaseReg, legalizeMov(MovInstr, OrigBaseReg, &NewBaseReg, &NewBaseOffset);
&NewBaseOffset);
} }
} }
} }
...@@ -1269,7 +1306,14 @@ void TargetARM32::lowerAlloca(const InstAlloca *Inst) { ...@@ -1269,7 +1306,14 @@ void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
alignRegisterPow2(T, Alignment); alignRegisterPow2(T, Alignment);
_sub(SP, SP, T); _sub(SP, SP, T);
} }
_mov(Dest, SP); Variable *T = SP;
if (MaxOutArgsSizeBytes != 0) {
T = makeReg(getPointerType());
Operand *OutArgsSizeRF = legalize(
Ctx->getConstantInt32(MaxOutArgsSizeBytes), Legal_Reg | Legal_Flex);
_add(T, SP, OutArgsSizeRF);
}
_mov(Dest, T);
} }
void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) { void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) {
...@@ -2093,6 +2137,8 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -2093,6 +2137,8 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
} }
case InstArithmetic::Sub: { case InstArithmetic::Sub: {
if (Srcs.hasConstOperand()) { if (Srcs.hasConstOperand()) {
// TODO(jpp): lowering Src0R here is wrong -- Src0R is not guaranteed to be
// used.
Variable *Src0R = Srcs.src0R(this); Variable *Src0R = Srcs.src0R(this);
if (Srcs.immediateIsFlexEncodable()) { if (Srcs.immediateIsFlexEncodable()) {
Operand *Src1RF = Srcs.src1RF(this); Operand *Src1RF = Srcs.src1RF(this);
...@@ -2346,7 +2392,7 @@ void TargetARM32::lowerCall(const InstCall *Instr) { ...@@ -2346,7 +2392,7 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
TargetARM32::CallingConv::ARM32_MAX_FP_REG_UNITS> FPArgs; TargetARM32::CallingConv::ARM32_MAX_FP_REG_UNITS> FPArgs;
// Pair of Arg Operand -> stack offset. // Pair of Arg Operand -> stack offset.
llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs; llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs;
int32_t ParameterAreaSizeBytes = 0; size_t ParameterAreaSizeBytes = 0;
// Classify each argument operand according to the location where the // Classify each argument operand according to the location where the
// argument is passed. // argument is passed.
...@@ -2390,16 +2436,8 @@ void TargetARM32::lowerCall(const InstCall *Instr) { ...@@ -2390,16 +2436,8 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
// the stack is already aligned at the start of the calling sequence. // the stack is already aligned at the start of the calling sequence.
ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
// Subtract the appropriate amount for the argument area. This also takes if (ParameterAreaSizeBytes > MaxOutArgsSizeBytes) {
// care of setting the stack adjustment during emission. llvm::report_fatal_error("MaxOutArgsSizeBytes is not really a max.");
//
// TODO: If for some reason the call instruction gets dead-code eliminated
// after lowering, we would need to ensure that the pre-call and the
// post-call esp adjustment get eliminated as well.
if (ParameterAreaSizeBytes) {
Operand *SubAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
Legal_Reg | Legal_Flex);
_adjust_stack(ParameterAreaSizeBytes, SubAmount);
} }
// Copy arguments that are passed on the stack to the appropriate stack // Copy arguments that are passed on the stack to the appropriate stack
...@@ -2492,15 +2530,6 @@ void TargetARM32::lowerCall(const InstCall *Instr) { ...@@ -2492,15 +2530,6 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
if (ReturnRegHi) if (ReturnRegHi)
Context.insert(InstFakeDef::create(Func, ReturnRegHi)); Context.insert(InstFakeDef::create(Func, ReturnRegHi));
// Add the appropriate offset to SP. The call instruction takes care of
// resetting the stack offset during emission.
if (ParameterAreaSizeBytes) {
Operand *AddAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
Legal_Reg | Legal_Flex);
Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
_add(SP, SP, AddAmount);
}
// Insert a register-kill pseudo instruction. // Insert a register-kill pseudo instruction.
Context.insert(InstFakeKill::create(Func, NewCall)); Context.insert(InstFakeKill::create(Func, NewCall));
......
...@@ -237,6 +237,7 @@ protected: ...@@ -237,6 +237,7 @@ protected:
void lowerSwitch(const InstSwitch *Inst) override; void lowerSwitch(const InstSwitch *Inst) override;
void lowerUnreachable(const InstUnreachable *Inst) override; void lowerUnreachable(const InstUnreachable *Inst) override;
void prelowerPhis() override; void prelowerPhis() override;
uint32_t getCallStackArgumentsSizeBytes(const InstCall *Instr) override;
void genTargetHelperCallFor(Inst *Instr) override { (void)Instr; } void genTargetHelperCallFor(Inst *Instr) override { (void)Instr; }
void doAddressOptLoad() override; void doAddressOptLoad() override;
void doAddressOptStore() override; void doAddressOptStore() override;
...@@ -289,10 +290,6 @@ protected: ...@@ -289,10 +290,6 @@ protected:
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Adc::create(Func, Dest, Src0, Src1, Pred)); Context.insert(InstARM32Adc::create(Func, Dest, Src0, Src1, Pred));
} }
void _adjust_stack(int32_t Amount, Operand *SrcAmount) {
Context.insert(InstARM32AdjustStack::create(
Func, getPhysicalRegister(RegARM32::Reg_sp), Amount, SrcAmount));
}
void _and(Variable *Dest, Variable *Src0, Operand *Src1, void _and(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32And::create(Func, Dest, Src0, Src1, Pred)); Context.insert(InstARM32And::create(Func, Dest, Src0, Src1, Pred));
...@@ -813,6 +810,13 @@ protected: ...@@ -813,6 +810,13 @@ protected:
Context.insert(InstARM32Vsub::create(Func, Dest, Src0, Src1)); Context.insert(InstARM32Vsub::create(Func, Dest, Src0, Src1));
} }
// Iterates over the CFG and determines the maximum number of bytes needed for
// outgoing stack arguments. This information is later used during addProlog()
// to pre-allocate the outargs area.
// TODO(jpp): This could live in the Parser, if we provided a Target-specific
// method that the Parser could call.
void findMaxStackOutArgsSize();
/// Run a pass through stack variables and ensure that the offsets are legal. /// Run a pass through stack variables and ensure that the offsets are legal.
/// If the offset is not legal, use a new base register that accounts for the /// If the offset is not legal, use a new base register that accounts for the
/// offset, such that the addressing mode offset bits are now legal. /// offset, such that the addressing mode offset bits are now legal.
...@@ -820,36 +824,35 @@ protected: ...@@ -820,36 +824,35 @@ protected:
/// Returns true if the given Offset can be represented in a ldr/str. /// Returns true if the given Offset can be represented in a ldr/str.
bool isLegalMemOffset(Type Ty, int32_t Offset) const; bool isLegalMemOffset(Type Ty, int32_t Offset) const;
// Creates a new Base register centered around // Creates a new Base register centered around
// [OrigBaseReg, +/- Offset+StackAdjust]. // [OrigBaseReg, +/- Offset].
Variable *newBaseRegister(int32_t Offset, int32_t StackAdjust, Variable *newBaseRegister(int32_t Offset, Variable *OrigBaseReg);
Variable *OrigBaseReg); /// Creates a new, legal OperandARM32Mem for accessing OrigBase + Offset. The
/// Creates a new, legal OperandARM32Mem for accessing OrigBase + Offset + /// returned mem operand is a legal operand for accessing memory that is of
/// StackAdjust. The returned mem operand is a legal operand for accessing /// type Ty.
/// memory that is of type Ty.
/// ///
/// If [OrigBaseReg, #Offset+StackAdjust] is encodable, then the method /// If [OrigBaseReg, #Offset] is encodable, then the method returns a Mem
/// returns a Mem operand expressing it. Otherwise, /// operand expressing it. Otherwise,
/// ///
/// if [*NewBaseReg, #Offset+StackAdjust-*NewBaseOffset] is encodable, the /// if [*NewBaseReg, #Offset-*NewBaseOffset] is encodable, the method will
/// method will return that. Otherwise, /// return that. Otherwise,
/// ///
/// a new base register ip=OrigBaseReg+Offset+StackAdjust is created, and the /// a new base register ip=OrigBaseReg+Offset is created, and the method
/// method returns [ip, #0]. /// returns [ip, #0].
OperandARM32Mem *createMemOperand(Type Ty, int32_t Offset, OperandARM32Mem *createMemOperand(Type Ty, int32_t Offset,
int32_t StackAdjust, Variable *OrigBaseReg, Variable *OrigBaseReg,
Variable **NewBaseReg, Variable **NewBaseReg,
int32_t *NewBaseOffset); int32_t *NewBaseOffset);
/// Legalizes Mov if its Source (or Destination) is a spilled Variable. Moves /// Legalizes Mov if its Source (or Destination) is a spilled Variable. Moves
/// to memory become store instructions, and moves from memory, loads. /// to memory become store instructions, and moves from memory, loads.
void legalizeMov(InstARM32Mov *Mov, int32_t StackAdjust, void legalizeMov(InstARM32Mov *Mov, Variable *OrigBaseReg,
Variable *OrigBaseReg, Variable **NewBaseReg, Variable **NewBaseReg, int32_t *NewBaseOffset);
int32_t *NewBaseOffset);
TargetARM32Features CPUFeatures; TargetARM32Features CPUFeatures;
bool UsesFramePointer = false; bool UsesFramePointer = false;
bool NeedsStackAlignment = false; bool NeedsStackAlignment = false;
bool MaybeLeafFunc = true; bool MaybeLeafFunc = true;
size_t SpillAreaSizeBytes = 0; size_t SpillAreaSizeBytes = 0;
uint32_t MaxOutArgsSizeBytes = 0;
// TODO(jpp): std::array instead of array. // TODO(jpp): std::array instead of array.
static llvm::SmallBitVector TypeToRegisterSet[RCARM32_NUM]; static llvm::SmallBitVector TypeToRegisterSet[RCARM32_NUM];
static llvm::SmallBitVector RegisterAliases[RegARM32::Reg_NUM]; static llvm::SmallBitVector RegisterAliases[RegARM32::Reg_NUM];
......
...@@ -235,6 +235,10 @@ protected: ...@@ -235,6 +235,10 @@ protected:
void lowerSwitch(const InstSwitch *Inst) override; void lowerSwitch(const InstSwitch *Inst) override;
void lowerUnreachable(const InstUnreachable *Inst) override; void lowerUnreachable(const InstUnreachable *Inst) override;
void prelowerPhis() override; void prelowerPhis() override;
uint32_t getCallStackArgumentsSizeBytes(const InstCall *Instr) override {
(void)Instr;
return 0;
}
void genTargetHelperCallFor(Inst *Instr) override { (void)Instr; } void genTargetHelperCallFor(Inst *Instr) override { (void)Instr; }
void doAddressOptLoad() override; void doAddressOptLoad() override;
void doAddressOptStore() override; void doAddressOptStore() override;
......
...@@ -182,6 +182,10 @@ protected: ...@@ -182,6 +182,10 @@ protected:
void lowerOther(const Inst *Instr) override; void lowerOther(const Inst *Instr) override;
void lowerRMW(const typename Traits::Insts::FakeRMW *RMW); void lowerRMW(const typename Traits::Insts::FakeRMW *RMW);
void prelowerPhis() override; void prelowerPhis() override;
uint32_t getCallStackArgumentsSizeBytes(const InstCall *Instr) override {
(void)Instr;
return 0;
}
void genTargetHelperCallFor(Inst *Instr) override { (void)Instr; } void genTargetHelperCallFor(Inst *Instr) override { (void)Instr; }
void doAddressOptLoad() override; void doAddressOptLoad() override;
void doAddressOptStore() override; void doAddressOptStore() override;
......
...@@ -92,25 +92,19 @@ entry: ...@@ -92,25 +92,19 @@ entry:
; OPTM1: call {{.*}} R_{{.*}} ignore64BitArgNoInline ; OPTM1: call {{.*}} R_{{.*}} ignore64BitArgNoInline
; ARM32-LABEL: pass64BitArg ; ARM32-LABEL: pass64BitArg
; ARM32: sub sp, {{.*}} #16
; ARM32: str {{.*}}, [sp] ; ARM32: str {{.*}}, [sp]
; ARM32: movw r2, #123 ; ARM32: movw r2, #123
; ARM32: bl {{.*}} ignore64BitArgNoInline ; ARM32: bl {{.*}} ignore64BitArgNoInline
; ARM32: add sp, {{.*}} #16
; ARM32: sub sp, {{.*}} #16
; ARM32: str {{.*}}, [sp] ; ARM32: str {{.*}}, [sp]
; ARM32: {{mov|ldr}} r0 ; ARM32: {{mov|ldr}} r0
; ARM32: {{mov|ldr}} r1 ; ARM32: {{mov|ldr}} r1
; ARM32: movw r2, #123 ; ARM32: movw r2, #123
; ARM32: bl {{.*}} ignore64BitArgNoInline ; ARM32: bl {{.*}} ignore64BitArgNoInline
; ARM32: add sp, {{.*}} #16
; ARM32: sub sp, {{.*}} #16
; ARM32: str {{.*}}, [sp] ; ARM32: str {{.*}}, [sp]
; ARM32: {{mov|ldr}} r0 ; ARM32: {{mov|ldr}} r0
; ARM32: {{mov|ldr}} r1 ; ARM32: {{mov|ldr}} r1
; ARM32: movw r2, #123 ; ARM32: movw r2, #123
; ARM32: bl {{.*}} ignore64BitArgNoInline ; ARM32: bl {{.*}} ignore64BitArgNoInline
; ARM32: add sp, {{.*}} #16
declare i32 @ignore64BitArgNoInline(i64, i32, i64) declare i32 @ignore64BitArgNoInline(i64, i32, i64)
...@@ -144,7 +138,6 @@ entry: ...@@ -144,7 +138,6 @@ entry:
; OPTM1: call {{.*}} R_{{.*}} ignore64BitArgNoInline ; OPTM1: call {{.*}} R_{{.*}} ignore64BitArgNoInline
; ARM32-LABEL: pass64BitConstArg ; ARM32-LABEL: pass64BitConstArg
; ARM32: sub sp, {{.*}} #16
; ARM32: movw [[REG1:r.*]], {{.*}} ; 0xbeef ; ARM32: movw [[REG1:r.*]], {{.*}} ; 0xbeef
; ARM32: movt [[REG1]], {{.*}} ; 0xdead ; ARM32: movt [[REG1]], {{.*}} ; 0xdead
; ARM32: movw [[REG2:r.*]], {{.*}} ; 0x5678 ; ARM32: movw [[REG2:r.*]], {{.*}} ; 0x5678
...@@ -155,7 +148,6 @@ entry: ...@@ -155,7 +148,6 @@ entry:
; ARM32: {{mov|ldr}} r1 ; ARM32: {{mov|ldr}} r1
; ARM32: movw r2, #123 ; ARM32: movw r2, #123
; ARM32: bl {{.*}} ignore64BitArgNoInline ; ARM32: bl {{.*}} ignore64BitArgNoInline
; ARM32: add sp, {{.*}} #16
define internal i32 @pass64BitUndefArg() { define internal i32 @pass64BitUndefArg() {
entry: entry:
......
...@@ -49,7 +49,7 @@ end: ...@@ -49,7 +49,7 @@ end:
; ARM32-NOT: mov fp, sp ; ARM32-NOT: mov fp, sp
; ARM32: movw ip, #4{{.*}} ; ARM32: movw ip, #4{{.*}}
; ARM32-NEXT: sub sp, sp, ip ; ARM32-NEXT: sub sp, sp, ip
; ARM32: movw ip, #4232 ; ARM32: movw ip, #4248
; ARM32-NEXT: add ip, sp, ip ; ARM32-NEXT: add ip, sp, ip
; ARM32-NOT: movw ip ; ARM32-NOT: movw ip
; %t2 is the result of the "or", and %t2 will be passed via r1 to the call. ; %t2 is the result of the "or", and %t2 will be passed via r1 to the call.
...@@ -61,14 +61,10 @@ end: ...@@ -61,14 +61,10 @@ end:
; ARM32: str [[REG]], [ip, #-20] ; ARM32: str [[REG]], [ip, #-20]
; ARM32: b {{[a-f0-9]+}} ; ARM32: b {{[a-f0-9]+}}
; Now skip ahead to where the call in br_1 begins, to check how %t2 is used. ; Now skip ahead to where the call in br_1 begins, to check how %t2 is used.
; ARM32: movw ip, #4216 ; ARM32: movw ip, #4232
; ARM32-NEXT: add ip, sp, ip ; ARM32-NEXT: add ip, sp, ip
; ARM32: sub sp, sp, #16
; Now sp1 = sp0 - 16, but ip is still in terms of sp0.
; So, sp0 + 4212 == ip - 4.
; ARM32: ldr r2, [ip, #-4] ; ARM32: ldr r2, [ip, #-4]
; ARM32: bl {{.*}} dummy ; ARM32: bl {{.*}} dummy
; ARM32: add sp, sp
; The call clobbers ip, so we need to re-create the base register. ; The call clobbers ip, so we need to re-create the base register.
; ARM32: movw ip, #4{{.*}} ; ARM32: movw ip, #4{{.*}}
; ARM32: b {{[a-f0-9]+}} ; ARM32: b {{[a-f0-9]+}}
...@@ -122,12 +118,8 @@ end: ...@@ -122,12 +118,8 @@ end:
; Now skip ahead to where the call in br_1 begins, to check how %t2 is used. ; Now skip ahead to where the call in br_1 begins, to check how %t2 is used.
; ARM32: movw ip, #4120 ; ARM32: movw ip, #4120
; ARM32-NEXT: sub ip, fp, ip ; ARM32-NEXT: sub ip, fp, ip
; ARM32: sub sp, sp, #16
; Now sp1 = sp0 - 16, but ip is still in terms of fp0.
; So, fp0 - 4124 == ip - 4.
; ARM32: ldr r2, [ip, #-4] ; ARM32: ldr r2, [ip, #-4]
; ARM32: bl {{.*}} dummy ; ARM32: bl {{.*}} dummy
; ARM32: add sp, sp
; The call clobbers ip, so we need to re-create the base register. ; The call clobbers ip, so we need to re-create the base register.
; ARM32: movw ip, #4{{.*}} ; ARM32: movw ip, #4{{.*}}
; ARM32: b {{[a-f0-9]+}} ; ARM32: b {{[a-f0-9]+}}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment