Commit f4198548 by John Porto

Subzero. ARM32. No more SP frobbing.

Pre-computes the maximum stack size needed for outgoing arguments, pre-allocates it during the prolog, and deallocates it during the epilog. With this CL, StackAdjustments are no longer needed for ARM32, which will simplify rematerializing alloca'd variables. BUG= https://code.google.com/p/nativeclient/issues/detail?id=4076 R=sehr@chromium.org Review URL: https://codereview.chromium.org/1467473003 .
parent 5e0a8a71
......@@ -382,13 +382,6 @@ OperandARM32FlexReg::OperandARM32FlexReg(Cfg *Func, Type Ty, Variable *Reg,
Vars[1] = ShiftVar;
}
/// Constructs an AdjustStack instruction of kind InstARM32::Adjuststack.
/// SP is handed to the InstARM32 base constructor and is also registered as
/// the first source operand; SrcAmount is registered as the second source.
/// The numeric Amount is stored in the Amount member.
/// NOTE(review): the literal 2 passed to the base constructor presumably sizes
/// the source-operand list (emit() asserts getSrcSize() == 2) -- confirm
/// against the InstARM32 constructor.
InstARM32AdjustStack::InstARM32AdjustStack(Cfg *Func, Variable *SP,
SizeT Amount, Operand *SrcAmount)
: InstARM32(Func, InstARM32::Adjuststack, 2, SP), Amount(Amount) {
// Source 0 is SP itself; source 1 is the operand holding the amount.
addSource(SP);
addSource(SrcAmount);
}
InstARM32Br::InstARM32Br(Cfg *Func, const CfgNode *TargetTrue,
const CfgNode *TargetFalse,
const InstARM32Label *Label, CondARM32::Cond Pred)
......@@ -1272,33 +1265,6 @@ void InstARM32Pop::dump(const Cfg *Func) const {
}
}
void InstARM32AdjustStack::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 2);
Str << "\t"
<< "sub"
<< "\t";
getDest()->emit(Func);
Str << ", ";
getSrc(0)->emit(Func);
Str << ", ";
getSrc(1)->emit(Func);
Func->getTarget()->updateStackAdjustment(Amount);
}
void InstARM32AdjustStack::dump(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrDump();
getDest()->dump(Func);
Str << " = sub.i32 ";
getSrc(0)->dump(Func);
Str << ", " << Amount << " ; ";
getSrc(1)->dump(Func);
}
void InstARM32Push::emit(const Cfg *Func) const {
// TODO(jpp): Improve FP register save/restore.
if (!BuildDefs::dump())
......
......@@ -980,34 +980,6 @@ private:
const InstARM32Label *Label; // Intra-block branch target
};
/// AdjustStack instruction - subtracts the given amount from SP and updates
/// the stack offset during code emission.
///
/// Instances are obtained through create(); the default constructor, copy
/// constructor and copy assignment are deleted.
class InstARM32AdjustStack : public InstARM32 {
InstARM32AdjustStack() = delete;
InstARM32AdjustStack(const InstARM32AdjustStack &) = delete;
InstARM32AdjustStack &operator=(const InstARM32AdjustStack &) = delete;
public:
/// Note: We need both Amount and SrcAmount. If Amount is too large then it
/// needs to be copied to a register (so SrcAmount could be a register).
/// However, we also need the numeric Amount for bookkeeping, and it's hard to
/// pull that from the generic SrcAmount operand.
///
/// The instruction is placement-constructed in storage obtained from
/// Func->allocate<InstARM32AdjustStack>().
static InstARM32AdjustStack *create(Cfg *Func, Variable *SP, SizeT Amount,
Operand *SrcAmount) {
return new (Func->allocate<InstARM32AdjustStack>())
InstARM32AdjustStack(Func, SP, Amount, SrcAmount);
}
/// Emits the textual assembly form of the instruction.
void emit(const Cfg *Func) const override;
/// Dumps a human-readable form of the instruction.
void dump(const Cfg *Func) const override;
/// Returns whether Inst is of kind Adjuststack, as decided by isClassof().
static bool classof(const Inst *Inst) { return isClassof(Inst, Adjuststack); }
/// Returns the numeric adjustment amount supplied at construction.
SizeT getAmount() const { return Amount; }
private:
InstARM32AdjustStack(Cfg *Func, Variable *SP, SizeT Amount,
Operand *SrcAmount);
/// Numeric adjustment amount, kept separately from the SrcAmount operand.
const SizeT Amount;
};
/// Call instruction (bl/blx). Arguments should have already been pushed.
/// Technically bl and the register form of blx can be predicated, but we'll
/// leave that out until needed.
......
......@@ -311,6 +311,7 @@ protected:
virtual void lowerOther(const Inst *Instr);
virtual void genTargetHelperCallFor(Inst *Instr) = 0;
virtual uint32_t getCallStackArgumentsSizeBytes(const InstCall *Instr) = 0;
virtual void doAddressOptLoad() {}
virtual void doAddressOptStore() {}
......
......@@ -237,6 +237,7 @@ protected:
void lowerSwitch(const InstSwitch *Inst) override;
void lowerUnreachable(const InstUnreachable *Inst) override;
void prelowerPhis() override;
uint32_t getCallStackArgumentsSizeBytes(const InstCall *Instr) override;
void genTargetHelperCallFor(Inst *Instr) override { (void)Instr; }
void doAddressOptLoad() override;
void doAddressOptStore() override;
......@@ -289,10 +290,6 @@ protected:
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32Adc::create(Func, Dest, Src0, Src1, Pred));
}
void _adjust_stack(int32_t Amount, Operand *SrcAmount) {
Context.insert(InstARM32AdjustStack::create(
Func, getPhysicalRegister(RegARM32::Reg_sp), Amount, SrcAmount));
}
void _and(Variable *Dest, Variable *Src0, Operand *Src1,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert(InstARM32And::create(Func, Dest, Src0, Src1, Pred));
......@@ -813,6 +810,13 @@ protected:
Context.insert(InstARM32Vsub::create(Func, Dest, Src0, Src1));
}
// Iterates over the CFG and determines the maximum number of bytes needed for
// outgoing stack arguments. This information is later used during addProlog()
// to pre-allocate the outargs area.
// TODO(jpp): This could live in the Parser, if we provided a Target-specific
// method that the Parser could call.
void findMaxStackOutArgsSize();
/// Run a pass through stack variables and ensure that the offsets are legal.
/// If the offset is not legal, use a new base register that accounts for the
/// offset, such that the addressing mode offset bits are now legal.
......@@ -820,36 +824,35 @@ protected:
/// Returns true if the given Offset can be represented in a ldr/str.
bool isLegalMemOffset(Type Ty, int32_t Offset) const;
// Creates a new Base register centered around
// [OrigBaseReg, +/- Offset+StackAdjust].
Variable *newBaseRegister(int32_t Offset, int32_t StackAdjust,
Variable *OrigBaseReg);
/// Creates a new, legal OperandARM32Mem for accessing OrigBase + Offset +
/// StackAdjust. The returned mem operand is a legal operand for accessing
/// memory that is of type Ty.
// [OrigBaseReg, +/- Offset].
Variable *newBaseRegister(int32_t Offset, Variable *OrigBaseReg);
/// Creates a new, legal OperandARM32Mem for accessing OrigBase + Offset. The
/// returned mem operand is a legal operand for accessing memory that is of
/// type Ty.
///
/// If [OrigBaseReg, #Offset+StackAdjust] is encodable, then the method
/// returns a Mem operand expressing it. Otherwise,
/// If [OrigBaseReg, #Offset] is encodable, then the method returns a Mem
/// operand expressing it. Otherwise,
///
/// if [*NewBaseReg, #Offset+StackAdjust-*NewBaseOffset] is encodable, the
/// method will return that. Otherwise,
/// if [*NewBaseReg, #Offset-*NewBaseOffset] is encodable, the method will
/// return that. Otherwise,
///
/// a new base register ip=OrigBaseReg+Offset+StackAdjust is created, and the
/// method returns [ip, #0].
/// a new base register ip=OrigBaseReg+Offset is created, and the method
/// returns [ip, #0].
OperandARM32Mem *createMemOperand(Type Ty, int32_t Offset,
int32_t StackAdjust, Variable *OrigBaseReg,
Variable *OrigBaseReg,
Variable **NewBaseReg,
int32_t *NewBaseOffset);
/// Legalizes Mov if its Source (or Destination) is a spilled Variable. Moves
/// to memory become store instructions, and moves from memory, loads.
void legalizeMov(InstARM32Mov *Mov, int32_t StackAdjust,
Variable *OrigBaseReg, Variable **NewBaseReg,
int32_t *NewBaseOffset);
void legalizeMov(InstARM32Mov *Mov, Variable *OrigBaseReg,
Variable **NewBaseReg, int32_t *NewBaseOffset);
TargetARM32Features CPUFeatures;
bool UsesFramePointer = false;
bool NeedsStackAlignment = false;
bool MaybeLeafFunc = true;
size_t SpillAreaSizeBytes = 0;
uint32_t MaxOutArgsSizeBytes = 0;
// TODO(jpp): std::array instead of array.
static llvm::SmallBitVector TypeToRegisterSet[RCARM32_NUM];
static llvm::SmallBitVector RegisterAliases[RegARM32::Reg_NUM];
......
......@@ -235,6 +235,10 @@ protected:
void lowerSwitch(const InstSwitch *Inst) override;
void lowerUnreachable(const InstUnreachable *Inst) override;
void prelowerPhis() override;
/// This override ignores the call instruction and always reports zero bytes
/// of stack space for outgoing call arguments.
uint32_t getCallStackArgumentsSizeBytes(const InstCall * /*Instr*/) override {
  constexpr uint32_t NoOutArgsBytes = 0;
  return NoOutArgsBytes;
}
void genTargetHelperCallFor(Inst *Instr) override { (void)Instr; }
void doAddressOptLoad() override;
void doAddressOptStore() override;
......
......@@ -182,6 +182,10 @@ protected:
void lowerOther(const Inst *Instr) override;
void lowerRMW(const typename Traits::Insts::FakeRMW *RMW);
void prelowerPhis() override;
/// This override ignores the call instruction and always reports zero bytes
/// of stack space for outgoing call arguments.
uint32_t getCallStackArgumentsSizeBytes(const InstCall * /*Instr*/) override {
  constexpr uint32_t NoOutArgsBytes = 0;
  return NoOutArgsBytes;
}
void genTargetHelperCallFor(Inst *Instr) override { (void)Instr; }
void doAddressOptLoad() override;
void doAddressOptStore() override;
......
......@@ -92,25 +92,19 @@ entry:
; OPTM1: call {{.*}} R_{{.*}} ignore64BitArgNoInline
; ARM32-LABEL: pass64BitArg
; ARM32: sub sp, {{.*}} #16
; ARM32: str {{.*}}, [sp]
; ARM32: movw r2, #123
; ARM32: bl {{.*}} ignore64BitArgNoInline
; ARM32: add sp, {{.*}} #16
; ARM32: sub sp, {{.*}} #16
; ARM32: str {{.*}}, [sp]
; ARM32: {{mov|ldr}} r0
; ARM32: {{mov|ldr}} r1
; ARM32: movw r2, #123
; ARM32: bl {{.*}} ignore64BitArgNoInline
; ARM32: add sp, {{.*}} #16
; ARM32: sub sp, {{.*}} #16
; ARM32: str {{.*}}, [sp]
; ARM32: {{mov|ldr}} r0
; ARM32: {{mov|ldr}} r1
; ARM32: movw r2, #123
; ARM32: bl {{.*}} ignore64BitArgNoInline
; ARM32: add sp, {{.*}} #16
declare i32 @ignore64BitArgNoInline(i64, i32, i64)
......@@ -144,7 +138,6 @@ entry:
; OPTM1: call {{.*}} R_{{.*}} ignore64BitArgNoInline
; ARM32-LABEL: pass64BitConstArg
; ARM32: sub sp, {{.*}} #16
; ARM32: movw [[REG1:r.*]], {{.*}} ; 0xbeef
; ARM32: movt [[REG1]], {{.*}} ; 0xdead
; ARM32: movw [[REG2:r.*]], {{.*}} ; 0x5678
......@@ -155,7 +148,6 @@ entry:
; ARM32: {{mov|ldr}} r1
; ARM32: movw r2, #123
; ARM32: bl {{.*}} ignore64BitArgNoInline
; ARM32: add sp, {{.*}} #16
define internal i32 @pass64BitUndefArg() {
entry:
......
......@@ -49,7 +49,7 @@ end:
; ARM32-NOT: mov fp, sp
; ARM32: movw ip, #4{{.*}}
; ARM32-NEXT: sub sp, sp, ip
; ARM32: movw ip, #4232
; ARM32: movw ip, #4248
; ARM32-NEXT: add ip, sp, ip
; ARM32-NOT: movw ip
; %t2 is the result of the "or", and %t2 will be passed via r1 to the call.
......@@ -61,14 +61,10 @@ end:
; ARM32: str [[REG]], [ip, #-20]
; ARM32: b {{[a-f0-9]+}}
; Now skip ahead to where the call in br_1 begins, to check how %t2 is used.
; ARM32: movw ip, #4216
; ARM32: movw ip, #4232
; ARM32-NEXT: add ip, sp, ip
; ARM32: sub sp, sp, #16
; Now sp1 = sp0 - 16, but ip is still in terms of sp0.
; So, sp0 + 4212 == ip - 4.
; ARM32: ldr r2, [ip, #-4]
; ARM32: bl {{.*}} dummy
; ARM32: add sp, sp
; The call clobbers ip, so we need to re-create the base register.
; ARM32: movw ip, #4{{.*}}
; ARM32: b {{[a-f0-9]+}}
......@@ -122,12 +118,8 @@ end:
; Now skip ahead to where the call in br_1 begins, to check how %t2 is used.
; ARM32: movw ip, #4120
; ARM32-NEXT: sub ip, fp, ip
; ARM32: sub sp, sp, #16
; Now sp1 = sp0 - 16, but ip is still in terms of fp0.
; So, fp0 - 4124 == ip - 4.
; ARM32: ldr r2, [ip, #-4]
; ARM32: bl {{.*}} dummy
; ARM32: add sp, sp
; The call clobbers ip, so we need to re-create the base register.
; ARM32: movw ip, #4{{.*}}
; ARM32: b {{[a-f0-9]+}}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment