Commit 614140e2 by John Porto

Subzero. ARM32. Combine allocas.

parent fc22f770
......@@ -484,8 +484,14 @@ void Cfg::sortAndCombineAllocas(CfgVector<Inst *> &Allocas,
} else {
// Addressing is relative to the stack pointer or to a user pointer. Add
// the offset before adding the size of the object, because it grows
// upwards from the stack pointer.
Offsets.push_back(CurrentOffset);
// upwards from the stack pointer. In addition, if the addressing is
// relative to the stack pointer, we need to add the pre-computed max out
// args size bytes.
const uint32_t OutArgsOffsetOrZero =
(BaseVariableType == BVT_StackPointer)
? getTarget()->maxOutArgsSizeBytes()
: 0;
Offsets.push_back(CurrentOffset + OutArgsOffsetOrZero);
}
// Update the running offset of the fused alloca region.
CurrentOffset += Size;
......
......@@ -211,6 +211,7 @@ public:
virtual uint32_t getStackAlignment() const = 0;
virtual void reserveFixedAllocaArea(size_t Size, size_t Align) = 0;
virtual int32_t getFrameFixedAllocaOffset() const = 0;
/// Return the size, in bytes, of the area reserved for outgoing call
/// arguments. The default implementation returns 0; a target may override it
/// to report a non-zero size.
virtual uint32_t maxOutArgsSizeBytes() const { return 0; }
/// Return whether a 64-bit Variable should be split into a Variable64On32.
virtual bool shouldSplitToVariable64On32(Type Ty) const = 0;
......
......@@ -265,7 +265,7 @@ uint32_t TargetARM32::getCallStackArgumentsSizeBytes(const InstCall *Call) {
}
void TargetARM32::findMaxStackOutArgsSize() {
// MinNeededOutArgsBytes should be updated if the Target ever creates an
// MinNeededOutArgsBytes should be updated if the Target ever creates a
// high-level InstCall that requires more stack bytes.
constexpr size_t MinNeededOutArgsBytes = 0;
MaxOutArgsSizeBytes = MinNeededOutArgsBytes;
......@@ -291,7 +291,7 @@ void TargetARM32::translateO2() {
findMaxStackOutArgsSize();
// Do not merge Alloca instructions, and lay out the stack.
static constexpr bool SortAndCombineAllocas = false;
static constexpr bool SortAndCombineAllocas = true;
Func->processAllocas(SortAndCombineAllocas);
Func->dump("After Alloca processing");
......@@ -356,6 +356,7 @@ void TargetARM32::translateO2() {
regAlloc(RAK_Global);
if (Func->hasError())
return;
copyRegAllocFromInfWeightVariable64On32(Func->getVariables());
Func->dump("After linear scan regalloc");
......@@ -364,6 +365,8 @@ void TargetARM32::translateO2() {
Func->dump("After advanced Phi lowering");
}
ForbidTemporaryWithoutReg _(this);
// Stack frame mapping.
Func->genFrame();
if (Func->hasError())
......@@ -399,8 +402,8 @@ void TargetARM32::translateOm1() {
findMaxStackOutArgsSize();
// Do not merge Alloca instructions, and lay out the stack.
static constexpr bool SortAndCombineAllocas = false;
Func->processAllocas(SortAndCombineAllocas);
static constexpr bool DontSortAndCombineAllocas = false;
Func->processAllocas(DontSortAndCombineAllocas);
Func->dump("After Alloca processing");
Func->placePhiLoads();
......@@ -424,9 +427,12 @@ void TargetARM32::translateOm1() {
regAlloc(RAK_InfOnly);
if (Func->hasError())
return;
copyRegAllocFromInfWeightVariable64On32(Func->getVariables());
Func->dump("After regalloc of infinite-weight variables");
ForbidTemporaryWithoutReg _(this);
Func->genFrame();
if (Func->hasError())
return;
......@@ -520,6 +526,7 @@ void TargetARM32::emitVariable(const Variable *Var) const {
llvm::report_fatal_error(
"Infinite-weight Variable has no register assigned");
}
assert(!Var->isRematerializable());
int32_t Offset = Var->getStackOffset();
int32_t BaseRegNum = Var->getBaseRegNum();
if (BaseRegNum == Variable::NoRegister) {
......@@ -850,6 +857,9 @@ void TargetARM32::addProlog(CfgNode *Node) {
SpillAreaSizeBytes = StackSize - StackOffset;
}
// Combine fixed alloca with SpillAreaSize.
SpillAreaSizeBytes += FixedAllocaSizeBytes;
// Generate "sub sp, SpillAreaSizeBytes"
if (SpillAreaSizeBytes) {
// Use the scratch register if needed to legalize the immediate.
......@@ -857,7 +867,11 @@ void TargetARM32::addProlog(CfgNode *Node) {
Legal_Reg | Legal_Flex, getReservedTmpReg());
Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
_sub(SP, SP, SubAmount);
if (FixedAllocaAlignBytes > ARM32_STACK_ALIGNMENT_BYTES) {
alignRegisterPow2(SP, FixedAllocaAlignBytes);
}
}
Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
// Fill in stack offsets for stack args, and copy args into registers for
......@@ -1034,6 +1048,7 @@ OperandARM32Mem *TargetARM32::createMemOperand(Type Ty, int32_t Offset,
Variable *OrigBaseReg,
Variable **NewBaseReg,
int32_t *NewBaseOffset) {
assert(!OrigBaseReg->isRematerializable());
if (isLegalMemOffset(Ty, Offset)) {
return OperandARM32Mem::create(
Func, Ty, OrigBaseReg,
......@@ -1053,6 +1068,7 @@ OperandARM32Mem *TargetARM32::createMemOperand(Type Ty, int32_t Offset,
OffsetDiff = 0;
}
assert(!(*NewBaseReg)->isRematerializable());
return OperandARM32Mem::create(
Func, Ty, *NewBaseReg,
llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetDiff)),
......@@ -1076,8 +1092,9 @@ void TargetARM32::legalizeMov(InstARM32Mov *MovInstr, Variable *OrigBaseReg,
bool Legalized = false;
if (!Dest->hasReg()) {
auto *const SrcR = llvm::cast<Variable>(Src);
auto *SrcR = llvm::cast<Variable>(Src);
assert(SrcR->hasReg());
assert(!SrcR->isRematerializable());
const int32_t Offset = Dest->getStackOffset();
// This is a _mov(Mem(), Variable), i.e., a store.
_str(SrcR, createMemOperand(DestTy, Offset, OrigBaseReg, NewBaseReg,
......@@ -1087,12 +1104,26 @@ void TargetARM32::legalizeMov(InstARM32Mov *MovInstr, Variable *OrigBaseReg,
Context.insert(InstFakeDef::create(Func, Dest));
Legalized = true;
} else if (auto *Var = llvm::dyn_cast<Variable>(Src)) {
if (!Var->hasReg()) {
const int32_t Offset = Var->getStackOffset();
_ldr(Dest, createMemOperand(DestTy, Offset, OrigBaseReg, NewBaseReg,
NewBaseOffset),
MovInstr->getPredicate());
if (Var->isRematerializable()) {
// Rematerialization arithmetic.
const int32_t ExtraOffset =
(static_cast<SizeT>(Var->getRegNum()) == getFrameReg())
? getFrameFixedAllocaOffset()
: 0;
const int32_t Offset = Var->getStackOffset() + ExtraOffset;
Operand *OffsetRF = legalize(Ctx->getConstantInt32(Offset),
Legal_Reg | Legal_Flex, Dest->getRegNum());
_add(Dest, Var, OffsetRF);
Legalized = true;
} else {
if (!Var->hasReg()) {
const int32_t Offset = Var->getStackOffset();
_ldr(Dest, createMemOperand(DestTy, Offset, OrigBaseReg, NewBaseReg,
NewBaseOffset),
MovInstr->getPredicate());
Legalized = true;
}
}
}
......@@ -1163,13 +1194,15 @@ Operand *TargetARM32::loOperand(Operand *Operand) {
// increment) in case of duplication.
assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
Mem->getAddrMode() == OperandARM32Mem::NegOffset);
Variable *BaseR = legalizeToReg(Mem->getBase());
if (Mem->isRegReg()) {
return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(),
Mem->getIndex(), Mem->getShiftOp(),
Mem->getShiftAmt(), Mem->getAddrMode());
Variable *IndexR = legalizeToReg(Mem->getIndex());
return OperandARM32Mem::create(Func, IceType_i32, BaseR, IndexR,
Mem->getShiftOp(), Mem->getShiftAmt(),
Mem->getAddrMode());
} else {
return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(),
Mem->getOffset(), Mem->getAddrMode());
return OperandARM32Mem::create(Func, IceType_i32, BaseR, Mem->getOffset(),
Mem->getAddrMode());
}
}
llvm_unreachable("Unsupported operand type");
......@@ -1201,7 +1234,9 @@ Operand *TargetARM32::hiOperand(Operand *Operand) {
Variable *NewBase = Func->makeVariable(Base->getType());
lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase,
Base, Four));
return OperandARM32Mem::create(Func, SplitType, NewBase, Mem->getIndex(),
Variable *BaseR = legalizeToReg(NewBase);
Variable *IndexR = legalizeToReg(Mem->getIndex());
return OperandARM32Mem::create(Func, SplitType, BaseR, IndexR,
Mem->getShiftOp(), Mem->getShiftAmt(),
Mem->getAddrMode());
} else {
......@@ -1216,16 +1251,17 @@ Operand *TargetARM32::hiOperand(Operand *Operand) {
// mode into a RegReg addressing mode. Since NaCl sandboxing disallows
// RegReg addressing modes, prefer adding to base and replacing
// instead. Thus we leave the old offset alone.
Constant *Four = Ctx->getConstantInt32(4);
Constant *_4 = Ctx->getConstantInt32(4);
Variable *NewBase = Func->makeVariable(Base->getType());
lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add,
NewBase, Base, Four));
NewBase, Base, _4));
Base = NewBase;
} else {
Offset =
llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
}
return OperandARM32Mem::create(Func, SplitType, Base, Offset,
Variable *BaseR = legalizeToReg(Base);
return OperandARM32Mem::create(Func, SplitType, BaseR, Offset,
Mem->getAddrMode());
}
}
......@@ -1264,7 +1300,6 @@ llvm::SmallBitVector TargetARM32::getRegisterSet(RegSetMask Include,
}
void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
UsesFramePointer = true;
// Conservatively require the stack to be aligned. Some stack adjustment
// operations implemented below assume that the stack is aligned before the
// alloca. All the alloca code ensures that the stack alignment is preserved
......@@ -1272,29 +1307,53 @@ void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
// cases.
NeedsStackAlignment = true;
// TODO(stichnot): minimize the number of adjustments of SP, etc.
Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
Variable *Dest = Inst->getDest();
uint32_t AlignmentParam = Inst->getAlignInBytes();
// For default align=0, set it to the real value 1, to avoid any
// bit-manipulation problems below.
AlignmentParam = std::max(AlignmentParam, 1u);
const uint32_t AlignmentParam = std::max(1u, Inst->getAlignInBytes());
// LLVM enforces power of 2 alignment.
assert(llvm::isPowerOf2_32(AlignmentParam));
assert(llvm::isPowerOf2_32(ARM32_STACK_ALIGNMENT_BYTES));
uint32_t Alignment = std::max(AlignmentParam, ARM32_STACK_ALIGNMENT_BYTES);
if (Alignment > ARM32_STACK_ALIGNMENT_BYTES) {
const uint32_t Alignment =
std::max(AlignmentParam, ARM32_STACK_ALIGNMENT_BYTES);
const bool OverAligned = Alignment > ARM32_STACK_ALIGNMENT_BYTES;
const bool OptM1 = Ctx->getFlags().getOptLevel() == Opt_m1;
const bool AllocaWithKnownOffset = Inst->getKnownFrameOffset();
const bool UseFramePointer =
hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;
if (UseFramePointer)
setHasFramePointer();
Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
if (OverAligned) {
alignRegisterPow2(SP, Alignment);
}
Variable *Dest = Inst->getDest();
Operand *TotalSize = Inst->getSizeInBytes();
if (const auto *ConstantTotalSize =
llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
uint32_t Value = ConstantTotalSize->getValue();
Value = Utils::applyAlignment(Value, Alignment);
Operand *SubAmount = legalize(Ctx->getConstantInt32(Value));
_sub(SP, SP, SubAmount);
const uint32_t Value =
Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment);
// Constant size alloca.
if (!UseFramePointer) {
// If we don't need a Frame Pointer, this alloca has a known offset to the
// stack pointer. We don't need to adjust the stack pointer, nor assign any
// value to Dest, as Dest is rematerializable.
assert(Dest->isRematerializable());
FixedAllocaSizeBytes += Value;
Context.insert(InstFakeDef::create(Func, Dest));
return;
}
// If a frame pointer is required, then we need to store the alloca'd result
// in Dest.
Operand *SubAmountRF =
legalize(Ctx->getConstantInt32(Value), Legal_Reg | Legal_Flex);
_sub(SP, SP, SubAmountRF);
} else {
// Non-constant sizes need to be adjusted to the next highest multiple of
// the required alignment at runtime.
......@@ -1306,6 +1365,8 @@ void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
alignRegisterPow2(T, Alignment);
_sub(SP, SP, T);
}
// Adds back a few bytes to SP to account for the out args area.
Variable *T = SP;
if (MaxOutArgsSizeBytes != 0) {
T = makeReg(getPointerType());
......@@ -1313,6 +1374,7 @@ void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
Ctx->getConstantInt32(MaxOutArgsSizeBytes), Legal_Reg | Legal_Flex);
_add(T, SP, OutArgsSizeRF);
}
_mov(Dest, T);
}
......@@ -1976,6 +2038,12 @@ void TargetARM32::lowerInt64Arithmetic(InstArithmetic::OpKind Op,
void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
Variable *Dest = Inst->getDest();
if (Dest->isRematerializable()) {
Context.insert(InstFakeDef::create(Func, Dest));
return;
}
if (Dest->getType() == IceType_i1) {
lowerInt1Arithmetic(Inst);
return;
......@@ -2139,8 +2207,8 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
if (Srcs.hasConstOperand()) {
// TODO(jpp): lowering Src0R here is wrong -- Src0R is not guaranteed
// to be used.
Variable *Src0R = Srcs.src0R(this);
if (Srcs.immediateIsFlexEncodable()) {
Variable *Src0R = Srcs.src0R(this);
Operand *Src1RF = Srcs.src1RF(this);
if (Srcs.swappedOperands()) {
_rsb(T, Src0R, Src1RF);
......@@ -2151,6 +2219,7 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
return;
}
if (!Srcs.swappedOperands() && Srcs.negatedImmediateIsFlexEncodable()) {
Variable *Src0R = Srcs.src0R(this);
Operand *Src1F = Srcs.negatedSrc1F(this);
_add(T, Src0R, Src1F);
_mov(Dest, T);
......@@ -2215,6 +2284,12 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
void TargetARM32::lowerAssign(const InstAssign *Inst) {
Variable *Dest = Inst->getDest();
if (Dest->isRematerializable()) {
Context.insert(InstFakeDef::create(Func, Dest));
return;
}
Operand *Src0 = Inst->getSrc(0);
assert(Dest->getType() == Src0->getType());
if (Dest->getType() == IceType_i64) {
......@@ -4425,13 +4500,17 @@ OperandARM32Mem *TargetARM32::formAddressingMode(Type Ty, Cfg *Func,
assert(OffsetImm < 0 ? (ValidImmMask & -OffsetImm) == -OffsetImm
: (ValidImmMask & OffsetImm) == OffsetImm);
Variable *BaseR = makeReg(getPointerType());
Context.insert(InstAssign::create(Func, BaseR, BaseVar));
if (OffsetReg != nullptr) {
return OperandARM32Mem::create(Func, Ty, BaseVar, OffsetReg, ShiftKind,
Variable *OffsetR = makeReg(getPointerType());
Context.insert(InstAssign::create(Func, OffsetR, OffsetReg));
return OperandARM32Mem::create(Func, Ty, BaseR, OffsetR, ShiftKind,
OffsetRegShamt);
}
return OperandARM32Mem::create(
Func, Ty, BaseVar,
Func, Ty, BaseR,
llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetImm)));
}
......@@ -4630,7 +4709,8 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
if (RegNum == Variable::NoRegister) {
if (Variable *Subst = getContext().availabilityGet(From)) {
// At this point we know there is a potential substitution available.
if (Subst->mustHaveReg() && !Subst->hasReg()) {
if (!Subst->isRematerializable() && Subst->mustHaveReg() &&
!Subst->hasReg()) {
// At this point we know the substitution will have a register.
if (From->getType() == Subst->getType()) {
// At this point we know the substitution's register is compatible.
......@@ -4788,6 +4868,13 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
}
if (auto *Var = llvm::dyn_cast<Variable>(From)) {
if (Var->isRematerializable()) {
// TODO(jpp): We don't need to rematerialize Var if legalize() was invoked
// for a Variable in a Mem operand.
Variable *T = makeReg(Var->getType(), RegNum);
_mov(T, Var);
return T;
}
// Check if the variable is guaranteed a physical register. This can happen
// either when the variable is pre-colored or when it is assigned infinite
// weight.
......@@ -4844,9 +4931,9 @@ OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) {
// If we didn't do address mode optimization, then we only have a
// base/offset to work with. ARM always requires a base register, so
// just use that to hold the operand.
Variable *Base = legalizeToReg(Operand);
Variable *BaseR = legalizeToReg(Operand);
return OperandARM32Mem::create(
Func, Ty, Base,
Func, Ty, BaseR,
llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)));
}
......@@ -4863,6 +4950,7 @@ Variable64On32 *TargetARM32::makeI64RegPair() {
Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) {
// There aren't any 64-bit integer registers for ARM32.
assert(Type != IceType_i64);
assert(AllowTemporaryWithNoReg || RegNum != Variable::NoRegister);
Variable *Reg = Func->makeVariable(Type);
if (RegNum == Variable::NoRegister)
Reg->setMustHaveReg();
......@@ -4871,7 +4959,8 @@ Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) {
return Reg;
}
void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align) {
void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align,
int32_t TmpRegNum) {
assert(llvm::isPowerOf2_32(Align));
uint32_t RotateAmt;
uint32_t Immed_8;
......@@ -4880,10 +4969,12 @@ void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align) {
// it fits at all). Assume Align is usually small, in which case BIC works
// better. Thus, this rounds down to the alignment.
if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) {
Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex);
Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex,
TmpRegNum);
_bic(Reg, Reg, Mask);
} else {
Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex);
Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex,
TmpRegNum);
_and(Reg, Reg, Mask);
}
}
......
......@@ -99,16 +99,15 @@ public:
}
uint32_t getStackAlignment() const override;
void reserveFixedAllocaArea(size_t Size, size_t Align) override {
// TODO(sehr,jpp): Implement fixed stack layout.
(void)Size;
(void)Align;
llvm::report_fatal_error("Not yet implemented");
FixedAllocaSizeBytes = Size;
assert(llvm::isPowerOf2_32(Align));
FixedAllocaAlignBytes = Align;
PrologEmitsFixedAllocas = true;
}
int32_t getFrameFixedAllocaOffset() const override {
// TODO(sehr,jpp): Implement fixed stack layout.
llvm::report_fatal_error("Not yet implemented");
return 0;
return FixedAllocaSizeBytes - (SpillAreaSizeBytes - MaxOutArgsSizeBytes);
}
/// Return the value currently stored in MaxOutArgsSizeBytes.
uint32_t maxOutArgsSizeBytes() const override { return MaxOutArgsSizeBytes; }
bool shouldSplitToVariable64On32(Type Ty) const override {
return Ty == IceType_i64;
......@@ -250,7 +249,8 @@ protected:
Variable *makeReg(Type Ty, int32_t RegNum = Variable::NoRegister);
static Type stackSlotType();
Variable *copyToReg(Operand *Src, int32_t RegNum = Variable::NoRegister);
void alignRegisterPow2(Variable *Reg, uint32_t Align);
void alignRegisterPow2(Variable *Reg, uint32_t Align,
int32_t TmpRegNum = Variable::NoRegister);
/// Returns a vector in a register with the given constant entries.
Variable *makeVectorOfZeros(Type Ty, int32_t RegNum = Variable::NoRegister);
......@@ -811,7 +811,7 @@ protected:
}
// Iterates over the CFG and determines the maximum outgoing stack arguments
// bytes. This information is later used during addProlog() do pre-allocate
// bytes. This information is later used during addProlog() to pre-allocate
// the outargs area.
// TODO(jpp): This could live in the Parser, if we provided a Target-specific
// method that the Parser could call.
......@@ -852,6 +852,9 @@ protected:
bool NeedsStackAlignment = false;
bool MaybeLeafFunc = true;
size_t SpillAreaSizeBytes = 0;
size_t FixedAllocaSizeBytes = 0;
size_t FixedAllocaAlignBytes = 0;
bool PrologEmitsFixedAllocas = false;
uint32_t MaxOutArgsSizeBytes = 0;
// TODO(jpp): std::array instead of array.
static llvm::SmallBitVector TypeToRegisterSet[RCARM32_NUM];
......@@ -970,6 +973,29 @@ private:
};
BoolComputationTracker BoolComputations;
// AllowTemporaryWithNoReg indicates if TargetARM32::makeReg() can be invoked
// without specifying a physical register. This is needed for creating unbound
// temporaries during Ice -> ARM lowering, but before register allocation.
// This is a safeguard to ensure that, during the legalization post-passes, no
// unbound temporaries are created.
bool AllowTemporaryWithNoReg = true;
// ForbidTemporaryWithoutReg is a RAII class that manages
// AllowTemporaryWithNoReg. While an instance is alive, the owning target's
// AllowTemporaryWithNoReg flag is false; when the instance is destroyed, the
// flag is restored to the value it had at construction time. Restoring the
// saved value (instead of unconditionally writing true) keeps the flag correct
// if guards are ever nested or created while the flag is already false.
class ForbidTemporaryWithoutReg {
  ForbidTemporaryWithoutReg() = delete;
  ForbidTemporaryWithoutReg(const ForbidTemporaryWithoutReg &) = delete;
  ForbidTemporaryWithoutReg &
  operator=(const ForbidTemporaryWithoutReg &) = delete;

public:
  /// Records the current value of Target->AllowTemporaryWithNoReg and then
  /// clears the flag. Target must be non-null and must outlive this object.
  explicit ForbidTemporaryWithoutReg(TargetARM32 *Target)
      : Target(Target), PreviousValue(Target->AllowTemporaryWithNoReg) {
    Target->AllowTemporaryWithNoReg = false;
  }

  /// Puts the flag back to the value observed by the constructor.
  ~ForbidTemporaryWithoutReg() {
    Target->AllowTemporaryWithNoReg = PreviousValue;
  }

private:
  // The target whose flag is being managed.
  TargetARM32 *const Target;
  // Value of the flag before this guard cleared it.
  const bool PreviousValue;
};
};
class TargetDataARM32 final : public TargetDataLowering {
......
......@@ -28,70 +28,65 @@ define internal i32 @AllocBigAlign() {
; ASM-LABEL:AllocBigAlign:
; ASM-NEXT:.LAllocBigAlign$__0:
; ASM-NEXT: push {fp}
; ASM-NEXT: mov fp, sp
; ASM-NEXT: sub sp, sp, #12
; ASM-NEXT: bic sp, sp, #31
; ASM-NEXT: sub sp, sp, #32
; ASM-NEXT: mov r0, sp
; ASM-NEXT: mov sp, fp
; ASM-NEXT: pop {fp}
; ASM-NEXT: # fp = def.pseudo
; ASM-NEXT: bx lr
; DIS-LABEL:00000000 <AllocBigAlign>:
; DIS-NEXT: 0: e52db004
; DIS-NEXT: 4: e1a0b00d
; DIS-NEXT: 8: e24dd00c
; DIS-NEXT: c: e3cdd01f
; DIS-NEXT: 10: e24dd020
; DIS-NEXT: 14: e1a0000d
; DIS-NEXT: 18: e1a0d00b
; DIS-NEXT: 1c: e49db004
; DIS-NEXT: 20: e12fff1e
; IASM-LABEL:AllocBigAlign:
; IASM-NEXT:.LAllocBigAlign$__0:
; ASM-NEXT: push {fp}
; DIS-NEXT: 0: e52db004
; IASM-NEXT: .byte 0x4
; IASM-NEXT: .byte 0xb0
; IASM-NEXT: .byte 0x2d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: mov fp, sp
; DIS-NEXT: 4: e1a0b00d
; IASM: .byte 0xd
; IASM-NEXT: .byte 0xb0
; IASM-NEXT: .byte 0xa0
; IASM-NEXT: .byte 0xe1
; IASM: .byte 0xc
; ASM-NEXT: sub sp, sp, #32
; DIS-NEXT: 8: e24dd020
; IASM: .byte 0x20
; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0x4d
; IASM-NEXT: .byte 0xe2
; ASM-NEXT: bic sp, sp, #31
; DIS-NEXT: c: e3cdd01f
; IASM: .byte 0x1f
; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0xcd
; IASM-NEXT: .byte 0xe3
; IASM: .byte 0x20
; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0x4d
; IASM-NEXT: .byte 0xe2
; ASM-NEXT: # sp = def.pseudo
; IASM: .byte 0xd
; ASM-NEXT: add r0, sp, #0
; DIS-NEXT: 10: e28d0000
; IASM: .byte 0x0
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0xa0
; IASM-NEXT: .byte 0xe1
; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe2
; ASM-NEXT: mov sp, fp
; DIS-NEXT: 14: e1a0d00b
; IASM: .byte 0xb
; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0xa0
; IASM-NEXT: .byte 0xe1
; ASM-NEXT: pop {fp}
; DIS-NEXT: 18: e49db004
; IASM-NEXT: .byte 0x4
; IASM-NEXT: .byte 0xb0
; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe4
; ASM-NEXT: # fp = def.pseudo
; ASM-NEXT: bx lr
; DIS-NEXT: 1c: e12fff1e
; IASM: .byte 0x1e
; IASM-NEXT: .byte 0xff
; IASM-NEXT: .byte 0x2f
......
......@@ -17,14 +17,14 @@
; RUN: --disassemble --target arm32 -i %s --args -O2 --skip-unimplemented \
; RUN: -allow-externally-defined-symbols \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 %s
; RUN: --command FileCheck --check-prefix ARM32 --check-prefix=ARM-OPT2 %s
; RUN: %if --need=target_ARM32 --need=allow_dump \
; RUN: --command %p2i --filetype=asm --assemble \
; RUN: --disassemble --target arm32 -i %s --args -Om1 --skip-unimplemented \
; RUN: -allow-externally-defined-symbols \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 %s
; RUN: --command FileCheck --check-prefix ARM32 --check-prefix=ARM-OPTM1 %s
define internal void @fixed_416_align_16(i32 %n) {
entry:
......@@ -47,8 +47,9 @@ entry:
; CHECK-OPTM1: call {{.*}} R_{{.*}} f1
; ARM32-LABEL: fixed_416_align_16
; ARM32: sub sp, sp, #416
; ARM32: bl {{.*}} R_{{.*}} f1
; ARM32-OPT2: sub sp, sp, #428
; ARM32-OPTM1: sub sp, sp, #416
; ARM32: bl {{.*}} R_{{.*}} f1
define internal void @fixed_416_align_32(i32 %n) {
entry:
......@@ -67,9 +68,10 @@ entry:
; CHECK: call {{.*}} R_{{.*}} f1
; ARM32-LABEL: fixed_416_align_32
; ARM32: bic sp, sp, #31
; ARM32: sub sp, sp, #416
; ARM32: bl {{.*}} R_{{.*}} f1
; ARM32-OPT2: sub sp, sp, #424
; ARM32-OPTM1: sub sp, sp, #416
; ARM32: bic sp, sp, #31
; ARM32: bl {{.*}} R_{{.*}} f1
; Show that the amount to allocate will be rounded up.
define internal void @fixed_351_align_16(i32 %n) {
......@@ -91,8 +93,9 @@ entry:
; CHECK-OPTM1: call {{.*}} R_{{.*}} f1
; ARM32-LABEL: fixed_351_align_16
; ARM32: sub sp, sp, #352
; ARM32: bl {{.*}} R_{{.*}} f1
; ARM32-OPT2: sub sp, sp, #364
; ARM32-OPTM1: sub sp, sp, #352
; ARM32: bl {{.*}} R_{{.*}} f1
define internal void @fixed_351_align_32(i32 %n) {
entry:
......@@ -111,9 +114,10 @@ entry:
; CHECK: call {{.*}} R_{{.*}} f1
; ARM32-LABEL: fixed_351_align_32
; ARM32: bic sp, sp, #31
; ARM32: sub sp, sp, #352
; ARM32: bl {{.*}} R_{{.*}} f1
; ARM32-OPT2: sub sp, sp, #360
; ARM32-OPTM1: sub sp, sp, #352
; ARM32: bic sp, sp, #31
; ARM32: bl {{.*}} R_{{.*}} f1
declare void @f1(i32 %ignored)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment