Commit 614140e2 by John Porto

Subzero. ARM32. Combine allocas.

parent fc22f770
...@@ -484,8 +484,14 @@ void Cfg::sortAndCombineAllocas(CfgVector<Inst *> &Allocas, ...@@ -484,8 +484,14 @@ void Cfg::sortAndCombineAllocas(CfgVector<Inst *> &Allocas,
} else { } else {
// Addressing is relative to the stack pointer or to a user pointer. Add // Addressing is relative to the stack pointer or to a user pointer. Add
// the offset before adding the size of the object, because it grows // the offset before adding the size of the object, because it grows
// upwards from the stack pointer. // upwards from the stack pointer. In addition, if the addressing is
Offsets.push_back(CurrentOffset); // relative to the stack pointer, we need to add the pre-computed max out
// args size bytes.
const uint32_t OutArgsOffsetOrZero =
(BaseVariableType == BVT_StackPointer)
? getTarget()->maxOutArgsSizeBytes()
: 0;
Offsets.push_back(CurrentOffset + OutArgsOffsetOrZero);
} }
// Update the running offset of the fused alloca region. // Update the running offset of the fused alloca region.
CurrentOffset += Size; CurrentOffset += Size;
......
...@@ -211,6 +211,7 @@ public: ...@@ -211,6 +211,7 @@ public:
virtual uint32_t getStackAlignment() const = 0; virtual uint32_t getStackAlignment() const = 0;
virtual void reserveFixedAllocaArea(size_t Size, size_t Align) = 0; virtual void reserveFixedAllocaArea(size_t Size, size_t Align) = 0;
virtual int32_t getFrameFixedAllocaOffset() const = 0; virtual int32_t getFrameFixedAllocaOffset() const = 0;
virtual uint32_t maxOutArgsSizeBytes() const { return 0; }
/// Return whether a 64-bit Variable should be split into a Variable64On32. /// Return whether a 64-bit Variable should be split into a Variable64On32.
virtual bool shouldSplitToVariable64On32(Type Ty) const = 0; virtual bool shouldSplitToVariable64On32(Type Ty) const = 0;
......
...@@ -265,7 +265,7 @@ uint32_t TargetARM32::getCallStackArgumentsSizeBytes(const InstCall *Call) { ...@@ -265,7 +265,7 @@ uint32_t TargetARM32::getCallStackArgumentsSizeBytes(const InstCall *Call) {
} }
void TargetARM32::findMaxStackOutArgsSize() { void TargetARM32::findMaxStackOutArgsSize() {
// MinNeededOutArgsBytes should be updated if the Target ever creates an // MinNeededOutArgsBytes should be updated if the Target ever creates a
// high-level InstCall that requires more stack bytes. // high-level InstCall that requires more stack bytes.
constexpr size_t MinNeededOutArgsBytes = 0; constexpr size_t MinNeededOutArgsBytes = 0;
MaxOutArgsSizeBytes = MinNeededOutArgsBytes; MaxOutArgsSizeBytes = MinNeededOutArgsBytes;
...@@ -291,7 +291,7 @@ void TargetARM32::translateO2() { ...@@ -291,7 +291,7 @@ void TargetARM32::translateO2() {
findMaxStackOutArgsSize(); findMaxStackOutArgsSize();
// Do not merge Alloca instructions, and lay out the stack. // Do not merge Alloca instructions, and lay out the stack.
static constexpr bool SortAndCombineAllocas = false; static constexpr bool SortAndCombineAllocas = true;
Func->processAllocas(SortAndCombineAllocas); Func->processAllocas(SortAndCombineAllocas);
Func->dump("After Alloca processing"); Func->dump("After Alloca processing");
...@@ -356,6 +356,7 @@ void TargetARM32::translateO2() { ...@@ -356,6 +356,7 @@ void TargetARM32::translateO2() {
regAlloc(RAK_Global); regAlloc(RAK_Global);
if (Func->hasError()) if (Func->hasError())
return; return;
copyRegAllocFromInfWeightVariable64On32(Func->getVariables()); copyRegAllocFromInfWeightVariable64On32(Func->getVariables());
Func->dump("After linear scan regalloc"); Func->dump("After linear scan regalloc");
...@@ -364,6 +365,8 @@ void TargetARM32::translateO2() { ...@@ -364,6 +365,8 @@ void TargetARM32::translateO2() {
Func->dump("After advanced Phi lowering"); Func->dump("After advanced Phi lowering");
} }
ForbidTemporaryWithoutReg _(this);
// Stack frame mapping. // Stack frame mapping.
Func->genFrame(); Func->genFrame();
if (Func->hasError()) if (Func->hasError())
...@@ -399,8 +402,8 @@ void TargetARM32::translateOm1() { ...@@ -399,8 +402,8 @@ void TargetARM32::translateOm1() {
findMaxStackOutArgsSize(); findMaxStackOutArgsSize();
// Do not merge Alloca instructions, and lay out the stack. // Do not merge Alloca instructions, and lay out the stack.
static constexpr bool SortAndCombineAllocas = false; static constexpr bool DontSortAndCombineAllocas = false;
Func->processAllocas(SortAndCombineAllocas); Func->processAllocas(DontSortAndCombineAllocas);
Func->dump("After Alloca processing"); Func->dump("After Alloca processing");
Func->placePhiLoads(); Func->placePhiLoads();
...@@ -424,9 +427,12 @@ void TargetARM32::translateOm1() { ...@@ -424,9 +427,12 @@ void TargetARM32::translateOm1() {
regAlloc(RAK_InfOnly); regAlloc(RAK_InfOnly);
if (Func->hasError()) if (Func->hasError())
return; return;
copyRegAllocFromInfWeightVariable64On32(Func->getVariables()); copyRegAllocFromInfWeightVariable64On32(Func->getVariables());
Func->dump("After regalloc of infinite-weight variables"); Func->dump("After regalloc of infinite-weight variables");
ForbidTemporaryWithoutReg _(this);
Func->genFrame(); Func->genFrame();
if (Func->hasError()) if (Func->hasError())
return; return;
...@@ -520,6 +526,7 @@ void TargetARM32::emitVariable(const Variable *Var) const { ...@@ -520,6 +526,7 @@ void TargetARM32::emitVariable(const Variable *Var) const {
llvm::report_fatal_error( llvm::report_fatal_error(
"Infinite-weight Variable has no register assigned"); "Infinite-weight Variable has no register assigned");
} }
assert(!Var->isRematerializable());
int32_t Offset = Var->getStackOffset(); int32_t Offset = Var->getStackOffset();
int32_t BaseRegNum = Var->getBaseRegNum(); int32_t BaseRegNum = Var->getBaseRegNum();
if (BaseRegNum == Variable::NoRegister) { if (BaseRegNum == Variable::NoRegister) {
...@@ -850,6 +857,9 @@ void TargetARM32::addProlog(CfgNode *Node) { ...@@ -850,6 +857,9 @@ void TargetARM32::addProlog(CfgNode *Node) {
SpillAreaSizeBytes = StackSize - StackOffset; SpillAreaSizeBytes = StackSize - StackOffset;
} }
// Combine fixed alloca with SpillAreaSize.
SpillAreaSizeBytes += FixedAllocaSizeBytes;
// Generate "sub sp, SpillAreaSizeBytes" // Generate "sub sp, SpillAreaSizeBytes"
if (SpillAreaSizeBytes) { if (SpillAreaSizeBytes) {
// Use the scratch register if needed to legalize the immediate. // Use the scratch register if needed to legalize the immediate.
...@@ -857,7 +867,11 @@ void TargetARM32::addProlog(CfgNode *Node) { ...@@ -857,7 +867,11 @@ void TargetARM32::addProlog(CfgNode *Node) {
Legal_Reg | Legal_Flex, getReservedTmpReg()); Legal_Reg | Legal_Flex, getReservedTmpReg());
Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
_sub(SP, SP, SubAmount); _sub(SP, SP, SubAmount);
if (FixedAllocaAlignBytes > ARM32_STACK_ALIGNMENT_BYTES) {
alignRegisterPow2(SP, FixedAllocaAlignBytes);
}
} }
Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
// Fill in stack offsets for stack args, and copy args into registers for // Fill in stack offsets for stack args, and copy args into registers for
...@@ -1034,6 +1048,7 @@ OperandARM32Mem *TargetARM32::createMemOperand(Type Ty, int32_t Offset, ...@@ -1034,6 +1048,7 @@ OperandARM32Mem *TargetARM32::createMemOperand(Type Ty, int32_t Offset,
Variable *OrigBaseReg, Variable *OrigBaseReg,
Variable **NewBaseReg, Variable **NewBaseReg,
int32_t *NewBaseOffset) { int32_t *NewBaseOffset) {
assert(!OrigBaseReg->isRematerializable());
if (isLegalMemOffset(Ty, Offset)) { if (isLegalMemOffset(Ty, Offset)) {
return OperandARM32Mem::create( return OperandARM32Mem::create(
Func, Ty, OrigBaseReg, Func, Ty, OrigBaseReg,
...@@ -1053,6 +1068,7 @@ OperandARM32Mem *TargetARM32::createMemOperand(Type Ty, int32_t Offset, ...@@ -1053,6 +1068,7 @@ OperandARM32Mem *TargetARM32::createMemOperand(Type Ty, int32_t Offset,
OffsetDiff = 0; OffsetDiff = 0;
} }
assert(!(*NewBaseReg)->isRematerializable());
return OperandARM32Mem::create( return OperandARM32Mem::create(
Func, Ty, *NewBaseReg, Func, Ty, *NewBaseReg,
llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetDiff)), llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetDiff)),
...@@ -1076,8 +1092,9 @@ void TargetARM32::legalizeMov(InstARM32Mov *MovInstr, Variable *OrigBaseReg, ...@@ -1076,8 +1092,9 @@ void TargetARM32::legalizeMov(InstARM32Mov *MovInstr, Variable *OrigBaseReg,
bool Legalized = false; bool Legalized = false;
if (!Dest->hasReg()) { if (!Dest->hasReg()) {
auto *const SrcR = llvm::cast<Variable>(Src); auto *SrcR = llvm::cast<Variable>(Src);
assert(SrcR->hasReg()); assert(SrcR->hasReg());
assert(!SrcR->isRematerializable());
const int32_t Offset = Dest->getStackOffset(); const int32_t Offset = Dest->getStackOffset();
// This is a _mov(Mem(), Variable), i.e., a store. // This is a _mov(Mem(), Variable), i.e., a store.
_str(SrcR, createMemOperand(DestTy, Offset, OrigBaseReg, NewBaseReg, _str(SrcR, createMemOperand(DestTy, Offset, OrigBaseReg, NewBaseReg,
...@@ -1087,12 +1104,26 @@ void TargetARM32::legalizeMov(InstARM32Mov *MovInstr, Variable *OrigBaseReg, ...@@ -1087,12 +1104,26 @@ void TargetARM32::legalizeMov(InstARM32Mov *MovInstr, Variable *OrigBaseReg,
Context.insert(InstFakeDef::create(Func, Dest)); Context.insert(InstFakeDef::create(Func, Dest));
Legalized = true; Legalized = true;
} else if (auto *Var = llvm::dyn_cast<Variable>(Src)) { } else if (auto *Var = llvm::dyn_cast<Variable>(Src)) {
if (!Var->hasReg()) { if (Var->isRematerializable()) {
const int32_t Offset = Var->getStackOffset(); // Rematerialization arithmetic.
_ldr(Dest, createMemOperand(DestTy, Offset, OrigBaseReg, NewBaseReg, const int32_t ExtraOffset =
NewBaseOffset), (static_cast<SizeT>(Var->getRegNum()) == getFrameReg())
MovInstr->getPredicate()); ? getFrameFixedAllocaOffset()
: 0;
const int32_t Offset = Var->getStackOffset() + ExtraOffset;
Operand *OffsetRF = legalize(Ctx->getConstantInt32(Offset),
Legal_Reg | Legal_Flex, Dest->getRegNum());
_add(Dest, Var, OffsetRF);
Legalized = true; Legalized = true;
} else {
if (!Var->hasReg()) {
const int32_t Offset = Var->getStackOffset();
_ldr(Dest, createMemOperand(DestTy, Offset, OrigBaseReg, NewBaseReg,
NewBaseOffset),
MovInstr->getPredicate());
Legalized = true;
}
} }
} }
...@@ -1163,13 +1194,15 @@ Operand *TargetARM32::loOperand(Operand *Operand) { ...@@ -1163,13 +1194,15 @@ Operand *TargetARM32::loOperand(Operand *Operand) {
// increment) in case of duplication. // increment) in case of duplication.
assert(Mem->getAddrMode() == OperandARM32Mem::Offset || assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
Mem->getAddrMode() == OperandARM32Mem::NegOffset); Mem->getAddrMode() == OperandARM32Mem::NegOffset);
Variable *BaseR = legalizeToReg(Mem->getBase());
if (Mem->isRegReg()) { if (Mem->isRegReg()) {
return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(), Variable *IndexR = legalizeToReg(Mem->getIndex());
Mem->getIndex(), Mem->getShiftOp(), return OperandARM32Mem::create(Func, IceType_i32, BaseR, IndexR,
Mem->getShiftAmt(), Mem->getAddrMode()); Mem->getShiftOp(), Mem->getShiftAmt(),
Mem->getAddrMode());
} else { } else {
return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(), return OperandARM32Mem::create(Func, IceType_i32, BaseR, Mem->getOffset(),
Mem->getOffset(), Mem->getAddrMode()); Mem->getAddrMode());
} }
} }
llvm_unreachable("Unsupported operand type"); llvm_unreachable("Unsupported operand type");
...@@ -1201,7 +1234,9 @@ Operand *TargetARM32::hiOperand(Operand *Operand) { ...@@ -1201,7 +1234,9 @@ Operand *TargetARM32::hiOperand(Operand *Operand) {
Variable *NewBase = Func->makeVariable(Base->getType()); Variable *NewBase = Func->makeVariable(Base->getType());
lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase, lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase,
Base, Four)); Base, Four));
return OperandARM32Mem::create(Func, SplitType, NewBase, Mem->getIndex(), Variable *BaseR = legalizeToReg(NewBase);
Variable *IndexR = legalizeToReg(Mem->getIndex());
return OperandARM32Mem::create(Func, SplitType, BaseR, IndexR,
Mem->getShiftOp(), Mem->getShiftAmt(), Mem->getShiftOp(), Mem->getShiftAmt(),
Mem->getAddrMode()); Mem->getAddrMode());
} else { } else {
...@@ -1216,16 +1251,17 @@ Operand *TargetARM32::hiOperand(Operand *Operand) { ...@@ -1216,16 +1251,17 @@ Operand *TargetARM32::hiOperand(Operand *Operand) {
// mode into a RegReg addressing mode. Since NaCl sandboxing disallows // mode into a RegReg addressing mode. Since NaCl sandboxing disallows
// RegReg addressing modes, prefer adding to base and replacing // RegReg addressing modes, prefer adding to base and replacing
// instead. Thus we leave the old offset alone. // instead. Thus we leave the old offset alone.
Constant *Four = Ctx->getConstantInt32(4); Constant *_4 = Ctx->getConstantInt32(4);
Variable *NewBase = Func->makeVariable(Base->getType()); Variable *NewBase = Func->makeVariable(Base->getType());
lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add,
NewBase, Base, Four)); NewBase, Base, _4));
Base = NewBase; Base = NewBase;
} else { } else {
Offset = Offset =
llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal)); llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
} }
return OperandARM32Mem::create(Func, SplitType, Base, Offset, Variable *BaseR = legalizeToReg(Base);
return OperandARM32Mem::create(Func, SplitType, BaseR, Offset,
Mem->getAddrMode()); Mem->getAddrMode());
} }
} }
...@@ -1264,7 +1300,6 @@ llvm::SmallBitVector TargetARM32::getRegisterSet(RegSetMask Include, ...@@ -1264,7 +1300,6 @@ llvm::SmallBitVector TargetARM32::getRegisterSet(RegSetMask Include,
} }
void TargetARM32::lowerAlloca(const InstAlloca *Inst) { void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
UsesFramePointer = true;
// Conservatively require the stack to be aligned. Some stack adjustment // Conservatively require the stack to be aligned. Some stack adjustment
// operations implemented below assume that the stack is aligned before the // operations implemented below assume that the stack is aligned before the
// alloca. All the alloca code ensures that the stack alignment is preserved // alloca. All the alloca code ensures that the stack alignment is preserved
...@@ -1272,29 +1307,53 @@ void TargetARM32::lowerAlloca(const InstAlloca *Inst) { ...@@ -1272,29 +1307,53 @@ void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
// cases. // cases.
NeedsStackAlignment = true; NeedsStackAlignment = true;
// TODO(stichnot): minimize the number of adjustments of SP, etc.
Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
Variable *Dest = Inst->getDest();
uint32_t AlignmentParam = Inst->getAlignInBytes();
// For default align=0, set it to the real value 1, to avoid any // For default align=0, set it to the real value 1, to avoid any
// bit-manipulation problems below. // bit-manipulation problems below.
AlignmentParam = std::max(AlignmentParam, 1u); const uint32_t AlignmentParam = std::max(1u, Inst->getAlignInBytes());
// LLVM enforces power of 2 alignment. // LLVM enforces power of 2 alignment.
assert(llvm::isPowerOf2_32(AlignmentParam)); assert(llvm::isPowerOf2_32(AlignmentParam));
assert(llvm::isPowerOf2_32(ARM32_STACK_ALIGNMENT_BYTES)); assert(llvm::isPowerOf2_32(ARM32_STACK_ALIGNMENT_BYTES));
uint32_t Alignment = std::max(AlignmentParam, ARM32_STACK_ALIGNMENT_BYTES); const uint32_t Alignment =
if (Alignment > ARM32_STACK_ALIGNMENT_BYTES) { std::max(AlignmentParam, ARM32_STACK_ALIGNMENT_BYTES);
const bool OverAligned = Alignment > ARM32_STACK_ALIGNMENT_BYTES;
const bool OptM1 = Ctx->getFlags().getOptLevel() == Opt_m1;
const bool AllocaWithKnownOffset = Inst->getKnownFrameOffset();
const bool UseFramePointer =
hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;
if (UseFramePointer)
setHasFramePointer();
Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
if (OverAligned) {
alignRegisterPow2(SP, Alignment); alignRegisterPow2(SP, Alignment);
} }
Variable *Dest = Inst->getDest();
Operand *TotalSize = Inst->getSizeInBytes(); Operand *TotalSize = Inst->getSizeInBytes();
if (const auto *ConstantTotalSize = if (const auto *ConstantTotalSize =
llvm::dyn_cast<ConstantInteger32>(TotalSize)) { llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
uint32_t Value = ConstantTotalSize->getValue(); const uint32_t Value =
Value = Utils::applyAlignment(Value, Alignment); Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment);
Operand *SubAmount = legalize(Ctx->getConstantInt32(Value)); // Constant size alloca.
_sub(SP, SP, SubAmount); if (!UseFramePointer) {
// If we don't need a Frame Pointer, this alloca has a known offset to the
// stack pointer. We don't need to adjust the stack pointer, nor assign any
// value to Dest, as Dest is rematerializable.
assert(Dest->isRematerializable());
FixedAllocaSizeBytes += Value;
Context.insert(InstFakeDef::create(Func, Dest));
return;
}
// If a frame pointer is required, then we need to store the alloca'd result
// in Dest.
Operand *SubAmountRF =
legalize(Ctx->getConstantInt32(Value), Legal_Reg | Legal_Flex);
_sub(SP, SP, SubAmountRF);
} else { } else {
// Non-constant sizes need to be adjusted to the next highest multiple of // Non-constant sizes need to be adjusted to the next highest multiple of
// the required alignment at runtime. // the required alignment at runtime.
...@@ -1306,6 +1365,8 @@ void TargetARM32::lowerAlloca(const InstAlloca *Inst) { ...@@ -1306,6 +1365,8 @@ void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
alignRegisterPow2(T, Alignment); alignRegisterPow2(T, Alignment);
_sub(SP, SP, T); _sub(SP, SP, T);
} }
// Adds back a few bytes to SP to account for the out args area.
Variable *T = SP; Variable *T = SP;
if (MaxOutArgsSizeBytes != 0) { if (MaxOutArgsSizeBytes != 0) {
T = makeReg(getPointerType()); T = makeReg(getPointerType());
...@@ -1313,6 +1374,7 @@ void TargetARM32::lowerAlloca(const InstAlloca *Inst) { ...@@ -1313,6 +1374,7 @@ void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
Ctx->getConstantInt32(MaxOutArgsSizeBytes), Legal_Reg | Legal_Flex); Ctx->getConstantInt32(MaxOutArgsSizeBytes), Legal_Reg | Legal_Flex);
_add(T, SP, OutArgsSizeRF); _add(T, SP, OutArgsSizeRF);
} }
_mov(Dest, T); _mov(Dest, T);
} }
...@@ -1976,6 +2038,12 @@ void TargetARM32::lowerInt64Arithmetic(InstArithmetic::OpKind Op, ...@@ -1976,6 +2038,12 @@ void TargetARM32::lowerInt64Arithmetic(InstArithmetic::OpKind Op,
void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
Variable *Dest = Inst->getDest(); Variable *Dest = Inst->getDest();
if (Dest->isRematerializable()) {
Context.insert(InstFakeDef::create(Func, Dest));
return;
}
if (Dest->getType() == IceType_i1) { if (Dest->getType() == IceType_i1) {
lowerInt1Arithmetic(Inst); lowerInt1Arithmetic(Inst);
return; return;
...@@ -2139,8 +2207,8 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -2139,8 +2207,8 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
if (Srcs.hasConstOperand()) { if (Srcs.hasConstOperand()) {
// TODO(jpp): lowering Src0R here is wrong -- Src0R it is not guaranteed // TODO(jpp): lowering Src0R here is wrong -- Src0R it is not guaranteed
// to be used. // to be used.
Variable *Src0R = Srcs.src0R(this);
if (Srcs.immediateIsFlexEncodable()) { if (Srcs.immediateIsFlexEncodable()) {
Variable *Src0R = Srcs.src0R(this);
Operand *Src1RF = Srcs.src1RF(this); Operand *Src1RF = Srcs.src1RF(this);
if (Srcs.swappedOperands()) { if (Srcs.swappedOperands()) {
_rsb(T, Src0R, Src1RF); _rsb(T, Src0R, Src1RF);
...@@ -2151,6 +2219,7 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -2151,6 +2219,7 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
return; return;
} }
if (!Srcs.swappedOperands() && Srcs.negatedImmediateIsFlexEncodable()) { if (!Srcs.swappedOperands() && Srcs.negatedImmediateIsFlexEncodable()) {
Variable *Src0R = Srcs.src0R(this);
Operand *Src1F = Srcs.negatedSrc1F(this); Operand *Src1F = Srcs.negatedSrc1F(this);
_add(T, Src0R, Src1F); _add(T, Src0R, Src1F);
_mov(Dest, T); _mov(Dest, T);
...@@ -2215,6 +2284,12 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { ...@@ -2215,6 +2284,12 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
void TargetARM32::lowerAssign(const InstAssign *Inst) { void TargetARM32::lowerAssign(const InstAssign *Inst) {
Variable *Dest = Inst->getDest(); Variable *Dest = Inst->getDest();
if (Dest->isRematerializable()) {
Context.insert(InstFakeDef::create(Func, Dest));
return;
}
Operand *Src0 = Inst->getSrc(0); Operand *Src0 = Inst->getSrc(0);
assert(Dest->getType() == Src0->getType()); assert(Dest->getType() == Src0->getType());
if (Dest->getType() == IceType_i64) { if (Dest->getType() == IceType_i64) {
...@@ -4425,13 +4500,17 @@ OperandARM32Mem *TargetARM32::formAddressingMode(Type Ty, Cfg *Func, ...@@ -4425,13 +4500,17 @@ OperandARM32Mem *TargetARM32::formAddressingMode(Type Ty, Cfg *Func,
assert(OffsetImm < 0 ? (ValidImmMask & -OffsetImm) == -OffsetImm assert(OffsetImm < 0 ? (ValidImmMask & -OffsetImm) == -OffsetImm
: (ValidImmMask & OffsetImm) == OffsetImm); : (ValidImmMask & OffsetImm) == OffsetImm);
Variable *BaseR = makeReg(getPointerType());
Context.insert(InstAssign::create(Func, BaseR, BaseVar));
if (OffsetReg != nullptr) { if (OffsetReg != nullptr) {
return OperandARM32Mem::create(Func, Ty, BaseVar, OffsetReg, ShiftKind, Variable *OffsetR = makeReg(getPointerType());
Context.insert(InstAssign::create(Func, OffsetR, OffsetReg));
return OperandARM32Mem::create(Func, Ty, BaseR, OffsetR, ShiftKind,
OffsetRegShamt); OffsetRegShamt);
} }
return OperandARM32Mem::create( return OperandARM32Mem::create(
Func, Ty, BaseVar, Func, Ty, BaseR,
llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetImm))); llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetImm)));
} }
...@@ -4630,7 +4709,8 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed, ...@@ -4630,7 +4709,8 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
if (RegNum == Variable::NoRegister) { if (RegNum == Variable::NoRegister) {
if (Variable *Subst = getContext().availabilityGet(From)) { if (Variable *Subst = getContext().availabilityGet(From)) {
// At this point we know there is a potential substitution available. // At this point we know there is a potential substitution available.
if (Subst->mustHaveReg() && !Subst->hasReg()) { if (!Subst->isRematerializable() && Subst->mustHaveReg() &&
!Subst->hasReg()) {
// At this point we know the substitution will have a register. // At this point we know the substitution will have a register.
if (From->getType() == Subst->getType()) { if (From->getType() == Subst->getType()) {
// At this point we know the substitution's register is compatible. // At this point we know the substitution's register is compatible.
...@@ -4788,6 +4868,13 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed, ...@@ -4788,6 +4868,13 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
} }
if (auto *Var = llvm::dyn_cast<Variable>(From)) { if (auto *Var = llvm::dyn_cast<Variable>(From)) {
if (Var->isRematerializable()) {
// TODO(jpp): We don't need to rematerialize Var if legalize() was invoked
// for a Variable in a Mem operand.
Variable *T = makeReg(Var->getType(), RegNum);
_mov(T, Var);
return T;
}
// Check if the variable is guaranteed a physical register. This can happen // Check if the variable is guaranteed a physical register. This can happen
// either when the variable is pre-colored or when it is assigned infinite // either when the variable is pre-colored or when it is assigned infinite
// weight. // weight.
...@@ -4844,9 +4931,9 @@ OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) { ...@@ -4844,9 +4931,9 @@ OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) {
// If we didn't do address mode optimization, then we only have a // If we didn't do address mode optimization, then we only have a
// base/offset to work with. ARM always requires a base register, so // base/offset to work with. ARM always requires a base register, so
// just use that to hold the operand. // just use that to hold the operand.
Variable *Base = legalizeToReg(Operand); Variable *BaseR = legalizeToReg(Operand);
return OperandARM32Mem::create( return OperandARM32Mem::create(
Func, Ty, Base, Func, Ty, BaseR,
llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))); llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)));
} }
...@@ -4863,6 +4950,7 @@ Variable64On32 *TargetARM32::makeI64RegPair() { ...@@ -4863,6 +4950,7 @@ Variable64On32 *TargetARM32::makeI64RegPair() {
Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) { Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) {
// There aren't any 64-bit integer registers for ARM32. // There aren't any 64-bit integer registers for ARM32.
assert(Type != IceType_i64); assert(Type != IceType_i64);
assert(AllowTemporaryWithNoReg || RegNum != Variable::NoRegister);
Variable *Reg = Func->makeVariable(Type); Variable *Reg = Func->makeVariable(Type);
if (RegNum == Variable::NoRegister) if (RegNum == Variable::NoRegister)
Reg->setMustHaveReg(); Reg->setMustHaveReg();
...@@ -4871,7 +4959,8 @@ Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) { ...@@ -4871,7 +4959,8 @@ Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) {
return Reg; return Reg;
} }
void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align) { void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align,
int32_t TmpRegNum) {
assert(llvm::isPowerOf2_32(Align)); assert(llvm::isPowerOf2_32(Align));
uint32_t RotateAmt; uint32_t RotateAmt;
uint32_t Immed_8; uint32_t Immed_8;
...@@ -4880,10 +4969,12 @@ void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align) { ...@@ -4880,10 +4969,12 @@ void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align) {
// it fits at all). Assume Align is usually small, in which case BIC works // it fits at all). Assume Align is usually small, in which case BIC works
// better. Thus, this rounds down to the alignment. // better. Thus, this rounds down to the alignment.
if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) { if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) {
Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex); Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex,
TmpRegNum);
_bic(Reg, Reg, Mask); _bic(Reg, Reg, Mask);
} else { } else {
Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex); Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex,
TmpRegNum);
_and(Reg, Reg, Mask); _and(Reg, Reg, Mask);
} }
} }
......
...@@ -99,16 +99,15 @@ public: ...@@ -99,16 +99,15 @@ public:
} }
uint32_t getStackAlignment() const override; uint32_t getStackAlignment() const override;
void reserveFixedAllocaArea(size_t Size, size_t Align) override { void reserveFixedAllocaArea(size_t Size, size_t Align) override {
// TODO(sehr,jpp): Implement fixed stack layout. FixedAllocaSizeBytes = Size;
(void)Size; assert(llvm::isPowerOf2_32(Align));
(void)Align; FixedAllocaAlignBytes = Align;
llvm::report_fatal_error("Not yet implemented"); PrologEmitsFixedAllocas = true;
} }
int32_t getFrameFixedAllocaOffset() const override { int32_t getFrameFixedAllocaOffset() const override {
// TODO(sehr,jpp): Implement fixed stack layout. return FixedAllocaSizeBytes - (SpillAreaSizeBytes - MaxOutArgsSizeBytes);
llvm::report_fatal_error("Not yet implemented");
return 0;
} }
uint32_t maxOutArgsSizeBytes() const override { return MaxOutArgsSizeBytes; }
bool shouldSplitToVariable64On32(Type Ty) const override { bool shouldSplitToVariable64On32(Type Ty) const override {
return Ty == IceType_i64; return Ty == IceType_i64;
...@@ -250,7 +249,8 @@ protected: ...@@ -250,7 +249,8 @@ protected:
Variable *makeReg(Type Ty, int32_t RegNum = Variable::NoRegister); Variable *makeReg(Type Ty, int32_t RegNum = Variable::NoRegister);
static Type stackSlotType(); static Type stackSlotType();
Variable *copyToReg(Operand *Src, int32_t RegNum = Variable::NoRegister); Variable *copyToReg(Operand *Src, int32_t RegNum = Variable::NoRegister);
void alignRegisterPow2(Variable *Reg, uint32_t Align); void alignRegisterPow2(Variable *Reg, uint32_t Align,
int32_t TmpRegNum = Variable::NoRegister);
/// Returns a vector in a register with the given constant entries. /// Returns a vector in a register with the given constant entries.
Variable *makeVectorOfZeros(Type Ty, int32_t RegNum = Variable::NoRegister); Variable *makeVectorOfZeros(Type Ty, int32_t RegNum = Variable::NoRegister);
...@@ -811,7 +811,7 @@ protected: ...@@ -811,7 +811,7 @@ protected:
} }
// Iterates over the CFG and determines the maximum outgoing stack arguments // Iterates over the CFG and determines the maximum outgoing stack arguments
// bytes. This information is later used during addProlog() do pre-allocate // bytes. This information is later used during addProlog() to pre-allocate
// the outargs area. // the outargs area.
// TODO(jpp): This could live in the Parser, if we provided a Target-specific // TODO(jpp): This could live in the Parser, if we provided a Target-specific
// method that the Parser could call. // method that the Parser could call.
...@@ -852,6 +852,9 @@ protected: ...@@ -852,6 +852,9 @@ protected:
bool NeedsStackAlignment = false; bool NeedsStackAlignment = false;
bool MaybeLeafFunc = true; bool MaybeLeafFunc = true;
size_t SpillAreaSizeBytes = 0; size_t SpillAreaSizeBytes = 0;
size_t FixedAllocaSizeBytes = 0;
size_t FixedAllocaAlignBytes = 0;
bool PrologEmitsFixedAllocas = false;
uint32_t MaxOutArgsSizeBytes = 0; uint32_t MaxOutArgsSizeBytes = 0;
// TODO(jpp): std::array instead of array. // TODO(jpp): std::array instead of array.
static llvm::SmallBitVector TypeToRegisterSet[RCARM32_NUM]; static llvm::SmallBitVector TypeToRegisterSet[RCARM32_NUM];
...@@ -970,6 +973,29 @@ private: ...@@ -970,6 +973,29 @@ private:
}; };
BoolComputationTracker BoolComputations; BoolComputationTracker BoolComputations;
// AllowTemporaryWithNoReg indicates if TargetARM32::makeReg() can be invoked
// without specifying a physical register. This is needed for creating unbound
// temporaries during Ice -> ARM lowering, but before register allocation.
// This is a safeguard ensuring that, during the legalization post-passes, no
// unbound temporaries are created.
bool AllowTemporaryWithNoReg = true;
// ForbidTemporaryWithoutReg is a RAII class that manages
// AllowTemporaryWithNoReg.
class ForbidTemporaryWithoutReg {
// A guard must always be bound to a target, and must not be duplicated:
// default construction, copy construction and copy assignment are deleted.
ForbidTemporaryWithoutReg() = delete;
ForbidTemporaryWithoutReg(const ForbidTemporaryWithoutReg&) = delete;
ForbidTemporaryWithoutReg &operator=(const ForbidTemporaryWithoutReg&) = delete;
public:
// Clears Target->AllowTemporaryWithNoReg for the lifetime of this object.
explicit ForbidTemporaryWithoutReg(TargetARM32 *Target) : Target(Target) {
Target->AllowTemporaryWithNoReg = false;
}
// Sets the flag back to true when the guard goes out of scope.
// NOTE(review): this writes true unconditionally rather than restoring the
// value observed at construction, so a nested guard would re-enable
// temporaries while an outer guard is still alive -- confirm guards are
// never nested.
~ForbidTemporaryWithoutReg() { Target->AllowTemporaryWithNoReg = true; }
private:
// The target whose flag is toggled; the pointer itself is never reseated.
TargetARM32 *const Target;
};
}; };
class TargetDataARM32 final : public TargetDataLowering { class TargetDataARM32 final : public TargetDataLowering {
......
...@@ -28,70 +28,65 @@ define internal i32 @AllocBigAlign() { ...@@ -28,70 +28,65 @@ define internal i32 @AllocBigAlign() {
; ASM-LABEL:AllocBigAlign: ; ASM-LABEL:AllocBigAlign:
; ASM-NEXT:.LAllocBigAlign$__0: ; ASM-NEXT:.LAllocBigAlign$__0:
; ASM-NEXT: push {fp}
; ASM-NEXT: mov fp, sp
; ASM-NEXT: sub sp, sp, #12
; ASM-NEXT: bic sp, sp, #31
; ASM-NEXT: sub sp, sp, #32
; ASM-NEXT: mov r0, sp
; ASM-NEXT: mov sp, fp
; ASM-NEXT: pop {fp}
; ASM-NEXT: # fp = def.pseudo
; ASM-NEXT: bx lr
; DIS-LABEL:00000000 <AllocBigAlign>: ; DIS-LABEL:00000000 <AllocBigAlign>:
; DIS-NEXT: 0: e52db004
; DIS-NEXT: 4: e1a0b00d
; DIS-NEXT: 8: e24dd00c
; DIS-NEXT: c: e3cdd01f
; DIS-NEXT: 10: e24dd020
; DIS-NEXT: 14: e1a0000d
; DIS-NEXT: 18: e1a0d00b
; DIS-NEXT: 1c: e49db004
; DIS-NEXT: 20: e12fff1e
; IASM-LABEL:AllocBigAlign: ; IASM-LABEL:AllocBigAlign:
; IASM-NEXT:.LAllocBigAlign$__0: ; IASM-NEXT:.LAllocBigAlign$__0:
; ASM-NEXT: push {fp}
; DIS-NEXT: 0: e52db004
; IASM-NEXT: .byte 0x4 ; IASM-NEXT: .byte 0x4
; IASM-NEXT: .byte 0xb0 ; IASM-NEXT: .byte 0xb0
; IASM-NEXT: .byte 0x2d ; IASM-NEXT: .byte 0x2d
; IASM-NEXT: .byte 0xe5 ; IASM-NEXT: .byte 0xe5
; ASM-NEXT: mov fp, sp
; DIS-NEXT: 4: e1a0b00d
; IASM: .byte 0xd ; IASM: .byte 0xd
; IASM-NEXT: .byte 0xb0 ; IASM-NEXT: .byte 0xb0
; IASM-NEXT: .byte 0xa0 ; IASM-NEXT: .byte 0xa0
; IASM-NEXT: .byte 0xe1 ; IASM-NEXT: .byte 0xe1
; IASM: .byte 0xc ; ASM-NEXT: sub sp, sp, #32
; DIS-NEXT: 8: e24dd020
; IASM: .byte 0x20
; IASM-NEXT: .byte 0xd0 ; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0x4d ; IASM-NEXT: .byte 0x4d
; IASM-NEXT: .byte 0xe2 ; IASM-NEXT: .byte 0xe2
; ASM-NEXT: bic sp, sp, #31
; DIS-NEXT: c: e3cdd01f
; IASM: .byte 0x1f ; IASM: .byte 0x1f
; IASM-NEXT: .byte 0xd0 ; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0xcd ; IASM-NEXT: .byte 0xcd
; IASM-NEXT: .byte 0xe3 ; IASM-NEXT: .byte 0xe3
; IASM: .byte 0x20 ; ASM-NEXT: # sp = def.pseudo
; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0x4d
; IASM-NEXT: .byte 0xe2
; IASM: .byte 0xd ; ASM-NEXT: add r0, sp, #0
; DIS-NEXT: 10: e28d0000
; IASM: .byte 0x0
; IASM-NEXT: .byte 0x0 ; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0xa0 ; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe1 ; IASM-NEXT: .byte 0xe2
; ASM-NEXT: mov sp, fp
; DIS-NEXT: 14: e1a0d00b
; IASM: .byte 0xb ; IASM: .byte 0xb
; IASM-NEXT: .byte 0xd0 ; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0xa0 ; IASM-NEXT: .byte 0xa0
; IASM-NEXT: .byte 0xe1 ; IASM-NEXT: .byte 0xe1
; ASM-NEXT: pop {fp}
; DIS-NEXT: 18: e49db004
; IASM-NEXT: .byte 0x4 ; IASM-NEXT: .byte 0x4
; IASM-NEXT: .byte 0xb0 ; IASM-NEXT: .byte 0xb0
; IASM-NEXT: .byte 0x9d ; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe4 ; IASM-NEXT: .byte 0xe4
; ASM-NEXT: # fp = def.pseudo
; ASM-NEXT: bx lr
; DIS-NEXT: 1c: e12fff1e
; IASM: .byte 0x1e ; IASM: .byte 0x1e
; IASM-NEXT: .byte 0xff ; IASM-NEXT: .byte 0xff
; IASM-NEXT: .byte 0x2f ; IASM-NEXT: .byte 0x2f
......
...@@ -17,14 +17,14 @@ ...@@ -17,14 +17,14 @@
; RUN: --disassemble --target arm32 -i %s --args -O2 --skip-unimplemented \ ; RUN: --disassemble --target arm32 -i %s --args -O2 --skip-unimplemented \
; RUN: -allow-externally-defined-symbols \ ; RUN: -allow-externally-defined-symbols \
; RUN: | %if --need=target_ARM32 --need=allow_dump \ ; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 %s ; RUN: --command FileCheck --check-prefix ARM32 --check-prefix=ARM-OPT2 %s
; RUN: %if --need=target_ARM32 --need=allow_dump \ ; RUN: %if --need=target_ARM32 --need=allow_dump \
; RUN: --command %p2i --filetype=asm --assemble \ ; RUN: --command %p2i --filetype=asm --assemble \
; RUN: --disassemble --target arm32 -i %s --args -Om1 --skip-unimplemented \ ; RUN: --disassemble --target arm32 -i %s --args -Om1 --skip-unimplemented \
; RUN: -allow-externally-defined-symbols \ ; RUN: -allow-externally-defined-symbols \
; RUN: | %if --need=target_ARM32 --need=allow_dump \ ; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 %s ; RUN: --command FileCheck --check-prefix ARM32 --check-prefix=ARM-OPTM1 %s
define internal void @fixed_416_align_16(i32 %n) { define internal void @fixed_416_align_16(i32 %n) {
entry: entry:
...@@ -47,8 +47,9 @@ entry: ...@@ -47,8 +47,9 @@ entry:
; CHECK-OPTM1: call {{.*}} R_{{.*}} f1 ; CHECK-OPTM1: call {{.*}} R_{{.*}} f1
; ARM32-LABEL: fixed_416_align_16 ; ARM32-LABEL: fixed_416_align_16
; ARM32: sub sp, sp, #416 ; ARM32-OPT2: sub sp, sp, #428
; ARM32: bl {{.*}} R_{{.*}} f1 ; ARM32-OPTM1: sub sp, sp, #416
; ARM32: bl {{.*}} R_{{.*}} f1
define internal void @fixed_416_align_32(i32 %n) { define internal void @fixed_416_align_32(i32 %n) {
entry: entry:
...@@ -67,9 +68,10 @@ entry: ...@@ -67,9 +68,10 @@ entry:
; CHECK: call {{.*}} R_{{.*}} f1 ; CHECK: call {{.*}} R_{{.*}} f1
; ARM32-LABEL: fixed_416_align_32 ; ARM32-LABEL: fixed_416_align_32
; ARM32: bic sp, sp, #31 ; ARM32-OPT2: sub sp, sp, #424
; ARM32: sub sp, sp, #416 ; ARM32-OPTM1: sub sp, sp, #416
; ARM32: bl {{.*}} R_{{.*}} f1 ; ARM32: bic sp, sp, #31
; ARM32: bl {{.*}} R_{{.*}} f1
; Show that the amount to allocate will be rounded up. ; Show that the amount to allocate will be rounded up.
define internal void @fixed_351_align_16(i32 %n) { define internal void @fixed_351_align_16(i32 %n) {
...@@ -91,8 +93,9 @@ entry: ...@@ -91,8 +93,9 @@ entry:
; CHECK-OPTM1: call {{.*}} R_{{.*}} f1 ; CHECK-OPTM1: call {{.*}} R_{{.*}} f1
; ARM32-LABEL: fixed_351_align_16 ; ARM32-LABEL: fixed_351_align_16
; ARM32: sub sp, sp, #352 ; ARM32-OPT2: sub sp, sp, #364
; ARM32: bl {{.*}} R_{{.*}} f1 ; ARM32-OPTM1: sub sp, sp, #352
; ARM32: bl {{.*}} R_{{.*}} f1
define internal void @fixed_351_align_32(i32 %n) { define internal void @fixed_351_align_32(i32 %n) {
entry: entry:
...@@ -111,9 +114,10 @@ entry: ...@@ -111,9 +114,10 @@ entry:
; CHECK: call {{.*}} R_{{.*}} f1 ; CHECK: call {{.*}} R_{{.*}} f1
; ARM32-LABEL: fixed_351_align_32 ; ARM32-LABEL: fixed_351_align_32
; ARM32: bic sp, sp, #31 ; ARM32-OPT2: sub sp, sp, #360
; ARM32: sub sp, sp, #352 ; ARM32-OPTM1: sub sp, sp, #352
; ARM32: bl {{.*}} R_{{.*}} f1 ; ARM32: bic sp, sp, #31
; ARM32: bl {{.*}} R_{{.*}} f1
declare void @f1(i32 %ignored) declare void @f1(i32 %ignored)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment