Commit 4318a410 by David Sehr

Combine allocas

Partition the allocas that occur in the entry block into two categories. The first category is allocas whose size is fixed and whose alignment is less than or equal to the stack alignment; these are emitted at offsets relative to a pointer, either at increasing offsets from the stack pointer or at decreasing offsets from the frame pointer. (This optimization is not yet enabled for frame pointer based frames.) The second category is allocas whose size is dynamic or whose alignment is greater than the stack alignment; these are emitted relative to a user variable, in increasing offset order. The optimization is enabled only for x86 at O2.

BUG=
R=stichnot@chromium.org

Review URL: https://codereview.chromium.org/1411583007 .
parent 5ff0cfb4
@@ -185,10 +185,19 @@ public:
   void advancedPhiLowering();
   void reorderNodes();
   void shuffleNodes();
-  void sortAllocas(CfgVector<Inst *> &Allocas, InstList &Insts,
-                   bool IsKnownFrameOffset);
-  /// Merge all the fixed-size allocas in the entry block.
-  void processAllocas();
+  enum AllocaBaseVariableType {
+    BVT_StackPointer,
+    BVT_FramePointer,
+    BVT_UserPointer
+  };
+  void sortAndCombineAllocas(CfgVector<Inst *> &Allocas,
+                             uint32_t CombinedAlignment, InstList &Insts,
+                             AllocaBaseVariableType BaseVariableType);
+  /// Scan allocas to determine whether we need to use a frame pointer.
+  /// If SortAndCombine == true, merge all the fixed-size allocas in the
+  /// entry block and emit stack or frame pointer-relative addressing.
+  void processAllocas(bool SortAndCombine);
   void doAddressOpt();
   void doArgLowering();
   void doNopInsertion();
...
@@ -101,6 +101,20 @@ MachineTraits<TargetX8632>::X86OperandMem::X86OperandMem(
 void MachineTraits<TargetX8632>::X86OperandMem::emit(const Cfg *Func) const {
   if (!BuildDefs::dump())
     return;
+  const ::Ice::TargetLowering *Target = Func->getTarget();
+  // If the base is rematerializable, we need to replace it with the correct
+  // physical register (esp or ebp), and update the Offset.
+  int32_t Disp = 0;
+  if (getBase() && getBase()->isRematerializable()) {
+    Disp += getBase()->getStackOffset();
+    if (!getIgnoreStackAdjust())
+      Disp += Target->getStackAdjustment();
+  }
+  // The index should never be rematerializable. But if we ever allow it, then
+  // we should make sure the rematerialization offset is shifted by the Shift
+  // value.
+  if (getIndex())
+    assert(!getIndex()->isRematerializable());
   Ostream &Str = Func->getContext()->getStrEmit();
   if (SegmentReg != DefaultSegment) {
     assert(SegmentReg >= 0 && SegmentReg < SegReg_NUM);
@@ -108,27 +122,33 @@ void MachineTraits<TargetX8632>::X86OperandMem::emit(const Cfg *Func) const {
   }
   // Emit as Offset(Base,Index,1<<Shift). Offset is emitted without the leading
   // '$'. Omit the (Base,Index,1<<Shift) part if Base==nullptr.
-  if (!Offset) {
+  if (getOffset() == 0 && Disp == 0) {
     // No offset, emit nothing.
-  } else if (const auto *CI = llvm::dyn_cast<ConstantInteger32>(Offset)) {
-    if (Base == nullptr || CI->getValue())
+  } else if (getOffset() == 0 && Disp != 0) {
+    Str << Disp;
+  } else if (const auto *CI = llvm::dyn_cast<ConstantInteger32>(getOffset())) {
+    if (getBase() == nullptr || CI->getValue() || Disp != 0)
       // Emit a non-zero offset without a leading '$'.
-      Str << CI->getValue();
-  } else if (const auto *CR = llvm::dyn_cast<ConstantRelocatable>(Offset)) {
+      Str << CI->getValue() + Disp;
+  } else if (const auto *CR =
+                 llvm::dyn_cast<ConstantRelocatable>(getOffset())) {
+    // TODO(sehr): ConstantRelocatable still needs updating for
+    // rematerializable base/index and Disp.
+    assert(Disp == 0);
     CR->emitWithoutPrefix(Func->getTarget());
   } else {
     llvm_unreachable("Invalid offset type for x86 mem operand");
   }
-  if (Base || Index) {
+  if (getBase() || getIndex()) {
     Str << "(";
-    if (Base)
-      Base->emit(Func);
-    if (Index) {
+    if (getBase())
+      getBase()->emit(Func);
+    if (getIndex()) {
       Str << ",";
-      Index->emit(Func);
-      if (Shift)
-        Str << "," << (1u << Shift);
+      getIndex()->emit(Func);
+      if (getShift())
+        Str << "," << (1u << getShift());
     }
     Str << ")";
   }
@@ -144,44 +164,54 @@ void MachineTraits<TargetX8632>::X86OperandMem::dump(const Cfg *Func,
   }
   bool Dumped = false;
   Str << "[";
-  if (Base) {
+  int32_t Disp = 0;
+  if (getBase() && getBase()->isRematerializable()) {
+    Disp += getBase()->getStackOffset();
+    if (!getIgnoreStackAdjust())
+      Disp += Func->getTarget()->getStackAdjustment();
+  }
+  if (getBase()) {
     if (Func)
-      Base->dump(Func);
+      getBase()->dump(Func);
     else
-      Base->dump(Str);
+      getBase()->dump(Str);
     Dumped = true;
   }
-  if (Index) {
-    if (Base)
+  if (getIndex()) {
+    assert(!getIndex()->isRematerializable());
+    if (getBase())
       Str << "+";
-    if (Shift > 0)
-      Str << (1u << Shift) << "*";
+    if (getShift() > 0)
+      Str << (1u << getShift()) << "*";
     if (Func)
-      Index->dump(Func);
+      getIndex()->dump(Func);
     else
-      Index->dump(Str);
+      getIndex()->dump(Str);
     Dumped = true;
   }
   // Pretty-print the Offset.
   bool OffsetIsZero = false;
   bool OffsetIsNegative = false;
-  if (!Offset) {
+  if (getOffset() == 0 && Disp == 0) {
     OffsetIsZero = true;
-  } else if (const auto *CI = llvm::dyn_cast<ConstantInteger32>(Offset)) {
-    OffsetIsZero = (CI->getValue() == 0);
-    OffsetIsNegative = (static_cast<int32_t>(CI->getValue()) < 0);
+  } else if (getOffset() == 0 && Disp != 0) {
+    OffsetIsZero = (Disp == 0);
+    OffsetIsNegative = (Disp < 0);
+  } else if (const auto *CI = llvm::dyn_cast<ConstantInteger32>(getOffset())) {
+    OffsetIsZero = (CI->getValue() + Disp == 0);
+    OffsetIsNegative = (static_cast<int32_t>(CI->getValue()) + Disp < 0);
   } else {
-    assert(llvm::isa<ConstantRelocatable>(Offset));
+    assert(llvm::isa<ConstantRelocatable>(getOffset()) && Disp == 0);
   }
   if (Dumped) {
     if (!OffsetIsZero) {     // Suppress if Offset is known to be 0
       if (!OffsetIsNegative) // Suppress if Offset is known to be negative
         Str << "+";
-      Offset->dump(Func, Str);
+      getOffset()->dump(Func, Str);
     }
   } else {
     // There is only the offset.
-    Offset->dump(Func, Str);
+    getOffset()->dump(Func, Str);
   }
   Str << "]";
 }
@@ -196,16 +226,28 @@ void MachineTraits<TargetX8632>::X86OperandMem::emitSegmentOverride(
 MachineTraits<TargetX8632>::Address
 MachineTraits<TargetX8632>::X86OperandMem::toAsmAddress(
-    MachineTraits<TargetX8632>::Assembler *Asm) const {
+    MachineTraits<TargetX8632>::Assembler *Asm,
+    const Ice::TargetLowering *Target) const {
   int32_t Disp = 0;
+  if (getBase() && getBase()->isRematerializable()) {
+    Disp += getBase()->getStackOffset();
+    if (!getIgnoreStackAdjust()) {
+      Disp += Target->getStackAdjustment();
+    }
+  }
+  // The index should never be rematerializable. But if we ever allow it, then
+  // we should make sure the rematerialization offset is shifted by the Shift
+  // value.
+  if (getIndex())
+    assert(!getIndex()->isRematerializable());
   AssemblerFixup *Fixup = nullptr;
   // Determine the offset (is it relocatable?)
   if (getOffset()) {
     if (const auto *CI = llvm::dyn_cast<ConstantInteger32>(getOffset())) {
-      Disp = static_cast<int32_t>(CI->getValue());
+      Disp += static_cast<int32_t>(CI->getValue());
     } else if (const auto CR =
                    llvm::dyn_cast<ConstantRelocatable>(getOffset())) {
-      Disp = CR->getOffset();
+      Disp += CR->getOffset();
       Fixup = Asm->createFixup(RelFixup, CR);
     } else {
       llvm_unreachable("Unexpected offset type");
...
@@ -170,7 +170,14 @@ void MachineTraits<TargetX8664>::X86OperandMem::dump(const Cfg *Func,
 MachineTraits<TargetX8664>::Address
 MachineTraits<TargetX8664>::X86OperandMem::toAsmAddress(
-    MachineTraits<TargetX8664>::Assembler *Asm) const {
+    MachineTraits<TargetX8664>::Assembler *Asm,
+    const Ice::TargetLowering *Target) const {
+  // TODO(sehr): handle rematerializable base/index.
+  (void)Target;
+  if (getBase())
+    assert(!getBase()->isRematerializable());
+  if (getIndex())
+    assert(!getIndex()->isRematerializable());
   int32_t Disp = 0;
   AssemblerFixup *Fixup = nullptr;
   // Determine the offset (is it relocatable?)
...
@@ -509,6 +509,13 @@ public:
   bool mustNotHaveReg() const {
     return RegRequirement == RR_MustNotHaveRegister;
   }
+  void setRematerializable(int32_t NewRegNum, int32_t NewOffset) {
+    IsRematerializable = true;
+    setRegNum(NewRegNum);
+    setStackOffset(NewOffset);
+    setMustHaveReg();
+  }
+  bool isRematerializable() const { return IsRematerializable; }
   void setRegClass(uint8_t RC) { RegisterClass = static_cast<RegClass>(RC); }
   RegClass getRegClass() const { return RegisterClass; }
@@ -573,6 +580,9 @@ protected:
   /// and validating live ranges. This is usually reserved for the stack
   /// pointer and other physical registers specifically referenced by name.
   bool IgnoreLiveness = false;
+  // If IsRematerializable, RegNum keeps track of which register (stack or frame
+  // pointer), and StackOffset is the known offset from that register.
+  bool IsRematerializable = false;
   RegRequirement RegRequirement = RR_MayHaveRegister;
   RegClass RegisterClass;
   /// RegNum is the allocated register, or NoRegister if it isn't
...
@@ -239,6 +239,11 @@ void TargetARM32::translateO2() {
   // TODO(stichnot): share passes with X86?
   // https://code.google.com/p/nativeclient/issues/detail?id=4094
+  // Do not merge Alloca instructions, and lay out the stack.
+  static constexpr bool SortAndCombineAllocas = false;
+  Func->processAllocas(SortAndCombineAllocas);
+  Func->dump("After Alloca processing");
   if (!Ctx->getFlags().getPhiEdgeSplit()) {
     // Lower Phi instructions.
     Func->placePhiLoads();
@@ -340,6 +345,11 @@ void TargetARM32::translateOm1() {
   // TODO: share passes with X86?
+  // Do not merge Alloca instructions, and lay out the stack.
+  static constexpr bool SortAndCombineAllocas = false;
+  Func->processAllocas(SortAndCombineAllocas);
+  Func->dump("After Alloca processing");
   Func->placePhiLoads();
   if (Func->hasError())
     return;
...
@@ -92,6 +92,11 @@ void TargetMIPS32::translateO2() {
   // TODO(stichnot): share passes with X86?
   // https://code.google.com/p/nativeclient/issues/detail?id=4094
+  // Merge Alloca instructions, and lay out the stack.
+  static constexpr bool SortAndCombineAllocas = true;
+  Func->processAllocas(SortAndCombineAllocas);
+  Func->dump("After Alloca processing");
   if (!Ctx->getFlags().getPhiEdgeSplit()) {
     // Lower Phi instructions.
     Func->placePhiLoads();
@@ -187,6 +192,11 @@ void TargetMIPS32::translateOm1() {
   // TODO: share passes with X86?
+  // Do not merge Alloca instructions, and lay out the stack.
+  static constexpr bool SortAndCombineAllocas = false;
+  Func->processAllocas(SortAndCombineAllocas);
+  Func->dump("After Alloca processing");
   Func->placePhiLoads();
   if (Func->hasError())
     return;
...
@@ -151,8 +151,10 @@ void TargetX8632::lowerCall(const InstCall *Instr) {
       Variable *esp =
           Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
       Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
-      StackArgLocations.push_back(
-          Traits::X86OperandMem::create(Func, Ty, esp, Loc));
+      auto *Mem = Traits::X86OperandMem::create(Func, Ty, esp, Loc);
+      // Stack stores for arguments are fixed to esp.
+      Mem->setIgnoreStackAdjust(true);
+      StackArgLocations.push_back(Mem);
       ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
     }
   }
...
@@ -735,7 +735,8 @@ template <> struct MachineTraits<TargetX8632> {
     uint16_t getShift() const { return Shift; }
     SegmentRegisters getSegmentRegister() const { return SegmentReg; }
     void emitSegmentOverride(Assembler *Asm) const;
-    Address toAsmAddress(Assembler *Asm) const;
+    Address toAsmAddress(Assembler *Asm,
+                         const Ice::TargetLowering *Target) const;
     void emit(const Cfg *Func) const override;
     using X86Operand::dump;
@@ -749,6 +750,9 @@ template <> struct MachineTraits<TargetX8632> {
     bool getRandomized() const { return Randomized; }
+    void setIgnoreStackAdjust(bool Ignore) { IgnoreStackAdjust = Ignore; }
+    bool getIgnoreStackAdjust() const { return IgnoreStackAdjust; }
   private:
     X86OperandMem(Cfg *Func, Type Ty, Variable *Base, Constant *Offset,
                   Variable *Index, uint16_t Shift, SegmentRegisters SegmentReg);
@@ -762,6 +766,11 @@ template <> struct MachineTraits<TargetX8632> {
     /// memory operands are generated in
     /// TargetX86Base::randomizeOrPoolImmediate()
     bool Randomized;
+    /// Memory operations involving the stack pointer need to know when the
+    /// stack pointer was moved temporarily. Ignore that adjustment in
+    /// cases that should be pinned to the stack pointer, such as outgoing
+    /// arguments to calls.
+    bool IgnoreStackAdjust = false;
   };
   /// VariableSplit is a way to treat an f64 memory location as a pair of i32
...
@@ -717,7 +717,8 @@ template <> struct MachineTraits<TargetX8664> {
     uint16_t getShift() const { return Shift; }
     SegmentRegisters getSegmentRegister() const { return DefaultSegment; }
     void emitSegmentOverride(Assembler *) const {}
-    Address toAsmAddress(Assembler *Asm) const;
+    Address toAsmAddress(Assembler *Asm,
+                         const Ice::TargetLowering *Target) const;
     void emit(const Cfg *Func) const override;
     using X86Operand::dump;
@@ -731,6 +732,9 @@ template <> struct MachineTraits<TargetX8664> {
     bool getRandomized() const { return Randomized; }
+    void setIgnoreStackAdjust(bool Ignore) { IgnoreStackAdjust = Ignore; }
+    bool getIgnoreStackAdjust() const { return IgnoreStackAdjust; }
   private:
     X86OperandMem(Cfg *Func, Type Ty, Variable *Base, Constant *Offset,
                   Variable *Index, uint16_t Shift);
@@ -743,6 +747,11 @@ template <> struct MachineTraits<TargetX8664> {
     /// memory operands are generated in
     /// TargetX86Base::randomizeOrPoolImmediate()
     bool Randomized = false;
+    /// Memory operations involving the stack pointer need to know when the
+    /// stack pointer was moved temporarily. Ignore that adjustment in
+    /// cases that should be pinned to the stack pointer, such as outgoing
+    /// arguments to calls.
+    bool IgnoreStackAdjust = false;
   };
   /// VariableSplit is a way to treat an f64 memory location as a pair of i32
...
@@ -237,7 +237,8 @@ protected:
     Legal_Reg = 1 << 0, // physical register, not stack location
     Legal_Imm = 1 << 1,
     Legal_Mem = 1 << 2, // includes [eax+4*ecx] as well as [esp+12]
-    Legal_All = ~Legal_None
+    Legal_Rematerializable = 1 << 3,
+    Legal_All = ~Legal_Rematerializable
   };
   using LegalMask = uint32_t;
   Operand *legalize(Operand *From, LegalMask Allowed = Legal_All,
...
@@ -302,6 +302,11 @@ template <class Machine> void TargetX86Base<Machine>::staticInit() {
 template <class Machine> void TargetX86Base<Machine>::translateO2() {
   TimerMarker T(TimerStack::TT_O2, Func);
+  // Merge Alloca instructions, and lay out the stack.
+  static constexpr bool SortAndCombineAllocas = true;
+  Func->processAllocas(SortAndCombineAllocas);
+  Func->dump("After Alloca processing");
   if (!Ctx->getFlags().getPhiEdgeSplit()) {
     // Lower Phi instructions.
     Func->placePhiLoads();
@@ -420,6 +425,11 @@ template <class Machine> void TargetX86Base<Machine>::translateO2() {
 template <class Machine> void TargetX86Base<Machine>::translateOm1() {
   TimerMarker T(TimerStack::TT_Om1, Func);
+  // Do not merge Alloca instructions, and lay out the stack.
+  static constexpr bool SortAndCombineAllocas = false;
+  Func->processAllocas(SortAndCombineAllocas);
+  Func->dump("After Alloca processing");
   Func->placePhiLoads();
   if (Func->hasError())
     return;
@@ -945,7 +955,7 @@ TargetX86Base<Machine>::getRegisterSet(RegSetMask Include,
 template <class Machine>
 void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) {
   if (!Inst->getKnownFrameOffset())
-    IsEbpBasedFrame = true;
+    setHasFramePointer();
   // Conservatively require the stack to be aligned. Some stack adjustment
   // operations implemented below assume that the stack is aligned before the
   // alloca. All the alloca code ensures that the stack alignment is preserved
@@ -969,6 +979,7 @@ void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) {
   uint32_t Alignment =
       std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES);
   if (Alignment > Traits::X86_STACK_ALIGNMENT_BYTES) {
+    setHasFramePointer();
     _and(esp, Ctx->getConstantInt32(-Alignment));
   }
   if (const auto *ConstantTotalSize =
@@ -5500,10 +5511,12 @@ Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed,
     Variable *RegBase = nullptr;
     Variable *RegIndex = nullptr;
     if (Base) {
-      RegBase = legalizeToReg(Base);
+      RegBase = llvm::cast<Variable>(
+          legalize(Base, Legal_Reg | Legal_Rematerializable));
     }
     if (Index) {
-      RegIndex = legalizeToReg(Index);
+      RegIndex = llvm::cast<Variable>(
+          legalize(Index, Legal_Reg | Legal_Rematerializable));
     }
     if (Base != RegBase || Index != RegIndex) {
       Mem = Traits::X86OperandMem::create(Func, Ty, RegBase, Mem->getOffset(),
@@ -5575,12 +5588,25 @@ Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed,
     // either when the variable is pre-colored or when it is assigned infinite
     // weight.
     bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
+    bool MustRematerialize =
+        (Var->isRematerializable() && !(Allowed & Legal_Rematerializable));
     // We need a new physical register for the operand if:
-    //   Mem is not allowed and Var isn't guaranteed a physical
-    //   register, or
-    //   RegNum is required and Var->getRegNum() doesn't match.
-    if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
-        (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
+    // - Mem is not allowed and Var isn't guaranteed a physical register, or
+    // - RegNum is required and Var->getRegNum() doesn't match, or
+    // - Var is a rematerializable variable and rematerializable pass-through is
+    //   not allowed (in which case we need an lea instruction).
+    if (MustRematerialize) {
+      assert(Ty == IceType_i32);
+      Variable *NewVar = makeReg(Ty, RegNum);
+      // Since Var is rematerializable, the offset will be added when the lea is
+      // emitted.
+      constexpr Constant *NoOffset = nullptr;
+      auto *Mem = Traits::X86OperandMem::create(Func, Ty, Var, NoOffset);
+      _lea(NewVar, Mem);
+      From = NewVar;
+    } else if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
+               (RegNum != Variable::NoRegister && RegNum != Var->getRegNum()) ||
+               MustRematerialize) {
       From = copyToReg(From, RegNum);
     }
     return From;
...
; This is a basic test of the alloca instruction and a call.
; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
; RUN: --target x8632 -i %s --args -O2 -allow-externally-defined-symbols \
; RUN: | %if --need=target_X8632 --command FileCheck %s
declare void @copy(i32 %arg1, i8* %arr1, i8* %arr2, i8* %arr3, i8* %arr4);
; Test that alloca base addresses get passed correctly to functions.
define internal void @caller1(i32 %arg) {
entry:
%a1 = alloca i8, i32 32, align 4
%p1 = bitcast i8* %a1 to i32*
store i32 %arg, i32* %p1, align 1
call void @copy(i32 %arg, i8* %a1, i8* %a1, i8* %a1, i8* %a1)
ret void
}
; CHECK-LABEL: caller1
; CHECK-NEXT: sub esp,0xc
; CHECK-NEXT: mov eax,DWORD PTR [esp+0x10]
; CHECK-NEXT: sub esp,0x20
; CHECK-NEXT: mov ecx,esp
; CHECK-NEXT: mov DWORD PTR [esp],eax
; CHECK-NEXT: sub esp,0x20
; CHECK-NEXT: mov DWORD PTR [esp],eax
; CHECK-NEXT: lea eax,[esp+0x20]
; CHECK-NEXT: mov DWORD PTR [esp+0x4],eax
; CHECK-NEXT: lea eax,[esp+0x20]
; CHECK-NEXT: mov DWORD PTR [esp+0x8],eax
; CHECK-NEXT: lea eax,[esp+0x20]
; CHECK-NEXT: mov DWORD PTR [esp+0xc],eax
; CHECK-NEXT: lea eax,[esp+0x20]
; CHECK-NEXT: mov DWORD PTR [esp+0x10],eax
; CHECK-NEXT: call
; CHECK-NEXT: add esp,0x20
; CHECK-NEXT: add esp,0x2c
; CHECK-NEXT: ret
; Test that alloca base addresses get passed correctly to functions.
define internal void @caller2(i32 %arg) {
entry:
%a1 = alloca i8, i32 32, align 4
%a2 = alloca i8, i32 32, align 4
%p1 = bitcast i8* %a1 to i32*
%p2 = bitcast i8* %a2 to i32*
store i32 %arg, i32* %p1, align 1
store i32 %arg, i32* %p2, align 1
call void @copy(i32 %arg, i8* %a1, i8* %a2, i8* %a1, i8* %a2)
ret void
}
; CHECK-LABEL: caller2
; CHECK-NEXT: sub esp,0xc
; CHECK-NEXT: mov eax,DWORD PTR [esp+0x10]
; CHECK-NEXT: sub esp,0x40
; CHECK-NEXT: mov ecx,esp
; CHECK-NEXT: mov DWORD PTR [esp],eax
; CHECK-NEXT: mov DWORD PTR [esp+0x20],eax
; CHECK-NEXT: sub esp,0x20
; CHECK-NEXT: mov DWORD PTR [esp],eax
; CHECK-NEXT: lea eax,[esp+0x20]
; CHECK-NEXT: mov DWORD PTR [esp+0x4],eax
; CHECK-NEXT: lea eax,[esp+0x40]
; CHECK-NEXT: mov DWORD PTR [esp+0x8],eax
; CHECK-NEXT: lea eax,[esp+0x20]
; CHECK-NEXT: mov DWORD PTR [esp+0xc],eax
; CHECK-NEXT: lea eax,[esp+0x40]
; CHECK-NEXT: mov DWORD PTR [esp+0x10],eax
; CHECK-NEXT: call
; CHECK-NEXT: add esp,0x20
; CHECK-NEXT: add esp,0x4c
; CHECK-NEXT: ret
; This is a basic test of the alloca instruction.
; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
; RUN: --target x8632 -i %s --args -O2 -allow-externally-defined-symbols \
; RUN: | %if --need=target_X8632 --command FileCheck %s
; Test that a sequence of allocas with less than stack alignment get fused.
define internal void @fused_small_align(i32 %arg) {
entry:
%a1 = alloca i8, i32 8, align 4
%a2 = alloca i8, i32 12, align 4
%a3 = alloca i8, i32 16, align 8
%p1 = bitcast i8* %a1 to i32*
%p2 = bitcast i8* %a2 to i32*
%p3 = bitcast i8* %a3 to i32*
store i32 %arg, i32* %p1, align 1
store i32 %arg, i32* %p2, align 1
store i32 %arg, i32* %p3, align 1
ret void
}
; CHECK-LABEL: fused_small_align
; CHECK-NEXT: sub esp,0xc
; CHECK-NEXT: mov eax,DWORD PTR [esp+0x10]
; CHECK-NEXT: sub esp,0x30
; CHECK-NEXT: mov {{.*}},esp
; CHECK-NEXT: mov DWORD PTR [esp+0x10],eax
; CHECK-NEXT: mov DWORD PTR [esp+0x18],eax
; CHECK-NEXT: mov DWORD PTR [esp],eax
; CHECK-NEXT: add esp,0x3c
; Test that a sequence of allocas with greater than stack alignment get fused.
define internal void @fused_large_align(i32 %arg) {
entry:
%a1 = alloca i8, i32 8, align 32
%a2 = alloca i8, i32 12, align 64
%a3 = alloca i8, i32 16, align 32
%p1 = bitcast i8* %a1 to i32*
%p2 = bitcast i8* %a2 to i32*
%p3 = bitcast i8* %a3 to i32*
store i32 %arg, i32* %p1, align 1
store i32 %arg, i32* %p2, align 1
store i32 %arg, i32* %p3, align 1
ret void
}
; CHECK-LABEL: fused_large_align
; CHECK-NEXT: push ebp
; CHECK-NEXT: mov ebp,esp
; CHECK-NEXT: sub esp,0x8
; CHECK-NEXT: mov eax,DWORD PTR [ebp+0x8]
; CHECK-NEXT: and esp,0xffffffc0
; CHECK-NEXT: sub esp,0x80
; CHECK-NEXT: mov ecx,esp
; CHECK-NEXT: mov DWORD PTR [esp+0x40],eax
; CHECK-NEXT: mov DWORD PTR [esp],eax
; CHECK-NEXT: mov DWORD PTR [esp+0x60],eax
; CHECK-NEXT: mov esp,ebp
; CHECK-NEXT: pop ebp
@@ -40,7 +40,6 @@ entry:
 }
 ; CHECK-LABEL: test_fused_load_sub_a
 ;    alloca store
-; CHECK: mov {{.*}},esp
 ; CHECK: mov DWORD PTR {{.*}},0x3e7
 ;    atomic store (w/ its own mfence)
 ; The load + sub are optimized into one everywhere.
@@ -80,7 +79,6 @@ entry:
 }
 ; CHECK-LABEL: test_fused_load_sub_b
 ;    alloca store
-; CHECK: mov {{.*}},esp
 ; CHECK: mov DWORD PTR {{.*}},0x3e7
 ;    atomic store (w/ its own mfence)
 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_a
@@ -121,7 +119,6 @@ entry:
 }
 ; CHECK-LABEL: test_fused_load_sub_c
 ;    alloca store
-; CHECK: mov {{.*}},esp
 ; CHECK: mov DWORD PTR {{.*}},0x3e7
 ;    atomic store (w/ its own mfence)
 ; CHECK: sub {{.*}},DWORD PTR {{.*}}g32_a
...