Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
swiftshader
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Chen Yisong
swiftshader
Commits
614140e2
Commit
614140e2
authored
Nov 23, 2015
by
John Porto
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Subzero. ARM32. Combine allocas.
BUG=
https://code.google.com/p/nativeclient/issues/detail?id=4076
R=stichnot@chromium.org Review URL:
https://codereview.chromium.org/1465213002
.
parent
fc22f770
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
215 additions
and
92 deletions
+215
-92
IceCfg.cpp
src/IceCfg.cpp
+8
-2
IceTargetLowering.h
src/IceTargetLowering.h
+1
-0
IceTargetLoweringARM32.cpp
src/IceTargetLoweringARM32.cpp
+131
-40
IceTargetLoweringARM32.h
src/IceTargetLoweringARM32.h
+35
-9
bic.ll
tests_lit/assembler/arm32/bic.ll
+24
-29
alloc.ll
tests_lit/llvm2ice_tests/alloc.ll
+16
-12
No files found.
src/IceCfg.cpp
View file @
614140e2
...
...
@@ -484,8 +484,14 @@ void Cfg::sortAndCombineAllocas(CfgVector<Inst *> &Allocas,
}
else
{
// Addressing is relative to the stack pointer or to a user pointer. Add
// the offset before adding the size of the object, because it grows
// upwards from the stack pointer.
Offsets
.
push_back
(
CurrentOffset
);
// upwards from the stack pointer. In addition, if the addressing is
// relative to the stack pointer, we need to add the pre-computed max out
// args size bytes.
const
uint32_t
OutArgsOffsetOrZero
=
(
BaseVariableType
==
BVT_StackPointer
)
?
getTarget
()
->
maxOutArgsSizeBytes
()
:
0
;
Offsets
.
push_back
(
CurrentOffset
+
OutArgsOffsetOrZero
);
}
// Update the running offset of the fused alloca region.
CurrentOffset
+=
Size
;
...
...
src/IceTargetLowering.h
View file @
614140e2
...
...
@@ -211,6 +211,7 @@ public:
virtual
uint32_t
getStackAlignment
()
const
=
0
;
virtual
void
reserveFixedAllocaArea
(
size_t
Size
,
size_t
Align
)
=
0
;
virtual
int32_t
getFrameFixedAllocaOffset
()
const
=
0
;
virtual
uint32_t
maxOutArgsSizeBytes
()
const
{
return
0
;
}
/// Return whether a 64-bit Variable should be split into a Variable64On32.
virtual
bool
shouldSplitToVariable64On32
(
Type
Ty
)
const
=
0
;
...
...
src/IceTargetLoweringARM32.cpp
View file @
614140e2
...
...
@@ -265,7 +265,7 @@ uint32_t TargetARM32::getCallStackArgumentsSizeBytes(const InstCall *Call) {
}
void
TargetARM32
::
findMaxStackOutArgsSize
()
{
// MinNeededOutArgsBytes should be updated if the Target ever creates a
n
// MinNeededOutArgsBytes should be updated if the Target ever creates a
// high-level InstCall that requires more stack bytes.
constexpr
size_t
MinNeededOutArgsBytes
=
0
;
MaxOutArgsSizeBytes
=
MinNeededOutArgsBytes
;
...
...
@@ -291,7 +291,7 @@ void TargetARM32::translateO2() {
findMaxStackOutArgsSize
();
// Do not merge Alloca instructions, and lay out the stack.
static
constexpr
bool
SortAndCombineAllocas
=
fals
e
;
static
constexpr
bool
SortAndCombineAllocas
=
tru
e
;
Func
->
processAllocas
(
SortAndCombineAllocas
);
Func
->
dump
(
"After Alloca processing"
);
...
...
@@ -356,6 +356,7 @@ void TargetARM32::translateO2() {
regAlloc
(
RAK_Global
);
if
(
Func
->
hasError
())
return
;
copyRegAllocFromInfWeightVariable64On32
(
Func
->
getVariables
());
Func
->
dump
(
"After linear scan regalloc"
);
...
...
@@ -364,6 +365,8 @@ void TargetARM32::translateO2() {
Func
->
dump
(
"After advanced Phi lowering"
);
}
ForbidTemporaryWithoutReg
_
(
this
);
// Stack frame mapping.
Func
->
genFrame
();
if
(
Func
->
hasError
())
...
...
@@ -399,8 +402,8 @@ void TargetARM32::translateOm1() {
findMaxStackOutArgsSize
();
// Do not merge Alloca instructions, and lay out the stack.
static
constexpr
bool
SortAndCombineAllocas
=
false
;
Func
->
processAllocas
(
SortAndCombineAllocas
);
static
constexpr
bool
Dont
SortAndCombineAllocas
=
false
;
Func
->
processAllocas
(
Dont
SortAndCombineAllocas
);
Func
->
dump
(
"After Alloca processing"
);
Func
->
placePhiLoads
();
...
...
@@ -424,9 +427,12 @@ void TargetARM32::translateOm1() {
regAlloc
(
RAK_InfOnly
);
if
(
Func
->
hasError
())
return
;
copyRegAllocFromInfWeightVariable64On32
(
Func
->
getVariables
());
Func
->
dump
(
"After regalloc of infinite-weight variables"
);
ForbidTemporaryWithoutReg
_
(
this
);
Func
->
genFrame
();
if
(
Func
->
hasError
())
return
;
...
...
@@ -520,6 +526,7 @@ void TargetARM32::emitVariable(const Variable *Var) const {
llvm
::
report_fatal_error
(
"Infinite-weight Variable has no register assigned"
);
}
assert
(
!
Var
->
isRematerializable
());
int32_t
Offset
=
Var
->
getStackOffset
();
int32_t
BaseRegNum
=
Var
->
getBaseRegNum
();
if
(
BaseRegNum
==
Variable
::
NoRegister
)
{
...
...
@@ -850,6 +857,9 @@ void TargetARM32::addProlog(CfgNode *Node) {
SpillAreaSizeBytes
=
StackSize
-
StackOffset
;
}
// Combine fixed alloca with SpillAreaSize.
SpillAreaSizeBytes
+=
FixedAllocaSizeBytes
;
// Generate "sub sp, SpillAreaSizeBytes"
if
(
SpillAreaSizeBytes
)
{
// Use the scratch register if needed to legalize the immediate.
...
...
@@ -857,7 +867,11 @@ void TargetARM32::addProlog(CfgNode *Node) {
Legal_Reg
|
Legal_Flex
,
getReservedTmpReg
());
Variable
*
SP
=
getPhysicalRegister
(
RegARM32
::
Reg_sp
);
_sub
(
SP
,
SP
,
SubAmount
);
if
(
FixedAllocaAlignBytes
>
ARM32_STACK_ALIGNMENT_BYTES
)
{
alignRegisterPow2
(
SP
,
FixedAllocaAlignBytes
);
}
}
Ctx
->
statsUpdateFrameBytes
(
SpillAreaSizeBytes
);
// Fill in stack offsets for stack args, and copy args into registers for
...
...
@@ -1034,6 +1048,7 @@ OperandARM32Mem *TargetARM32::createMemOperand(Type Ty, int32_t Offset,
Variable
*
OrigBaseReg
,
Variable
**
NewBaseReg
,
int32_t
*
NewBaseOffset
)
{
assert
(
!
OrigBaseReg
->
isRematerializable
());
if
(
isLegalMemOffset
(
Ty
,
Offset
))
{
return
OperandARM32Mem
::
create
(
Func
,
Ty
,
OrigBaseReg
,
...
...
@@ -1053,6 +1068,7 @@ OperandARM32Mem *TargetARM32::createMemOperand(Type Ty, int32_t Offset,
OffsetDiff
=
0
;
}
assert
(
!
(
*
NewBaseReg
)
->
isRematerializable
());
return
OperandARM32Mem
::
create
(
Func
,
Ty
,
*
NewBaseReg
,
llvm
::
cast
<
ConstantInteger32
>
(
Ctx
->
getConstantInt32
(
OffsetDiff
)),
...
...
@@ -1076,8 +1092,9 @@ void TargetARM32::legalizeMov(InstARM32Mov *MovInstr, Variable *OrigBaseReg,
bool
Legalized
=
false
;
if
(
!
Dest
->
hasReg
())
{
auto
*
const
SrcR
=
llvm
::
cast
<
Variable
>
(
Src
);
auto
*
SrcR
=
llvm
::
cast
<
Variable
>
(
Src
);
assert
(
SrcR
->
hasReg
());
assert
(
!
SrcR
->
isRematerializable
());
const
int32_t
Offset
=
Dest
->
getStackOffset
();
// This is a _mov(Mem(), Variable), i.e., a store.
_str
(
SrcR
,
createMemOperand
(
DestTy
,
Offset
,
OrigBaseReg
,
NewBaseReg
,
...
...
@@ -1087,12 +1104,26 @@ void TargetARM32::legalizeMov(InstARM32Mov *MovInstr, Variable *OrigBaseReg,
Context
.
insert
(
InstFakeDef
::
create
(
Func
,
Dest
));
Legalized
=
true
;
}
else
if
(
auto
*
Var
=
llvm
::
dyn_cast
<
Variable
>
(
Src
))
{
if
(
!
Var
->
hasReg
())
{
const
int32_t
Offset
=
Var
->
getStackOffset
();
_ldr
(
Dest
,
createMemOperand
(
DestTy
,
Offset
,
OrigBaseReg
,
NewBaseReg
,
NewBaseOffset
),
MovInstr
->
getPredicate
());
if
(
Var
->
isRematerializable
())
{
// Rematerialization arithmetic.
const
int32_t
ExtraOffset
=
(
static_cast
<
SizeT
>
(
Var
->
getRegNum
())
==
getFrameReg
())
?
getFrameFixedAllocaOffset
()
:
0
;
const
int32_t
Offset
=
Var
->
getStackOffset
()
+
ExtraOffset
;
Operand
*
OffsetRF
=
legalize
(
Ctx
->
getConstantInt32
(
Offset
),
Legal_Reg
|
Legal_Flex
,
Dest
->
getRegNum
());
_add
(
Dest
,
Var
,
OffsetRF
);
Legalized
=
true
;
}
else
{
if
(
!
Var
->
hasReg
())
{
const
int32_t
Offset
=
Var
->
getStackOffset
();
_ldr
(
Dest
,
createMemOperand
(
DestTy
,
Offset
,
OrigBaseReg
,
NewBaseReg
,
NewBaseOffset
),
MovInstr
->
getPredicate
());
Legalized
=
true
;
}
}
}
...
...
@@ -1163,13 +1194,15 @@ Operand *TargetARM32::loOperand(Operand *Operand) {
// increment) in case of duplication.
assert
(
Mem
->
getAddrMode
()
==
OperandARM32Mem
::
Offset
||
Mem
->
getAddrMode
()
==
OperandARM32Mem
::
NegOffset
);
Variable
*
BaseR
=
legalizeToReg
(
Mem
->
getBase
());
if
(
Mem
->
isRegReg
())
{
return
OperandARM32Mem
::
create
(
Func
,
IceType_i32
,
Mem
->
getBase
(),
Mem
->
getIndex
(),
Mem
->
getShiftOp
(),
Mem
->
getShiftAmt
(),
Mem
->
getAddrMode
());
Variable
*
IndexR
=
legalizeToReg
(
Mem
->
getIndex
());
return
OperandARM32Mem
::
create
(
Func
,
IceType_i32
,
BaseR
,
IndexR
,
Mem
->
getShiftOp
(),
Mem
->
getShiftAmt
(),
Mem
->
getAddrMode
());
}
else
{
return
OperandARM32Mem
::
create
(
Func
,
IceType_i32
,
Mem
->
getBase
(),
Mem
->
get
Offset
(),
Mem
->
get
AddrMode
());
return
OperandARM32Mem
::
create
(
Func
,
IceType_i32
,
BaseR
,
Mem
->
getOffset
(),
Mem
->
getAddrMode
());
}
}
llvm_unreachable
(
"Unsupported operand type"
);
...
...
@@ -1201,7 +1234,9 @@ Operand *TargetARM32::hiOperand(Operand *Operand) {
Variable
*
NewBase
=
Func
->
makeVariable
(
Base
->
getType
());
lowerArithmetic
(
InstArithmetic
::
create
(
Func
,
InstArithmetic
::
Add
,
NewBase
,
Base
,
Four
));
return
OperandARM32Mem
::
create
(
Func
,
SplitType
,
NewBase
,
Mem
->
getIndex
(),
Variable
*
BaseR
=
legalizeToReg
(
NewBase
);
Variable
*
IndexR
=
legalizeToReg
(
Mem
->
getIndex
());
return
OperandARM32Mem
::
create
(
Func
,
SplitType
,
BaseR
,
IndexR
,
Mem
->
getShiftOp
(),
Mem
->
getShiftAmt
(),
Mem
->
getAddrMode
());
}
else
{
...
...
@@ -1216,16 +1251,17 @@ Operand *TargetARM32::hiOperand(Operand *Operand) {
// mode into a RegReg addressing mode. Since NaCl sandboxing disallows
// RegReg addressing modes, prefer adding to base and replacing
// instead. Thus we leave the old offset alone.
Constant
*
Four
=
Ctx
->
getConstantInt32
(
4
);
Constant
*
_4
=
Ctx
->
getConstantInt32
(
4
);
Variable
*
NewBase
=
Func
->
makeVariable
(
Base
->
getType
());
lowerArithmetic
(
InstArithmetic
::
create
(
Func
,
InstArithmetic
::
Add
,
NewBase
,
Base
,
Four
));
NewBase
,
Base
,
_4
));
Base
=
NewBase
;
}
else
{
Offset
=
llvm
::
cast
<
ConstantInteger32
>
(
Ctx
->
getConstantInt32
(
NextOffsetVal
));
}
return
OperandARM32Mem
::
create
(
Func
,
SplitType
,
Base
,
Offset
,
Variable
*
BaseR
=
legalizeToReg
(
Base
);
return
OperandARM32Mem
::
create
(
Func
,
SplitType
,
BaseR
,
Offset
,
Mem
->
getAddrMode
());
}
}
...
...
@@ -1264,7 +1300,6 @@ llvm::SmallBitVector TargetARM32::getRegisterSet(RegSetMask Include,
}
void
TargetARM32
::
lowerAlloca
(
const
InstAlloca
*
Inst
)
{
UsesFramePointer
=
true
;
// Conservatively require the stack to be aligned. Some stack adjustment
// operations implemented below assume that the stack is aligned before the
// alloca. All the alloca code ensures that the stack alignment is preserved
...
...
@@ -1272,29 +1307,53 @@ void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
// cases.
NeedsStackAlignment
=
true
;
// TODO(stichnot): minimize the number of adjustments of SP, etc.
Variable
*
SP
=
getPhysicalRegister
(
RegARM32
::
Reg_sp
);
Variable
*
Dest
=
Inst
->
getDest
();
uint32_t
AlignmentParam
=
Inst
->
getAlignInBytes
();
// For default align=0, set it to the real value 1, to avoid any
// bit-manipulation problems below.
AlignmentParam
=
std
::
max
(
AlignmentParam
,
1u
);
const
uint32_t
AlignmentParam
=
std
::
max
(
1u
,
Inst
->
getAlignInBytes
()
);
// LLVM enforces power of 2 alignment.
assert
(
llvm
::
isPowerOf2_32
(
AlignmentParam
));
assert
(
llvm
::
isPowerOf2_32
(
ARM32_STACK_ALIGNMENT_BYTES
));
uint32_t
Alignment
=
std
::
max
(
AlignmentParam
,
ARM32_STACK_ALIGNMENT_BYTES
);
if
(
Alignment
>
ARM32_STACK_ALIGNMENT_BYTES
)
{
const
uint32_t
Alignment
=
std
::
max
(
AlignmentParam
,
ARM32_STACK_ALIGNMENT_BYTES
);
const
bool
OverAligned
=
Alignment
>
ARM32_STACK_ALIGNMENT_BYTES
;
const
bool
OptM1
=
Ctx
->
getFlags
().
getOptLevel
()
==
Opt_m1
;
const
bool
AllocaWithKnownOffset
=
Inst
->
getKnownFrameOffset
();
const
bool
UseFramePointer
=
hasFramePointer
()
||
OverAligned
||
!
AllocaWithKnownOffset
||
OptM1
;
if
(
UseFramePointer
)
setHasFramePointer
();
Variable
*
SP
=
getPhysicalRegister
(
RegARM32
::
Reg_sp
);
if
(
OverAligned
)
{
alignRegisterPow2
(
SP
,
Alignment
);
}
Variable
*
Dest
=
Inst
->
getDest
();
Operand
*
TotalSize
=
Inst
->
getSizeInBytes
();
if
(
const
auto
*
ConstantTotalSize
=
llvm
::
dyn_cast
<
ConstantInteger32
>
(
TotalSize
))
{
uint32_t
Value
=
ConstantTotalSize
->
getValue
();
Value
=
Utils
::
applyAlignment
(
Value
,
Alignment
);
Operand
*
SubAmount
=
legalize
(
Ctx
->
getConstantInt32
(
Value
));
_sub
(
SP
,
SP
,
SubAmount
);
const
uint32_t
Value
=
Utils
::
applyAlignment
(
ConstantTotalSize
->
getValue
(),
Alignment
);
// Constant size alloca.
if
(
!
UseFramePointer
)
{
// If we don't need a Frame Pointer, this alloca has a known offset to the
// stack pointer. We don't need to adjust the stack pointer, nor assign any
// value to Dest, as Dest is rematerializable.
assert
(
Dest
->
isRematerializable
());
FixedAllocaSizeBytes
+=
Value
;
Context
.
insert
(
InstFakeDef
::
create
(
Func
,
Dest
));
return
;
}
// If a frame pointer is required, then we need to store the alloca'd result
// in Dest.
Operand
*
SubAmountRF
=
legalize
(
Ctx
->
getConstantInt32
(
Value
),
Legal_Reg
|
Legal_Flex
);
_sub
(
SP
,
SP
,
SubAmountRF
);
}
else
{
// Non-constant sizes need to be adjusted to the next highest multiple of
// the required alignment at runtime.
...
...
@@ -1306,6 +1365,8 @@ void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
alignRegisterPow2
(
T
,
Alignment
);
_sub
(
SP
,
SP
,
T
);
}
// Adds back a few bytes to SP to account for the out args area.
Variable
*
T
=
SP
;
if
(
MaxOutArgsSizeBytes
!=
0
)
{
T
=
makeReg
(
getPointerType
());
...
...
@@ -1313,6 +1374,7 @@ void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
Ctx
->
getConstantInt32
(
MaxOutArgsSizeBytes
),
Legal_Reg
|
Legal_Flex
);
_add
(
T
,
SP
,
OutArgsSizeRF
);
}
_mov
(
Dest
,
T
);
}
...
...
@@ -1976,6 +2038,12 @@ void TargetARM32::lowerInt64Arithmetic(InstArithmetic::OpKind Op,
void
TargetARM32
::
lowerArithmetic
(
const
InstArithmetic
*
Inst
)
{
Variable
*
Dest
=
Inst
->
getDest
();
if
(
Dest
->
isRematerializable
())
{
Context
.
insert
(
InstFakeDef
::
create
(
Func
,
Dest
));
return
;
}
if
(
Dest
->
getType
()
==
IceType_i1
)
{
lowerInt1Arithmetic
(
Inst
);
return
;
...
...
@@ -2139,8 +2207,8 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
if
(
Srcs
.
hasConstOperand
())
{
// TODO(jpp): lowering Src0R here is wrong -- Src0R it is not guaranteed
// to be used.
Variable
*
Src0R
=
Srcs
.
src0R
(
this
);
if
(
Srcs
.
immediateIsFlexEncodable
())
{
Variable
*
Src0R
=
Srcs
.
src0R
(
this
);
Operand
*
Src1RF
=
Srcs
.
src1RF
(
this
);
if
(
Srcs
.
swappedOperands
())
{
_rsb
(
T
,
Src0R
,
Src1RF
);
...
...
@@ -2151,6 +2219,7 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
return
;
}
if
(
!
Srcs
.
swappedOperands
()
&&
Srcs
.
negatedImmediateIsFlexEncodable
())
{
Variable
*
Src0R
=
Srcs
.
src0R
(
this
);
Operand
*
Src1F
=
Srcs
.
negatedSrc1F
(
this
);
_add
(
T
,
Src0R
,
Src1F
);
_mov
(
Dest
,
T
);
...
...
@@ -2215,6 +2284,12 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
void
TargetARM32
::
lowerAssign
(
const
InstAssign
*
Inst
)
{
Variable
*
Dest
=
Inst
->
getDest
();
if
(
Dest
->
isRematerializable
())
{
Context
.
insert
(
InstFakeDef
::
create
(
Func
,
Dest
));
return
;
}
Operand
*
Src0
=
Inst
->
getSrc
(
0
);
assert
(
Dest
->
getType
()
==
Src0
->
getType
());
if
(
Dest
->
getType
()
==
IceType_i64
)
{
...
...
@@ -4425,13 +4500,17 @@ OperandARM32Mem *TargetARM32::formAddressingMode(Type Ty, Cfg *Func,
assert
(
OffsetImm
<
0
?
(
ValidImmMask
&
-
OffsetImm
)
==
-
OffsetImm
:
(
ValidImmMask
&
OffsetImm
)
==
OffsetImm
);
Variable
*
BaseR
=
makeReg
(
getPointerType
());
Context
.
insert
(
InstAssign
::
create
(
Func
,
BaseR
,
BaseVar
));
if
(
OffsetReg
!=
nullptr
)
{
return
OperandARM32Mem
::
create
(
Func
,
Ty
,
BaseVar
,
OffsetReg
,
ShiftKind
,
Variable
*
OffsetR
=
makeReg
(
getPointerType
());
Context
.
insert
(
InstAssign
::
create
(
Func
,
OffsetR
,
OffsetReg
));
return
OperandARM32Mem
::
create
(
Func
,
Ty
,
BaseR
,
OffsetR
,
ShiftKind
,
OffsetRegShamt
);
}
return
OperandARM32Mem
::
create
(
Func
,
Ty
,
Base
Var
,
Func
,
Ty
,
Base
R
,
llvm
::
cast
<
ConstantInteger32
>
(
Ctx
->
getConstantInt32
(
OffsetImm
)));
}
...
...
@@ -4630,7 +4709,8 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
if
(
RegNum
==
Variable
::
NoRegister
)
{
if
(
Variable
*
Subst
=
getContext
().
availabilityGet
(
From
))
{
// At this point we know there is a potential substitution available.
if
(
Subst
->
mustHaveReg
()
&&
!
Subst
->
hasReg
())
{
if
(
!
Subst
->
isRematerializable
()
&&
Subst
->
mustHaveReg
()
&&
!
Subst
->
hasReg
())
{
// At this point we know the substitution will have a register.
if
(
From
->
getType
()
==
Subst
->
getType
())
{
// At this point we know the substitution's register is compatible.
...
...
@@ -4788,6 +4868,13 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
}
if
(
auto
*
Var
=
llvm
::
dyn_cast
<
Variable
>
(
From
))
{
if
(
Var
->
isRematerializable
())
{
// TODO(jpp): We don't need to rematerialize Var if legalize() was invoked
// for a Variable in a Mem operand.
Variable
*
T
=
makeReg
(
Var
->
getType
(),
RegNum
);
_mov
(
T
,
Var
);
return
T
;
}
// Check if the variable is guaranteed a physical register. This can happen
// either when the variable is pre-colored or when it is assigned infinite
// weight.
...
...
@@ -4844,9 +4931,9 @@ OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) {
// If we didn't do address mode optimization, then we only have a
// base/offset to work with. ARM always requires a base register, so
// just use that to hold the operand.
Variable
*
Base
=
legalizeToReg
(
Operand
);
Variable
*
Base
R
=
legalizeToReg
(
Operand
);
return
OperandARM32Mem
::
create
(
Func
,
Ty
,
Base
,
Func
,
Ty
,
Base
R
,
llvm
::
cast
<
ConstantInteger32
>
(
Ctx
->
getConstantZero
(
IceType_i32
)));
}
...
...
@@ -4863,6 +4950,7 @@ Variable64On32 *TargetARM32::makeI64RegPair() {
Variable
*
TargetARM32
::
makeReg
(
Type
Type
,
int32_t
RegNum
)
{
// There aren't any 64-bit integer registers for ARM32.
assert
(
Type
!=
IceType_i64
);
assert
(
AllowTemporaryWithNoReg
||
RegNum
!=
Variable
::
NoRegister
);
Variable
*
Reg
=
Func
->
makeVariable
(
Type
);
if
(
RegNum
==
Variable
::
NoRegister
)
Reg
->
setMustHaveReg
();
...
...
@@ -4871,7 +4959,8 @@ Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) {
return
Reg
;
}
void
TargetARM32
::
alignRegisterPow2
(
Variable
*
Reg
,
uint32_t
Align
)
{
void
TargetARM32
::
alignRegisterPow2
(
Variable
*
Reg
,
uint32_t
Align
,
int32_t
TmpRegNum
)
{
assert
(
llvm
::
isPowerOf2_32
(
Align
));
uint32_t
RotateAmt
;
uint32_t
Immed_8
;
...
...
@@ -4880,10 +4969,12 @@ void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align) {
// it fits at all). Assume Align is usually small, in which case BIC works
// better. Thus, this rounds down to the alignment.
if
(
OperandARM32FlexImm
::
canHoldImm
(
Align
-
1
,
&
RotateAmt
,
&
Immed_8
))
{
Mask
=
legalize
(
Ctx
->
getConstantInt32
(
Align
-
1
),
Legal_Reg
|
Legal_Flex
);
Mask
=
legalize
(
Ctx
->
getConstantInt32
(
Align
-
1
),
Legal_Reg
|
Legal_Flex
,
TmpRegNum
);
_bic
(
Reg
,
Reg
,
Mask
);
}
else
{
Mask
=
legalize
(
Ctx
->
getConstantInt32
(
-
Align
),
Legal_Reg
|
Legal_Flex
);
Mask
=
legalize
(
Ctx
->
getConstantInt32
(
-
Align
),
Legal_Reg
|
Legal_Flex
,
TmpRegNum
);
_and
(
Reg
,
Reg
,
Mask
);
}
}
...
...
src/IceTargetLoweringARM32.h
View file @
614140e2
...
...
@@ -99,16 +99,15 @@ public:
}
uint32_t
getStackAlignment
()
const
override
;
void
reserveFixedAllocaArea
(
size_t
Size
,
size_t
Align
)
override
{
// TODO(sehr,jpp): Implement fixed stack layout.
(
void
)
Size
;
(
void
)
Align
;
llvm
::
report_fatal_error
(
"Not yet implemented"
)
;
FixedAllocaSizeBytes
=
Size
;
assert
(
llvm
::
isPowerOf2_32
(
Align
))
;
FixedAllocaAlignBytes
=
Align
;
PrologEmitsFixedAllocas
=
true
;
}
int32_t
getFrameFixedAllocaOffset
()
const
override
{
// TODO(sehr,jpp): Implement fixed stack layout.
llvm
::
report_fatal_error
(
"Not yet implemented"
);
return
0
;
return
FixedAllocaSizeBytes
-
(
SpillAreaSizeBytes
-
MaxOutArgsSizeBytes
);
}
uint32_t
maxOutArgsSizeBytes
()
const
override
{
return
MaxOutArgsSizeBytes
;
}
bool
shouldSplitToVariable64On32
(
Type
Ty
)
const
override
{
return
Ty
==
IceType_i64
;
...
...
@@ -250,7 +249,8 @@ protected:
Variable
*
makeReg
(
Type
Ty
,
int32_t
RegNum
=
Variable
::
NoRegister
);
static
Type
stackSlotType
();
Variable
*
copyToReg
(
Operand
*
Src
,
int32_t
RegNum
=
Variable
::
NoRegister
);
void
alignRegisterPow2
(
Variable
*
Reg
,
uint32_t
Align
);
void
alignRegisterPow2
(
Variable
*
Reg
,
uint32_t
Align
,
int32_t
TmpRegNum
=
Variable
::
NoRegister
);
/// Returns a vector in a register with the given constant entries.
Variable
*
makeVectorOfZeros
(
Type
Ty
,
int32_t
RegNum
=
Variable
::
NoRegister
);
...
...
@@ -811,7 +811,7 @@ protected:
}
// Iterates over the CFG and determines the maximum outgoing stack arguments
// bytes. This information is later used during addProlog()
d
o pre-allocate
// bytes. This information is later used during addProlog()
t
o pre-allocate
// the outargs area.
// TODO(jpp): This could live in the Parser, if we provided a Target-specific
// method that the Parser could call.
...
...
@@ -852,6 +852,9 @@ protected:
bool
NeedsStackAlignment
=
false
;
bool
MaybeLeafFunc
=
true
;
size_t
SpillAreaSizeBytes
=
0
;
size_t
FixedAllocaSizeBytes
=
0
;
size_t
FixedAllocaAlignBytes
=
0
;
bool
PrologEmitsFixedAllocas
=
false
;
uint32_t
MaxOutArgsSizeBytes
=
0
;
// TODO(jpp): std::array instead of array.
static
llvm
::
SmallBitVector
TypeToRegisterSet
[
RCARM32_NUM
];
...
...
@@ -970,6 +973,29 @@ private:
};
BoolComputationTracker
BoolComputations
;
// AllowTemporaryWithNoReg indicates if TargetARM32::makeReg() can be invoked
// without specifying a physical register. This is needed for creating unbound
// temporaries during Ice -> ARM lowering, but before register allocation.
// This is a safeguard ensuring that, during the legalization post-passes, no
// unbound temporaries are created.
bool
AllowTemporaryWithNoReg
=
true
;
// ForbidTemporaryWithoutReg is an RAII class that manages
// AllowTemporaryWithNoReg: it sets the flag to false on construction and
// back to true on destruction.
class
ForbidTemporaryWithoutReg
{
ForbidTemporaryWithoutReg
()
=
delete
;
ForbidTemporaryWithoutReg
(
const
ForbidTemporaryWithoutReg
&
)
=
delete
;
ForbidTemporaryWithoutReg
&
operator
=
(
const
ForbidTemporaryWithoutReg
&
)
=
delete
;
public
:
explicit
ForbidTemporaryWithoutReg
(
TargetARM32
*
Target
)
:
Target
(
Target
)
{
Target
->
AllowTemporaryWithNoReg
=
false
;
}
~
ForbidTemporaryWithoutReg
()
{
Target
->
AllowTemporaryWithNoReg
=
true
;
}
private
:
TargetARM32
*
const
Target
;
};
};
class
TargetDataARM32
final
:
public
TargetDataLowering
{
...
...
tests_lit/assembler/arm32/bic.ll
View file @
614140e2
...
...
@@ -28,70 +28,65 @@ define internal i32 @AllocBigAlign() {
; ASM-LABEL:AllocBigAlign:
; ASM-NEXT:.LAllocBigAlign$__0:
; ASM-NEXT: push {fp}
; ASM-NEXT: mov fp, sp
; ASM-NEXT: sub sp, sp, #12
; ASM-NEXT: bic sp, sp, #31
; ASM-NEXT: sub sp, sp, #32
; ASM-NEXT: mov r0, sp
; ASM-NEXT: mov sp, fp
; ASM-NEXT: pop {fp}
; ASM-NEXT: # fp = def.pseudo
; ASM-NEXT: bx lr
; DIS-LABEL:00000000 <AllocBigAlign>:
; DIS-NEXT: 0: e52db004
; DIS-NEXT: 4: e1a0b00d
; DIS-NEXT: 8: e24dd00c
; DIS-NEXT: c: e3cdd01f
; DIS-NEXT: 10: e24dd020
; DIS-NEXT: 14: e1a0000d
; DIS-NEXT: 18: e1a0d00b
; DIS-NEXT: 1c: e49db004
; DIS-NEXT: 20: e12fff1e
; IASM-LABEL:AllocBigAlign:
; IASM-NEXT:.LAllocBigAlign$__0:
; ASM-NEXT: push {fp}
; DIS-NEXT: 0: e52db004
; IASM-NEXT: .byte 0x4
; IASM-NEXT: .byte 0xb0
; IASM-NEXT: .byte 0x2d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: mov fp, sp
; DIS-NEXT: 4: e1a0b00d
; IASM: .byte 0xd
; IASM-NEXT: .byte 0xb0
; IASM-NEXT: .byte 0xa0
; IASM-NEXT: .byte 0xe1
; IASM: .byte 0xc
; ASM-NEXT: sub sp, sp, #32
; DIS-NEXT: 8: e24dd020
; IASM: .byte 0x20
; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0x4d
; IASM-NEXT: .byte 0xe2
; ASM-NEXT: bic sp, sp, #31
; DIS-NEXT: c: e3cdd01f
; IASM: .byte 0x1f
; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0xcd
; IASM-NEXT: .byte 0xe3
; IASM: .byte 0x20
; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0x4d
; IASM-NEXT: .byte 0xe2
; ASM-NEXT: # sp = def.pseudo
; IASM: .byte 0xd
; ASM-NEXT: add r0, sp, #0
; DIS-NEXT: 10: e28d0000
; IASM: .byte 0x0
; IASM-NEXT: .byte 0x0
; IASM-NEXT: .byte 0x
a0
; IASM-NEXT: .byte 0xe
1
; IASM-NEXT: .byte 0x
8d
; IASM-NEXT: .byte 0xe
2
; ASM-NEXT: mov sp, fp
; DIS-NEXT: 14: e1a0d00b
; IASM: .byte 0xb
; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0xa0
; IASM-NEXT: .byte 0xe1
; ASM-NEXT: pop {fp}
; DIS-NEXT: 18: e49db004
; IASM-NEXT: .byte 0x4
; IASM-NEXT: .byte 0xb0
; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe4
; ASM-NEXT: # fp = def.pseudo
; ASM-NEXT: bx lr
; DIS-NEXT: 1c: e12fff1e
; IASM: .byte 0x1e
; IASM-NEXT: .byte 0xff
; IASM-NEXT: .byte 0x2f
...
...
tests_lit/llvm2ice_tests/alloc.ll
View file @
614140e2
...
...
@@ -17,14 +17,14 @@
; RUN: --disassemble --target arm32 -i %s --args -O2 --skip-unimplemented \
; RUN: -allow-externally-defined-symbols \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 %s
; RUN: --command FileCheck --check-prefix ARM32
--check-prefix=ARM-OPT2
%s
; RUN: %if --need=target_ARM32 --need=allow_dump \
; RUN: --command %p2i --filetype=asm --assemble \
; RUN: --disassemble --target arm32 -i %s --args -Om1 --skip-unimplemented \
; RUN: -allow-externally-defined-symbols \
; RUN: | %if --need=target_ARM32 --need=allow_dump \
; RUN: --command FileCheck --check-prefix ARM32 %s
; RUN: --command FileCheck --check-prefix ARM32
--check-prefix=ARM-OPTM1
%s
define
internal
void
@fixed_416_align_16
(
i32
%n
)
{
entry:
...
...
@@ -47,8 +47,9 @@ entry:
; CHECK-OPTM1: call {{.*}} R_{{.*}} f1
; ARM32-LABEL: fixed_416_align_16
; ARM32: sub sp, sp, #416
; ARM32: bl {{.*}} R_{{.*}} f1
; ARM32-OPT2: sub sp, sp, #428
; ARM32-OPTM1: sub sp, sp, #416
; ARM32: bl {{.*}} R_{{.*}} f1
define
internal
void
@fixed_416_align_32
(
i32
%n
)
{
entry:
...
...
@@ -67,9 +68,10 @@ entry:
; CHECK: call {{.*}} R_{{.*}} f1
; ARM32-LABEL: fixed_416_align_32
; ARM32: bic sp, sp, #31
; ARM32: sub sp, sp, #416
; ARM32: bl {{.*}} R_{{.*}} f1
; ARM32-OPT2: sub sp, sp, #424
; ARM32-OPTM1: sub sp, sp, #416
; ARM32: bic sp, sp, #31
; ARM32: bl {{.*}} R_{{.*}} f1
; Show that the amount to allocate will be rounded up.
define
internal
void
@fixed_351_align_16
(
i32
%n
)
{
...
...
@@ -91,8 +93,9 @@ entry:
; CHECK-OPTM1: call {{.*}} R_{{.*}} f1
; ARM32-LABEL: fixed_351_align_16
; ARM32: sub sp, sp, #352
; ARM32: bl {{.*}} R_{{.*}} f1
; ARM32-OPT2: sub sp, sp, #364
; ARM32-OPTM1: sub sp, sp, #352
; ARM32: bl {{.*}} R_{{.*}} f1
define
internal
void
@fixed_351_align_32
(
i32
%n
)
{
entry:
...
...
@@ -111,9 +114,10 @@ entry:
; CHECK: call {{.*}} R_{{.*}} f1
; ARM32-LABEL: fixed_351_align_32
; ARM32: bic sp, sp, #31
; ARM32: sub sp, sp, #352
; ARM32: bl {{.*}} R_{{.*}} f1
; ARM32-OPT2: sub sp, sp, #360
; ARM32-OPTM1: sub sp, sp, #352
; ARM32: bic sp, sp, #31
; ARM32: bl {{.*}} R_{{.*}} f1
declare
void
@f1
(
i32
%ignored
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment