Commit a3c32146 by Karl Schimpf

Add VPUSH/VPOP instructions to the ARM32 integrated assembler.

Also fixes the corresponding emit methods for vpush and vpop to honor the constraint that at most 16 consecutive registers can be pushed/popped by a single instruction. BUG= https://bugs.chromium.org/p/nativeclient/issues/detail?id=4334 R=jpp@chromium.org, stichnot@chromium.org Review URL: https://codereview.chromium.org/1532233002 .
parent 39f40204
...@@ -1320,7 +1320,9 @@ class Assembler : public ValueObject { ...@@ -1320,7 +1320,9 @@ class Assembler : public ValueObject {
#if 0 #if 0
// Added the following missing operations: // Added the following missing operations:
// //
// ARM32::AssemblerARM::uxt() (uxtb and uxth). // ARM32::AssemblerARM32::uxt() (uxtb and uxth).
// ARM32::AssemblerARM32::vpop()
// ARM32::AssemblerARM32::vpush()
// ARM32::AssemblerARM:rbit(). // ARM32::AssemblerARM:rbit().
#endif #endif
......
...@@ -115,6 +115,9 @@ static constexpr IValueT kInstTypeDataImmediate = 1; // i.e. 001 ...@@ -115,6 +115,9 @@ static constexpr IValueT kInstTypeDataImmediate = 1; // i.e. 001
static constexpr IValueT kInstTypeMemImmediate = 2; // i.e. 010 static constexpr IValueT kInstTypeMemImmediate = 2; // i.e. 010
static constexpr IValueT kInstTypeRegisterShift = 3; // i.e. 011 static constexpr IValueT kInstTypeRegisterShift = 3; // i.e. 011
// Upper limit on the number of consecutive registers that a single vpush/vpop
// may name; emitVStackOp asserts against this value.
static constexpr SizeT VpushVpopMaxConsecRegs = 16;
// Offset modifier to current PC for next instruction. The offset is off by 8 // Offset modifier to current PC for next instruction. The offset is off by 8
// due to the way the ARM CPUs read PC. // due to the way the ARM CPUs read PC.
static constexpr IOffsetT kPCReadOffset = 8; static constexpr IOffsetT kPCReadOffset = 8;
...@@ -199,6 +202,12 @@ IValueT getEncodedGPRegNum(const Variable *Var) { ...@@ -199,6 +202,12 @@ IValueT getEncodedGPRegNum(const Variable *Var) {
: RegARM32::getEncodedGPR(Reg); : RegARM32::getEncodedGPR(Reg);
} }
// Returns the encoded S register number for the register assigned to Var.
// Var must already have a register, and that register must be an S register;
// both conditions are checked with assertions.
IValueT getEncodedSRegNum(const Variable *Var) {
  assert(Var->hasReg());
  const auto RegNum = Var->getRegNum();
  assert(RegARM32::isEncodedSReg(RegNum));
  return RegARM32::getEncodedSReg(RegNum);
}
// The way an operand is encoded into a sequence of bits in functions // The way an operand is encoded into a sequence of bits in functions
// encodeOperand and encodeAddress below. // encodeOperand and encodeAddress below.
enum EncodedOperand { enum EncodedOperand {
...@@ -1997,5 +2006,54 @@ void AssemblerARM32::uxt(const Operand *OpRd, const Operand *OpSrc0, ...@@ -1997,5 +2006,54 @@ void AssemblerARM32::uxt(const Operand *OpRd, const Operand *OpSrc0,
emitSignExtend(Cond, UxtOpcode, OpRd, OpSrc0, UxtName); emitSignExtend(Cond, UxtOpcode, OpRd, OpSrc0, UxtName);
} }
// Emits a vpush/vpop style instruction that transfers NumConsecRegs
// consecutive S registers, starting at the register assigned to OpBaseReg.
//
// Encoding pattern: ccccxxxxxDxxxxxxddddxxxxiiiiiiii where cccc=Cond,
// ddddD=BaseReg, iiiiiiii=NumConsecRegs, and the x bits come from Opcode.
//
// Cond          - condition under which the instruction executes.
// Opcode        - fixed bits of the instruction (cccc, D, dddd and iiiiiiii
//                 fields must be zero).
// OpBaseReg     - variable holding the first S register of the list.
// NumConsecRegs - number of registers in the list; must be in
//                 [1, VpushVpopMaxConsecRegs] and must not run past the last
//                 S register.
// InstName      - instruction name passed to verifyCondDefined.
void AssemblerARM32::emitVStackOp(CondARM32::Cond Cond, IValueT Opcode,
                                  const Variable *OpBaseReg,
                                  SizeT NumConsecRegs, const char *InstName) {
  // Bit position of the D field (low bit of the S register number) in the
  // pattern above: cccc occupies bits 31-28, xxxxx bits 27-23, D bit 22.
  constexpr IValueT DShift = 22;
  const IValueT BaseReg = getEncodedSRegNum(OpBaseReg);
  const IValueT DLastBit = mask(BaseReg, 0, 1); // Last bit of base register.
  const IValueT Rd = mask(BaseReg, 1, 4);       // Top 4 bits of base register.
  assert(0 < NumConsecRegs);
  assert(NumConsecRegs <= VpushVpopMaxConsecRegs);
  assert((BaseReg + NumConsecRegs) <= RegARM32::getNumSRegs());
  verifyCondDefined(Cond, InstName);
  AssemblerBuffer::EnsureCapacity ensured(&Buffer);
  // DLastBit must be moved into the D field. OR'ing it in unshifted would
  // place it in bit 0, corrupting the register count (iiiiiiii) and dropping
  // the low bit of any odd-numbered base register.
  const IValueT Encoding = Opcode | (Cond << kConditionShift) |
                           (DLastBit << DShift) | (Rd << kRdShift) |
                           NumConsecRegs;
  emitInst(Encoding);
}
void AssemblerARM32::vpop(const Variable *OpBaseReg, SizeT NumConsecRegs,
CondARM32::Cond Cond) {
// Note: Current implementation assumes that OpBaseReg is defined using S
// registers. It doesn't implement the D register form.
//
// VPOP - ARM section A8.8.367, encoding A2:
// vpop<c> <RegList>
//
// cccc11001D111101dddd1010iiiiiiii where cccc=Cond, ddddD=BaseReg, and
// iiiiiiii=NumConsecRegs.
constexpr const char *VpopName = "vpop";
constexpr IValueT VpopOpcode =
B27 | B26 | B23 | B21 | B20 | B19 | B18 | B16 | B11 | B9;
emitVStackOp(Cond, VpopOpcode, OpBaseReg, NumConsecRegs, VpopName);
}
void AssemblerARM32::vpush(const Variable *OpBaseReg, SizeT NumConsecRegs,
CondARM32::Cond Cond) {
// Note: Current implementation assumes that OpBaseReg is defined using S
// registers. It doesn't implement the D register form.
//
// VPUSH - ARM section A8.8.368, encoding A2:
// vpush<c> <RegList>
//
// cccc11010D101101dddd1010iiiiiiii where cccc=Cond, ddddD=BaseReg, and
// iiiiiiii=NumConsecRegs.
constexpr const char *VpushName = "vpush";
constexpr IValueT VpushOpcode =
B27 | B26 | B24 | B21 | B19 | B18 | B16 | B11 | B9;
emitVStackOp(Cond, VpushOpcode, OpBaseReg, NumConsecRegs, VpushName);
}
} // end of namespace ARM32 } // end of namespace ARM32
} // end of namespace Ice } // end of namespace Ice
...@@ -318,6 +318,12 @@ public: ...@@ -318,6 +318,12 @@ public:
// Implements uxtb/uxth depending on type of OpSrc0. // Implements uxtb/uxth depending on type of OpSrc0.
void uxt(const Operand *OpRd, const Operand *OpSrc0, CondARM32::Cond Cond); void uxt(const Operand *OpRd, const Operand *OpSrc0, CondARM32::Cond Cond);
// Emits vpop for NumConsecRegs consecutive registers starting at OpBaseReg.
// Only the S register form is implemented.
void vpop(const Variable *OpBaseReg, SizeT NumConsecRegs,
CondARM32::Cond Cond);
// Emits vpush for NumConsecRegs consecutive registers starting at OpBaseReg.
// Only the S register form is implemented.
void vpush(const Variable *OpBaseReg, SizeT NumConsecRegs,
CondARM32::Cond Cond);
static bool classof(const Assembler *Asm) { static bool classof(const Assembler *Asm) {
return Asm->getKind() == Asm_ARM32; return Asm->getKind() == Asm_ARM32;
} }
...@@ -414,6 +420,12 @@ private: ...@@ -414,6 +420,12 @@ private:
bool IsLoad, IValueT BaseReg, IValueT Registers, bool IsLoad, IValueT BaseReg, IValueT Registers,
const char *InstName); const char *InstName);
// Shared emitter behind vpush and vpop. InstName is the instruction name used
// when verifying that Cond is defined.
//
// Pattern ccccxxxxxDxxxxxxddddxxxxiiiiiiii where cccc=Cond, ddddD=BaseReg,
// iiiiiiii=NumConsecRegs, and xxxxx0xxxxxx0000xxxx00000000=Opcode.
void emitVStackOp(CondARM32::Cond Cond, IValueT Opcode,
const Variable *OpBaseReg, SizeT NumConsecRegs,
const char *InstName);
// Pattern cccc011100x1dddd1111mmmm0001nnn where cccc=Cond, // Pattern cccc011100x1dddd1111mmmm0001nnn where cccc=Cond,
// x=Opcode, dddd=Rd, nnnn=Rn, mmmm=Rm. // x=Opcode, dddd=Rd, nnnn=Rn, mmmm=Rm.
void emitDivOp(CondARM32::Cond Cond, IValueT Opcode, IValueT Rd, IValueT Rn, void emitDivOp(CondARM32::Cond Cond, IValueT Opcode, IValueT Rd, IValueT Rn,
......
...@@ -27,6 +27,9 @@ namespace Ice { ...@@ -27,6 +27,9 @@ namespace Ice {
namespace { namespace {
// Maximum number of consecutive registers allowed in a single vpush/vpop.
// Longer runs of consecutive registers are split across several instructions.
static constexpr SizeT VpushVpopMaxConsecRegs = 16;
const struct TypeARM32Attributes_ { const struct TypeARM32Attributes_ {
const char *WidthString; // b, h, <blank>, or d const char *WidthString; // b, h, <blank>, or d
const char *VecWidthString; // i8, i16, i32, f32, f64 const char *VecWidthString; // i8, i16, i32, f32, f64
...@@ -1311,7 +1314,8 @@ template <> void InstARM32Uxt::emitIAS(const Cfg *Func) const { ...@@ -1311,7 +1314,8 @@ template <> void InstARM32Uxt::emitIAS(const Cfg *Func) const {
namespace { namespace {
bool isAssignedConsecutiveRegisters(Variable *Before, Variable *After) { bool isAssignedConsecutiveRegisters(const Variable *Before,
const Variable *After) {
assert(Before->hasReg()); assert(Before->hasReg());
assert(After->hasReg()); assert(After->hasReg());
return Before->getRegNum() + 1 == After->getRegNum(); return Before->getRegNum() + 1 == After->getRegNum();
...@@ -1380,21 +1384,30 @@ void InstARM32Pop::emit(const Cfg *Func) const { ...@@ -1380,21 +1384,30 @@ void InstARM32Pop::emit(const Cfg *Func) const {
} }
void InstARM32Pop::emitIAS(const Cfg *Func) const { void InstARM32Pop::emitIAS(const Cfg *Func) const {
// Pop can't be emitted if there are no registers to load. This should never
// happen, but if it does, we don't need to bring Subzero down -- we just skip
// emitting the pop instruction (and maybe emit a nop?) The assert() is here
// so that we can detect this error during development.
const SizeT DestSize = Dests.size();
if (DestSize == 0) {
assert(false && "Empty pop list");
return;
}
auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
const auto *Reg = llvm::cast<Variable>(Dests[0]);
if (isScalarIntegerType(Reg->getType())) {
// Pop GPR registers.
SizeT IntegerCount = 0; SizeT IntegerCount = 0;
ARM32::IValueT GPRegisters = 0; ARM32::IValueT GPRegisters = 0;
const Variable *LastDest = nullptr; const Variable *LastDest = nullptr;
for (const Variable *Var : Dests) { for (const Variable *Var : Dests) {
if (!isScalarIntegerType(Var->getType())) assert(Var->hasReg() && "pop only applies to registers");
// TODO(kschimpf) Implement vpush. int32_t Reg = RegARM32::getEncodedGPR(Var->getRegNum());
return emitUsingTextFixup(Func);
assert((Var && Var->hasReg()) && "pop only applies to registers");
int32_t Reg = Var->getRegNum();
assert(Reg != RegARM32::Encoded_Not_GPR);
LastDest = Var; LastDest = Var;
GPRegisters |= (1 << Reg); GPRegisters |= (1 << Reg);
++IntegerCount; ++IntegerCount;
} }
auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
switch (IntegerCount) { switch (IntegerCount) {
case 0: case 0:
return; return;
...@@ -1408,6 +1421,27 @@ void InstARM32Pop::emitIAS(const Cfg *Func) const { ...@@ -1408,6 +1421,27 @@ void InstARM32Pop::emitIAS(const Cfg *Func) const {
Asm->popList(GPRegisters, CondARM32::AL); Asm->popList(GPRegisters, CondARM32::AL);
break; break;
} }
} else {
// Pop vector/floating point registers.
const Variable *BaseReg = nullptr;
SizeT RegCount = 0;
for (const Variable *NextReg : Dests) {
if (BaseReg == nullptr) {
BaseReg = NextReg;
RegCount = 1;
} else if (RegCount < VpushVpopMaxConsecRegs &&
isAssignedConsecutiveRegisters(Reg, NextReg)) {
++RegCount;
} else {
Asm->vpop(BaseReg, RegCount, CondARM32::AL);
BaseReg = NextReg;
RegCount = 1;
}
Reg = NextReg;
}
if (RegCount)
Asm->vpop(BaseReg, RegCount, CondARM32::AL);
}
if (Asm->needsTextFixup()) if (Asm->needsTextFixup())
emitUsingTextFixup(Func); emitUsingTextFixup(Func);
} }
...@@ -1441,7 +1475,7 @@ void InstARM32Push::emit(const Cfg *Func) const { ...@@ -1441,7 +1475,7 @@ void InstARM32Push::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit(); Ostream &Str = Func->getContext()->getStrEmit();
auto *Reg = llvm::cast<Variable>(getSrc(0)); const auto *Reg = llvm::cast<Variable>(getSrc(0));
if (isScalarIntegerType(Reg->getType())) { if (isScalarIntegerType(Reg->getType())) {
// GPR push. // GPR push.
Str << "\t" Str << "\t"
...@@ -1461,12 +1495,16 @@ void InstARM32Push::emit(const Cfg *Func) const { ...@@ -1461,12 +1495,16 @@ void InstARM32Push::emit(const Cfg *Func) const {
"vpush" "vpush"
"\t{"; "\t{";
Reg->emit(Func); Reg->emit(Func);
SizeT RegCount = 1;
for (SizeT i = 1; i < SrcSize; ++i) { for (SizeT i = 1; i < SrcSize; ++i) {
auto *NextReg = llvm::cast<Variable>(getSrc(i)); const auto *NextReg = llvm::cast<Variable>(getSrc(i));
if (isAssignedConsecutiveRegisters(Reg, NextReg)) { if (RegCount < VpushVpopMaxConsecRegs &&
isAssignedConsecutiveRegisters(Reg, NextReg)) {
++RegCount;
Str << ", "; Str << ", ";
} else { } else {
startNextInst(Func); startNextInst(Func);
RegCount = 1;
Str << "}\n\t" Str << "}\n\t"
"vpush" "vpush"
"\t{"; "\t{";
...@@ -1478,22 +1516,31 @@ void InstARM32Push::emit(const Cfg *Func) const { ...@@ -1478,22 +1516,31 @@ void InstARM32Push::emit(const Cfg *Func) const {
} }
void InstARM32Push::emitIAS(const Cfg *Func) const { void InstARM32Push::emitIAS(const Cfg *Func) const {
// Push can't be emitted if there are no registers to save. This should never
// happen, but if it does, we don't need to bring Subzero down -- we just skip
// emitting the push instruction (and maybe emit a nop?) The assert() is here
// so that we can detect this error during development.
const SizeT SrcSize = getSrcSize();
if (SrcSize == 0) {
assert(false && "Empty push list");
return;
}
auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
const auto *Reg = llvm::cast<Variable>(getSrc(0));
if (isScalarIntegerType(Reg->getType())) {
// Push GPR registers.
SizeT IntegerCount = 0; SizeT IntegerCount = 0;
ARM32::IValueT GPRegisters = 0; ARM32::IValueT GPRegisters = 0;
const Variable *LastSrc = nullptr; const Variable *LastSrc = nullptr;
for (SizeT Index = 0; Index < getSrcSize(); ++Index) { for (SizeT Index = 0; Index < getSrcSize(); ++Index) {
if (!isScalarIntegerType(getSrc(Index)->getType())) const auto *Var = llvm::cast<Variable>(getSrc(Index));
// TODO(kschimpf) Implement vpush. int32_t Reg = RegARM32::getEncodedGPR(Var->getRegNum());
return emitUsingTextFixup(Func);
const auto *Var = llvm::dyn_cast<Variable>(getSrc(Index));
assert((Var && Var->hasReg()) && "push only applies to registers");
int32_t Reg = Var->getRegNum();
assert(Reg != RegARM32::Encoded_Not_GPR); assert(Reg != RegARM32::Encoded_Not_GPR);
LastSrc = Var; LastSrc = Var;
GPRegisters |= (1 << Reg); GPRegisters |= (1 << Reg);
++IntegerCount; ++IntegerCount;
} }
auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
switch (IntegerCount) { switch (IntegerCount) {
case 0: case 0:
return; return;
...@@ -1505,10 +1552,27 @@ void InstARM32Push::emitIAS(const Cfg *Func) const { ...@@ -1505,10 +1552,27 @@ void InstARM32Push::emitIAS(const Cfg *Func) const {
break; break;
} }
default: default:
// TODO(kschimpf) Implement pushList in assembler.
Asm->pushList(GPRegisters, CondARM32::AL); Asm->pushList(GPRegisters, CondARM32::AL);
break; break;
} }
} else {
// Push vector/Floating point registers.
const Variable *BaseReg = Reg;
SizeT RegCount = 1;
for (SizeT i = 1; i < SrcSize; ++i) {
const auto *NextReg = llvm::cast<Variable>(getSrc(i));
if (RegCount < VpushVpopMaxConsecRegs &&
isAssignedConsecutiveRegisters(Reg, NextReg)) {
++RegCount;
} else {
Asm->vpush(BaseReg, RegCount, CondARM32::AL);
BaseReg = NextReg;
RegCount = 1;
}
Reg = NextReg;
}
Asm->vpush(BaseReg, RegCount, CondARM32::AL);
}
if (Asm->needsTextFixup()) if (Asm->needsTextFixup())
emitUsingTextFixup(Func); emitUsingTextFixup(Func);
} }
......
...@@ -104,6 +104,14 @@ public: ...@@ -104,6 +104,14 @@ public:
return Reg_I64PAIR_First <= RegNum && RegNum <= Reg_I64PAIR_Last; return Reg_I64PAIR_First <= RegNum && RegNum <= Reg_I64PAIR_Last;
} }
// Returns true when RegNum lies within [Reg_SREG_First, Reg_SREG_Last].
static inline bool isEncodedSReg(int32_t RegNum) {
  const bool AtOrAboveFirst = Reg_SREG_First <= RegNum;
  return AtOrAboveFirst && RegNum <= Reg_SREG_Last;
}
// Returns the number of S registers, i.e. the size of the inclusive range
// [Reg_SREG_First, Reg_SREG_Last].
static inline SizeT getNumSRegs() {
  return 1 + Reg_SREG_Last - Reg_SREG_First;
}
static inline SRegister getEncodedSReg(int32_t RegNum) { static inline SRegister getEncodedSReg(int32_t RegNum) {
assert(Reg_SREG_First <= RegNum); assert(Reg_SREG_First <= RegNum);
assert(RegNum <= Reg_SREG_Last); assert(RegNum <= Reg_SREG_Last);
......
; Show that we know how to translate vpush and vpop.
; NOTE: We use -O2 because vpush/vpop only occur if optimized. Uses
; simple call with double parameters to cause the insertion of
; vpush/vpop.
; REQUIRES: allow_dump
; Compile using standalone assembler.
; RUN: %p2i --filetype=asm -i %s --target=arm32 --args -O2 \
; RUN: | FileCheck %s --check-prefix=ASM
; Show bytes in assembled standalone code.
; RUN: %p2i --filetype=asm -i %s --target=arm32 --assemble --disassemble \
; RUN: --args -O2 | FileCheck %s --check-prefix=DIS
; Compile using integrated assembler.
; RUN: %p2i --filetype=iasm -i %s --target=arm32 --args -O2 \
; RUN: | FileCheck %s --check-prefix=IASM
; Show bytes in assembled integrated code.
; RUN: %p2i --filetype=iasm -i %s --target=arm32 --assemble --disassemble \
; RUN: --args -O2 | FileCheck %s --check-prefix=DIS
define internal double @testVpushVpop(double %v1, double %v2) {
; ASM-LABEL: testVpushVpop:
; DIS-LABEL: 00000000 <testVpushVpop>:
; IASM-LABEL: testVpushVpop:
entry:
; ASM-NEXT: .LtestVpushVpop$entry:
; IASM-NEXT: .LtestVpushVpop$entry:
; ASM-NEXT: vpush {s28, s29, s30, s31}
; DIS-NEXT: 0: ed2dea04
; IASM-NEXT: .byte 0x4
; IASM-NEXT: .byte 0xea
; IASM-NEXT: .byte 0x2d
; IASM-NEXT: .byte 0xed
; ASM-NEXT: push {lr}
; DIS-NEXT: 4: e52de004
; IASM-NEXT: .byte 0x4
; IASM-NEXT: .byte 0xe0
; IASM-NEXT: .byte 0x2d
; IASM-NEXT: .byte 0xe5
; ASM-NEXT: sub sp, sp, #12
; DIS-NEXT: 8: e24dd00c
; IASM-NEXT: .byte 0xc
; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0x4d
; IASM-NEXT: .byte 0xe2
; ASM-NEXT: vmov.f64 d15, d0
; DIS-NEXT: c: eeb0fb40
; IASM-NEXT: vmov.f64 d15, d0
; ASM-NEXT: vmov.f64 d14, d1
; DIS-NEXT: 10: eeb0eb41
; IASM-NEXT: vmov.f64 d14, d1
call void @foo()
; ASM-NEXT: bl foo
; DIS-NEXT: 14: ebfffffe
; IASM-NEXT: bl foo @ .word ebfffffe
%res = fadd double %v1, %v2
; ASM-NEXT: vadd.f64 d15, d15, d14
; DIS-NEXT: 18: ee3ffb0e
; IASM-NEXT: vadd.f64 d15, d15, d14
; ASM-NEXT: vmov.f64 d0, d15
; DIS-NEXT: 1c: eeb00b4f
; IASM-NEXT: vmov.f64 d0, d15
ret double %res
; ASM-NEXT: add sp, sp, #12
; DIS-NEXT: 20: e28dd00c
; IASM-NEXT: .byte 0xc
; IASM-NEXT: .byte 0xd0
; IASM-NEXT: .byte 0x8d
; IASM-NEXT: .byte 0xe2
; ASM-NEXT: pop {lr}
; ASM-NEXT: # lr = def.pseudo
; DIS-NEXT: 24: e49de004
; IASM-NEXT: .byte 0x4
; IASM-NEXT: .byte 0xe0
; IASM-NEXT: .byte 0x9d
; IASM-NEXT: .byte 0xe4
; ASM-NEXT: vpop {s28, s29, s30, s31}
; ASM-NEXT: # s28 = def.pseudo
; ASM-NEXT: # s29 = def.pseudo
; ASM-NEXT: # s30 = def.pseudo
; ASM-NEXT: # s31 = def.pseudo
; DIS-NEXT: 28: ecbdea04
; IASM-NEXT: .byte 0x4
; IASM-NEXT: .byte 0xea
; IASM-NEXT: .byte 0xbd
; IASM-NEXT: .byte 0xec
; ASM-NEXT: bx lr
; DIS-NEXT: 2c: e12fff1e
; IASM-NEXT: .byte 0x1e
; IASM-NEXT: .byte 0xff
; IASM-NEXT: .byte 0x2f
; IASM-NEXT: .byte 0xe1
}
define internal void @foo() {
ret void
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment