Commit 2c0764e2 by Karl Schimpf

Implements the vector add instructions in the integrated ARM assembler.

parent 7d59513d
...@@ -1213,7 +1213,8 @@ static inline int ShiftOfOperandSize(OperandSize size) { ...@@ -1213,7 +1213,8 @@ static inline int ShiftOfOperandSize(OperandSize size) {
return -1; return -1;
} }
#if 0
// Moved to ARM32::AssemblerARM32::emitSIMDqqq()
void Assembler::EmitSIMDqqq(int32_t opcode, OperandSize size, void Assembler::EmitSIMDqqq(int32_t opcode, OperandSize size,
QRegister qd, QRegister qn, QRegister qm) { QRegister qd, QRegister qn, QRegister qm) {
ASSERT(TargetCPUFeatures::neon_supported()); ASSERT(TargetCPUFeatures::neon_supported());
...@@ -1230,7 +1231,7 @@ void Assembler::EmitSIMDqqq(int32_t opcode, OperandSize size, ...@@ -1230,7 +1231,7 @@ void Assembler::EmitSIMDqqq(int32_t opcode, OperandSize size,
(static_cast<int32_t>(qm * 2) & 0xf); (static_cast<int32_t>(qm * 2) & 0xf);
Emit(encoding); Emit(encoding);
} }
#endif
void Assembler::EmitSIMDddd(int32_t opcode, OperandSize size, void Assembler::EmitSIMDddd(int32_t opcode, OperandSize size,
DRegister dd, DRegister dn, DRegister dm) { DRegister dd, DRegister dn, DRegister dm) {
...@@ -1254,17 +1255,18 @@ void Assembler::vmovq(QRegister qd, QRegister qm) { ...@@ -1254,17 +1255,18 @@ void Assembler::vmovq(QRegister qd, QRegister qm) {
EmitSIMDqqq(B21 | B8 | B4, kByte, qd, qm, qm); EmitSIMDqqq(B21 | B8 | B4, kByte, qd, qm, qm);
} }
#if 0
// Moved to ARM32::AssemblerARM32::vaddqi().
void Assembler::vaddqi(OperandSize sz, void Assembler::vaddqi(OperandSize sz,
QRegister qd, QRegister qn, QRegister qm) { QRegister qd, QRegister qn, QRegister qm) {
EmitSIMDqqq(B11, sz, qd, qn, qm); EmitSIMDqqq(B11, sz, qd, qn, qm);
} }
// Moved to ARM32::AssemblerARM32::vaddqf().
void Assembler::vaddqs(QRegister qd, QRegister qn, QRegister qm) { void Assembler::vaddqs(QRegister qd, QRegister qn, QRegister qm) {
EmitSIMDqqq(B11 | B10 | B8, kSWord, qd, qn, qm); EmitSIMDqqq(B11 | B10 | B8, kSWord, qd, qn, qm);
} }
#endif
void Assembler::vsubqi(OperandSize sz, void Assembler::vsubqi(OperandSize sz,
QRegister qd, QRegister qn, QRegister qm) { QRegister qd, QRegister qn, QRegister qm) {
......
...@@ -674,10 +674,10 @@ class Assembler : public ValueObject { ...@@ -674,10 +674,10 @@ class Assembler : public ValueObject {
void vadds(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL); void vadds(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL);
// Moved to Arm32::AssemblerARM32::vaddd() // Moved to Arm32::AssemblerARM32::vaddd()
void vaddd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL); void vaddd(DRegister dd, DRegister dn, DRegister dm, Condition cond = AL);
#endif // Moved to ARM32::AssemblerARM32::vaddqi().
void vaddqi(OperandSize sz, QRegister qd, QRegister qn, QRegister qm); void vaddqi(OperandSize sz, QRegister qd, QRegister qn, QRegister qm);
// Moved to ARM32::AssemblerARM32::vaddqf().
void vaddqs(QRegister qd, QRegister qn, QRegister qm); void vaddqs(QRegister qd, QRegister qn, QRegister qm);
#if 0
// Moved to Arm32::AssemblerARM32::vsubs() // Moved to Arm32::AssemblerARM32::vsubs()
void vsubs(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL); void vsubs(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL);
// Moved to Arm32::AssemblerARM32::vsubd() // Moved to Arm32::AssemblerARM32::vsubd()
...@@ -1338,10 +1338,11 @@ class Assembler : public ValueObject { ...@@ -1338,10 +1338,11 @@ class Assembler : public ValueObject {
int32_t opcode, int32_t opcode,
DRegister dd, DRegister dd,
SRegister sm); SRegister sm);
#endif
// Moved to ARM32::AssemblerARM32::emitSIMDqqq()
void EmitSIMDqqq(int32_t opcode, OperandSize sz, void EmitSIMDqqq(int32_t opcode, OperandSize sz,
QRegister qd, QRegister qn, QRegister qm); QRegister qd, QRegister qn, QRegister qm);
#endif
void EmitSIMDddd(int32_t opcode, OperandSize sz, void EmitSIMDddd(int32_t opcode, OperandSize sz,
DRegister dd, DRegister dn, DRegister dm); DRegister dd, DRegister dn, DRegister dm);
......
...@@ -144,6 +144,25 @@ IValueT encodeCondition(CondARM32::Cond Cond) { ...@@ -144,6 +144,25 @@ IValueT encodeCondition(CondARM32::Cond Cond) {
return static_cast<IValueT>(Cond); return static_cast<IValueT>(Cond);
} }
// Maps a vector element type to the value placed in the SIMD size field.
// i8 and f32 both encode as 0; i16, i32 and i64 encode as 1, 2 and 3. Any
// other type is reported as a fatal error.
IValueT encodeElmtType(Type ElmtTy) {
  switch (ElmtTy) {
  default:
    // Unsupported element type: fall out of the switch to the error report.
    break;
  case IceType_i8:
  case IceType_f32:
    return 0;
  case IceType_i16:
    return 1;
  case IceType_i32:
    return 2;
  case IceType_i64:
    return 3;
  }
  llvm::report_fatal_error(
      std::string("SIMD op: Don't understand element type ") +
      typeString(ElmtTy));
}
IValueT encodeShift(OperandARM32::ShiftKind Shift) { IValueT encodeShift(OperandARM32::ShiftKind Shift) {
// Follows encoding in ARM section A8.4.1 "Constant shifts". // Follows encoding in ARM section A8.4.1 "Constant shifts".
switch (Shift) { switch (Shift) {
...@@ -191,6 +210,12 @@ IValueT getEncodedDRegNum(const Variable *Var) { ...@@ -191,6 +210,12 @@ IValueT getEncodedDRegNum(const Variable *Var) {
return RegARM32::getEncodedDReg(Var->getRegNum()); return RegARM32::getEncodedDReg(Var->getRegNum());
} }
// Returns the Q-register encoding for the register number held by Var.
IValueT getEncodedQRegNum(const Variable *Var) {
  const auto RegNum = Var->getRegNum();
  return RegARM32::getEncodedQReg(RegNum);
}
// Converts an encoded Q register number into the corresponding D register
// encoding by shifting it left one bit (i.e. Q<n> maps to D<2n>).
IValueT mapQRegToDReg(IValueT EncodedQReg) {
  const IValueT EncodedDReg = EncodedQReg << 1;
  return EncodedDReg;
}
IValueT getYInRegXXXXY(IValueT RegXXXXY) { return RegXXXXY & 0x1; } IValueT getYInRegXXXXY(IValueT RegXXXXY) { return RegXXXXY & 0x1; }
IValueT getXXXXInRegXXXXY(IValueT RegXXXXY) { return RegXXXXY >> 1; } IValueT getXXXXInRegXXXXY(IValueT RegXXXXY) { return RegXXXXY >> 1; }
...@@ -305,7 +330,7 @@ IValueT encodeShiftRotateReg(IValueT Rm, OperandARM32::ShiftKind Shift, ...@@ -305,7 +330,7 @@ IValueT encodeShiftRotateReg(IValueT Rm, OperandARM32::ShiftKind Shift,
} }
// Defines the set of registers expected in an operand. // Defines the set of registers expected in an operand.
enum RegSetWanted { WantGPRegs, WantSRegs, WantDRegs }; enum RegSetWanted { WantGPRegs, WantSRegs, WantDRegs, WantQRegs };
EncodedOperand encodeOperand(const Operand *Opnd, IValueT &Value, EncodedOperand encodeOperand(const Operand *Opnd, IValueT &Value,
RegSetWanted WantedRegSet) { RegSetWanted WantedRegSet) {
...@@ -322,6 +347,9 @@ EncodedOperand encodeOperand(const Operand *Opnd, IValueT &Value, ...@@ -322,6 +347,9 @@ EncodedOperand encodeOperand(const Operand *Opnd, IValueT &Value,
case WantDRegs: case WantDRegs:
Value = getEncodedDRegNum(Var); Value = getEncodedDRegNum(Var);
break; break;
case WantQRegs:
Value = getEncodedQRegNum(Var);
break;
} }
return EncodedAsRegister; return EncodedAsRegister;
} }
...@@ -503,6 +531,11 @@ IValueT encodeDRegister(const Operand *OpReg, const char *RegName, ...@@ -503,6 +531,11 @@ IValueT encodeDRegister(const Operand *OpReg, const char *RegName,
return encodeRegister(OpReg, WantDRegs, RegName, InstName); return encodeRegister(OpReg, WantDRegs, RegName, InstName);
} }
// Encodes OpReg as a Q register by delegating to encodeRegister() with the
// Q register set requested. RegName and InstName are passed through unchanged.
IValueT encodeQRegister(const Operand *OpReg, const char *RegName,
                        const char *InstName) {
  constexpr RegSetWanted WantedRegSet = WantQRegs;
  return encodeRegister(OpReg, WantedRegSet, RegName, InstName);
}
void verifyPOrNotW(IValueT Address, const char *InstName) { void verifyPOrNotW(IValueT Address, const char *InstName) {
if (BuildDefs::minimal()) if (BuildDefs::minimal())
return; return;
...@@ -1030,6 +1063,30 @@ void AssemblerARM32::emitSignExtend(CondARM32::Cond Cond, IValueT Opcode, ...@@ -1030,6 +1063,30 @@ void AssemblerARM32::emitSignExtend(CondARM32::Cond Cond, IValueT Opcode,
emitInst(Encoding); emitInst(Encoding);
} }
// Emits a three-register SIMD instruction. The encoding is built from Opcode,
// the fixed B25 bit, the kNone condition, the element-size field derived from
// ElmtTy, the split register fields of Dd/Dn/Dm, and the Q bit (UseQRegs).
void AssemblerARM32::emitSIMD(IValueT Opcode, Type ElmtTy, IValueT Dd,
                              IValueT Dn, IValueT Dm, bool UseQRegs) {
  const IValueT Sz = encodeElmtType(ElmtTy);
  assert(Utils::IsUint(2, Sz));

  // Fixed portion: condition field plus bit 25.
  const IValueT CondBits = encodeCondition(CondARM32::kNone) << kConditionShift;
  // Element size lives at bits 21:20.
  const IValueT SizeBits = Sz << 20;
  // Each register is split into a single high bit (Y) and four low bits (XXXX).
  const IValueT DdBits =
      (getYInRegYXXXX(Dd) << 22) | (getXXXXInRegYXXXX(Dd) << 12);
  const IValueT DnBits =
      (getXXXXInRegYXXXX(Dn) << 16) | (getYInRegYXXXX(Dn) << 7);
  const IValueT DmBits = (getYInRegYXXXX(Dm) << 5) | getXXXXInRegYXXXX(Dm);
  // Bit 6 selects the Q-register form.
  const IValueT QBit = encodeBool(UseQRegs) << 6;

  const IValueT Encoding =
      Opcode | B25 | CondBits | SizeBits | DdBits | DnBits | QBit | DmBits;
  emitInst(Encoding);
}
// Emits a three-operand SIMD instruction on Q registers. Each operand is
// encoded as a Q register (in the order Qd, Qn, Qm), mapped to its D register
// encoding, and handed to emitSIMD() with the Q bit set.
void AssemblerARM32::emitSIMDqqq(IValueT Opcode, Type ElmtTy,
                                 const Operand *OpQd, const Operand *OpQn,
                                 const Operand *OpQm, const char *OpcodeName) {
  const IValueT Dd = mapQRegToDReg(encodeQRegister(OpQd, "Qd", OpcodeName));
  const IValueT Dn = mapQRegToDReg(encodeQRegister(OpQn, "Qn", OpcodeName));
  const IValueT Dm = mapQRegToDReg(encodeQRegister(OpQm, "Qm", OpcodeName));
  constexpr bool UseQRegs = true;
  emitSIMD(Opcode, ElmtTy, Dd, Dn, Dm, UseQRegs);
}
void AssemblerARM32::emitVFPddd(CondARM32::Cond Cond, IValueT Opcode, void AssemblerARM32::emitVFPddd(CondARM32::Cond Cond, IValueT Opcode,
IValueT Dd, IValueT Dn, IValueT Dm) { IValueT Dd, IValueT Dn, IValueT Dm) {
assert(Dd < RegARM32::getNumDRegs()); assert(Dd < RegARM32::getNumDRegs());
...@@ -2097,6 +2154,29 @@ void AssemblerARM32::vadds(const Operand *OpSd, const Operand *OpSn, ...@@ -2097,6 +2154,29 @@ void AssemblerARM32::vadds(const Operand *OpSd, const Operand *OpSn,
emitVFPsss(Cond, VaddsOpcode, OpSd, OpSn, OpSm, Vadds); emitVFPsss(Cond, VaddsOpcode, OpSd, OpSn, OpSm, Vadds);
} }
// Emits an integer vector add on Q registers: Qd = Qn + Qm, with lanes of
// element type ElmtTy.
//
// Operands are forwarded positionally to emitSIMDqqq() as (Qd, Qn, Qm), so the
// parameters are named after the encoding slot they fill.
void AssemblerARM32::vaddqi(Type ElmtTy, const Operand *OpQd,
                            const Operand *OpQn, const Operand *OpQm) {
  // VADD (integer) - ARM section A8.8.282, encoding A1:
  //   vadd.<dt> <Qd>, <Qn>, <Qm>
  //
  // 111100100Dssnnn0ddd01000N1M0mmm0 where Dddd=OpQd, Nnnn=OpQn, Mmmm=OpQm,
  // and dt in [i8, i16, i32, i64] where ss is the index.
  constexpr const char *Vaddqi = "vaddqi";
  constexpr IValueT VaddqiOpcode = B11;
  emitSIMDqqq(VaddqiOpcode, ElmtTy, OpQd, OpQn, OpQm, Vaddqi);
}
void AssemblerARM32::vaddqf(const Operand *OpQd, const Operand *OpQn,
const Operand *OpQm) {
// VADD (floating-point) - ARM section A8.8.283, Encoding A1:
// vadd.f32 <Qd>, <Qn>, <Qm>
//
// 111100100D00nnn0ddd01101N1M0mmm0 where Dddd=Qd, Nnnn=Qn, and Mmmm=Qm.
constexpr const char *Vaddqf = "vaddqf";
constexpr IValueT VaddqfOpcode = B11 | B10 | B8;
emitSIMDqqq(VaddqfOpcode, IceType_f32, OpQd, OpQn, OpQm, Vaddqf);
}
void AssemblerARM32::vaddd(const Operand *OpDd, const Operand *OpDn, void AssemblerARM32::vaddd(const Operand *OpDd, const Operand *OpDn,
const Operand *OpDm, CondARM32::Cond Cond) { const Operand *OpDm, CondARM32::Cond Cond) {
// VADD (floating-point) - ARM section A8.8.283, encoding A2: // VADD (floating-point) - ARM section A8.8.283, encoding A2:
......
...@@ -318,6 +318,13 @@ public: ...@@ -318,6 +318,13 @@ public:
void vadds(const Operand *OpSd, const Operand *OpSn, const Operand *OpSm, void vadds(const Operand *OpSd, const Operand *OpSn, const Operand *OpSm,
CondARM32::Cond Cond); CondARM32::Cond Cond);
// Integer vector add: Qd = Qn + Qm, with lanes of element type ElmtTy.
// Operands are listed in encoding order (destination, first source, second
// source).
void vaddqi(Type ElmtTy, const Operand *OpQd, const Operand *OpQn,
            const Operand *OpQm);
// Float vector add: Qd = Qn + Qm on f32 lanes. Operands are listed in encoding
// order (destination, first source, second source).
void vaddqf(const Operand *OpQd, const Operand *OpQn, const Operand *OpQm);
void vcmpd(const Operand *OpDd, const Operand *OpDm, CondARM32::Cond cond); void vcmpd(const Operand *OpDd, const Operand *OpDm, CondARM32::Cond cond);
// Second argument of compare is zero (+0.0). // Second argument of compare is zero (+0.0).
...@@ -592,6 +599,20 @@ private: ...@@ -592,6 +599,20 @@ private:
void emitSignExtend(CondARM32::Cond, IValueT Opcode, const Operand *OpRd, void emitSignExtend(CondARM32::Cond, IValueT Opcode, const Operand *OpRd,
const Operand *OpSrc0, const char *InstName); const Operand *OpSrc0, const char *InstName);
// Implements various forms of vector (SIMD) operations. Implements pattern
// 111100100Dssnnnndddd0000NQM0mmmm where ss=encodeElmtType(ElmtTy), Dddd=Dd,
// Nnnn=Dn, Mmmm=Dm, Q=UseQRegs, and Opcode is unioned into the pattern.
// Dd, Dn and Dm are already-encoded D register numbers; the single-letter
// bits (D, N, M) hold the high bit of each register and the four-letter
// fields hold the low four bits.
void emitSIMD(IValueT Opcode, Type ElmtTy, IValueT Dd, IValueT Dn, IValueT Dm,
bool UseQRegs);
// Implements various forms of vector (SIMD) operations using Q registers
// (used for both the integer and the f32 vector add). Implements pattern
// 111100100Dssnnn0ddd00000N1M0mmm0 where ss=encodeElmtType(ElmtTy), Dddd=Qd,
// Nnnn=Qn, Mmmm=Qm, and Opcode is unioned into the pattern. Each operand is
// encoded as a Q register and then mapped to its D register encoding before
// being emitted. OpcodeName is forwarded to the register encoder (presumably
// for diagnostics -- confirm against encodeRegister()).
void emitSIMDqqq(IValueT Opcode, Type ElmtTy, const Operand *OpQd,
const Operand *OpQn, const Operand *OpQm,
const char *OpcodeName);
// Pattern cccctttxxxxnnnn0000iiiiiiiiiiii where cccc=Cond, nnnn=Rn, // Pattern cccctttxxxxnnnn0000iiiiiiiiiiii where cccc=Cond, nnnn=Rn,
// ttt=Instruction type (derived from OpSrc1), iiiiiiiiiiii is derived from // ttt=Instruction type (derived from OpSrc1), iiiiiiiiiiii is derived from
// OpSrc1, and xxxx=Opcode. // OpSrc1, and xxxx=Opcode.
......
...@@ -503,8 +503,7 @@ template <> void InstARM32Adc::emitIAS(const Cfg *Func) const { ...@@ -503,8 +503,7 @@ template <> void InstARM32Adc::emitIAS(const Cfg *Func) const {
template <> void InstARM32Add::emitIAS(const Cfg *Func) const { template <> void InstARM32Add::emitIAS(const Cfg *Func) const {
auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>(); auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
Asm->add(getDest(), getSrc(0), getSrc(1), SetFlags, getPredicate()); Asm->add(getDest(), getSrc(0), getSrc(1), SetFlags, getPredicate());
if (Asm->needsTextFixup()) assert(!Asm->needsTextFixup());
emitUsingTextFixup(Func);
} }
template <> void InstARM32And::emitIAS(const Cfg *Func) const { template <> void InstARM32And::emitIAS(const Cfg *Func) const {
...@@ -610,20 +609,37 @@ template <> void InstARM32Udiv::emitIAS(const Cfg *Func) const { ...@@ -610,20 +609,37 @@ template <> void InstARM32Udiv::emitIAS(const Cfg *Func) const {
template <> void InstARM32Vadd::emitIAS(const Cfg *Func) const { template <> void InstARM32Vadd::emitIAS(const Cfg *Func) const {
auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>(); auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
const Variable *Dest = getDest(); const Variable *Dest = getDest();
switch (Dest->getType()) { Type DestTy = Dest->getType();
default: switch (DestTy) {
// TODO(kschimpf) Figure if more cases are needed. case IceType_void:
emitUsingTextFixup(Func); case IceType_i1:
case IceType_i8:
case IceType_i16:
case IceType_i32:
case IceType_i64:
case IceType_v4i1:
case IceType_v8i1:
case IceType_v16i1:
case IceType_NUM:
llvm::report_fatal_error(std::string("Vadd not defined on type ") +
typeString(DestTy));
break;
case IceType_v16i8:
case IceType_v8i16:
case IceType_v4i32:
Asm->vaddqi(typeElementType(DestTy), Dest, getSrc(0), getSrc(1));
break;
case IceType_v4f32:
Asm->vaddqf(Dest, getSrc(0), getSrc(1));
break; break;
case IceType_f32: case IceType_f32:
Asm->vadds(getDest(), getSrc(0), getSrc(1), CondARM32::AL); Asm->vadds(getDest(), getSrc(0), getSrc(1), CondARM32::AL);
assert(!Asm->needsTextFixup());
break; break;
case IceType_f64: case IceType_f64:
Asm->vaddd(getDest(), getSrc(0), getSrc(1), CondARM32::AL); Asm->vaddd(getDest(), getSrc(0), getSrc(1), CondARM32::AL);
assert(!Asm->needsTextFixup());
break; break;
} }
assert(!Asm->needsTextFixup());
} }
template <> void InstARM32Vand::emitIAS(const Cfg *Func) const { template <> void InstARM32Vand::emitIAS(const Cfg *Func) const {
......
...@@ -34,7 +34,7 @@ entry: ...@@ -34,7 +34,7 @@ entry:
; ASM: vadd.f32 q10, q10, q11 ; ASM: vadd.f32 q10, q10, q11
; DIS: 8: f2444de6 ; DIS: 8: f2444de6
; IASM: vadd.f32 ; IASM-NOT: vadd.f32
ret <4 x float> %res ret <4 x float> %res
} }
...@@ -49,7 +49,7 @@ entry: ...@@ -49,7 +49,7 @@ entry:
; ASM: vadd.i32 q10, q10, q11 ; ASM: vadd.i32 q10, q10, q11
; DIS: 28: f26448e6 ; DIS: 28: f26448e6
; IASM: vadd.i32 ; IASM-NOT: vadd.i32
ret <4 x i32> %res ret <4 x i32> %res
} }
...@@ -64,7 +64,7 @@ entry: ...@@ -64,7 +64,7 @@ entry:
; ASM: vadd.i16 q10, q10, q11 ; ASM: vadd.i16 q10, q10, q11
; DIS: 48: f25448e6 ; DIS: 48: f25448e6
; IASM: vadd.i16 ; IASM-NOT: vadd.i16
ret <8 x i16> %res ret <8 x i16> %res
} }
...@@ -79,7 +79,7 @@ entry: ...@@ -79,7 +79,7 @@ entry:
; ASM: vadd.i8 q10, q10, q11 ; ASM: vadd.i8 q10, q10, q11
; DIS: 68: f24448e6 ; DIS: 68: f24448e6
; IASM: vadd.i8 ; IASM-NOT: vadd.i8
ret <16 x i8> %res ret <16 x i8> %res
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment