Commit 15e77d46 by John Porto

Subzero. ARM32. Vector shifts.

BUG= R=kschimpf@google.com, stichnot@chromium.org Review URL: https://codereview.chromium.org/1881623002 .
parent 3018cf2b
...@@ -1289,20 +1289,20 @@ void Assembler::vmulqi(OperandSize sz, ...@@ -1289,20 +1289,20 @@ void Assembler::vmulqi(OperandSize sz,
void Assembler::vmulqs(QRegister qd, QRegister qn, QRegister qm) { void Assembler::vmulqs(QRegister qd, QRegister qn, QRegister qm) {
EmitSIMDqqq(B24 | B11 | B10 | B8 | B4, kSWord, qd, qn, qm); EmitSIMDqqq(B24 | B11 | B10 | B8 | B4, kSWord, qd, qn, qm);
} }
#endif
// Moved to ARM32::AssemblerARM32::vshlqi().
void Assembler::vshlqi(OperandSize sz, void Assembler::vshlqi(OperandSize sz,
QRegister qd, QRegister qm, QRegister qn) { QRegister qd, QRegister qm, QRegister qn) {
EmitSIMDqqq(B25 | B10, sz, qd, qn, qm); EmitSIMDqqq(B25 | B10, sz, qd, qn, qm);
} }
// Moved to ARM32::AssemblerARM32::vshlqu().
void Assembler::vshlqu(OperandSize sz, void Assembler::vshlqu(OperandSize sz,
QRegister qd, QRegister qm, QRegister qn) { QRegister qd, QRegister qm, QRegister qn) {
EmitSIMDqqq(B25 | B24 | B10, sz, qd, qn, qm); EmitSIMDqqq(B25 | B24 | B10, sz, qd, qn, qm);
} }
#if 0
// Moved to ARM32::AssemblerARM32::veorq() // Moved to ARM32::AssemblerARM32::veorq()
void Assembler::veorq(QRegister qd, QRegister qn, QRegister qm) { void Assembler::veorq(QRegister qd, QRegister qn, QRegister qm) {
EmitSIMDqqq(B24 | B8 | B4, kByte, qd, qn, qm); EmitSIMDqqq(B24 | B8 | B4, kByte, qd, qn, qm);
...@@ -1345,12 +1345,13 @@ void Assembler::vabsqs(QRegister qd, QRegister qm) { ...@@ -1345,12 +1345,13 @@ void Assembler::vabsqs(QRegister qd, QRegister qm) {
EmitSIMDqqq(B24 | B23 | B21 | B20 | B19 | B16 | B10 | B9 | B8, kSWord, EmitSIMDqqq(B24 | B23 | B21 | B20 | B19 | B16 | B10 | B9 | B8, kSWord,
qd, Q0, qm); qd, Q0, qm);
} }
#endif
// Moved to ARM32::AssemblerARM32::vnegqs().
void Assembler::vnegqs(QRegister qd, QRegister qm) { void Assembler::vnegqs(QRegister qd, QRegister qm) {
EmitSIMDqqq(B24 | B23 | B21 | B20 | B19 | B16 | B10 | B9 | B8 | B7, kSWord, EmitSIMDqqq(B24 | B23 | B21 | B20 | B19 | B16 | B10 | B9 | B8 | B7, kSWord,
qd, Q0, qm); qd, Q0, qm);
} }
#endif
void Assembler::vrecpeqs(QRegister qd, QRegister qm) { void Assembler::vrecpeqs(QRegister qd, QRegister qm) {
......
...@@ -693,10 +693,10 @@ class Assembler : public ValueObject { ...@@ -693,10 +693,10 @@ class Assembler : public ValueObject {
void vmulqi(OperandSize sz, QRegister qd, QRegister qn, QRegister qm); void vmulqi(OperandSize sz, QRegister qd, QRegister qn, QRegister qm);
// Moved to ARM32::AssemblerARM32::vmulqf(). // Moved to ARM32::AssemblerARM32::vmulqf().
void vmulqs(QRegister qd, QRegister qn, QRegister qm); void vmulqs(QRegister qd, QRegister qn, QRegister qm);
#endif // Moved to ARM32::AssemblerARM32::vshlqi().
void vshlqi(OperandSize sz, QRegister qd, QRegister qm, QRegister qn); void vshlqi(OperandSize sz, QRegister qd, QRegister qm, QRegister qn);
// Moved to ARM32::AssemblerARM32::vshlqu().
void vshlqu(OperandSize sz, QRegister qd, QRegister qm, QRegister qn); void vshlqu(OperandSize sz, QRegister qd, QRegister qm, QRegister qn);
#if 0
// Moved to Arm32::AssemblerARM32::vmlas() // Moved to Arm32::AssemblerARM32::vmlas()
void vmlas(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL); void vmlas(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL);
// Moved to Arm32::AssemblerARM32::vmlad() // Moved to Arm32::AssemblerARM32::vmlad()
...@@ -749,8 +749,9 @@ class Assembler : public ValueObject { ...@@ -749,8 +749,9 @@ class Assembler : public ValueObject {
#endif #endif
void vnegs(SRegister sd, SRegister sm, Condition cond = AL); void vnegs(SRegister sd, SRegister sm, Condition cond = AL);
void vnegd(DRegister dd, DRegister dm, Condition cond = AL); void vnegd(DRegister dd, DRegister dm, Condition cond = AL);
void vnegqs(QRegister qd, QRegister qm);
#if 0 #if 0
// Moved to ARM32::AssemblerARM32::vnegqs().
void vnegqs(QRegister qd, QRegister qm);
// Moved to ARM32::AssemblerARM32::vsqrts(). // Moved to ARM32::AssemblerARM32::vsqrts().
void vsqrts(SRegister sd, SRegister sm, Condition cond = AL); void vsqrts(SRegister sd, SRegister sm, Condition cond = AL);
// Moved to ARM32::AssemblerARM32::vsqrts(). // Moved to ARM32::AssemblerARM32::vsqrts().
......
...@@ -3056,6 +3056,30 @@ void AssemblerARM32::vmulqf(const Operand *OpQd, const Operand *OpQn, ...@@ -3056,6 +3056,30 @@ void AssemblerARM32::vmulqf(const Operand *OpQd, const Operand *OpQn,
emitSIMDqqqBase(VmulqfOpcode, OpQd, OpQn, OpQm, IsFloatTy, Vmulqf); emitSIMDqqqBase(VmulqfOpcode, OpQd, OpQn, OpQm, IsFloatTy, Vmulqf);
} }
// Emits a quad-register vector negate ("vneg") of OpQm into OpQd. ElmtTy is the
// element type of the vector; it selects both the size field and the
// floating-point flag handed to emitSIMDBase().
void AssemblerARM32::vnegqs(Type ElmtTy, const Operand *OpQd,
const Operand *OpQm) {
// VNEG - ARM section A8.8.355, encoding A1:
// vneg.<dt> <Qd>, <Qm>
//
// 111111111D11ss01dddd0F111QM0mmmm where Dddd=Qd, and Mmmm=Qm, and:
// * dt=s8 -> 00=ss, 0=F
// * dt=s16 -> 01=ss, 0=F
// * dt=s32 -> 10=ss, 0=F
// * dt=f32 -> 10=ss, 1=F
//
// NOTE(review): the leading bits of the pattern above look suspicious (the
// VSHL pattern used elsewhere in this file starts with 1111001) - confirm
// against the ARM ARM.
constexpr const char *Vneg = "vneg";
constexpr IValueT VnegOpcode = B24 | B23 | B21 | B20 | B16 | B9 | B8 | B7;
const IValueT Qd = encodeQRegister(OpQd, "Qd", Vneg);
// vneg has no Qn operand (see the syntax above), so register 0 is passed.
constexpr IValueT Qn = 0;
const IValueT Qm = encodeQRegister(OpQm, "Qm", Vneg);
constexpr bool UseQRegs = true;
// The "ss" field of the pattern above sits at bits 19:18.
constexpr IValueT ElmtShift = 18;
const IValueT ElmtSize = encodeElmtType(ElmtTy);
// The size field is only two bits wide.
assert(Utils::IsUint(2, ElmtSize));
emitSIMDBase(VnegOpcode | (ElmtSize << ElmtShift), mapQRegToDReg(Qd),
mapQRegToDReg(Qn), mapQRegToDReg(Qm), UseQRegs,
isFloatingType(ElmtTy));
}
void AssemblerARM32::vorrq(const Operand *OpQd, const Operand *OpQm, void AssemblerARM32::vorrq(const Operand *OpQd, const Operand *OpQm,
const Operand *OpQn) { const Operand *OpQn) {
// VORR (register) - ARM section A8.8.360, encoding A1: // VORR (register) - ARM section A8.8.360, encoding A1:
...@@ -3229,6 +3253,34 @@ void AssemblerARM32::vpush(const Variable *OpBaseReg, SizeT NumConsecRegs, ...@@ -3229,6 +3253,34 @@ void AssemblerARM32::vpush(const Variable *OpBaseReg, SizeT NumConsecRegs,
emitVStackOp(Cond, VpushOpcode, OpBaseReg, NumConsecRegs); emitVStackOp(Cond, VpushOpcode, OpBaseReg, NumConsecRegs);
} }
// Emits the U=0 form of the quad-register "vshl Qd, Qm, Qn". ElmtTy must be a
// scalar integer type (the element type of the vector being shifted).
void AssemblerARM32::vshlqi(Type ElmtTy, const Operand *OpQd,
                            const Operand *OpQm, const Operand *OpQn) {
  // VSHL - ARM section A8.8.396, encoding A1:
  //   vshl Qd, Qm, Qn
  //
  // 1111001U0Dssnnnndddd0100NQM0mmmm where Ddddd=Qd, Mmmmm=Qm, Nnnnn=Qn,
  // U=0, and Q=1.
  constexpr const char *InstName = "vshl";
  constexpr IValueT SignedVshlOpcode = B10 | B6;
  assert(isScalarIntegerType(ElmtTy) &&
         "vshl expects vector with integer element type");
  // Note the argument order: OpQn is handed to emitSIMDqqq() ahead of OpQm.
  emitSIMDqqq(SignedVshlOpcode, ElmtTy, OpQd, OpQn, OpQm, InstName);
}
// Emits the U=1 form of the quad-register "vshl Qd, Qm, Qn". ElmtTy must be a
// scalar integer type (the element type of the vector being shifted).
void AssemblerARM32::vshlqu(Type ElmtTy, const Operand *OpQd,
                            const Operand *OpQm, const Operand *OpQn) {
  // VSHL - ARM section A8.8.396, encoding A1:
  //   vshl Qd, Qm, Qn
  //
  // 1111001U0Dssnnnndddd0100NQM0mmmm where Ddddd=Qd, Mmmmm=Qm, Nnnnn=Qn,
  // U=1, and Q=1.
  constexpr const char *InstName = "vshl";
  constexpr IValueT UnsignedVshlOpcode = B24 | B10 | B6;
  assert(isScalarIntegerType(ElmtTy) &&
         "vshl expects vector with integer element type");
  // Note the argument order: OpQn is handed to emitSIMDqqq() ahead of OpQm.
  emitSIMDqqq(UnsignedVshlOpcode, ElmtTy, OpQd, OpQn, OpQm, InstName);
}
void AssemblerARM32::vsqrtd(const Operand *OpDd, const Operand *OpDm, void AssemblerARM32::vsqrtd(const Operand *OpDd, const Operand *OpDm,
CondARM32::Cond Cond) { CondARM32::Cond Cond) {
// VSQRT - ARM section A8.8.401, encoding A1: // VSQRT - ARM section A8.8.401, encoding A1:
......
...@@ -495,6 +495,8 @@ public: ...@@ -495,6 +495,8 @@ public:
void vmuls(const Operand *OpSd, const Operand *OpSn, const Operand *OpSm, void vmuls(const Operand *OpSd, const Operand *OpSn, const Operand *OpSm,
CondARM32::Cond Cond); CondARM32::Cond Cond);
void vnegqs(Type ElmtTy, const Operand *OpQd, const Operand *OpQm);
void vorrq(const Operand *OpQd, const Operand *OpQm, const Operand *OpQn); void vorrq(const Operand *OpQd, const Operand *OpQm, const Operand *OpQn);
void vpop(const Variable *OpBaseReg, SizeT NumConsecRegs, void vpop(const Variable *OpBaseReg, SizeT NumConsecRegs,
...@@ -503,6 +505,12 @@ public: ...@@ -503,6 +505,12 @@ public:
void vpush(const Variable *OpBaseReg, SizeT NumConsecRegs, void vpush(const Variable *OpBaseReg, SizeT NumConsecRegs,
CondARM32::Cond Cond); CondARM32::Cond Cond);
void vshlqi(Type ElmtTy, const Operand *OpQd, const Operand *OpQm,
const Operand *OpQn);
void vshlqu(Type ElmtTy, const Operand *OpQd, const Operand *OpQm,
const Operand *OpQn);
void vsqrtd(const Operand *OpDd, const Operand *OpDm, CondARM32::Cond Cond); void vsqrtd(const Operand *OpDd, const Operand *OpDm, CondARM32::Cond Cond);
void vsqrts(const Operand *OpSd, const Operand *OpSm, CondARM32::Cond Cond); void vsqrts(const Operand *OpSd, const Operand *OpSm, CondARM32::Cond Cond);
......
...@@ -33,13 +33,16 @@ using Register = RegARM32::AllRegisters; ...@@ -33,13 +33,16 @@ using Register = RegARM32::AllRegisters;
static constexpr SizeT VpushVpopMaxConsecRegs = 16; static constexpr SizeT VpushVpopMaxConsecRegs = 16;
const struct TypeARM32Attributes_ { const struct TypeARM32Attributes_ {
const char *WidthString; // b, h, <blank>, or d const char *WidthString; // b, h, <blank>, or d
const char *VecWidthString; // i8, i16, i32, f32, f64 const char *FpWidthString; // i8, i16, i32, f32, f64
const char *SVecWidthString; // s8, s16, s32, f32
const char *UVecWidthString; // u8, u16, u32, f32
int8_t SExtAddrOffsetBits; int8_t SExtAddrOffsetBits;
int8_t ZExtAddrOffsetBits; int8_t ZExtAddrOffsetBits;
} TypeARM32Attributes[] = { } TypeARM32Attributes[] = {
#define X(tag, elementty, int_width, vec_width, sbits, ubits, rraddr, shaddr) \ #define X(tag, elementty, int_width, fp_width, uvec_width, svec_width, sbits, \
{ int_width, vec_width, sbits, ubits } \ ubits, rraddr, shaddr) \
{ int_width, fp_width, svec_width, uvec_width, sbits, ubits } \
, ,
ICETYPEARM32_TABLE ICETYPEARM32_TABLE
#undef X #undef X
...@@ -70,16 +73,37 @@ size_t getVecElmtBitsize(Type Ty) { ...@@ -70,16 +73,37 @@ size_t getVecElmtBitsize(Type Ty) {
return typeWidthInBytes(typeElementType(Ty)) * CHAR_BIT; return typeWidthInBytes(typeElementType(Ty)) * CHAR_BIT;
} }
} // end of anonymous namespace const char *getWidthString(Type Ty) {
const char *InstARM32::getWidthString(Type Ty) {
return TypeARM32Attributes[Ty].WidthString; return TypeARM32Attributes[Ty].WidthString;
} }
const char *InstARM32::getVecWidthString(Type Ty) { const char *getFpWidthString(Type Ty) {
return TypeARM32Attributes[Ty].VecWidthString; return TypeARM32Attributes[Ty].FpWidthString;
}
const char *getSVecWidthString(Type Ty) {
return TypeARM32Attributes[Ty].SVecWidthString;
} }
/// Returns the UVecWidthString entry of TypeARM32Attributes for \p Ty.
const char *getUVecWidthString(Type Ty) {
  const auto &Attributes = TypeARM32Attributes[Ty];
  return Attributes.UVecWidthString;
}
/// Picks the width suffix for \p Ty according to \p SignType:
///   FS_Signed   -> getSVecWidthString(Ty)
///   FS_Unsigned -> getUVecWidthString(Ty)
///   FS_None     -> getFpWidthString(Ty)
/// Any other value is reported through llvm_unreachable().
const char *getVWidthString(Type Ty, InstARM32::FPSign SignType) {
  switch (SignType) {
  case InstARM32::FS_Signed:
    return getSVecWidthString(Ty);
  case InstARM32::FS_Unsigned:
    return getUVecWidthString(Ty);
  case InstARM32::FS_None:
    return getFpWidthString(Ty);
  }
  llvm_unreachable("Invalid Sign Type.");
  // Fallback return kept after the unreachable marker, as in the sign-less
  // case.
  return getFpWidthString(Ty);
}
} // end of anonymous namespace
const char *InstARM32Pred::predString(CondARM32::Cond Pred) { const char *InstARM32Pred::predString(CondARM32::Cond Pred) {
return InstARM32CondAttributes[Pred].EmitString; return InstARM32CondAttributes[Pred].EmitString;
} }
...@@ -150,13 +174,24 @@ void InstARM32Pred::emitUnaryopGPR(const char *Opcode, ...@@ -150,13 +174,24 @@ void InstARM32Pred::emitUnaryopGPR(const char *Opcode,
Instr->getSrc(0)->emit(Func); Instr->getSrc(0)->emit(Func);
} }
void InstARM32Pred::emitUnaryopFP(const char *Opcode, void InstARM32Pred::emitUnaryopFP(const char *Opcode, FPSign Sign,
const InstARM32Pred *Instr, const Cfg *Func) { const InstARM32Pred *Instr, const Cfg *Func) {
Ostream &Str = Func->getContext()->getStrEmit(); Ostream &Str = Func->getContext()->getStrEmit();
assert(Instr->getSrcSize() == 1); assert(Instr->getSrcSize() == 1);
Type SrcTy = Instr->getSrc(0)->getType(); Type SrcTy = Instr->getSrc(0)->getType();
Str << "\t" << Opcode << Instr->getPredicate() << getVecWidthString(SrcTy) Str << "\t" << Opcode << Instr->getPredicate();
<< "\t"; switch (Sign) {
case FS_None:
Str << getFpWidthString(SrcTy);
break;
case FS_Signed:
Str << getSVecWidthString(SrcTy);
break;
case FS_Unsigned:
Str << getUVecWidthString(SrcTy);
break;
}
Str << "\t";
Instr->getDest()->emit(Func); Instr->getDest()->emit(Func);
Str << ", "; Str << ", ";
Instr->getSrc(0)->emit(Func); Instr->getSrc(0)->emit(Func);
...@@ -192,14 +227,14 @@ void InstARM32Pred::emitThreeAddr(const char *Opcode, ...@@ -192,14 +227,14 @@ void InstARM32Pred::emitThreeAddr(const char *Opcode,
Instr->getSrc(1)->emit(Func); Instr->getSrc(1)->emit(Func);
} }
void InstARM32::emitThreeAddrFP(const char *Opcode, const InstARM32 *Instr, void InstARM32::emitThreeAddrFP(const char *Opcode, FPSign SignType,
const Cfg *Func) { const InstARM32 *Instr, const Cfg *Func) {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
Ostream &Str = Func->getContext()->getStrEmit(); Ostream &Str = Func->getContext()->getStrEmit();
assert(Instr->getSrcSize() == 2); assert(Instr->getSrcSize() == 2);
Str << "\t" << Opcode << getVecWidthString(Instr->getDest()->getType()) Str << "\t" << Opcode
<< "\t"; << getVWidthString(Instr->getDest()->getType(), SignType) << "\t";
Instr->getDest()->emit(Func); Instr->getDest()->emit(Func);
Str << ", "; Str << ", ";
Instr->getSrc(0)->emit(Func); Instr->getSrc(0)->emit(Func);
...@@ -207,15 +242,15 @@ void InstARM32::emitThreeAddrFP(const char *Opcode, const InstARM32 *Instr, ...@@ -207,15 +242,15 @@ void InstARM32::emitThreeAddrFP(const char *Opcode, const InstARM32 *Instr,
Instr->getSrc(1)->emit(Func); Instr->getSrc(1)->emit(Func);
} }
void InstARM32::emitFourAddrFP(const char *Opcode, const InstARM32 *Instr, void InstARM32::emitFourAddrFP(const char *Opcode, FPSign SignType,
const Cfg *Func) { const InstARM32 *Instr, const Cfg *Func) {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
Ostream &Str = Func->getContext()->getStrEmit(); Ostream &Str = Func->getContext()->getStrEmit();
assert(Instr->getSrcSize() == 3); assert(Instr->getSrcSize() == 3);
assert(Instr->getSrc(0) == Instr->getDest()); assert(Instr->getSrc(0) == Instr->getDest());
Str << "\t" << Opcode << getVecWidthString(Instr->getDest()->getType()) Str << "\t" << Opcode
<< "\t"; << getVWidthString(Instr->getDest()->getType(), SignType) << "\t";
Instr->getDest()->emit(Func); Instr->getDest()->emit(Func);
Str << ", "; Str << ", ";
Instr->getSrc(1)->emit(Func); Instr->getSrc(1)->emit(Func);
...@@ -254,6 +289,11 @@ void InstARM32ThreeAddrFP<K>::emitIAS(const Cfg *Func) const { ...@@ -254,6 +289,11 @@ void InstARM32ThreeAddrFP<K>::emitIAS(const Cfg *Func) const {
emitUsingTextFixup(Func); emitUsingTextFixup(Func);
} }
template <InstARM32::InstKindARM32 K>
void InstARM32ThreeAddrSignAwareFP<K>::emitIAS(const Cfg *Func) const {
InstARM32::emitUsingTextFixup(Func);
}
template <> void InstARM32Mla::emitIAS(const Cfg *Func) const { template <> void InstARM32Mla::emitIAS(const Cfg *Func) const {
assert(getSrcSize() == 3); assert(getSrcSize() == 3);
auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>(); auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
...@@ -737,6 +777,27 @@ template <> void InstARM32Vmls::emitIAS(const Cfg *Func) const { ...@@ -737,6 +777,27 @@ template <> void InstARM32Vmls::emitIAS(const Cfg *Func) const {
} }
} }
/// Emits vneg through the integrated assembler. Only the 128-bit vector types
/// listed below are accepted; any other destination type is a fatal error.
template <> void InstARM32Vneg::emitIAS(const Cfg *Func) const {
  auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
  const Variable *Dest = getDest();
  const Type DestTy = Dest->getType();
  switch (DestTy) {
  case IceType_v4i1:
  case IceType_v8i1:
  case IceType_v16i1:
  case IceType_v16i8:
  case IceType_v8i16:
  case IceType_v4i32:
  case IceType_v4f32:
    // The assembler wants the element type, not the vector type.
    Asm->vnegqs(typeElementType(DestTy), Dest, getSrc(0));
    break;
  default:
    llvm::report_fatal_error("Vneg not defined on type " +
                             typeStdString(DestTy));
  }
}
template <> void InstARM32Vorr::emitIAS(const Cfg *Func) const { template <> void InstARM32Vorr::emitIAS(const Cfg *Func) const {
auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>(); auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
const Variable *Dest = getDest(); const Variable *Dest = getDest();
...@@ -755,6 +816,35 @@ template <> void InstARM32Vorr::emitIAS(const Cfg *Func) const { ...@@ -755,6 +816,35 @@ template <> void InstARM32Vorr::emitIAS(const Cfg *Func) const {
assert(!Asm->needsTextFixup()); assert(!Asm->needsTextFixup());
} }
/// Emits vshl through the integrated assembler. Only integer (and i1) vector
/// destination types are accepted; any other type is a fatal error. The
/// instruction's Sign selects between the vshlqi() and vshlqu() encoders.
template <> void InstARM32Vshl::emitIAS(const Cfg *Func) const {
  auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
  const Variable *Dest = getDest();
  const Type DestTy = Dest->getType();
  switch (DestTy) {
  case IceType_v4i1:
  case IceType_v8i1:
  case IceType_v16i1:
  case IceType_v16i8:
  case IceType_v8i16:
  case IceType_v4i32: {
    const Type ElmtTy = typeElementType(DestTy);
    // A sign is expected to have been set; when assertions are compiled out,
    // FS_None is treated like FS_Unsigned below.
    assert(Sign != InstARM32::FS_None);
    switch (Sign) {
    case InstARM32::FS_Signed:
      Asm->vshlqi(ElmtTy, Dest, getSrc(0), getSrc(1));
      break;
    case InstARM32::FS_None: // defaults to unsigned.
    case InstARM32::FS_Unsigned:
      Asm->vshlqu(ElmtTy, Dest, getSrc(0), getSrc(1));
      break;
    }
    break;
  }
  default:
    llvm::report_fatal_error("Vshl not defined on type " +
                             typeStdString(DestTy));
  }
}
template <> void InstARM32Vsub::emitIAS(const Cfg *Func) const { template <> void InstARM32Vsub::emitIAS(const Cfg *Func) const {
auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>(); auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
const Variable *Dest = getDest(); const Variable *Dest = getDest();
...@@ -1374,6 +1464,8 @@ template <> const char *InstARM32Vmla::Opcode = "vmla"; ...@@ -1374,6 +1464,8 @@ template <> const char *InstARM32Vmla::Opcode = "vmla";
template <> const char *InstARM32Vmls::Opcode = "vmls"; template <> const char *InstARM32Vmls::Opcode = "vmls";
template <> const char *InstARM32Vmul::Opcode = "vmul"; template <> const char *InstARM32Vmul::Opcode = "vmul";
template <> const char *InstARM32Vorr::Opcode = "vorr"; template <> const char *InstARM32Vorr::Opcode = "vorr";
template <> const char *InstARM32UnaryopFP<InstARM32::Vneg>::Opcode = "vneg";
template <> const char *InstARM32ThreeAddrFP<InstARM32::Vshl>::Opcode = "vshl";
template <> const char *InstARM32Vsub::Opcode = "vsub"; template <> const char *InstARM32Vsub::Opcode = "vsub";
// Four-addr ops // Four-addr ops
template <> const char *InstARM32Mla::Opcode = "mla"; template <> const char *InstARM32Mla::Opcode = "mla";
...@@ -1479,7 +1571,7 @@ void InstARM32Mov::emitSingleDestSingleSource(const Cfg *Func) const { ...@@ -1479,7 +1571,7 @@ void InstARM32Mov::emitSingleDestSingleSource(const Cfg *Func) const {
// when vmov{c}'ing, we need to emit a width string. Otherwise, the // when vmov{c}'ing, we need to emit a width string. Otherwise, the
// assembler might be tempted to assume we want a vector vmov{c}, and that // assembler might be tempted to assume we want a vector vmov{c}, and that
// is disallowed because ARM. // is disallowed because ARM.
const char *WidthString = !CoreVFPMove ? getVecWidthString(Ty) : ""; const char *WidthString = !CoreVFPMove ? getFpWidthString(Ty) : "";
CondARM32::Cond Cond = getPredicate(); CondARM32::Cond Cond = getPredicate();
if (IsVector) if (IsVector)
assert(CondARM32::isUnconditional(Cond) && assert(CondARM32::isUnconditional(Cond) &&
...@@ -2015,6 +2107,11 @@ void InstARM32UnaryopFP<K>::emitIAS(const Cfg *Func) const { ...@@ -2015,6 +2107,11 @@ void InstARM32UnaryopFP<K>::emitIAS(const Cfg *Func) const {
emitUsingTextFixup(Func); emitUsingTextFixup(Func);
} }
template <InstARM32::InstKindARM32 K>
void InstARM32UnaryopSignAwareFP<K>::emitIAS(const Cfg *Func) const {
InstARM32::emitUsingTextFixup(Func);
}
template <> void InstARM32Vsqrt::emitIAS(const Cfg *Func) const { template <> void InstARM32Vsqrt::emitIAS(const Cfg *Func) const {
assert(getSrcSize() == 1); assert(getSrcSize() == 1);
auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>(); auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
...@@ -2426,7 +2523,7 @@ void InstARM32Vcmp::emit(const Cfg *Func) const { ...@@ -2426,7 +2523,7 @@ void InstARM32Vcmp::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit(); Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 2); assert(getSrcSize() == 2);
Str << "\t" Str << "\t"
"vcmp" << getPredicate() << getVecWidthString(getSrc(0)->getType()) "vcmp" << getPredicate() << getFpWidthString(getSrc(0)->getType())
<< "\t"; << "\t";
getSrc(0)->emit(Func); getSrc(0)->emit(Func);
Str << ", "; Str << ", ";
...@@ -2470,7 +2567,7 @@ void InstARM32Vcmp::dump(const Cfg *Func) const { ...@@ -2470,7 +2567,7 @@ void InstARM32Vcmp::dump(const Cfg *Func) const {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
Ostream &Str = Func->getContext()->getStrDump(); Ostream &Str = Func->getContext()->getStrDump();
Str << "vcmp" << getPredicate() << getVecWidthString(getSrc(0)->getType()); Str << "vcmp" << getPredicate() << getFpWidthString(getSrc(0)->getType());
dumpSources(Func); dumpSources(Func);
} }
...@@ -2506,7 +2603,7 @@ void InstARM32Vabs::emit(const Cfg *Func) const { ...@@ -2506,7 +2603,7 @@ void InstARM32Vabs::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit(); Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 1); assert(getSrcSize() == 1);
Str << "\t" Str << "\t"
"vabs" << getPredicate() << getVecWidthString(getSrc(0)->getType()) "vabs" << getPredicate() << getFpWidthString(getSrc(0)->getType())
<< "\t"; << "\t";
getDest()->emit(Func); getDest()->emit(Func);
Str << ", "; Str << ", ";
...@@ -2540,7 +2637,7 @@ void InstARM32Vabs::dump(const Cfg *Func) const { ...@@ -2540,7 +2637,7 @@ void InstARM32Vabs::dump(const Cfg *Func) const {
return; return;
Ostream &Str = Func->getContext()->getStrDump(); Ostream &Str = Func->getContext()->getStrDump();
dumpDest(Func); dumpDest(Func);
Str << " = vabs" << getPredicate() << getVecWidthString(getSrc(0)->getType()); Str << " = vabs" << getPredicate() << getFpWidthString(getSrc(0)->getType());
} }
void InstARM32Dmb::emit(const Cfg *Func) const { void InstARM32Dmb::emit(const Cfg *Func) const {
...@@ -2740,8 +2837,7 @@ void OperandARM32FlexFpImm::emit(const Cfg *Func) const { ...@@ -2740,8 +2837,7 @@ void OperandARM32FlexFpImm::emit(const Cfg *Func) const {
void OperandARM32FlexFpImm::dump(const Cfg * /*Func*/, Ostream &Str) const { void OperandARM32FlexFpImm::dump(const Cfg * /*Func*/, Ostream &Str) const {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
Str << "#" << materializeFloatImmediate(ModifiedImm) Str << "#" << materializeFloatImmediate(ModifiedImm) << getFpWidthString(Ty);
<< InstARM32::getVecWidthString(Ty);
} }
void OperandARM32FlexFpZero::emit(const Cfg *Func) const { void OperandARM32FlexFpZero::emit(const Cfg *Func) const {
...@@ -2760,7 +2856,7 @@ void OperandARM32FlexFpZero::emit(const Cfg *Func) const { ...@@ -2760,7 +2856,7 @@ void OperandARM32FlexFpZero::emit(const Cfg *Func) const {
void OperandARM32FlexFpZero::dump(const Cfg * /*Func*/, Ostream &Str) const { void OperandARM32FlexFpZero::dump(const Cfg * /*Func*/, Ostream &Str) const {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
Str << "#0.0" << InstARM32::getVecWidthString(Ty); Str << "#0.0" << getFpWidthString(Ty);
} }
void OperandARM32FlexReg::emit(const Cfg *Func) const { void OperandARM32FlexReg::emit(const Cfg *Func) const {
...@@ -2815,6 +2911,8 @@ template class InstARM32ThreeAddrFP<InstARM32::Veor>; ...@@ -2815,6 +2911,8 @@ template class InstARM32ThreeAddrFP<InstARM32::Veor>;
template class InstARM32FourAddrFP<InstARM32::Vmla>; template class InstARM32FourAddrFP<InstARM32::Vmla>;
template class InstARM32FourAddrFP<InstARM32::Vmls>; template class InstARM32FourAddrFP<InstARM32::Vmls>;
template class InstARM32ThreeAddrFP<InstARM32::Vmul>; template class InstARM32ThreeAddrFP<InstARM32::Vmul>;
template class InstARM32UnaryopSignAwareFP<InstARM32::Vneg>;
template class InstARM32ThreeAddrSignAwareFP<InstARM32::Vshl>;
template class InstARM32ThreeAddrFP<InstARM32::Vsub>; template class InstARM32ThreeAddrFP<InstARM32::Vsub>;
template class InstARM32LoadBase<InstARM32::Ldr>; template class InstARM32LoadBase<InstARM32::Ldr>;
......
...@@ -70,24 +70,25 @@ ...@@ -70,24 +70,25 @@
// the # of offset bits allowed as part of an addressing mode (for sign or zero // the # of offset bits allowed as part of an addressing mode (for sign or zero
// extending load/stores). // extending load/stores).
#define ICETYPEARM32_TABLE \ #define ICETYPEARM32_TABLE \
/* tag, element type, int_width, vec_width, addr bits sext, zext, \ /* tag, element type, int_width, fp_width, uvec_width, svec_width, \
reg-reg addr allowed, shift allowed, */ \ addr bits sext, zext, reg-reg addr allowed, shift allowed, */ \
X(IceType_void, IceType_void, "" , "" , 0 , 0 , 0, 0) \ X(IceType_void, IceType_void, "" , "" , "" , "" , 0 , 0 , 0, 0) \
X(IceType_i1, IceType_void, "b", "" , 8 , 12, 1, 1) \ X(IceType_i1, IceType_void, "b", "" , "" , "" , 8 , 12, 1, 1) \
X(IceType_i8, IceType_void, "b", "" , 8 , 12, 1, 1) \ X(IceType_i8, IceType_void, "b", "" , "" , "" , 8 , 12, 1, 1) \
X(IceType_i16, IceType_void, "h", "" , 8 , 8 , 1, 0) \ X(IceType_i16, IceType_void, "h", "" , "" , "" , 8 , 8 , 1, 0) \
X(IceType_i32, IceType_void, "" , "" , 12, 12, 1, 1) \ X(IceType_i32, IceType_void, "" , "" , "" , "" , 12, 12, 1, 1) \
X(IceType_i64, IceType_void, "d", "" , 12, 12, 1, 1) \ X(IceType_i64, IceType_void, "d", "" , "" , "" , 12, 12, 1, 1) \
X(IceType_f32, IceType_void, "" , ".f32", 8, 8 , 0, 0) \ X(IceType_f32, IceType_void, "" , ".f32", "" , "" , 8, 8 , 0, 0) \
X(IceType_f64, IceType_void, "" , ".f64", 8, 8 , 0, 0) \ X(IceType_f64, IceType_void, "" , ".f64", "" , "" , 8, 8 , 0, 0) \
X(IceType_v4i1, IceType_i32 , "" , ".i32", 0 , 0 , 1, 0) \ X(IceType_v4i1, IceType_i32 , "" , ".i32", ".u32", ".s32", 0 , 0 , 1, 0) \
X(IceType_v8i1, IceType_i16 , "" , ".i16", 0 , 0 , 1, 0) \ X(IceType_v8i1, IceType_i16 , "" , ".i16", ".u16", ".s16", 0 , 0 , 1, 0) \
X(IceType_v16i1, IceType_i8 , "" , ".i8" , 0 , 0 , 1, 0) \ X(IceType_v16i1, IceType_i8 , "" , ".i8" , ".u8" , ".s8" , 0 , 0 , 1, 0) \
X(IceType_v16i8, IceType_i8 , "" , ".i8" , 0 , 0 , 1, 0) \ X(IceType_v16i8, IceType_i8 , "" , ".i8" , ".u8" , ".s8" , 0 , 0 , 1, 0) \
X(IceType_v8i16, IceType_i16 , "" , ".i16", 0 , 0 , 1, 0) \ X(IceType_v8i16, IceType_i16 , "" , ".i16", ".u16", ".s16", 0 , 0 , 1, 0) \
X(IceType_v4i32, IceType_i32 , "" , ".i32", 0 , 0 , 1, 0) \ X(IceType_v4i32, IceType_i32 , "" , ".i32", ".u32", ".s32", 0 , 0 , 1, 0) \
X(IceType_v4f32, IceType_f32 , "" , ".f32", 0 , 0 , 1, 0) X(IceType_v4f32, IceType_f32 , "" , ".f32", ".f32", ".f32", 0 , 0 , 1, 0)
//#define X(tag, elementty, int_width, vec_width, sbits, ubits, rraddr, shaddr) //#define X(tag, elementty, int_width, fp_width, uvec_width, svec_width, sbits,
// ubits, rraddr, shaddr)
// Shifter types for Data-processing operands as defined in section A5.1.2. // Shifter types for Data-processing operands as defined in section A5.1.2.
#define ICEINSTARM32SHIFT_TABLE \ #define ICEINSTARM32SHIFT_TABLE \
......
...@@ -435,15 +435,15 @@ public: ...@@ -435,15 +435,15 @@ public:
Vmls, Vmls,
Vmrs, Vmrs,
Vmul, Vmul,
Vneg,
Vorr, Vorr,
Vshl,
Vsqrt, Vsqrt,
Vsub Vsub
}; };
static constexpr size_t InstSize = sizeof(uint32_t); static constexpr size_t InstSize = sizeof(uint32_t);
static const char *getWidthString(Type Ty);
static const char *getVecWidthString(Type Ty);
static CondARM32::Cond getOppositeCondition(CondARM32::Cond Cond); static CondARM32::Cond getOppositeCondition(CondARM32::Cond Cond);
/// Called inside derived methods emit() to communicate that multiple /// Called inside derived methods emit() to communicate that multiple
...@@ -452,11 +452,20 @@ public: ...@@ -452,11 +452,20 @@ public:
/// implemented. /// implemented.
void startNextInst(const Cfg *Func) const; void startNextInst(const Cfg *Func) const;
/// FPSign is used for certain vector instructions (particularly, right
/// shifts) that require an operand sign specification. It also selects which
/// width-suffix string is printed when such an instruction is emitted as text.
enum FPSign {
/// No sign specification; the plain FP/vector width string is used.
FS_None,
/// Signed operands; the signed vector width string is used.
FS_Signed,
/// Unsigned operands; the unsigned vector width string is used.
FS_Unsigned,
};
/// Shared emit routines for common forms of instructions. /// Shared emit routines for common forms of instructions.
static void emitThreeAddrFP(const char *Opcode, const InstARM32 *Instr, /// @{
const Cfg *Func); static void emitThreeAddrFP(const char *Opcode, FPSign Sign,
static void emitFourAddrFP(const char *Opcode, const InstARM32 *Instr, const InstARM32 *Instr, const Cfg *Func);
const Cfg *Func); static void emitFourAddrFP(const char *Opcode, FPSign Sign,
const InstARM32 *Instr, const Cfg *Func);
/// @}
void dump(const Cfg *Func) const override; void dump(const Cfg *Func) const override;
...@@ -495,8 +504,8 @@ public: ...@@ -495,8 +504,8 @@ public:
/// Shared emit routines for common forms of instructions. /// Shared emit routines for common forms of instructions.
static void emitUnaryopGPR(const char *Opcode, const InstARM32Pred *Instr, static void emitUnaryopGPR(const char *Opcode, const InstARM32Pred *Instr,
const Cfg *Func, bool NeedsWidthSuffix); const Cfg *Func, bool NeedsWidthSuffix);
static void emitUnaryopFP(const char *Opcode, const InstARM32Pred *Instr, static void emitUnaryopFP(const char *Opcode, FPSign Sign,
const Cfg *Func); const InstARM32Pred *Instr, const Cfg *Func);
static void emitTwoAddr(const char *Opcode, const InstARM32Pred *Instr, static void emitTwoAddr(const char *Opcode, const InstARM32Pred *Instr,
const Cfg *Func); const Cfg *Func);
static void emitThreeAddr(const char *Opcode, const InstARM32Pred *Instr, static void emitThreeAddr(const char *Opcode, const InstARM32Pred *Instr,
...@@ -573,7 +582,7 @@ public: ...@@ -573,7 +582,7 @@ public:
void emit(const Cfg *Func) const override { void emit(const Cfg *Func) const override {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
emitUnaryopFP(Opcode, this, Func); emitUnaryopFP(Opcode, Sign, this, Func);
} }
void emitIAS(const Cfg *Func) const override; void emitIAS(const Cfg *Func) const override;
void dump(const Cfg *Func) const override { void dump(const Cfg *Func) const override {
...@@ -588,16 +597,39 @@ public: ...@@ -588,16 +597,39 @@ public:
} }
static bool classof(const Inst *Instr) { return isClassof(Instr, K); } static bool classof(const Inst *Instr) { return isClassof(Instr, K); }
private: protected:
InstARM32UnaryopFP(Cfg *Func, Variable *Dest, Operand *Src, InstARM32UnaryopFP(Cfg *Func, Variable *Dest, Operand *Src,
CondARM32::Cond Predicate) CondARM32::Cond Predicate)
: InstARM32Pred(Func, K, 1, Dest, Predicate) { : InstARM32Pred(Func, K, 1, Dest, Predicate) {
addSource(Src); addSource(Src);
} }
FPSign Sign = FS_None;
static const char *Opcode; static const char *Opcode;
}; };
/// A unary vector/FP instruction whose sign specification (see
/// InstARM32::FPSign) can be set after construction through setSignType().
/// Instances are created with create(); they are neither default-constructible
/// nor copyable.
template <InstARM32::InstKindARM32 K>
class InstARM32UnaryopSignAwareFP : public InstARM32UnaryopFP<K> {
  InstARM32UnaryopSignAwareFP() = delete;
  InstARM32UnaryopSignAwareFP(const InstARM32UnaryopSignAwareFP &) = delete;
  InstARM32UnaryopSignAwareFP &
  operator=(const InstARM32UnaryopSignAwareFP &) = delete;

public:
  /// Allocates storage from \p Func and constructs the instruction in place.
  static InstARM32UnaryopSignAwareFP *
  create(Cfg *Func, Variable *Dest, Variable *Src, CondARM32::Cond Predicate) {
    void *Storage = Func->allocate<InstARM32UnaryopSignAwareFP>();
    return new (Storage)
        InstARM32UnaryopSignAwareFP(Func, Dest, Src, Predicate);
  }

  void emitIAS(const Cfg *Func) const override;

  /// Records the sign specification in the Sign member inherited from the
  /// base class.
  void setSignType(InstARM32::FPSign SignType) { this->Sign = SignType; }

private:
  /// Forwards everything to the InstARM32UnaryopFP<K> constructor.
  InstARM32UnaryopSignAwareFP(Cfg *Func, Variable *Dest, Operand *Src,
                              CondARM32::Cond Predicate)
      : InstARM32UnaryopFP<K>(Func, Dest, Src, Predicate) {}
};
/// Instructions of the form x := x op y. /// Instructions of the form x := x op y.
template <InstARM32::InstKindARM32 K> template <InstARM32::InstKindARM32 K>
class InstARM32TwoAddrGPR : public InstARM32Pred { class InstARM32TwoAddrGPR : public InstARM32Pred {
...@@ -748,7 +780,7 @@ public: ...@@ -748,7 +780,7 @@ public:
void emit(const Cfg *Func) const override { void emit(const Cfg *Func) const override {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
emitThreeAddrFP(Opcode, this, Func); emitThreeAddrFP(Opcode, Sign, this, Func);
} }
void emitIAS(const Cfg *Func) const override; void emitIAS(const Cfg *Func) const override;
void dump(const Cfg *Func) const override { void dump(const Cfg *Func) const override {
...@@ -762,9 +794,10 @@ public: ...@@ -762,9 +794,10 @@ public:
} }
static bool classof(const Inst *Instr) { return isClassof(Instr, K); } static bool classof(const Inst *Instr) { return isClassof(Instr, K); }
private: protected:
InstARM32ThreeAddrFP(Cfg *Func, Variable *Dest, Variable *Src0, FPSign Sign = FS_None;
Variable *Src1)
InstARM32ThreeAddrFP(Cfg *Func, Variable *Dest, Variable *Src0, Operand *Src1)
: InstARM32(Func, K, 2, Dest) { : InstARM32(Func, K, 2, Dest) {
addSource(Src0); addSource(Src0);
addSource(Src1); addSource(Src1);
...@@ -773,6 +806,31 @@ private: ...@@ -773,6 +806,31 @@ private:
static const char *Opcode; static const char *Opcode;
}; };
/// Three-address vector/FP instruction that lets its creator choose the FPSign
/// tag after construction. The tag lives in the inherited Sign member, which
/// starts out as FS_None in InstARM32ThreeAddrFP. Aliased as InstARM32Vshl.
template <InstARM32::InstKindARM32 K>
class InstARM32ThreeAddrSignAwareFP : public InstARM32ThreeAddrFP<K> {
  // Instances only come from create(); no default construction or copying.
  InstARM32ThreeAddrSignAwareFP() = delete;
  InstARM32ThreeAddrSignAwareFP(const InstARM32ThreeAddrSignAwareFP &) = delete;
  InstARM32ThreeAddrSignAwareFP &
  operator=(const InstARM32ThreeAddrSignAwareFP &) = delete;

public:
  /// Builds a binary vector/FP instruction (in the style of vadd or vsub) in
  /// storage obtained from Func->allocate<>(). Dest and both sources must be
  /// registers. The sign is not an argument here; set it with setSignType().
  static InstARM32ThreeAddrSignAwareFP *create(Cfg *Func, Variable *Dest,
                                               Variable *Src0, Variable *Src1) {
    auto *Storage = Func->allocate<InstARM32ThreeAddrSignAwareFP>();
    return new (Storage) InstARM32ThreeAddrSignAwareFP(Func, Dest, Src0, Src1);
  }

  /// Integrated-assembler emission; defined out of line.
  void emitIAS(const Cfg *Func) const override;

  /// Overwrites the inherited Sign member. `this->` is required because Sign
  /// is a member of a dependent base class.
  void setSignType(InstARM32::FPSign SignType) { this->Sign = SignType; }

private:
  /// Forwards every argument unchanged to the InstARM32ThreeAddrFP<K>
  /// constructor.
  InstARM32ThreeAddrSignAwareFP(Cfg *Func, Variable *Dest, Variable *Src0,
                                Variable *Src1)
      : InstARM32ThreeAddrFP<K>(Func, Dest, Src0, Src1) {}
};
/// Instructions of the form x := a op1 (y op2 z). E.g., multiply accumulate. /// Instructions of the form x := a op1 (y op2 z). E.g., multiply accumulate.
template <InstARM32::InstKindARM32 K> template <InstARM32::InstKindARM32 K>
class InstARM32FourAddrGPR : public InstARM32Pred { class InstARM32FourAddrGPR : public InstARM32Pred {
...@@ -840,7 +898,7 @@ public: ...@@ -840,7 +898,7 @@ public:
void emit(const Cfg *Func) const override { void emit(const Cfg *Func) const override {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
emitFourAddrFP(Opcode, this, Func); emitFourAddrFP(Opcode, Sign, this, Func);
} }
void emitIAS(const Cfg *Func) const override; void emitIAS(const Cfg *Func) const override;
void dump(const Cfg *Func) const override { void dump(const Cfg *Func) const override {
...@@ -864,6 +922,7 @@ private: ...@@ -864,6 +922,7 @@ private:
addSource(Src1); addSource(Src1);
} }
FPSign Sign = FS_None;
static const char *Opcode; static const char *Opcode;
}; };
...@@ -931,7 +990,9 @@ using InstARM32Veor = InstARM32ThreeAddrFP<InstARM32::Veor>; ...@@ -931,7 +990,9 @@ using InstARM32Veor = InstARM32ThreeAddrFP<InstARM32::Veor>;
using InstARM32Vmla = InstARM32FourAddrFP<InstARM32::Vmla>; using InstARM32Vmla = InstARM32FourAddrFP<InstARM32::Vmla>;
using InstARM32Vmls = InstARM32FourAddrFP<InstARM32::Vmls>; using InstARM32Vmls = InstARM32FourAddrFP<InstARM32::Vmls>;
using InstARM32Vmul = InstARM32ThreeAddrFP<InstARM32::Vmul>; using InstARM32Vmul = InstARM32ThreeAddrFP<InstARM32::Vmul>;
using InstARM32Vneg = InstARM32UnaryopSignAwareFP<InstARM32::Vneg>;
using InstARM32Vorr = InstARM32ThreeAddrFP<InstARM32::Vorr>; using InstARM32Vorr = InstARM32ThreeAddrFP<InstARM32::Vorr>;
using InstARM32Vshl = InstARM32ThreeAddrSignAwareFP<InstARM32::Vshl>;
using InstARM32Vsub = InstARM32ThreeAddrFP<InstARM32::Vsub>; using InstARM32Vsub = InstARM32ThreeAddrFP<InstARM32::Vsub>;
using InstARM32Ldr = InstARM32LoadBase<InstARM32::Ldr>; using InstARM32Ldr = InstARM32LoadBase<InstARM32::Ldr>;
using InstARM32Ldrex = InstARM32LoadBase<InstARM32::Ldrex>; using InstARM32Ldrex = InstARM32LoadBase<InstARM32::Ldrex>;
......
...@@ -454,12 +454,9 @@ void TargetARM32::genTargetHelperCallFor(Inst *Instr) { ...@@ -454,12 +454,9 @@ void TargetARM32::genTargetHelperCallFor(Inst *Instr) {
switch (Op) { switch (Op) {
default: default:
break; break;
case InstArithmetic::Ashr:
case InstArithmetic::Fdiv: case InstArithmetic::Fdiv:
case InstArithmetic::Frem: case InstArithmetic::Frem:
case InstArithmetic::Lshr:
case InstArithmetic::Sdiv: case InstArithmetic::Sdiv:
case InstArithmetic::Shl:
case InstArithmetic::Srem: case InstArithmetic::Srem:
case InstArithmetic::Udiv: case InstArithmetic::Udiv:
case InstArithmetic::Urem: case InstArithmetic::Urem:
...@@ -1960,7 +1957,8 @@ void TargetARM32::PostLoweringLegalizer::legalizeMov(InstARM32Mov *MovInstr) { ...@@ -1960,7 +1957,8 @@ void TargetARM32::PostLoweringLegalizer::legalizeMov(InstARM32Mov *MovInstr) {
// For now, we don't handle address modes with Relocatables. // For now, we don't handle address modes with Relocatables.
namespace { namespace {
// MemTraits contains per-type valid address mode information. // MemTraits contains per-type valid address mode information.
#define X(tag, elementty, int_width, vec_width, sbits, ubits, rraddr, shaddr) \ #define X(tag, elementty, int_width, fp_width, uvec_width, svec_width, sbits, \
ubits, rraddr, shaddr) \
static_assert(!(shaddr) || rraddr, "Check ICETYPEARM32_TABLE::" #tag); static_assert(!(shaddr) || rraddr, "Check ICETYPEARM32_TABLE::" #tag);
ICETYPEARM32_TABLE ICETYPEARM32_TABLE
#undef X #undef X
...@@ -1971,7 +1969,8 @@ static const struct { ...@@ -1971,7 +1969,8 @@ static const struct {
bool CanHaveIndex; bool CanHaveIndex;
bool CanHaveShiftedIndex; bool CanHaveShiftedIndex;
} MemTraits[] = { } MemTraits[] = {
#define X(tag, elementty, int_width, vec_width, sbits, ubits, rraddr, shaddr) \ #define X(tag, elementty, int_width, fp_width, uvec_width, svec_width, sbits, \
ubits, rraddr, shaddr) \
{ (1 << ubits) - 1, (ubits) > 0, rraddr, shaddr, } \ { (1 << ubits) - 1, (ubits) > 0, rraddr, shaddr, } \
, ,
ICETYPEARM32_TABLE ICETYPEARM32_TABLE
...@@ -3120,15 +3119,18 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) { ...@@ -3120,15 +3119,18 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) {
UnimplementedLoweringError(this, Instr); UnimplementedLoweringError(this, Instr);
return; return;
// Explicitly whitelist vector instructions we have implemented/enabled. // Explicitly whitelist vector instructions we have implemented/enabled.
case InstArithmetic::Fadd:
case InstArithmetic::Add: case InstArithmetic::Add:
case InstArithmetic::Fsub:
case InstArithmetic::Sub:
case InstArithmetic::And: case InstArithmetic::And:
case InstArithmetic::Or: case InstArithmetic::Ashr:
case InstArithmetic::Xor: case InstArithmetic::Fadd:
case InstArithmetic::Fmul: case InstArithmetic::Fmul:
case InstArithmetic::Fsub:
case InstArithmetic::Lshr:
case InstArithmetic::Mul: case InstArithmetic::Mul:
case InstArithmetic::Or:
case InstArithmetic::Shl:
case InstArithmetic::Sub:
case InstArithmetic::Xor:
break; break;
} }
} }
...@@ -3448,26 +3450,46 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) { ...@@ -3448,26 +3450,46 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) {
} }
case InstArithmetic::Shl: { case InstArithmetic::Shl: {
Variable *Src0R = Srcs.unswappedSrc0R(this); Variable *Src0R = Srcs.unswappedSrc0R(this);
Operand *Src1R = Srcs.unswappedSrc1RShAmtImm(this); if (!isVectorType(T->getType())) {
_lsl(T, Src0R, Src1R); Operand *Src1R = Srcs.unswappedSrc1RShAmtImm(this);
_lsl(T, Src0R, Src1R);
} else {
auto *Src1R = Srcs.unswappedSrc1R(this);
_vshl(T, Src0R, Src1R)->setSignType(InstARM32::FS_Unsigned);
}
_mov(Dest, T); _mov(Dest, T);
return; return;
} }
case InstArithmetic::Lshr: { case InstArithmetic::Lshr: {
Variable *Src0R = Srcs.unswappedSrc0R(this); Variable *Src0R = Srcs.unswappedSrc0R(this);
if (DestTy != IceType_i32) { if (!isVectorType(T->getType())) {
_uxt(Src0R, Src0R); Operand *Src1R = Srcs.unswappedSrc1RShAmtImm(this);
if (DestTy != IceType_i32) {
_uxt(Src0R, Src0R);
}
_lsr(T, Src0R, Src1R);
} else {
auto *Src1R = Srcs.unswappedSrc1R(this);
auto *Src1RNeg = makeReg(Src1R->getType());
_vneg(Src1RNeg, Src1R);
_vshl(T, Src0R, Src1RNeg)->setSignType(InstARM32::FS_Unsigned);
} }
_lsr(T, Src0R, Srcs.unswappedSrc1RShAmtImm(this));
_mov(Dest, T); _mov(Dest, T);
return; return;
} }
case InstArithmetic::Ashr: { case InstArithmetic::Ashr: {
Variable *Src0R = Srcs.unswappedSrc0R(this); Variable *Src0R = Srcs.unswappedSrc0R(this);
if (DestTy != IceType_i32) { if (!isVectorType(T->getType())) {
_sxt(Src0R, Src0R); if (DestTy != IceType_i32) {
_sxt(Src0R, Src0R);
}
_asr(T, Src0R, Srcs.unswappedSrc1RShAmtImm(this));
} else {
auto *Src1R = Srcs.unswappedSrc1R(this);
auto *Src1RNeg = makeReg(Src1R->getType());
_vneg(Src1RNeg, Src1R);
_vshl(T, Src0R, Src1RNeg)->setSignType(InstARM32::FS_Signed);
} }
_asr(T, Src0R, Srcs.unswappedSrc1RShAmtImm(this));
_mov(Dest, T); _mov(Dest, T);
return; return;
} }
......
...@@ -884,9 +884,16 @@ protected: ...@@ -884,9 +884,16 @@ protected:
void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) { void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstARM32Vmul>(Dest, Src0, Src1); Context.insert<InstARM32Vmul>(Dest, Src0, Src1);
} }
/// Inserts an unpredicated (CondARM32::AL) InstARM32Vneg computing Dest from
/// Src0, and tags it FS_Signed.
void _vneg(Variable *Dest, Variable *Src0) {
  auto *Neg = Context.insert<InstARM32Vneg>(Dest, Src0, CondARM32::AL);
  Neg->setSignType(InstARM32::FS_Signed);
}
void _vorr(Variable *Dest, Variable *Src0, Variable *Src1) { void _vorr(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstARM32Vorr>(Dest, Src0, Src1); Context.insert<InstARM32Vorr>(Dest, Src0, Src1);
} }
/// Inserts an InstARM32Vshl and returns it. No sign type is set here; the
/// caller is expected to call setSignType() on the returned instruction.
InstARM32Vshl *_vshl(Variable *Dest, Variable *Src0, Variable *Src1) {
  InstARM32Vshl *Shift = Context.insert<InstARM32Vshl>(Dest, Src0, Src1);
  return Shift;
}
void _vsqrt(Variable *Dest, Variable *Src, void _vsqrt(Variable *Dest, Variable *Src,
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Vsqrt>(Dest, Src, Pred); Context.insert<InstARM32Vsqrt>(Dest, Src, Pred);
......
...@@ -33,6 +33,7 @@ entry: ...@@ -33,6 +33,7 @@ entry:
; ASM-NEXT: asr r0, r0, #23 ; ASM-NEXT: asr r0, r0, #23
; DIS-NEXT: 0: e1a00bc0 ; DIS-NEXT: 0: e1a00bc0
; IASM-NOT: asr
; IASM-NEXT: .byte 0xc0 ; IASM-NEXT: .byte 0xc0
; IASM-NEXT: .byte 0xb ; IASM-NEXT: .byte 0xb
; IASM-NEXT: .byte 0xa0 ; IASM-NEXT: .byte 0xa0
...@@ -54,6 +55,7 @@ entry: ...@@ -54,6 +55,7 @@ entry:
; ASM-NEXT: asr r0, r0, r1 ; ASM-NEXT: asr r0, r0, r1
; DIS-NEXT: 10: e1a00150 ; DIS-NEXT: 10: e1a00150
; IASM-NOT: asr
; IASM-NEXT: .byte 0x50 ; IASM-NEXT: .byte 0x50
; IASM-NEXT: .byte 0x1 ; IASM-NEXT: .byte 0x1
; IASM-NEXT: .byte 0xa0 ; IASM-NEXT: .byte 0xa0
...@@ -71,14 +73,12 @@ entry: ...@@ -71,14 +73,12 @@ entry:
%v = ashr <4 x i32> %a, %b %v = ashr <4 x i32> %a, %b
; ASM: asr r0, r0, r1 ; ASM: vneg.s32 q1, q1
; ASM: asr r0, r0, r1 ; ASM-NEXT: vshl.s32 q0, q0, q1
; ASM: asr r0, r0, r1 ; DIS: 20: f3b923c2
; ASM: asr r0, r0, r1 ; DIS: 24: f2220440
; DIS: 28: e1a00150 ; IASM-NOT: vneg
; DIS: 38: e1a00150 ; IASM-NOT: vshl
; DIS: 48: e1a00150
; DIS: 58: e1a00150
ret <4 x i32> %v ret <4 x i32> %v
} }
...@@ -90,14 +90,12 @@ entry: ...@@ -90,14 +90,12 @@ entry:
%v = ashr <8 x i16> %a, %b %v = ashr <8 x i16> %a, %b
; ASM: asr r0, r0, r1 ; ASM: vneg.s16 q1, q1
; ASM: asr r0, r0, r1 ; ASM-NEXT: vshl.s16 q0, q0, q1
; ASM: asr r0, r0, r1 ; DIS: 30: f3b523c2
; ASM: asr r0, r0, r1 ; DIS: 34: f2120440
; ASM: asr r0, r0, r1 ; IASM-NOT: vneg
; ASM: asr r0, r0, r1 ; IASM-NOT: vshl
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
ret <8 x i16> %v ret <8 x i16> %v
} }
...@@ -109,22 +107,12 @@ entry: ...@@ -109,22 +107,12 @@ entry:
%v = ashr <16 x i8> %a, %b %v = ashr <16 x i8> %a, %b
; ASM: asr r0, r0, r1 ; ASM: vneg.s8 q1, q1
; ASM: asr r0, r0, r1 ; ASM-NEXT: vshl.s8 q0, q0, q1
; ASM: asr r0, r0, r1 ; DIS: 40: f3b123c2
; ASM: asr r0, r0, r1 ; DIS: 44: f2020440
; ASM: asr r0, r0, r1 ; IASM-NOT: vneg
; ASM: asr r0, r0, r1 ; IASM-NOT: vshl
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
ret <16 x i8> %v ret <16 x i8> %v
} }
...@@ -33,10 +33,7 @@ entry: ...@@ -33,10 +33,7 @@ entry:
; ASM-NEXT: lsl r0, r0, #23 ; ASM-NEXT: lsl r0, r0, #23
; DIS-NEXT: 0: e1a00b80 ; DIS-NEXT: 0: e1a00b80
; IASM-NEXT: .byte 0x80 ; IASM-NOT: lsl
; IASM-NEXT: .byte 0xb
; IASM-NEXT: .byte 0xa0
; IASM-NEXT: .byte 0xe1
ret i32 %shl ret i32 %shl
} }
...@@ -54,10 +51,7 @@ entry: ...@@ -54,10 +51,7 @@ entry:
; ASM-NEXT: lsl r0, r0, r1 ; ASM-NEXT: lsl r0, r0, r1
; DIS-NEXT: 10: e1a00110 ; DIS-NEXT: 10: e1a00110
; IASM-NEXT: .byte 0x10 ; IASM-NOT: lsl
; IASM-NEXT: .byte 0x1
; IASM-NEXT: .byte 0xa0
; IASM-NEXT: .byte 0xe1
ret i32 %shl ret i32 %shl
} }
...@@ -73,11 +67,9 @@ entry: ...@@ -73,11 +67,9 @@ entry:
%shl = shl <4 x i32> %a, %b %shl = shl <4 x i32> %a, %b
; ASM: lsl r0, r0, r1 ; ASM: vshl.u32 q0, q0, q1
; ASM: lsl r0, r0, r1 ; DIS: 20: f3220440
; ASM: lsl r0, r0, r1 ; IASM-NOT: vshl
; ASM: lsl r0, r0, r1
; DIS: 28: e1a00110
ret <4 x i32> %shl ret <4 x i32> %shl
} }
...@@ -89,14 +81,9 @@ entry: ...@@ -89,14 +81,9 @@ entry:
%v = shl <8 x i16> %a, %b %v = shl <8 x i16> %a, %b
; ASM: lsl r0, r0, r1 ; ASM: vshl.u16 q0, q0, q1
; ASM: lsl r0, r0, r1 ; DIS: 30: f3120440
; ASM: lsl r0, r0, r1 ; IASM-NOT: vshl
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
ret <8 x i16> %v ret <8 x i16> %v
} }
...@@ -108,22 +95,9 @@ entry: ...@@ -108,22 +95,9 @@ entry:
%v = shl <16 x i8> %a, %b %v = shl <16 x i8> %a, %b
; ASM: lsl r0, r0, r1 ; ASM: vshl.u8 q0, q0, q1
; ASM: lsl r0, r0, r1 ; DIS: 40: f3020440
; ASM: lsl r0, r0, r1 ; IASM-NOT: vshl
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
ret <16 x i8> %v ret <16 x i8> %v
} }
...@@ -33,10 +33,7 @@ entry: ...@@ -33,10 +33,7 @@ entry:
; ASM-NEXT: lsr r0, r0, #23 ; ASM-NEXT: lsr r0, r0, #23
; DIS-NEXT: 0: e1a00ba0 ; DIS-NEXT: 0: e1a00ba0
; IASM-NEXT: .byte 0xa0 ; IASM-NOT: lsr
; IASM-NEXT: .byte 0xb
; IASM-NEXT: .byte 0xa0
; IASM-NEXT: .byte 0xe1
ret i32 %v ret i32 %v
} }
...@@ -54,10 +51,7 @@ entry: ...@@ -54,10 +51,7 @@ entry:
; ASM-NEXT: lsr r0, r0, r1 ; ASM-NEXT: lsr r0, r0, r1
; DIS-NEXT: 10: e1a00130 ; DIS-NEXT: 10: e1a00130
; IASM-NEXT: .byte 0x30 ; IASM-NOT: lsr
; IASM-NEXT: .byte 0x1
; IASM-NEXT: .byte 0xa0
; IASM-NEXT: .byte 0xe1
ret i32 %v ret i32 %v
} }
...@@ -73,11 +67,12 @@ entry: ...@@ -73,11 +67,12 @@ entry:
%v = lshr <4 x i32> %a, %b %v = lshr <4 x i32> %a, %b
; ASM: lsr r0, r0, r1 ; ASM: vneg.s32 q1, q1
; ASM: lsr r0, r0, r1 ; ASM-NEXT: vshl.u32 q0, q0, q1
; ASM: lsr r0, r0, r1 ; DIS: 20: f3b923c2
; ASM: lsr r0, r0, r1 ; DIS: 24: f3220440
; DIS: 28: e1a00130 ; IASM-NOT: vneg
; IASM-NOT: vshl
ret <4 x i32> %v ret <4 x i32> %v
} }
...@@ -89,14 +84,12 @@ entry: ...@@ -89,14 +84,12 @@ entry:
%v = lshr <8 x i16> %a, %b %v = lshr <8 x i16> %a, %b
; ASM: lsr r0, r0, r1 ; ASM: vneg.s16 q1, q1
; ASM: lsr r0, r0, r1 ; ASM-NEXT: vshl.u16 q0, q0, q1
; ASM: lsr r0, r0, r1 ; DIS: 30: f3b523c2
; ASM: lsr r0, r0, r1 ; DIS: 34: f3120440
; ASM: lsr r0, r0, r1 ; IASM-NOT: vneg
; ASM: lsr r0, r0, r1 ; IASM-NOT: vshl
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
ret <8 x i16> %v ret <8 x i16> %v
} }
...@@ -108,22 +101,12 @@ entry: ...@@ -108,22 +101,12 @@ entry:
%v = lshr <16 x i8> %a, %b %v = lshr <16 x i8> %a, %b
; ASM: lsr r0, r0, r1 ; ASM: vneg.s8 q1, q1
; ASM: lsr r0, r0, r1 ; ASM-NEXT: vshl.u8 q0, q0, q1
; ASM: lsr r0, r0, r1 ; DIS: 40: f3b123c2
; ASM: lsr r0, r0, r1 ; DIS: 44: f3020440
; ASM: lsr r0, r0, r1 ; IASM-NOT: vneg
; ASM: lsr r0, r0, r1 ; IASM-NOT: vshl
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
ret <16 x i8> %v ret <16 x i8> %v
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment