Commit 15e77d46 by John Porto

Subzero. ARM32. Vector shifts.

BUG=
R=kschimpf@google.com, stichnot@chromium.org
Review URL: https://codereview.chromium.org/1881623002
parent 3018cf2b
......@@ -1289,20 +1289,20 @@ void Assembler::vmulqi(OperandSize sz,
void Assembler::vmulqs(QRegister qd, QRegister qn, QRegister qm) {
EmitSIMDqqq(B24 | B11 | B10 | B8 | B4, kSWord, qd, qn, qm);
}
#endif
// Moved to ARM32::AssemblerARM32::vshlqi().
void Assembler::vshlqi(OperandSize sz,
QRegister qd, QRegister qm, QRegister qn) {
EmitSIMDqqq(B25 | B10, sz, qd, qn, qm);
}
// Moved to ARM32::AssemblerARM32::vshlqu().
void Assembler::vshlqu(OperandSize sz,
QRegister qd, QRegister qm, QRegister qn) {
EmitSIMDqqq(B25 | B24 | B10, sz, qd, qn, qm);
}
#if 0
// Moved to ARM32::AssemblerARM32::veorq()
void Assembler::veorq(QRegister qd, QRegister qn, QRegister qm) {
EmitSIMDqqq(B24 | B8 | B4, kByte, qd, qn, qm);
......@@ -1345,12 +1345,13 @@ void Assembler::vabsqs(QRegister qd, QRegister qm) {
EmitSIMDqqq(B24 | B23 | B21 | B20 | B19 | B16 | B10 | B9 | B8, kSWord,
qd, Q0, qm);
}
#endif
// Moved to ARM32::AssemblerARM32::vnegqs().
void Assembler::vnegqs(QRegister qd, QRegister qm) {
EmitSIMDqqq(B24 | B23 | B21 | B20 | B19 | B16 | B10 | B9 | B8 | B7, kSWord,
qd, Q0, qm);
}
#endif
void Assembler::vrecpeqs(QRegister qd, QRegister qm) {
......
......@@ -693,10 +693,10 @@ class Assembler : public ValueObject {
void vmulqi(OperandSize sz, QRegister qd, QRegister qn, QRegister qm);
// Moved to ARM32::AssemblerARM32::vmulqf().
void vmulqs(QRegister qd, QRegister qn, QRegister qm);
#endif
// Moved to ARM32::AssemblerARM32::vshlqi().
void vshlqi(OperandSize sz, QRegister qd, QRegister qm, QRegister qn);
// Moved to ARM32::AssemblerARM32::vshlqu().
void vshlqu(OperandSize sz, QRegister qd, QRegister qm, QRegister qn);
#if 0
// Moved to Arm32::AssemblerARM32::vmlas()
void vmlas(SRegister sd, SRegister sn, SRegister sm, Condition cond = AL);
// Moved to Arm32::AssemblerARM32::vmlad()
......@@ -749,8 +749,9 @@ class Assembler : public ValueObject {
#endif
void vnegs(SRegister sd, SRegister sm, Condition cond = AL);
void vnegd(DRegister dd, DRegister dm, Condition cond = AL);
void vnegqs(QRegister qd, QRegister qm);
#if 0
// Moved to ARM32::AssemblerARM32::vnegqs().
void vnegqs(QRegister qd, QRegister qm);
// Moved to ARM32::AssemblerARM32::vsqrts().
void vsqrts(SRegister sd, SRegister sm, Condition cond = AL);
// Moved to ARM32::AssemblerARM32::vsqrts().
......
......@@ -3056,6 +3056,30 @@ void AssemblerARM32::vmulqf(const Operand *OpQd, const Operand *OpQn,
emitSIMDqqqBase(VmulqfOpcode, OpQd, OpQn, OpQm, IsFloatTy, Vmulqf);
}
// Emits a NEON vneg on Q registers. ElmtTy selects the element width and,
// for floats, the F-bit variant of the encoding.
void AssemblerARM32::vnegqs(Type ElmtTy, const Operand *OpQd,
const Operand *OpQm) {
// VNEG - ARM section A8.8.355, encoding A1:
// vneg.<dt> <Qd>, <Qm>
//
// 111111111D11ss01dddd0F111QM0mmmm where Dddd=Qd, and Mmmm=Qm, and:
// * dt=s8 -> 00=ss, 0=F
// * dt=s16 -> 01=ss, 0=F
// * dt=s32 -> 10=ss, 0=F
// * dt=f32 -> 10=ss, 1=F
constexpr const char *Vneg = "vneg";
constexpr IValueT VnegOpcode = B24 | B23 | B21 | B20 | B16 | B9 | B8 | B7;
const IValueT Qd = encodeQRegister(OpQd, "Qd", Vneg);
// vneg is a two-operand instruction; the n-register slot is unused (0).
constexpr IValueT Qn = 0;
const IValueT Qm = encodeQRegister(OpQm, "Qm", Vneg);
constexpr bool UseQRegs = true;
// The element size goes into the "ss" field at bits 19:18.
constexpr IValueT ElmtShift = 18;
const IValueT ElmtSize = encodeElmtType(ElmtTy);
assert(Utils::IsUint(2, ElmtSize));
emitSIMDBase(VnegOpcode | (ElmtSize << ElmtShift), mapQRegToDReg(Qd),
mapQRegToDReg(Qn), mapQRegToDReg(Qm), UseQRegs,
isFloatingType(ElmtTy));
}
void AssemblerARM32::vorrq(const Operand *OpQd, const Operand *OpQm,
const Operand *OpQn) {
// VORR (register) - ARM section A8.8.360, encoding A1:
......@@ -3229,6 +3253,34 @@ void AssemblerARM32::vpush(const Variable *OpBaseReg, SizeT NumConsecRegs,
emitVStackOp(Cond, VpushOpcode, OpBaseReg, NumConsecRegs);
}
// Emits a NEON vshl (signed, register-shift form) on Q registers. Qm holds
// the values being shifted; Qn holds the per-element shift amounts.
void AssemblerARM32::vshlqi(Type ElmtTy, const Operand *OpQd,
const Operand *OpQm, const Operand *OpQn) {
// VSHL - ARM section A8.8.396, encoding A1:
// vshl Qd, Qm, Qn
//
// 1111001U0Dssnnnndddd0100NQM0mmmm where Ddddd=Qd, Mmmmm=Qm, Nnnnn=Qn, 0=U,
// 1=Q
assert(isScalarIntegerType(ElmtTy) &&
"vshl expects vector with integer element type");
constexpr const char *Vshl = "vshl";
// U=0 (bit 24 clear) selects the signed variant.
constexpr IValueT VshlOpcode = B10 | B6;
// emitSIMDqqq takes registers in (d, n, m) order, hence OpQn before OpQm.
emitSIMDqqq(VshlOpcode, ElmtTy, OpQd, OpQn, OpQm, Vshl);
}
// Emits a NEON vshl (unsigned, register-shift form) on Q registers. Qm holds
// the values being shifted; Qn holds the per-element shift amounts.
void AssemblerARM32::vshlqu(Type ElmtTy, const Operand *OpQd,
const Operand *OpQm, const Operand *OpQn) {
// VSHL - ARM section A8.8.396, encoding A1:
// vshl Qd, Qm, Qn
//
// 1111001U0Dssnnnndddd0100NQM0mmmm where Ddddd=Qd, Mmmmm=Qm, Nnnnn=Qn, 1=U,
// 1=Q
assert(isScalarIntegerType(ElmtTy) &&
"vshl expects vector with integer element type");
constexpr const char *Vshl = "vshl";
// U=1 (B24 set) selects the unsigned variant; otherwise same as vshlqi.
constexpr IValueT VshlOpcode = B24 | B10 | B6;
// emitSIMDqqq takes registers in (d, n, m) order, hence OpQn before OpQm.
emitSIMDqqq(VshlOpcode, ElmtTy, OpQd, OpQn, OpQm, Vshl);
}
void AssemblerARM32::vsqrtd(const Operand *OpDd, const Operand *OpDm,
CondARM32::Cond Cond) {
// VSQRT - ARM section A8.8.401, encoding A1:
......
......@@ -495,6 +495,8 @@ public:
void vmuls(const Operand *OpSd, const Operand *OpSn, const Operand *OpSm,
CondARM32::Cond Cond);
void vnegqs(Type ElmtTy, const Operand *OpQd, const Operand *OpQm);
void vorrq(const Operand *OpQd, const Operand *OpQm, const Operand *OpQn);
void vpop(const Variable *OpBaseReg, SizeT NumConsecRegs,
......@@ -503,6 +505,12 @@ public:
void vpush(const Variable *OpBaseReg, SizeT NumConsecRegs,
CondARM32::Cond Cond);
void vshlqi(Type ElmtTy, const Operand *OpQd, const Operand *OpQm,
const Operand *OpQn);
void vshlqu(Type ElmtTy, const Operand *OpQd, const Operand *OpQm,
const Operand *OpQn);
void vsqrtd(const Operand *OpDd, const Operand *OpDm, CondARM32::Cond Cond);
void vsqrts(const Operand *OpSd, const Operand *OpSm, CondARM32::Cond Cond);
......
......@@ -33,13 +33,16 @@ using Register = RegARM32::AllRegisters;
static constexpr SizeT VpushVpopMaxConsecRegs = 16;
const struct TypeARM32Attributes_ {
const char *WidthString; // b, h, <blank>, or d
const char *VecWidthString; // i8, i16, i32, f32, f64
const char *WidthString; // b, h, <blank>, or d
const char *FpWidthString; // i8, i16, i32, f32, f64
const char *SVecWidthString; // s8, s16, s32, f32
const char *UVecWidthString; // u8, u16, u32, f32
int8_t SExtAddrOffsetBits;
int8_t ZExtAddrOffsetBits;
} TypeARM32Attributes[] = {
#define X(tag, elementty, int_width, vec_width, sbits, ubits, rraddr, shaddr) \
{ int_width, vec_width, sbits, ubits } \
#define X(tag, elementty, int_width, fp_width, uvec_width, svec_width, sbits, \
ubits, rraddr, shaddr) \
{ int_width, fp_width, svec_width, uvec_width, sbits, ubits } \
,
ICETYPEARM32_TABLE
#undef X
......@@ -70,16 +73,37 @@ size_t getVecElmtBitsize(Type Ty) {
return typeWidthInBytes(typeElementType(Ty)) * CHAR_BIT;
}
} // end of anonymous namespace
const char *InstARM32::getWidthString(Type Ty) {
const char *getWidthString(Type Ty) {
return TypeARM32Attributes[Ty].WidthString;
}
const char *InstARM32::getVecWidthString(Type Ty) {
return TypeARM32Attributes[Ty].VecWidthString;
const char *getFpWidthString(Type Ty) {
return TypeARM32Attributes[Ty].FpWidthString;
}
const char *getSVecWidthString(Type Ty) {
return TypeARM32Attributes[Ty].SVecWidthString;
}
const char *getUVecWidthString(Type Ty) {
return TypeARM32Attributes[Ty].UVecWidthString;
}
// Returns the emitted width suffix (e.g. ".s32", ".u16", ".f32") for a
// vector instruction, selecting the signed/unsigned/plain table column
// based on the instruction's sign annotation.
const char *getVWidthString(Type Ty, InstARM32::FPSign SignType) {
switch (SignType) {
case InstARM32::FS_None:
return getFpWidthString(Ty);
case InstARM32::FS_Signed:
return getSVecWidthString(Ty);
case InstARM32::FS_Unsigned:
return getUVecWidthString(Ty);
}
// All enumerators are handled above; this silences compilers that cannot
// prove the switch is exhaustive.
llvm_unreachable("Invalid Sign Type.");
return getFpWidthString(Ty);
}
} // end of anonymous namespace
const char *InstARM32Pred::predString(CondARM32::Cond Pred) {
return InstARM32CondAttributes[Pred].EmitString;
}
......@@ -150,13 +174,24 @@ void InstARM32Pred::emitUnaryopGPR(const char *Opcode,
Instr->getSrc(0)->emit(Func);
}
void InstARM32Pred::emitUnaryopFP(const char *Opcode,
void InstARM32Pred::emitUnaryopFP(const char *Opcode, FPSign Sign,
const InstARM32Pred *Instr, const Cfg *Func) {
Ostream &Str = Func->getContext()->getStrEmit();
assert(Instr->getSrcSize() == 1);
Type SrcTy = Instr->getSrc(0)->getType();
Str << "\t" << Opcode << Instr->getPredicate() << getVecWidthString(SrcTy)
<< "\t";
Str << "\t" << Opcode << Instr->getPredicate();
switch (Sign) {
case FS_None:
Str << getFpWidthString(SrcTy);
break;
case FS_Signed:
Str << getSVecWidthString(SrcTy);
break;
case FS_Unsigned:
Str << getUVecWidthString(SrcTy);
break;
}
Str << "\t";
Instr->getDest()->emit(Func);
Str << ", ";
Instr->getSrc(0)->emit(Func);
......@@ -192,14 +227,14 @@ void InstARM32Pred::emitThreeAddr(const char *Opcode,
Instr->getSrc(1)->emit(Func);
}
void InstARM32::emitThreeAddrFP(const char *Opcode, const InstARM32 *Instr,
const Cfg *Func) {
void InstARM32::emitThreeAddrFP(const char *Opcode, FPSign SignType,
const InstARM32 *Instr, const Cfg *Func) {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrEmit();
assert(Instr->getSrcSize() == 2);
Str << "\t" << Opcode << getVecWidthString(Instr->getDest()->getType())
<< "\t";
Str << "\t" << Opcode
<< getVWidthString(Instr->getDest()->getType(), SignType) << "\t";
Instr->getDest()->emit(Func);
Str << ", ";
Instr->getSrc(0)->emit(Func);
......@@ -207,15 +242,15 @@ void InstARM32::emitThreeAddrFP(const char *Opcode, const InstARM32 *Instr,
Instr->getSrc(1)->emit(Func);
}
void InstARM32::emitFourAddrFP(const char *Opcode, const InstARM32 *Instr,
const Cfg *Func) {
void InstARM32::emitFourAddrFP(const char *Opcode, FPSign SignType,
const InstARM32 *Instr, const Cfg *Func) {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrEmit();
assert(Instr->getSrcSize() == 3);
assert(Instr->getSrc(0) == Instr->getDest());
Str << "\t" << Opcode << getVecWidthString(Instr->getDest()->getType())
<< "\t";
Str << "\t" << Opcode
<< getVWidthString(Instr->getDest()->getType(), SignType) << "\t";
Instr->getDest()->emit(Func);
Str << ", ";
Instr->getSrc(1)->emit(Func);
......@@ -254,6 +289,11 @@ void InstARM32ThreeAddrFP<K>::emitIAS(const Cfg *Func) const {
emitUsingTextFixup(Func);
}
// Generic integrated-assembler fallback for sign-aware three-address vector
// instructions: emit the textual form via a fixup. Instructions with a real
// binary encoding (e.g. Vshl) provide an explicit specialization.
template <InstARM32::InstKindARM32 K>
void InstARM32ThreeAddrSignAwareFP<K>::emitIAS(const Cfg *Func) const {
InstARM32::emitUsingTextFixup(Func);
}
template <> void InstARM32Mla::emitIAS(const Cfg *Func) const {
assert(getSrcSize() == 3);
auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
......@@ -737,6 +777,27 @@ template <> void InstARM32Vmls::emitIAS(const Cfg *Func) const {
}
}
// Integrated-assembler emission for vector vneg. The destination type
// selects the element width; vnegqs() picks the signed-integer or float
// encoding from the element type.
template <> void InstARM32Vneg::emitIAS(const Cfg *Func) const {
  auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
  const Variable *Dest = getDest();
  const Type DestTy = Dest->getType();
  // Use the cached DestTy throughout (the original recomputed
  // Dest->getType() in both the switch and the error message), matching the
  // sibling Vshl::emitIAS.
  switch (DestTy) {
  default:
    llvm::report_fatal_error("Vneg not defined on type " +
                             typeStdString(DestTy));
  case IceType_v4i1:
  case IceType_v8i1:
  case IceType_v16i1:
  case IceType_v16i8:
  case IceType_v8i16:
  case IceType_v4i32:
  case IceType_v4f32: {
    const Type ElmtTy = typeElementType(DestTy);
    Asm->vnegqs(ElmtTy, Dest, getSrc(0));
  } break;
  }
}
template <> void InstARM32Vorr::emitIAS(const Cfg *Func) const {
auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
const Variable *Dest = getDest();
......@@ -755,6 +816,35 @@ template <> void InstARM32Vorr::emitIAS(const Cfg *Func) const {
assert(!Asm->needsTextFixup());
}
// Integrated-assembler emission for vector vshl. Only integer vector types
// are supported; the Sign annotation (set by the lowering) chooses between
// the signed (vshlqi) and unsigned (vshlqu) encodings.
template <> void InstARM32Vshl::emitIAS(const Cfg *Func) const {
  auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
  const Variable *Dest = getDest();
  const Type DestTy = Dest->getType();
  switch (DestTy) {
  default:
    // Use the cached DestTy in the message (the original recomputed
    // Dest->getType() here).
    llvm::report_fatal_error("Vshl not defined on type " +
                             typeStdString(DestTy));
  case IceType_v4i1:
  case IceType_v8i1:
  case IceType_v16i1:
  case IceType_v16i8:
  case IceType_v8i16:
  case IceType_v4i32: {
    const Type ElmtTy = typeElementType(DestTy);
    // The lowering is expected to always set an explicit sign; in release
    // builds an unset sign falls through to the unsigned encoding.
    assert(Sign != InstARM32::FS_None);
    switch (Sign) {
    case InstARM32::FS_None: // defaults to unsigned.
    case InstARM32::FS_Unsigned:
      Asm->vshlqu(ElmtTy, Dest, getSrc(0), getSrc(1));
      break;
    case InstARM32::FS_Signed:
      Asm->vshlqi(ElmtTy, Dest, getSrc(0), getSrc(1));
      break;
    }
  } break;
  }
}
template <> void InstARM32Vsub::emitIAS(const Cfg *Func) const {
auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
const Variable *Dest = getDest();
......@@ -1374,6 +1464,8 @@ template <> const char *InstARM32Vmla::Opcode = "vmla";
template <> const char *InstARM32Vmls::Opcode = "vmls";
template <> const char *InstARM32Vmul::Opcode = "vmul";
template <> const char *InstARM32Vorr::Opcode = "vorr";
template <> const char *InstARM32UnaryopFP<InstARM32::Vneg>::Opcode = "vneg";
template <> const char *InstARM32ThreeAddrFP<InstARM32::Vshl>::Opcode = "vshl";
template <> const char *InstARM32Vsub::Opcode = "vsub";
// Four-addr ops
template <> const char *InstARM32Mla::Opcode = "mla";
......@@ -1479,7 +1571,7 @@ void InstARM32Mov::emitSingleDestSingleSource(const Cfg *Func) const {
// when vmov{c}'ing, we need to emit a width string. Otherwise, the
// assembler might be tempted to assume we want a vector vmov{c}, and that
// is disallowed because ARM.
const char *WidthString = !CoreVFPMove ? getVecWidthString(Ty) : "";
const char *WidthString = !CoreVFPMove ? getFpWidthString(Ty) : "";
CondARM32::Cond Cond = getPredicate();
if (IsVector)
assert(CondARM32::isUnconditional(Cond) &&
......@@ -2015,6 +2107,11 @@ void InstARM32UnaryopFP<K>::emitIAS(const Cfg *Func) const {
emitUsingTextFixup(Func);
}
// Generic integrated-assembler fallback for sign-aware unary vector
// instructions: emit the textual form via a fixup. Instructions with a real
// binary encoding (e.g. Vneg) provide an explicit specialization.
template <InstARM32::InstKindARM32 K>
void InstARM32UnaryopSignAwareFP<K>::emitIAS(const Cfg *Func) const {
InstARM32::emitUsingTextFixup(Func);
}
template <> void InstARM32Vsqrt::emitIAS(const Cfg *Func) const {
assert(getSrcSize() == 1);
auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
......@@ -2426,7 +2523,7 @@ void InstARM32Vcmp::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 2);
Str << "\t"
"vcmp" << getPredicate() << getVecWidthString(getSrc(0)->getType())
"vcmp" << getPredicate() << getFpWidthString(getSrc(0)->getType())
<< "\t";
getSrc(0)->emit(Func);
Str << ", ";
......@@ -2470,7 +2567,7 @@ void InstARM32Vcmp::dump(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrDump();
Str << "vcmp" << getPredicate() << getVecWidthString(getSrc(0)->getType());
Str << "vcmp" << getPredicate() << getFpWidthString(getSrc(0)->getType());
dumpSources(Func);
}
......@@ -2506,7 +2603,7 @@ void InstARM32Vabs::emit(const Cfg *Func) const {
Ostream &Str = Func->getContext()->getStrEmit();
assert(getSrcSize() == 1);
Str << "\t"
"vabs" << getPredicate() << getVecWidthString(getSrc(0)->getType())
"vabs" << getPredicate() << getFpWidthString(getSrc(0)->getType())
<< "\t";
getDest()->emit(Func);
Str << ", ";
......@@ -2540,7 +2637,7 @@ void InstARM32Vabs::dump(const Cfg *Func) const {
return;
Ostream &Str = Func->getContext()->getStrDump();
dumpDest(Func);
Str << " = vabs" << getPredicate() << getVecWidthString(getSrc(0)->getType());
Str << " = vabs" << getPredicate() << getFpWidthString(getSrc(0)->getType());
}
void InstARM32Dmb::emit(const Cfg *Func) const {
......@@ -2740,8 +2837,7 @@ void OperandARM32FlexFpImm::emit(const Cfg *Func) const {
void OperandARM32FlexFpImm::dump(const Cfg * /*Func*/, Ostream &Str) const {
if (!BuildDefs::dump())
return;
Str << "#" << materializeFloatImmediate(ModifiedImm)
<< InstARM32::getVecWidthString(Ty);
Str << "#" << materializeFloatImmediate(ModifiedImm) << getFpWidthString(Ty);
}
void OperandARM32FlexFpZero::emit(const Cfg *Func) const {
......@@ -2760,7 +2856,7 @@ void OperandARM32FlexFpZero::emit(const Cfg *Func) const {
void OperandARM32FlexFpZero::dump(const Cfg * /*Func*/, Ostream &Str) const {
if (!BuildDefs::dump())
return;
Str << "#0.0" << InstARM32::getVecWidthString(Ty);
Str << "#0.0" << getFpWidthString(Ty);
}
void OperandARM32FlexReg::emit(const Cfg *Func) const {
......@@ -2815,6 +2911,8 @@ template class InstARM32ThreeAddrFP<InstARM32::Veor>;
template class InstARM32FourAddrFP<InstARM32::Vmla>;
template class InstARM32FourAddrFP<InstARM32::Vmls>;
template class InstARM32ThreeAddrFP<InstARM32::Vmul>;
template class InstARM32UnaryopSignAwareFP<InstARM32::Vneg>;
template class InstARM32ThreeAddrSignAwareFP<InstARM32::Vshl>;
template class InstARM32ThreeAddrFP<InstARM32::Vsub>;
template class InstARM32LoadBase<InstARM32::Ldr>;
......
......@@ -70,24 +70,25 @@
// the # of offset bits allowed as part of an addressing mode (for sign or zero
// extending load/stores).
#define ICETYPEARM32_TABLE \
/* tag, element type, int_width, vec_width, addr bits sext, zext, \
reg-reg addr allowed, shift allowed, */ \
X(IceType_void, IceType_void, "" , "" , 0 , 0 , 0, 0) \
X(IceType_i1, IceType_void, "b", "" , 8 , 12, 1, 1) \
X(IceType_i8, IceType_void, "b", "" , 8 , 12, 1, 1) \
X(IceType_i16, IceType_void, "h", "" , 8 , 8 , 1, 0) \
X(IceType_i32, IceType_void, "" , "" , 12, 12, 1, 1) \
X(IceType_i64, IceType_void, "d", "" , 12, 12, 1, 1) \
X(IceType_f32, IceType_void, "" , ".f32", 8, 8 , 0, 0) \
X(IceType_f64, IceType_void, "" , ".f64", 8, 8 , 0, 0) \
X(IceType_v4i1, IceType_i32 , "" , ".i32", 0 , 0 , 1, 0) \
X(IceType_v8i1, IceType_i16 , "" , ".i16", 0 , 0 , 1, 0) \
X(IceType_v16i1, IceType_i8 , "" , ".i8" , 0 , 0 , 1, 0) \
X(IceType_v16i8, IceType_i8 , "" , ".i8" , 0 , 0 , 1, 0) \
X(IceType_v8i16, IceType_i16 , "" , ".i16", 0 , 0 , 1, 0) \
X(IceType_v4i32, IceType_i32 , "" , ".i32", 0 , 0 , 1, 0) \
X(IceType_v4f32, IceType_f32 , "" , ".f32", 0 , 0 , 1, 0)
//#define X(tag, elementty, int_width, vec_width, sbits, ubits, rraddr, shaddr)
/* tag, element type, int_width, fp_width, uvec_width, svec_width, \
addr bits sext, zext, reg-reg addr allowed, shift allowed, */ \
X(IceType_void, IceType_void, "" , "" , "" , "" , 0 , 0 , 0, 0) \
X(IceType_i1, IceType_void, "b", "" , "" , "" , 8 , 12, 1, 1) \
X(IceType_i8, IceType_void, "b", "" , "" , "" , 8 , 12, 1, 1) \
X(IceType_i16, IceType_void, "h", "" , "" , "" , 8 , 8 , 1, 0) \
X(IceType_i32, IceType_void, "" , "" , "" , "" , 12, 12, 1, 1) \
X(IceType_i64, IceType_void, "d", "" , "" , "" , 12, 12, 1, 1) \
X(IceType_f32, IceType_void, "" , ".f32", "" , "" , 8, 8 , 0, 0) \
X(IceType_f64, IceType_void, "" , ".f64", "" , "" , 8, 8 , 0, 0) \
X(IceType_v4i1, IceType_i32 , "" , ".i32", ".u32", ".s32", 0 , 0 , 1, 0) \
X(IceType_v8i1, IceType_i16 , "" , ".i16", ".u16", ".s16", 0 , 0 , 1, 0) \
X(IceType_v16i1, IceType_i8 , "" , ".i8" , ".u8" , ".s8" , 0 , 0 , 1, 0) \
X(IceType_v16i8, IceType_i8 , "" , ".i8" , ".u8" , ".s8" , 0 , 0 , 1, 0) \
X(IceType_v8i16, IceType_i16 , "" , ".i16", ".u16", ".s16", 0 , 0 , 1, 0) \
X(IceType_v4i32, IceType_i32 , "" , ".i32", ".u32", ".s32", 0 , 0 , 1, 0) \
X(IceType_v4f32, IceType_f32 , "" , ".f32", ".f32", ".f32", 0 , 0 , 1, 0)
//#define X(tag, elementty, int_width, fp_width, uvec_width, svec_width, sbits,
// ubits, rraddr, shaddr)
// Shifter types for Data-processing operands as defined in section A5.1.2.
#define ICEINSTARM32SHIFT_TABLE \
......
......@@ -435,15 +435,15 @@ public:
Vmls,
Vmrs,
Vmul,
Vneg,
Vorr,
Vshl,
Vsqrt,
Vsub
};
static constexpr size_t InstSize = sizeof(uint32_t);
static const char *getWidthString(Type Ty);
static const char *getVecWidthString(Type Ty);
static CondARM32::Cond getOppositeCondition(CondARM32::Cond Cond);
/// Called inside derived methods emit() to communicate that multiple
......@@ -452,11 +452,20 @@ public:
/// implemented.
void startNextInst(const Cfg *Func) const;
/// FPSign is used for certain vector instructions (particularly, right
/// shifts) that require an operand sign specification.
enum FPSign {
FS_None,
FS_Signed,
FS_Unsigned,
};
/// Shared emit routines for common forms of instructions.
static void emitThreeAddrFP(const char *Opcode, const InstARM32 *Instr,
const Cfg *Func);
static void emitFourAddrFP(const char *Opcode, const InstARM32 *Instr,
const Cfg *Func);
/// @{
static void emitThreeAddrFP(const char *Opcode, FPSign Sign,
const InstARM32 *Instr, const Cfg *Func);
static void emitFourAddrFP(const char *Opcode, FPSign Sign,
const InstARM32 *Instr, const Cfg *Func);
/// @}
void dump(const Cfg *Func) const override;
......@@ -495,8 +504,8 @@ public:
/// Shared emit routines for common forms of instructions.
static void emitUnaryopGPR(const char *Opcode, const InstARM32Pred *Instr,
const Cfg *Func, bool NeedsWidthSuffix);
static void emitUnaryopFP(const char *Opcode, const InstARM32Pred *Instr,
const Cfg *Func);
static void emitUnaryopFP(const char *Opcode, FPSign Sign,
const InstARM32Pred *Instr, const Cfg *Func);
static void emitTwoAddr(const char *Opcode, const InstARM32Pred *Instr,
const Cfg *Func);
static void emitThreeAddr(const char *Opcode, const InstARM32Pred *Instr,
......@@ -573,7 +582,7 @@ public:
void emit(const Cfg *Func) const override {
if (!BuildDefs::dump())
return;
emitUnaryopFP(Opcode, this, Func);
emitUnaryopFP(Opcode, Sign, this, Func);
}
void emitIAS(const Cfg *Func) const override;
void dump(const Cfg *Func) const override {
......@@ -588,16 +597,39 @@ public:
}
static bool classof(const Inst *Instr) { return isClassof(Instr, K); }
private:
protected:
InstARM32UnaryopFP(Cfg *Func, Variable *Dest, Operand *Src,
CondARM32::Cond Predicate)
: InstARM32Pred(Func, K, 1, Dest, Predicate) {
addSource(Src);
}
FPSign Sign = FS_None;
static const char *Opcode;
};
/// A unary vector/FP instruction (e.g. vneg) that carries a sign annotation
/// used to choose the emitted width suffix (.s8/.u8/...) and the binary
/// encoding. Extends InstARM32UnaryopFP by exposing setSignType() and a
/// sign-aware emitIAS().
template <InstARM32::InstKindARM32 K>
class InstARM32UnaryopSignAwareFP : public InstARM32UnaryopFP<K> {
InstARM32UnaryopSignAwareFP() = delete;
InstARM32UnaryopSignAwareFP(const InstARM32UnaryopSignAwareFP &) = delete;
InstARM32UnaryopSignAwareFP &
operator=(const InstARM32UnaryopSignAwareFP &) = delete;
public:
/// Creates the instruction via the Cfg's arena allocator.
static InstARM32UnaryopSignAwareFP *
create(Cfg *Func, Variable *Dest, Variable *Src, CondARM32::Cond Predicate) {
return new (Func->allocate<InstARM32UnaryopSignAwareFP>())
InstARM32UnaryopSignAwareFP(Func, Dest, Src, Predicate);
}
void emitIAS(const Cfg *Func) const override;
/// Sets the sign annotation (Sign defaults to FS_None in the base class).
void setSignType(InstARM32::FPSign SignType) { this->Sign = SignType; }
private:
InstARM32UnaryopSignAwareFP(Cfg *Func, Variable *Dest, Operand *Src,
CondARM32::Cond Predicate)
: InstARM32UnaryopFP<K>(Func, Dest, Src, Predicate) {}
};
/// Instructions of the form x := x op y.
template <InstARM32::InstKindARM32 K>
class InstARM32TwoAddrGPR : public InstARM32Pred {
......@@ -748,7 +780,7 @@ public:
void emit(const Cfg *Func) const override {
if (!BuildDefs::dump())
return;
emitThreeAddrFP(Opcode, this, Func);
emitThreeAddrFP(Opcode, Sign, this, Func);
}
void emitIAS(const Cfg *Func) const override;
void dump(const Cfg *Func) const override {
......@@ -762,9 +794,10 @@ public:
}
static bool classof(const Inst *Instr) { return isClassof(Instr, K); }
private:
InstARM32ThreeAddrFP(Cfg *Func, Variable *Dest, Variable *Src0,
Variable *Src1)
protected:
FPSign Sign = FS_None;
InstARM32ThreeAddrFP(Cfg *Func, Variable *Dest, Variable *Src0, Operand *Src1)
: InstARM32(Func, K, 2, Dest) {
addSource(Src0);
addSource(Src1);
......@@ -773,6 +806,31 @@ private:
static const char *Opcode;
};
/// A three-address vector/FP instruction (e.g. vshl) that carries a sign
/// annotation used to choose the emitted width suffix (.s8/.u8/...) and the
/// binary encoding. Extends InstARM32ThreeAddrFP by exposing setSignType()
/// and a sign-aware emitIAS().
template <InstARM32::InstKindARM32 K>
class InstARM32ThreeAddrSignAwareFP : public InstARM32ThreeAddrFP<K> {
InstARM32ThreeAddrSignAwareFP() = delete;
InstARM32ThreeAddrSignAwareFP(const InstARM32ThreeAddrSignAwareFP &) = delete;
InstARM32ThreeAddrSignAwareFP &
operator=(const InstARM32ThreeAddrSignAwareFP &) = delete;
public:
/// Create a vector/FP binary-op instruction like vadd, and vsub. Everything
/// must be a register.
static InstARM32ThreeAddrSignAwareFP *create(Cfg *Func, Variable *Dest,
Variable *Src0, Variable *Src1) {
return new (Func->allocate<InstARM32ThreeAddrSignAwareFP>())
InstARM32ThreeAddrSignAwareFP(Func, Dest, Src0, Src1);
}
void emitIAS(const Cfg *Func) const override;
/// Sets the sign annotation (Sign defaults to FS_None in the base class).
void setSignType(InstARM32::FPSign SignType) { this->Sign = SignType; }
private:
InstARM32ThreeAddrSignAwareFP(Cfg *Func, Variable *Dest, Variable *Src0,
Variable *Src1)
: InstARM32ThreeAddrFP<K>(Func, Dest, Src0, Src1) {}
};
/// Instructions of the form x := a op1 (y op2 z). E.g., multiply accumulate.
template <InstARM32::InstKindARM32 K>
class InstARM32FourAddrGPR : public InstARM32Pred {
......@@ -840,7 +898,7 @@ public:
void emit(const Cfg *Func) const override {
if (!BuildDefs::dump())
return;
emitFourAddrFP(Opcode, this, Func);
emitFourAddrFP(Opcode, Sign, this, Func);
}
void emitIAS(const Cfg *Func) const override;
void dump(const Cfg *Func) const override {
......@@ -864,6 +922,7 @@ private:
addSource(Src1);
}
FPSign Sign = FS_None;
static const char *Opcode;
};
......@@ -931,7 +990,9 @@ using InstARM32Veor = InstARM32ThreeAddrFP<InstARM32::Veor>;
using InstARM32Vmla = InstARM32FourAddrFP<InstARM32::Vmla>;
using InstARM32Vmls = InstARM32FourAddrFP<InstARM32::Vmls>;
using InstARM32Vmul = InstARM32ThreeAddrFP<InstARM32::Vmul>;
using InstARM32Vneg = InstARM32UnaryopSignAwareFP<InstARM32::Vneg>;
using InstARM32Vorr = InstARM32ThreeAddrFP<InstARM32::Vorr>;
using InstARM32Vshl = InstARM32ThreeAddrSignAwareFP<InstARM32::Vshl>;
using InstARM32Vsub = InstARM32ThreeAddrFP<InstARM32::Vsub>;
using InstARM32Ldr = InstARM32LoadBase<InstARM32::Ldr>;
using InstARM32Ldrex = InstARM32LoadBase<InstARM32::Ldrex>;
......
......@@ -454,12 +454,9 @@ void TargetARM32::genTargetHelperCallFor(Inst *Instr) {
switch (Op) {
default:
break;
case InstArithmetic::Ashr:
case InstArithmetic::Fdiv:
case InstArithmetic::Frem:
case InstArithmetic::Lshr:
case InstArithmetic::Sdiv:
case InstArithmetic::Shl:
case InstArithmetic::Srem:
case InstArithmetic::Udiv:
case InstArithmetic::Urem:
......@@ -1960,7 +1957,8 @@ void TargetARM32::PostLoweringLegalizer::legalizeMov(InstARM32Mov *MovInstr) {
// For now, we don't handle address modes with Relocatables.
namespace {
// MemTraits contains per-type valid address mode information.
#define X(tag, elementty, int_width, vec_width, sbits, ubits, rraddr, shaddr) \
#define X(tag, elementty, int_width, fp_width, uvec_width, svec_width, sbits, \
ubits, rraddr, shaddr) \
static_assert(!(shaddr) || rraddr, "Check ICETYPEARM32_TABLE::" #tag);
ICETYPEARM32_TABLE
#undef X
......@@ -1971,7 +1969,8 @@ static const struct {
bool CanHaveIndex;
bool CanHaveShiftedIndex;
} MemTraits[] = {
#define X(tag, elementty, int_width, vec_width, sbits, ubits, rraddr, shaddr) \
#define X(tag, elementty, int_width, fp_width, uvec_width, svec_width, sbits, \
ubits, rraddr, shaddr) \
{ (1 << ubits) - 1, (ubits) > 0, rraddr, shaddr, } \
,
ICETYPEARM32_TABLE
......@@ -3120,15 +3119,18 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) {
UnimplementedLoweringError(this, Instr);
return;
// Explicitly whitelist vector instructions we have implemented/enabled.
case InstArithmetic::Fadd:
case InstArithmetic::Add:
case InstArithmetic::Fsub:
case InstArithmetic::Sub:
case InstArithmetic::And:
case InstArithmetic::Or:
case InstArithmetic::Xor:
case InstArithmetic::Ashr:
case InstArithmetic::Fadd:
case InstArithmetic::Fmul:
case InstArithmetic::Fsub:
case InstArithmetic::Lshr:
case InstArithmetic::Mul:
case InstArithmetic::Or:
case InstArithmetic::Shl:
case InstArithmetic::Sub:
case InstArithmetic::Xor:
break;
}
}
......@@ -3448,26 +3450,46 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) {
}
case InstArithmetic::Shl: {
Variable *Src0R = Srcs.unswappedSrc0R(this);
Operand *Src1R = Srcs.unswappedSrc1RShAmtImm(this);
_lsl(T, Src0R, Src1R);
if (!isVectorType(T->getType())) {
Operand *Src1R = Srcs.unswappedSrc1RShAmtImm(this);
_lsl(T, Src0R, Src1R);
} else {
auto *Src1R = Srcs.unswappedSrc1R(this);
_vshl(T, Src0R, Src1R)->setSignType(InstARM32::FS_Unsigned);
}
_mov(Dest, T);
return;
}
case InstArithmetic::Lshr: {
Variable *Src0R = Srcs.unswappedSrc0R(this);
if (DestTy != IceType_i32) {
_uxt(Src0R, Src0R);
if (!isVectorType(T->getType())) {
Operand *Src1R = Srcs.unswappedSrc1RShAmtImm(this);
if (DestTy != IceType_i32) {
_uxt(Src0R, Src0R);
}
_lsr(T, Src0R, Src1R);
} else {
auto *Src1R = Srcs.unswappedSrc1R(this);
auto *Src1RNeg = makeReg(Src1R->getType());
_vneg(Src1RNeg, Src1R);
_vshl(T, Src0R, Src1RNeg)->setSignType(InstARM32::FS_Unsigned);
}
_lsr(T, Src0R, Srcs.unswappedSrc1RShAmtImm(this));
_mov(Dest, T);
return;
}
case InstArithmetic::Ashr: {
Variable *Src0R = Srcs.unswappedSrc0R(this);
if (DestTy != IceType_i32) {
_sxt(Src0R, Src0R);
if (!isVectorType(T->getType())) {
if (DestTy != IceType_i32) {
_sxt(Src0R, Src0R);
}
_asr(T, Src0R, Srcs.unswappedSrc1RShAmtImm(this));
} else {
auto *Src1R = Srcs.unswappedSrc1R(this);
auto *Src1RNeg = makeReg(Src1R->getType());
_vneg(Src1RNeg, Src1R);
_vshl(T, Src0R, Src1RNeg)->setSignType(InstARM32::FS_Signed);
}
_asr(T, Src0R, Srcs.unswappedSrc1RShAmtImm(this));
_mov(Dest, T);
return;
}
......
......@@ -884,9 +884,16 @@ protected:
void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstARM32Vmul>(Dest, Src0, Src1);
}
// Inserts a vector vneg. Negation is inherently a signed operation, so the
// sign annotation is always FS_Signed (yielding a .sNN/.f32 suffix).
void _vneg(Variable *Dest, Variable *Src0) {
Context.insert<InstARM32Vneg>(Dest, Src0, CondARM32::AL)
->setSignType(InstARM32::FS_Signed);
}
void _vorr(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstARM32Vorr>(Dest, Src0, Src1);
}
// Inserts a vector vshl and returns it so the caller can attach the
// required sign annotation via setSignType().
InstARM32Vshl *_vshl(Variable *Dest, Variable *Src0, Variable *Src1) {
return Context.insert<InstARM32Vshl>(Dest, Src0, Src1);
}
void _vsqrt(Variable *Dest, Variable *Src,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Vsqrt>(Dest, Src, Pred);
......
......@@ -33,6 +33,7 @@ entry:
; ASM-NEXT: asr r0, r0, #23
; DIS-NEXT: 0: e1a00bc0
; IASM-NOT: asr
; IASM-NEXT: .byte 0xc0
; IASM-NEXT: .byte 0xb
; IASM-NEXT: .byte 0xa0
......@@ -54,6 +55,7 @@ entry:
; ASM-NEXT: asr r0, r0, r1
; DIS-NEXT: 10: e1a00150
; IASM-NOT: asr
; IASM-NEXT: .byte 0x50
; IASM-NEXT: .byte 0x1
; IASM-NEXT: .byte 0xa0
......@@ -71,14 +73,12 @@ entry:
%v = ashr <4 x i32> %a, %b
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; DIS: 28: e1a00150
; DIS: 38: e1a00150
; DIS: 48: e1a00150
; DIS: 58: e1a00150
; ASM: vneg.s32 q1, q1
; ASM-NEXT: vshl.s32 q0, q0, q1
; DIS: 20: f3b923c2
; DIS: 24: f2220440
; IASM-NOT: vneg
; IASM-NOT: vshl
ret <4 x i32> %v
}
......@@ -90,14 +90,12 @@ entry:
%v = ashr <8 x i16> %a, %b
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: vneg.s16 q1, q1
; ASM-NEXT: vshl.s16 q0, q0, q1
; DIS: 30: f3b523c2
; DIS: 34: f2120440
; IASM-NOT: vneg
; IASM-NOT: vshl
ret <8 x i16> %v
}
......@@ -109,22 +107,12 @@ entry:
%v = ashr <16 x i8> %a, %b
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: asr r0, r0, r1
; ASM: vneg.s8 q1, q1
; ASM-NEXT: vshl.s8 q0, q0, q1
; DIS: 40: f3b123c2
; DIS: 44: f2020440
; IASM-NOT: vneg
; IASM-NOT: vshl
ret <16 x i8> %v
}
......@@ -33,10 +33,7 @@ entry:
; ASM-NEXT: lsl r0, r0, #23
; DIS-NEXT: 0: e1a00b80
; IASM-NEXT: .byte 0x80
; IASM-NEXT: .byte 0xb
; IASM-NEXT: .byte 0xa0
; IASM-NEXT: .byte 0xe1
; IASM-NOT: lsl
ret i32 %shl
}
......@@ -54,10 +51,7 @@ entry:
; ASM-NEXT: lsl r0, r0, r1
; DIS-NEXT: 10: e1a00110
; IASM-NEXT: .byte 0x10
; IASM-NEXT: .byte 0x1
; IASM-NEXT: .byte 0xa0
; IASM-NEXT: .byte 0xe1
; IASM-NOT: lsl
ret i32 %shl
}
......@@ -73,11 +67,9 @@ entry:
%shl = shl <4 x i32> %a, %b
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; DIS: 28: e1a00110
; ASM: vshl.u32 q0, q0, q1
; DIS: 20: f3220440
; IASM-NOT: vshl
ret <4 x i32> %shl
}
......@@ -89,14 +81,9 @@ entry:
%v = shl <8 x i16> %a, %b
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: vshl.u16 q0, q0, q1
; DIS: 30: f3120440
; IASM-NOT: vshl
ret <8 x i16> %v
}
......@@ -108,22 +95,9 @@ entry:
%v = shl <16 x i8> %a, %b
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: lsl r0, r0, r1
; ASM: vshl.u8 q0, q0, q1
; DIS: 40: f3020440
; IASM-NOT: vshl
ret <16 x i8> %v
}
......@@ -33,10 +33,7 @@ entry:
; ASM-NEXT: lsr r0, r0, #23
; DIS-NEXT: 0: e1a00ba0
; IASM-NEXT: .byte 0xa0
; IASM-NEXT: .byte 0xb
; IASM-NEXT: .byte 0xa0
; IASM-NEXT: .byte 0xe1
; IASM-NOT: lsr
ret i32 %v
}
......@@ -54,10 +51,7 @@ entry:
; ASM-NEXT: lsr r0, r0, r1
; DIS-NEXT: 10: e1a00130
; IASM-NEXT: .byte 0x30
; IASM-NEXT: .byte 0x1
; IASM-NEXT: .byte 0xa0
; IASM-NEXT: .byte 0xe1
; IASM-NOT: lsr
ret i32 %v
}
......@@ -73,11 +67,12 @@ entry:
%v = lshr <4 x i32> %a, %b
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; DIS: 28: e1a00130
; ASM: vneg.s32 q1, q1
; ASM-NEXT: vshl.u32 q0, q0, q1
; DIS: 20: f3b923c2
; DIS: 24: f3220440
; IASM-NOT: vneg
; IASM-NOT: vshl
ret <4 x i32> %v
}
......@@ -89,14 +84,12 @@ entry:
%v = lshr <8 x i16> %a, %b
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: vneg.s16 q1, q1
; ASM-NEXT: vshl.u16 q0, q0, q1
; DIS: 30: f3b523c2
; DIS: 34: f3120440
; IASM-NOT: vneg
; IASM-NOT: vshl
ret <8 x i16> %v
}
......@@ -108,22 +101,12 @@ entry:
%v = lshr <16 x i8> %a, %b
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: lsr r0, r0, r1
; ASM: vneg.s8 q1, q1
; ASM-NEXT: vshl.u8 q0, q0, q1
; DIS: 40: f3b123c2
; DIS: 44: f3020440
; IASM-NOT: vneg
; IASM-NOT: vshl
ret <16 x i8> %v
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment