Commit a4d100ab by John Porto

Subzero. ARM32. De-scalarizes icmp and fcmp for vectors.

parent 085bdae2
......@@ -30,9 +30,7 @@ void env$$abort() {
void env$$_abort() { env$$abort(); }
void env$$exit(int Status) {
exit(Status);
}
void env$$exit(int Status) { exit(Status); }
void env$$_exit(int Status) { env$$exit(Status); }
#define UNIMPLEMENTED(f) \
......
......@@ -1323,11 +1323,11 @@ void Assembler::vornq(QRegister qd, QRegister qn, QRegister qm) {
void Assembler::vandq(QRegister qd, QRegister qn, QRegister qm) {
EmitSIMDqqq(B8 | B4, kByte, qd, qn, qm);
}
#endif
void Assembler::vmvnq(QRegister qd, QRegister qm) {
EmitSIMDqqq(B25 | B24 | B23 | B10 | B8 | B7, kWordPair, qd, Q0, qm);
}
#endif
void Assembler::vminqs(QRegister qd, QRegister qn, QRegister qm) {
......@@ -1422,52 +1422,52 @@ void Assembler::vzipqw(QRegister qd, QRegister qm) {
}
#if 0
// Moved to Arm32::AssemblerARM32::vceqqi().
void Assembler::vceqqi(OperandSize sz,
QRegister qd, QRegister qn, QRegister qm) {
EmitSIMDqqq(B24 | B11 | B4, sz, qd, qn, qm);
}
// Moved to Arm32::AssemblerARM32::vceqqs().
void Assembler::vceqqs(QRegister qd, QRegister qn, QRegister qm) {
EmitSIMDqqq(B11 | B10 | B9, kSWord, qd, qn, qm);
}
// Moved to Arm32::AssemblerARM32::vcgeqi().
void Assembler::vcgeqi(OperandSize sz,
QRegister qd, QRegister qn, QRegister qm) {
EmitSIMDqqq(B9 | B8 | B4, sz, qd, qn, qm);
}
// Moved to Arm32::AssemblerARM32::vcugeqi().
void Assembler::vcugeqi(OperandSize sz,
QRegister qd, QRegister qn, QRegister qm) {
EmitSIMDqqq(B24 | B9 | B8 | B4, sz, qd, qn, qm);
}
// Moved to Arm32::AssemblerARM32::vcgeqs().
void Assembler::vcgeqs(QRegister qd, QRegister qn, QRegister qm) {
EmitSIMDqqq(B24 | B11 | B10 | B9, kSWord, qd, qn, qm);
}
// Moved to Arm32::AssemblerARM32::vcgtqi().
void Assembler::vcgtqi(OperandSize sz,
QRegister qd, QRegister qn, QRegister qm) {
EmitSIMDqqq(B9 | B8, sz, qd, qn, qm);
}
// Moved to Arm32::AssemblerARM32::vcugtqi().
void Assembler::vcugtqi(OperandSize sz,
QRegister qd, QRegister qn, QRegister qm) {
EmitSIMDqqq(B24 | B9 | B8, sz, qd, qn, qm);
}
// Moved to Arm32::AssemblerARM32::vcgtqs().
void Assembler::vcgtqs(QRegister qd, QRegister qn, QRegister qm) {
EmitSIMDqqq(B24 | B21 | B11 | B10 | B9, kSWord, qd, qn, qm);
}
#if 0
// Moved to ARM32::AssemblerARM32::bkpt()
void Assembler::bkpt(uint16_t imm16) {
Emit(BkptEncoding(imm16));
......
......@@ -727,19 +727,26 @@ class Assembler : public ValueObject {
#if 0
// Moved to Arm32::AssemblerARM32::vandq().
void vandq(QRegister qd, QRegister qn, QRegister qm);
#endif
// Moved to Arm32::AssemblerARM32::vmvnq().
void vmvnq(QRegister qd, QRegister qm);
// Moved to Arm32::AssemblerARM32::vceqqi().
void vceqqi(OperandSize sz, QRegister qd, QRegister qn, QRegister qm);
// Moved to Arm32::AssemblerARM32::vceqqs().
void vceqqs(QRegister qd, QRegister qn, QRegister qm);
// Moved to Arm32::AssemblerARM32::vcgeqi().
void vcgeqi(OperandSize sz, QRegister qd, QRegister qn, QRegister qm);
// Moved to Arm32::AssemblerARM32::vcugeqi().
void vcugeqi(OperandSize sz, QRegister qd, QRegister qn, QRegister qm);
// Moved to Arm32::AssemblerARM32::vcgeqs().
void vcgeqs(QRegister qd, QRegister qn, QRegister qm);
// Moved to Arm32::AssemblerARM32::vcgtqi().
void vcgtqi(OperandSize sz, QRegister qd, QRegister qn, QRegister qm);
// Moved to Arm32::AssemblerARM32::vcugtqi().
void vcugtqi(OperandSize sz, QRegister qd, QRegister qn, QRegister qm);
// Moved to Arm32::AssemblerARM32::vcgtqs().
void vcgtqs(QRegister qd, QRegister qn, QRegister qm);
#if 0
// Moved to Arm32::AssemblerARM32::vabss().
void vabss(SRegister sd, SRegister sm, Condition cond = AL);
// Moved to Arm32::AssemblerARM32::vabsd().
......
......@@ -2422,6 +2422,102 @@ void AssemblerARM32::vbslq(const Operand *OpQd, const Operand *OpQm,
emitSIMDqqq(VbslqOpcode, ElmtTy, OpQd, OpQm, OpQn, Vbslq);
}
void AssemblerARM32::vceqqi(const Type ElmtTy, const Operand *OpQd,
const Operand *OpQm, const Operand *OpQn) {
// vceq (register) - ARM section A8.8.291, encoding A1:
// vceq.<st> <Qd>, <Qn>, <Qm>
//
// 111100110Dssnnnndddd1000NQM1mmmm where Dddd=OpQd, Nnnn=OpQm, Mmmm=OpQm, and
// st in [i8, i16, i32] where ss is the index.
constexpr const char *Vceq = "vceq";
constexpr IValueT VceqOpcode = B24 | B11 | B4;
emitSIMDqqq(VceqOpcode, ElmtTy, OpQd, OpQm, OpQn, Vceq);
}
void AssemblerARM32::vceqqs(const Operand *OpQd, const Operand *OpQm,
const Operand *OpQn) {
// vceq (register) - ARM section A8.8.291, encoding A2:
// vceq.f32 <Qd>, <Qn>, <Qm>
//
// 111100100D00nnnndddd1110NQM0mmmm where Dddd=OpQd, Nnnn=OpQm, and Mmmm=OpQm.
constexpr const char *Vceq = "vceq";
constexpr IValueT VceqOpcode = B11 | B10 | B9;
constexpr Type ElmtTy = IceType_i8; // encoded as 0b00
emitSIMDqqq(VceqOpcode, ElmtTy, OpQd, OpQm, OpQn, Vceq);
}
void AssemblerARM32::vcgeqi(const Type ElmtTy, const Operand *OpQd,
const Operand *OpQm, const Operand *OpQn) {
// vcge (register) - ARM section A8.8.293, encoding A1:
// vcge.<st> <Qd>, <Qn>, <Qm>
//
// 1111001U0Dssnnnndddd0011NQM1mmmm where Dddd=OpQd, Nnnn=OpQm, Mmmm=OpQm,
// 0=U, and st in [s8, s16, s32] where ss is the index.
constexpr const char *Vcge = "vcge";
constexpr IValueT VcgeOpcode = B9 | B8 | B4;
emitSIMDqqq(VcgeOpcode, ElmtTy, OpQd, OpQm, OpQn, Vcge);
}
void AssemblerARM32::vcugeqi(const Type ElmtTy, const Operand *OpQd,
const Operand *OpQm, const Operand *OpQn) {
// vcge (register) - ARM section A8.8.293, encoding A1:
// vcge.<st> <Qd>, <Qn>, <Qm>
//
// 1111001U0Dssnnnndddd0011NQM1mmmm where Dddd=OpQd, Nnnn=OpQm, Mmmm=OpQm,
// 1=U, and st in [u8, u16, u32] where ss is the index.
constexpr const char *Vcge = "vcge";
constexpr IValueT VcgeOpcode = B24 | B9 | B8 | B4;
emitSIMDqqq(VcgeOpcode, ElmtTy, OpQd, OpQm, OpQn, Vcge);
}
void AssemblerARM32::vcgeqs(const Operand *OpQd, const Operand *OpQm,
const Operand *OpQn) {
// vcge (register) - ARM section A8.8.293, encoding A2:
// vcge.f32 <Qd>, <Qn>, <Qm>
//
// 111100110D00nnnndddd1110NQM0mmmm where Dddd=OpQd, Nnnn=OpQm, and Mmmm=OpQm.
constexpr const char *Vcge = "vcge";
constexpr IValueT VcgeOpcode = B24 | B11 | B10 | B9;
constexpr Type ElmtTy = IceType_i8; // encoded as 0b00.
emitSIMDqqq(VcgeOpcode, ElmtTy, OpQd, OpQm, OpQn, Vcge);
}
void AssemblerARM32::vcgtqi(const Type ElmtTy, const Operand *OpQd,
const Operand *OpQm, const Operand *OpQn) {
// vcgt (register) - ARM section A8.8.295, encoding A1:
// vcgt.<st> <Qd>, <Qn>, <Qm>
//
// 1111001U0Dssnnnndddd0011NQM0mmmm where Dddd=OpQd, Nnnn=OpQm, Mmmm=OpQm,
// 0=U, and st in [s8, s16, s32] where ss is the index.
constexpr const char *Vcge = "vcgt";
constexpr IValueT VcgeOpcode = B9 | B8;
emitSIMDqqq(VcgeOpcode, ElmtTy, OpQd, OpQm, OpQn, Vcge);
}
void AssemblerARM32::vcugtqi(const Type ElmtTy, const Operand *OpQd,
const Operand *OpQm, const Operand *OpQn) {
// vcgt (register) - ARM section A8.8.295, encoding A1:
// vcgt.<st> <Qd>, <Qn>, <Qm>
//
// 111100110Dssnnnndddd0011NQM0mmmm where Dddd=OpQd, Nnnn=OpQm, Mmmm=OpQm,
// 1=U, and st in [u8, u16, u32] where ss is the index.
constexpr const char *Vcge = "vcgt";
constexpr IValueT VcgeOpcode = B24 | B9 | B8;
emitSIMDqqq(VcgeOpcode, ElmtTy, OpQd, OpQm, OpQn, Vcge);
}
void AssemblerARM32::vcgtqs(const Operand *OpQd, const Operand *OpQm,
const Operand *OpQn) {
// vcgt (register) - ARM section A8.8.295, encoding A2:
// vcgt.f32 <Qd>, <Qn>, <Qm>
//
// 111100110D10nnnndddd1110NQM0mmmm where Dddd=OpQd, Nnnn=OpQm, and Mmmm=OpQm.
constexpr const char *Vcge = "vcgt";
constexpr IValueT VcgeOpcode = B24 | B21 | B11 | B10 | B9;
constexpr Type ElmtTy = IceType_i8; // encoded as 0b00.
emitSIMDqqq(VcgeOpcode, ElmtTy, OpQd, OpQm, OpQn, Vcge);
}
void AssemblerARM32::vcmpd(const Operand *OpDd, const Operand *OpDm,
CondARM32::Cond Cond) {
constexpr const char *Vcmpd = "vcmpd";
......@@ -3143,6 +3239,23 @@ void AssemblerARM32::vmulqf(const Operand *OpQd, const Operand *OpQn,
emitSIMDqqqBase(VmulqfOpcode, OpQd, OpQn, OpQm, IsFloatTy, Vmulqf);
}
void AssemblerARM32::vmvnq(const Operand *OpQd, const Operand *OpQm) {
// VMVN (integer) - ARM section A8.8.354, encoding A1:
// vmvn <Qd>, <Qm>
//
// 111100111D110000dddd01011QM0mmmm where Dddd=Qd, Mmmm=Qm, and 1=Q.
// TODO(jpp) xxx: unify
constexpr const char *Vmvn = "vmvn";
constexpr IValueT VmvnOpcode = B24 | B23 | B21 | B20 | B10 | B8 | B7;
const IValueT Qd = encodeQRegister(OpQd, "Qd", Vmvn);
constexpr IValueT Qn = 0;
const IValueT Qm = encodeQRegister(OpQm, "Qm", Vmvn);
constexpr bool UseQRegs = true;
constexpr bool IsFloat = false;
emitSIMDBase(VmvnOpcode, mapQRegToDReg(Qd), mapQRegToDReg(Qn),
mapQRegToDReg(Qm), UseQRegs, IsFloat);
}
void AssemblerARM32::vnegqs(Type ElmtTy, const Operand *OpQd,
const Operand *OpQm) {
// VNEG - ARM section A8.8.355, encoding A1:
......
......@@ -345,6 +345,27 @@ public:
void vbslq(const Operand *OpQd, const Operand *OpQm, const Operand *OpQn);
void vceqqi(const Type ElmtTy, const Operand *OpQd, const Operand *OpQm,
const Operand *OpQn);
void vceqqs(const Operand *OpQd, const Operand *OpQm, const Operand *OpQn);
void vcgeqi(const Type ElmtTy, const Operand *OpQd, const Operand *OpQm,
const Operand *OpQn);
void vcugeqi(const Type ElmtTy, const Operand *OpQd, const Operand *OpQm,
const Operand *OpQn);
void vcgeqs(const Operand *OpQd, const Operand *OpQm, const Operand *OpQn);
void vcgtqi(const Type ElmtTy, const Operand *OpQd, const Operand *OpQm,
const Operand *OpQn);
void vcugtqi(const Type ElmtTy, const Operand *OpQd, const Operand *OpQm,
const Operand *OpQn);
void vcgtqs(const Operand *OpQd, const Operand *OpQm, const Operand *OpQn);
void vcmpd(const Operand *OpDd, const Operand *OpDm, CondARM32::Cond cond);
// Second argument of compare is zero (+0.0).
......@@ -505,6 +526,10 @@ public:
void vmuls(const Operand *OpSd, const Operand *OpSn, const Operand *OpSm,
CondARM32::Cond Cond);
void vmvnq(const Operand *OpQd, const Operand *OpQm);
void vnegqs(const Operand *OpQd, const Operand *OpQm);
void vnegqs(Type ElmtTy, const Operand *OpQd, const Operand *OpQm);
void vorrq(const Operand *OpQd, const Operand *OpQm, const Operand *OpQn);
......
......@@ -228,13 +228,13 @@ void InstARM32Pred::emitThreeAddr(const char *Opcode,
}
void InstARM32::emitThreeAddrFP(const char *Opcode, FPSign SignType,
const InstARM32 *Instr, const Cfg *Func) {
const InstARM32 *Instr, const Cfg *Func,
Type OpType) {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrEmit();
assert(Instr->getSrcSize() == 2);
Str << "\t" << Opcode
<< getVWidthString(Instr->getDest()->getType(), SignType) << "\t";
Str << "\t" << Opcode << getVWidthString(OpType, SignType) << "\t";
Instr->getDest()->emit(Func);
Str << ", ";
Instr->getSrc(0)->emit(Func);
......@@ -704,6 +704,95 @@ template <> void InstARM32Vand::emitIAS(const Cfg *Func) const {
assert(!Asm->needsTextFixup());
}
template <> void InstARM32Vceq::emitIAS(const Cfg *Func) const {
  // Emits vceq through the integrated assembler. The variant is chosen from
  // the type of the first source operand, not from the destination.
  auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
  const Variable *Result = getDest();
  const Type OperandTy = getSrc(0)->getType();
  switch (OperandTy) {
  case IceType_v4f32:
    // Floating-point lanes.
    Asm->vceqqs(Result, getSrc(0), getSrc(1));
    break;
  case IceType_v4i1:
  case IceType_v8i1:
  case IceType_v16i1:
  case IceType_v16i8:
  case IceType_v8i16:
  case IceType_v4i32:
    // Integer (and i1) lanes: the element type selects the lane size.
    Asm->vceqqi(typeElementType(OperandTy), Result, getSrc(0), getSrc(1));
    break;
  default:
    llvm::report_fatal_error("Vceq not defined on type " +
                             typeStdString(OperandTy));
  }
  assert(!Asm->needsTextFixup());
}
template <> void InstARM32Vcge::emitIAS(const Cfg *Func) const {
  // Emits vcge through the integrated assembler. The variant is chosen from
  // the type of the first source operand; integer compares additionally
  // dispatch on the instruction's sign type.
  auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
  const Variable *Dest = getDest();
  const Type SrcTy = getSrc(0)->getType();
  switch (SrcTy) {
  default:
    // Report the type that was actually switched on (the source type), not
    // the destination type.
    llvm::report_fatal_error("Vcge not defined on type " +
                             typeStdString(SrcTy));
  case IceType_v4i1:
  case IceType_v8i1:
  case IceType_v16i1:
  case IceType_v16i8:
  case IceType_v8i16:
  case IceType_v4i32: {
    const Type ElmtTy = typeElementType(SrcTy);
    assert(Sign != InstARM32::FS_None);
    switch (Sign) {
    case InstARM32::FS_None:
      // No default signedness: an integer vcge must carry an explicit sign.
      llvm_unreachable("Sign should not be FS_None.");
    case InstARM32::FS_Unsigned:
      Asm->vcugeqi(ElmtTy, Dest, getSrc(0), getSrc(1));
      break;
    case InstARM32::FS_Signed:
      Asm->vcgeqi(ElmtTy, Dest, getSrc(0), getSrc(1));
      break;
    }
  } break;
  case IceType_v4f32:
    // Floating-point lanes; Sign is not consulted.
    Asm->vcgeqs(Dest, getSrc(0), getSrc(1));
    break;
  }
}
template <> void InstARM32Vcgt::emitIAS(const Cfg *Func) const {
  // Emits vcgt through the integrated assembler. The variant is chosen from
  // the type of the first source operand; integer compares additionally
  // dispatch on the instruction's sign type.
  auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
  const Variable *Dest = getDest();
  const Type SrcTy = getSrc(0)->getType();
  switch (SrcTy) {
  default:
    // Report the type that was actually switched on (the source type), not
    // the destination type.
    llvm::report_fatal_error("Vcgt not defined on type " +
                             typeStdString(SrcTy));
  case IceType_v4i1:
  case IceType_v8i1:
  case IceType_v16i1:
  case IceType_v16i8:
  case IceType_v8i16:
  case IceType_v4i32: {
    const Type ElmtTy = typeElementType(SrcTy);
    assert(Sign != InstARM32::FS_None);
    switch (Sign) {
    case InstARM32::FS_None:
      // No default signedness: an integer vcgt must carry an explicit sign.
      llvm_unreachable("Sign should not be FS_None.");
    case InstARM32::FS_Unsigned:
      Asm->vcugtqi(ElmtTy, Dest, getSrc(0), getSrc(1));
      break;
    case InstARM32::FS_Signed:
      Asm->vcgtqi(ElmtTy, Dest, getSrc(0), getSrc(1));
      break;
    }
  } break;
  case IceType_v4f32:
    // Floating-point lanes; Sign is not consulted.
    Asm->vcgtqs(Dest, getSrc(0), getSrc(1));
    break;
  }
}
template <> void InstARM32Vbsl::emitIAS(const Cfg *Func) const {
auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
const Variable *Dest = getDest();
......@@ -795,6 +884,25 @@ template <> void InstARM32Vmls::emitIAS(const Cfg *Func) const {
}
}
template <> void InstARM32Vmvn::emitIAS(const Cfg *Func) const {
  // Emits vmvn through the integrated assembler. Every accepted vector type
  // goes through the same vmvnq call; any other type is a fatal error.
  auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
  const Variable *Result = getDest();
  switch (Result->getType()) {
  case IceType_v4i1:
  case IceType_v8i1:
  case IceType_v16i1:
  case IceType_v16i8:
  case IceType_v8i16:
  case IceType_v4i32:
  case IceType_v4f32:
    Asm->vmvnq(Result, getSrc(0));
    break;
  default:
    llvm::report_fatal_error("Vmvn not defined on type " +
                             typeStdString(Result->getType()));
  }
}
template <> void InstARM32Vneg::emitIAS(const Cfg *Func) const {
auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
const Variable *Dest = getDest();
......@@ -842,6 +950,8 @@ template <> void InstARM32Vshl::emitIAS(const Cfg *Func) const {
default:
llvm::report_fatal_error("Vshl not defined on type " +
typeStdString(Dest->getType()));
// TODO(jpp): handle i1 vectors in terms of element count instead of element
// type.
case IceType_v4i1:
case IceType_v8i1:
case IceType_v16i1:
......@@ -879,6 +989,8 @@ template <> void InstARM32Vshr::emitIAS(const Cfg *Func) const {
default:
llvm::report_fatal_error("Vshr not defined on type " +
typeStdString(Dest->getType()));
// TODO(jpp): handle i1 vectors in terms of element count instead of element
// type.
case IceType_v4i1:
case IceType_v8i1:
case IceType_v16i1:
......@@ -1515,11 +1627,15 @@ template <> const char *InstARM32Udiv::Opcode = "udiv";
template <> const char *InstARM32Vadd::Opcode = "vadd";
template <> const char *InstARM32Vand::Opcode = "vand";
template <> const char *InstARM32Vbsl::Opcode = "vbsl";
template <> const char *InstARM32Vceq::Opcode = "vceq";
template <> const char *InstARM32ThreeAddrFP<InstARM32::Vcge>::Opcode = "vcge";
template <> const char *InstARM32ThreeAddrFP<InstARM32::Vcgt>::Opcode = "vcgt";
template <> const char *InstARM32Vdiv::Opcode = "vdiv";
template <> const char *InstARM32Veor::Opcode = "veor";
template <> const char *InstARM32Vmla::Opcode = "vmla";
template <> const char *InstARM32Vmls::Opcode = "vmls";
template <> const char *InstARM32Vmul::Opcode = "vmul";
template <> const char *InstARM32Vmvn::Opcode = "vmvn";
template <> const char *InstARM32Vorr::Opcode = "vorr";
template <> const char *InstARM32UnaryopFP<InstARM32::Vneg>::Opcode = "vneg";
template <> const char *InstARM32ThreeAddrFP<InstARM32::Vshl>::Opcode = "vshl";
......@@ -1758,6 +1874,7 @@ void InstARM32Mov::emitIAS(const Cfg *Func) const {
}
}
break; // Error
// TODO(jpp): Remove vectors of i1.
case IceType_v4i1:
case IceType_v8i1:
case IceType_v16i1:
......@@ -2984,6 +3101,8 @@ template class InstARM32ThreeAddrGPR<InstARM32::Sub>;
template class InstARM32ThreeAddrGPR<InstARM32::Udiv>;
template class InstARM32ThreeAddrFP<InstARM32::Vadd>;
template class InstARM32ThreeAddrSignAwareFP<InstARM32::Vcge>;
template class InstARM32ThreeAddrSignAwareFP<InstARM32::Vcgt>;
template class InstARM32ThreeAddrFP<InstARM32::Vdiv>;
template class InstARM32ThreeAddrFP<InstARM32::Veor>;
template class InstARM32FourAddrFP<InstARM32::Vmla>;
......
......@@ -428,6 +428,9 @@ public:
Vadd,
Vand,
Vbsl,
Vceq,
Vcge,
Vcgt,
Vcmp,
Vcvt,
Vdiv,
......@@ -436,6 +439,7 @@ public:
Vmls,
Vmrs,
Vmul,
Vmvn,
Vneg,
Vorr,
Vshl,
......@@ -464,7 +468,8 @@ public:
/// Shared emit routines for common forms of instructions.
/// @{
static void emitThreeAddrFP(const char *Opcode, FPSign Sign,
const InstARM32 *Instr, const Cfg *Func);
const InstARM32 *Instr, const Cfg *Func,
Type OpType);
static void emitFourAddrFP(const char *Opcode, FPSign Sign,
const InstARM32 *Instr, const Cfg *Func);
/// @}
......@@ -782,7 +787,8 @@ public:
void emit(const Cfg *Func) const override {
if (!BuildDefs::dump())
return;
emitThreeAddrFP(Opcode, Sign, this, Func);
const Type OpType = (isVectorCompare() ? getSrc(0) : getDest())->getType();
emitThreeAddrFP(Opcode, Sign, this, Func, OpType);
}
void emitIAS(const Cfg *Func) const override;
void dump(const Cfg *Func) const override {
......@@ -790,8 +796,8 @@ public:
return;
Ostream &Str = Func->getContext()->getStrDump();
dumpDest(Func);
Str << " = ";
Str << Opcode << "." << getDest()->getType() << " ";
const Type OpType = (isVectorCompare() ? getSrc(0) : getDest())->getType();
Str << " = " << Opcode << "." << OpType << " ";
dumpSources(Func);
}
static bool classof(const Inst *Instr) { return isClassof(Instr, K); }
......@@ -806,6 +812,11 @@ protected:
}
static const char *Opcode;
private:
/// True when this instantiation's kind K is one of the vector compares
/// (Vceq, Vcge, Vcgt). emit() and dump() use it to take the operand type from
/// the first source instead of from the destination.
static constexpr bool isVectorCompare() {
  return K == InstARM32::Vceq || K == InstARM32::Vcge || K == InstARM32::Vcgt;
}
};
template <InstARM32::InstKindARM32 K>
......@@ -994,11 +1005,15 @@ using InstARM32Udiv = InstARM32ThreeAddrGPR<InstARM32::Udiv>;
using InstARM32Vadd = InstARM32ThreeAddrFP<InstARM32::Vadd>;
using InstARM32Vand = InstARM32ThreeAddrFP<InstARM32::Vand>;
using InstARM32Vbsl = InstARM32ThreeAddrFP<InstARM32::Vbsl>;
using InstARM32Vceq = InstARM32ThreeAddrFP<InstARM32::Vceq>;
using InstARM32Vcge = InstARM32ThreeAddrSignAwareFP<InstARM32::Vcge>;
using InstARM32Vcgt = InstARM32ThreeAddrSignAwareFP<InstARM32::Vcgt>;
using InstARM32Vdiv = InstARM32ThreeAddrFP<InstARM32::Vdiv>;
using InstARM32Veor = InstARM32ThreeAddrFP<InstARM32::Veor>;
using InstARM32Vmla = InstARM32FourAddrFP<InstARM32::Vmla>;
using InstARM32Vmls = InstARM32FourAddrFP<InstARM32::Vmls>;
using InstARM32Vmul = InstARM32ThreeAddrFP<InstARM32::Vmul>;
using InstARM32Vmvn = InstARM32UnaryopFP<InstARM32::Vmvn>;
using InstARM32Vneg = InstARM32UnaryopSignAwareFP<InstARM32::Vneg>;
using InstARM32Vorr = InstARM32ThreeAddrFP<InstARM32::Vorr>;
using InstARM32Vshl = InstARM32ThreeAddrSignAwareFP<InstARM32::Vshl>;
......
......@@ -124,7 +124,7 @@ namespace {
const struct TableIcmp32_ {
CondARM32::Cond Mapping;
} TableIcmp32[] = {
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64) \
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V) \
{ CondARM32::C_32 } \
,
ICMPARM32_TABLE
......@@ -140,7 +140,7 @@ const struct TableIcmp64_ {
bool Swapped;
CondARM32::Cond C1, C2;
} TableIcmp64[] = {
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64) \
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V) \
{ is_signed, swapped64, CondARM32::C1_64, CondARM32::C2_64 } \
,
ICMPARM32_TABLE
......@@ -163,7 +163,8 @@ CondARM32::Cond getIcmp32Mapping(InstIcmp::ICond Cond) {
namespace {
// Define a temporary set of enum values based on low-level table entries.
enum _icmp_ll_enum {
#define X(val, signed, swapped64, C_32, C1_64, C2_64) _icmp_ll_##val,
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V) \
_icmp_ll_##val,
ICMPARM32_TABLE
#undef X
_num
......@@ -174,7 +175,7 @@ ICEINSTICMP_TABLE
#undef X
// Define a set of constants based on low-level table entries, and ensure the
// table entry keys are consistent.
#define X(val, signed, swapped64, C_32, C1_64, C2_64) \
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V) \
static_assert( \
_icmp_ll_##val == _icmp_hl_##val, \
"Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE: " #val);
......@@ -829,38 +830,6 @@ void TargetARM32::genTargetHelperCallFor(Inst *Instr) {
}
llvm::report_fatal_error("Control flow should never have reached here.");
}
case Inst::Icmp: {
Variable *Dest = Instr->getDest();
const Type DestTy = Dest->getType();
if (isVectorType(DestTy)) {
auto *CmpInstr = llvm::cast<InstIcmp>(Instr);
const auto Condition = CmpInstr->getCondition();
scalarizeInstruction(
Dest,
[this, Condition](Variable *Dest, Variable *Src0, Variable *Src1) {
return Context.insert<InstIcmp>(Condition, Dest, Src0, Src1);
},
CmpInstr->getSrc(0), CmpInstr->getSrc(1));
CmpInstr->setDeleted();
}
return;
}
case Inst::Fcmp: {
Variable *Dest = Instr->getDest();
const Type DestTy = Dest->getType();
if (isVectorType(DestTy)) {
auto *CmpInstr = llvm::cast<InstFcmp>(Instr);
const auto Condition = CmpInstr->getCondition();
scalarizeInstruction(
Dest,
[this, Condition](Variable *Dest, Variable *Src0, Variable *Src1) {
return Context.insert<InstFcmp>(Condition, Dest, Src0, Src1);
},
CmpInstr->getSrc(0), CmpInstr->getSrc(1));
CmpInstr->setDeleted();
}
return;
}
}
}
......@@ -4251,7 +4220,7 @@ namespace {
// Validates FCMPARM32_TABLE's declaration w.r.t. InstFcmp::FCondition ordering
// (and naming).
enum {
#define X(val, CC0, CC1) _fcmp_ll_##val,
#define X(val, CC0, CC1, CC0_V, CC1_V, INV_V, NEG_V) _fcmp_ll_##val,
FCMPARM32_TABLE
#undef X
_fcmp_ll_NUM
......@@ -4277,7 +4246,7 @@ struct {
CondARM32::Cond CC0;
CondARM32::Cond CC1;
} TableFcmp[] = {
#define X(val, CC0, CC1) \
#define X(val, CC0, CC1, CC0_V, CC1_V, INV_V, NEG_V) \
{ CondARM32::CC0, CondARM32::CC1 } \
,
FCMPARM32_TABLE
......@@ -4322,8 +4291,80 @@ TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) {
void TargetARM32::lowerFcmp(const InstFcmp *Instr) {
Variable *Dest = Instr->getDest();
if (isVectorType(Dest->getType())) {
UnimplementedLoweringError(this, Instr);
const Type DestTy = Dest->getType();
if (isVectorType(DestTy)) {
if (Instr->getCondition() == InstFcmp::False) {
constexpr Type SafeTypeForMovingConstant = IceType_v4i32;
auto *T = makeReg(SafeTypeForMovingConstant);
_mov(T, llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(0)));
_mov(Dest, T);
return;
}
if (Instr->getCondition() == InstFcmp::True) {
constexpr Type SafeTypeForMovingConstant = IceType_v4i32;
auto *T = makeReg(SafeTypeForMovingConstant);
_mov(T, llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(1)));
_mov(Dest, T);
return;
}
Variable *T0;
Variable *T1;
bool Negate = false;
auto *Src0 = legalizeToReg(Instr->getSrc(0));
auto *Src1 = legalizeToReg(Instr->getSrc(1));
switch (Instr->getCondition()) {
default:
llvm::report_fatal_error("Unhandled fp comparison.");
#define _Vcnone(Tptr, S0, S1) \
do { \
*(Tptr) = nullptr; \
} while (0)
#define _Vceq(Tptr, S0, S1) \
do { \
*(Tptr) = makeReg(DestTy); \
_vceq(*(Tptr), S0, S1); \
} while (0)
#define _Vcge(Tptr, S0, S1) \
do { \
*(Tptr) = makeReg(DestTy); \
_vcge(*(Tptr), S0, S1)->setSignType(InstARM32::FS_Signed); \
} while (0)
#define _Vcgt(Tptr, S0, S1) \
do { \
*(Tptr) = makeReg(DestTy); \
_vcgt(*(Tptr), S0, S1)->setSignType(InstARM32::FS_Signed); \
} while (0)
#define X(val, CC0, CC1, CC0_V, CC1_V, INV_V, NEG_V) \
case InstFcmp::val: { \
_Vc##CC0_V(&T0, (INV_V) ? Src1 : Src0, (INV_V) ? Src0 : Src1); \
_Vc##CC1_V(&T1, (INV_V) ? Src0 : Src1, (INV_V) ? Src1 : Src0); \
Negate = NEG_V; \
} break;
FCMPARM32_TABLE
#undef X
#undef _Vcgt
#undef _Vcge
#undef _Vceq
#undef _Vcnone
}
assert(T0 != nullptr);
Variable *T = T0;
if (T1 != nullptr) {
T = makeReg(DestTy);
_vorr(T, T0, T1);
}
if (Negate) {
auto *TNeg = makeReg(DestTy);
_vmvn(TNeg, T);
T = TNeg;
}
_mov(Dest, T);
return;
}
......@@ -4621,9 +4662,78 @@ TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(InstIcmp::ICond Condition,
void TargetARM32::lowerIcmp(const InstIcmp *Instr) {
Variable *Dest = Instr->getDest();
const Type DestTy = Dest->getType();
if (isVectorType(Dest->getType())) {
UnimplementedLoweringError(this, Instr);
if (isVectorType(DestTy)) {
auto *T = makeReg(DestTy);
auto *Src0 = legalizeToReg(Instr->getSrc(0));
auto *Src1 = legalizeToReg(Instr->getSrc(1));
const Type SrcTy = Src0->getType();
bool NeedsShl = false;
Type NewTypeAfterShl;
SizeT ShAmt;
switch (SrcTy) {
default:
break;
case IceType_v16i1:
NeedsShl = true;
NewTypeAfterShl = IceType_v16i8;
ShAmt = 7;
break;
case IceType_v8i1:
NeedsShl = true;
NewTypeAfterShl = IceType_v8i16;
ShAmt = 15;
break;
case IceType_v4i1:
NeedsShl = true;
NewTypeAfterShl = IceType_v4i32;
ShAmt = 31;
break;
}
if (NeedsShl) {
auto *Imm = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(ShAmt));
auto *Src0T = makeReg(NewTypeAfterShl);
auto *Src0Shl = makeReg(NewTypeAfterShl);
_mov(Src0T, Src0);
_vshl(Src0Shl, Src0T, Imm);
Src0 = Src0Shl;
auto *Src1T = makeReg(NewTypeAfterShl);
auto *Src1Shl = makeReg(NewTypeAfterShl);
_mov(Src1T, Src1);
_vshl(Src1Shl, Src1T, Imm);
Src1 = Src1Shl;
}
switch (Instr->getCondition()) {
default:
llvm::report_fatal_error("Unhandled integer comparison.");
#define _Vceq(T, S0, S1, Signed) _vceq(T, S0, S1)
#define _Vcge(T, S0, S1, Signed) \
_vcge(T, S0, S1) \
->setSignType(Signed ? InstARM32::FS_Signed : InstARM32::FS_Unsigned)
#define _Vcgt(T, S0, S1, Signed) \
_vcgt(T, S0, S1) \
->setSignType(Signed ? InstARM32::FS_Signed : InstARM32::FS_Unsigned)
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V) \
case InstIcmp::val: { \
_Vc##C_V(T, (INV_V) ? Src1 : Src0, (INV_V) ? Src0 : Src1, is_signed); \
if (NEG_V) { \
auto *TInv = makeReg(DestTy); \
_vmvn(TInv, T); \
T = TInv; \
} \
} break;
ICMPARM32_TABLE
#undef X
#undef _Vcgt
#undef _Vcge
#undef _Vceq
}
_mov(Dest, T);
return;
}
......
......@@ -18,44 +18,52 @@
// Patterns for lowering fcmp. These are expected to be used in the following
// manner:
//
// Scalar:
// mov reg, #0
// movCC0 reg, #1 /* only if CC0 != kNone */
// movCC1 reg, #1 /* only if CC1 != kNone */
//
// TODO(jpp): vector lowerings.
// Vector:
// vcCC0_V Cmp0, Src0, Src1 /* only if CC0_V != none */
// vcCC1_V Cmp1, Src1, Src0 /* only if CC1_V != none */
// vorr Cmp2, Cmp0, Cmp1 /* only if CC1_V != none */
// vmvn Reg3, Cmp? /* only if NEG_V = true */
//
// If INV_V = true, then Src0 and Src1 should be swapped.
//
#define FCMPARM32_TABLE \
/* val, CC0, CC1 */ \
X(False, kNone, kNone) \
X(Oeq, EQ, kNone) \
X(Ogt, GT, kNone) \
X(Oge, GE, kNone) \
X(Olt, MI, kNone) \
X(Ole, LS, kNone) \
X(One, MI, GT) \
X(Ord, VC, kNone) \
X(Ueq, EQ, VS) \
X(Ugt, HI, kNone) \
X(Uge, PL, kNone) \
X(Ult, LT, kNone) \
X(Ule, LE, kNone) \
X(Une, NE, kNone) \
X(Uno, VS, kNone) \
X(True, AL, kNone) \
//#define X(val, CC0, CC1)
/*val , CC0 , CC1 , CC0_V, CC1_V, INV_V, NEG_V */ \
X(False, kNone, kNone, none , none , false, false) \
X(Oeq , EQ , kNone, eq , none , false, false) \
X(Ogt , GT , kNone, gt , none , false, false) \
X(Oge , GE , kNone, ge , none , false, false) \
X(Olt , MI , kNone, gt , none , true , false) \
X(Ole , LS , kNone, ge , none , true , false) \
X(One , MI , GT , gt , gt , false, false) \
X(Ord , VC , kNone, ge , gt , false, false) \
X(Ueq , EQ , VS , gt , gt , false, true) \
X(Ugt , HI , kNone, ge , none , true , true) \
X(Uge , PL , kNone, gt , none , true , true) \
X(Ult , LT , kNone, ge , none , false, true) \
X(Ule , LE , kNone, gt , none , false, true) \
X(Une , NE , kNone, eq , none , false, true) \
X(Uno , VS , kNone, ge , gt , false, true) \
X(True , AL , kNone, none , none , false, false)
//#define X(val, CC0, CC1, CC0_V, CC1_V, INV_V, NEG_V)
// Patterns for lowering icmp.
#define ICMPARM32_TABLE \
/* val, is_signed, swapped64, C_32, C1_64, C2_64 */ \
X(Eq, false, false, EQ, EQ, NE) \
X(Ne, false, false, NE, NE, EQ) \
X(Ugt, false, false, HI, HI, LS) \
X(Uge, false, false, CS, CS, CC) \
X(Ult, false, false, CC, CC, CS) \
X(Ule, false, false, LS, LS, HI) \
X(Sgt, true, true, GT, LT, GE) \
X(Sge, true, false, GE, GE, LT) \
X(Slt, true, false, LT, LT, GE) \
X(Sle, true, true, LE, GE, LT) \
//#define X(val, is_signed, swapped64, C_32, C1_64, C2_64)
#define ICMPARM32_TABLE \
/*val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V */ \
X(Eq , false , false , EQ, EQ , NE , eq , false, false) \
X(Ne , false , false , NE, NE , EQ , eq , false, true) \
X(Ugt, false , false , HI, HI , LS , gt , false, false) \
X(Uge, false , false , CS, CS , CC , ge , false, false) \
X(Ult, false , false , CC, CC , CS , gt , true , false) \
X(Ule, false , false , LS, LS , HI , ge , true , false) \
X(Sgt, true , true , GT, LT , GE , gt , false, false) \
X(Sge, true , false , GE, GE , LT , ge , false, false) \
X(Slt, true , false , LT, LT , GE , gt , true , false) \
X(Sle, true , true , LE, GE , LT , ge , true , false)
//#define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V)
#endif // SUBZERO_SRC_ICETARGETLOWERINGARM32_DEF
......@@ -858,6 +858,15 @@ protected:
InstARM32Vbsl *_vbsl(Variable *Dest, Variable *Src0, Variable *Src1) {
return Context.insert<InstARM32Vbsl>(Dest, Src0, Src1);
}
/// Inserts an InstARM32Vceq (Dest = vceq Src0, Src1) into the current lowering
/// context. The created instruction is not returned.
void _vceq(Variable *Dest, Variable *Src0, Variable *Src1) {
  (void)Context.insert<InstARM32Vceq>(Dest, Src0, Src1);
}
/// Inserts an InstARM32Vcge (Dest = vcge Src0, Src1) into the current lowering
/// context and returns it, so the caller can set its sign type.
InstARM32Vcge *_vcge(Variable *Dest, Variable *Src0, Variable *Src1) {
  InstARM32Vcge *Cmp = Context.insert<InstARM32Vcge>(Dest, Src0, Src1);
  return Cmp;
}
/// Inserts an InstARM32Vcgt (Dest = vcgt Src0, Src1) into the current lowering
/// context and returns it, so the caller can set its sign type.
InstARM32Vcgt *_vcgt(Variable *Dest, Variable *Src0, Variable *Src1) {
  InstARM32Vcgt *Cmp = Context.insert<InstARM32Vcgt>(Dest, Src0, Src1);
  return Cmp;
}
void _vcvt(Variable *Dest, Variable *Src, InstARM32Vcvt::VcvtVariant Variant,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Vcvt>(Dest, Src, Variant, Pred);
......@@ -888,6 +897,9 @@ protected:
void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstARM32Vmul>(Dest, Src0, Src1);
}
/// Inserts an InstARM32Vmvn (Dest = vmvn Src0) into the current lowering
/// context, always with the AL condition.
void _vmvn(Variable *Dest, Variable *Src0) {
  constexpr CondARM32::Cond Always = CondARM32::AL;
  (void)Context.insert<InstARM32Vmvn>(Dest, Src0, Always);
}
void _vneg(Variable *Dest, Variable *Src0) {
Context.insert<InstARM32Vneg>(Dest, Src0, CondARM32::AL)
->setSignType(InstARM32::FS_Signed);
......
......@@ -941,8 +941,8 @@ public:
auto MemBase = Ctx->getConstantSym(0, Ctx->getGlobalString("WASM_MEMORY"));
if (!ConstZeroBase) {
auto RealAddrV = Func->makeVariable(Ice::getPointerType());
Control()->appendInst(InstArithmetic::create(
Func, InstArithmetic::Add, RealAddrV, Base, MemBase));
Control()->appendInst(InstArithmetic::create(Func, InstArithmetic::Add,
RealAddrV, Base, MemBase));
RealAddr = RealAddrV;
} else {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment