Commit b40560b1 by Casey Dahlin Committed by Nicolas Capens

Support saturating vector add and subtract on ARM32.

Bug b/37495545
Change-Id: I1d025f894bb7cf08dcaafd950605781633596ab3
Reviewed-on: https://chromium-review.googlesource.com/689098
Reviewed-by: Jim Stichnoth <stichnot@chromium.org>
Reviewed-on: https://swiftshader-review.googlesource.com/12748
Reviewed-by: Nicolas Capens <nicolascapens@google.com>
Tested-by: Nicolas Capens <nicolascapens@google.com>
parent 8d90a34e
...@@ -3381,6 +3381,62 @@ void AssemblerARM32::vsubd(const Operand *OpDd, const Operand *OpDn, ...@@ -3381,6 +3381,62 @@ void AssemblerARM32::vsubd(const Operand *OpDd, const Operand *OpDn,
emitVFPddd(Cond, VsubdOpcode, OpDd, OpDn, OpDm, Vsubd); emitVFPddd(Cond, VsubdOpcode, OpDd, OpDn, OpDm, Vsubd);
} }
void AssemblerARM32::vqaddqi(Type ElmtTy, const Operand *OpQd,
                             const Operand *OpQm, const Operand *OpQn) {
  // Signed saturating integer vector add on Q registers.
  //
  // VQADD (integer) - ARM section A8.6.369, encoding A1:
  //   vqadd<c><q>.s<size> {<Qd>,} <Qn>, <Qm>
  //
  // 111100100Dssnnn0ddd00000N1M1mmm0 where Dddd=OpQd, Nnnn=OpQn, Mmmm=OpQm,
  // size is 8, 16, 32, or 64.
  constexpr IValueT Opcode = B4;
  constexpr const char *InstName = "vqaddqi";
  // ElmtTy is the element type of the vector and must be an integer type.
  assert(isScalarIntegerType(ElmtTy) &&
         "vqaddqi expects vector with integer element type");
  emitSIMDqqq(Opcode, ElmtTy, OpQd, OpQm, OpQn, InstName);
}
void AssemblerARM32::vqaddqu(Type ElmtTy, const Operand *OpQd,
                             const Operand *OpQm, const Operand *OpQn) {
  // Unsigned saturating integer vector add on Q registers.
  //
  // VQADD (integer) - ARM section A8.6.369, encoding A1:
  //   vqadd<c><q>.u<size> {<Qd>,} <Qn>, <Qm>
  //
  // 111100110Dssnnn0ddd00000N1M1mmm0 where Dddd=OpQd, Nnnn=OpQn, Mmmm=OpQm,
  // size is 8, 16, 32, or 64.
  //
  // Bit 24 (B24) is what distinguishes this unsigned form from the signed
  // form emitted by vqaddqi.
  assert(isScalarIntegerType(ElmtTy) &&
         "vqaddqu expects vector with integer element type");
  constexpr const char *Vqaddqu = "vqaddqu";
  constexpr IValueT VqaddquOpcode = B24 | B4;
  emitSIMDqqq(VqaddquOpcode, ElmtTy, OpQd, OpQm, OpQn, Vqaddqu);
}
void AssemblerARM32::vqsubqi(Type ElmtTy, const Operand *OpQd,
                             const Operand *OpQm, const Operand *OpQn) {
  // Signed saturating integer vector subtract on Q registers.
  //
  // VQSUB (integer) - ARM section A8.6.369, encoding A1:
  //   vqsub<c><q>.s<size> {<Qd>,} <Qn>, <Qm>
  // NOTE(review): the section number above is the same one cited for VQADD;
  // confirm it against the ARM Architecture Reference Manual.
  //
  // 111100100Dssnnn0ddd00010N1M1mmm0 where Dddd=OpQd, Nnnn=OpQn, Mmmm=OpQm,
  // size is 8, 16, 32, or 64.
  constexpr IValueT Opcode = B9 | B4;
  constexpr const char *InstName = "vqsubqi";
  // ElmtTy is the element type of the vector and must be an integer type.
  assert(isScalarIntegerType(ElmtTy) &&
         "vqsubqi expects vector with integer element type");
  emitSIMDqqq(Opcode, ElmtTy, OpQd, OpQm, OpQn, InstName);
}
void AssemblerARM32::vqsubqu(Type ElmtTy, const Operand *OpQd,
                             const Operand *OpQm, const Operand *OpQn) {
  // Unsigned saturating integer vector subtract on Q registers.
  //
  // VQSUB (integer) - ARM section A8.6.369, encoding A1:
  //   vqsub<c><q>.u<size> {<Qd>,} <Qn>, <Qm>
  // NOTE(review): the section number above is the same one cited for VQADD;
  // confirm it against the ARM Architecture Reference Manual.
  //
  // 111100110Dssnnn0ddd00010N1M1mmm0 where Dddd=OpQd, Nnnn=OpQn, Mmmm=OpQm,
  // size is 8, 16, 32, or 64.
  //
  // Bit 24 (B24) is what distinguishes this unsigned form from the signed
  // form emitted by vqsubqi.
  assert(isScalarIntegerType(ElmtTy) &&
         "vqsubqu expects vector with integer element type");
  constexpr const char *Vqsubqu = "vqsubqu";
  constexpr IValueT VqsubquOpcode = B24 | B9 | B4;
  emitSIMDqqq(VqsubquOpcode, ElmtTy, OpQd, OpQm, OpQn, Vqsubqu);
}
void AssemblerARM32::vsubqi(Type ElmtTy, const Operand *OpQd, void AssemblerARM32::vsubqi(Type ElmtTy, const Operand *OpQd,
const Operand *OpQm, const Operand *OpQn) { const Operand *OpQm, const Operand *OpQn) {
// VSUB (integer) - ARM section A8.8.414, encoding A1: // VSUB (integer) - ARM section A8.8.414, encoding A1:
......
...@@ -591,6 +591,18 @@ public: ...@@ -591,6 +591,18 @@ public:
void vsubqi(Type ElmtTy, const Operand *OpQd, const Operand *OpQm, void vsubqi(Type ElmtTy, const Operand *OpQd, const Operand *OpQm,
const Operand *OpQn); const Operand *OpQn);
// Integer vector saturating subtract.
// vqsubqi emits the signed form and vqsubqu the unsigned form. ElmtTy is the
// vector's element type; OpQd is the destination and OpQm/OpQn the sources.
void vqsubqi(Type ElmtTy, const Operand *OpQd, const Operand *OpQm,
const Operand *OpQn);
void vqsubqu(Type ElmtTy, const Operand *OpQd, const Operand *OpQm,
const Operand *OpQn);
// Integer vector saturating add.
// vqaddqi emits the signed form and vqaddqu the unsigned form. ElmtTy is the
// vector's element type; OpQd is the destination and OpQm/OpQn the sources.
void vqaddqi(Type ElmtTy, const Operand *OpQd, const Operand *OpQm,
const Operand *OpQn);
void vqaddqu(Type ElmtTy, const Operand *OpQd, const Operand *OpQm,
const Operand *OpQn);
// Float vector subtract // Float vector subtract
void vsubqf(const Operand *OpQd, const Operand *OpQm, const Operand *OpQn); void vsubqf(const Operand *OpQd, const Operand *OpQm, const Operand *OpQn);
......
...@@ -1037,6 +1037,56 @@ template <> void InstARM32Vsub::emitIAS(const Cfg *Func) const { ...@@ -1037,6 +1037,56 @@ template <> void InstARM32Vsub::emitIAS(const Cfg *Func) const {
assert(!Asm->needsTextFixup()); assert(!Asm->needsTextFixup());
} }
// Emits a saturating vector add through the ARM32 assembler, choosing the
// signed or unsigned form from this instruction's sign type.
template <> void InstARM32Vqadd::emitIAS(const Cfg *Func) const {
  auto *Assembler = Func->getAssembler<ARM32::AssemblerARM32>();
  const Variable *Result = getDest();
  const Type ResultTy = Result->getType();

  // Only the v16i8, v8i16 and v4i32 vector types are handled.
  const bool IsHandledType = ResultTy == IceType_v16i8 ||
                             ResultTy == IceType_v8i16 ||
                             ResultTy == IceType_v4i32;
  if (!IsHandledType) {
    llvm::report_fatal_error("Vqadd not defined on type " +
                             typeStdString(ResultTy));
  }

  switch (Sign) {
  case InstARM32::FS_Signed:
    Assembler->vqaddqi(typeElementType(ResultTy), Result, getSrc(0),
                       getSrc(1));
    break;
  case InstARM32::FS_None: // defaults to unsigned.
  case InstARM32::FS_Unsigned:
    Assembler->vqaddqu(typeElementType(ResultTy), Result, getSrc(0),
                       getSrc(1));
    break;
  }
  assert(!Assembler->needsTextFixup());
}
// Emits a saturating vector subtract through the ARM32 assembler, choosing the
// signed or unsigned form from this instruction's sign type.
template <> void InstARM32Vqsub::emitIAS(const Cfg *Func) const {
  auto *Assembler = Func->getAssembler<ARM32::AssemblerARM32>();
  const Variable *Result = getDest();
  const Type ResultTy = Result->getType();

  // Only the v16i8, v8i16 and v4i32 vector types are handled.
  const bool IsHandledType = ResultTy == IceType_v16i8 ||
                             ResultTy == IceType_v8i16 ||
                             ResultTy == IceType_v4i32;
  if (!IsHandledType) {
    llvm::report_fatal_error("Vqsub not defined on type " +
                             typeStdString(ResultTy));
  }

  switch (Sign) {
  case InstARM32::FS_Signed:
    Assembler->vqsubqi(typeElementType(ResultTy), Result, getSrc(0),
                       getSrc(1));
    break;
  case InstARM32::FS_None: // defaults to unsigned.
  case InstARM32::FS_Unsigned:
    Assembler->vqsubqu(typeElementType(ResultTy), Result, getSrc(0),
                       getSrc(1));
    break;
  }
  assert(!Assembler->needsTextFixup());
}
template <> void InstARM32Vmul::emitIAS(const Cfg *Func) const { template <> void InstARM32Vmul::emitIAS(const Cfg *Func) const {
auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>(); auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
const Variable *Dest = getDest(); const Variable *Dest = getDest();
...@@ -1639,6 +1689,10 @@ template <> const char *InstARM32UnaryopFP<InstARM32::Vneg>::Opcode = "vneg"; ...@@ -1639,6 +1689,10 @@ template <> const char *InstARM32UnaryopFP<InstARM32::Vneg>::Opcode = "vneg";
template <> const char *InstARM32ThreeAddrFP<InstARM32::Vshl>::Opcode = "vshl"; template <> const char *InstARM32ThreeAddrFP<InstARM32::Vshl>::Opcode = "vshl";
template <> const char *InstARM32ThreeAddrFP<InstARM32::Vshr>::Opcode = "vshr"; template <> const char *InstARM32ThreeAddrFP<InstARM32::Vshr>::Opcode = "vshr";
template <> const char *InstARM32Vsub::Opcode = "vsub"; template <> const char *InstARM32Vsub::Opcode = "vsub";
// Opcode name strings for the saturating vector add and subtract instructions.
template <>
const char *InstARM32ThreeAddrFP<InstARM32::Vqadd>::Opcode = "vqadd";
template <>
const char *InstARM32ThreeAddrFP<InstARM32::Vqsub>::Opcode = "vqsub";
// Four-addr ops // Four-addr ops
template <> const char *InstARM32Mla::Opcode = "mla"; template <> const char *InstARM32Mla::Opcode = "mla";
template <> const char *InstARM32Mls::Opcode = "mls"; template <> const char *InstARM32Mls::Opcode = "mls";
...@@ -3110,6 +3164,8 @@ template class InstARM32UnaryopSignAwareFP<InstARM32::Vneg>; ...@@ -3110,6 +3164,8 @@ template class InstARM32UnaryopSignAwareFP<InstARM32::Vneg>;
template class InstARM32ThreeAddrSignAwareFP<InstARM32::Vshl>; template class InstARM32ThreeAddrSignAwareFP<InstARM32::Vshl>;
template class InstARM32ThreeAddrSignAwareFP<InstARM32::Vshr>; template class InstARM32ThreeAddrSignAwareFP<InstARM32::Vshr>;
template class InstARM32ThreeAddrFP<InstARM32::Vsub>; template class InstARM32ThreeAddrFP<InstARM32::Vsub>;
// Explicit instantiations of the sign-aware three-address template for the
// saturating vector add and subtract instructions.
template class InstARM32ThreeAddrSignAwareFP<InstARM32::Vqadd>;
template class InstARM32ThreeAddrSignAwareFP<InstARM32::Vqsub>;
template class InstARM32LoadBase<InstARM32::Ldr>; template class InstARM32LoadBase<InstARM32::Ldr>;
template class InstARM32LoadBase<InstARM32::Ldrex>; template class InstARM32LoadBase<InstARM32::Ldrex>;
......
...@@ -442,6 +442,8 @@ public: ...@@ -442,6 +442,8 @@ public:
Vmvn, Vmvn,
Vneg, Vneg,
Vorr, Vorr,
Vqadd,
Vqsub,
Vshl, Vshl,
Vshr, Vshr,
Vsqrt, Vsqrt,
...@@ -1016,6 +1018,8 @@ using InstARM32Vmul = InstARM32ThreeAddrFP<InstARM32::Vmul>; ...@@ -1016,6 +1018,8 @@ using InstARM32Vmul = InstARM32ThreeAddrFP<InstARM32::Vmul>;
using InstARM32Vmvn = InstARM32UnaryopFP<InstARM32::Vmvn>; using InstARM32Vmvn = InstARM32UnaryopFP<InstARM32::Vmvn>;
using InstARM32Vneg = InstARM32UnaryopSignAwareFP<InstARM32::Vneg>; using InstARM32Vneg = InstARM32UnaryopSignAwareFP<InstARM32::Vneg>;
using InstARM32Vorr = InstARM32ThreeAddrFP<InstARM32::Vorr>; using InstARM32Vorr = InstARM32ThreeAddrFP<InstARM32::Vorr>;
// Saturating vector add and subtract. Both use the sign-aware template so that
// a signed or unsigned form can be selected per instruction.
using InstARM32Vqadd = InstARM32ThreeAddrSignAwareFP<InstARM32::Vqadd>;
using InstARM32Vqsub = InstARM32ThreeAddrSignAwareFP<InstARM32::Vqsub>;
using InstARM32Vshl = InstARM32ThreeAddrSignAwareFP<InstARM32::Vshl>; using InstARM32Vshl = InstARM32ThreeAddrSignAwareFP<InstARM32::Vshl>;
using InstARM32Vshr = InstARM32ThreeAddrSignAwareFP<InstARM32::Vshr>; using InstARM32Vshr = InstARM32ThreeAddrSignAwareFP<InstARM32::Vshr>;
using InstARM32Vsub = InstARM32ThreeAddrFP<InstARM32::Vsub>; using InstARM32Vsub = InstARM32ThreeAddrFP<InstARM32::Vsub>;
......
...@@ -5320,12 +5320,14 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { ...@@ -5320,12 +5320,14 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
case Intrinsics::Trap: case Intrinsics::Trap:
_trap(); _trap();
return; return;
case Intrinsics::AddSaturateSigned: { case Intrinsics::AddSaturateSigned:
UnimplementedLoweringError(this, Instr);
return;
}
case Intrinsics::AddSaturateUnsigned: { case Intrinsics::AddSaturateUnsigned: {
UnimplementedLoweringError(this, Instr); bool Unsigned = (ID == Intrinsics::AddSaturateUnsigned);
Variable *Src0 = legalizeToReg(Instr->getArg(0));
Variable *Src1 = legalizeToReg(Instr->getArg(1));
Variable *T = makeReg(DestTy);
_vqadd(T, Src0, Src1, Unsigned);
_mov(Dest, T);
return; return;
} }
case Intrinsics::LoadSubVector: { case Intrinsics::LoadSubVector: {
...@@ -5360,12 +5362,14 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { ...@@ -5360,12 +5362,14 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
UnimplementedLoweringError(this, Instr); UnimplementedLoweringError(this, Instr);
return; return;
} }
case Intrinsics::SubtractSaturateSigned: { case Intrinsics::SubtractSaturateSigned:
UnimplementedLoweringError(this, Instr);
return;
}
case Intrinsics::SubtractSaturateUnsigned: { case Intrinsics::SubtractSaturateUnsigned: {
UnimplementedLoweringError(this, Instr); bool Unsigned = (ID == Intrinsics::SubtractSaturateUnsigned);
Variable *Src0 = legalizeToReg(Instr->getArg(0));
Variable *Src1 = legalizeToReg(Instr->getArg(1));
Variable *T = makeReg(DestTy);
_vqsub(T, Src0, Src1, Unsigned);
_mov(Dest, T);
return; return;
} }
case Intrinsics::VectorPackSigned: { case Intrinsics::VectorPackSigned: {
......
...@@ -910,6 +910,14 @@ protected: ...@@ -910,6 +910,14 @@ protected:
void _vorr(Variable *Dest, Variable *Src0, Variable *Src1) { void _vorr(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstARM32Vorr>(Dest, Src0, Src1); Context.insert<InstARM32Vorr>(Dest, Src0, Src1);
} }
// Inserts a saturating vector add (Dest = Src0 + Src1) into the current
// context and marks it unsigned or signed according to Unsigned.
void _vqadd(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) {
  const auto SignKind =
      Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed;
  auto *NewInst = Context.insert<InstARM32Vqadd>(Dest, Src0, Src1);
  NewInst->setSignType(SignKind);
}
// Inserts a saturating vector subtract (Dest = Src0 - Src1) into the current
// context and marks it unsigned or signed according to Unsigned.
void _vqsub(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) {
  const auto SignKind =
      Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed;
  auto *NewInst = Context.insert<InstARM32Vqsub>(Dest, Src0, Src1);
  NewInst->setSignType(SignKind);
}
InstARM32Vshl *_vshl(Variable *Dest, Variable *Src0, Variable *Src1) { InstARM32Vshl *_vshl(Variable *Dest, Variable *Src0, Variable *Src1) {
return Context.insert<InstARM32Vshl>(Dest, Src0, Src1); return Context.insert<InstARM32Vshl>(Dest, Src0, Src1);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment