Commit 67a49b5b by Nicolas Capens Committed by Nicolas Capens

Implement saturated vector add/subtract.

BUG=swiftshader:15
Change-Id: Ic120eddd1761e33b7d76bf3ed8ec5ca74634f958
Reviewed-on: https://chromium-review.googlesource.com/403477
Reviewed-by: Jim Stichnoth <stichnot@chromium.org>
Tested-by: Nicolas Capens <nicolascapens@google.com>
parent 13cde0f5
......@@ -357,6 +357,10 @@ public:
void padd(Type Ty, XmmRegister dst, XmmRegister src);
void padd(Type Ty, XmmRegister dst, const Address &src);
// Saturating signed packed add into dst. Overloads take the second operand
// from an XMM register or from an Address operand. Ty selects the opcode
// variant (byte-sized types or IceType_i16).
void padds(Type Ty, XmmRegister dst, XmmRegister src);
void padds(Type Ty, XmmRegister dst, const Address &src);
// Saturating unsigned packed add into dst. Overloads take the second operand
// from an XMM register or from an Address operand. Ty selects the opcode
// variant (byte-sized types or IceType_i16).
void paddus(Type Ty, XmmRegister dst, XmmRegister src);
void paddus(Type Ty, XmmRegister dst, const Address &src);
void pand(Type Ty, XmmRegister dst, XmmRegister src);
void pand(Type Ty, XmmRegister dst, const Address &src);
void pandn(Type Ty, XmmRegister dst, XmmRegister src);
......@@ -375,6 +379,10 @@ public:
void por(Type Ty, XmmRegister dst, const Address &src);
void psub(Type Ty, XmmRegister dst, XmmRegister src);
void psub(Type Ty, XmmRegister dst, const Address &src);
// Saturating signed packed subtract from dst. Overloads take the second
// operand from an XMM register or from an Address operand. Ty selects the
// opcode variant (byte-sized types or IceType_i16).
void psubs(Type Ty, XmmRegister dst, XmmRegister src);
void psubs(Type Ty, XmmRegister dst, const Address &src);
// Saturating unsigned packed subtract from dst. Overloads take the second
// operand from an XMM register or from an Address operand. Ty selects the
// opcode variant (byte-sized types or IceType_i16).
void psubus(Type Ty, XmmRegister dst, XmmRegister src);
void psubus(Type Ty, XmmRegister dst, const Address &src);
void pxor(Type Ty, XmmRegister dst, XmmRegister src);
void pxor(Type Ty, XmmRegister dst, const Address &src);
......
......@@ -758,6 +758,76 @@ void AssemblerX86Base<TraitsType>::padd(Type Ty, XmmRegister dst,
}
// Emits the register/register form of the saturating signed packed add.
//
// Byte sequence: 0x66, whatever emitRexRB() produces for (dst, src), 0x0F,
// the opcode byte, then the XMM register operand.
//   - byte-sized arithmetic types -> opcode 0xEC
//   - IceType_i16                 -> opcode 0xED
// Any other Ty trips the assertion. If assertions are compiled out, no opcode
// byte is written for such a type but the operand is still emitted.
template <typename TraitsType>
void AssemblerX86Base<TraitsType>::padds(Type Ty, XmmRegister dst,
                                         XmmRegister src) {
  AssemblerBuffer::EnsureCapacity Reserve(&Buffer);
  emitUint8(0x66);
  emitRexRB(RexTypeIrrelevant, dst, src);
  emitUint8(0x0F);
  const bool IsByteElement = isByteSizedArithType(Ty);
  if (!IsByteElement && Ty != IceType_i16) {
    assert(false && "Unexpected padds operand type");
  } else {
    emitUint8(IsByteElement ? 0xEC : 0xED);
  }
  emitXmmRegisterOperand(dst, src);
}
// Emits the register/Address form of the saturating signed packed add.
//
// Byte sequence: 0x66, the address-size override (as decided by
// emitAddrSizeOverridePrefix()), whatever emitRex() produces for (src, dst),
// 0x0F, the opcode byte, then the operand built from dst's encoding and src.
//   - byte-sized arithmetic types -> opcode 0xEC
//   - IceType_i16                 -> opcode 0xED
// Any other Ty trips the assertion. If assertions are compiled out, no opcode
// byte is written for such a type but the operand is still emitted.
template <typename TraitsType>
void AssemblerX86Base<TraitsType>::padds(Type Ty, XmmRegister dst,
                                         const Address &src) {
  AssemblerBuffer::EnsureCapacity Reserve(&Buffer);
  emitUint8(0x66);
  emitAddrSizeOverridePrefix();
  emitRex(RexTypeIrrelevant, src, dst);
  emitUint8(0x0F);
  const bool IsByteElement = isByteSizedArithType(Ty);
  if (!IsByteElement && Ty != IceType_i16) {
    assert(false && "Unexpected padds operand type");
  } else {
    emitUint8(IsByteElement ? 0xEC : 0xED);
  }
  emitOperand(gprEncoding(dst), src);
}
// Emits the register/register form of the saturating unsigned packed add.
//
// Byte sequence: 0x66, whatever emitRexRB() produces for (dst, src), 0x0F,
// the opcode byte, then the XMM register operand.
//   - byte-sized arithmetic types -> opcode 0xDC
//   - IceType_i16                 -> opcode 0xDD
// Any other Ty trips the assertion. If assertions are compiled out, no opcode
// byte is written for such a type but the operand is still emitted.
template <typename TraitsType>
void AssemblerX86Base<TraitsType>::paddus(Type Ty, XmmRegister dst,
                                          XmmRegister src) {
  AssemblerBuffer::EnsureCapacity Reserve(&Buffer);
  emitUint8(0x66);
  emitRexRB(RexTypeIrrelevant, dst, src);
  emitUint8(0x0F);
  const bool IsByteElement = isByteSizedArithType(Ty);
  if (!IsByteElement && Ty != IceType_i16) {
    assert(false && "Unexpected paddus operand type");
  } else {
    emitUint8(IsByteElement ? 0xDC : 0xDD);
  }
  emitXmmRegisterOperand(dst, src);
}
// Emits the register/Address form of the saturating unsigned packed add.
//
// Byte sequence: 0x66, the address-size override (as decided by
// emitAddrSizeOverridePrefix()), whatever emitRex() produces for (src, dst),
// 0x0F, the opcode byte, then the operand built from dst's encoding and src.
//   - byte-sized arithmetic types -> opcode 0xDC
//   - IceType_i16                 -> opcode 0xDD
// Any other Ty trips the assertion. If assertions are compiled out, no opcode
// byte is written for such a type but the operand is still emitted.
template <typename TraitsType>
void AssemblerX86Base<TraitsType>::paddus(Type Ty, XmmRegister dst,
                                          const Address &src) {
  AssemblerBuffer::EnsureCapacity Reserve(&Buffer);
  emitUint8(0x66);
  emitAddrSizeOverridePrefix();
  emitRex(RexTypeIrrelevant, src, dst);
  emitUint8(0x0F);
  const bool IsByteElement = isByteSizedArithType(Ty);
  if (!IsByteElement && Ty != IceType_i16) {
    assert(false && "Unexpected paddus operand type");
  } else {
    emitUint8(IsByteElement ? 0xDC : 0xDD);
  }
  emitOperand(gprEncoding(dst), src);
}
template <typename TraitsType>
void AssemblerX86Base<TraitsType>::pand(Type /* Ty */, XmmRegister dst,
XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
......@@ -1001,6 +1071,75 @@ void AssemblerX86Base<TraitsType>::psub(Type Ty, XmmRegister dst,
}
// Emits the register/register form of the saturating signed packed subtract.
//
// Byte sequence: 0x66, whatever emitRexRB() produces for (dst, src), 0x0F,
// the opcode byte, then the XMM register operand.
//   - byte-sized arithmetic types -> opcode 0xE8
//   - IceType_i16                 -> opcode 0xE9
// Any other Ty trips the assertion. If assertions are compiled out, no opcode
// byte is written for such a type but the operand is still emitted.
template <typename TraitsType>
void AssemblerX86Base<TraitsType>::psubs(Type Ty, XmmRegister dst,
                                         XmmRegister src) {
  AssemblerBuffer::EnsureCapacity Reserve(&Buffer);
  emitUint8(0x66);
  emitRexRB(RexTypeIrrelevant, dst, src);
  emitUint8(0x0F);
  const bool IsByteElement = isByteSizedArithType(Ty);
  if (!IsByteElement && Ty != IceType_i16) {
    assert(false && "Unexpected psubs operand type");
  } else {
    emitUint8(IsByteElement ? 0xE8 : 0xE9);
  }
  emitXmmRegisterOperand(dst, src);
}
// Emits the register/Address form of the saturating signed packed subtract.
//
// Byte sequence: 0x66, the address-size override (as decided by
// emitAddrSizeOverridePrefix()), whatever emitRex() produces for (src, dst),
// 0x0F, the opcode byte, then the operand built from dst's encoding and src.
//   - byte-sized arithmetic types -> opcode 0xE8
//   - IceType_i16                 -> opcode 0xE9
// Any other Ty trips the assertion. If assertions are compiled out, no opcode
// byte is written for such a type but the operand is still emitted.
template <typename TraitsType>
void AssemblerX86Base<TraitsType>::psubs(Type Ty, XmmRegister dst,
                                         const Address &src) {
  AssemblerBuffer::EnsureCapacity Reserve(&Buffer);
  emitUint8(0x66);
  emitAddrSizeOverridePrefix();
  emitRex(RexTypeIrrelevant, src, dst);
  emitUint8(0x0F);
  const bool IsByteElement = isByteSizedArithType(Ty);
  if (!IsByteElement && Ty != IceType_i16) {
    assert(false && "Unexpected psubs operand type");
  } else {
    emitUint8(IsByteElement ? 0xE8 : 0xE9);
  }
  emitOperand(gprEncoding(dst), src);
}
// Emits the register/register form of the saturating unsigned packed
// subtract.
//
// Byte sequence: 0x66, whatever emitRexRB() produces for (dst, src), 0x0F,
// the opcode byte, then the XMM register operand.
//   - byte-sized arithmetic types -> opcode 0xD8
//   - IceType_i16                 -> opcode 0xD9
// Any other Ty trips the assertion. If assertions are compiled out, no opcode
// byte is written for such a type but the operand is still emitted.
template <typename TraitsType>
void AssemblerX86Base<TraitsType>::psubus(Type Ty, XmmRegister dst,
                                          XmmRegister src) {
  AssemblerBuffer::EnsureCapacity Reserve(&Buffer);
  emitUint8(0x66);
  emitRexRB(RexTypeIrrelevant, dst, src);
  emitUint8(0x0F);
  const bool IsByteElement = isByteSizedArithType(Ty);
  if (!IsByteElement && Ty != IceType_i16) {
    assert(false && "Unexpected psubus operand type");
  } else {
    emitUint8(IsByteElement ? 0xD8 : 0xD9);
  }
  emitXmmRegisterOperand(dst, src);
}
// Emits the register/Address form of the saturating unsigned packed subtract.
//
// Byte sequence: 0x66, the address-size override (as decided by
// emitAddrSizeOverridePrefix()), whatever emitRex() produces for (src, dst),
// 0x0F, the opcode byte, then the operand built from dst's encoding and src.
//   - byte-sized arithmetic types -> opcode 0xD8
//   - IceType_i16                 -> opcode 0xD9
// Any other Ty trips the assertion. If assertions are compiled out, no opcode
// byte is written for such a type but the operand is still emitted.
template <typename TraitsType>
void AssemblerX86Base<TraitsType>::psubus(Type Ty, XmmRegister dst,
                                          const Address &src) {
  AssemblerBuffer::EnsureCapacity Reserve(&Buffer);
  emitUint8(0x66);
  emitAddrSizeOverridePrefix();
  emitRex(RexTypeIrrelevant, src, dst);
  emitUint8(0x0F);
  const bool IsByteElement = isByteSizedArithType(Ty);
  if (!IsByteElement && Ty != IceType_i16) {
    assert(false && "Unexpected psubus operand type");
  } else {
    emitUint8(IsByteElement ? 0xD8 : 0xD9);
  }
  emitOperand(gprEncoding(dst), src);
}
template <typename TraitsType>
void AssemblerX86Base<TraitsType>::pxor(Type /* Ty */, XmmRegister dst,
XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
......
......@@ -132,6 +132,8 @@ template <typename TraitsType> struct InstImpl {
Orps,
OrRMW,
Padd,
Padds,
Paddus,
Pand,
Pandn,
Pblendvb,
......@@ -156,6 +158,8 @@ template <typename TraitsType> struct InstImpl {
Psra,
Psrl,
Psub,
Psubs,
Psubus,
Push,
Pxor,
Ret,
......@@ -1435,6 +1439,38 @@ template <typename TraitsType> struct InstImpl {
Source) {}
};
// Instruction node for the Padds kind (saturating signed packed add), built
// on the shared XMM binary-op base with the Integral SSE suffix.
// NOTE(review): the `true` template argument presumably requests emission by
// element type rather than vector type; confirm against InstX86BaseBinopXmm.
class InstX86Padds
: public InstX86BaseBinopXmm<InstX86Base::Padds, true,
InstX86Base::SseSuffix::Integral> {
public:
// Sole way to build an instance: placement-constructs the node in storage
// obtained from Func->allocate<InstX86Padds>().
static InstX86Padds *create(Cfg *Func, Variable *Dest, Operand *Source) {
return new (Func->allocate<InstX86Padds>())
InstX86Padds(Func, Dest, Source);
}
private:
// Private so that instances are only created through create(); forwards all
// arguments to the base class.
InstX86Padds(Cfg *Func, Variable *Dest, Operand *Source)
: InstX86BaseBinopXmm<InstX86Base::Padds, true,
InstX86Base::SseSuffix::Integral>(Func, Dest,
Source) {}
};
// Instruction node for the Paddus kind (saturating unsigned packed add),
// built on the shared XMM binary-op base with the Integral SSE suffix.
// NOTE(review): the `true` template argument presumably requests emission by
// element type rather than vector type; confirm against InstX86BaseBinopXmm.
class InstX86Paddus
: public InstX86BaseBinopXmm<InstX86Base::Paddus, true,
InstX86Base::SseSuffix::Integral> {
public:
// Sole way to build an instance: placement-constructs the node in storage
// obtained from Func->allocate<InstX86Paddus>().
static InstX86Paddus *create(Cfg *Func, Variable *Dest, Operand *Source) {
return new (Func->allocate<InstX86Paddus>())
InstX86Paddus(Func, Dest, Source);
}
private:
// Private so that instances are only created through create(); forwards all
// arguments to the base class.
InstX86Paddus(Cfg *Func, Variable *Dest, Operand *Source)
: InstX86BaseBinopXmm<InstX86Base::Paddus, true,
InstX86Base::SseSuffix::Integral>(Func, Dest,
Source) {}
};
class InstX86Sub : public InstX86BaseBinopGPR<InstX86Base::Sub> {
public:
static InstX86Sub *create(Cfg *Func, Variable *Dest, Operand *Source) {
......@@ -1531,6 +1567,38 @@ template <typename TraitsType> struct InstImpl {
Source) {}
};
// Instruction node for the Psubs kind (saturating signed packed subtract),
// built on the shared XMM binary-op base with the Integral SSE suffix.
// NOTE(review): the `true` template argument presumably requests emission by
// element type rather than vector type; confirm against InstX86BaseBinopXmm.
class InstX86Psubs
: public InstX86BaseBinopXmm<InstX86Base::Psubs, true,
InstX86Base::SseSuffix::Integral> {
public:
// Sole way to build an instance: placement-constructs the node in storage
// obtained from Func->allocate<InstX86Psubs>().
static InstX86Psubs *create(Cfg *Func, Variable *Dest, Operand *Source) {
return new (Func->allocate<InstX86Psubs>())
InstX86Psubs(Func, Dest, Source);
}
private:
// Private so that instances are only created through create(); forwards all
// arguments to the base class.
InstX86Psubs(Cfg *Func, Variable *Dest, Operand *Source)
: InstX86BaseBinopXmm<InstX86Base::Psubs, true,
InstX86Base::SseSuffix::Integral>(Func, Dest,
Source) {}
};
// Instruction node for the Psubus kind (saturating unsigned packed subtract),
// built on the shared XMM binary-op base with the Integral SSE suffix.
// NOTE(review): the `true` template argument presumably requests emission by
// element type rather than vector type; confirm against InstX86BaseBinopXmm.
class InstX86Psubus
: public InstX86BaseBinopXmm<InstX86Base::Psubus, true,
InstX86Base::SseSuffix::Integral> {
public:
// Sole way to build an instance: placement-constructs the node in storage
// obtained from Func->allocate<InstX86Psubus>().
static InstX86Psubus *create(Cfg *Func, Variable *Dest, Operand *Source) {
return new (Func->allocate<InstX86Psubus>())
InstX86Psubus(Func, Dest, Source);
}
private:
// Private so that instances are only created through create(); forwards all
// arguments to the base class.
InstX86Psubus(Cfg *Func, Variable *Dest, Operand *Source)
: InstX86BaseBinopXmm<InstX86Base::Psubus, true,
InstX86Base::SseSuffix::Integral>(Func, Dest,
Source) {}
};
class InstX86And : public InstX86BaseBinopGPR<InstX86Base::And> {
public:
static InstX86And *create(Cfg *Func, Variable *Dest, Operand *Source) {
......@@ -3097,6 +3165,8 @@ template <typename TraitsType> struct Insts {
using Andnps = typename InstImpl<TraitsType>::InstX86Andnps;
using Andps = typename InstImpl<TraitsType>::InstX86Andps;
using Padd = typename InstImpl<TraitsType>::InstX86Padd;
using Padds = typename InstImpl<TraitsType>::InstX86Padds;
using Paddus = typename InstImpl<TraitsType>::InstX86Paddus;
using Sub = typename InstImpl<TraitsType>::InstX86Sub;
using SubRMW = typename InstImpl<TraitsType>::InstX86SubRMW;
using Subps = typename InstImpl<TraitsType>::InstX86Subps;
......@@ -3104,6 +3174,8 @@ template <typename TraitsType> struct Insts {
using Sbb = typename InstImpl<TraitsType>::InstX86Sbb;
using SbbRMW = typename InstImpl<TraitsType>::InstX86SbbRMW;
using Psub = typename InstImpl<TraitsType>::InstX86Psub;
using Psubs = typename InstImpl<TraitsType>::InstX86Psubs;
using Psubus = typename InstImpl<TraitsType>::InstX86Psubus;
using And = typename InstImpl<TraitsType>::InstX86And;
using AndRMW = typename InstImpl<TraitsType>::InstX86AndRMW;
using Pand = typename InstImpl<TraitsType>::InstX86Pand;
......@@ -3279,6 +3351,12 @@ template <typename TraitsType> struct Insts {
const char *InstImpl<TraitsType>::InstX86Padd::Base::Opcode = "padd"; \
template <> \
template <> \
const char *InstImpl<TraitsType>::InstX86Padds::Base::Opcode = "padds"; \
template <> \
template <> \
const char *InstImpl<TraitsType>::InstX86Paddus::Base::Opcode = "paddus"; \
template <> \
template <> \
const char *InstImpl<TraitsType>::InstX86Sub::Base::Opcode = "sub"; \
template <> \
template <> \
......@@ -3300,6 +3378,12 @@ template <typename TraitsType> struct Insts {
const char *InstImpl<TraitsType>::InstX86Psub::Base::Opcode = "psub"; \
template <> \
template <> \
const char *InstImpl<TraitsType>::InstX86Psubs::Base::Opcode = "psubs"; \
template <> \
template <> \
const char *InstImpl<TraitsType>::InstX86Psubus::Base::Opcode = "psubus"; \
template <> \
template <> \
const char *InstImpl<TraitsType>::InstX86And::Base::Opcode = "and"; \
template <> \
template <> \
......@@ -3683,6 +3767,18 @@ template <typename TraitsType> struct Insts {
template <> \
template <> \
const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp \
InstImpl<TraitsType>::InstX86Padds::Base::Emitter = { \
&InstImpl<TraitsType>::Assembler::padds, \
&InstImpl<TraitsType>::Assembler::padds}; \
template <> \
template <> \
const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp \
InstImpl<TraitsType>::InstX86Paddus::Base::Emitter = { \
&InstImpl<TraitsType>::Assembler::paddus, \
&InstImpl<TraitsType>::Assembler::paddus}; \
template <> \
template <> \
const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp \
InstImpl<TraitsType>::InstX86Pand::Base::Emitter = { \
&InstImpl<TraitsType>::Assembler::pand, \
&InstImpl<TraitsType>::Assembler::pand}; \
......@@ -3749,6 +3845,18 @@ template <typename TraitsType> struct Insts {
template <> \
template <> \
const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp \
InstImpl<TraitsType>::InstX86Psubs::Base::Emitter = { \
&InstImpl<TraitsType>::Assembler::psubs, \
&InstImpl<TraitsType>::Assembler::psubs}; \
template <> \
template <> \
const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp \
InstImpl<TraitsType>::InstX86Psubus::Base::Emitter = { \
&InstImpl<TraitsType>::Assembler::psubus, \
&InstImpl<TraitsType>::Assembler::psubus}; \
template <> \
template <> \
const InstImpl<TraitsType>::Assembler::XmmEmitterRegOp \
InstImpl<TraitsType>::InstX86Pxor::Base::Emitter = { \
&InstImpl<TraitsType>::Assembler::pxor, \
&InstImpl<TraitsType>::Assembler::pxor}; \
......
......@@ -62,12 +62,16 @@ public:
Stackrestore,
Trap,
// The intrinsics below are not part of the PNaCl specification.
AddSaturateSigned,
AddSaturateUnsigned,
LoadSubVector,
MultiplyAddPairs,
MultiplyHighSigned,
MultiplyHighUnsigned,
SignMask,
StoreSubVector,
SubtractSaturateSigned,
SubtractSaturateUnsigned,
VectorPackSigned,
VectorPackUnsigned
};
......
......@@ -767,6 +767,14 @@ protected:
AutoMemorySandboxer<> _(this, &Dest, &Src0);
Context.insert<typename Traits::Insts::Padd>(Dest, Src0);
}
// Inserts a Padds instruction (Dest, Src0) at the current lowering context.
void _padds(Variable *Dest, Operand *Src0) {
// Constructed before the insert and handed the addresses of both operands.
// NOTE(review): presumably so the sandboxer can rewrite them first; confirm
// against AutoMemorySandboxer.
AutoMemorySandboxer<> _(this, &Dest, &Src0);
Context.insert<typename Traits::Insts::Padds>(Dest, Src0);
}
// Inserts a Paddus instruction (Dest, Src0) at the current lowering context.
void _paddus(Variable *Dest, Operand *Src0) {
// Constructed before the insert and handed the addresses of both operands.
// NOTE(review): presumably so the sandboxer can rewrite them first; confirm
// against AutoMemorySandboxer.
AutoMemorySandboxer<> _(this, &Dest, &Src0);
Context.insert<typename Traits::Insts::Paddus>(Dest, Src0);
}
void _pand(Variable *Dest, Operand *Src0) {
AutoMemorySandboxer<> _(this, &Dest, &Src0);
Context.insert<typename Traits::Insts::Pand>(Dest, Src0);
......@@ -864,6 +872,14 @@ protected:
AutoMemorySandboxer<> _(this, &Dest, &Src0);
Context.insert<typename Traits::Insts::Psub>(Dest, Src0);
}
// Inserts a Psubs instruction (Dest, Src0) at the current lowering context.
void _psubs(Variable *Dest, Operand *Src0) {
// Constructed before the insert and handed the addresses of both operands.
// NOTE(review): presumably so the sandboxer can rewrite them first; confirm
// against AutoMemorySandboxer.
AutoMemorySandboxer<> _(this, &Dest, &Src0);
Context.insert<typename Traits::Insts::Psubs>(Dest, Src0);
}
// Inserts a Psubus instruction (Dest, Src0) at the current lowering context.
void _psubus(Variable *Dest, Operand *Src0) {
// Constructed before the insert and handed the addresses of both operands.
// NOTE(review): presumably so the sandboxer can rewrite them first; confirm
// against AutoMemorySandboxer.
AutoMemorySandboxer<> _(this, &Dest, &Src0);
Context.insert<typename Traits::Insts::Psubus>(Dest, Src0);
}
// Inserts a Push instruction for Src0 at the current lowering context. Unlike
// the neighbouring XMM helpers, no AutoMemorySandboxer is created here.
void _push(Operand *Src0) {
Context.insert<typename Traits::Insts::Push>(Src0);
}
......
......@@ -4509,6 +4509,54 @@ void TargetX86Base<TraitsType>::lowerIntrinsicCall(
_movp(Dest, T);
return;
}
// Lowers AddSaturateSigned: T = arg0; T = padds(T, arg1); Dest = T.
case Intrinsics::AddSaturateSigned: {
Operand *Src0 = Instr->getArg(0);
Operand *Src1 = Instr->getArg(1);
Variable *Dest = Instr->getDest();
// Temporary register with the same type as the destination.
auto *T = makeReg(Dest->getType());
// Either source may stay in a register or in memory.
auto *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
// The instruction overwrites its first operand, so work on the copy T.
_movp(T, Src0RM);
_padds(T, Src1RM);
_movp(Dest, T);
return;
}
// Lowers SubtractSaturateSigned: T = arg0; T = psubs(T, arg1); Dest = T.
case Intrinsics::SubtractSaturateSigned: {
Operand *Src0 = Instr->getArg(0);
Operand *Src1 = Instr->getArg(1);
Variable *Dest = Instr->getDest();
// Temporary register with the same type as the destination.
auto *T = makeReg(Dest->getType());
// Either source may stay in a register or in memory.
auto *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
// The instruction overwrites its first operand, so work on the copy T.
_movp(T, Src0RM);
_psubs(T, Src1RM);
_movp(Dest, T);
return;
}
// Lowers AddSaturateUnsigned: T = arg0; T = paddus(T, arg1); Dest = T.
case Intrinsics::AddSaturateUnsigned: {
Operand *Src0 = Instr->getArg(0);
Operand *Src1 = Instr->getArg(1);
Variable *Dest = Instr->getDest();
// Temporary register with the same type as the destination.
auto *T = makeReg(Dest->getType());
// Either source may stay in a register or in memory.
auto *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
// The instruction overwrites its first operand, so work on the copy T.
_movp(T, Src0RM);
_paddus(T, Src1RM);
_movp(Dest, T);
return;
}
// Lowers SubtractSaturateUnsigned: T = arg0; T = psubus(T, arg1); Dest = T.
case Intrinsics::SubtractSaturateUnsigned: {
Operand *Src0 = Instr->getArg(0);
Operand *Src1 = Instr->getArg(1);
Variable *Dest = Instr->getDest();
// Temporary register with the same type as the destination.
auto *T = makeReg(Dest->getType());
// Either source may stay in a register or in memory.
auto *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
auto *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
// The instruction overwrites its first operand, so work on the copy T.
_movp(T, Src0RM);
_psubus(T, Src1RM);
_movp(Dest, T);
return;
}
default: // UnknownIntrinsic
Func->setError("Unexpected intrinsic");
return;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment