Commit f0d12c30 by Nicolas Capens Committed by Nicolas Capens

Implement floating-point rounding intrinsic.

BUG=swiftshader:15 Change-Id: I8e53f2fdb8208f8be0f4cdff3241b4a5efe9bc8a Reviewed-on: https://chromium-review.googlesource.com/404352 Tested-by: Nicolas Capens <nicolascapens@google.com> Reviewed-by: Jim Stichnoth <stichnot@chromium.org>
parent f8c9977b
...@@ -511,7 +511,9 @@ public: ...@@ -511,7 +511,9 @@ public:
kRoundUp = 0x2, kRoundUp = 0x2,
kRoundToZero = 0x3 kRoundToZero = 0x3
}; };
void roundsd(XmmRegister dst, XmmRegister src, RoundingMode mode); void round(Type Ty, XmmRegister dst, XmmRegister src, const Immediate &mode);
void round(Type Ty, XmmRegister dst, const Address &src,
const Immediate &mode);
//---------------------------------------------------------------------------- //----------------------------------------------------------------------------
// //
......
...@@ -2392,17 +2392,58 @@ void AssemblerX86Base<TraitsType>::pcmpgt(Type Ty, XmmRegister dst, ...@@ -2392,17 +2392,58 @@ void AssemblerX86Base<TraitsType>::pcmpgt(Type Ty, XmmRegister dst,
} }
template <typename TraitsType> template <typename TraitsType>
void AssemblerX86Base<TraitsType>::roundsd(XmmRegister dst, XmmRegister src, void AssemblerX86Base<TraitsType>::round(Type Ty, XmmRegister dst,
RoundingMode mode) { XmmRegister src,
const Immediate &mode) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer); AssemblerBuffer::EnsureCapacity ensured(&Buffer);
emitUint8(0x66); emitUint8(0x66);
emitRexRB(RexTypeIrrelevant, dst, src); emitRexRB(RexTypeIrrelevant, dst, src);
emitUint8(0x0F); emitUint8(0x0F);
emitUint8(0x3A); emitUint8(0x3A);
switch (Ty) {
case IceType_v4f32:
emitUint8(0x08);
break;
case IceType_f32:
emitUint8(0x0A);
break;
case IceType_f64:
emitUint8(0x0B); emitUint8(0x0B);
break;
default:
assert(false && "Unsupported round operand type");
}
emitXmmRegisterOperand(dst, src); emitXmmRegisterOperand(dst, src);
// Mask precision exception. // Mask precision exception.
emitUint8(static_cast<uint8_t>(mode) | 0x8); emitUint8(static_cast<uint8_t>(mode.value()) | 0x8);
}
template <typename TraitsType>
void AssemblerX86Base<TraitsType>::round(Type Ty, XmmRegister dst,
const Address &src,
const Immediate &mode) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
emitUint8(0x66);
emitAddrSizeOverridePrefix();
emitRex(RexTypeIrrelevant, src, dst);
emitUint8(0x0F);
emitUint8(0x3A);
switch (Ty) {
case IceType_v4f32:
emitUint8(0x08);
break;
case IceType_f32:
emitUint8(0x0A);
break;
case IceType_f64:
emitUint8(0x0B);
break;
default:
assert(false && "Unsupported round operand type");
}
emitOperand(gprEncoding(dst), src);
// Mask precision exception.
emitUint8(static_cast<uint8_t>(mode.value()) | 0x8);
} }
template <typename TraitsType> template <typename TraitsType>
......
...@@ -164,6 +164,7 @@ template <typename TraitsType> struct InstImpl { ...@@ -164,6 +164,7 @@ template <typename TraitsType> struct InstImpl {
Pxor, Pxor,
Ret, Ret,
Rol, Rol,
Round,
Sar, Sar,
Sbb, Sbb,
SbbRMW, SbbRMW,
...@@ -2564,6 +2565,25 @@ template <typename TraitsType> struct InstImpl { ...@@ -2564,6 +2565,25 @@ template <typename TraitsType> struct InstImpl {
InstX86Cvt(Cfg *Func, Variable *Dest, Operand *Source, CvtVariant Variant); InstX86Cvt(Cfg *Func, Variable *Dest, Operand *Source, CvtVariant Variant);
}; };
/// Round instruction: a three-address-style instruction (kind
/// InstX86Base::Round) with one destination and two sources -- the operand to
/// be rounded and the rounding-mode operand.
class InstX86Round final
: public InstX86BaseThreeAddressop<InstX86Base::Round> {
public:
/// Allocates a new InstX86Round through Func's allocator and constructs it in
/// place with placement new.
static InstX86Round *create(Cfg *Func, Variable *Dest, Operand *Source,
Operand *Imm) {
return new (Func->allocate<InstX86Round>())
InstX86Round(Func, Dest, Source, Imm);
}
/// Textual emission of the instruction.
void emit(const Cfg *Func) const override;
/// Emission through the integrated assembler.
void emitIAS(const Cfg *Func) const override;
private:
/// Private so that instances are only created via create(); forwards Source
/// and Imm, in that order, to the three-address base class.
InstX86Round(Cfg *Func, Variable *Dest, Operand *Source, Operand *Imm)
: InstX86BaseThreeAddressop<InstX86Base::Round>(Func, Dest, Source,
Imm) {}
};
/// cmp - Integer compare instruction. /// cmp - Integer compare instruction.
class InstX86Icmp final : public InstX86Base { class InstX86Icmp final : public InstX86Base {
InstX86Icmp() = delete; InstX86Icmp() = delete;
...@@ -3229,6 +3249,7 @@ template <typename TraitsType> struct Insts { ...@@ -3229,6 +3249,7 @@ template <typename TraitsType> struct Insts {
using Cmpxchg = typename InstImpl<TraitsType>::InstX86Cmpxchg; using Cmpxchg = typename InstImpl<TraitsType>::InstX86Cmpxchg;
using Cmpxchg8b = typename InstImpl<TraitsType>::InstX86Cmpxchg8b; using Cmpxchg8b = typename InstImpl<TraitsType>::InstX86Cmpxchg8b;
using Cvt = typename InstImpl<TraitsType>::InstX86Cvt; using Cvt = typename InstImpl<TraitsType>::InstX86Cvt;
using Round = typename InstImpl<TraitsType>::InstX86Round;
using Icmp = typename InstImpl<TraitsType>::InstX86Icmp; using Icmp = typename InstImpl<TraitsType>::InstX86Icmp;
using Ucomiss = typename InstImpl<TraitsType>::InstX86Ucomiss; using Ucomiss = typename InstImpl<TraitsType>::InstX86Ucomiss;
using UD2 = typename InstImpl<TraitsType>::InstX86UD2; using UD2 = typename InstImpl<TraitsType>::InstX86UD2;
...@@ -3494,6 +3515,9 @@ template <typename TraitsType> struct Insts { ...@@ -3494,6 +3515,9 @@ template <typename TraitsType> struct Insts {
"insertps"; \ "insertps"; \
template <> \ template <> \
template <> \ template <> \
const char *InstImpl<TraitsType>::InstX86Round::Base::Opcode = "round"; \
template <> \
template <> \
const char *InstImpl<TraitsType>::InstX86Shufps::Base::Opcode = "shufps"; \ const char *InstImpl<TraitsType>::InstX86Shufps::Base::Opcode = "shufps"; \
template <> \ template <> \
template <> \ template <> \
......
...@@ -1767,6 +1767,35 @@ void InstImpl<TraitsType>::InstX86Cvt::dump(const Cfg *Func) const { ...@@ -1767,6 +1767,35 @@ void InstImpl<TraitsType>::InstX86Cvt::dump(const Cfg *Func) const {
} }
template <typename TraitsType> template <typename TraitsType>
void InstImpl<TraitsType>::InstX86Round::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
Ostream &Str = Func->getContext()->getStrEmit();
assert(this->getSrcSize() == 3);
Str << "\t" << this->Opcode
<< Traits::TypeAttributes[this->getDest()->getType()].SpSdString
<< "\t";
this->getSrc(1)->emit(Func);
Str << ", ";
this->getSrc(0)->emit(Func);
Str << ", ";
this->getDest()->emit(Func);
}
/// Emits the round instruction through the integrated assembler. Both
/// assembler overloads of round() are registered in the emitter table, and
/// emitIASThreeOpImmOps is handed the destination, Src(0) and Src(1).
template <typename TraitsType>
void InstImpl<TraitsType>::InstX86Round::emitIAS(const Cfg *Func) const {
  // Sanity checks: two sources, and a target that provides at least SSE4.1.
  assert(this->getSrcSize() == 2);
  assert(InstX86Base::getTarget(Func)->getInstructionSet() >= Traits::SSE4_1);

  static const ThreeOpImmEmitter<XmmRegister, XmmRegister> RoundEmitter = {
      &Assembler::round, &Assembler::round};

  const Variable *Result = this->getDest();
  const Type ResultTy = Result->getType();
  auto *Value = this->getSrc(0);
  auto *Mode = this->getSrc(1);
  emitIASThreeOpImmOps<XmmRegister, XmmRegister, Traits::getEncodedXmm,
                       Traits::getEncodedXmm>(Func, ResultTy, Result, Value,
                                              Mode, RoundEmitter);
}
template <typename TraitsType>
void InstImpl<TraitsType>::InstX86Icmp::emit(const Cfg *Func) const { void InstImpl<TraitsType>::InstX86Icmp::emit(const Cfg *Func) const {
if (!BuildDefs::dump()) if (!BuildDefs::dump())
return; return;
......
...@@ -68,6 +68,7 @@ public: ...@@ -68,6 +68,7 @@ public:
MultiplyAddPairs, MultiplyAddPairs,
MultiplyHighSigned, MultiplyHighSigned,
MultiplyHighUnsigned, MultiplyHighUnsigned,
Round,
SignMask, SignMask,
StoreSubVector, StoreSubVector,
SubtractSaturateSigned, SubtractSaturateSigned,
......
...@@ -619,6 +619,10 @@ protected: ...@@ -619,6 +619,10 @@ protected:
AutoMemorySandboxer<> _(this, &Dest, &Src0); AutoMemorySandboxer<> _(this, &Dest, &Src0);
Context.insert<typename Traits::Insts::Cvt>(Dest, Src0, Variant); Context.insert<typename Traits::Insts::Cvt>(Dest, Src0, Variant);
} }
/// Inserts a Round instruction built from (Dest, Src0, Imm) into the current
/// lowering context.
void _round(Variable *Dest, Operand *Src0, Operand *Imm) {
  // The sandboxer is handed the addresses of Dest and Src0 and stays alive
  // until the end of this scope, i.e. across the insert below.
  AutoMemorySandboxer<> MemSandboxer(this, &Dest, &Src0);
  using RoundInst = typename Traits::Insts::Round;
  Context.template insert<RoundInst>(Dest, Src0, Imm);
}
void _div(Variable *Dest, Operand *Src0, Operand *Src1) { void _div(Variable *Dest, Operand *Src0, Operand *Src1) {
AutoMemorySandboxer<> _(this, &Dest, &Src0, &Src1); AutoMemorySandboxer<> _(this, &Dest, &Src0, &Src1);
Context.insert<typename Traits::Insts::Div>(Dest, Src0, Src1); Context.insert<typename Traits::Insts::Div>(Dest, Src0, Src1);
...@@ -894,6 +898,10 @@ protected: ...@@ -894,6 +898,10 @@ protected:
AutoMemorySandboxer<> _(this, &Dest, &Src0); AutoMemorySandboxer<> _(this, &Dest, &Src0);
Context.insert<typename Traits::Insts::Rol>(Dest, Src0); Context.insert<typename Traits::Insts::Rol>(Dest, Src0);
} }
/// Overload taking the rounding mode as a Constant: inserts a Round
/// instruction built from (Dest, Src, Imm) into the current lowering context.
void _round(Variable *Dest, Operand *Src, Constant *Imm) {
  // The sandboxer is handed the addresses of Dest and Src and stays alive
  // until the end of this scope, i.e. across the insert below.
  AutoMemorySandboxer<> MemSandboxer(this, &Dest, &Src);
  using RoundInst = typename Traits::Insts::Round;
  Context.template insert<RoundInst>(Dest, Src, Imm);
}
X86OperandMem *_sandbox_mem_reference(X86OperandMem *Mem) { X86OperandMem *_sandbox_mem_reference(X86OperandMem *Mem) {
return dispatchToConcrete(&Traits::ConcreteTarget::_sandbox_mem_reference, return dispatchToConcrete(&Traits::ConcreteTarget::_sandbox_mem_reference,
std::move(Mem)); std::move(Mem));
......
...@@ -4557,6 +4557,21 @@ void TargetX86Base<TraitsType>::lowerIntrinsicCall( ...@@ -4557,6 +4557,21 @@ void TargetX86Base<TraitsType>::lowerIntrinsicCall(
_movp(Dest, T); _movp(Dest, T);
return; return;
} }
// Lowers the Round intrinsic: argument 0 is the value to round and argument 1
// is the rounding mode. The result is computed into a fresh register and then
// moved into the intrinsic's destination.
case Intrinsics::Round: {
Variable *Dest = Instr->getDest();
Operand *Src = Instr->getArg(0);
Operand *Mode = Instr->getArg(1);
// The mode is forwarded unchanged to the round instruction, so it has to be a
// 32-bit integer constant.
assert(llvm::isa<ConstantInteger32>(Mode) &&
"Round last argument must be a constant");
// The source may stay in memory or be placed in a register.
auto *SrcRM = legalize(Src, Legal_Reg | Legal_Mem);
int32_t Imm = llvm::cast<ConstantInteger32>(Mode)->getValue();
// Imm is only read by the assert below; the void cast keeps it referenced when
// asserts are compiled out.
(void)Imm;
// Only mode values 0..3 are accepted.
assert(Imm >= 0 && Imm < 4 && "Invalid rounding mode");
auto *T = makeReg(Dest->getType());
_round(T, SrcRM, Mode);
_movp(Dest, T);
return;
}
default: // UnknownIntrinsic default: // UnknownIntrinsic
Func->setError("Unexpected intrinsic"); Func->setError("Unexpected intrinsic");
return; return;
......
...@@ -2069,8 +2069,9 @@ TEST_F(AssemblerX8632Test, Roundsd) { ...@@ -2069,8 +2069,9 @@ TEST_F(AssemblerX8632Test, Roundsd) {
\ \
__ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \ __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
__ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \ __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \
__ roundsd(XmmRegister::Encoded_Reg_##Dst, XmmRegister::Encoded_Reg_##Src, \ __ round(IceType_f64, XmmRegister::Encoded_Reg_##Dst, \
AssemblerX8632::k##Mode); \ XmmRegister::Encoded_Reg_##Src, \
Immediate(AssemblerX8632::k##Mode)); \
\ \
AssembledTest test = assemble(); \ AssembledTest test = assemble(); \
test.setDqwordTo(T0, V0); \ test.setDqwordTo(T0, V0); \
......
...@@ -2194,8 +2194,8 @@ TEST_F(AssemblerX8664Test, Roundsd) { ...@@ -2194,8 +2194,8 @@ TEST_F(AssemblerX8664Test, Roundsd) {
\ \
__ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \ __ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
__ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \ __ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \
__ roundsd(Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(), \ __ round(IceType_f64, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(), \
AssemblerX8664::k##Mode); \ Immediate(AssemblerX8664::k##Mode)); \
\ \
AssembledTest test = assemble(); \ AssembledTest test = assemble(); \
test.setDqwordTo(T0, V0); \ test.setDqwordTo(T0, V0); \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment