Commit f0d12c30 by Nicolas Capens Committed by Nicolas Capens

Implement floating-point rounding intrinsic.

BUG=swiftshader:15
Change-Id: I8e53f2fdb8208f8be0f4cdff3241b4a5efe9bc8a
Reviewed-on: https://chromium-review.googlesource.com/404352
Tested-by: Nicolas Capens <nicolascapens@google.com>
Reviewed-by: Jim Stichnoth <stichnot@chromium.org>
parent f8c9977b
......@@ -511,7 +511,9 @@ public:
kRoundUp = 0x2,
kRoundToZero = 0x3
};
void roundsd(XmmRegister dst, XmmRegister src, RoundingMode mode);
void round(Type Ty, XmmRegister dst, XmmRegister src, const Immediate &mode);
void round(Type Ty, XmmRegister dst, const Address &src,
const Immediate &mode);
//----------------------------------------------------------------------------
//
......
......@@ -2392,17 +2392,58 @@ void AssemblerX86Base<TraitsType>::pcmpgt(Type Ty, XmmRegister dst,
}
template <typename TraitsType>
void AssemblerX86Base<TraitsType>::roundsd(XmmRegister dst, XmmRegister src,
RoundingMode mode) {
void AssemblerX86Base<TraitsType>::round(Type Ty, XmmRegister dst,
XmmRegister src,
const Immediate &mode) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
emitUint8(0x66);
emitRexRB(RexTypeIrrelevant, dst, src);
emitUint8(0x0F);
emitUint8(0x3A);
emitUint8(0x0B);
switch (Ty) {
case IceType_v4f32:
emitUint8(0x08);
break;
case IceType_f32:
emitUint8(0x0A);
break;
case IceType_f64:
emitUint8(0x0B);
break;
default:
assert(false && "Unsupported round operand type");
}
emitXmmRegisterOperand(dst, src);
// Mask precision exception.
emitUint8(static_cast<uint8_t>(mode) | 0x8);
emitUint8(static_cast<uint8_t>(mode.value()) | 0x8);
}
/// Encodes a floating-point round instruction whose source is a memory
/// operand.
///
/// Byte sequence emitted, in order: the 0x66 prefix, the address-size
/// override prefix (via emitAddrSizeOverridePrefix), the REX prefix (via
/// emitRex), the 0x0F 0x3A escape bytes, one opcode byte selected by \p Ty,
/// the operand bytes for \p dst / \p src, and finally the rounding-mode
/// immediate.
///
/// \param Ty   Operand type; only IceType_v4f32, IceType_f32 and IceType_f64
///             are handled.
/// \param dst  Destination XMM register.
/// \param src  Memory operand to round.
/// \param mode Rounding-mode immediate; its low byte is OR'ed with 0x8 before
///             being emitted.
template <typename TraitsType>
void AssemblerX86Base<TraitsType>::round(Type Ty, XmmRegister dst,
const Address &src,
const Immediate &mode) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
emitUint8(0x66);
emitAddrSizeOverridePrefix();
emitRex(RexTypeIrrelevant, src, dst);
emitUint8(0x0F);
emitUint8(0x3A);
// Final opcode byte depends on the operand type. Per the Intel SDM,
// 66 0F 3A 08 / 0A / 0B are ROUNDPS / ROUNDSS / ROUNDSD respectively.
switch (Ty) {
case IceType_v4f32:
emitUint8(0x08);
break;
case IceType_f32:
emitUint8(0x0A);
break;
case IceType_f64:
emitUint8(0x0B);
break;
default:
// With assertions compiled out, no opcode byte is emitted for an
// unsupported type and encoding simply continues below.
assert(false && "Unsupported round operand type");
}
// NOTE(review): an immediate byte follows the memory operand. If emitOperand
// must be told about trailing bytes for any fixup/relocation on the address,
// confirm that is handled here.
emitOperand(gprEncoding(dst), src);
// Mask precision exception (bit 3 of the immediate).
emitUint8(static_cast<uint8_t>(mode.value()) | 0x8);
}
template <typename TraitsType>
......
......@@ -164,6 +164,7 @@ template <typename TraitsType> struct InstImpl {
Pxor,
Ret,
Rol,
Round,
Sar,
Sbb,
SbbRMW,
......@@ -2564,6 +2565,25 @@ template <typename TraitsType> struct InstImpl {
InstX86Cvt(Cfg *Func, Variable *Dest, Operand *Source, CvtVariant Variant);
};
/// Round instruction: a floating-point rounding operation with a destination,
/// one source operand, and an immediate operand (the rounding mode). It is
/// modeled as a three-address op, i.e. the two operands passed to the base
/// class are Source and Imm.
class InstX86Round final
: public InstX86BaseThreeAddressop<InstX86Base::Round> {
public:
/// Constructs an InstX86Round in storage obtained from Func->allocate().
/// \param Func   Function the instruction belongs to; also supplies storage.
/// \param Dest   Variable receiving the rounded value.
/// \param Source Operand to round.
/// \param Imm    Rounding-mode operand.
static InstX86Round *create(Cfg *Func, Variable *Dest, Operand *Source,
Operand *Imm) {
return new (Func->allocate<InstX86Round>())
InstX86Round(Func, Dest, Source, Imm);
}
/// Writes the textual assembly form of the instruction.
void emit(const Cfg *Func) const override;
/// Encodes the instruction through the integrated assembler.
void emitIAS(const Cfg *Func) const override;
private:
/// Private so that instances are only created through create().
InstX86Round(Cfg *Func, Variable *Dest, Operand *Source, Operand *Imm)
: InstX86BaseThreeAddressop<InstX86Base::Round>(Func, Dest, Source,
Imm) {}
};
/// cmp - Integer compare instruction.
class InstX86Icmp final : public InstX86Base {
InstX86Icmp() = delete;
......@@ -3229,6 +3249,7 @@ template <typename TraitsType> struct Insts {
using Cmpxchg = typename InstImpl<TraitsType>::InstX86Cmpxchg;
using Cmpxchg8b = typename InstImpl<TraitsType>::InstX86Cmpxchg8b;
using Cvt = typename InstImpl<TraitsType>::InstX86Cvt;
using Round = typename InstImpl<TraitsType>::InstX86Round;
using Icmp = typename InstImpl<TraitsType>::InstX86Icmp;
using Ucomiss = typename InstImpl<TraitsType>::InstX86Ucomiss;
using UD2 = typename InstImpl<TraitsType>::InstX86UD2;
......@@ -3494,6 +3515,9 @@ template <typename TraitsType> struct Insts {
"insertps"; \
template <> \
template <> \
const char *InstImpl<TraitsType>::InstX86Round::Base::Opcode = "round"; \
template <> \
template <> \
const char *InstImpl<TraitsType>::InstX86Shufps::Base::Opcode = "shufps"; \
template <> \
template <> \
......
......@@ -1767,6 +1767,35 @@ void InstImpl<TraitsType>::InstX86Cvt::dump(const Cfg *Func) const {
}
/// Writes the textual assembly form of the round instruction to the context's
/// emit stream.
///
/// The mnemonic is the "round" opcode followed by the SpSdString suffix for
/// the destination's type. Operands are printed in the order: immediate
/// (source 1), source operand (source 0), destination.
///
/// Does nothing when BuildDefs::dump() is false.
///
/// NOTE(review): the integrated-assembler path ORs 0x8 (mask precision
/// exception) into the immediate, whereas this path prints source 1
/// unchanged; confirm whether the textual output should carry that bit too.
template <typename TraitsType>
void InstImpl<TraitsType>::InstX86Round::emit(const Cfg *Func) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Func->getContext()->getStrEmit();
  // The instruction is constructed with exactly two sources (the value to
  // round and the rounding-mode immediate), which is also what emitIAS()
  // asserts. Only getSrc(0) and getSrc(1) are read below.
  assert(this->getSrcSize() == 2);
  Str << "\t" << this->Opcode
      << Traits::TypeAttributes[this->getDest()->getType()].SpSdString
      << "\t";
  this->getSrc(1)->emit(Func);
  Str << ", ";
  this->getSrc(0)->emit(Func);
  Str << ", ";
  this->getDest()->emit(Func);
}
/// Encodes the round instruction through the integrated assembler.
///
/// Requires exactly two sources (value, rounding-mode immediate) and a target
/// instruction set of at least SSE4.1; both are checked by assertion. The
/// actual encoding is delegated to emitIASThreeOpImmOps, which is handed the
/// two Assembler::round overloads.
template <typename TraitsType>
void InstImpl<TraitsType>::InstX86Round::emitIAS(const Cfg *Func) const {
  assert(this->getSrcSize() == 2);
  assert(InstX86Base::getTarget(Func)->getInstructionSet() >= Traits::SSE4_1);

  // Both table entries name Assembler::round; the emitter's member types
  // select the matching overload for each.
  static const ThreeOpImmEmitter<XmmRegister, XmmRegister> RoundEmitter = {
      &Assembler::round, &Assembler::round};

  const Variable *Target = this->getDest();
  emitIASThreeOpImmOps<XmmRegister, XmmRegister, Traits::getEncodedXmm,
                       Traits::getEncodedXmm>(Func, Target->getType(), Target,
                                              this->getSrc(0), this->getSrc(1),
                                              RoundEmitter);
}
template <typename TraitsType>
void InstImpl<TraitsType>::InstX86Icmp::emit(const Cfg *Func) const {
if (!BuildDefs::dump())
return;
......
......@@ -68,6 +68,7 @@ public:
MultiplyAddPairs,
MultiplyHighSigned,
MultiplyHighUnsigned,
Round,
SignMask,
StoreSubVector,
SubtractSaturateSigned,
......
......@@ -619,6 +619,10 @@ protected:
AutoMemorySandboxer<> _(this, &Dest, &Src0);
Context.insert<typename Traits::Insts::Cvt>(Dest, Src0, Variant);
}
/// Inserts a Round instruction (Dest = round(Src0) using rounding-mode
/// operand Imm) at the current lowering context.
/// Dest and Src0 are handed to an AutoMemorySandboxer that stays alive for
/// the duration of the insertion; Imm is not passed to it.
void _round(Variable *Dest, Operand *Src0, Operand *Imm) {
AutoMemorySandboxer<> _(this, &Dest, &Src0);
Context.insert<typename Traits::Insts::Round>(Dest, Src0, Imm);
}
void _div(Variable *Dest, Operand *Src0, Operand *Src1) {
AutoMemorySandboxer<> _(this, &Dest, &Src0, &Src1);
Context.insert<typename Traits::Insts::Div>(Dest, Src0, Src1);
......@@ -894,6 +898,10 @@ protected:
AutoMemorySandboxer<> _(this, &Dest, &Src0);
Context.insert<typename Traits::Insts::Rol>(Dest, Src0);
}
/// Inserts a Round instruction (Dest = round(Src) using the constant
/// rounding-mode operand Imm) at the current lowering context.
/// Dest and Src are handed to an AutoMemorySandboxer that stays alive for
/// the duration of the insertion; Imm is not passed to it.
void _round(Variable *Dest, Operand *Src, Constant *Imm) {
AutoMemorySandboxer<> _(this, &Dest, &Src);
Context.insert<typename Traits::Insts::Round>(Dest, Src, Imm);
}
X86OperandMem *_sandbox_mem_reference(X86OperandMem *Mem) {
return dispatchToConcrete(&Traits::ConcreteTarget::_sandbox_mem_reference,
std::move(Mem));
......
......@@ -4557,6 +4557,21 @@ void TargetX86Base<TraitsType>::lowerIntrinsicCall(
_movp(Dest, T);
return;
}
case Intrinsics::Round: {
Variable *Dest = Instr->getDest();
Operand *Src = Instr->getArg(0);
Operand *Mode = Instr->getArg(1);
assert(llvm::isa<ConstantInteger32>(Mode) &&
"Round last argument must be a constant");
auto *SrcRM = legalize(Src, Legal_Reg | Legal_Mem);
int32_t Imm = llvm::cast<ConstantInteger32>(Mode)->getValue();
(void)Imm;
assert(Imm >= 0 && Imm < 4 && "Invalid rounding mode");
auto *T = makeReg(Dest->getType());
_round(T, SrcRM, Mode);
_movp(Dest, T);
return;
}
default: // UnknownIntrinsic
Func->setError("Unexpected intrinsic");
return;
......
......@@ -2069,8 +2069,9 @@ TEST_F(AssemblerX8632Test, Roundsd) {
\
__ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
__ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \
__ roundsd(XmmRegister::Encoded_Reg_##Dst, XmmRegister::Encoded_Reg_##Src, \
AssemblerX8632::k##Mode); \
__ round(IceType_f64, XmmRegister::Encoded_Reg_##Dst, \
XmmRegister::Encoded_Reg_##Src, \
Immediate(AssemblerX8632::k##Mode)); \
\
AssembledTest test = assemble(); \
test.setDqwordTo(T0, V0); \
......
......@@ -2194,8 +2194,8 @@ TEST_F(AssemblerX8664Test, Roundsd) {
\
__ movups(Encoded_Xmm_##Dst(), dwordAddress(T0)); \
__ movups(Encoded_Xmm_##Src(), dwordAddress(T1)); \
__ roundsd(Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(), \
AssemblerX8664::k##Mode); \
__ round(IceType_f64, Encoded_Xmm_##Dst(), Encoded_Xmm_##Src(), \
Immediate(AssemblerX8664::k##Mode)); \
\
AssembledTest test = assemble(); \
test.setDqwordTo(T0, V0); \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment