Commit dbf81e0c by Nicolas Capens Committed by Nicolas Capens

Implement an intrinsic for nearbyint.

The round intrinsic gets translated to roundps on x86, which is SSE4.1 only. cvtps2dq + cvtdq2ps can be used as an SSE2 fallback. cvtps2dq also corresponds to LLVM's nearbyint intrinsic.
BUG=swiftshader:20
Change-Id: I8b5896c443f202a5b25125b4e5049b0b3d3a11b0
Reviewed-on: https://chromium-review.googlesource.com/428491
Reviewed-by: Jim Stichnoth <stichnot@chromium.org>
parent 4e679e51
...@@ -467,6 +467,9 @@ public: ...@@ -467,6 +467,9 @@ public:
void cvttps2dq(Type, XmmRegister dst, XmmRegister src); void cvttps2dq(Type, XmmRegister dst, XmmRegister src);
void cvttps2dq(Type, XmmRegister dst, const Address &src); void cvttps2dq(Type, XmmRegister dst, const Address &src);
// cvtps2dq: non-truncating counterpart of cvttps2dq. Per the Intel SDM it
// converts packed single-precision floats to packed signed 32-bit integers
// using the current MXCSR rounding mode. The Type argument is ignored by the
// implementation. Overloads take the source from an XMM register or memory.
void cvtps2dq(Type, XmmRegister dst, XmmRegister src);
void cvtps2dq(Type, XmmRegister dst, const Address &src);
void cvtsi2ss(Type DestTy, XmmRegister dst, Type SrcTy, GPRRegister src); void cvtsi2ss(Type DestTy, XmmRegister dst, Type SrcTy, GPRRegister src);
void cvtsi2ss(Type DestTy, XmmRegister dst, Type SrcTy, const Address &src); void cvtsi2ss(Type DestTy, XmmRegister dst, Type SrcTy, const Address &src);
...@@ -476,6 +479,9 @@ public: ...@@ -476,6 +479,9 @@ public:
void cvttss2si(Type DestTy, GPRRegister dst, Type SrcTy, XmmRegister src); void cvttss2si(Type DestTy, GPRRegister dst, Type SrcTy, XmmRegister src);
void cvttss2si(Type DestTy, GPRRegister dst, Type SrcTy, const Address &src); void cvttss2si(Type DestTy, GPRRegister dst, Type SrcTy, const Address &src);
// cvtss2si: non-truncating counterpart of cvttss2si. Converts a scalar
// floating-point source (register or memory) to an integer in a GPR. SrcTy
// selects the instruction prefix and DestTy is forwarded to the REX emission
// in the implementation.
void cvtss2si(Type DestTy, GPRRegister dst, Type SrcTy, XmmRegister src);
void cvtss2si(Type DestTy, GPRRegister dst, Type SrcTy, const Address &src);
void ucomiss(Type Ty, XmmRegister a, XmmRegister b); void ucomiss(Type Ty, XmmRegister a, XmmRegister b);
void ucomiss(Type Ty, XmmRegister a, const Address &b); void ucomiss(Type Ty, XmmRegister a, const Address &b);
......
...@@ -2041,6 +2041,29 @@ void AssemblerX86Base<TraitsType>::cvttps2dq(Type /* Ignore */, XmmRegister dst, ...@@ -2041,6 +2041,29 @@ void AssemblerX86Base<TraitsType>::cvttps2dq(Type /* Ignore */, XmmRegister dst,
} }
template <typename TraitsType> template <typename TraitsType>
void AssemblerX86Base<TraitsType>::cvtps2dq(Type /* Ignore */, XmmRegister dst,
                                            XmmRegister src) {
  // Register-source form of cvtps2dq. The bytes must be emitted in exactly
  // this order: 0x66 prefix, REX (as decided by emitRexRB), the 0x0F 0x5B
  // opcode pair, and finally the dst/src register operand encoding.
  constexpr int MandatoryPrefix = 0x66;
  constexpr int OpcodeEscape = 0x0F;
  constexpr int OpcodeByte = 0x5B;

  AssemblerBuffer::EnsureCapacity ensured(&Buffer);

  emitUint8(MandatoryPrefix);
  emitRexRB(RexTypeIrrelevant, dst, src);

  emitUint8(OpcodeEscape);
  emitUint8(OpcodeByte);

  emitXmmRegisterOperand(dst, src);
}
// Memory-source form of cvtps2dq. Emits, in order: the 0x66 prefix, the
// address-size override prefix (via emitAddrSizeOverridePrefix), REX (via
// emitRex), the 0x0F 0x5B opcode pair, and the operand bytes for `src` with
// `dst` supplied as the register field.
template <typename TraitsType>
void AssemblerX86Base<TraitsType>::cvtps2dq(Type /* Ignore */, XmmRegister dst,
                                            const Address &src) {
  constexpr int MandatoryPrefix = 0x66;
  constexpr int OpcodeEscape = 0x0F;
  constexpr int OpcodeByte = 0x5B;

  AssemblerBuffer::EnsureCapacity ensured(&Buffer);

  // Prefixes first; their relative order is part of the encoding.
  emitUint8(MandatoryPrefix);
  emitAddrSizeOverridePrefix();
  emitRex(RexTypeIrrelevant, src, dst);

  emitUint8(OpcodeEscape);
  emitUint8(OpcodeByte);

  emitOperand(gprEncoding(dst), src);
}
template <typename TraitsType>
void AssemblerX86Base<TraitsType>::cvtsi2ss(Type DestTy, XmmRegister dst, void AssemblerX86Base<TraitsType>::cvtsi2ss(Type DestTy, XmmRegister dst,
Type SrcTy, GPRRegister src) { Type SrcTy, GPRRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer); AssemblerBuffer::EnsureCapacity ensured(&Buffer);
...@@ -2111,6 +2134,29 @@ void AssemblerX86Base<TraitsType>::cvttss2si(Type DestTy, GPRRegister dst, ...@@ -2111,6 +2134,29 @@ void AssemblerX86Base<TraitsType>::cvttss2si(Type DestTy, GPRRegister dst,
} }
template <typename TraitsType> template <typename TraitsType>
void AssemblerX86Base<TraitsType>::cvtss2si(Type DestTy, GPRRegister dst,
                                            Type SrcTy, XmmRegister src) {
  // Register-source form of cvtss2si. The leading prefix depends on SrcTy:
  // 0xF3 when isFloat32Asserting32Or64 reports a 32-bit float, 0xF2 otherwise.
  // It is followed by REX (sized by DestTy), the 0x0F 0x2D opcode pair, and
  // the dst/src register operand encoding.
  constexpr int PrefixFloat32 = 0xF3;
  constexpr int PrefixOther = 0xF2;
  constexpr int OpcodeEscape = 0x0F;
  constexpr int OpcodeByte = 0x2D;

  AssemblerBuffer::EnsureCapacity ensured(&Buffer);

  if (isFloat32Asserting32Or64(SrcTy)) {
    emitUint8(PrefixFloat32);
  } else {
    emitUint8(PrefixOther);
  }
  emitRexRB(DestTy, dst, src);

  emitUint8(OpcodeEscape);
  emitUint8(OpcodeByte);

  emitXmmRegisterOperand(dst, src);
}
// Memory-source form of cvtss2si. Emits, in order: the SrcTy-dependent prefix
// (0xF3 when isFloat32Asserting32Or64 reports a 32-bit float, 0xF2 otherwise),
// the address-size override prefix, REX (sized by DestTy), the 0x0F 0x2D
// opcode pair, and the operand bytes for `src` with `dst` as the register
// field.
template <typename TraitsType>
void AssemblerX86Base<TraitsType>::cvtss2si(Type DestTy, GPRRegister dst,
                                            Type SrcTy, const Address &src) {
  constexpr int PrefixFloat32 = 0xF3;
  constexpr int PrefixOther = 0xF2;
  constexpr int OpcodeEscape = 0x0F;
  constexpr int OpcodeByte = 0x2D;

  AssemblerBuffer::EnsureCapacity ensured(&Buffer);

  // Prefixes first; their relative order is part of the encoding.
  if (isFloat32Asserting32Or64(SrcTy)) {
    emitUint8(PrefixFloat32);
  } else {
    emitUint8(PrefixOther);
  }
  emitAddrSizeOverridePrefix();
  emitRex(DestTy, src, dst);

  emitUint8(OpcodeEscape);
  emitUint8(OpcodeByte);

  emitOperand(gprEncoding(dst), src);
}
template <typename TraitsType>
void AssemblerX86Base<TraitsType>::ucomiss(Type Ty, XmmRegister a, void AssemblerX86Base<TraitsType>::ucomiss(Type Ty, XmmRegister a,
XmmRegister b) { XmmRegister b) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer); AssemblerBuffer::EnsureCapacity ensured(&Buffer);
......
...@@ -2546,7 +2546,7 @@ template <typename TraitsType> struct InstImpl { ...@@ -2546,7 +2546,7 @@ template <typename TraitsType> struct InstImpl {
InstX86Cvt &operator=(const InstX86Cvt &) = delete; InstX86Cvt &operator=(const InstX86Cvt &) = delete;
public: public:
enum CvtVariant { Si2ss, Tss2si, Float2float, Dq2ps, Tps2dq }; enum CvtVariant { Si2ss, Tss2si, Ss2si, Float2float, Dq2ps, Tps2dq, Ps2dq };
static InstX86Cvt *create(Cfg *Func, Variable *Dest, Operand *Source, static InstX86Cvt *create(Cfg *Func, Variable *Dest, Operand *Source,
CvtVariant Variant) { CvtVariant Variant) {
return new (Func->allocate<InstX86Cvt>()) return new (Func->allocate<InstX86Cvt>())
......
...@@ -1063,10 +1063,10 @@ void InstImpl<TraitsType>::InstX86Movmsk::emitIAS(const Cfg *Func) const { ...@@ -1063,10 +1063,10 @@ void InstImpl<TraitsType>::InstX86Movmsk::emitIAS(const Cfg *Func) const {
const Type SrcTy = Src->getType(); const Type SrcTy = Src->getType();
assert(isVectorType(SrcTy)); assert(isVectorType(SrcTy));
assert(isScalarIntegerType(DestTy)); assert(isScalarIntegerType(DestTy));
if (!Traits::Is64Bit) { if (Traits::Is64Bit) {
assert(typeWidthInBytes(DestTy) <= 4);
} else {
assert(DestTy == IceType_i32 || DestTy == IceType_i64); assert(DestTy == IceType_i32 || DestTy == IceType_i64);
} else {
assert(typeWidthInBytes(DestTy) <= 4);
} }
XmmRegister SrcReg = Traits::getEncodedXmm(Src->getRegNum()); XmmRegister SrcReg = Traits::getEncodedXmm(Src->getRegNum());
GPRRegister DestReg = Traits::getEncodedGPR(Dest->getRegNum()); GPRRegister DestReg = Traits::getEncodedGPR(Dest->getRegNum());
...@@ -1712,10 +1712,10 @@ void InstImpl<TraitsType>::InstX86Cvt::emitIAS(const Cfg *Func) const { ...@@ -1712,10 +1712,10 @@ void InstImpl<TraitsType>::InstX86Cvt::emitIAS(const Cfg *Func) const {
case Tss2si: { case Tss2si: {
assert(isScalarFloatingType(SrcTy)); assert(isScalarFloatingType(SrcTy));
assert(isScalarIntegerType(DestTy)); assert(isScalarIntegerType(DestTy));
if (!Traits::Is64Bit) { if (Traits::Is64Bit) {
assert(typeWidthInBytes(DestTy) <= 4);
} else {
assert(DestTy == IceType_i32 || DestTy == IceType_i64); assert(DestTy == IceType_i32 || DestTy == IceType_i64);
} else {
assert(typeWidthInBytes(DestTy) <= 4);
} }
static const CastEmitterRegOp<GPRRegister, XmmRegister> Emitter = { static const CastEmitterRegOp<GPRRegister, XmmRegister> Emitter = {
&Assembler::cvttss2si, &Assembler::cvttss2si}; &Assembler::cvttss2si, &Assembler::cvttss2si};
...@@ -1724,6 +1724,21 @@ void InstImpl<TraitsType>::InstX86Cvt::emitIAS(const Cfg *Func) const { ...@@ -1724,6 +1724,21 @@ void InstImpl<TraitsType>::InstX86Cvt::emitIAS(const Cfg *Func) const {
Emitter); Emitter);
return; return;
} }
case Ss2si: {
assert(isScalarFloatingType(SrcTy));
assert(isScalarIntegerType(DestTy));
if (Traits::Is64Bit) {
assert(DestTy == IceType_i32 || DestTy == IceType_i64);
} else {
assert(typeWidthInBytes(DestTy) <= 4);
}
static const CastEmitterRegOp<GPRRegister, XmmRegister> Emitter = {
&Assembler::cvtss2si, &Assembler::cvtss2si};
emitIASCastRegOp<GPRRegister, XmmRegister, Traits::getEncodedGPR,
Traits::getEncodedXmm>(Func, DestTy, Dest, SrcTy, Src,
Emitter);
return;
}
case Float2float: { case Float2float: {
assert(isScalarFloatingType(SrcTy)); assert(isScalarFloatingType(SrcTy));
assert(isScalarFloatingType(DestTy)); assert(isScalarFloatingType(DestTy));
...@@ -1749,6 +1764,14 @@ void InstImpl<TraitsType>::InstX86Cvt::emitIAS(const Cfg *Func) const { ...@@ -1749,6 +1764,14 @@ void InstImpl<TraitsType>::InstX86Cvt::emitIAS(const Cfg *Func) const {
emitIASRegOpTyXMM(Func, DestTy, Dest, Src, Emitter); emitIASRegOpTyXMM(Func, DestTy, Dest, Src, Emitter);
return; return;
} }
case Ps2dq: {
assert(isVectorFloatingType(SrcTy));
assert(isVectorIntegerType(DestTy));
static const XmmEmitterRegOp Emitter = {&Assembler::cvtps2dq,
&Assembler::cvtps2dq};
emitIASRegOpTyXMM(Func, DestTy, Dest, Src, Emitter);
return;
}
} }
} }
......
...@@ -68,6 +68,7 @@ public: ...@@ -68,6 +68,7 @@ public:
MultiplyAddPairs, MultiplyAddPairs,
MultiplyHighSigned, MultiplyHighSigned,
MultiplyHighUnsigned, MultiplyHighUnsigned,
Nearbyint,
Round, Round,
SignMask, SignMask,
StoreSubVector, StoreSubVector,
......
...@@ -2981,13 +2981,11 @@ void TargetX86Base<TraitsType>::lowerCast(const InstCast *Instr) { ...@@ -2981,13 +2981,11 @@ void TargetX86Base<TraitsType>::lowerCast(const InstCast *Instr) {
} }
case InstCast::Fptosi: case InstCast::Fptosi:
if (isVectorType(DestTy)) { if (isVectorType(DestTy)) {
assert(DestTy == IceType_v4i32 && assert(DestTy == IceType_v4i32);
Instr->getSrc(0)->getType() == IceType_v4f32); assert(Instr->getSrc(0)->getType() == IceType_v4f32);
Operand *Src0RM = legalize(Instr->getSrc(0), Legal_Reg | Legal_Mem); Operand *Src0R = legalizeToReg(Instr->getSrc(0));
if (llvm::isa<X86OperandMem>(Src0RM))
Src0RM = legalizeToReg(Src0RM);
Variable *T = makeReg(DestTy); Variable *T = makeReg(DestTy);
_cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); _cvt(T, Src0R, Traits::Insts::Cvt::Tps2dq);
_movp(Dest, T); _movp(Dest, T);
} else if (!Traits::Is64Bit && DestTy == IceType_i64) { } else if (!Traits::Is64Bit && DestTy == IceType_i64) {
llvm::report_fatal_error("Helper call was expected"); llvm::report_fatal_error("Helper call was expected");
...@@ -3047,13 +3045,11 @@ void TargetX86Base<TraitsType>::lowerCast(const InstCast *Instr) { ...@@ -3047,13 +3045,11 @@ void TargetX86Base<TraitsType>::lowerCast(const InstCast *Instr) {
break; break;
case InstCast::Sitofp: case InstCast::Sitofp:
if (isVectorType(DestTy)) { if (isVectorType(DestTy)) {
assert(DestTy == IceType_v4f32 && assert(DestTy == IceType_v4f32);
Instr->getSrc(0)->getType() == IceType_v4i32); assert(Instr->getSrc(0)->getType() == IceType_v4i32);
Operand *Src0RM = legalize(Instr->getSrc(0), Legal_Reg | Legal_Mem); Operand *Src0R = legalizeToReg(Instr->getSrc(0));
if (llvm::isa<X86OperandMem>(Src0RM))
Src0RM = legalizeToReg(Src0RM);
Variable *T = makeReg(DestTy); Variable *T = makeReg(DestTy);
_cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps); _cvt(T, Src0R, Traits::Insts::Cvt::Dq2ps);
_movp(Dest, T); _movp(Dest, T);
} else if (!Traits::Is64Bit && Instr->getSrc(0)->getType() == IceType_i64) { } else if (!Traits::Is64Bit && Instr->getSrc(0)->getType() == IceType_i64) {
llvm::report_fatal_error("Helper call was expected"); llvm::report_fatal_error("Helper call was expected");
...@@ -4571,7 +4567,46 @@ void TargetX86Base<TraitsType>::lowerIntrinsicCall( ...@@ -4571,7 +4567,46 @@ void TargetX86Base<TraitsType>::lowerIntrinsicCall(
_movp(Dest, T); _movp(Dest, T);
return; return;
} }
case Intrinsics::Nearbyint: {
Operand *Src = Instr->getArg(0);
Variable *Dest = Instr->getDest();
Type DestTy = Dest->getType();
if (isVectorType(DestTy)) {
assert(DestTy == IceType_v4i32);
assert(Src->getType() == IceType_v4f32);
Operand *Src0R = legalizeToReg(Src);
Variable *T = makeReg(DestTy);
_cvt(T, Src0R, Traits::Insts::Cvt::Ps2dq);
_movp(Dest, T);
} else if (!Traits::Is64Bit && DestTy == IceType_i64) {
llvm::report_fatal_error("Helper call was expected");
} else {
Operand *Src0RM = legalize(Src, Legal_Reg | Legal_Mem);
// t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
Variable *T_1 = nullptr;
if (Traits::Is64Bit && DestTy == IceType_i64) {
T_1 = makeReg(IceType_i64);
} else {
assert(DestTy != IceType_i64);
T_1 = makeReg(IceType_i32);
}
// cvt() requires its integer argument to be a GPR.
Variable *T_2 = makeReg(DestTy);
if (isByteSizedType(DestTy)) {
assert(T_1->getType() == IceType_i32);
T_1->setRegClass(RCX86_Is32To8);
T_2->setRegClass(RCX86_IsTrunc8Rcvr);
}
_cvt(T_1, Src0RM, Traits::Insts::Cvt::Ss2si);
_mov(T_2, T_1); // T_1 and T_2 may have different integer types
if (DestTy == IceType_i1)
_and(T_2, Ctx->getConstantInt1(1));
_mov(Dest, T_2);
}
return;
}
case Intrinsics::Round: { case Intrinsics::Round: {
assert(InstructionSet >= Traits::SSE4_1);
Variable *Dest = Instr->getDest(); Variable *Dest = Instr->getDest();
Operand *Src = Instr->getArg(0); Operand *Src = Instr->getArg(0);
Operand *Mode = Instr->getArg(1); Operand *Mode = Instr->getArg(1);
...@@ -7311,7 +7346,8 @@ void TargetX86Base<TraitsType>::genTargetHelperCallFor(Inst *Instr) { ...@@ -7311,7 +7346,8 @@ void TargetX86Base<TraitsType>::genTargetHelperCallFor(Inst *Instr) {
break; break;
case InstCast::Fptoui: case InstCast::Fptoui:
if (isVectorType(DestTy)) { if (isVectorType(DestTy)) {
assert(DestTy == IceType_v4i32 && SrcType == IceType_v4f32); assert(DestTy == IceType_v4i32);
assert(SrcType == IceType_v4f32);
HelperID = RuntimeHelper::H_fptoui_4xi32_f32; HelperID = RuntimeHelper::H_fptoui_4xi32_f32;
} else if (DestTy == IceType_i64 || } else if (DestTy == IceType_i64 ||
(!Traits::Is64Bit && DestTy == IceType_i32)) { (!Traits::Is64Bit && DestTy == IceType_i32)) {
...@@ -7343,7 +7379,8 @@ void TargetX86Base<TraitsType>::genTargetHelperCallFor(Inst *Instr) { ...@@ -7343,7 +7379,8 @@ void TargetX86Base<TraitsType>::genTargetHelperCallFor(Inst *Instr) {
break; break;
case InstCast::Uitofp: case InstCast::Uitofp:
if (isVectorType(SrcType)) { if (isVectorType(SrcType)) {
assert(DestTy == IceType_v4f32 && SrcType == IceType_v4i32); assert(DestTy == IceType_v4f32);
assert(SrcType == IceType_v4i32);
HelperID = RuntimeHelper::H_uitofp_4xi32_4xf32; HelperID = RuntimeHelper::H_uitofp_4xi32_4xf32;
} else if (SrcType == IceType_i64 || } else if (SrcType == IceType_i64 ||
(!Traits::Is64Bit && SrcType == IceType_i32)) { (!Traits::Is64Bit && SrcType == IceType_i32)) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment