Commit dbf81e0c by Nicolas Capens Committed by Nicolas Capens

Implement an intrinsic for nearbyint.

The round intrinsic gets translated to roundps on x86, which is SSE4.1 only. cvtps2dq + cvtdq2ps can be used as an SSE2 fallback. cvtps2dq also corresponds to LLVM's nearbyint intrinsic. BUG=swiftshader:20 Change-Id: I8b5896c443f202a5b25125b4e5049b0b3d3a11b0 Reviewed-on: https://chromium-review.googlesource.com/428491 Reviewed-by: Jim Stichnoth <stichnot@chromium.org>
parent 4e679e51
......@@ -467,6 +467,9 @@ public:
void cvttps2dq(Type, XmmRegister dst, XmmRegister src);
void cvttps2dq(Type, XmmRegister dst, const Address &src);
// cvtps2dq: packed float -> packed integer conversion, with the source given
// either as an XMM register or as a memory operand. The leading Type
// parameter is ignored by both definitions.
void cvtps2dq(Type, XmmRegister dst, XmmRegister src);
void cvtps2dq(Type, XmmRegister dst, const Address &src);
void cvtsi2ss(Type DestTy, XmmRegister dst, Type SrcTy, GPRRegister src);
void cvtsi2ss(Type DestTy, XmmRegister dst, Type SrcTy, const Address &src);
......@@ -476,6 +479,9 @@ public:
void cvttss2si(Type DestTy, GPRRegister dst, Type SrcTy, XmmRegister src);
void cvttss2si(Type DestTy, GPRRegister dst, Type SrcTy, const Address &src);
// cvtss2si: scalar float -> integer conversion into a general-purpose
// register, with the source given either as an XMM register or as a memory
// operand. SrcTy selects the instruction prefix and DestTy is forwarded to the
// REX emission in the definitions.
void cvtss2si(Type DestTy, GPRRegister dst, Type SrcTy, XmmRegister src);
void cvtss2si(Type DestTy, GPRRegister dst, Type SrcTy, const Address &src);
void ucomiss(Type Ty, XmmRegister a, XmmRegister b);
void ucomiss(Type Ty, XmmRegister a, const Address &b);
......
......@@ -2041,6 +2041,29 @@ void AssemblerX86Base<TraitsType>::cvttps2dq(Type /* Ignore */, XmmRegister dst,
}
/// Emits the register-source form of cvtps2dq: the byte 0x66, whatever
/// emitRexRB() produces for the (dst, src) pair, the bytes 0x0F 0x5B, and
/// finally the XMM register operand encoding.
///
/// The Type parameter is not consulted.
template <typename TraitsType>
void AssemblerX86Base<TraitsType>::cvtps2dq(Type /* Ignore */, XmmRegister dst,
                                            XmmRegister src) {
  constexpr uint8_t CvtPs2DqPrefix = 0x66;
  constexpr uint8_t CvtPs2DqEscape = 0x0F;
  constexpr uint8_t CvtPs2DqOpcode = 0x5B;

  // Keep the guard alive for the whole emission sequence.
  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);

  emitUint8(CvtPs2DqPrefix);
  // The prefix byte above must precede the REX emission.
  emitRexRB(RexTypeIrrelevant, dst, src);
  emitUint8(CvtPs2DqEscape);
  emitUint8(CvtPs2DqOpcode);
  emitXmmRegisterOperand(dst, src);
}
/// Emits the memory-source form of cvtps2dq: the byte 0x66, the address-size
/// override handling, whatever emitRex() produces for (src, dst), the bytes
/// 0x0F 0x5B, and finally the memory operand with dst as the register field.
///
/// The Type parameter is not consulted.
template <typename TraitsType>
void AssemblerX86Base<TraitsType>::cvtps2dq(Type /* Ignore */, XmmRegister dst,
                                            const Address &src) {
  constexpr uint8_t CvtPs2DqPrefix = 0x66;
  constexpr uint8_t CvtPs2DqEscape = 0x0F;
  constexpr uint8_t CvtPs2DqOpcode = 0x5B;

  // Keep the guard alive for the whole emission sequence.
  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);

  emitUint8(CvtPs2DqPrefix);
  emitAddrSizeOverridePrefix();
  // All prefix bytes are out; REX handling comes right before the opcode.
  emitRex(RexTypeIrrelevant, src, dst);
  emitUint8(CvtPs2DqEscape);
  emitUint8(CvtPs2DqOpcode);
  emitOperand(gprEncoding(dst), src);
}
template <typename TraitsType>
void AssemblerX86Base<TraitsType>::cvtsi2ss(Type DestTy, XmmRegister dst,
Type SrcTy, GPRRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
......@@ -2111,6 +2134,29 @@ void AssemblerX86Base<TraitsType>::cvttss2si(Type DestTy, GPRRegister dst,
}
/// Emits the register-source form of cvtss2si / cvtsd2si, converting the
/// scalar float in \p src into the general-purpose register \p dst.
///
/// \p SrcTy picks the leading prefix byte (0xF3 when
/// isFloat32Asserting32Or64() reports a 32-bit float, 0xF2 otherwise) and
/// \p DestTy is forwarded to emitRexRB(). The opcode bytes are 0x0F 0x2D.
template <typename TraitsType>
void AssemblerX86Base<TraitsType>::cvtss2si(Type DestTy, GPRRegister dst,
                                            Type SrcTy, XmmRegister src) {
  // Keep the guard alive for the whole emission sequence.
  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);

  // Prefix byte distinguishes the single- and double-precision source forms.
  if (isFloat32Asserting32Or64(SrcTy)) {
    emitUint8(0xF3);
  } else {
    emitUint8(0xF2);
  }

  emitRexRB(DestTy, dst, src);
  emitUint8(0x0F);
  emitUint8(0x2D);
  emitXmmRegisterOperand(dst, src);
}
/// Emits the memory-source form of cvtss2si / cvtsd2si, converting the scalar
/// float at \p src into the general-purpose register \p dst.
///
/// \p SrcTy picks the leading prefix byte (0xF3 when
/// isFloat32Asserting32Or64() reports a 32-bit float, 0xF2 otherwise) and
/// \p DestTy is forwarded to emitRex(). The opcode bytes are 0x0F 0x2D.
template <typename TraitsType>
void AssemblerX86Base<TraitsType>::cvtss2si(Type DestTy, GPRRegister dst,
                                            Type SrcTy, const Address &src) {
  // Keep the guard alive for the whole emission sequence.
  AssemblerBuffer::EnsureCapacity CapacityGuard(&Buffer);

  // Prefix byte distinguishes the single- and double-precision source forms.
  if (isFloat32Asserting32Or64(SrcTy)) {
    emitUint8(0xF3);
  } else {
    emitUint8(0xF2);
  }

  emitAddrSizeOverridePrefix();
  // All prefix bytes are out; REX handling comes right before the opcode.
  emitRex(DestTy, src, dst);
  emitUint8(0x0F);
  emitUint8(0x2D);
  emitOperand(gprEncoding(dst), src);
}
template <typename TraitsType>
void AssemblerX86Base<TraitsType>::ucomiss(Type Ty, XmmRegister a,
XmmRegister b) {
AssemblerBuffer::EnsureCapacity ensured(&Buffer);
......
......@@ -2546,7 +2546,7 @@ template <typename TraitsType> struct InstImpl {
InstX86Cvt &operator=(const InstX86Cvt &) = delete;
public:
enum CvtVariant { Si2ss, Tss2si, Float2float, Dq2ps, Tps2dq };
enum CvtVariant { Si2ss, Tss2si, Ss2si, Float2float, Dq2ps, Tps2dq, Ps2dq };
static InstX86Cvt *create(Cfg *Func, Variable *Dest, Operand *Source,
CvtVariant Variant) {
return new (Func->allocate<InstX86Cvt>())
......
......@@ -1063,10 +1063,10 @@ void InstImpl<TraitsType>::InstX86Movmsk::emitIAS(const Cfg *Func) const {
const Type SrcTy = Src->getType();
assert(isVectorType(SrcTy));
assert(isScalarIntegerType(DestTy));
if (!Traits::Is64Bit) {
assert(typeWidthInBytes(DestTy) <= 4);
} else {
if (Traits::Is64Bit) {
assert(DestTy == IceType_i32 || DestTy == IceType_i64);
} else {
assert(typeWidthInBytes(DestTy) <= 4);
}
XmmRegister SrcReg = Traits::getEncodedXmm(Src->getRegNum());
GPRRegister DestReg = Traits::getEncodedGPR(Dest->getRegNum());
......@@ -1712,10 +1712,10 @@ void InstImpl<TraitsType>::InstX86Cvt::emitIAS(const Cfg *Func) const {
case Tss2si: {
assert(isScalarFloatingType(SrcTy));
assert(isScalarIntegerType(DestTy));
if (!Traits::Is64Bit) {
assert(typeWidthInBytes(DestTy) <= 4);
} else {
if (Traits::Is64Bit) {
assert(DestTy == IceType_i32 || DestTy == IceType_i64);
} else {
assert(typeWidthInBytes(DestTy) <= 4);
}
static const CastEmitterRegOp<GPRRegister, XmmRegister> Emitter = {
&Assembler::cvttss2si, &Assembler::cvttss2si};
......@@ -1724,6 +1724,21 @@ void InstImpl<TraitsType>::InstX86Cvt::emitIAS(const Cfg *Func) const {
Emitter);
return;
}
// Scalar float -> scalar integer conversion. Mirrors the Tss2si case, but
// dispatches to Assembler::cvtss2si instead of Assembler::cvttss2si.
case Ss2si: {
assert(isScalarFloatingType(SrcTy));
assert(isScalarIntegerType(DestTy));
// 64-bit targets accept an i32 or i64 destination; 32-bit targets accept any
// destination of at most 4 bytes.
if (Traits::Is64Bit) {
assert(DestTy == IceType_i32 || DestTy == IceType_i64);
} else {
assert(typeWidthInBytes(DestTy) <= 4);
}
// Two entries: both name cvtss2si, one per source-operand overload
// (XmmRegister and Address).
static const CastEmitterRegOp<GPRRegister, XmmRegister> Emitter = {
&Assembler::cvtss2si, &Assembler::cvtss2si};
emitIASCastRegOp<GPRRegister, XmmRegister, Traits::getEncodedGPR,
Traits::getEncodedXmm>(Func, DestTy, Dest, SrcTy, Src,
Emitter);
return;
}
case Float2float: {
assert(isScalarFloatingType(SrcTy));
assert(isScalarFloatingType(DestTy));
......@@ -1749,6 +1764,14 @@ void InstImpl<TraitsType>::InstX86Cvt::emitIAS(const Cfg *Func) const {
emitIASRegOpTyXMM(Func, DestTy, Dest, Src, Emitter);
return;
}
// Vector float -> vector integer conversion, emitted through
// Assembler::cvtps2dq.
case Ps2dq: {
assert(isVectorFloatingType(SrcTy));
assert(isVectorIntegerType(DestTy));
// Two entries: both name cvtps2dq, one per source-operand overload
// (XmmRegister and Address).
static const XmmEmitterRegOp Emitter = {&Assembler::cvtps2dq,
&Assembler::cvtps2dq};
emitIASRegOpTyXMM(Func, DestTy, Dest, Src, Emitter);
return;
}
}
}
......
......@@ -68,6 +68,7 @@ public:
MultiplyAddPairs,
MultiplyHighSigned,
MultiplyHighUnsigned,
Nearbyint,
Round,
SignMask,
StoreSubVector,
......
......@@ -2981,13 +2981,11 @@ void TargetX86Base<TraitsType>::lowerCast(const InstCast *Instr) {
}
case InstCast::Fptosi:
if (isVectorType(DestTy)) {
assert(DestTy == IceType_v4i32 &&
Instr->getSrc(0)->getType() == IceType_v4f32);
Operand *Src0RM = legalize(Instr->getSrc(0), Legal_Reg | Legal_Mem);
if (llvm::isa<X86OperandMem>(Src0RM))
Src0RM = legalizeToReg(Src0RM);
assert(DestTy == IceType_v4i32);
assert(Instr->getSrc(0)->getType() == IceType_v4f32);
Operand *Src0R = legalizeToReg(Instr->getSrc(0));
Variable *T = makeReg(DestTy);
_cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq);
_cvt(T, Src0R, Traits::Insts::Cvt::Tps2dq);
_movp(Dest, T);
} else if (!Traits::Is64Bit && DestTy == IceType_i64) {
llvm::report_fatal_error("Helper call was expected");
......@@ -3047,13 +3045,11 @@ void TargetX86Base<TraitsType>::lowerCast(const InstCast *Instr) {
break;
case InstCast::Sitofp:
if (isVectorType(DestTy)) {
assert(DestTy == IceType_v4f32 &&
Instr->getSrc(0)->getType() == IceType_v4i32);
Operand *Src0RM = legalize(Instr->getSrc(0), Legal_Reg | Legal_Mem);
if (llvm::isa<X86OperandMem>(Src0RM))
Src0RM = legalizeToReg(Src0RM);
assert(DestTy == IceType_v4f32);
assert(Instr->getSrc(0)->getType() == IceType_v4i32);
Operand *Src0R = legalizeToReg(Instr->getSrc(0));
Variable *T = makeReg(DestTy);
_cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps);
_cvt(T, Src0R, Traits::Insts::Cvt::Dq2ps);
_movp(Dest, T);
} else if (!Traits::Is64Bit && Instr->getSrc(0)->getType() == IceType_i64) {
llvm::report_fatal_error("Helper call was expected");
......@@ -4571,7 +4567,46 @@ void TargetX86Base<TraitsType>::lowerIntrinsicCall(
_movp(Dest, T);
return;
}
// Lowers the Nearbyint intrinsic to a float -> integer conversion: the vector
// form uses the Ps2dq variant of cvt and the scalar form uses the Ss2si
// variant. Neither path checks InstructionSet, unlike the Round case below.
case Intrinsics::Nearbyint: {
Operand *Src = Instr->getArg(0);
Variable *Dest = Instr->getDest();
Type DestTy = Dest->getType();
if (isVectorType(DestTy)) {
// Only v4f32 -> v4i32 is supported for vectors.
assert(DestTy == IceType_v4i32);
assert(Src->getType() == IceType_v4f32);
// The source is forced into a register before the conversion.
Operand *Src0R = legalizeToReg(Src);
Variable *T = makeReg(DestTy);
_cvt(T, Src0R, Traits::Insts::Cvt::Ps2dq);
_movp(Dest, T);
} else if (!Traits::Is64Bit && DestTy == IceType_i64) {
// An i64 destination on a 32-bit target is not lowered inline here;
// reaching this point is treated as fatal.
llvm::report_fatal_error("Helper call was expected");
} else {
Operand *Src0RM = legalize(Src, Legal_Reg | Legal_Mem);
// t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
// The conversion temporary is i64 only for an i64 destination on a 64-bit
// target; every other destination type converts through an i32 temporary.
Variable *T_1 = nullptr;
if (Traits::Is64Bit && DestTy == IceType_i64) {
T_1 = makeReg(IceType_i64);
} else {
assert(DestTy != IceType_i64);
T_1 = makeReg(IceType_i32);
}
// cvt() requires its integer argument to be a GPR.
Variable *T_2 = makeReg(DestTy);
if (isByteSizedType(DestTy)) {
// Byte-sized destinations pin both temporaries to dedicated register
// classes. NOTE(review): presumably so the i32 -> i8 move below is
// encodable; confirm against the register class definitions.
assert(T_1->getType() == IceType_i32);
T_1->setRegClass(RCX86_Is32To8);
T_2->setRegClass(RCX86_IsTrunc8Rcvr);
}
_cvt(T_1, Src0RM, Traits::Insts::Cvt::Ss2si);
_mov(T_2, T_1); // T_1 and T_2 may have different integer types
// An i1 result is masked with the constant 1.
if (DestTy == IceType_i1)
_and(T_2, Ctx->getConstantInt1(1));
_mov(Dest, T_2);
}
return;
}
case Intrinsics::Round: {
assert(InstructionSet >= Traits::SSE4_1);
Variable *Dest = Instr->getDest();
Operand *Src = Instr->getArg(0);
Operand *Mode = Instr->getArg(1);
......@@ -7311,7 +7346,8 @@ void TargetX86Base<TraitsType>::genTargetHelperCallFor(Inst *Instr) {
break;
case InstCast::Fptoui:
if (isVectorType(DestTy)) {
assert(DestTy == IceType_v4i32 && SrcType == IceType_v4f32);
assert(DestTy == IceType_v4i32);
assert(SrcType == IceType_v4f32);
HelperID = RuntimeHelper::H_fptoui_4xi32_f32;
} else if (DestTy == IceType_i64 ||
(!Traits::Is64Bit && DestTy == IceType_i32)) {
......@@ -7343,7 +7379,8 @@ void TargetX86Base<TraitsType>::genTargetHelperCallFor(Inst *Instr) {
break;
case InstCast::Uitofp:
if (isVectorType(SrcType)) {
assert(DestTy == IceType_v4f32 && SrcType == IceType_v4i32);
assert(DestTy == IceType_v4f32);
assert(SrcType == IceType_v4i32);
HelperID = RuntimeHelper::H_uitofp_4xi32_4xf32;
} else if (SrcType == IceType_i64 ||
(!Traits::Is64Bit && SrcType == IceType_i32)) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment