Commit e88c7deb by John Porto

Subzero. ARM32. Vector casts.

This CL un-scalarizes all vector cast operations in Subzero. BUG= https://bugs.chromium.org/p/nativeclient/issues/detail?id=4076 R=eholk@chromium.org Review URL: https://codereview.chromium.org/1878943009 .
parent 15e77d46
...@@ -224,13 +224,17 @@ bool encodeAdvSIMDExpandImm(IValueT Value, Type ElmtTy, IValueT &Op, ...@@ -224,13 +224,17 @@ bool encodeAdvSIMDExpandImm(IValueT Value, Type ElmtTy, IValueT &Op,
return false; return false;
Imm8 = Value; Imm8 = Value;
switch (ElmtTy) { switch (ElmtTy) {
case IceType_i8:
Op = 0;
Cmode = 14; // 0b1110
return true;
case IceType_i16: case IceType_i16:
Op = 0; Op = 0;
Cmode = 8; // 100:0 Cmode = 8; // 0b1000
return true; return true;
case IceType_i32: case IceType_i32:
Op = 0; Op = 0;
Cmode = 0; // 000:0 Cmode = 0; // 0b0000
return true; return true;
default: default:
return false; return false;
...@@ -1215,6 +1219,33 @@ void AssemblerARM32::emitSIMDqqq(IValueT Opcode, Type ElmtTy, ...@@ -1215,6 +1219,33 @@ void AssemblerARM32::emitSIMDqqq(IValueT Opcode, Type ElmtTy,
isFloatingType(ElmtTy), OpcodeName); isFloatingType(ElmtTy), OpcodeName);
} }
// Emits a Q-register SIMD shift-by-immediate instruction. The caller supplies
// the already-encoded Imm6 field, which is shifted into position (bit 16) and
// unioned with Opcode. Qd and Qm are taken from OpQd and OpQm; register 0 is
// passed for Qn.
void AssemblerARM32::emitSIMDShiftqqc(IValueT Opcode, const Operand *OpQd,
                                      const Operand *OpQm, const IValueT Imm6,
                                      const char *OpcodeName) {
  constexpr IValueT Imm6Shift = 16;
  constexpr bool UseQRegs = true;
  constexpr bool IsFloatTy = false;
  const IValueT DestQ = encodeQRegister(OpQd, "Qd", OpcodeName);
  const IValueT UnusedQn = 0;
  const IValueT SrcQ = encodeQRegister(OpQm, "Qm", OpcodeName);
  const IValueT ShiftOpcode = Opcode | (Imm6 << Imm6Shift);
  emitSIMDBase(ShiftOpcode, mapQRegToDReg(DestQ), mapQRegToDReg(UnusedQn),
               mapQRegToDReg(SrcQ), UseQRegs, IsFloatTy);
}
// Emits a Q-register SIMD conversion instruction. The fixed bits of the
// conversion pattern are unioned with the caller's Opcode (which selects the
// specific conversion); Qd and Qm come from OpQd and OpQm, and register 0 is
// passed for Qn.
void AssemblerARM32::emitSIMDCvtqq(IValueT Opcode, const Operand *OpQd,
                                   const Operand *OpQm,
                                   const char *OpcodeName) {
  constexpr IValueT CvtFixedBits =
      B24 | B23 | B21 | B20 | B19 | B17 | B16 | B10 | B9;
  constexpr bool UseQRegs = true;
  constexpr bool IsFloatTy = false;
  constexpr IValueT UnusedQn = 0;
  const IValueT DestQ = encodeQRegister(OpQd, "Qd", OpcodeName);
  const IValueT SrcQ = encodeQRegister(OpQm, "Qm", OpcodeName);
  emitSIMDBase(CvtFixedBits | Opcode, mapQRegToDReg(DestQ),
               mapQRegToDReg(UnusedQn), mapQRegToDReg(SrcQ), UseQRegs,
               IsFloatTy);
}
void AssemblerARM32::emitVFPddd(CondARM32::Cond Cond, IValueT Opcode, void AssemblerARM32::emitVFPddd(CondARM32::Cond Cond, IValueT Opcode,
IValueT Dd, IValueT Dn, IValueT Dm) { IValueT Dd, IValueT Dn, IValueT Dm) {
assert(Dd < RegARM32::getNumDRegs()); assert(Dd < RegARM32::getNumDRegs());
...@@ -2557,6 +2588,50 @@ void AssemblerARM32::vcvtus(const Operand *OpSd, const Operand *OpSm, ...@@ -2557,6 +2588,50 @@ void AssemblerARM32::vcvtus(const Operand *OpSd, const Operand *OpSm,
emitVFPsss(Cond, VcvtsiOpcode, Sd, S0, Sm); emitVFPsss(Cond, VcvtsiOpcode, Sd, S0, Sm);
} }
void AssemblerARM32::vcvtqsi(const Operand *OpQd, const Operand *OpQm) {
// VCVT (between floating-point and integer, Advanced SIMD)
// - ARM Section A8.8.305, encoding A1:
// vcvt<c>.f32.s32 <Qd>, <Qm>
//
// 111100111D11ss11dddd011ooQM0mmmm where Ddddd=Qd, Mmmmm=Qm, and 10=op.
constexpr const char *Vcvtqsi = "vcvt.s32.f32";
constexpr IValueT VcvtqsiOpcode = B8;
emitSIMDCvtqq(VcvtqsiOpcode, OpQd, OpQm, Vcvtqsi);
}
void AssemblerARM32::vcvtqsu(const Operand *OpQd, const Operand *OpQm) {
// VCVT (between floating-point and integer, Advanced SIMD)
// - ARM Section A8.8.305, encoding A1:
// vcvt<c>.f32.u32 <Qd>, <Qm>
//
// 111100111D11ss11dddd011ooQM0mmmm where Ddddd=Qd, Mmmmm=Qm, and 11=op.
constexpr const char *Vcvtqsu = "vcvt.u32.f32";
constexpr IValueT VcvtqsuOpcode = B8 | B7;
emitSIMDCvtqq(VcvtqsuOpcode, OpQd, OpQm, Vcvtqsu);
}
void AssemblerARM32::vcvtqis(const Operand *OpQd, const Operand *OpQm) {
// VCVT (between floating-point and integer, Advanced SIMD)
// - ARM Section A8.8.305, encoding A1:
// vcvt<c>.f32.s32 <Qd>, <Qm>
//
// 111100111D11ss11dddd011ooQM0mmmm where Ddddd=Qd, Mmmmm=Qm, and 01=op.
constexpr const char *Vcvtqis = "vcvt.f32.s32";
constexpr IValueT VcvtqisOpcode = 0;
emitSIMDCvtqq(VcvtqisOpcode, OpQd, OpQm, Vcvtqis);
}
void AssemblerARM32::vcvtqus(const Operand *OpQd, const Operand *OpQm) {
// VCVT (between floating-point and integer, Advanced SIMD)
// - ARM Section A8.8.305, encoding A1:
// vcvt<c>.f32.u32 <Qd>, <Qm>
//
// 111100111D11ss11dddd011ooQM0mmmm where Ddddd=Qd, Mmmmm=Qm, and 01=op.
constexpr const char *Vcvtqus = "vcvt.f32.u32";
constexpr IValueT VcvtqusOpcode = B7;
emitSIMDCvtqq(VcvtqusOpcode, OpQd, OpQm, Vcvtqus);
}
void AssemblerARM32::emitVFPds(CondARM32::Cond Cond, IValueT Opcode, IValueT Dd, void AssemblerARM32::emitVFPds(CondARM32::Cond Cond, IValueT Opcode, IValueT Dd,
IValueT Sm) { IValueT Sm) {
assert(Dd < RegARM32::getNumDRegs()); assert(Dd < RegARM32::getNumDRegs());
...@@ -3267,6 +3342,70 @@ void AssemblerARM32::vshlqi(Type ElmtTy, const Operand *OpQd, ...@@ -3267,6 +3342,70 @@ void AssemblerARM32::vshlqi(Type ElmtTy, const Operand *OpQd,
emitSIMDqqq(VshlOpcode, ElmtTy, OpQd, OpQn, OpQm, Vshl); emitSIMDqqq(VshlOpcode, ElmtTy, OpQd, OpQn, OpQm, Vshl);
} }
namespace {

// Selects which immediate-shift encoding encodeSIMDShiftImm6() produces.
enum SIMDShiftType { ST_Vshl, ST_Vshr };

// Computes the imm6 field for a vector shift by the constant held in Imm6,
// for elements of type ElmtTy (i8, i16 or i32). With W the element width in
// bits:
//   - ST_Vshl yields (shift - W) masked to the low bits, which is congruent to
//     W + shift modulo 2*W;
//   - ST_Vshr yields (2*W - shift) masked the same way.
// The shift amount is asserted to lie strictly between 0 and W.
IValueT encodeSIMDShiftImm6(SIMDShiftType Shift, Type ElmtTy,
                            const ConstantInteger32 *Imm6) {
  const IValueT ShiftAmount = Imm6->getValue();
  assert(ShiftAmount > 0);
  const SizeT ElmtBits = getScalarIntBitWidth(ElmtTy);
  assert(ShiftAmount < ElmtBits);
  assert(ElmtTy == IceType_i8 || ElmtTy == IceType_i16 ||
         ElmtTy == IceType_i32);
  // Keeps only the bits that encode both the element size and the shift.
  const IValueT FieldMask = 2 * ElmtBits - 1;
  if (Shift == ST_Vshl) {
    return (ShiftAmount - ElmtBits) & FieldMask;
  }
  return (2 * ElmtBits - ShiftAmount) & FieldMask;
}

} // end of anonymous namespace
void AssemblerARM32::vshlqc(Type ElmtTy, const Operand *OpQd,
                            const Operand *OpQm,
                            const ConstantInteger32 *Imm6) {
  // VSHL - ARM section A8.8.395, encoding A1:
  //   vshl Qd, Qm, #Imm
  //
  // 1111001U1Diiiiiidddd0101LQM1mmmm where Ddddd=Qd, Mmmmm=Qm, iiiiii=Imm6,
  // 0=U, 1=Q, 0=L.
  assert(isScalarIntegerType(ElmtTy) &&
         "vshl expects vector with integer element type");
  constexpr IValueT VshlImmOpcode = B23 | B10 | B8 | B4;
  // The element size and shift amount are folded together into imm6.
  const IValueT EncodedImm6 = encodeSIMDShiftImm6(ST_Vshl, ElmtTy, Imm6);
  emitSIMDShiftqqc(VshlImmOpcode, OpQd, OpQm, EncodedImm6, "vshl");
}
void AssemblerARM32::vshrqic(Type ElmtTy, const Operand *OpQd,
                             const Operand *OpQm,
                             const ConstantInteger32 *Imm6) {
  // VSHR (signed) - ARM section A8.8.398, encoding A1:
  //   vshr.s<size> Qd, Qm, #Imm
  //
  // 1111001U1Diiiiiidddd0000LQM1mmmm where Ddddd=Qd, Mmmmm=Qm, iiiiii=Imm6,
  // 0=U, 1=Q, 0=L.
  assert(isScalarIntegerType(ElmtTy) &&
         "vshr expects vector with integer element type");
  constexpr IValueT VshrSignedOpcode = B23 | B4;
  // The element size and shift amount are folded together into imm6.
  const IValueT EncodedImm6 = encodeSIMDShiftImm6(ST_Vshr, ElmtTy, Imm6);
  emitSIMDShiftqqc(VshrSignedOpcode, OpQd, OpQm, EncodedImm6, "vshr");
}
void AssemblerARM32::vshrquc(Type ElmtTy, const Operand *OpQd,
                             const Operand *OpQm,
                             const ConstantInteger32 *Imm6) {
  // VSHR (unsigned) - ARM section A8.8.398, encoding A1:
  //   vshr.u<size> Qd, Qm, #Imm
  //
  // 1111001U1Diiiiiidddd0000LQM1mmmm where Ddddd=Qd, Mmmmm=Qm, iiiiii=Imm6,
  // 1=U, 1=Q, 0=L.
  assert(isScalarIntegerType(ElmtTy) &&
         "vshr expects vector with integer element type");
  constexpr const char *Vshr = "vshr";
  // B24 is the U bit. It must be set for the unsigned (logical) shift;
  // without it the encoding is identical to the signed variant (vshrqic) and
  // the instruction would perform an arithmetic shift instead.
  constexpr IValueT VshrOpcode = B24 | B23 | B4;
  emitSIMDShiftqqc(VshrOpcode, OpQd, OpQm,
                   encodeSIMDShiftImm6(ST_Vshr, ElmtTy, Imm6), Vshr);
}
void AssemblerARM32::vshlqu(Type ElmtTy, const Operand *OpQd, void AssemblerARM32::vshlqu(Type ElmtTy, const Operand *OpQd,
const Operand *OpQm, const Operand *OpQn) { const Operand *OpQm, const Operand *OpQn) {
// VSHL - ARM section A8.8.396, encoding A1: // VSHL - ARM section A8.8.396, encoding A1:
......
...@@ -381,6 +381,14 @@ public: ...@@ -381,6 +381,14 @@ public:
// vcvt<c>.u32.f32 // vcvt<c>.u32.f32
void vcvtus(const Operand *OpSd, const Operand *OpSm, CondARM32::Cond Cond); void vcvtus(const Operand *OpSd, const Operand *OpSm, CondARM32::Cond Cond);
// Vector (Q register) forms of vcvt between f32 and 32-bit integer elements.
// vcvt.s32.f32
void vcvtqsi(const Operand *OpQd, const Operand *OpQm);
// vcvt.u32.f32
void vcvtqsu(const Operand *OpQd, const Operand *OpQm);
// vcvt.f32.s32
void vcvtqis(const Operand *OpQd, const Operand *OpQm);
// vcvt.f32.u32
void vcvtqus(const Operand *OpQd, const Operand *OpQm);
void vdivd(const Operand *OpDd, const Operand *OpDn, const Operand *OpDm, void vdivd(const Operand *OpDd, const Operand *OpDn, const Operand *OpDm,
CondARM32::Cond Cond); CondARM32::Cond Cond);
...@@ -511,6 +519,15 @@ public: ...@@ -511,6 +519,15 @@ public:
void vshlqu(Type ElmtTy, const Operand *OpQd, const Operand *OpQm, void vshlqu(Type ElmtTy, const Operand *OpQd, const Operand *OpQm,
const Operand *OpQn); const Operand *OpQn);
// Vector shifts by a constant. NOTE: the last parameter is the immediate shift
// amount, not a register operand; it is named Imm6 in the definitions.
// vshl Qd, Qm, #Imm
void vshlqc(Type ElmtTy, const Operand *OpQd, const Operand *OpQm,
const ConstantInteger32 *OpQn);
// vshr Qd, Qm, #Imm -- called by InstARM32Vshr::emitIAS for FS_Signed.
void vshrqic(Type ElmtTy, const Operand *OpQd, const Operand *OpQm,
const ConstantInteger32 *OpQn);
// vshr Qd, Qm, #Imm -- called by InstARM32Vshr::emitIAS for FS_Unsigned.
void vshrquc(Type ElmtTy, const Operand *OpQd, const Operand *OpQm,
const ConstantInteger32 *OpQn);
void vsqrtd(const Operand *OpDd, const Operand *OpDm, CondARM32::Cond Cond); void vsqrtd(const Operand *OpDd, const Operand *OpDm, CondARM32::Cond Cond);
void vsqrts(const Operand *OpSd, const Operand *OpSm, CondARM32::Cond Cond); void vsqrts(const Operand *OpSd, const Operand *OpSm, CondARM32::Cond Cond);
...@@ -739,6 +756,20 @@ private: ...@@ -739,6 +756,20 @@ private:
const Operand *OpQn, const Operand *OpQm, const Operand *OpQn, const Operand *OpQm,
const char *OpcodeName); const char *OpcodeName);
// Implements various forms of vector (SIMD) shifts by an immediate using Q
// registers. Follows the pattern 1111001U1Diiiiiiddddxxxx0QM1mmmm where
// Dddd=Qd, Mmmm=Qm and iiiiii=Imm6 (already encoded by the caller and placed
// at bit 16). Opcode is unioned into the pattern and supplies the
// instruction-specific bits: the vshl caller passes B23 | B10 | B8 | B4, while
// the vshr callers pass an Opcode without B10 | B8.
void emitSIMDShiftqqc(IValueT Opcode, const Operand *OpQd,
const Operand *OpQm, const IValueT Imm6,
const char *OpcodeName);
// Implements various forms of vector (SIMD) casts between (signed and
// unsigned) integer and floating point types (f32). Implements pattern
// 111100111D11ss11dddd011ooQM0mmmm where Dddd=Qd, Mmmm=Qm, 10=ss, op=00, 1=Q,
// and Opcode is unioned into the pattern (the callers use it to set the op
// field). CvtName is the instruction name forwarded to encodeQRegister; the
// definition calls this parameter OpcodeName.
void emitSIMDCvtqq(IValueT Opcode, const Operand *OpQd, const Operand *OpQm,
const char *CvtName);
// Pattern cccctttxxxxnnnn0000iiiiiiiiiiii where cccc=Cond, nnnn=Rn, // Pattern cccctttxxxxnnnn0000iiiiiiiiiiii where cccc=Cond, nnnn=Rn,
// ttt=Instruction type (derived from OpSrc1), iiiiiiiiiiii is derived from // ttt=Instruction type (derived from OpSrc1), iiiiiiiiiiii is derived from
// OpSrc1, and xxxx=Opcode. // OpSrc1, and xxxx=Opcode.
......
...@@ -835,10 +835,48 @@ template <> void InstARM32Vshl::emitIAS(const Cfg *Func) const { ...@@ -835,10 +835,48 @@ template <> void InstARM32Vshl::emitIAS(const Cfg *Func) const {
switch (Sign) { switch (Sign) {
case InstARM32::FS_None: // defaults to unsigned. case InstARM32::FS_None: // defaults to unsigned.
case InstARM32::FS_Unsigned: case InstARM32::FS_Unsigned:
if (const auto *Imm6 = llvm::dyn_cast<ConstantInteger32>(getSrc(1))) {
Asm->vshlqc(ElmtTy, Dest, getSrc(0), Imm6);
} else {
Asm->vshlqu(ElmtTy, Dest, getSrc(0), getSrc(1)); Asm->vshlqu(ElmtTy, Dest, getSrc(0), getSrc(1));
}
break; break;
case InstARM32::FS_Signed: case InstARM32::FS_Signed:
if (const auto *Imm6 = llvm::dyn_cast<ConstantInteger32>(getSrc(1))) {
Asm->vshlqc(ElmtTy, Dest, getSrc(0), Imm6);
} else {
Asm->vshlqi(ElmtTy, Dest, getSrc(0), getSrc(1)); Asm->vshlqi(ElmtTy, Dest, getSrc(0), getSrc(1));
}
break;
}
} break;
}
}
template <> void InstARM32Vshr::emitIAS(const Cfg *Func) const {
auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
const Variable *Dest = getDest();
const Type DestTy = Dest->getType();
switch (DestTy) {
default:
llvm::report_fatal_error("Vshr not defined on type " +
typeStdString(Dest->getType()));
case IceType_v4i1:
case IceType_v8i1:
case IceType_v16i1:
case IceType_v16i8:
case IceType_v8i16:
case IceType_v4i32: {
const Type ElmtTy = typeElementType(DestTy);
const auto *Imm6 = llvm::cast<ConstantInteger32>(getSrc(1));
assert(Sign != InstARM32::FS_None);
switch (Sign) {
case InstARM32::FS_None: // defaults to unsigned.
case InstARM32::FS_Unsigned:
Asm->vshrquc(ElmtTy, Dest, getSrc(0), Imm6);
break;
case InstARM32::FS_Signed:
Asm->vshrqic(ElmtTy, Dest, getSrc(0), Imm6);
break; break;
} }
} break; } break;
...@@ -1466,6 +1504,7 @@ template <> const char *InstARM32Vmul::Opcode = "vmul"; ...@@ -1466,6 +1504,7 @@ template <> const char *InstARM32Vmul::Opcode = "vmul";
template <> const char *InstARM32Vorr::Opcode = "vorr"; template <> const char *InstARM32Vorr::Opcode = "vorr";
template <> const char *InstARM32UnaryopFP<InstARM32::Vneg>::Opcode = "vneg"; template <> const char *InstARM32UnaryopFP<InstARM32::Vneg>::Opcode = "vneg";
template <> const char *InstARM32ThreeAddrFP<InstARM32::Vshl>::Opcode = "vshl"; template <> const char *InstARM32ThreeAddrFP<InstARM32::Vshl>::Opcode = "vshl";
template <> const char *InstARM32ThreeAddrFP<InstARM32::Vshr>::Opcode = "vshr";
template <> const char *InstARM32Vsub::Opcode = "vsub"; template <> const char *InstARM32Vsub::Opcode = "vsub";
// Four-addr ops // Four-addr ops
template <> const char *InstARM32Mla::Opcode = "mla"; template <> const char *InstARM32Mla::Opcode = "mla";
...@@ -2452,6 +2491,14 @@ const char *vcvtVariantSuffix(const InstARM32Vcvt::VcvtVariant Variant) { ...@@ -2452,6 +2491,14 @@ const char *vcvtVariantSuffix(const InstARM32Vcvt::VcvtVariant Variant) {
return ".f64.f32"; return ".f64.f32";
case InstARM32Vcvt::D2s: case InstARM32Vcvt::D2s:
return ".f32.f64"; return ".f32.f64";
case InstARM32Vcvt::Vs2si:
return ".s32.f32";
case InstARM32Vcvt::Vs2ui:
return ".u32.f32";
case InstARM32Vcvt::Vsi2s:
return ".f32.s32";
case InstARM32Vcvt::Vui2s:
return ".f32.u32";
} }
llvm::report_fatal_error("Invalid VcvtVariant enum."); llvm::report_fatal_error("Invalid VcvtVariant enum.");
} }
...@@ -2503,6 +2550,18 @@ void InstARM32Vcvt::emitIAS(const Cfg *Func) const { ...@@ -2503,6 +2550,18 @@ void InstARM32Vcvt::emitIAS(const Cfg *Func) const {
case D2s: case D2s:
Asm->vcvtsd(getDest(), getSrc(0), getPredicate()); Asm->vcvtsd(getDest(), getSrc(0), getPredicate());
break; break;
case Vs2si:
Asm->vcvtqsi(getDest(), getSrc(0));
break;
case Vs2ui:
Asm->vcvtqsu(getDest(), getSrc(0));
break;
case Vsi2s:
Asm->vcvtqis(getDest(), getSrc(0));
break;
case Vui2s:
Asm->vcvtqus(getDest(), getSrc(0));
break;
} }
assert(!Asm->needsTextFixup()); assert(!Asm->needsTextFixup());
} }
...@@ -2913,6 +2972,7 @@ template class InstARM32FourAddrFP<InstARM32::Vmls>; ...@@ -2913,6 +2972,7 @@ template class InstARM32FourAddrFP<InstARM32::Vmls>;
template class InstARM32ThreeAddrFP<InstARM32::Vmul>; template class InstARM32ThreeAddrFP<InstARM32::Vmul>;
template class InstARM32UnaryopSignAwareFP<InstARM32::Vneg>; template class InstARM32UnaryopSignAwareFP<InstARM32::Vneg>;
template class InstARM32ThreeAddrSignAwareFP<InstARM32::Vshl>; template class InstARM32ThreeAddrSignAwareFP<InstARM32::Vshl>;
template class InstARM32ThreeAddrSignAwareFP<InstARM32::Vshr>;
template class InstARM32ThreeAddrFP<InstARM32::Vsub>; template class InstARM32ThreeAddrFP<InstARM32::Vsub>;
template class InstARM32LoadBase<InstARM32::Ldr>; template class InstARM32LoadBase<InstARM32::Ldr>;
......
...@@ -438,6 +438,7 @@ public: ...@@ -438,6 +438,7 @@ public:
Vneg, Vneg,
Vorr, Vorr,
Vshl, Vshl,
Vshr,
Vsqrt, Vsqrt,
Vsub Vsub
}; };
...@@ -822,12 +823,18 @@ public: ...@@ -822,12 +823,18 @@ public:
InstARM32ThreeAddrSignAwareFP(Func, Dest, Src0, Src1); InstARM32ThreeAddrSignAwareFP(Func, Dest, Src0, Src1);
} }
/// Creates a sign-aware three-address instruction whose second source is a
/// 32-bit integer constant rather than a variable. The object is constructed
/// in storage obtained from Func.
static InstARM32ThreeAddrSignAwareFP *
create(Cfg *Func, Variable *Dest, Variable *Src0, ConstantInteger32 *Src1) {
  auto *Storage = Func->allocate<InstARM32ThreeAddrSignAwareFP>();
  return new (Storage) InstARM32ThreeAddrSignAwareFP(Func, Dest, Src0, Src1);
}
void emitIAS(const Cfg *Func) const override; void emitIAS(const Cfg *Func) const override;
void setSignType(InstARM32::FPSign SignType) { this->Sign = SignType; } void setSignType(InstARM32::FPSign SignType) { this->Sign = SignType; }
private: private:
InstARM32ThreeAddrSignAwareFP(Cfg *Func, Variable *Dest, Variable *Src0, InstARM32ThreeAddrSignAwareFP(Cfg *Func, Variable *Dest, Variable *Src0,
Variable *Src1) Operand *Src1)
: InstARM32ThreeAddrFP<K>(Func, Dest, Src0, Src1) {} : InstARM32ThreeAddrFP<K>(Func, Dest, Src0, Src1) {}
}; };
...@@ -993,6 +1000,7 @@ using InstARM32Vmul = InstARM32ThreeAddrFP<InstARM32::Vmul>; ...@@ -993,6 +1000,7 @@ using InstARM32Vmul = InstARM32ThreeAddrFP<InstARM32::Vmul>;
using InstARM32Vneg = InstARM32UnaryopSignAwareFP<InstARM32::Vneg>; using InstARM32Vneg = InstARM32UnaryopSignAwareFP<InstARM32::Vneg>;
using InstARM32Vorr = InstARM32ThreeAddrFP<InstARM32::Vorr>; using InstARM32Vorr = InstARM32ThreeAddrFP<InstARM32::Vorr>;
using InstARM32Vshl = InstARM32ThreeAddrSignAwareFP<InstARM32::Vshl>; using InstARM32Vshl = InstARM32ThreeAddrSignAwareFP<InstARM32::Vshl>;
using InstARM32Vshr = InstARM32ThreeAddrSignAwareFP<InstARM32::Vshr>;
using InstARM32Vsub = InstARM32ThreeAddrFP<InstARM32::Vsub>; using InstARM32Vsub = InstARM32ThreeAddrFP<InstARM32::Vsub>;
using InstARM32Ldr = InstARM32LoadBase<InstARM32::Ldr>; using InstARM32Ldr = InstARM32LoadBase<InstARM32::Ldr>;
using InstARM32Ldrex = InstARM32LoadBase<InstARM32::Ldrex>; using InstARM32Ldrex = InstARM32LoadBase<InstARM32::Ldrex>;
...@@ -1358,7 +1366,22 @@ class InstARM32Vcvt final : public InstARM32Pred { ...@@ -1358,7 +1366,22 @@ class InstARM32Vcvt final : public InstARM32Pred {
InstARM32Vcvt &operator=(const InstARM32Vcvt &) = delete; InstARM32Vcvt &operator=(const InstARM32Vcvt &) = delete;
public: public:
enum VcvtVariant { S2si, S2ui, Si2s, Ui2s, D2si, D2ui, Si2d, Ui2d, S2d, D2s }; enum VcvtVariant {
S2si,
S2ui,
Si2s,
Ui2s,
D2si,
D2ui,
Si2d,
Ui2d,
S2d,
D2s,
Vs2si,
Vs2ui,
Vsi2s,
Vui2s,
};
static InstARM32Vcvt *create(Cfg *Func, Variable *Dest, Variable *Src, static InstARM32Vcvt *create(Cfg *Func, Variable *Dest, Variable *Src,
VcvtVariant Variant, CondARM32::Cond Predicate) { VcvtVariant Variant, CondARM32::Cond Predicate) {
return new (Func->allocate<InstARM32Vcvt>()) return new (Func->allocate<InstARM32Vcvt>())
......
...@@ -894,6 +894,14 @@ protected: ...@@ -894,6 +894,14 @@ protected:
InstARM32Vshl *_vshl(Variable *Dest, Variable *Src0, Variable *Src1) { InstARM32Vshl *_vshl(Variable *Dest, Variable *Src0, Variable *Src1) {
return Context.insert<InstARM32Vshl>(Dest, Src0, Src1); return Context.insert<InstARM32Vshl>(Dest, Src0, Src1);
} }
// Inserts a vshl whose shift amount is the constant Src1. The instruction's
// sign type is set to FS_Unsigned.
void _vshl(Variable *Dest, Variable *Src0, ConstantInteger32 *Src1) {
  auto *ShiftInst = Context.insert<InstARM32Vshl>(Dest, Src0, Src1);
  ShiftInst->setSignType(InstARM32::FS_Unsigned);
}
// Inserts a vshr whose shift amount is the constant Src1 and returns the new
// instruction. No sign type is set here; that is left to the caller.
InstARM32Vshr *_vshr(Variable *Dest, Variable *Src0,
                     ConstantInteger32 *Src1) {
  InstARM32Vshr *ShiftInst = Context.insert<InstARM32Vshr>(Dest, Src0, Src1);
  return ShiftInst;
}
void _vsqrt(Variable *Dest, Variable *Src, void _vsqrt(Variable *Dest, Variable *Src,
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Vsqrt>(Dest, Src, Pred); Context.insert<InstARM32Vsqrt>(Dest, Src, Pred);
......
...@@ -36,7 +36,7 @@ entry: ...@@ -36,7 +36,7 @@ entry:
define internal <4 x i32> @cmpEq4f32(<4 x float> %a, <4 x float> %b) { define internal <4 x i32> @cmpEq4f32(<4 x float> %a, <4 x float> %b) {
; ASM-LABEL:cmpEq4f32: ; ASM-LABEL:cmpEq4f32:
; DIS-LABEL:00000240 <cmpEq4f32>: ; DIS-LABEL:00000180 <cmpEq4f32>:
entry: entry:
%cmp = fcmp oeq <4 x float> %a, %b %cmp = fcmp oeq <4 x float> %a, %b
...@@ -45,7 +45,7 @@ entry: ...@@ -45,7 +45,7 @@ entry:
; ASM: vcmp.f32 s0, s1 ; ASM: vcmp.f32 s0, s1
; ASM: vcmp.f32 s0, s1 ; ASM: vcmp.f32 s0, s1
; ASM: vcmp.f32 s0, s1 ; ASM: vcmp.f32 s0, s1
; DIS: 27c: eeb40a60 ; DIS: 1bc: eeb40a60
%cmp.ret_ext = zext <4 x i1> %cmp to <4 x i32> %cmp.ret_ext = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %cmp.ret_ext ret <4 x i32> %cmp.ret_ext
......
...@@ -36,3 +36,17 @@ entry: ...@@ -36,3 +36,17 @@ entry:
ret float %v ret float %v
} }
define internal <4 x float> @IntVecToFloatVec(<4 x i32> %a) {
; ASM-LABEL: IntVecToFloatVec:
; DIS-LABEL: 00000030 <IntVecToFloatVec>:
; IASM-LABEL: IntVecToFloatVec:
%v = sitofp <4 x i32> %a to <4 x float>
; ASM: vcvt.f32.s32 q0, q0
; DIS: 40: f3bb0640
; IASM-NOT: vcvt.f32.s32
ret <4 x float> %v
}
...@@ -36,3 +36,17 @@ entry: ...@@ -36,3 +36,17 @@ entry:
ret float %v ret float %v
} }
define internal <4 x float> @UIntVecToFloatVec(<4 x i32> %a) {
; ASM-LABEL: UIntVecToFloatVec:
; DIS-LABEL: 00000030 <UIntVecToFloatVec>:
; IASM-LABEL: UIntVecToFloatVec:
%v = uitofp <4 x i32> %a to <4 x float>
; ASM: vcvt.f32.u32 q0, q0
; DIS: 40: f3bb06c0
; IASM-NOT: vcvt.f32.u32
ret <4 x float> %v
}
...@@ -36,3 +36,17 @@ entry: ...@@ -36,3 +36,17 @@ entry:
ret i32 %v ret i32 %v
} }
define internal <4 x i32> @FloatVecToIntVec(<4 x float> %a) {
; ASM-LABEL: FloatVecToIntVec:
; DIS-LABEL: 00000030 <FloatVecToIntVec>:
; IASM-LABEL: FloatVecToIntVec:
%v = fptosi <4 x float> %a to <4 x i32>
; ASM: vcvt.s32.f32 q0, q0
; DIS: 40: f3bb0740
; IASM-NOT: vcvt.s32.f32
ret <4 x i32> %v
}
...@@ -35,3 +35,17 @@ entry: ...@@ -35,3 +35,17 @@ entry:
ret i32 %v ret i32 %v
} }
define internal <4 x i32> @FloatVecToUIntVec(<4 x float> %a) {
; ASM-LABEL: FloatVecToUIntVec:
; DIS-LABEL: 00000030 <FloatVecToUIntVec>:
; IASM-LABEL: FloatVecToUIntVec:
%v = fptoui <4 x float> %a to <4 x i32>
; ASM: vcvt.u32.f32 q0, q0
; DIS: 40: f3bb07c0
; IASM-NOT: vcvt.u32.f32
ret <4 x i32> %v
}
...@@ -23,17 +23,19 @@ ...@@ -23,17 +23,19 @@
; RUN: | FileCheck %s --check-prefix=DIS ; RUN: | FileCheck %s --check-prefix=DIS
define internal <4 x float> @testMoveVector(<4 x i32> %a) { define internal <4 x float> @testMoveVector(<4 x i32> %a, <4 x i32> %b) {
; ASM-LABEL: testMoveVector: ; ASM-LABEL: testMoveVector:
; DIS-LABEL:{{.+}} <testMoveVector>: ; DIS-LABEL:{{.+}} <testMoveVector>:
; IASM-LABEL: testMoveVector: ; IASM-LABEL: testMoveVector:
entry: entry:
%0 = sitofp <4 x i32> %a to <4 x float> %0 = bitcast <4 x i32> %b to <4 x float>
ret <4 x float> %0 ret <4 x float> %0
; ASM: vmov.f32 q0, q1 ; ASM: vmov.f32 q0, q1
; DIS: 3c: eef03a40 ; The integrated assembler emits a vorr instead of a vmov.
; DIS: 0: f2220152
; IASM-NOT: vmov.f32 q0, q1 ; IASM-NOT: vmov.f32 q0, q1
; IASM-NOT: vorr q0, q1, q1
} }
; Show that we know how to translate vshl and vshr with immediate shift amounts.
; We abuse sign extension of vectors of i1 because that's the only way to force
; Subzero to emit these instructions.
; NOTE: We use -O2 to get rid of memory stores.
; REQUIRES: allow_dump
; Compile using standalone assembler.
; RUN: %p2i --filetype=asm -i %s --target=arm32 --args -O2 \
; RUN: | FileCheck %s --check-prefix=ASM
; Show bytes in assembled standalone code.
; RUN: %p2i --filetype=asm -i %s --target=arm32 --assemble --disassemble \
; RUN: --args -O2 | FileCheck %s --check-prefix=DIS
; Compile using integrated assembler.
; RUN: %p2i --filetype=iasm -i %s --target=arm32 --args -O2 \
; RUN: | FileCheck %s --check-prefix=IASM
; Show bytes in assembled integrated code.
; RUN: %p2i --filetype=iasm -i %s --target=arm32 --assemble --disassemble \
; RUN: --args -O2 | FileCheck %s --check-prefix=DIS
define internal <4 x i32> @SextV4I1(<4 x i32> %a) {
; ASM-LABEL:SextV4I1
; DIS-LABEL:00000000 <SextV4I1>:
; IASM-LABEL:SextV4I1:
%trunc = trunc <4 x i32> %a to <4 x i1>
%sext = sext <4 x i1> %trunc to <4 x i32>
ret <4 x i32> %sext
; ASM: vshl.u32 {{.*}}, #31
; ASM-NEXT: vshr.s32 {{.*}}, #31
; DIS: 0: f2bf0550
; DIS-NEXT: 4: f2a10050
; IASM-NOT: vshl.u32 {{.*}}, #31
; IASM-NOT: vshr.s32 {{.*}}, #31
}
define internal <8 x i16> @SextV8I1(<8 x i16> %a) {
; ASM-LABEL:SextV8I1
; DIS-LABEL:00000010 <SextV8I1>:
; IASM-LABEL:SextV8I1:
%trunc = trunc <8 x i16> %a to <8 x i1>
%sext = sext <8 x i1> %trunc to <8 x i16>
ret <8 x i16> %sext
; ASM: vshl.u16 {{.*}}, #15
; ASM-NEXT: vshr.s16 {{.*}}, #15
; DIS: 10: f29f0550
; DIS-NEXT: 14: f2910050
; IASM-NOT: vshl.u16 {{.*}}, #15
; IASM-NOT: vshr.s16 {{.*}}, #15
}
define internal <16 x i8> @SextV16I1(<16 x i8> %a) {
; ASM-LABEL:SextV16I1
; DIS-LABEL:00000020 <SextV16I1>:
; IASM-LABEL:SextV16I1:
%trunc = trunc <16 x i8> %a to <16 x i1>
%sext = sext <16 x i1> %trunc to <16 x i8>
ret <16 x i8> %sext
; ASM: vshl.u8 {{.*}}, #7
; ASM-NEXT: vshr.s8 {{.*}}, #7
; DIS: 20: f28f0550
; DIS-NEXT: 24: f2890050
; IASM-NOT: vshl.u8 {{.*}}, #7
; IASM-NOT: vshr.s8 {{.*}}, #7
}
; This file tests casting / conversion operations that apply to vector types. ; This file tests casting / conversion operations that apply to vector types.
; bitcast operations are in vector-bitcast.ll. ; bitcast operations are in vector-bitcast.ll.
; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 | FileCheck %s ; RUN: %p2i -i %s --target=x8632 --filetype=obj --disassemble --args -O2 \
; RUN: %p2i -i %s --filetype=obj --disassemble --args -Om1 | FileCheck %s ; RUN: | FileCheck %s --check-prefix=X8632 --check-prefix=CHECK
; RUN: %p2i -i %s --target=x8632 --filetype=obj --disassemble --args -Om1 \
; RUN: | FileCheck %s --check-prefix=X8632 --check-prefix=CHECK
; RUN: %p2i -i %s --target=arm32 --filetype=obj --disassemble --args -O2 \
; RUN: | FileCheck %s --check-prefix=ARM32 --check-prefix=CHECK
; RUN: %p2i -i %s --target=arm32 --filetype=obj --disassemble --args -Om1 \
; RUN: | FileCheck %s --check-prefix=ARM32 --check-prefix=CHECK
; sext operations ; sext operations
...@@ -12,12 +19,14 @@ entry: ...@@ -12,12 +19,14 @@ entry:
ret <16 x i8> %res ret <16 x i8> %res
; CHECK-LABEL: test_sext_v16i1_to_v16i8 ; CHECK-LABEL: test_sext_v16i1_to_v16i8
; CHECK: pxor ; X8632: pxor
; CHECK: pcmpeqb ; X8632: pcmpeqb
; CHECK: psubb ; X8632: psubb
; CHECK: pand ; X8632: pand
; CHECK: pxor ; X8632: pxor
; CHECK: pcmpgtb ; X8632: pcmpgtb
; ARM32: vshl.s8
; ARM32-NEXT: vshr.s8
} }
define internal <8 x i16> @test_sext_v8i1_to_v8i16(<8 x i1> %arg) { define internal <8 x i16> @test_sext_v8i1_to_v8i16(<8 x i1> %arg) {
...@@ -26,8 +35,10 @@ entry: ...@@ -26,8 +35,10 @@ entry:
ret <8 x i16> %res ret <8 x i16> %res
; CHECK-LABEL: test_sext_v8i1_to_v8i16 ; CHECK-LABEL: test_sext_v8i1_to_v8i16
; CHECK: psllw {{.*}},0xf ; X8632: psllw {{.*}},0xf
; CHECK: psraw {{.*}},0xf ; X8632: psraw {{.*}},0xf
; ARM32: vshl.s16
; ARM32-NEXT: vshr.s16
} }
define internal <4 x i32> @test_sext_v4i1_to_v4i32(<4 x i1> %arg) { define internal <4 x i32> @test_sext_v4i1_to_v4i32(<4 x i1> %arg) {
...@@ -36,8 +47,10 @@ entry: ...@@ -36,8 +47,10 @@ entry:
ret <4 x i32> %res ret <4 x i32> %res
; CHECK-LABEL: test_sext_v4i1_to_v4i32 ; CHECK-LABEL: test_sext_v4i1_to_v4i32
; CHECK: pslld {{.*}},0x1f ; X8632: pslld {{.*}},0x1f
; CHECK: psrad {{.*}},0x1f ; X8632: psrad {{.*}},0x1f
; ARM32: vshl.s32
; ARM32-NEXT: vshr.s32
} }
; zext operations ; zext operations
...@@ -48,10 +61,12 @@ entry: ...@@ -48,10 +61,12 @@ entry:
ret <16 x i8> %res ret <16 x i8> %res
; CHECK-LABEL: test_zext_v16i1_to_v16i8 ; CHECK-LABEL: test_zext_v16i1_to_v16i8
; CHECK: pxor ; X8632: pxor
; CHECK: pcmpeqb ; X8632: pcmpeqb
; CHECK: psubb ; X8632: psubb
; CHECK: pand ; X8632: pand
; ARM32: vmov.i8 [[S:.*]], #1
; ARM32-NEXT: vand {{.*}}, [[S]]
} }
define internal <8 x i16> @test_zext_v8i1_to_v8i16(<8 x i1> %arg) { define internal <8 x i16> @test_zext_v8i1_to_v8i16(<8 x i1> %arg) {
...@@ -60,10 +75,12 @@ entry: ...@@ -60,10 +75,12 @@ entry:
ret <8 x i16> %res ret <8 x i16> %res
; CHECK-LABEL: test_zext_v8i1_to_v8i16 ; CHECK-LABEL: test_zext_v8i1_to_v8i16
; CHECK: pxor ; X8632: pxor
; CHECK: pcmpeqw ; X8632: pcmpeqw
; CHECK: psubw ; X8632: psubw
; CHECK: pand ; X8632: pand
; ARM32: vmov.i16 [[S:.*]], #1
; ARM32-NEXT: vand {{.*}}, [[S]]
} }
define internal <4 x i32> @test_zext_v4i1_to_v4i32(<4 x i1> %arg) { define internal <4 x i32> @test_zext_v4i1_to_v4i32(<4 x i1> %arg) {
...@@ -72,10 +89,12 @@ entry: ...@@ -72,10 +89,12 @@ entry:
ret <4 x i32> %res ret <4 x i32> %res
; CHECK-LABEL: test_zext_v4i1_to_v4i32 ; CHECK-LABEL: test_zext_v4i1_to_v4i32
; CHECK: pxor ; X8632: pxor
; CHECK: pcmpeqd ; X8632: pcmpeqd
; CHECK: psubd ; X8632: psubd
; CHECK: pand ; X8632: pand
; ARM32: vmov.i32 [[S:.*]], #1
; ARM32-NEXT: vand {{.*}}, [[S]]
} }
; trunc operations ; trunc operations
...@@ -86,10 +105,10 @@ entry: ...@@ -86,10 +105,10 @@ entry:
ret <16 x i1> %res ret <16 x i1> %res
; CHECK-LABEL: test_trunc_v16i8_to_v16i1 ; CHECK-LABEL: test_trunc_v16i8_to_v16i1
; CHECK: pxor ; X8632: pxor
; CHECK: pcmpeqb ; X8632: pcmpeqb
; CHECK: psubb ; X8632: psubb
; CHECK: pand ; X8632: pand
} }
define internal <8 x i1> @test_trunc_v8i16_to_v8i1(<8 x i16> %arg) { define internal <8 x i1> @test_trunc_v8i16_to_v8i1(<8 x i16> %arg) {
...@@ -98,10 +117,10 @@ entry: ...@@ -98,10 +117,10 @@ entry:
ret <8 x i1> %res ret <8 x i1> %res
; CHECK-LABEL: test_trunc_v8i16_to_v8i1 ; CHECK-LABEL: test_trunc_v8i16_to_v8i1
; CHECK: pxor ; X8632: pxor
; CHECK: pcmpeqw ; X8632: pcmpeqw
; CHECK: psubw ; X8632: psubw
; CHECK: pand ; X8632: pand
} }
define internal <4 x i1> @test_trunc_v4i32_to_v4i1(<4 x i32> %arg) { define internal <4 x i1> @test_trunc_v4i32_to_v4i1(<4 x i32> %arg) {
...@@ -110,10 +129,10 @@ entry: ...@@ -110,10 +129,10 @@ entry:
ret <4 x i1> %res ret <4 x i1> %res
; CHECK-LABEL: test_trunc_v4i32_to_v4i1 ; CHECK-LABEL: test_trunc_v4i32_to_v4i1
; CHECK: pxor ; X8632: pxor
; CHECK: pcmpeqd ; X8632: pcmpeqd
; CHECK: psubd ; X8632: psubd
; CHECK: pand ; X8632: pand
} }
; fpto[us]i operations ; fpto[us]i operations
...@@ -124,7 +143,8 @@ entry: ...@@ -124,7 +143,8 @@ entry:
ret <4 x i32> %res ret <4 x i32> %res
; CHECK-LABEL: test_fptosi_v4f32_to_v4i32 ; CHECK-LABEL: test_fptosi_v4f32_to_v4i32
; CHECK: cvttps2dq ; X8632: cvttps2dq
; ARM32: vcvt.s32.f32
} }
define internal <4 x i32> @test_fptoui_v4f32_to_v4i32(<4 x float> %arg) { define internal <4 x i32> @test_fptoui_v4f32_to_v4i32(<4 x float> %arg) {
...@@ -133,7 +153,8 @@ entry: ...@@ -133,7 +153,8 @@ entry:
ret <4 x i32> %res ret <4 x i32> %res
; CHECK-LABEL: test_fptoui_v4f32_to_v4i32 ; CHECK-LABEL: test_fptoui_v4f32_to_v4i32
; CHECK: call {{.*}} R_{{.*}} __Sz_fptoui_4xi32_f32 ; X8632: call {{.*}} R_{{.*}} __Sz_fptoui_4xi32_f32
; ARM32: vcvt.u32.f32
} }
; [su]itofp operations ; [su]itofp operations
...@@ -144,7 +165,8 @@ entry: ...@@ -144,7 +165,8 @@ entry:
ret <4 x float> %res ret <4 x float> %res
; CHECK-LABEL: test_sitofp_v4i32_to_v4f32 ; CHECK-LABEL: test_sitofp_v4i32_to_v4f32
; CHECK: cvtdq2ps ; X8632: cvtdq2ps
; ARM32: vcvt.f32.s32
} }
define internal <4 x float> @test_uitofp_v4i32_to_v4f32(<4 x i32> %arg) { define internal <4 x float> @test_uitofp_v4i32_to_v4f32(<4 x i32> %arg) {
...@@ -153,5 +175,6 @@ entry: ...@@ -153,5 +175,6 @@ entry:
ret <4 x float> %res ret <4 x float> %res
; CHECK-LABEL: test_uitofp_v4i32_to_v4f32 ; CHECK-LABEL: test_uitofp_v4i32_to_v4f32
; CHECK: call {{.*}} R_{{.*}} __Sz_uitofp_4xi32_4xf32 ; X8632: call {{.*}} R_{{.*}} __Sz_uitofp_4xi32_4xf32
; ARM32: vcvt.f32.u32
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment