Commit e88c7deb by John Porto

Subzero. ARM32. Vector casts.

This CL un-scalarizes all vector cast operations in Subzero. BUG= https://bugs.chromium.org/p/nativeclient/issues/detail?id=4076 R=eholk@chromium.org Review URL: https://codereview.chromium.org/1878943009 .
parent 15e77d46
......@@ -224,13 +224,17 @@ bool encodeAdvSIMDExpandImm(IValueT Value, Type ElmtTy, IValueT &Op,
return false;
Imm8 = Value;
switch (ElmtTy) {
case IceType_i8:
Op = 0;
Cmode = 14; // 0b1110
return true;
case IceType_i16:
Op = 0;
Cmode = 8; // 100:0
Cmode = 8; // 0b1000
return true;
case IceType_i32:
Op = 0;
Cmode = 0; // 000:0
Cmode = 0; // 0b0000
return true;
default:
return false;
......@@ -1215,6 +1219,33 @@ void AssemblerARM32::emitSIMDqqq(IValueT Opcode, Type ElmtTy,
isFloatingType(ElmtTy), OpcodeName);
}
// Emits a Q-register SIMD shift by an immediate. The already-encoded Imm6
// value is shifted left by 16 bits and unioned with Opcode; Qn is always
// encoded as 0 and the instruction is emitted as a non-float Q-register op.
// OpcodeName is only forwarded to encodeQRegister() for the Qd/Qm operands.
void AssemblerARM32::emitSIMDShiftqqc(IValueT Opcode, const Operand *OpQd,
                                      const Operand *OpQm, const IValueT Imm6,
                                      const char *OpcodeName) {
  constexpr IValueT Imm6Shift = 16;
  constexpr bool UseQRegs = true;
  constexpr bool IsFloatTy = false;
  const IValueT DestQ = encodeQRegister(OpQd, "Qd", OpcodeName);
  const IValueT ZeroQn = 0;
  const IValueT SrcQ = encodeQRegister(OpQm, "Qm", OpcodeName);
  const IValueT ShiftOpcode = Opcode | (Imm6 << Imm6Shift);
  emitSIMDBase(ShiftOpcode, mapQRegToDReg(DestQ), mapQRegToDReg(ZeroQn),
               mapQRegToDReg(SrcQ), UseQRegs, IsFloatTy);
}
// Emits a Q-register SIMD vcvt. The fixed bits below are shared by all
// callers; the caller's Opcode is unioned in to select the conversion. Qn is
// always encoded as 0. OpcodeName is only forwarded to encodeQRegister().
void AssemblerARM32::emitSIMDCvtqq(IValueT Opcode, const Operand *OpQd,
                                   const Operand *OpQm,
                                   const char *OpcodeName) {
  const IValueT VcvtBase = B24 | B23 | B21 | B20 | B19 | B17 | B16 | B10 | B9;
  const IValueT SIMDOpcode = VcvtBase | Opcode;
  constexpr bool UseQRegs = true;
  constexpr bool IsFloatTy = false;
  const IValueT DestQ = encodeQRegister(OpQd, "Qd", OpcodeName);
  constexpr IValueT ZeroQn = 0;
  const IValueT SrcQ = encodeQRegister(OpQm, "Qm", OpcodeName);
  emitSIMDBase(SIMDOpcode, mapQRegToDReg(DestQ), mapQRegToDReg(ZeroQn),
               mapQRegToDReg(SrcQ), UseQRegs, IsFloatTy);
}
void AssemblerARM32::emitVFPddd(CondARM32::Cond Cond, IValueT Opcode,
IValueT Dd, IValueT Dn, IValueT Dm) {
assert(Dd < RegARM32::getNumDRegs());
......@@ -2557,6 +2588,50 @@ void AssemblerARM32::vcvtus(const Operand *OpSd, const Operand *OpSm,
emitVFPsss(Cond, VcvtsiOpcode, Sd, S0, Sm);
}
void AssemblerARM32::vcvtqsi(const Operand *OpQd, const Operand *OpQm) {
// VCVT (between floating-point and integer, Advanced SIMD)
// - ARM Section A8.8.305, encoding A1:
// vcvt<c>.f32.s32 <Qd>, <Qm>
//
// 111100111D11ss11dddd011ooQM0mmmm where Ddddd=Qd, Mmmmm=Qm, and 10=op.
constexpr const char *Vcvtqsi = "vcvt.s32.f32";
constexpr IValueT VcvtqsiOpcode = B8;
emitSIMDCvtqq(VcvtqsiOpcode, OpQd, OpQm, Vcvtqsi);
}
void AssemblerARM32::vcvtqsu(const Operand *OpQd, const Operand *OpQm) {
// VCVT (between floating-point and integer, Advanced SIMD)
// - ARM Section A8.8.305, encoding A1:
// vcvt<c>.f32.u32 <Qd>, <Qm>
//
// 111100111D11ss11dddd011ooQM0mmmm where Ddddd=Qd, Mmmmm=Qm, and 11=op.
constexpr const char *Vcvtqsu = "vcvt.u32.f32";
constexpr IValueT VcvtqsuOpcode = B8 | B7;
emitSIMDCvtqq(VcvtqsuOpcode, OpQd, OpQm, Vcvtqsu);
}
void AssemblerARM32::vcvtqis(const Operand *OpQd, const Operand *OpQm) {
// VCVT (between floating-point and integer, Advanced SIMD)
// - ARM Section A8.8.305, encoding A1:
// vcvt<c>.f32.s32 <Qd>, <Qm>
//
// 111100111D11ss11dddd011ooQM0mmmm where Ddddd=Qd, Mmmmm=Qm, and 01=op.
constexpr const char *Vcvtqis = "vcvt.f32.s32";
constexpr IValueT VcvtqisOpcode = 0;
emitSIMDCvtqq(VcvtqisOpcode, OpQd, OpQm, Vcvtqis);
}
void AssemblerARM32::vcvtqus(const Operand *OpQd, const Operand *OpQm) {
// VCVT (between floating-point and integer, Advanced SIMD)
// - ARM Section A8.8.305, encoding A1:
// vcvt<c>.f32.u32 <Qd>, <Qm>
//
// 111100111D11ss11dddd011ooQM0mmmm where Ddddd=Qd, Mmmmm=Qm, and 01=op.
constexpr const char *Vcvtqus = "vcvt.f32.u32";
constexpr IValueT VcvtqusOpcode = B7;
emitSIMDCvtqq(VcvtqusOpcode, OpQd, OpQm, Vcvtqus);
}
void AssemblerARM32::emitVFPds(CondARM32::Cond Cond, IValueT Opcode, IValueT Dd,
IValueT Sm) {
assert(Dd < RegARM32::getNumDRegs());
......@@ -3267,6 +3342,70 @@ void AssemblerARM32::vshlqi(Type ElmtTy, const Operand *OpQd,
emitSIMDqqq(VshlOpcode, ElmtTy, OpQd, OpQn, OpQm, Vshl);
}
namespace {

// Selects which immediate encoding encodeSIMDShiftImm6() produces.
enum SIMDShiftType { ST_Vshl, ST_Vshr };

// Computes the Imm6 field value for a SIMD shift by the constant Imm6, where
// MaxShift is the bit width of ElmtTy (asserted to be i8, i16 or i32) and the
// shift amount is asserted to satisfy 0 < Imm < MaxShift.
//   ST_Vshl: (Imm - MaxShift) & (2 * MaxShift - 1)
//   ST_Vshr: (2 * MaxShift - Imm) & (2 * MaxShift - 1)
// The arithmetic is unsigned, so for the asserted element widths the ST_Vshl
// subtraction wraps around and the masked result equals MaxShift + Imm.
IValueT encodeSIMDShiftImm6(SIMDShiftType Shift, Type ElmtTy,
                            const ConstantInteger32 *Imm6) {
  const IValueT Imm = Imm6->getValue();
  assert(Imm > 0);
  const SizeT MaxShift = getScalarIntBitWidth(ElmtTy);
  assert(Imm < MaxShift);
  assert(ElmtTy == IceType_i8 || ElmtTy == IceType_i16 ||
         ElmtTy == IceType_i32);
  if (Shift == ST_Vshl) {
    const IValueT LeftEncoding = Imm - MaxShift;
    return LeftEncoding & (2 * MaxShift - 1);
  }
  const IValueT RightEncoding = 2 * MaxShift - Imm;
  return RightEncoding & (2 * MaxShift - 1);
}

} // end of anonymous namespace
void AssemblerARM32::vshlqc(Type ElmtTy, const Operand *OpQd,
                            const Operand *OpQm,
                            const ConstantInteger32 *Imm6) {
  // VSHL (immediate) - ARM section A8.8.395, encoding A1:
  //   vshl Qd, Qm, #Imm
  //
  // 1111001U1Diiiiiidddd0101LQM1mmmm where Ddddd=Qd, Mmmmm=Qm, iiiiii=Imm6,
  // 0=U, 1=Q, 0=L.
  assert(isScalarIntegerType(ElmtTy) &&
         "vshl expects vector with integer element type");
  // B10 | B8 is the 0101 field (bits 11:8) of the pattern above.
  constexpr IValueT ShlImmOpcode = B23 | B10 | B8 | B4;
  const IValueT EncodedImm6 = encodeSIMDShiftImm6(ST_Vshl, ElmtTy, Imm6);
  emitSIMDShiftqqc(ShlImmOpcode, OpQd, OpQm, EncodedImm6, "vshl");
}
void AssemblerARM32::vshrqic(Type ElmtTy, const Operand *OpQd,
                             const Operand *OpQm,
                             const ConstantInteger32 *Imm6) {
  // VSHR - ARM section A8.8.398, encoding A1:
  //   vshr.s<size> Qd, Qm, #Imm
  //
  // 1111001U1Diiiiiidddd0000LQM1mmmm where Ddddd=Qd, Mmmmm=Qm, iiiiii=Imm6,
  // 0=U, 1=Q, 0=L.
  assert(isScalarIntegerType(ElmtTy) &&
         "vshr expects vector with integer element type");
  // No bits are set in 11:8 (unlike vshl) and U (bit 24) is left clear.
  constexpr IValueT SignedShrOpcode = B23 | B4;
  const IValueT EncodedImm6 = encodeSIMDShiftImm6(ST_Vshr, ElmtTy, Imm6);
  emitSIMDShiftqqc(SignedShrOpcode, OpQd, OpQm, EncodedImm6, "vshr");
}
void AssemblerARM32::vshrquc(Type ElmtTy, const Operand *OpQd,
                             const Operand *OpQm,
                             const ConstantInteger32 *Imm6) {
  // VSHR - ARM section A8.8.398, encoding A1:
  //   vshr.u<size> Qd, Qm, #Imm
  //
  // 1111001U1Diiiiiidddd0000LQM1mmmm where Ddddd=Qd, Mmmmm=Qm, iiiiii=Imm6,
  // 1=U, 1=Q, 0=L.
  assert(isScalarIntegerType(ElmtTy) &&
         "vshr expects vector with integer element type");
  constexpr const char *Vshr = "vshr";
  // U (bit 24) selects the unsigned (logical) right shift. Without it this
  // function would emit exactly the same bytes as the signed vshrqic(), i.e.
  // an arithmetic shift.
  const IValueT VshrOpcode = B24 | B23 | B4;
  emitSIMDShiftqqc(VshrOpcode, OpQd, OpQm,
                   encodeSIMDShiftImm6(ST_Vshr, ElmtTy, Imm6), Vshr);
}
void AssemblerARM32::vshlqu(Type ElmtTy, const Operand *OpQd,
const Operand *OpQm, const Operand *OpQn) {
// VSHL - ARM section A8.8.396, encoding A1:
......
......@@ -381,6 +381,14 @@ public:
// vcvt<c>.u32.f32
void vcvtus(const Operand *OpSd, const Operand *OpSm, CondARM32::Cond Cond);
// Vector (Q register) conversions between f32 and 32-bit integer elements.
// vcvt.s32.f32 <Qd>, <Qm>
void vcvtqsi(const Operand *OpQd, const Operand *OpQm);
// vcvt.u32.f32 <Qd>, <Qm>
void vcvtqsu(const Operand *OpQd, const Operand *OpQm);
// vcvt.f32.s32 <Qd>, <Qm>
void vcvtqis(const Operand *OpQd, const Operand *OpQm);
// vcvt.f32.u32 <Qd>, <Qm>
void vcvtqus(const Operand *OpQd, const Operand *OpQm);
void vdivd(const Operand *OpDd, const Operand *OpDn, const Operand *OpDm,
CondARM32::Cond Cond);
......@@ -511,6 +519,15 @@ public:
void vshlqu(Type ElmtTy, const Operand *OpQd, const Operand *OpQm,
const Operand *OpQn);
// vshl Qd, Qm, #Imm. The last parameter is the constant shift amount (it is
// named Imm6 in the definition, not a Qn register).
void vshlqc(Type ElmtTy, const Operand *OpQd, const Operand *OpQm,
const ConstantInteger32 *OpQn);
// vshr Qd, Qm, #Imm. Selected by InstARM32Vshr::emitIAS for FS_Signed; the
// last parameter is the constant shift amount.
void vshrqic(Type ElmtTy, const Operand *OpQd, const Operand *OpQm,
const ConstantInteger32 *OpQn);
// vshr Qd, Qm, #Imm. Selected by InstARM32Vshr::emitIAS for FS_Unsigned (and
// FS_None); the last parameter is the constant shift amount.
void vshrquc(Type ElmtTy, const Operand *OpQd, const Operand *OpQm,
const ConstantInteger32 *OpQn);
void vsqrtd(const Operand *OpDd, const Operand *OpDm, CondARM32::Cond Cond);
void vsqrts(const Operand *OpSd, const Operand *OpSm, CondARM32::Cond Cond);
......@@ -739,6 +756,20 @@ private:
const Operand *OpQn, const Operand *OpQm,
const char *OpcodeName);
// Implements various forms of vector (SIMD) shifts by an immediate, using Q
// registers.
// Implements pattern 111100101Diiiiiidddd010101M1mmmm where Dddd=Qd, Mmmm=Qm,
// iiiiii=Imm6, and Opcode is unioned into the pattern.
// NOTE(review): the 0101 field shown in the pattern is only supplied by the
// vshl caller's Opcode; the vshr callers pass an Opcode without those bits, so
// the pattern above is vshl-specific - confirm against the ARM ARM.
void emitSIMDShiftqqc(IValueT Opcode, const Operand *OpQd,
const Operand *OpQm, const IValueT Imm6,
const char *OpcodeName);
// Implements various forms of vector (SIMD) casts between (signed and
// unsigned) integer and floating point types (f32). Implements pattern
// 111100111D11ss11dddd011ooQM0mmmm where Dddd=Qd, Mmmm=Qm, 10=ss, 1=Q, and
// the oo bits are selected by Opcode, which is unioned into the pattern.
// CvtName is the mnemonic passed along when encoding the Q registers.
void emitSIMDCvtqq(IValueT Opcode, const Operand *OpQd, const Operand *OpQm,
const char *CvtName);
// Pattern cccctttxxxxnnnn0000iiiiiiiiiiii where cccc=Cond, nnnn=Rn,
// ttt=Instruction type (derived from OpSrc1), iiiiiiiiiiii is derived from
// OpSrc1, and xxxx=Opcode.
......
......@@ -835,10 +835,48 @@ template <> void InstARM32Vshl::emitIAS(const Cfg *Func) const {
switch (Sign) {
case InstARM32::FS_None: // defaults to unsigned.
case InstARM32::FS_Unsigned:
Asm->vshlqu(ElmtTy, Dest, getSrc(0), getSrc(1));
if (const auto *Imm6 = llvm::dyn_cast<ConstantInteger32>(getSrc(1))) {
Asm->vshlqc(ElmtTy, Dest, getSrc(0), Imm6);
} else {
Asm->vshlqu(ElmtTy, Dest, getSrc(0), getSrc(1));
}
break;
case InstARM32::FS_Signed:
Asm->vshlqi(ElmtTy, Dest, getSrc(0), getSrc(1));
if (const auto *Imm6 = llvm::dyn_cast<ConstantInteger32>(getSrc(1))) {
Asm->vshlqc(ElmtTy, Dest, getSrc(0), Imm6);
} else {
Asm->vshlqi(ElmtTy, Dest, getSrc(0), getSrc(1));
}
break;
}
} break;
}
}
template <> void InstARM32Vshr::emitIAS(const Cfg *Func) const {
auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
const Variable *Dest = getDest();
const Type DestTy = Dest->getType();
switch (DestTy) {
default:
llvm::report_fatal_error("Vshr not defined on type " +
typeStdString(Dest->getType()));
case IceType_v4i1:
case IceType_v8i1:
case IceType_v16i1:
case IceType_v16i8:
case IceType_v8i16:
case IceType_v4i32: {
const Type ElmtTy = typeElementType(DestTy);
const auto *Imm6 = llvm::cast<ConstantInteger32>(getSrc(1));
assert(Sign != InstARM32::FS_None);
switch (Sign) {
case InstARM32::FS_None: // defaults to unsigned.
case InstARM32::FS_Unsigned:
Asm->vshrquc(ElmtTy, Dest, getSrc(0), Imm6);
break;
case InstARM32::FS_Signed:
Asm->vshrqic(ElmtTy, Dest, getSrc(0), Imm6);
break;
}
} break;
......@@ -1466,6 +1504,7 @@ template <> const char *InstARM32Vmul::Opcode = "vmul";
template <> const char *InstARM32Vorr::Opcode = "vorr";
template <> const char *InstARM32UnaryopFP<InstARM32::Vneg>::Opcode = "vneg";
template <> const char *InstARM32ThreeAddrFP<InstARM32::Vshl>::Opcode = "vshl";
template <> const char *InstARM32ThreeAddrFP<InstARM32::Vshr>::Opcode = "vshr";
template <> const char *InstARM32Vsub::Opcode = "vsub";
// Four-addr ops
template <> const char *InstARM32Mla::Opcode = "mla";
......@@ -2452,6 +2491,14 @@ const char *vcvtVariantSuffix(const InstARM32Vcvt::VcvtVariant Variant) {
return ".f64.f32";
case InstARM32Vcvt::D2s:
return ".f32.f64";
case InstARM32Vcvt::Vs2si:
return ".s32.f32";
case InstARM32Vcvt::Vs2ui:
return ".u32.f32";
case InstARM32Vcvt::Vsi2s:
return ".f32.s32";
case InstARM32Vcvt::Vui2s:
return ".f32.u32";
}
llvm::report_fatal_error("Invalid VcvtVariant enum.");
}
......@@ -2503,6 +2550,18 @@ void InstARM32Vcvt::emitIAS(const Cfg *Func) const {
case D2s:
Asm->vcvtsd(getDest(), getSrc(0), getPredicate());
break;
case Vs2si:
Asm->vcvtqsi(getDest(), getSrc(0));
break;
case Vs2ui:
Asm->vcvtqsu(getDest(), getSrc(0));
break;
case Vsi2s:
Asm->vcvtqis(getDest(), getSrc(0));
break;
case Vui2s:
Asm->vcvtqus(getDest(), getSrc(0));
break;
}
assert(!Asm->needsTextFixup());
}
......@@ -2913,6 +2972,7 @@ template class InstARM32FourAddrFP<InstARM32::Vmls>;
template class InstARM32ThreeAddrFP<InstARM32::Vmul>;
template class InstARM32UnaryopSignAwareFP<InstARM32::Vneg>;
template class InstARM32ThreeAddrSignAwareFP<InstARM32::Vshl>;
template class InstARM32ThreeAddrSignAwareFP<InstARM32::Vshr>;
template class InstARM32ThreeAddrFP<InstARM32::Vsub>;
template class InstARM32LoadBase<InstARM32::Ldr>;
......
......@@ -438,6 +438,7 @@ public:
Vneg,
Vorr,
Vshl,
Vshr,
Vsqrt,
Vsub
};
......@@ -822,12 +823,18 @@ public:
InstARM32ThreeAddrSignAwareFP(Func, Dest, Src0, Src1);
}
// Factory overload whose second source operand is a ConstantInteger32. The
// instruction is constructed in storage obtained from Func->allocate().
static InstARM32ThreeAddrSignAwareFP *
create(Cfg *Func, Variable *Dest, Variable *Src0, ConstantInteger32 *Src1) {
  auto *Storage = Func->allocate<InstARM32ThreeAddrSignAwareFP>();
  return new (Storage) InstARM32ThreeAddrSignAwareFP(Func, Dest, Src0, Src1);
}
void emitIAS(const Cfg *Func) const override;
void setSignType(InstARM32::FPSign SignType) { this->Sign = SignType; }
private:
InstARM32ThreeAddrSignAwareFP(Cfg *Func, Variable *Dest, Variable *Src0,
Variable *Src1)
Operand *Src1)
: InstARM32ThreeAddrFP<K>(Func, Dest, Src0, Src1) {}
};
......@@ -993,6 +1000,7 @@ using InstARM32Vmul = InstARM32ThreeAddrFP<InstARM32::Vmul>;
using InstARM32Vneg = InstARM32UnaryopSignAwareFP<InstARM32::Vneg>;
using InstARM32Vorr = InstARM32ThreeAddrFP<InstARM32::Vorr>;
using InstARM32Vshl = InstARM32ThreeAddrSignAwareFP<InstARM32::Vshl>;
using InstARM32Vshr = InstARM32ThreeAddrSignAwareFP<InstARM32::Vshr>;
using InstARM32Vsub = InstARM32ThreeAddrFP<InstARM32::Vsub>;
using InstARM32Ldr = InstARM32LoadBase<InstARM32::Ldr>;
using InstARM32Ldrex = InstARM32LoadBase<InstARM32::Ldrex>;
......@@ -1358,7 +1366,22 @@ class InstARM32Vcvt final : public InstARM32Pred {
InstARM32Vcvt &operator=(const InstARM32Vcvt &) = delete;
public:
enum VcvtVariant { S2si, S2ui, Si2s, Ui2s, D2si, D2ui, Si2d, Ui2d, S2d, D2s };
// Selects the conversion performed by a vcvt instruction.
enum VcvtVariant {
S2si,
S2ui,
Si2s,
Ui2s,
D2si,
D2ui,
Si2d,
Ui2d,
S2d,
D2s,
// Vector variants, emitted through the Q-register vcvtq* assembler methods.
// The suffix printed for each one is noted alongside.
Vs2si, // .s32.f32
Vs2ui, // .u32.f32
Vsi2s, // .f32.s32
Vui2s, // .f32.u32
};
static InstARM32Vcvt *create(Cfg *Func, Variable *Dest, Variable *Src,
VcvtVariant Variant, CondARM32::Cond Predicate) {
return new (Func->allocate<InstARM32Vcvt>())
......
......@@ -894,6 +894,14 @@ protected:
InstARM32Vshl *_vshl(Variable *Dest, Variable *Src0, Variable *Src1) {
return Context.insert<InstARM32Vshl>(Dest, Src0, Src1);
}
// Inserts a vshl whose second source is a ConstantInteger32. The inserted
// instruction is always tagged FS_Unsigned.
void _vshl(Variable *Dest, Variable *Src0, ConstantInteger32 *Src1) {
  auto *ShiftLeft = Context.insert<InstARM32Vshl>(Dest, Src0, Src1);
  ShiftLeft->setSignType(InstARM32::FS_Unsigned);
}
// Inserts a vshr whose second source is a ConstantInteger32 and returns the
// new instruction. No sign type is set here; that is left to the caller.
InstARM32Vshr *_vshr(Variable *Dest, Variable *Src0,
                     ConstantInteger32 *Src1) {
  auto *ShiftRight = Context.insert<InstARM32Vshr>(Dest, Src0, Src1);
  return ShiftRight;
}
void _vsqrt(Variable *Dest, Variable *Src,
CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Vsqrt>(Dest, Src, Pred);
......
......@@ -36,7 +36,7 @@ entry:
define internal <4 x i32> @cmpEq4f32(<4 x float> %a, <4 x float> %b) {
; ASM-LABEL:cmpEq4f32:
; DIS-LABEL:00000240 <cmpEq4f32>:
; DIS-LABEL:00000180 <cmpEq4f32>:
entry:
%cmp = fcmp oeq <4 x float> %a, %b
......@@ -45,7 +45,7 @@ entry:
; ASM: vcmp.f32 s0, s1
; ASM: vcmp.f32 s0, s1
; ASM: vcmp.f32 s0, s1
; DIS: 27c: eeb40a60
; DIS: 1bc: eeb40a60
%cmp.ret_ext = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %cmp.ret_ext
......
......@@ -36,3 +36,17 @@ entry:
ret float %v
}
define internal <4 x float> @IntVecToFloatVec(<4 x i32> %a) {
; ASM-LABEL: IntVecToFloatVec:
; DIS-LABEL: 00000030 <IntVecToFloatVec>:
; IASM-LABEL: IntVecToFloatVec:
%v = sitofp <4 x i32> %a to <4 x float>
; ASM: vcvt.f32.s32 q0, q0
; DIS: 40: f3bb0640
; IASM-NOT: vcvt.f32.s32
ret <4 x float> %v
}
......@@ -36,3 +36,17 @@ entry:
ret float %v
}
define internal <4 x float> @UIntVecToFloatVec(<4 x i32> %a) {
; ASM-LABEL: UIntVecToFloatVec:
; DIS-LABEL: 00000030 <UIntVecToFloatVec>:
; IASM-LABEL: UIntVecToFloatVec:
%v = uitofp <4 x i32> %a to <4 x float>
; ASM: vcvt.f32.u32 q0, q0
; DIS: 40: f3bb06c0
; IASM-NOT: vcvt.f32.u32
ret <4 x float> %v
}
......@@ -36,3 +36,17 @@ entry:
ret i32 %v
}
define internal <4 x i32> @FloatVecToIntVec(<4 x float> %a) {
; ASM-LABEL: FloatVecToIntVec:
; DIS-LABEL: 00000030 <FloatVecToIntVec>:
; IASM-LABEL: FloatVecToIntVec:
%v = fptosi <4 x float> %a to <4 x i32>
; ASM: vcvt.s32.f32 q0, q0
; DIS: 40: f3bb0740
; IASM-NOT: vcvt.s32.f32
ret <4 x i32> %v
}
......@@ -35,3 +35,17 @@ entry:
ret i32 %v
}
define internal <4 x i32> @FloatVecToUIntVec(<4 x float> %a) {
; ASM-LABEL: FloatVecToUIntVec:
; DIS-LABEL: 00000030 <FloatVecToUIntVec>:
; IASM-LABEL: FloatVecToUIntVec:
%v = fptoui <4 x float> %a to <4 x i32>
; ASM: vcvt.u32.f32 q0, q0
; DIS: 40: f3bb07c0
; IASM-NOT: vcvt.u32.f32
ret <4 x i32> %v
}
......@@ -23,17 +23,19 @@
; RUN: | FileCheck %s --check-prefix=DIS
define internal <4 x float> @testMoveVector(<4 x i32> %a) {
define internal <4 x float> @testMoveVector(<4 x i32> %a, <4 x i32> %b) {
; ASM-LABEL: testMoveVector:
; DIS-LABEL:{{.+}} <testMoveVector>:
; IASM-LABEL: testMoveVector:
entry:
%0 = sitofp <4 x i32> %a to <4 x float>
%0 = bitcast <4 x i32> %b to <4 x float>
ret <4 x float> %0
; ASM: vmov.f32 q0, q1
; DIS: 3c: eef03a40
; The integrated assembler emits a vorr instead of a vmov.
; DIS: 0: f2220152
; IASM-NOT: vmov.f32 q0, q1
; IASM-NOT: vorr q0, q1, q1
}
; Show that we know how to translate vshl and vshr with immediate shift amounts.
; We abuse sign extension of vectors of i1 because that's the only way to force
; Subzero to emit these instructions.
; NOTE: We use -O2 to get rid of memory stores.
; REQUIRES: allow_dump
; Compile using standalone assembler.
; RUN: %p2i --filetype=asm -i %s --target=arm32 --args -O2 \
; RUN: | FileCheck %s --check-prefix=ASM
; Show bytes in assembled standalone code.
; RUN: %p2i --filetype=asm -i %s --target=arm32 --assemble --disassemble \
; RUN: --args -O2 | FileCheck %s --check-prefix=DIS
; Compile using integrated assembler.
; RUN: %p2i --filetype=iasm -i %s --target=arm32 --args -O2 \
; RUN: | FileCheck %s --check-prefix=IASM
; Show bytes in assembled integrated code.
; RUN: %p2i --filetype=iasm -i %s --target=arm32 --assemble --disassemble \
; RUN: --args -O2 | FileCheck %s --check-prefix=DIS
define internal <4 x i32> @SextV4I1(<4 x i32> %a) {
; ASM-LABEL:SextV4I1
; DIS-LABEL:00000000 <SextV4I1>:
; IASM-LABEL:SextV4I1:
%trunc = trunc <4 x i32> %a to <4 x i1>
%sext = sext <4 x i1> %trunc to <4 x i32>
ret <4 x i32> %sext
; ASM: vshl.u32 {{.*}}, #31
; ASM-NEXT: vshr.s32 {{.*}}, #31
; DIS: 0: f2bf0550
; DIS-NEXT: 4: f2a10050
; IASM-NOT: vshl.u32 {{.*}}, #31
; IASM-NOT: vshr.s32 {{.*}}, #31
}
define internal <8 x i16> @SextV8I1(<8 x i16> %a) {
; ASM-LABEL:SextV8I1
; DIS-LABEL:00000010 <SextV8I1>:
; IASM-LABEL:SextV8I1:
%trunc = trunc <8 x i16> %a to <8 x i1>
%sext = sext <8 x i1> %trunc to <8 x i16>
ret <8 x i16> %sext
; ASM: vshl.u16 {{.*}}, #15
; ASM-NEXT: vshr.s16 {{.*}}, #15
; DIS: 10: f29f0550
; DIS-NEXT: 14: f2910050
; IASM-NOT: vshl.u16 {{.*}}, #15
; IASM-NOT: vshr.s16 {{.*}}, #15
}
define internal <16 x i8> @SextV16I1(<16 x i8> %a) {
; ASM-LABEL:SextV16I1
; DIS-LABEL:00000020 <SextV16I1>:
; IASM-LABEL:SextV16I1:
%trunc = trunc <16 x i8> %a to <16 x i1>
%sext = sext <16 x i1> %trunc to <16 x i8>
ret <16 x i8> %sext
; ASM: vshl.u8 {{.*}}, #7
; ASM-NEXT: vshr.s8 {{.*}}, #7
; DIS: 20: f28f0550
; DIS-NEXT: 24: f2890050
; IASM-NOT: vshl.u8 {{.*}}, #7
; IASM-NOT: vshr.s8 {{.*}}, #7
}
; This file tests casting / conversion operations that apply to vector types.
; bitcast operations are in vector-bitcast.ll.
; RUN: %p2i -i %s --filetype=obj --disassemble --args -O2 | FileCheck %s
; RUN: %p2i -i %s --filetype=obj --disassemble --args -Om1 | FileCheck %s
; RUN: %p2i -i %s --target=x8632 --filetype=obj --disassemble --args -O2 \
; RUN: | FileCheck %s --check-prefix=X8632 --check-prefix=CHECK
; RUN: %p2i -i %s --target=x8632 --filetype=obj --disassemble --args -Om1 \
; RUN: | FileCheck %s --check-prefix=X8632 --check-prefix=CHECK
; RUN: %p2i -i %s --target=arm32 --filetype=obj --disassemble --args -O2 \
; RUN: | FileCheck %s --check-prefix=ARM32 --check-prefix=CHECK
; RUN: %p2i -i %s --target=arm32 --filetype=obj --disassemble --args -Om1 \
; RUN: | FileCheck %s --check-prefix=ARM32 --check-prefix=CHECK
; sext operations
......@@ -12,12 +19,14 @@ entry:
ret <16 x i8> %res
; CHECK-LABEL: test_sext_v16i1_to_v16i8
; CHECK: pxor
; CHECK: pcmpeqb
; CHECK: psubb
; CHECK: pand
; CHECK: pxor
; CHECK: pcmpgtb
; X8632: pxor
; X8632: pcmpeqb
; X8632: psubb
; X8632: pand
; X8632: pxor
; X8632: pcmpgtb
; ARM32: vshl.s8
; ARM32-NEXT: vshr.s8
}
define internal <8 x i16> @test_sext_v8i1_to_v8i16(<8 x i1> %arg) {
......@@ -26,8 +35,10 @@ entry:
ret <8 x i16> %res
; CHECK-LABEL: test_sext_v8i1_to_v8i16
; CHECK: psllw {{.*}},0xf
; CHECK: psraw {{.*}},0xf
; X8632: psllw {{.*}},0xf
; X8632: psraw {{.*}},0xf
; ARM32: vshl.s16
; ARM32-NEXT: vshr.s16
}
define internal <4 x i32> @test_sext_v4i1_to_v4i32(<4 x i1> %arg) {
......@@ -36,8 +47,10 @@ entry:
ret <4 x i32> %res
; CHECK-LABEL: test_sext_v4i1_to_v4i32
; CHECK: pslld {{.*}},0x1f
; CHECK: psrad {{.*}},0x1f
; X8632: pslld {{.*}},0x1f
; X8632: psrad {{.*}},0x1f
; ARM32: vshl.s32
; ARM32-NEXT: vshr.s32
}
; zext operations
......@@ -48,10 +61,12 @@ entry:
ret <16 x i8> %res
; CHECK-LABEL: test_zext_v16i1_to_v16i8
; CHECK: pxor
; CHECK: pcmpeqb
; CHECK: psubb
; CHECK: pand
; X8632: pxor
; X8632: pcmpeqb
; X8632: psubb
; X8632: pand
; ARM32: vmov.i8 [[S:.*]], #1
; ARM32-NEXT: vand {{.*}}, [[S]]
}
define internal <8 x i16> @test_zext_v8i1_to_v8i16(<8 x i1> %arg) {
......@@ -60,10 +75,12 @@ entry:
ret <8 x i16> %res
; CHECK-LABEL: test_zext_v8i1_to_v8i16
; CHECK: pxor
; CHECK: pcmpeqw
; CHECK: psubw
; CHECK: pand
; X8632: pxor
; X8632: pcmpeqw
; X8632: psubw
; X8632: pand
; ARM32: vmov.i16 [[S:.*]], #1
; ARM32-NEXT: vand {{.*}}, [[S]]
}
define internal <4 x i32> @test_zext_v4i1_to_v4i32(<4 x i1> %arg) {
......@@ -72,10 +89,12 @@ entry:
ret <4 x i32> %res
; CHECK-LABEL: test_zext_v4i1_to_v4i32
; CHECK: pxor
; CHECK: pcmpeqd
; CHECK: psubd
; CHECK: pand
; X8632: pxor
; X8632: pcmpeqd
; X8632: psubd
; X8632: pand
; ARM32: vmov.i32 [[S:.*]], #1
; ARM32-NEXT: vand {{.*}}, [[S]]
}
; trunc operations
......@@ -86,10 +105,10 @@ entry:
ret <16 x i1> %res
; CHECK-LABEL: test_trunc_v16i8_to_v16i1
; CHECK: pxor
; CHECK: pcmpeqb
; CHECK: psubb
; CHECK: pand
; X8632: pxor
; X8632: pcmpeqb
; X8632: psubb
; X8632: pand
}
define internal <8 x i1> @test_trunc_v8i16_to_v8i1(<8 x i16> %arg) {
......@@ -98,10 +117,10 @@ entry:
ret <8 x i1> %res
; CHECK-LABEL: test_trunc_v8i16_to_v8i1
; CHECK: pxor
; CHECK: pcmpeqw
; CHECK: psubw
; CHECK: pand
; X8632: pxor
; X8632: pcmpeqw
; X8632: psubw
; X8632: pand
}
define internal <4 x i1> @test_trunc_v4i32_to_v4i1(<4 x i32> %arg) {
......@@ -110,10 +129,10 @@ entry:
ret <4 x i1> %res
; CHECK-LABEL: test_trunc_v4i32_to_v4i1
; CHECK: pxor
; CHECK: pcmpeqd
; CHECK: psubd
; CHECK: pand
; X8632: pxor
; X8632: pcmpeqd
; X8632: psubd
; X8632: pand
}
; fpto[us]i operations
......@@ -124,7 +143,8 @@ entry:
ret <4 x i32> %res
; CHECK-LABEL: test_fptosi_v4f32_to_v4i32
; CHECK: cvttps2dq
; X8632: cvttps2dq
; ARM32: vcvt.s32.f32
}
define internal <4 x i32> @test_fptoui_v4f32_to_v4i32(<4 x float> %arg) {
......@@ -133,7 +153,8 @@ entry:
ret <4 x i32> %res
; CHECK-LABEL: test_fptoui_v4f32_to_v4i32
; CHECK: call {{.*}} R_{{.*}} __Sz_fptoui_4xi32_f32
; X8632: call {{.*}} R_{{.*}} __Sz_fptoui_4xi32_f32
; ARM32: vcvt.u32.f32
}
; [su]itofp operations
......@@ -144,7 +165,8 @@ entry:
ret <4 x float> %res
; CHECK-LABEL: test_sitofp_v4i32_to_v4f32
; CHECK: cvtdq2ps
; X8632: cvtdq2ps
; ARM32: vcvt.f32.s32
}
define internal <4 x float> @test_uitofp_v4i32_to_v4f32(<4 x i32> %arg) {
......@@ -153,5 +175,6 @@ entry:
ret <4 x float> %res
; CHECK-LABEL: test_uitofp_v4i32_to_v4f32
; CHECK: call {{.*}} R_{{.*}} __Sz_uitofp_4xi32_4xf32
; X8632: call {{.*}} R_{{.*}} __Sz_uitofp_4xi32_4xf32
; ARM32: vcvt.f32.u32
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment