Commit 397f602c by John Porto

Subzero. ARM32. Implements vector select.

Also piggy-backs necro-comments from cl 1878943009. BUG= https://bugs.chromium.org/p/nativeclient/issues/detail?id=4076 R=kschimpf@google.com Review URL: https://codereview.chromium.org/1886263004 .
parent e88c7deb
...@@ -1406,9 +1406,13 @@ class Assembler : public ValueObject { ...@@ -1406,9 +1406,13 @@ class Assembler : public ValueObject {
// ARM32::AssemblerARM32::uxt() (uxtb and uxth) // ARM32::AssemblerARM32::uxt() (uxtb and uxth)
// ARM32::AssemblerARM32::vpop() // ARM32::AssemblerARM32::vpop()
// ARM32::AssemblerARM32::vpush() // ARM32::AssemblerARM32::vpush()
// ARM32::AssemblerARM32:rbit() // ARM32::AssemblerARM32::rbit()
// ARM32::AssemblerARM32::vbslq()
// ARM32::AssemblerARM32::veord() // ARM32::AssemblerARM32::veord()
// ARM32::AssemblerARM32::vld1qr() // ARM32::AssemblerARM32::vld1qr()
// ARM32::AssemblerARM32::vshlqc()
// ARM32::AssemblerARM32::vshrqic()
// ARM32::AssemblerARM32::vshrquc()
// ARM32::AssemblerARM32::vst1qr() // ARM32::AssemblerARM32::vst1qr()
// ARM32::AssemblerARM32::vmorqi() // ARM32::AssemblerARM32::vmorqi()
// ARM32::AssemblerARM32::vmovqc() // ARM32::AssemblerARM32::vmovqc()
......
...@@ -2410,6 +2410,18 @@ void AssemblerARM32::vandq(const Operand *OpQd, const Operand *OpQm, ...@@ -2410,6 +2410,18 @@ void AssemblerARM32::vandq(const Operand *OpQd, const Operand *OpQm,
emitSIMDqqq(VandqOpcode, ElmtTy, OpQd, OpQm, OpQn, Vandq); emitSIMDqqq(VandqOpcode, ElmtTy, OpQd, OpQm, OpQn, Vandq);
} }
void AssemblerARM32::vbslq(const Operand *OpQd, const Operand *OpQm,
const Operand *OpQn) {
// VBSL (register) - ARM section A8.8.290, encoding A1:
// vbsl <Qd>, <Qn>, <Qm>
//
// 111100110D01nnn0ddd00001N1M1mmm0 where Dddd=OpQd, Nnnn=OpQm, and Mmmm=OpQm.
constexpr const char *Vbslq = "vbslq";
constexpr IValueT VbslqOpcode = B24 | B20 | B8 | B4;
constexpr Type ElmtTy = IceType_i8; // emits sz=0
emitSIMDqqq(VbslqOpcode, ElmtTy, OpQd, OpQm, OpQn, Vbslq);
}
void AssemblerARM32::vcmpd(const Operand *OpDd, const Operand *OpDm, void AssemblerARM32::vcmpd(const Operand *OpDd, const Operand *OpDm,
CondARM32::Cond Cond) { CondARM32::Cond Cond) {
constexpr const char *Vcmpd = "vcmpd"; constexpr const char *Vcmpd = "vcmpd";
......
...@@ -343,6 +343,8 @@ public: ...@@ -343,6 +343,8 @@ public:
void vandq(const Operand *OpQd, const Operand *OpQm, const Operand *OpQn); void vandq(const Operand *OpQd, const Operand *OpQm, const Operand *OpQn);
void vbslq(const Operand *OpQd, const Operand *OpQm, const Operand *OpQn);
void vcmpd(const Operand *OpDd, const Operand *OpDm, CondARM32::Cond cond); void vcmpd(const Operand *OpDd, const Operand *OpDm, CondARM32::Cond cond);
// Second argument of compare is zero (+0.0). // Second argument of compare is zero (+0.0).
......
...@@ -704,6 +704,24 @@ template <> void InstARM32Vand::emitIAS(const Cfg *Func) const { ...@@ -704,6 +704,24 @@ template <> void InstARM32Vand::emitIAS(const Cfg *Func) const {
assert(!Asm->needsTextFixup()); assert(!Asm->needsTextFixup());
} }
// Integrated-assembler emission for Vbsl: forwards the destination and the two
// source operands to AssemblerARM32::vbslq() when the destination has one of
// the accepted vector types, and aborts with a fatal error for any other type.
template <> void InstARM32Vbsl::emitIAS(const Cfg *Func) const {
  auto *ARM32Asm = Func->getAssembler<ARM32::AssemblerARM32>();
  const Variable *Dest = getDest();
  const auto DestTy = Dest->getType();
  switch (DestTy) {
  case IceType_v4i1:
  case IceType_v8i1:
  case IceType_v16i1:
  case IceType_v16i8:
  case IceType_v8i16:
  case IceType_v4i32:
    ARM32Asm->vbslq(Dest, getSrc(0), getSrc(1));
    break;
  default:
    llvm::report_fatal_error("Vbsl not defined on type " +
                             typeStdString(DestTy));
  }
  // Emission is not expected to leave a pending text fixup.
  assert(!ARM32Asm->needsTextFixup());
}
template <> void InstARM32Vdiv::emitIAS(const Cfg *Func) const { template <> void InstARM32Vdiv::emitIAS(const Cfg *Func) const {
auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>(); auto *Asm = Func->getAssembler<ARM32::AssemblerARM32>();
const Variable *Dest = getDest(); const Variable *Dest = getDest();
...@@ -1496,6 +1514,7 @@ template <> const char *InstARM32Udiv::Opcode = "udiv"; ...@@ -1496,6 +1514,7 @@ template <> const char *InstARM32Udiv::Opcode = "udiv";
// FP // FP
template <> const char *InstARM32Vadd::Opcode = "vadd"; template <> const char *InstARM32Vadd::Opcode = "vadd";
template <> const char *InstARM32Vand::Opcode = "vand"; template <> const char *InstARM32Vand::Opcode = "vand";
template <> const char *InstARM32Vbsl::Opcode = "vbsl";
template <> const char *InstARM32Vdiv::Opcode = "vdiv"; template <> const char *InstARM32Vdiv::Opcode = "vdiv";
template <> const char *InstARM32Veor::Opcode = "veor"; template <> const char *InstARM32Veor::Opcode = "veor";
template <> const char *InstARM32Vmla::Opcode = "vmla"; template <> const char *InstARM32Vmla::Opcode = "vmla";
......
...@@ -427,6 +427,7 @@ public: ...@@ -427,6 +427,7 @@ public:
Vabs, Vabs,
Vadd, Vadd,
Vand, Vand,
Vbsl,
Vcmp, Vcmp,
Vcvt, Vcvt,
Vdiv, Vdiv,
...@@ -992,6 +993,7 @@ using InstARM32Sub = InstARM32ThreeAddrGPR<InstARM32::Sub>; ...@@ -992,6 +993,7 @@ using InstARM32Sub = InstARM32ThreeAddrGPR<InstARM32::Sub>;
using InstARM32Udiv = InstARM32ThreeAddrGPR<InstARM32::Udiv>; using InstARM32Udiv = InstARM32ThreeAddrGPR<InstARM32::Udiv>;
using InstARM32Vadd = InstARM32ThreeAddrFP<InstARM32::Vadd>; using InstARM32Vadd = InstARM32ThreeAddrFP<InstARM32::Vadd>;
using InstARM32Vand = InstARM32ThreeAddrFP<InstARM32::Vand>; using InstARM32Vand = InstARM32ThreeAddrFP<InstARM32::Vand>;
using InstARM32Vbsl = InstARM32ThreeAddrFP<InstARM32::Vbsl>;
using InstARM32Vdiv = InstARM32ThreeAddrFP<InstARM32::Vdiv>; using InstARM32Vdiv = InstARM32ThreeAddrFP<InstARM32::Vdiv>;
using InstARM32Veor = InstARM32ThreeAddrFP<InstARM32::Veor>; using InstARM32Veor = InstARM32ThreeAddrFP<InstARM32::Veor>;
using InstARM32Vmla = InstARM32FourAddrFP<InstARM32::Vmla>; using InstARM32Vmla = InstARM32FourAddrFP<InstARM32::Vmla>;
......
...@@ -861,24 +861,6 @@ void TargetARM32::genTargetHelperCallFor(Inst *Instr) { ...@@ -861,24 +861,6 @@ void TargetARM32::genTargetHelperCallFor(Inst *Instr) {
} }
return; return;
} }
case Inst::Select: {
Variable *Dest = Instr->getDest();
const auto DestTy = Dest->getType();
if (isVectorType(DestTy)) {
auto *SelectInstr = llvm::cast<InstSelect>(Instr);
scalarizeInstruction(Dest,
[this](Variable *Dest, Variable *Src0,
Variable *Src1, Variable *Src2) {
return Context.insert<InstSelect>(Dest, Src0, Src1,
Src2);
},
llvm::cast<Variable>(SelectInstr->getSrc(0)),
llvm::cast<Variable>(SelectInstr->getSrc(1)),
llvm::cast<Variable>(SelectInstr->getSrc(2)));
SelectInstr->setDeleted();
}
return;
}
} }
} }
...@@ -5727,12 +5709,39 @@ void TargetARM32::lowerSelect(const InstSelect *Instr) { ...@@ -5727,12 +5709,39 @@ void TargetARM32::lowerSelect(const InstSelect *Instr) {
Operand *SrcF = Instr->getFalseOperand(); Operand *SrcF = Instr->getFalseOperand();
Operand *Condition = Instr->getCondition(); Operand *Condition = Instr->getCondition();
if (isVectorType(DestTy)) { if (!isVectorType(DestTy)) {
UnimplementedLoweringError(this, Instr); lowerInt1ForSelect(Dest, Condition, legalizeUndef(SrcT),
legalizeUndef(SrcF));
return; return;
} }
lowerInt1ForSelect(Dest, Condition, legalizeUndef(SrcT), legalizeUndef(SrcF)); Type TType = DestTy;
switch (DestTy) {
default:
llvm::report_fatal_error("Unexpected type for vector select.");
case IceType_v4i1:
TType = IceType_v4i32;
break;
case IceType_v8i1:
TType = IceType_v8i16;
break;
case IceType_v16i1:
TType = IceType_v16i8;
break;
case IceType_v4f32:
TType = IceType_v4i32;
break;
case IceType_v4i32:
case IceType_v8i16:
case IceType_v16i8:
break;
}
auto *T = makeReg(TType);
lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));
auto *SrcTR = legalizeToReg(SrcT);
auto *SrcFR = legalizeToReg(SrcF);
_vbsl(T, SrcTR, SrcFR)->setDestRedefined();
_mov(Dest, T);
} }
void TargetARM32::lowerStore(const InstStore *Instr) { void TargetARM32::lowerStore(const InstStore *Instr) {
......
...@@ -854,6 +854,9 @@ protected: ...@@ -854,6 +854,9 @@ protected:
void _vand(Variable *Dest, Variable *Src0, Variable *Src1) { void _vand(Variable *Dest, Variable *Src0, Variable *Src1) {
Context.insert<InstARM32Vand>(Dest, Src0, Src1); Context.insert<InstARM32Vand>(Dest, Src0, Src1);
} }
// Inserts an InstARM32Vbsl(Dest, Src0, Src1) at the current insertion point
// and hands the new instruction back, so the caller can adjust it further
// (e.g. mark the destination as redefined).
InstARM32Vbsl *_vbsl(Variable *Dest, Variable *Src0, Variable *Src1) {
  InstARM32Vbsl *Inserted = Context.insert<InstARM32Vbsl>(Dest, Src0, Src1);
  return Inserted;
}
void _vcvt(Variable *Dest, Variable *Src, InstARM32Vcvt::VcvtVariant Variant, void _vcvt(Variable *Dest, Variable *Src, InstARM32Vcvt::VcvtVariant Variant,
CondARM32::Cond Pred = CondARM32::AL) { CondARM32::Cond Pred = CondARM32::AL) {
Context.insert<InstARM32Vcvt>(Dest, Src, Variant, Pred); Context.insert<InstARM32Vcvt>(Dest, Src, Variant, Pred);
......
; Test that we handle select on vectors. ; Test that we handle select on vectors.
; TODO(eholk): This test will need to be updated once comparison is no longer
; scalarized.
; REQUIRES: allow_dump ; REQUIRES: allow_dump
; Compile using standalone assembler. ; Compile using standalone assembler.
; RUN: %p2i --filetype=asm -i %s --target=arm32 --args -O2 \ ; RUN: %p2i --filetype=asm -i %s --target=arm32 --args -O2 \
; RUN: | FileCheck %s --check-prefix=ASM ; RUN: | FileCheck %s --check-prefix=ASM
; Show bytes in assembled standalone code.
; RUN: %p2i --filetype=asm -i %s --target=arm32 --assemble --disassemble \
; RUN: --args -O2 --reg-use=s20 | FileCheck %s --check-prefix=DIS
; Compile using integrated assembler.
; RUN: %p2i --filetype=iasm -i %s --target=arm32 --args -O2 \
; RUN: --reg-use=s20 \
; RUN: | FileCheck %s --check-prefix=IASM
; Show bytes in assembled integrated code.
; RUN: %p2i --filetype=iasm -i %s --target=arm32 --assemble --disassemble \
; RUN: --args -O2 --reg-use=s20 | FileCheck %s --check-prefix=DIS
define internal <4 x float> @select4float(<4 x i1> %s, <4 x float> %a, define internal <4 x float> @select4float(<4 x i1> %s, <4 x float> %a,
<4 x float> %b) { <4 x float> %b) {
; ASM-LABEL:select4float: ; ASM-LABEL:select4float:
; DIS-LABEL:00000000 <select4float>: ; DIS-LABEL:00000000 <select4float>:
; IASM-LABEL:select4float:
entry: entry:
%res = select <4 x i1> %s, <4 x float> %a, <4 x float> %b %res = select <4 x i1> %s, <4 x float> %a, <4 x float> %b
; ASM: # q3 = def.pseudo ; ASM: vshl.u32 [[M:.*]], {{.*}}, #31
; ASM-NEXT: vmov.s8 r0, d0[0] ; ASM-NEXT: vshr.s32 [[M:.*]], {{.*}}, #31
; ASM-NEXT: vmov.f32 s16, s4 ; ASM-NEXT: vbsl.i32 [[M]], {{.*}}
; ASM-NEXT: vmov.f32 s17, s8 ; DIS: 0: f2bf0550
; ASM-NEXT: tst r0, #1 ; DIS-NEXT: 4: f2a10050
; ASM-NEXT: vmovne.f32 s17, s16 ; DIS-NEXT: 8: f3120154
; ASM-NEXT: vmov.f32 s12, s17 ; IASM-NOT: vshl
; ASM-NEXT: vmov.s8 r0, d0[4] ; IASM-NOT: vshr
; ASM-NEXT: vmov.f32 s16, s5 ; IASM-NOT: vbsl
; ASM-NEXT: vmov.f32 s17, s9
; ASM-NEXT: tst r0, #1
; ASM-NEXT: vmovne.f32 s17, s16
; ASM-NEXT: vmov.f32 s13, s17
; ASM-NEXT: vmov.s8 r0, d1[0]
; ASM-NEXT: vmov.f32 s16, s6
; ASM-NEXT: vmov.f32 s17, s10
; ASM-NEXT: tst r0, #1
; ASM-NEXT: vmovne.f32 s17, s16
; ASM-NEXT: vmov.f32 s14, s17
; ASM-NEXT: vmov.s8 r0, d1[4]
; ASM-NEXT: vmov.f32 s4, s7
; ASM-NEXT: vmov.f32 s8, s11
; ASM-NEXT: tst r0, #1
; ASM-NEXT: vmovne.f32 s8, s4
; ASM-NEXT: vmov.f32 s15, s8
; ASM-NEXT: vmov.f32 q0, q3
; ASM-NEXT: vpop {s16, s17}
; ASM-NEXT: # s16 = def.pseudo
; ASM-NEXT: # s17 = def.pseudo
; ASM-NEXT: bx lr
ret <4 x float> %res ret <4 x float> %res
} }
define internal <4 x i32> @select4i32(<4 x i1> %s, <4 x i32> %a, <4 x i32> %b) { define internal <4 x i32> @select4i32(<4 x i1> %s, <4 x i32> %a, <4 x i32> %b) {
; ASM-LABEL:select4i32: ; ASM-LABEL:select4i32:
; DIS-LABEL:00000000 <select4i32>: ; DIS-LABEL:00000010 <select4i32>:
; IASM-LABEL:select4i32:
entry: entry:
%res = select <4 x i1> %s, <4 x i32> %a, <4 x i32> %b %res = select <4 x i1> %s, <4 x i32> %a, <4 x i32> %b
; ASM: # q3 = def.pseudo ; ASM: vshl.u32 [[M:.*]], {{.*}}, #31
; ASM-NEXT: vmov.s8 r0, d0[0] ; ASM-NEXT: vshr.s32 [[M:.*]], {{.*}}, #31
; ASM-NEXT: vmov.32 r1, d2[0] ; ASM-NEXT: vbsl.i32 [[M]], {{.*}}
; ASM-NEXT: vmov.32 r2, d4[0] ; DIS: 10: f2bf0550
; ASM-NEXT: tst r0, #1 ; DIS-NEXT: 14: f2a10050
; ASM-NEXT: movne r2, r1 ; DIS-NEXT: 18: f3120154
; ASM-NEXT: vmov.32 d6[0], r2 ; IASM-NOT: vshl
; ASM-NEXT: vmov.s8 r0, d0[4] ; IASM-NOT: vshr
; ASM-NEXT: vmov.32 r1, d2[1] ; IASM-NOT: vbsl
; ASM-NEXT: vmov.32 r2, d4[1]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.32 d6[1], r2
; ASM-NEXT: vmov.s8 r0, d1[0]
; ASM-NEXT: vmov.32 r1, d3[0]
; ASM-NEXT: vmov.32 r2, d5[0]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.32 d7[0], r2
; ASM-NEXT: vmov.s8 r0, d1[4]
; ASM-NEXT: vmov.32 r1, d3[1]
; ASM-NEXT: vmov.32 r2, d5[1]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.32 d7[1], r2
; ASM-NEXT: vmov.i32 q0, q3
; ASM-NEXT: bx lr
ret <4 x i32> %res ret <4 x i32> %res
} }
define internal <8 x i16> @select8i16(<8 x i1> %s, <8 x i16> %a, <8 x i16> %b) { define internal <8 x i16> @select8i16(<8 x i1> %s, <8 x i16> %a, <8 x i16> %b) {
; ASM-LABEL:select8i16: ; ASM-LABEL:select8i16:
; DIS-LABEL:00000000 <select8i16>: ; DIS-LABEL:00000020 <select8i16>:
; IASM-LABEL:select8i16:
entry: entry:
%res = select <8 x i1> %s, <8 x i16> %a, <8 x i16> %b %res = select <8 x i1> %s, <8 x i16> %a, <8 x i16> %b
; ASM: # q3 = def.pseudo ; ASM: vshl.u16 [[M:.*]], {{.*}}, #15
; ASM-NEXT: vmov.s8 r0, d0[0] ; ASM-NEXT: vshr.s16 [[M:.*]], {{.*}}, #15
; ASM-NEXT: vmov.s16 r1, d2[0] ; ASM-NEXT: vbsl.i16 [[M]], {{.*}}
; ASM-NEXT: vmov.s16 r2, d4[0] ; DIS: 20: f29f0550
; ASM-NEXT: tst r0, #1 ; DIS-NEXT: 24: f2910050
; ASM-NEXT: movne r2, r1 ; DIS-NEXT: 28: f3120154
; ASM-NEXT: vmov.16 d6[0], r2 ; IASM-NOT: vshl
; ASM-NEXT: vmov.s8 r0, d0[2] ; IASM-NOT: vshr
; ASM-NEXT: vmov.s16 r1, d2[1] ; IASM-NOT: vbsl
; ASM-NEXT: vmov.s16 r2, d4[1]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.16 d6[1], r2
; ASM-NEXT: vmov.s8 r0, d0[4]
; ASM-NEXT: vmov.s16 r1, d2[2]
; ASM-NEXT: vmov.s16 r2, d4[2]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.16 d6[2], r2
; ASM-NEXT: vmov.s8 r0, d0[6]
; ASM-NEXT: vmov.s16 r1, d2[3]
; ASM-NEXT: vmov.s16 r2, d4[3]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.16 d6[3], r2
; ASM-NEXT: vmov.s8 r0, d1[0]
; ASM-NEXT: vmov.s16 r1, d3[0]
; ASM-NEXT: vmov.s16 r2, d5[0]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.16 d7[0], r2
; ASM-NEXT: vmov.s8 r0, d1[2]
; ASM-NEXT: vmov.s16 r1, d3[1]
; ASM-NEXT: vmov.s16 r2, d5[1]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.16 d7[1], r2
; ASM-NEXT: vmov.s8 r0, d1[4]
; ASM-NEXT: vmov.s16 r1, d3[2]
; ASM-NEXT: vmov.s16 r2, d5[2]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.16 d7[2], r2
; ASM-NEXT: vmov.s8 r0, d1[6]
; ASM-NEXT: vmov.s16 r1, d3[3]
; ASM-NEXT: vmov.s16 r2, d5[3]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.16 d7[3], r2
; ASM-NEXT: vmov.i16 q0, q3
; ASM-NEXT: bx lr
ret <8 x i16> %res ret <8 x i16> %res
} }
...@@ -154,110 +86,21 @@ entry: ...@@ -154,110 +86,21 @@ entry:
define internal <16 x i8> @select16i8(<16 x i1> %s, <16 x i8> %a, define internal <16 x i8> @select16i8(<16 x i1> %s, <16 x i8> %a,
<16 x i8> %b) { <16 x i8> %b) {
; ASM-LABEL:select16i8: ; ASM-LABEL:select16i8:
; DIS-LABEL:00000000 <select16i8>: ; DIS-LABEL:00000030 <select16i8>:
; IASM-LABEL:select16i8:
entry: entry:
%res = select <16 x i1> %s, <16 x i8> %a, <16 x i8> %b %res = select <16 x i1> %s, <16 x i8> %a, <16 x i8> %b
; ASM: # q3 = def.pseudo ; ASM: vshl.u8 [[M:.*]], {{.*}}, #7
; ASM-NEXT: vmov.s8 r0, d0[0] ; ASM-NEXT: vshr.s8 [[M:.*]], {{.*}}, #7
; ASM-NEXT: vmov.s8 r1, d2[0] ; ASM-NEXT: vbsl.i8 [[M]], {{.*}}
; ASM-NEXT: vmov.s8 r2, d4[0] ; DIS: 30: f28f0550
; ASM-NEXT: tst r0, #1 ; DIS-NEXT: 34: f2890050
; ASM-NEXT: movne r2, r1 ; DIS-NEXT: 38: f3120154
; ASM-NEXT: vmov.8 d6[0], r2 ; IASM-NOT: vshl
; ASM-NEXT: vmov.s8 r0, d0[1] ; IASM-NOT: vshr
; ASM-NEXT: vmov.s8 r1, d2[1] ; IASM-NOT: vbsl
; ASM-NEXT: vmov.s8 r2, d4[1]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.8 d6[1], r2
; ASM-NEXT: vmov.s8 r0, d0[2]
; ASM-NEXT: vmov.s8 r1, d2[2]
; ASM-NEXT: vmov.s8 r2, d4[2]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.8 d6[2], r2
; ASM-NEXT: vmov.s8 r0, d0[3]
; ASM-NEXT: vmov.s8 r1, d2[3]
; ASM-NEXT: vmov.s8 r2, d4[3]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.8 d6[3], r2
; ASM-NEXT: vmov.s8 r0, d0[4]
; ASM-NEXT: vmov.s8 r1, d2[4]
; ASM-NEXT: vmov.s8 r2, d4[4]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.8 d6[4], r2
; ASM-NEXT: vmov.s8 r0, d0[5]
; ASM-NEXT: vmov.s8 r1, d2[5]
; ASM-NEXT: vmov.s8 r2, d4[5]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.8 d6[5], r2
; ASM-NEXT: vmov.s8 r0, d0[6]
; ASM-NEXT: vmov.s8 r1, d2[6]
; ASM-NEXT: vmov.s8 r2, d4[6]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.8 d6[6], r2
; ASM-NEXT: vmov.s8 r0, d0[7]
; ASM-NEXT: vmov.s8 r1, d2[7]
; ASM-NEXT: vmov.s8 r2, d4[7]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.8 d6[7], r2
; ASM-NEXT: vmov.s8 r0, d1[0]
; ASM-NEXT: vmov.s8 r1, d3[0]
; ASM-NEXT: vmov.s8 r2, d5[0]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.8 d7[0], r2
; ASM-NEXT: vmov.s8 r0, d1[1]
; ASM-NEXT: vmov.s8 r1, d3[1]
; ASM-NEXT: vmov.s8 r2, d5[1]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.8 d7[1], r2
; ASM-NEXT: vmov.s8 r0, d1[2]
; ASM-NEXT: vmov.s8 r1, d3[2]
; ASM-NEXT: vmov.s8 r2, d5[2]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.8 d7[2], r2
; ASM-NEXT: vmov.s8 r0, d1[3]
; ASM-NEXT: vmov.s8 r1, d3[3]
; ASM-NEXT: vmov.s8 r2, d5[3]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.8 d7[3], r2
; ASM-NEXT: vmov.s8 r0, d1[4]
; ASM-NEXT: vmov.s8 r1, d3[4]
; ASM-NEXT: vmov.s8 r2, d5[4]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.8 d7[4], r2
; ASM-NEXT: vmov.s8 r0, d1[5]
; ASM-NEXT: vmov.s8 r1, d3[5]
; ASM-NEXT: vmov.s8 r2, d5[5]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.8 d7[5], r2
; ASM-NEXT: vmov.s8 r0, d1[6]
; ASM-NEXT: vmov.s8 r1, d3[6]
; ASM-NEXT: vmov.s8 r2, d5[6]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.8 d7[6], r2
; ASM-NEXT: vmov.s8 r0, d1[7]
; ASM-NEXT: vmov.s8 r1, d3[7]
; ASM-NEXT: vmov.s8 r2, d5[7]
; ASM-NEXT: tst r0, #1
; ASM-NEXT: movne r2, r1
; ASM-NEXT: vmov.8 d7[7], r2
; ASM-NEXT: vmov.i8 q0, q3
; ASM-NEXT: bx lr
ret <16 x i8> %res ret <16 x i8> %res
} }
...@@ -46,7 +46,7 @@ define internal <4 x float> @IntVecToFloatVec(<4 x i32> %a) { ...@@ -46,7 +46,7 @@ define internal <4 x float> @IntVecToFloatVec(<4 x i32> %a) {
; ASM: vcvt.f32.s32 q0, q0 ; ASM: vcvt.f32.s32 q0, q0
; DIS: 40: f3bb0640 ; DIS: 40: f3bb0640
; IASM-NOT: vcvt.f32.s32 ; IASM-NOT: vcvt
ret <4 x float> %v ret <4 x float> %v
} }
...@@ -46,7 +46,7 @@ define internal <4 x float> @UIntVecToFloatVec(<4 x i32> %a) { ...@@ -46,7 +46,7 @@ define internal <4 x float> @UIntVecToFloatVec(<4 x i32> %a) {
; ASM: vcvt.f32.u32 q0, q0 ; ASM: vcvt.f32.u32 q0, q0
; DIS: 40: f3bb06c0 ; DIS: 40: f3bb06c0
; IASM-NOT: vcvt.f32.u32 ; IASM-NOT: vcvt
ret <4 x float> %v ret <4 x float> %v
} }
...@@ -46,7 +46,7 @@ define internal <4 x i32> @FloatVecToIntVec(<4 x float> %a) { ...@@ -46,7 +46,7 @@ define internal <4 x i32> @FloatVecToIntVec(<4 x float> %a) {
; ASM: vcvt.s32.f32 q0, q0 ; ASM: vcvt.s32.f32 q0, q0
; DIS: 40: f3bb0740 ; DIS: 40: f3bb0740
; IASM-NOT: vcvt.s32.f32 ; IASM-NOT: vcvt
ret <4 x i32> %v ret <4 x i32> %v
} }
...@@ -45,7 +45,7 @@ define internal <4 x i32> @FloatVecToUIntVec(<4 x float> %a) { ...@@ -45,7 +45,7 @@ define internal <4 x i32> @FloatVecToUIntVec(<4 x float> %a) {
; ASM: vcvt.u32.f32 q0, q0 ; ASM: vcvt.u32.f32 q0, q0
; DIS: 40: f3bb07c0 ; DIS: 40: f3bb07c0
; IASM-NOT: vcvt.u32.f32 ; IASM-NOT: vcvt
ret <4 x i32> %v ret <4 x i32> %v
} }
...@@ -34,8 +34,8 @@ define internal <4 x i32> @SextV4I1(<4 x i32> %a) { ...@@ -34,8 +34,8 @@ define internal <4 x i32> @SextV4I1(<4 x i32> %a) {
; ASM-NEXT: vshr.s32 {{.*}}, #31 ; ASM-NEXT: vshr.s32 {{.*}}, #31
; DIS: 0: f2bf0550 ; DIS: 0: f2bf0550
; DIS-NEXT: 4: f2a10050 ; DIS-NEXT: 4: f2a10050
; IASM-NOT: vshl.u32 {{.*}}, #31 ; IASM-NOT: vshl
; IASM-NOT: vshr.s32 {{.*}}, #31 ; IASM-NOT: vshr
} }
define internal <8 x i16> @SextV8I1(<8 x i16> %a) { define internal <8 x i16> @SextV8I1(<8 x i16> %a) {
...@@ -46,12 +46,12 @@ define internal <8 x i16> @SextV8I1(<8 x i16> %a) { ...@@ -46,12 +46,12 @@ define internal <8 x i16> @SextV8I1(<8 x i16> %a) {
%trunc = trunc <8 x i16> %a to <8 x i1> %trunc = trunc <8 x i16> %a to <8 x i1>
%sext = sext <8 x i1> %trunc to <8 x i16> %sext = sext <8 x i1> %trunc to <8 x i16>
ret <8 x i16> %sext ret <8 x i16> %sext
; ASM: vshl.u16 {{.*}}, #15 ; ASM: vshl.u16 {{.*}}, #15
; ASM-NEXT: vshr.s16 {{.*}}, #15 ; ASM-NEXT: vshr.s16 {{.*}}, #15
; DIS: 10: f29f0550 ; DIS: 10: f29f0550
; DIS-NEXT: 14: f2910050 ; DIS-NEXT: 14: f2910050
; IASM-NOT: vshl.u16 {{.*}}, #15 ; IASM-NOT: vshl
; IASM-NOT: vshr.s16 {{.*}}, #15 ; IASM-NOT: vshr
} }
define internal <16 x i8> @SextV16I1(<16 x i8> %a) { define internal <16 x i8> @SextV16I1(<16 x i8> %a) {
...@@ -62,10 +62,10 @@ define internal <16 x i8> @SextV16I1(<16 x i8> %a) { ...@@ -62,10 +62,10 @@ define internal <16 x i8> @SextV16I1(<16 x i8> %a) {
%trunc = trunc <16 x i8> %a to <16 x i1> %trunc = trunc <16 x i8> %a to <16 x i1>
%sext = sext <16 x i1> %trunc to <16 x i8> %sext = sext <16 x i1> %trunc to <16 x i8>
ret <16 x i8> %sext ret <16 x i8> %sext
; ASM: vshl.u8 {{.*}}, #7 ; ASM: vshl.u8 {{.*}}, #7
; ASM-NEXT: vshr.s8 {{.*}}, #7 ; ASM-NEXT: vshr.s8 {{.*}}, #7
; DIS: 20: f28f0550 ; DIS: 20: f28f0550
; DIS-NEXT: 24: f2890050 ; DIS-NEXT: 24: f2890050
; IASM-NOT: vshl.u8 {{.*}}, #7 ; IASM-NOT: vshl
; IASM-NOT: vshr.s8 {{.*}}, #7 ; IASM-NOT: vshr
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment